diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 8b18726c9499..9c2edf68ce4b 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,25 +1 @@ -/*.md @ydb-platform/docs - -/ydb/core/fq/ @ydb-platform/fq -/ydb/core/public_http/ @ydb-platform/fq - -/ydb/docs/ @ydb-platform/docs - -/ydb/library/yql/ @ydb-platform/yql -/ydb/library/yql/dq @ydb-platform/yql @ydb-platform/qp -/ydb/library/yql/providers/generic @ydb-platform/fq -/ydb/library/yql/providers/pq @ydb-platform/fq -/ydb/library/yql/providers/s3 @ydb-platform/fq -/ydb/library/yql/providers/solomon @ydb-platform/fq - -/ydb/library/yql/yt @Krock21 @Krisha11 @zlobober @gritukan - -/ydb/services/fq/ @ydb-platform/fq - -/ydb/core/kafka_proxy @ydb-platform/Topics -/ydb/core/persqueue @ydb-platform/Topics -/ydb/services/datastreams @ydb-platform/Topics -/ydb/services/deprecated/persqueue_v0 @ydb-platform/Topics -/ydb/services/persqueue_v1 @ydb-platform/Topics - -/ydb/core/config/ut @ydb-platform/core +* @ydb-platform/ReleaseApprovers diff --git a/.github/actions/s3cmd/action.yml b/.github/actions/s3cmd/action.yml index a4ebb583555e..8a3a94d48d3e 100644 --- a/.github/actions/s3cmd/action.yml +++ b/.github/actions/s3cmd/action.yml @@ -8,16 +8,16 @@ inputs: required: true description: "s3 key secret" s3_bucket: - required: true + required: false description: "s3 bucket" s3_endpoint: required: true description: "s3 endpoint" folder_prefix: - required: true + required: false description: "folder prefix" build_preset: - required: true + required: false description: "build preset like relwithdebinfo" runs: using: "composite" @@ -35,7 +35,14 @@ runs: host_base = storage.yandexcloud.net host_bucket = %(bucket)s.storage.yandexcloud.net EOF - + env: + s3_key_id: ${{ inputs.s3_key_id }} + s3_secret_access_key: ${{ inputs.s3_key_secret }} + + - name: export s3 path variables + shell: bash + if: inputs.build_preset + run: | folder="${{ runner.arch == 'X64' && 'x86-64' || runner.arch == 'ARM64' && 'arm64' || 'unknown' 
}}" BUILD_PRESET="${{ inputs.build_preset }}" @@ -57,7 +64,4 @@ runs: echo "S3_BUCKET_PATH=s3://${{ inputs.s3_bucket }}/${{ github.repository }}/${{github.workflow}}/${{ github.run_id }}/${{ inputs.folder_prefix }}${folder}" >> $GITHUB_ENV echo "S3_URL_PREFIX=${{ inputs.s3_endpoint }}/${{ inputs.s3_bucket }}/${{ github.repository }}/${{ github.workflow }}/${{ github.run_id }}/${{ inputs.folder_prefix }}${folder}" >> $GITHUB_ENV echo "S3_TEST_ARTIFACTS_BUCKET_PATH=s3://${{ inputs.s3_bucket }}/testing_out_stuff/${{ github.repository }}/${{github.workflow}}/${{ github.run_id }}/${{ inputs.folder_prefix }}${folder}" >> $GITHUB_ENV - echo "S3_TEST_ARTIFACTS_URL_PREFIX=${{ inputs.s3_endpoint }}/${{ inputs.s3_bucket }}/testing_out_stuff/${{ github.repository }}/${{ github.workflow }}/${{ github.run_id }}/${{ inputs.folder_prefix }}${folder}" >> $GITHUB_ENV - env: - s3_key_id: ${{ inputs.s3_key_id }} - s3_secret_access_key: ${{ inputs.s3_key_secret }} + echo "S3_TEST_ARTIFACTS_URL_PREFIX=${{ inputs.s3_endpoint }}/${{ inputs.s3_bucket }}/testing_out_stuff/${{ github.repository }}/${{ github.workflow }}/${{ github.run_id }}/${{ inputs.folder_prefix }}${folder}" >> $GITHUB_ENV \ No newline at end of file diff --git a/.github/config/muted_ya.txt b/.github/config/muted_ya.txt index 63a8df0cbe9f..cfc7c0d7ee48 100644 --- a/.github/config/muted_ya.txt +++ b/.github/config/muted_ya.txt @@ -8,21 +8,17 @@ ydb/core/external_sources * ydb/core/quoter/ut QuoterWithKesusTest.PrefetchCoefficient ydb/core/keyvalue/ut_trace TKeyValueTracingTest.* ydb/core/kqp/provider/ut KikimrIcGateway.TestLoadBasicSecretValueFromExternalDataSourceMetadata -ydb/core/kqp/ut/olap KqpOlapIndexes.IndexesActualization ydb/core/kqp/ut/olap KqpOlapBlobsSharing.* -ydb/core/kqp/ut/olap KqpOlap.ScanQueryOltpAndOlap ydb/core/kqp/ut/olap KqpOlapStatistics.StatsUsageWithTTL -ydb/core/kqp/ut/olap KqpOlap.YqlScriptOltpAndOlap -ydb/core/kqp/ut/olap KqpOlapAggregations.Aggregation_ResultCountAll_FilterL ydb/core/kqp/ut/pg 
KqpPg.CreateIndex +ydb/core/kqp/ut/tx KqpLocksTricky.TestNoLocksIssueInteractiveTx+withSink +ydb/core/kqp/ut/tx KqpLocksTricky.TestNoLocksIssue+withSink +ydb/core/kqp/ut/tx KqpSnapshotRead.ReadOnlyTxWithIndexCommitsOnConcurrentWrite+withSink +ydb/core/kqp/ut/tx KqpSinkTx.InvalidateOnError ydb/core/kqp/ut/query KqpLimits.QueryReplySize ydb/core/kqp/ut/query KqpQuery.QueryTimeout ydb/core/kqp/ut/scan KqpRequestContext.TraceIdInErrorMessage ydb/core/kqp/ut/scheme KqpOlapScheme.TenThousandColumns -ydb/core/kqp/ut/scheme KqpOlap.OlapRead_GenericQuerys -ydb/core/kqp/ut/scheme KqpOlap.OlapRead_StreamGenericQuery -ydb/core/kqp/ut/scheme KqpOlap.OlapRead_UsesGenericQueryOnJoinWithDataShardTable -ydb/core/kqp/ut/scheme KqpOlapScheme.DropTable ydb/core/kqp/ut/scheme KqpScheme.AlterAsyncReplication ydb/core/kqp/ut/scheme KqpScheme.QueryWithAlter ydb/core/kqp/ut/scheme [14/50]* @@ -32,6 +28,8 @@ ydb/core/kqp/ut/service KqpQueryService.ExecuteQueryPgTableSelect ydb/core/kqp/ut/service KqpQueryService.QueryOnClosedSession ydb/core/kqp/ut/service KqpService.CloseSessionsWithLoad ydb/core/kqp/ut/service [38/50]* +ydb/core/kqp/ut/service KqpQueryService.TableSink_OltpUpdate +ydb/core/kqp/ut/service KqpQueryService.TableSink_OltpReplace+HasSecondaryIndex ydb/core/persqueue/ut [37/40] chunk chunk ydb/core/persqueue/ut [38/40] chunk chunk ydb/core/persqueue/ut TPQTest.*DirectRead* @@ -107,4 +105,6 @@ ydb/tests/functional/tenants test_storage_config.py.TestStorageConfig.* ydb/tests/functional/tenants test_tenants.py.* ydb/tests/functional/ydb_cli test_ydb_impex.py.TestImpex.test_big_dataset* ydb/tests/tools/pq_read/test test_timeout.py.TestTimeout.test_timeout +ydb/core/kqp/ut/olap KqpDecimalColumnShard.TestFilterCompare +ydb/core/kqp/ut/olap KqpDecimalColumnShard.TestAggregation ydb/tests/functional/rename [test_rename.py */10] chunk chunk diff --git a/.github/workflows/nightly_build.yml b/.github/workflows/nightly_build.yml new file mode 100644 index 000000000000..673e375ee5ea --- 
/dev/null +++ b/.github/workflows/nightly_build.yml @@ -0,0 +1,54 @@ +name: Nightly-Build # workflow used to upload built binaries to s3 +on: + workflow_dispatch: + inputs: + runner_label: + type: string + default: "auto-provisioned" + description: "runner label" + commit_sha: + type: string + default: "" +jobs: + build_and_test: + strategy: + fail-fast: false + matrix: + build_preset: ["relwithdebinfo", "release-asan"] + runs-on: [ self-hosted, auto-provisioned, "${{ format('build-preset-{0}', matrix.build_preset) }}" ] + name: Build and test ${{ matrix.build_preset }} + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + ref: ${{ inputs.commit_sha }} + fetch-depth: 2 + - name: Setup ydb access + uses: ./.github/actions/setup_ci_ydb_service_account_key_file_credentials + with: + ci_ydb_service_account_key_file_credentials: ${{ secrets.CI_YDB_SERVICE_ACCOUNT_KEY_FILE_CREDENTIALS }} + - name: Build and test + uses: ./.github/actions/build_and_test_ya + with: + build_preset: ${{ matrix.build_preset }} + build_target: "ydb/apps/ydbd" + increment: false + run_tests: false + put_build_results_to_cache: false + secs: ${{ format('{{"TESTMO_TOKEN2":"{0}","AWS_KEY_ID":"{1}","AWS_KEY_VALUE":"{2}","REMOTE_CACHE_USERNAME":"{3}","REMOTE_CACHE_PASSWORD":"{4}"}}', + secrets.TESTMO_TOKEN2, secrets.AWS_KEY_ID, secrets.AWS_KEY_VALUE, secrets.REMOTE_CACHE_USERNAME, secrets.REMOTE_CACHE_PASSWORD ) }} + vars: ${{ format('{{"AWS_BUCKET":"{0}","AWS_ENDPOINT":"{1}","REMOTE_CACHE_URL":"{2}","TESTMO_URL":"{3}","TESTMO_PROJECT_ID":"{4}"}}', + vars.AWS_BUCKET, vars.AWS_ENDPOINT, vars.REMOTE_CACHE_URL_YA, vars.TESTMO_URL, vars.TESTMO_PROJECT_ID ) }} + - name: Setup s3cmd + uses: ./.github/actions/s3cmd + with: + s3_bucket: "ydb-builds" + s3_endpoint: ${{ vars.AWS_ENDPOINT }} + s3_key_id: ${{ secrets.AWS_KEY_ID }} + s3_key_secret: ${{ secrets.AWS_KEY_VALUE }} + + - name: sync results to s3 and publish links + shell: bash + run: | + set -x + s3cmd sync --follow-symlinks --acl-public 
--no-progress --stats --no-check-md5 "ydb/apps/ydbd/ydbd" "s3://ydb-builds/${{ github.ref_name }}/${{ matrix.build_preset }}/ydbd" -d \ No newline at end of file diff --git a/library/cpp/lwtrace/mon/mon_lwtrace.cpp b/library/cpp/lwtrace/mon/mon_lwtrace.cpp index 09d56560c4bf..a10ae4a775f8 100644 --- a/library/cpp/lwtrace/mon/mon_lwtrace.cpp +++ b/library/cpp/lwtrace/mon/mon_lwtrace.cpp @@ -301,7 +301,7 @@ struct TLogQuery { } } catch (...) { ythrow yexception() - << CurrentExceptionMessage() + << EncodeHtmlPcdata(CurrentExceptionMessage()) << " while parsing track log query: " << Text; } @@ -1853,7 +1853,7 @@ class TTracesHtmlPrinter { try { Os << src->GetStartTime().ToStringUpToSeconds(); } catch (...) { - Os << "error: " << CurrentExceptionMessage(); + Os << "error: " << EncodeHtmlPcdata(CurrentExceptionMessage()); } Os << "" << "
" @@ -3821,17 +3821,17 @@ class TLWTraceMonPage : public NMonitoring::IMonPage { } } catch (TPageGenBase& gen) { out.Clear(); - out << gen.what(); + out << EncodeHtmlPcdata(gen.what()); } catch (...) { out.Clear(); if (request.GetParams().Get("error") == "text") { // Text error reply is helpful for ajax requests out << NMonitoring::HTTPOKTEXT; - out << CurrentExceptionMessage(); + out << EncodeHtmlPcdata(CurrentExceptionMessage()); } else { WWW_HTML(out) { out << "

Error

"
-                        << CurrentExceptionMessage()
+                        << EncodeHtmlPcdata(CurrentExceptionMessage())
                         << Endl;
                 }
             }
diff --git a/ydb/apps/version/version_definition.cpp b/ydb/apps/version/version_definition.cpp
index 4cfb93b74f79..8d7273d188fc 100644
--- a/ydb/apps/version/version_definition.cpp
+++ b/ydb/apps/version/version_definition.cpp
@@ -2,10 +2,21 @@
 
 NKikimrConfig::TCurrentCompatibilityInfo NKikimr::TCompatibilityInfo::MakeCurrent() {
     using TCurrentConstructor = NKikimr::TCompatibilityInfo::TProtoConstructor::TCurrentCompatibilityInfo;
-    // using TVersionConstructor = NKikimr::TCompatibilityInfo::TProtoConstructor::TVersion;
-    // using TCompatibilityRuleConstructor = NKikimr::TCompatibilityInfo::TProtoConstructor::TCompatibilityRule;
+    using TVersionConstructor = NKikimr::TCompatibilityInfo::TProtoConstructor::TVersion;
+    using TCompatibilityRuleConstructor = NKikimr::TCompatibilityInfo::TProtoConstructor::TCompatibilityRule;
 
     return TCurrentConstructor{
         .Application = "ydb",
+        .Version = TVersionConstructor{  // version recorded in the current compatibility info: Year 24, Major 3
+            .Year = 24,
+            .Major = 3,
+        },
+        .CanConnectTo = {
+            TCompatibilityRuleConstructor{  // rule for application "nbs", bounded by 23-3 and 24-3; NOTE(review): confirm whether the limits are inclusive
+                .Application = "nbs",
+                .LowerLimit = TVersionConstructor{ .Year = 23, .Major = 3 },
+                .UpperLimit = TVersionConstructor{ .Year = 24, .Major = 3 },
+            }
+        }
     }.ToPB();
 }
diff --git a/ydb/apps/ydbd/ya.make b/ydb/apps/ydbd/ya.make
index d399248c2264..12d685e849f1 100644
--- a/ydb/apps/ydbd/ya.make
+++ b/ydb/apps/ydbd/ya.make
@@ -1,6 +1,8 @@
 PROGRAM(ydbd)
 
-NO_EXPORT_DYNAMIC_SYMBOLS()
+IF (NOT SANITIZER_TYPE)  # for some reason, some tests fail under ASan; see the comment in CPPCOM-32
+    NO_EXPORT_DYNAMIC_SYMBOLS()
+ENDIF()
 
 IF (OS_LINUX)
     ALLOCATOR(TCMALLOC_256K)
diff --git a/ydb/core/audit/audit_log.cpp b/ydb/core/audit/audit_log.cpp
deleted file mode 100644
index 1d799767b435..000000000000
--- a/ydb/core/audit/audit_log.cpp
+++ /dev/null
@@ -1,24 +0,0 @@
-#include "audit_log.h"
-#include "audit_log_impl.h"
-
-#include 
-#include 
-#include 
-
-namespace NKikimr::NAudit {
-
-std::atomic AUDIT_LOG_ENABLED = false;
-
-THolder CreateAuditWriter(TMap>> logBackends)
-{
-    AUDIT_LOG_ENABLED.store(true);
-    return MakeHolder(std::move(logBackends));
-}
-
-void SendAuditLog(const NActors::TActorSystem* sys, TVector>&& parts)
-{
-    auto request = MakeHolder(Now(), std::move(parts));
-    sys->Send(MakeAuditServiceID(), request.Release());
-}
-
-}    // namespace NKikimr::NAudit
diff --git a/ydb/core/audit/audit_log.h b/ydb/core/audit/audit_log.h
index ae95ab5a4fef..512e79c3e247 100644
--- a/ydb/core/audit/audit_log.h
+++ b/ydb/core/audit/audit_log.h
@@ -1,19 +1,12 @@
 #pragma once
 
-#include 
+#include 
+#include 
 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-#include 
-#include 
-#include 
+#include 
+#include 
 
-#include 
-#include 
+#include 
 
 #define AUDIT_LOG_S(sys, expr)                                                                                                  \
     do {                                                                                                                        \
@@ -24,7 +17,7 @@
         }                                                                                                                       \
     } while (0) /**/
 
-#define AUDIT_LOG(expr) AUDIT_LOG_S((TlsActivationContext->ExecutorThread.ActorSystem), expr)
+#define AUDIT_LOG(expr) AUDIT_LOG_S((::NActors::TlsActivationContext->ExecutorThread.ActorSystem), expr)
 
 #define AUDIT_PART_NO_COND(key, value) AUDIT_PART_COND(key, value, true)
 #define AUDIT_PART_COND(key, value, condition)                                                                                    \
@@ -37,90 +30,14 @@
 #define GET_AUDIT_PART_MACRO(_1, _2, _3, NAME,...) NAME
 #define AUDIT_PART(...) GET_AUDIT_PART_MACRO(__VA_ARGS__, AUDIT_PART_COND, AUDIT_PART_NO_COND)(__VA_ARGS__)
 
+namespace NActors {
+    class TActorSystem;
+}
+
 namespace NKikimr::NAudit {
 
 extern std::atomic AUDIT_LOG_ENABLED;
 
-struct TEvAuditLog
-{
-    //
-    // Events declaration
-    //
-
-    enum EEvents
-    {
-        EvBegin = EventSpaceBegin(TKikimrEvents::ES_YDB_AUDIT_LOG),
-
-        // Request actors
-        EvWriteAuditLog = EvBegin + 0,
-
-        EvEnd
-    };
-
-    static_assert(EvEnd <= EventSpaceEnd(TKikimrEvents::ES_YDB_AUDIT_LOG),
-                  "expected EvEnd <= EventSpaceEnd(TKikimrEvents::ES_YDB_AUDIT_LOG)");
-
-    struct TEvWriteAuditLog
-        : public NActors::TEventLocal
-    {
-        TInstant Time;
-        TVector> Parts;
-
-        TEvWriteAuditLog(TInstant time, TVector>&& parts)
-            : Time(time)
-            , Parts(std::move(parts))
-        {}
-    };
-};
-
-class TAuditLogActor final
-    : public TActor
-{
-private:
-    const TMap>> LogBackends;
-public:
-    TAuditLogActor(TMap>> logBackends)
-        : TActor(&TThis::StateWork)
-        , LogBackends(std::move(logBackends))
-    {
-    }
-
-    static constexpr NKikimrServices::TActivity::EType ActorActivityType() {
-        return NKikimrServices::TActivity::AUDIT_WRITER_ACTOR;
-    }
-
-private:
-    STFUNC(StateWork);
-
-    void HandlePoisonPill(
-        const TEvents::TEvPoisonPill::TPtr& ev,
-        const TActorContext& ctx);
-
-    void HandleWriteAuditLog(
-        const TEvAuditLog::TEvWriteAuditLog::TPtr& ev,
-        const TActorContext& ctx);
-
-    static void WriteLog(
-        const TString& log,
-        const TVector>& logBackends);
-
-    static TString GetJsonLog(
-        const TEvAuditLog::TEvWriteAuditLog::TPtr& ev);
-
-    static TString GetTxtLog(
-        const TEvAuditLog::TEvWriteAuditLog::TPtr& ev);
-
-    void HandleUnexpectedEvent(STFUNC_SIG);
-};
-
-////////////////////////////////////////////////////////////////////////////////
-
 void SendAuditLog(const NActors::TActorSystem* sys, TVector>&& parts);
 
-inline NActors::TActorId MakeAuditServiceID() {
-    return NActors::TActorId(0, TStringBuf("YDB_AUDIT"));
-}
-
-THolder CreateAuditWriter(TMap>> logBackends);
-
 }   // namespace NKikimr::NAudit
diff --git a/ydb/core/audit/audit_log_impl.cpp b/ydb/core/audit/audit_log_impl.cpp
index 4438882c2d42..e03e9ac7ff8e 100644
--- a/ydb/core/audit/audit_log_impl.cpp
+++ b/ydb/core/audit/audit_log_impl.cpp
@@ -1,49 +1,84 @@
-#include "audit_log.h"
-#include "audit_log_impl.h"
-
 #include 
 #include 
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+
+#include "audit_log_service.h"
+#include "audit_log.h"
+
+#if defined LOG_T || \
+    defined LOG_D || \
+    defined LOG_I || \
+    defined LOG_N || \
+    defined LOG_W || \
+    defined LOG_E
+# error log macro redefinition
+#endif
+
+#define LOG_T(stream) LOG_TRACE_S((TlsActivationContext->AsActorContext()), NKikimrServices::AUDIT_LOG_WRITER, stream)
+#define LOG_D(stream) LOG_DEBUG_S((TlsActivationContext->AsActorContext()), NKikimrServices::AUDIT_LOG_WRITER, stream)
+#define LOG_I(stream) LOG_INFO_S((TlsActivationContext->AsActorContext()), NKikimrServices::AUDIT_LOG_WRITER, stream)
+#define LOG_N(stream) LOG_NOTICE_S((TlsActivationContext->AsActorContext()), NKikimrServices::AUDIT_LOG_WRITER, stream)
+#define LOG_W(stream) LOG_WARN_S((TlsActivationContext->AsActorContext()), NKikimrServices::AUDIT_LOG_WRITER, stream)
+#define LOG_E(stream) LOG_ERROR_S((TlsActivationContext->AsActorContext()), NKikimrServices::AUDIT_LOG_WRITER, stream)
 
 namespace NKikimr::NAudit {
 
-using namespace NActors;
+// TAuditLogActor
+//
 
-void TAuditLogActor::HandlePoisonPill(
-    const TEvents::TEvPoisonPill::TPtr& ev,
-    const TActorContext& ctx)
-{
-    Y_UNUSED(ev);
-    AUDIT_LOG_ENABLED.store(false);
-    Die(ctx);
-}
+struct TEvAuditLog {  // holder for the audit-log event ids and event types used by this file
+    //
+    // Events declaration
+    //
 
-STFUNC(TAuditLogActor::StateWork)
-{
-    switch (ev->GetTypeRewrite()) {
-        HFunc(TEvents::TEvPoisonPill, HandlePoisonPill);
-        HFunc(TEvAuditLog::TEvWriteAuditLog, HandleWriteAuditLog);
-    default:
-        HandleUnexpectedEvent(ev);
-        break;
-    }
-}
+    enum EEvents {
+        EvBegin = EventSpaceBegin(TKikimrEvents::ES_YDB_AUDIT_LOG),  // ids start at the beginning of the ES_YDB_AUDIT_LOG event space
+
+        // Request actors
+        EvWriteAuditLog = EvBegin + 0,
+
+        EvEnd  // one past the last declared id; range-checked by the static_assert below
+    };
+
+    static_assert(EvEnd <= EventSpaceEnd(TKikimrEvents::ES_YDB_AUDIT_LOG),
+        "expected EvEnd <= EventSpaceEnd(TKikimrEvents::ES_YDB_AUDIT_LOG)"
+    );
+
+    struct TEvWriteAuditLog : public NActors::TEventLocal {  // local event carrying one audit record
+        TInstant Time;  // timestamp supplied by the sender
+        TVector> Parts;  // record fields; NOTE(review): element type not visible here, presumably key/value pairs - confirm
+
+        TEvWriteAuditLog(TInstant time, TVector>&& parts)
+            : Time(time)
+            , Parts(std::move(parts))  // takes ownership of the caller's vector
+        {}
+    };
+};
 
-void TAuditLogActor::WriteLog(const TString& log, const TVector>& logBackends) {
+static void WriteLog(const TString& log, const TVector>& logBackends) {  // file-local helper (internal linkage): writes one formatted line to every backend in logBackends
     for (auto& logBackend : logBackends) {
         try {
-            logBackend->WriteData(
-                TLogRecord(
-                    ELogPriority::TLOG_INFO,
-                    log.data(),
-                    log.length()));
+            logBackend->WriteData(TLogRecord(  // every audit line is written with TLOG_INFO priority
+                ELogPriority::TLOG_INFO,
+                log.data(),
+                log.length()
+            ));
         } catch (const yexception& e) {
-            LOG_W("WriteLog:"
-                << " unable to write audit log (error: " << e.what() << ")");
+            LOG_W("WriteLog: unable to write audit log (error: " << e.what() << ")");  // a yexception from one backend is logged and the loop continues with the next backend
         }
     }
 }
 
-TString TAuditLogActor::GetJsonLog(const TEvAuditLog::TEvWriteAuditLog::TPtr& ev) {
+TString GetJsonLog(const TEvAuditLog::TEvWriteAuditLog::TPtr& ev) {
     const auto* msg = ev->Get();
     TStringStream ss;
     ss << msg->Time << ": ";
@@ -56,7 +91,7 @@ TString TAuditLogActor::GetJsonLog(const TEvAuditLog::TEvWriteAuditLog::TPtr& ev
     return ss.Str();
 }
 
-TString TAuditLogActor::GetTxtLog(const TEvAuditLog::TEvWriteAuditLog::TPtr& ev) {
+TString GetTxtLog(const TEvAuditLog::TEvWriteAuditLog::TPtr& ev) {
     const auto* msg = ev->Get();
     TStringStream ss;
     ss << msg->Time << ": ";
@@ -69,29 +104,81 @@ TString TAuditLogActor::GetTxtLog(const TEvAuditLog::TEvWriteAuditLog::TPtr& ev)
     return ss.Str();
 }
 
-void TAuditLogActor::HandleWriteAuditLog(const TEvAuditLog::TEvWriteAuditLog::TPtr& ev, const TActorContext& ctx) {
-    Y_UNUSED(ctx);
-
-    for (auto& logBackends : LogBackends) {
-        switch (logBackends.first) {
-            case NKikimrConfig::TAuditConfig::JSON:
-                WriteLog(GetJsonLog(ev), logBackends.second);
-                break;
-            case NKikimrConfig::TAuditConfig::TXT:
-                WriteLog(GetTxtLog(ev), logBackends.second);
-                break;
-            default:
-                WriteLog(GetJsonLog(ev), logBackends.second);
-                break;
+class TAuditLogActor final : public TActor {  // actor that formats TEvWriteAuditLog events and writes them to the configured log backends
+private:
+    const TAuditLogBackends LogBackends;  // backends grouped by output format; the map key is switched on in HandleWriteAuditLog
+
+public:
+    TAuditLogActor(TAuditLogBackends&& logBackends)
+        : TActor(&TThis::StateWork)
+        , LogBackends(std::move(logBackends))
+    {}
+
+    static constexpr NKikimrServices::TActivity::EType ActorActivityType() {
+        return NKikimrServices::TActivity::AUDIT_WRITER_ACTOR;
+    }
+
+private:
+    STFUNC(StateWork) {  // the only state: handles poison pill and write requests, reports anything else
+        switch (ev->GetTypeRewrite()) {
+            HFunc(TEvents::TEvPoisonPill, HandlePoisonPill);
+            HFunc(TEvAuditLog::TEvWriteAuditLog, HandleWriteAuditLog);
+        default:
+            HandleUnexpectedEvent(ev);
+            break;
         }
     }
+
+    void HandlePoisonPill(const TEvents::TEvPoisonPill::TPtr& ev, const TActorContext& ctx) {  // clears AUDIT_LOG_ENABLED, then terminates the actor
+        Y_UNUSED(ev);
+        AUDIT_LOG_ENABLED.store(false);
+        Die(ctx);
+    }
+
+    void HandleWriteAuditLog(const TEvAuditLog::TEvWriteAuditLog::TPtr& ev, const TActorContext& ctx) {  // formats the event once per map entry and writes it to that entry's backends
+        Y_UNUSED(ctx);
+
+        for (auto& logBackends : LogBackends) {
+            switch (logBackends.first) {
+                case NKikimrConfig::TAuditConfig::JSON:
+                    WriteLog(GetJsonLog(ev), logBackends.second);
+                    break;
+                case NKikimrConfig::TAuditConfig::TXT:
+                    WriteLog(GetTxtLog(ev), logBackends.second);
+                    break;
+                default:  // any other format value falls back to the JSON formatter
+                    WriteLog(GetJsonLog(ev), logBackends.second);
+                    break;
+            }
+        }
+    }
+
+    void HandleUnexpectedEvent(STFUNC_SIG) {  // unknown events are only reported at warning level
+        LOG_W("TAuditLogActor:"
+            << " unhandled event type: " << ev->GetTypeRewrite()
+            << " event: " << ev->GetTypeName()
+        );
+    }
+};
+
+// Client interface implementation
+//
+
+std::atomic AUDIT_LOG_ENABLED = false;
+
+void SendAuditLog(const NActors::TActorSystem* sys, TVector>&& parts)  // client entry point: wraps parts into an event and sends it to the audit service actor
+{
+    auto request = MakeHolder(Now(), std::move(parts));  // the record is stamped with the time of this call
+    sys->Send(MakeAuditServiceID(), request.Release());  // Release() hands the raw event pointer to Send(); sys is dereferenced without a null check
 }
 
-void TAuditLogActor::HandleUnexpectedEvent(STFUNC_SIG)
+// Service interface implementation
+//
+
+THolder CreateAuditWriter(TAuditLogBackends&& logBackends)  // builds the audit writer, taking ownership of logBackends
 {
-    LOG_W("TAuditLogActor:"
-          << " unhandled event type: " << ev->GetTypeRewrite()
-          << " event: " << ev->GetTypeName());
+    AUDIT_LOG_ENABLED.store(true);  // set at creation time; reset to false by the actor's poison-pill handler
+    return MakeHolder(std::move(logBackends));
 }
 
 }    // namespace NKikimr::NAudit
diff --git a/ydb/core/audit/audit_log_impl.h b/ydb/core/audit/audit_log_impl.h
deleted file mode 100644
index 91de44ddead3..000000000000
--- a/ydb/core/audit/audit_log_impl.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#if defined LOG_T || \
-    defined LOG_D || \
-    defined LOG_I || \
-    defined LOG_N || \
-    defined LOG_W || \
-    defined LOG_E
-# error log macro redefinition
-#endif
-
-#define LOG_T(stream) LOG_TRACE_S((TlsActivationContext->AsActorContext()), NKikimrServices::AUDIT_LOG_WRITER, stream)
-#define LOG_D(stream) LOG_DEBUG_S((TlsActivationContext->AsActorContext()), NKikimrServices::AUDIT_LOG_WRITER, stream)
-#define LOG_I(stream) LOG_INFO_S((TlsActivationContext->AsActorContext()), NKikimrServices::AUDIT_LOG_WRITER, stream)
-#define LOG_N(stream) LOG_NOTICE_S((TlsActivationContext->AsActorContext()), NKikimrServices::AUDIT_LOG_WRITER, stream)
-#define LOG_W(stream) LOG_WARN_S((TlsActivationContext->AsActorContext()), NKikimrServices::AUDIT_LOG_WRITER, stream)
-#define LOG_E(stream) LOG_ERROR_S((TlsActivationContext->AsActorContext()), NKikimrServices::AUDIT_LOG_WRITER, stream)
diff --git a/ydb/core/audit/audit_log_service.h b/ydb/core/audit/audit_log_service.h
new file mode 100644
index 000000000000..379a1ad516b1
--- /dev/null
+++ b/ydb/core/audit/audit_log_service.h
@@ -0,0 +1,24 @@
+#pragma once
+
+#include 
+
+#include 
+#include 
+
+class TLogBackend;
+
+namespace NActors {
+    class IActor;
+}
+
+namespace NKikimr::NAudit {
+
+inline NActors::TActorId MakeAuditServiceID() {  // fixed service id under which audit events are addressed
+    return NActors::TActorId(0, TStringBuf("YDB_AUDIT"));
+}
+
+using TAuditLogBackends = TMap>>;  // NOTE(review): template arguments are not visible here; presumably maps an output format to its log backends - confirm
+
+THolder CreateAuditWriter(TAuditLogBackends&& logBackends);  // consumes logBackends (rvalue reference) and returns the writer
+
+}   // namespace NKikimr::NAudit
diff --git a/ydb/core/audit/ya.make b/ydb/core/audit/ya.make
index e02fe8a19374..c748e3bd017f 100644
--- a/ydb/core/audit/ya.make
+++ b/ydb/core/audit/ya.make
@@ -1,10 +1,9 @@
 LIBRARY()
 
 SRCS(
-    audit_log_impl.cpp
-    audit_log_impl.h
-    audit_log.cpp
     audit_log.h
+    audit_log_service.h
+    audit_log_impl.cpp
 )
 
 PEERDIR(
diff --git a/ydb/core/base/appdata.cpp b/ydb/core/base/appdata.cpp
index ca40e8537151..97737c602414 100644
--- a/ydb/core/base/appdata.cpp
+++ b/ydb/core/base/appdata.cpp
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -61,6 +62,7 @@ struct TAppData::TImpl {
     NKikimrConfig::TGraphConfig GraphConfig;
     NKikimrSharedCache::TSharedCacheConfig SharedCacheConfig;
     NKikimrConfig::TMetadataCacheConfig MetadataCacheConfig;
+    NKikimrReplication::TReplicationDefaults ReplicationConfig;
 };
 
 TAppData::TAppData(
@@ -113,6 +115,7 @@ TAppData::TAppData(
     , GraphConfig(Impl->GraphConfig)
     , SharedCacheConfig(Impl->SharedCacheConfig)
     , MetadataCacheConfig(Impl->MetadataCacheConfig)
+    , ReplicationConfig(Impl->ReplicationConfig)
     , KikimrShouldContinue(kikimrShouldContinue)
 {}
 
diff --git a/ydb/core/base/appdata_fwd.h b/ydb/core/base/appdata_fwd.h
index 78c78590ef15..f22a246d2cd1 100644
--- a/ydb/core/base/appdata_fwd.h
+++ b/ydb/core/base/appdata_fwd.h
@@ -65,6 +65,10 @@ namespace NKikimrConfig {
     class TMetadataCacheConfig;
 }
 
+namespace NKikimrReplication {
+    class TReplicationDefaults;
+}
+
 namespace NKikimrNetClassifier {
     class TNetClassifierDistributableConfig;
     class TNetClassifierConfig;
@@ -213,6 +217,7 @@ struct TAppData {
     NKikimrConfig::TGraphConfig& GraphConfig;
     NKikimrSharedCache::TSharedCacheConfig& SharedCacheConfig;
     NKikimrConfig::TMetadataCacheConfig& MetadataCacheConfig;
+    NKikimrReplication::TReplicationDefaults& ReplicationConfig;
     bool EnforceUserTokenRequirement = false;
     bool EnforceUserTokenCheckRequirement = false; // check token if it was specified
     bool AllowHugeKeyValueDeletes = true; // delete when all clients limit deletes per request
diff --git a/ydb/core/base/board_lookup.cpp b/ydb/core/base/board_lookup.cpp
index 634cc5dc064e..e3a6e7df35b5 100644
--- a/ydb/core/base/board_lookup.cpp
+++ b/ydb/core/base/board_lookup.cpp
@@ -25,6 +25,7 @@ namespace NKikimr {
 class TBoardLookupActor : public TActorBootstrapped {
     const TString Path;
     const TActorId Owner;
+    const ui64 Cookie;
     const EBoardLookupMode Mode;
     const bool Subscriber;
     TBoardRetrySettings BoardRetrySettings;
@@ -111,12 +112,12 @@ class TBoardLookupActor : public TActorBootstrapped {
     void NotAvailable() {
         if (CurrentStateFunc() != &TThis::StateSubscribe) {
             Send(Owner, new TEvStateStorage::TEvBoardInfo(
-                TEvStateStorage::TEvBoardInfo::EStatus::NotAvailable, Path));
+                TEvStateStorage::TEvBoardInfo::EStatus::NotAvailable, Path), 0, Cookie);
         } else {
             Send(Owner,
                 new TEvStateStorage::TEvBoardInfoUpdate(
                     TEvStateStorage::TEvBoardInfo::EStatus::NotAvailable, Path
-                )
+                ), 0, Cookie
             );
         }
         return PassAway();
@@ -129,7 +130,7 @@ class TBoardLookupActor : public TActorBootstrapped {
                 auto reply = MakeHolder(
                     TEvStateStorage::TEvBoardInfo::EStatus::Ok, Path);
                 reply->InfoEntries = std::move(Info);
-                Send(Owner, std::move(reply));
+                Send(Owner, std::move(reply), 0, Cookie);
                 if (Subscriber) {
                     Become(&TThis::StateSubscribe);
                     return;
@@ -240,7 +241,7 @@ class TBoardLookupActor : public TActorBootstrapped {
                 auto reply = MakeHolder(
                     TEvStateStorage::TEvBoardInfo::EStatus::Ok, Path);
                 reply->Updates = { { oid, std::move(update.value()) } };
-                Send(Owner, std::move(reply));
+                Send(Owner, std::move(reply), 0, Cookie);
             }
         } else {
             if (info.GetDropped()) {
@@ -308,7 +309,7 @@ class TBoardLookupActor : public TActorBootstrapped {
                 auto reply = MakeHolder(
                     TEvStateStorage::TEvBoardInfo::EStatus::Ok, Path);
                 reply->Updates = std::move(updates);
-                Send(Owner, std::move(reply));
+                Send(Owner, std::move(reply), 0, Cookie);
             }
         }
 
@@ -484,7 +485,7 @@ class TBoardLookupActor : public TActorBootstrapped {
             auto reply = MakeHolder(
                 TEvStateStorage::TEvBoardInfo::EStatus::Ok, Path);
             reply->Updates = std::move(updates);
-            Send(Owner, std::move(reply));
+            Send(Owner, std::move(reply), 0, Cookie);
         }
     }
 
@@ -495,9 +496,10 @@ class TBoardLookupActor : public TActorBootstrapped {
 
     TBoardLookupActor(
         const TString &path, TActorId owner, EBoardLookupMode mode,
-        TBoardRetrySettings boardRetrySettings)
+        TBoardRetrySettings boardRetrySettings, ui64 cookie = 0)
         : Path(path)
         , Owner(owner)
+        , Cookie(cookie)
         , Mode(mode)
         , Subscriber(Mode == EBoardLookupMode::Subscription)
         , BoardRetrySettings(std::move(boardRetrySettings))
@@ -545,8 +547,8 @@ class TBoardLookupActor : public TActorBootstrapped {
 
 IActor* CreateBoardLookupActor(
         const TString &path, const TActorId &owner, EBoardLookupMode mode,
-        TBoardRetrySettings boardRetrySettings) {
-    return new TBoardLookupActor(path, owner, mode, std::move(boardRetrySettings));
+        TBoardRetrySettings boardRetrySettings, ui64 cookie) {  // cookie is stored by the actor and passed as the cookie of its Send() calls to owner
+    return new TBoardLookupActor(path, owner, mode, std::move(boardRetrySettings), cookie);
 }
 
 }
diff --git a/ydb/core/base/counters.cpp b/ydb/core/base/counters.cpp
index 538266429c5a..915810b80074 100644
--- a/ydb/core/base/counters.cpp
+++ b/ydb/core/base/counters.cpp
@@ -10,6 +10,7 @@ static const THashSet DATABASE_SERVICES
     = {{ TString("compile"),
          TString("coordinator"),
          TString("dsproxy"),
+         TString("dsproxy_mon"),
          TString("dsproxynode"),
          TString("dsproxy_overview"),
          TString("dsproxy_percentile"),
@@ -39,6 +40,7 @@ static const THashSet DATABASE_SERVICES
          TString("pqproxy|readSession"),
          TString("pqproxy|schemecache"),
          TString("pqproxy|mirrorWriteTimeLag"),
+         TString("pqproxy|userAgents"),
          TString("datastreams"),
     }};
 
diff --git a/ydb/core/base/events.h b/ydb/core/base/events.h
index a73efb723307..704f503215e6 100644
--- a/ydb/core/base/events.h
+++ b/ydb/core/base/events.h
@@ -12,175 +12,180 @@ namespace NKikimr {
 struct TKikimrEvents : TEvents {
     enum EEventSpaceKikimr {
         /* WARNING:
-           Please mind that you should never change the order
-           for the following keywords, you should consider
-           issues about "rolling update".
+           Please keep in mind that you must never change these values;
+           consider the "rolling update" implications.
         */
-        ES_KIKIMR_ES_BEGIN = ES_USERSPACE,  //4096
-        ES_STATESTORAGE, //4097
-        ES_DEPRECATED_4098, //4098
-        ES_BLOBSTORAGE, //4099
-        ES_HIVE, //4100
-        ES_TABLETBASE, //4101
-        ES_TABLET, //4102
-        ES_TABLETRESOLVER,
-        ES_LOCAL,
-        ES_DEPRECATED_4105,
-        ES_TX_PROXY, // generic proxy commands 4106
-        ES_TX_COORDINATOR,
-        ES_TX_MEDIATOR,
-        ES_TX_PROCESSING, // 4109
-        ES_DEPRECATED_4110,
-        ES_DEPRECATED_4111,
-        ES_DEPRECATED_4112,
-        ES_TX_DATASHARD,
-        ES_DEPRECATED_4114,
-        ES_TX_USERPROXY, // user proxy interface
-        ES_SCHEME_CACHE,
-        ES_TX_PROXY_REQ,
-        ES_TABLET_PIPE,
-        ES_DEPRECATED_4118,
-        ES_TABLET_COUNTERS_AGGREGATOR,
-        ES_DEPRECATED_4121,
-        ES_PROXY_BUS, //4122
-        ES_BOOTSTRAPPER,
-        ES_TX_MEDIATORTIMECAST,
-        ES_DEPRECATED_4125,
-        ES_DEPRECATED_4126,
-        ES_DEPRECATED_4127,
-        ES_DEPRECATED_4128,
-        ES_DEPRECATED_4129,
-        ES_DEPRECATED_4130,
-        ES_DEPRECATED_4131,
-        ES_KEYVALUE, //4132
-        ES_MSGBUS_TRACER,
-        ES_RTMR_TABLET,
-        ES_FLAT_EXECUTOR,
-        ES_NODE_WHITEBOARD,
-        ES_FLAT_TX_SCHEMESHARD, // 4137
-        ES_PQ,
-        ES_YQL_KIKIMR_PROXY,
-        ES_PQ_META_CACHE,
-        ES_DEPRECATED_4141,
-        ES_PQ_L2_CACHE, //4142
-        ES_TOKEN_BUILDER,
-        ES_TICKET_PARSER,
-        ES_KQP = NYql::NDq::TDqEvents::ES_DQ_COMPUTE_KQP_COMPATIBLE, // 4145
-        ES_BLACKBOX_VALIDATOR,
-        ES_SELF_PING,
-        ES_PIPECACHE,
-        ES_PQ_PROXY,
-        ES_CMS,
-        ES_NODE_BROKER,
-        ES_TX_ALLOCATOR, //4152
+        ES_KIKIMR_ES_BEGIN = ES_USERSPACE, // 4096
+        ES_STATESTORAGE = 4097, 
+        ES_DEPRECATED_4098 = 4098, 
+        ES_BLOBSTORAGE = 4099, 
+        ES_HIVE = 4100, 
+        ES_TABLETBASE = 4101, 
+        ES_TABLET = 4102, 
+        ES_TABLETRESOLVER = 4103, 
+        ES_LOCAL = 4104, 
+        ES_DEPRECATED_4105 = 4105, 
+        ES_TX_PROXY = 4106, 
+        ES_TX_COORDINATOR = 4107, 
+        ES_TX_MEDIATOR = 4108, 
+        ES_TX_PROCESSING = 4109, 
+        ES_DEPRECATED_4110 = 4110, 
+        ES_DEPRECATED_4111 = 4111, 
+        ES_DEPRECATED_4112 = 4112, 
+        ES_TX_DATASHARD = 4113, 
+        ES_DEPRECATED_4114 = 4114, 
+        ES_TX_USERPROXY = 4115, 
+        ES_SCHEME_CACHE = 4116, 
+        ES_TX_PROXY_REQ = 4117, 
+        ES_TABLET_PIPE = 4118, 
+        ES_DEPRECATED_4118 = 4119, // NB: the name says 4118, but 4119 is the value this entry had in the previous implicit numbering (4118 is ES_TABLET_PIPE); keep it as is.
+        ES_TABLET_COUNTERS_AGGREGATOR = 4120, 
+        ES_DEPRECATED_4121 = 4121, 
+        ES_PROXY_BUS = 4122, 
+        ES_BOOTSTRAPPER = 4123, 
+        ES_TX_MEDIATORTIMECAST = 4124, 
+        ES_DEPRECATED_4125 = 4125, 
+        ES_DEPRECATED_4126 = 4126, 
+        ES_DEPRECATED_4127 = 4127, 
+        ES_DEPRECATED_4128 = 4128, 
+        ES_DEPRECATED_4129 = 4129, 
+        ES_DEPRECATED_4130 = 4130, 
+        ES_DEPRECATED_4131 = 4131, 
+        ES_KEYVALUE = 4132, 
+        ES_MSGBUS_TRACER = 4133, 
+        ES_RTMR_TABLET = 4134, 
+        ES_FLAT_EXECUTOR = 4135, 
+        ES_NODE_WHITEBOARD = 4136, 
+        ES_FLAT_TX_SCHEMESHARD = 4137, 
+        ES_PQ = 4138, 
+        ES_YQL_KIKIMR_PROXY = 4139, 
+        ES_PQ_META_CACHE = 4140, 
+        ES_DEPRECATED_4141 = 4141, 
+        ES_PQ_L2_CACHE = 4142, 
+        ES_TOKEN_BUILDER = 4143, 
+        ES_TICKET_PARSER = 4144, 
+        ES_KQP = 4145, 
+        ES_BLACKBOX_VALIDATOR = 4146, 
+        ES_SELF_PING = 4147, 
+        ES_PIPECACHE = 4148, 
+        ES_PQ_PROXY = 4149, 
+        ES_CMS = 4150, 
+        ES_NODE_BROKER = 4151, 
+        ES_TX_ALLOCATOR = 4152, 
         // reserve event space for each RTMR process
-        ES_RTMR_STORAGE,
-        ES_RTMR_PROXY,
-        ES_RTMR_PUSHER,
-        ES_RTMR_HOST,
-        ES_RESOURCE_BROKER,
-        ES_VIEWER,
-        ES_SUB_DOMAIN,
-        ES_GRPC_PROXY_STATUS, //OLD
-        ES_SQS,
-        ES_BLOCKSTORE, //4162
-        ES_RTMR_ICBUS,
-        ES_TENANT_POOL,
-        ES_USER_REGISTRY,
-        ES_TVM_SETTINGS_UPDATER,
-        ES_PQ_CLUSTERS_UPDATER,
-        ES_TENANT_SLOT_BROKER,
-        ES_GRPC_CALLS,
-        ES_CONSOLE,
-        ES_KESUS_PROXY,
-        ES_KESUS,
-        ES_CONFIGS_DISPATCHER,
-        ES_IAM_SERVICE,
-        ES_FOLDER_SERVICE,
-        ES_GRPC_MON,
-        ES_QUOTA, // must be in sync with ydb/core/quoter/public/quoter.h
-        ES_COORDINATED_QUOTA,
-        ES_ACCESS_SERVICE,
-        ES_USER_ACCOUNT_SERVICE,
-        ES_PQ_PROXY_NEW,
-        ES_GRPC_STREAMING,
-        ES_SCHEME_BOARD,
-        ES_FLAT_TX_SCHEMESHARD_PROTECTED,
-        ES_GRPC_REQUEST_PROXY,
-        ES_EXPORT_SERVICE,
-        ES_TX_ALLOCATOR_CLIENT,
-        ES_PQ_CLUSTER_TRACKER,
-        ES_NET_CLASSIFIER,
-        ES_SYSTEM_VIEW,
-        ES_TENANT_NODE_ENUMERATOR,
-        ES_SERVICE_ACCOUNT_SERVICE,
-        ES_INDEX_BUILD,
-        ES_BLOCKSTORE_PRIVATE,
-        ES_YT_WRAPPER,
-        ES_S3_WRAPPER,
-        ES_FILESTORE,
-        ES_FILESTORE_PRIVATE,
-        ES_YDB_METERING,
-        ES_IMPORT_SERVICE, // 4200
-        ES_TX_OLAPSHARD,
-        ES_TX_COLUMNSHARD,
-        ES_CROSSREF,
-        ES_SCHEME_BOARD_MON,
-        ES_YQL_ANALYTICS_PROXY = NFq::TEventIds::ES_YQL_ANALYTICS_PROXY,
-        ES_BLOB_CACHE,
-        ES_LONG_TX_SERVICE,
-        ES_TEST_SHARD,
-        ES_DATASTREAMS_PROXY,
-        ES_IAM_TOKEN_SERVICE,
-        ES_HEALTH_CHECK,
-        ES_DQ = NYql::NDq::TDqEvents::ES_DQ_COMPUTE, // 4212
-        ES_YQ, // 4213
-        ES_CHANGE_EXCHANGE_DATASHARD,
-        ES_DATABASE_SERVICE, //4215
-        ES_SEQUENCESHARD, // 4216
-        ES_SEQUENCEPROXY, // 4217
-        ES_CLOUD_STORAGE,
-        ES_CLOUD_STORAGE_PRIVATE,
-        ES_FOLDER_SERVICE_ADAPTER,
-        ES_PQ_PARTITION_WRITER,
-        ES_YDB_PROXY,
-        ES_REPLICATION_CONTROLLER,
-        ES_HTTP_PROXY,
-        ES_BLOB_DEPOT,
-        ES_DATASHARD_LOAD,
-        ES_METADATA_PROVIDER,
-        ES_INTERNAL_REQUEST,
-        ES_BACKGROUND_TASKS,
-        ES_TIERING,
-        ES_METADATA_INITIALIZER,
-        ES_YDB_AUDIT_LOG,
-        ES_METADATA_MANAGER,
-        ES_METADATA_SECRET,
-        ES_TEST_LOAD,
-        ES_GRPC_CANCELATION,
-        ES_DISCOVERY,
-        ES_EXT_INDEX,
-        ES_CONVEYOR,
-        ES_KQP_SCAN_EXCHANGE,
-        ES_IC_NODE_CACHE,
-        ES_DATA_OPERATIONS,
-        ES_KAFKA,
-        ES_STATISTICS,
-        ES_LDAP_AUTH_PROVIDER,
-        ES_DB_METADATA_CACHE,
-        ES_TABLE_CREATOR,
-        ES_PQ_PARTITION_CHOOSER,
-        ES_GRAPH,
-        ES_REPLICATION_WORKER,
-        ES_CHANGE_EXCHANGE,
-        ES_S3_PROVIDER,
-        ES_NEBIUS_ACCESS_SERVICE,
-        ES_REPLICATION_SERVICE,
-        ES_BACKUP_SERVICE,
-        ES_TX_BACKGROUND,
-        ES_SS_BG_TASKS,
-        ES_LIMITER
+        ES_RTMR_STORAGE = 4153, 
+        ES_RTMR_PROXY = 4154, 
+        ES_RTMR_PUSHER = 4155, 
+        ES_RTMR_HOST = 4156, 
+        ES_RESOURCE_BROKER = 4157, 
+        ES_VIEWER = 4158, 
+        ES_SUB_DOMAIN = 4159, 
+        ES_GRPC_PROXY_STATUS = 4160, 
+        ES_SQS = 4161, 
+        ES_BLOCKSTORE = 4162, 
+        ES_RTMR_ICBUS = 4163, 
+        ES_TENANT_POOL = 4164, 
+        ES_USER_REGISTRY = 4165, 
+        ES_TVM_SETTINGS_UPDATER = 4166, 
+        ES_PQ_CLUSTERS_UPDATER = 4167, 
+        ES_TENANT_SLOT_BROKER = 4168, 
+        ES_GRPC_CALLS = 4169, 
+        ES_CONSOLE = 4170, 
+        ES_KESUS_PROXY = 4171, 
+        ES_KESUS = 4172, 
+        ES_CONFIGS_DISPATCHER = 4173, 
+        ES_IAM_SERVICE = 4174, 
+        ES_FOLDER_SERVICE = 4175, 
+        ES_GRPC_MON = 4176, 
+        ES_QUOTA = 4177, // must be in sync with ydb/core/quoter/public/quoter.h
+        ES_COORDINATED_QUOTA = 4178, 
+        ES_ACCESS_SERVICE = 4179, 
+        ES_USER_ACCOUNT_SERVICE = 4180, 
+        ES_PQ_PROXY_NEW = 4181, 
+        ES_GRPC_STREAMING = 4182, 
+        ES_SCHEME_BOARD = 4183, 
+        ES_FLAT_TX_SCHEMESHARD_PROTECTED = 4184, 
+        ES_GRPC_REQUEST_PROXY = 4185, 
+        ES_EXPORT_SERVICE = 4186, 
+        ES_TX_ALLOCATOR_CLIENT = 4187, 
+        ES_PQ_CLUSTER_TRACKER = 4188, 
+        ES_NET_CLASSIFIER = 4189, 
+        ES_SYSTEM_VIEW = 4190, 
+        ES_TENANT_NODE_ENUMERATOR = 4191, 
+        ES_SERVICE_ACCOUNT_SERVICE = 4192, 
+        ES_INDEX_BUILD = 4193, 
+        ES_BLOCKSTORE_PRIVATE = 4194, 
+        ES_YT_WRAPPER = 4195, 
+        ES_S3_WRAPPER = 4196, 
+        ES_FILESTORE = 4197, 
+        ES_FILESTORE_PRIVATE = 4198, 
+        ES_YDB_METERING = 4199, 
+        ES_IMPORT_SERVICE = 4200, 
+        ES_TX_OLAPSHARD = 4201, 
+        ES_TX_COLUMNSHARD = 4202, 
+        ES_CROSSREF = 4203, 
+        ES_SCHEME_BOARD_MON = 4204, 
+        ES_YQL_ANALYTICS_PROXY = 4205, 
+        ES_BLOB_CACHE = 4206, 
+        ES_LONG_TX_SERVICE = 4207, 
+        ES_TEST_SHARD = 4208, 
+        ES_DATASTREAMS_PROXY = 4209, 
+        ES_IAM_TOKEN_SERVICE = 4210, 
+        ES_HEALTH_CHECK = 4211, 
+        ES_DQ = 4212, 
+        ES_YQ = 4213, 
+        ES_CHANGE_EXCHANGE_DATASHARD = 4214, 
+        ES_DATABASE_SERVICE = 4215, 
+        ES_SEQUENCESHARD = 4216, 
+        ES_SEQUENCEPROXY = 4217, 
+        ES_CLOUD_STORAGE = 4218, 
+        ES_CLOUD_STORAGE_PRIVATE = 4219, 
+        ES_FOLDER_SERVICE_ADAPTER = 4220, 
+        ES_PQ_PARTITION_WRITER = 4221, 
+        ES_YDB_PROXY = 4222, 
+        ES_REPLICATION_CONTROLLER = 4223, 
+        ES_HTTP_PROXY = 4224, 
+        ES_BLOB_DEPOT = 4225, 
+        ES_DATASHARD_LOAD = 4226, 
+        ES_METADATA_PROVIDER = 4227, 
+        ES_INTERNAL_REQUEST = 4228, 
+        ES_BACKGROUND_TASKS = 4229, 
+        ES_TIERING = 4230, 
+        ES_METADATA_INITIALIZER = 4231, 
+        ES_YDB_AUDIT_LOG = 4232, 
+        ES_METADATA_MANAGER = 4233, 
+        ES_METADATA_SECRET = 4234, 
+        ES_TEST_LOAD = 4235, 
+        ES_GRPC_CANCELATION = 4236, 
+        ES_DISCOVERY = 4237, 
+        ES_EXT_INDEX = 4238, 
+        ES_CONVEYOR = 4239, 
+        ES_KQP_SCAN_EXCHANGE = 4240, 
+        ES_IC_NODE_CACHE = 4241, 
+        ES_DATA_OPERATIONS = 4242, 
+        ES_KAFKA = 4243, 
+        ES_STATISTICS = 4244, 
+        ES_LDAP_AUTH_PROVIDER = 4245, 
+        ES_DB_METADATA_CACHE = 4246, 
+        ES_TABLE_CREATOR = 4247, 
+        ES_PQ_PARTITION_CHOOSER = 4248, 
+        ES_GRAPH = 4249, 
+        ES_REPLICATION_WORKER = 4250, 
+        ES_CHANGE_EXCHANGE = 4251, 
+        ES_S3_PROVIDER = 4252, 
+        ES_NEBIUS_ACCESS_SERVICE = 4253, 
+        ES_REPLICATION_SERVICE = 4254, 
+        ES_BACKUP_SERVICE = 4255, 
+        ES_TX_BACKGROUND = 4256, 
+        ES_SS_BG_TASKS = 4257, 
+        ES_LIMITER = 4258, 
+        // 4259 is intentionally skipped: ES_MEMORY = 4259 already exists in main.
+        ES_GROUPED_ALLOCATIONS_MANAGER = 4260,
     };
 };
 
+static_assert((int)TKikimrEvents::EEventSpaceKikimr::ES_KQP == (int)NYql::NDq::TDqEvents::ES_DQ_COMPUTE_KQP_COMPATIBLE);
+static_assert((int)TKikimrEvents::EEventSpaceKikimr::ES_DQ == (int)NYql::NDq::TDqEvents::ES_DQ_COMPUTE);
+static_assert((int)TKikimrEvents::EEventSpaceKikimr::ES_YQL_ANALYTICS_PROXY == (int)NFq::TEventIds::ES_YQL_ANALYTICS_PROXY);
+
 }
diff --git a/ydb/core/base/path.h b/ydb/core/base/path.h
index ca7c1403f56b..71263b8c6b79 100644
--- a/ydb/core/base/path.h
+++ b/ydb/core/base/path.h
@@ -37,4 +37,12 @@ inline TVector ChildPath(const TVector& parentPath, const TStr
     return path;
 }
 
+inline TVector ChildPath(const TVector& parentPath, const TVector& childPath) { // Returns a new path: parentPath followed by every element of childPath, in order.
+    auto path = parentPath; // work on a copy; parentPath is a const reference and stays untouched
+    for (const auto& childName : childPath) {
+        path.push_back(childName);
+    }
+    return path;
+}
+
 }
diff --git a/ydb/core/base/pool_stats_collector.cpp b/ydb/core/base/pool_stats_collector.cpp
index 0ddce4c08d37..40e57e8eb2ca 100644
--- a/ydb/core/base/pool_stats_collector.cpp
+++ b/ydb/core/base/pool_stats_collector.cpp
@@ -46,12 +46,24 @@ class TStatsCollectingActor : public NActors::TStatsCollectingActor {
     void OnWakeup(const TActorContext &ctx) override {
         MiniKQLPoolStats.Update();
 
-        TVector> pools;
+        auto systemUpdate = std::make_unique(); // event that is filled below and sent to the node whiteboard service
+        ui32 coresTotal = 0; // sum of DefaultThreads over all pools except the one named "IO"
+        double coresUsed = 0; // sum of Usage * LimitThreads over all pools
         for (const auto& pool : PoolCounters) {
-            pools.emplace_back(pool.Name, pool.Usage, pool.Threads);
+            auto& pb = *systemUpdate->Record.AddPoolStats(); // one PoolStats entry per pool
+            pb.SetName(pool.Name);
+            pb.SetUsage(pool.Usage);
+            pb.SetThreads(static_cast(pool.Threads));
+            pb.SetLimit(static_cast(pool.LimitThreads));
+            if (pool.Name != "IO") { // the "IO" pool is not counted in coresTotal
+                coresTotal += static_cast(pool.DefaultThreads);
+            }
+            coresUsed += pool.Usage * pool.LimitThreads; // NOTE(review): unlike coresTotal, this includes the "IO" pool and uses LimitThreads, not DefaultThreads - confirm this is intended
         }
+        systemUpdate->Record.SetCoresTotal(coresTotal);
+        systemUpdate->Record.SetCoresUsed(coresUsed);
 
-        ctx.Send(NNodeWhiteboard::MakeNodeWhiteboardServiceId(ctx.SelfID.NodeId()), new NNodeWhiteboard::TEvWhiteboard::TEvSystemStateUpdate(pools));
+        ctx.Send(NNodeWhiteboard::MakeNodeWhiteboardServiceId(ctx.SelfID.NodeId()), systemUpdate.release()); // raw pointer is handed to Send, as the removed code did with `new`
     }
 
 private:
diff --git a/ydb/core/base/statestorage.h b/ydb/core/base/statestorage.h
index 1c609a702d40..6a1726a3fb3b 100644
--- a/ydb/core/base/statestorage.h
+++ b/ydb/core/base/statestorage.h
@@ -556,7 +556,7 @@ IActor* CreateStateStorageBoardReplica(const TIntrusivePtr &,
 IActor* CreateSchemeBoardReplica(const TIntrusivePtr&, ui32);
 IActor* CreateBoardLookupActor(
     const TString &path, const TActorId &owner, EBoardLookupMode mode,
-    TBoardRetrySettings boardRetrySettings = {});
+    TBoardRetrySettings boardRetrySettings = {}, ui64 cookie = 0);
 IActor* CreateBoardPublishActor(
     const TString &path, const TString &payload, const TActorId &owner, ui32 ttlMs, bool reg,
     TBoardRetrySettings boardRetrySettings = {});
diff --git a/ydb/core/blobstorage/dsproxy/dsproxy_monactor.cpp b/ydb/core/blobstorage/dsproxy/dsproxy_monactor.cpp
index c082235cc18f..6aba81c8ec94 100644
--- a/ydb/core/blobstorage/dsproxy/dsproxy_monactor.cpp
+++ b/ydb/core/blobstorage/dsproxy/dsproxy_monactor.cpp
@@ -402,8 +402,15 @@ class TBlobStorageGroupProxyMonActor : public TActorBootstrappedRegisterActorPage(proxiesMonPage, path, name, false, TlsActivationContext->ExecutorThread.ActorSystem,
-                SelfId());
+            mon->RegisterActorPage(TMon::TRegisterActorPageFields{
+                .Title = name,
+                .RelPath = path,
+                .ActorSystem = TlsActivationContext->ExecutorThread.ActorSystem,
+                .Index = proxiesMonPage, 
+                .PreTag = false, 
+                .ActorId = SelfId(),
+                .MonServiceName = "dsproxy_mon"
+            });
         }
 
         Become(&TThis::StateOnline);
diff --git a/ydb/core/blobstorage/dsproxy/dsproxy_patch.cpp b/ydb/core/blobstorage/dsproxy/dsproxy_patch.cpp
index 19659e6b7759..fbb545409a89 100644
--- a/ydb/core/blobstorage/dsproxy/dsproxy_patch.cpp
+++ b/ydb/core/blobstorage/dsproxy/dsproxy_patch.cpp
@@ -29,10 +29,33 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActor FoundParts;
@@ -69,12 +94,16 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActor EmptyResponseFlags;
     TStackVec ErrorResponseFlags;
     TStackVec ForceStopFlags;
+    TStackVec SlowFlags;
     TBlobStorageGroupInfo::TVDiskIds VDisks;
 
     bool UseVPatch = false;
     bool IsGoodPatchedBlobId = false;
     bool IsAllowedErasure = false;
     bool IsSecured = false;
+    bool HasSlowVDisk = false;
+    bool IsContinuedVPatch = false;
+    bool IsMovedPatch = false;
 
 #define PATCH_LOG(priority, service, marker, msg, ...)                         \
         STLOG(priority, service, marker, msg,                                  \
@@ -97,6 +126,15 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActorActivePatch;
     }
 
+    void ScheduleWakeUp(TInstant startTime, EWakeUpTag tag) { // Schedules a TEvWakeup carrying `tag`.
+        TDuration duration = TActivationContext::Now() - startTime; // the delay equals the time already elapsed since startTime
+        Schedule(duration, new TEvents::TEvWakeup(tag));
+    }
+
+    void ScheduleWakeUp(EWakeUpTag tag) { // Convenience overload: measures the elapsed time from StageStart.
+        ScheduleWakeUp(StageStart, tag);
+    }
+
     static constexpr ERequestType RequestType() {
         return ERequestType::Patch;
     }
@@ -279,6 +317,12 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActorType.ErasureFamily() != TErasureType::ErasureMirror) {
+            if (ReceivedFoundParts == SentStarts / 2 + SentStarts % 2) {
+                ScheduleWakeUp(VPatchStartTag);
+            }
+        }
+
         NKikimrBlobStorage::TEvVPatchFoundParts &record = ev->Get()->Record;
 
         Y_ABORT_UNLESS(record.HasCookie());
@@ -312,6 +356,7 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActor(TStringBuilder() << ReceivedFoundParts << '/' << SentStarts)),
                 (ErrorReason, errorReason));
 
@@ -341,6 +386,13 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActorType.ErasureFamily() != TErasureType::ErasureMirror) {
+            if (ReceivedResults == SentVPatchDiff / 2 + SentVPatchDiff % 2) {
+                ScheduleWakeUp(VPatchDiffTag);
+            }
+        }
+
         PullOutStatusFlagsAndFressSpace(record);
         Y_ABORT_UNLESS(record.HasStatus());
         NKikimrProto::EReplyStatus status = record.GetStatus();
@@ -352,6 +404,7 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActor(TStringBuilder() << ReceivedResults << '/' << Info->Type.TotalPartCount())),
                 (ErrorReason, errorReason));
 
@@ -499,6 +552,7 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActor= dataParts, "vdiskIdx# " << vdiskIdx << " partIdx# " << partIdx);
             placements.push_back(TPartPlacement{static_cast(vdiskIdx), static_cast(partIdx + 1)});
+            SentVPatchDiff++;
         }
         SendDiffs(placements);
     }
@@ -537,15 +592,38 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActor subgroupIdx = 0;
 
-        ui32 subgroupIdx = 0;
         if (OkVDisksWithParts) {
             ui32 okVDiskIdx = RandomNumber(OkVDisksWithParts.size());
             subgroupIdx = OkVDisksWithParts[okVDiskIdx];
         } else {
+            ui64 worstNs = 0;
+            ui64 nextToWorstNs = 0;
+            i32 worstSubGroubIdx = -1;
+            GetWorstPredictedDelaysNs(NKikimrBlobStorage::EVDiskQueueId::PutAsyncBlob, &worstNs, &nextToWorstNs, &worstSubGroubIdx);
+            if (worstNs > nextToWorstNs * 2) {
+                SlowFlags[worstSubGroubIdx] = true;
+                HasSlowVDisk = true;
+            }
+            if (HasSlowVDisk) {
+                TStackVec goodDisks;
+                for (ui32 idx = 0; idx < VDisks.size(); ++idx) {
+                    if (!SlowFlags[idx] && !ErrorResponseFlags[idx]) {
+                        goodDisks.push_back(idx);
+                    }
+                }
+                if (goodDisks.size()) {
+                    ui32 okVDiskIdx = RandomNumber(goodDisks.size());
+                    subgroupIdx = goodDisks[okVDiskIdx];
+                }
+            }
+        }
+        if (!subgroupIdx) {
             subgroupIdx = RandomNumber(Info->Type.TotalPartCount());
         }
-        TVDiskID vDisk = Info->GetVDiskInSubgroup(subgroupIdx, OriginalId.Hash());
+        TVDiskID vDisk = Info->GetVDiskInSubgroup(*subgroupIdx, OriginalId.Hash());
         TDeque> events;
 
         ui64 cookie = ((ui64)OriginalId.Hash() << 32) | PatchedId.Hash();
@@ -574,7 +652,7 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActorPatchesWithFallback->Inc();
-        if (WithMovingPatchRequestToStaticNode && UseVPatch && !IsSecured) {
+        if (WithMovingPatchRequestToStaticNode && UseVPatch && !IsSecured && !IsMovedPatch) {
             PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA05, "Start Moved strategy from fallback");
             StartMovedPatch();
         } else {
@@ -587,20 +665,31 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActorPickSubgroup(OriginalId.Hash(), &VDisks, nullptr);
         ReceivedResponseFlags.assign(VDisks.size(), false);
         ErrorResponseFlags.assign(VDisks.size(), false);
         EmptyResponseFlags.assign(VDisks.size(), false);
         ForceStopFlags.assign(VDisks.size(), false);
+        SlowFlags.assign(VDisks.size(), false);
+
+        ui64 worstNs = 0;
+        ui64 nextToWorstNs = 0;
+        i32 worstSubGroubIdx = -1;
+        GetWorstPredictedDelaysNs(NKikimrBlobStorage::EVDiskQueueId::GetFastRead, &worstNs, &nextToWorstNs, &worstSubGroubIdx);
+        if (worstNs > nextToWorstNs * 2) {
+            SlowFlags[worstSubGroubIdx] = true;
+            HasSlowVDisk = true;
+        }
 
         TDeque> events;
-
         for (ui32 idx = 0; idx < VDisks.size(); ++idx) {
-            std::unique_ptr ev = std::make_unique(
-                    OriginalId, PatchedId, VDisks[idx], Deadline, idx, true);
-            events.emplace_back(std::move(ev));
-            SentStarts++;
+            if (!SlowFlags[idx]) {
+                std::unique_ptr ev = std::make_unique(
+                        OriginalId, PatchedId, VDisks[idx], Deadline, idx, true);
+                events.emplace_back(std::move(ev));
+                SentStarts++;
+            }
         }
 
         PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA08, "Start VPatch strategy",
@@ -701,6 +790,17 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActor nextToWorstNs * 2) {
+            SlowFlags[worstSubGroubIdx] = true;
+            HasSlowVDisk = true;
+        }
 
         if (Info->Type.GetErasure() == TErasureType::ErasureMirror3dc) {
             return ContinueVPatchForMirror3dc();
@@ -713,6 +813,9 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActorPickSubgroup(OriginalId.Hash(), &VDisks, nullptr);
         IsSecured = (Info->GetEncryptionMode() != TBlobStorageGroupInfo::EEM_NONE);
 
         IsGoodPatchedBlobId = result;
         IsAllowedErasure = Info->Type.ErasureFamily() == TErasureType::ErasureParityBlock
                 || Info->Type.GetErasure() == TErasureType::ErasureNone
                 || Info->Type.GetErasure() == TErasureType::ErasureMirror3dc;
-        if (IsGoodPatchedBlobId && IsAllowedErasure && UseVPatch && OriginalGroupId == Info->GroupID && !IsSecured) {
+        if (false && IsGoodPatchedBlobId && IsAllowedErasure && UseVPatch && OriginalGroupId == Info->GroupID && !IsSecured) {
             PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA03, "Start VPatch strategy from bootstrap");
             StartVPatch();
         } else {
@@ -825,6 +930,75 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActorGetPredictedDelayNsByOrderNumber(diskIdx, queueId);;
+            if (predictedNs > *outWorstNs) {
+                *outNextToWorstNs = *outWorstNs;
+                *outWorstNs = predictedNs;
+                *outWorstSubgroupIdx = diskIdx;
+            } else if (predictedNs > *outNextToWorstNs) {
+                *outNextToWorstNs = predictedNs;
+            }
+        }
+    }
+
+    void SetSlowDisks() { // Recomputes SlowFlags from the response flags and raises HasSlowVDisk if any disk is slow.
+        for (ui32 idx = 0; idx < SlowFlags.size(); ++idx) {
+            SlowFlags[idx] = !ReceivedResponseFlags[idx] && !EmptyResponseFlags[idx] && !ErrorResponseFlags[idx]; // slow = none of the received/empty/error flags is set for this index
+            if (SlowFlags[idx]) {
+                HasSlowVDisk = true; // only ever set to true here; this method never clears it
+            }
+        }
+    }
+
+    template 
+    void HandleWakeUp(TEvents::TEvWakeup::TPtr &ev) { // Wakeup handler: logs the tags, then starts the fallback if the tag is ExpectedTag or NeverTag.
+        PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA36, "HandleWakeUp",
+                (ExpectedTag, ToString(ExpectedTag)),
+                (ReceivedTag, ToString(ev->Get()->Tag)));
+        if (ev->Get()->Tag == ExpectedTag) {
+            SetSlowDisks(); // disks that have not answered yet are marked slow before falling back
+            StartFallback();
+        }
+        if (ev->Get()->Tag == NeverTag) { // NOTE(review): a separate `if`, not `else if` - if ExpectedTag could equal NeverTag, StartFallback would run twice; confirm
+            SetSlowDisks();
+            StartFallback();
+            PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA40, "Found NeverTag wake up", (ExpectedTag, ToString(ExpectedTag)));
+        }
+    }
+
+    void HandleVPatchWakeUp(TEvents::TEvWakeup::TPtr &ev) { // Wakeup handler: same flow as HandleWakeUp, but the expected tag is chosen at run time.
+        ui64 expectedTag = (IsContinuedVPatch ? VPatchDiffTag : VPatchStartTag); // VPatchDiffTag when IsContinuedVPatch is set, VPatchStartTag otherwise
+        PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA37, "HandleWakeUp",
+                (ExpectedTag, ToString(expectedTag)),
+                (ReceivedTag, ToString(ev->Get()->Tag)));
+        if (ev->Get()->Tag == expectedTag) {
+            SetSlowDisks(); // disks that have not answered yet are marked slow before falling back
+            StartFallback();
+        }
+        if (ev->Get()->Tag == NeverTag) { // NeverTag also triggers the fallback, and is logged separately
+            SetSlowDisks();
+            StartFallback();
+            PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA41, "Found NeverTag wake up", (ExpectedTag, ToString(expectedTag)));
+        }
+    }
+
+    void HandleNeverTagWakeUp(TEvents::TEvWakeup::TPtr &ev) { // Wakeup handler: replies DEADLINE on a NeverTag wakeup; any other tag is only logged.
+        PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA42, "HandleWakeUp",
+                (ExpectedTag, ToString(NeverTag)),
+                (ReceivedTag, ToString(ev->Get()->Tag)));
+        if (ev->Get()->Tag == NeverTag) {
+            PATCH_LOG(PRI_DEBUG, BS_PROXY_PATCH, BPPA43, "Found NeverTag wake up in naive state");
+            ReplyAndDie(NKikimrProto::DEADLINE); // finishes the request with DEADLINE status
+        }
+    }
+
     STATEFN(NaiveState) {
         if (ProcessEvent(ev)) {
             return;
@@ -832,9 +1006,14 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActorGetTypeRewrite()) {
             hFunc(TEvBlobStorage::TEvGetResult, Handle);
             hFunc(TEvBlobStorage::TEvPutResult, Handle);
+
+            IgnoreFunc(TEvents::TEvWakeup);
+            //hFunc(TEvents::TEvWakeup, HandleWakeUp);
             IgnoreFunc(TEvBlobStorage::TEvVPatchResult);
+            IgnoreFunc(TEvBlobStorage::TEvVPatchFoundParts);
+            IgnoreFunc(TEvBlobStorage::TEvVMovedPatchResult);
         default:
-            Y_ABORT("Received unknown event");
+            Y_FAIL_S("Received unknown event " << TypeName(*ev->GetBase()));
         };
     }
 
@@ -844,9 +1023,11 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActorGetTypeRewrite()) {
             hFunc(TEvBlobStorage::TEvVMovedPatchResult, Handle);
+            hFunc(TEvents::TEvWakeup, HandleWakeUp);
             IgnoreFunc(TEvBlobStorage::TEvVPatchResult);
+            IgnoreFunc(TEvBlobStorage::TEvVPatchFoundParts);
         default:
-            Y_ABORT("Received unknown event");
+            Y_FAIL_S("Received unknown event " << TypeName(*ev->GetBase()));
         };
     }
 
@@ -857,8 +1038,9 @@ class TBlobStorageGroupPatchRequest : public TBlobStorageGroupRequestActorGetTypeRewrite()) {
             hFunc(TEvBlobStorage::TEvVPatchFoundParts, Handle);
             hFunc(TEvBlobStorage::TEvVPatchResult, Handle);
+            hFunc(TEvents::TEvWakeup, HandleVPatchWakeUp);
         default:
-            Y_ABORT("Received unknown event");
+            Y_FAIL_S("Received unknown event " << TypeName(*ev->GetBase()));
         };
     }
 };
diff --git a/ydb/core/blobstorage/dsproxy/ut/dsproxy_patch_ut.cpp b/ydb/core/blobstorage/dsproxy/ut/dsproxy_patch_ut.cpp
index d6b70cc6f9c2..741d5ce3b875 100644
--- a/ydb/core/blobstorage/dsproxy/ut/dsproxy_patch_ut.cpp
+++ b/ydb/core/blobstorage/dsproxy/ut/dsproxy_patch_ut.cpp
@@ -110,6 +110,18 @@ enum class ENaivePatchCase {
     ErrorOnPut,
 };
 
+#define CASE_TO_RETURN_STRING(cs) \
+    case cs: return #cs \
+// end CASE_TO_RETURN_STRING
+TString ToString(ENaivePatchCase cs) { // Returns the qualified enumerator name (e.g. "ENaivePatchCase::Ok"); used in test log output.
+    switch (cs) {
+        CASE_TO_RETURN_STRING(ENaivePatchCase::Ok); // expands to `case X: return "X"`
+        CASE_TO_RETURN_STRING(ENaivePatchCase::ErrorOnGetItem);
+        CASE_TO_RETURN_STRING(ENaivePatchCase::ErrorOnGet);
+        CASE_TO_RETURN_STRING(ENaivePatchCase::ErrorOnPut);
+    } // NOTE(review): nothing is returned after the switch; a value outside the listed enumerators would flow off the end of the function
+}
+
 NKikimrProto::EReplyStatus GetPatchResultStatus(ENaivePatchCase naiveCase) {
     switch (naiveCase) {
     case ENaivePatchCase::Ok:
@@ -156,6 +168,17 @@ enum class EVPatchCase {
     Custom,
 };
 
+TString ToString(EVPatchCase cs) { // Returns the qualified enumerator name (e.g. "EVPatchCase::Ok"); used in test log output.
+    switch (cs) {
+        CASE_TO_RETURN_STRING(EVPatchCase::Ok);
+        CASE_TO_RETURN_STRING(EVPatchCase::OneErrorAndAllPartExistInStart);
+        CASE_TO_RETURN_STRING(EVPatchCase::OnePartLostInStart);
+        CASE_TO_RETURN_STRING(EVPatchCase::DeadGroupInStart);
+        CASE_TO_RETURN_STRING(EVPatchCase::ErrorDuringVPatchDiff);
+        CASE_TO_RETURN_STRING(EVPatchCase::Custom);
+    } // NOTE(review): nothing is returned after the switch; a value outside the listed enumerators would flow off the end of the function
+}
+
 NKikimrProto::EReplyStatus GetPatchResultStatus(EVPatchCase vpatchCase) {
     switch (vpatchCase) {
         case EVPatchCase::Ok:
@@ -249,6 +272,15 @@ enum class EMovedPatchCase {
     Error
 };
 
+TString ToString(EMovedPatchCase cs) { // Returns the qualified enumerator name (e.g. "EMovedPatchCase::Ok").
+    switch (cs) {
+        CASE_TO_RETURN_STRING(EMovedPatchCase::Ok);
+        CASE_TO_RETURN_STRING(EMovedPatchCase::Error);
+    } // NOTE(review): nothing is returned after the switch; a value outside the listed enumerators would flow off the end of the function
+}
+
+#undef CASE_TO_RETURN_STRING
+
 NKikimrProto::EReplyStatus GetPatchResultStatus(EMovedPatchCase movedCase) {
     switch (movedCase) {
     case EMovedPatchCase::Ok:
@@ -289,7 +321,7 @@ void ReceivePatchResult(TTestBasicRuntime &runtime, const TTestArgs &args, NKiki
 }
 
 void ConductGet(TTestBasicRuntime &runtime, const TTestArgs &args, ENaivePatchCase naiveCase) {
-    CTEST << "ConductGet: Start\n";
+    CTEST << "ConductGet: Start NaiveCase: " << ToString(naiveCase) << "\n";
     NKikimrProto::EReplyStatus resultStatus = GetGetResultStatus(naiveCase);
     TAutoPtr handle;
     TEvBlobStorage::TEvGet *get = runtime.GrabEdgeEventRethrow(handle);
@@ -328,10 +360,10 @@ TString MakePatchedBuffer(const TTestArgs &args) {
 void ConductPut(TTestBasicRuntime &runtime, const TTestArgs &args, ENaivePatchCase naiveCase) {
     NKikimrProto::EReplyStatus resultStatus = GetPutResultStatus(naiveCase);
     if (resultStatus == NKikimrProto::UNKNOWN) {
-        CTEST << "ConductPut: Skip\n";
+        CTEST << "ConductPut: Skip NaiveCase: " << ToString(naiveCase) << "\n";
         return;
     }
-    CTEST << "ConductPut: Start\n";
+    CTEST << "ConductPut: Start NaiveCase: " << ToString(naiveCase) << "\n";
     TAutoPtr handle;
     TEvBlobStorage::TEvPut *put = runtime.GrabEdgeEventRethrow(handle);
     UNIT_ASSERT_VALUES_EQUAL(put->Id, args.PatchedId);
@@ -346,7 +378,7 @@ void ConductPut(TTestBasicRuntime &runtime, const TTestArgs &args, ENaivePatchCa
 }
 
 void ConductNaivePatch(TTestBasicRuntime &runtime, const TTestArgs &args, ENaivePatchCase naiveCase) {
-    CTEST << "ConductNaivePatch: Start\n";
+    CTEST << "ConductNaivePatch: Start NaiveCase: " << ToString(naiveCase) << Endl;
     ConductGet(runtime, args, naiveCase);
     ConductPut(runtime, args, naiveCase);
     NKikimrProto::EReplyStatus resultStatus = GetPatchResultStatus(naiveCase);
@@ -354,14 +386,27 @@ void ConductNaivePatch(TTestBasicRuntime &runtime, const TTestArgs &args, ENaive
     CTEST << "ConductNaivePatch: Finish\n";
 }
 
+template  
+TString ToString(const TVector &lst) {
+    TStringBuilder bld;
+    bld << '[';
+    for (ui32 idx = 0; idx < lst.size(); ++idx) {
+        if (idx) {
+            bld << ", ";
+        }
+        bld << lst[idx];
+    }
+    bld << ']';
+    return bld;
+}
 
 void ConductVPatchStart(TTestBasicRuntime &runtime, const TDSProxyEnv &env, const TTestArgs &args,
-        EVPatchCase naiveCase, TVDiskPointer vdiskPointer)
+        EVPatchCase vpatchCase, TVDiskPointer vdiskPointer)
 {
     auto [vdiskIdx, idxInSubgroup] = vdiskPointer.GetIndecies(env, args.OriginalId.Hash());
-    CTEST << "ConductVPatchStart: Start vdiskIdx# " <<  vdiskIdx << " idxInSubgroup# " << idxInSubgroup << "\n";
+    CTEST << "ConductVPatchStart: Start vdiskIdx# " <<  vdiskIdx << " idxInSubgroup# " << idxInSubgroup << " VPatchCase: " << ToString(vpatchCase) << "\n";
     TVDiskID vdisk = env.Info->GetVDiskInSubgroup(idxInSubgroup, args.OriginalId.Hash());
-    auto [status, parts] = GetVPatchFoundPartsStatus(env, args, naiveCase, vdiskPointer);
+    auto [status, parts] = GetVPatchFoundPartsStatus(env, args, vpatchCase, vdiskPointer);
 
     auto start = runtime.GrabEdgeEventRethrow({env.VDisks[vdiskIdx]});
     auto &startRecord = start->Get()->Record;
@@ -376,21 +421,22 @@ void ConductVPatchStart(TTestBasicRuntime &runtime, const TDSProxyEnv &env, cons
     for (auto partId : parts) {
         foundParts->AddPart(partId);
     }
+    CTEST << "ConductVPatchStart: Send FoundParts vdiskIdx# " <<  vdiskIdx << " idxInSubgroup# " << idxInSubgroup << " parts# " << ToString(parts) << "\n";
     SendByHandle(runtime, start, std::move(foundParts));
     CTEST << "ConductVPatchStart: Finish vdiskIdx# " <<  vdiskIdx << " idxInSubgroup# " << idxInSubgroup << "\n";
 }
 
 void ConductVPatchDiff(TTestBasicRuntime &runtime, const TDSProxyEnv &env, const TTestArgs &args,
-        EVPatchCase naiveCase, TVDiskPointer vdiskPointer)
+        EVPatchCase vpatchCase, TVDiskPointer vdiskPointer)
 {
     auto [vdiskIdx, idxInSubgroup] = vdiskPointer.GetIndecies(env, args.PatchedId.Hash());
     TVDiskID vdisk = env.Info->GetVDiskInSubgroup(idxInSubgroup, args.PatchedId.Hash());
-    NKikimrProto::EReplyStatus resultStatus = GetVPatchResultStatus(env, args, naiveCase, vdiskPointer);
+    NKikimrProto::EReplyStatus resultStatus = GetVPatchResultStatus(env, args, vpatchCase, vdiskPointer);
     if (resultStatus == NKikimrProto::UNKNOWN) {
-        CTEST << "ConductVPatchDiff: Skip vdiskIdx# " <<  vdiskIdx << " idxInSubgroup# " << idxInSubgroup << "\n";
+        CTEST << "ConductVPatchDiff: Skip vdiskIdx# " <<  vdiskIdx << " idxInSubgroup# " << idxInSubgroup << " VPatchCase: " << ToString(vpatchCase) << "\n";
         return;
     }
-    CTEST << "ConductVPatchDiff: Start vdiskIdx# " <<  vdiskIdx << " idxInSubgroup# " << idxInSubgroup << "\n";
+    CTEST << "ConductVPatchDiff: Start vdiskIdx# " <<  vdiskIdx << " idxInSubgroup# " << idxInSubgroup << " VPatchCase: " << ToString(vpatchCase) << "\n";
 
     auto diffEv = runtime.GrabEdgeEventRethrow({env.VDisks[vdiskIdx]});
     auto &diffRecord = diffEv->Get()->Record;
@@ -415,6 +461,7 @@ void ConductVPatchDiff(TTestBasicRuntime &runtime, const TDSProxyEnv &env, const
 }
 
 void ConductFailedVPatch(TTestBasicRuntime &runtime, const TDSProxyEnv &env, const TTestArgs &args) {
+    return; // disabled vpatch
     CTEST << "ConductFailedVPatch: Start\n";
     for (ui32 idxInSubgroup = 0; idxInSubgroup < args.GType.BlobSubgroupSize(); ++idxInSubgroup) {
         TVDiskPointer vdisk = TVDiskPointer::GetVDiskIdx(idxInSubgroup);
@@ -429,7 +476,7 @@ void ConductFailedVPatch(TTestBasicRuntime &runtime, const TDSProxyEnv &env, con
 
 
 void ConductVMovedPatch(TTestBasicRuntime &runtime, const TTestArgs &args, EMovedPatchCase movedCase) {
-    CTEST << "ConductVMovedPatch: Start\n";
+    CTEST << "ConductVMovedPatch: Start MovedPatchCase: " << ToString(movedCase) << Endl;
     NKikimrProto::EReplyStatus resultStatus = GetVMovedPatchResultStatus(movedCase);
     TAutoPtr handle;
     TEvBlobStorage::TEvVMovedPatch *vPatch = runtime.GrabEdgeEventRethrow(handle);
@@ -459,7 +506,7 @@ void ConductVMovedPatch(TTestBasicRuntime &runtime, const TTestArgs &args, EMove
 void ConductMovedPatch(TTestBasicRuntime &runtime, const TDSProxyEnv &env, const TTestArgs &args,
         EMovedPatchCase movedCase)
 {
-    CTEST << "ConductMovedPatch: Start\n";
+    CTEST << "ConductMovedPatch: Start MovedPatchCase: " << ToString(movedCase) << Endl;
     ConductFailedVPatch(runtime, env, args);
     ConductVMovedPatch(runtime, args, movedCase);
     NKikimrProto::EReplyStatus resultStatus = GetPatchResultStatus(movedCase);
@@ -481,7 +528,8 @@ void ConductFallbackPatch(TTestBasicRuntime &runtime, const TTestArgs &args) {
 void ConductVPatchEvents(TTestBasicRuntime &runtime, const TDSProxyEnv &env, const TTestArgs &args,
         EVPatchCase vpatchCase)
 {
-    CTEST << "ConductVPatchEvents: Start\n";
+    return; // disabled vpatch
+    CTEST << "ConductVPatchEvents: Start VPatchCase: " << ToString(vpatchCase) << Endl;
     for (ui32 idxInSubgroup = 0; idxInSubgroup < args.GType.BlobSubgroupSize(); ++idxInSubgroup) {
         TVDiskPointer vdisk = TVDiskPointer::GetVDiskIdx(idxInSubgroup);
         ConductVPatchStart(runtime, env, args, vpatchCase, vdisk);
@@ -496,7 +544,7 @@ void ConductVPatchEvents(TTestBasicRuntime &runtime, const TDSProxyEnv &env, con
 void ConductVPatch(TTestBasicRuntime &runtime, const TDSProxyEnv &env, const TTestArgs &args,
         EVPatchCase vpatchCase)
 {
-    CTEST << "ConductFallbackPatch: Start\n";
+    CTEST << "ConductFallbackPatch: Start VPatchCase: " << ToString(vpatchCase) << Endl;
     ConductVPatchEvents(runtime, env, args, vpatchCase);
     NKikimrProto::EReplyStatus resultStatus = GetPatchResultStatus(vpatchCase);
     if (resultStatus == NKikimrProto::UNKNOWN) {
@@ -620,17 +668,18 @@ void RunGeneralTest(void(*runner)(TTestBasicRuntime &runtime, const TTestArgs &a
     Y_UNIT_TEST_NAIVE(ErrorOnPut, erasure) \
     Y_UNIT_TEST_MOVED(Ok, erasure) \
     Y_UNIT_TEST_MOVED(Error, erasure) \
-    Y_UNIT_TEST_VPATCH(Ok, erasure) \
-    Y_UNIT_TEST_VPATCH(OneErrorAndAllPartExistInStart, erasure) \
-    Y_UNIT_TEST_VPATCH(OnePartLostInStart, erasure) \
-    Y_UNIT_TEST_VPATCH(DeadGroupInStart, erasure) \
-    Y_UNIT_TEST_VPATCH(ErrorDuringVPatchDiff, erasure) \
     Y_UNIT_TEST_SECURED(Ok, erasure) \
     Y_UNIT_TEST_SECURED(ErrorOnGetItem, erasure) \
     Y_UNIT_TEST_SECURED(ErrorOnGet, erasure) \
     Y_UNIT_TEST_SECURED(ErrorOnPut, erasure) \
 // end Y_UNIT_TEST_PATCH_PACK
 
+//    Y_UNIT_TEST_VPATCH(Ok, erasure)
+//    Y_UNIT_TEST_VPATCH(OneErrorAndAllPartExistInStart, erasure)
+//    Y_UNIT_TEST_VPATCH(OnePartLostInStart, erasure)
+//    Y_UNIT_TEST_VPATCH(DeadGroupInStart, erasure)
+//    Y_UNIT_TEST_VPATCH(ErrorDuringVPatchDiff, erasure) 
+
     Y_UNIT_TEST_PATCH_PACK(ErasureNone)
     Y_UNIT_TEST_PATCH_PACK(Erasure4Plus2Block)
     Y_UNIT_TEST_PATCH_PACK(ErasureMirror3dc)
@@ -712,6 +761,7 @@ EFaultToleranceCase GetFaultToleranceCaseForBlock4Plus2(const TDSProxyEnv &env,
             }
         }
     }
+    return EFaultToleranceCase::Fallback; // disabled vpatch
     if (layout.CountEffectiveReplicas(env.Info->Type) == env.Info->Type.TotalPartCount()) {
         return EFaultToleranceCase::Ok;
     } else {
@@ -736,6 +786,7 @@ EFaultToleranceCase GetFaultToleranceCaseForMirror3dc(const TDSProxyEnv &env, co
     for (ui32 dcIdx = 0; dcIdx < dcCnt; ++dcIdx) {
         x2cnt += (replInDc[dcIdx] >= 2);
     }
+    return EFaultToleranceCase::Fallback; // disabled vpatch
     if ((replInDc[0] && replInDc[1] && replInDc[2]) || x2cnt >= 2) {
         return EFaultToleranceCase::Ok;
     } else {
diff --git a/ydb/core/blobstorage/nodewarden/node_warden_impl.cpp b/ydb/core/blobstorage/nodewarden/node_warden_impl.cpp
index 754d0924d1b9..f2995a5b441f 100644
--- a/ydb/core/blobstorage/nodewarden/node_warden_impl.cpp
+++ b/ydb/core/blobstorage/nodewarden/node_warden_impl.cpp
@@ -179,6 +179,7 @@ void TNodeWarden::Bootstrap() {
         icb->RegisterSharedControl(EnableSyncLogChunkCompressionSSD, "VDiskControls.EnableSyncLogChunkCompressionSSD");
         icb->RegisterSharedControl(MaxSyncLogChunksInFlightHDD, "VDiskControls.MaxSyncLogChunksInFlightHDD");
         icb->RegisterSharedControl(MaxSyncLogChunksInFlightSSD, "VDiskControls.MaxSyncLogChunksInFlightSSD");
+        icb->RegisterSharedControl(DefaultHugeGarbagePerMille, "VDiskControls.DefaultHugeGarbagePerMille");
 
         icb->RegisterSharedControl(CostMetricsParametersByMedia[NPDisk::DEVICE_TYPE_ROT].BurstThresholdNs,
                 "VDiskControls.BurstThresholdNsHDD");
diff --git a/ydb/core/blobstorage/nodewarden/node_warden_impl.h b/ydb/core/blobstorage/nodewarden/node_warden_impl.h
index 3d77ae4f1c7e..3a141f3b113c 100644
--- a/ydb/core/blobstorage/nodewarden/node_warden_impl.h
+++ b/ydb/core/blobstorage/nodewarden/node_warden_impl.h
@@ -139,6 +139,7 @@ namespace NKikimr::NStorage {
         TControlWrapper EnableSyncLogChunkCompressionSSD;
         TControlWrapper MaxSyncLogChunksInFlightHDD;
         TControlWrapper MaxSyncLogChunksInFlightSSD;
+        TControlWrapper DefaultHugeGarbagePerMille;
 
         TReplQuoter::TPtr ReplNodeRequestQuoter;
         TReplQuoter::TPtr ReplNodeResponseQuoter;
@@ -162,6 +163,7 @@ namespace NKikimr::NStorage {
             , EnableSyncLogChunkCompressionSSD(0, 0, 1)
             , MaxSyncLogChunksInFlightHDD(10, 1, 1024)
             , MaxSyncLogChunksInFlightSSD(10, 1, 1024)
+            , DefaultHugeGarbagePerMille(300, 1, 1000)
             , CostMetricsParametersByMedia({
                 TCostMetricsParameters{200},
                 TCostMetricsParameters{50},
diff --git a/ydb/core/blobstorage/nodewarden/node_warden_vdisk.cpp b/ydb/core/blobstorage/nodewarden/node_warden_vdisk.cpp
index 0497406a1332..75061a6bcc90 100644
--- a/ydb/core/blobstorage/nodewarden/node_warden_vdisk.cpp
+++ b/ydb/core/blobstorage/nodewarden/node_warden_vdisk.cpp
@@ -179,6 +179,7 @@ namespace NKikimr::NStorage {
         vdiskConfig->EnableVDiskCooldownTimeout = Cfg->EnableVDiskCooldownTimeout;
         vdiskConfig->ReplPausedAtStart = Cfg->VDiskReplPausedAtStart;
         vdiskConfig->EnableVPatch = EnableVPatch;
+        vdiskConfig->DefaultHugeGarbagePerMille = DefaultHugeGarbagePerMille;
 
         vdiskConfig->EnableLocalSyncLogDataCutting = EnableLocalSyncLogDataCutting;
         if (deviceType == NPDisk::EDeviceType::DEVICE_TYPE_ROT) {
diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_blockdevice_async.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_blockdevice_async.cpp
index 7f2792c639de..75a8429bee87 100644
--- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_blockdevice_async.cpp
+++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_blockdevice_async.cpp
@@ -401,16 +401,15 @@ class TRealBlockDevice : public IBlockDevice {
                 }
                 EndOffset = op->GetOffset() + opSize;
 
-                ui64 duration = HPNow() - completionAction->SubmitTime;
-                ui64 durationMs = HPMilliSecondsFloat(duration);
+                double duration = HPMilliSecondsFloat(HPNow() - completionAction->SubmitTime);
                 if (op->GetType() == IAsyncIoOperation::EType::PRead) {
                     NSan::Unpoison(op->GetData(), opSize);
                     REQUEST_VALGRIND_MAKE_MEM_DEFINED(op->GetData(), opSize);
-                    Device.Mon.DeviceReadDuration.Increment(durationMs);
-                    LWPROBE(PDiskDeviceReadDuration, Device.GetPDiskId(), HPMilliSecondsFloat(duration), opSize);
+                    Device.Mon.DeviceReadDuration.Increment(duration);
+                    LWPROBE(PDiskDeviceReadDuration, Device.GetPDiskId(), duration, opSize);
                 } else {
-                    Device.Mon.DeviceWriteDuration.Increment(durationMs);
-                    LWPROBE(PDiskDeviceWriteDuration, Device.GetPDiskId(), HPMilliSecondsFloat(duration), opSize);
+                    Device.Mon.DeviceWriteDuration.Increment(duration);
+                    LWPROBE(PDiskDeviceWriteDuration, Device.GetPDiskId(), duration, opSize);
                 }
                 if (completionAction->FlushAction) {
                     ui64 idx = completionAction->FlushAction->OperationIdx;
@@ -668,8 +667,8 @@ class TRealBlockDevice : public IBlockDevice {
                             Device.IsTrimEnabled = Device.IoContext->DoTrim(op);
                             NHPTimer::STime endTime = HPNow();
                             Device.IdleCounter.Decrement();
-                            const ui64 durationUs = HPMicroSeconds(endTime - beginTime);
-                            Device.Mon.DeviceTrimDuration.Increment(durationUs);
+                            const double duration = HPMilliSecondsFloat(endTime - beginTime);
+                            Device.Mon.DeviceTrimDuration.Increment(duration);
                             *Device.Mon.DeviceEstimatedCostNs += completion->CostNs;
                             if (Device.ActorSystem && Device.IsTrimEnabled) {
                                 LOG_DEBUG_S(*Device.ActorSystem, NKikimrServices::BS_DEVICE,
@@ -680,7 +679,7 @@ class TRealBlockDevice : public IBlockDevice {
                                         << "\" offset# " << op->GetOffset()
                                         << " size# " << op->GetSize());
                                 LWPROBE(PDiskDeviceTrimDuration, Device.GetPDiskId(),
-                                        HPMilliSecondsFloat(endTime - beginTime), op->GetOffset());
+                                        duration, op->GetOffset());
                             }
                         }
                         completion->SetResult(EIoResult::Ok);
diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_chunk_tracker.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_chunk_tracker.h
index 43fe42c0d353..96542ae28416 100644
--- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_chunk_tracker.h
+++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_chunk_tracker.h
@@ -163,7 +163,7 @@ class TPerOwnerQuotaTracker {
         str << "";
     }
 
-    void PrintHTML(IOutputStream &str, TQuotaRecord *sharedQuota, NKikimrBlobStorage::TPDiskSpaceColor::E *colorBorder) {
+    void PrintHTML(IOutputStream &str, TQuotaRecord *sharedQuota, NKikimrBlobStorage::TPDiskSpaceColor::E *colorBorder, double *borderOccupancy) {
         str << "
";
         str << "ColorLimits#\n";
         ColorLimits.Print(str);
@@ -171,8 +171,12 @@ class TPerOwnerQuotaTracker {
         str << "\nExpectedOwnerCount# " << ExpectedOwnerCount;
         str << "\nActiveOwners# " << ActiveOwnerIds.size();
         if (colorBorder) {
-            str << "\nColorBorder# " << NKikimrBlobStorage::TPDiskSpaceColor::E_Name(*colorBorder) << "\n";
+            str << "\nColorBorder# " << NKikimrBlobStorage::TPDiskSpaceColor::E_Name(*colorBorder);
         }
+        if (borderOccupancy) {
+            str << "\nColorBorderOccupancy# " << *borderOccupancy;
+        }
+        str << "\n";
         str << "
"; str << ""; str << R"_( @@ -531,9 +535,9 @@ using TColor = NKikimrBlobStorage::TPDiskSpaceColor; void PrintHTML(IOutputStream &str) { str << "

GlobalQuota

"; - GlobalQuota->PrintHTML(str, nullptr, nullptr); + GlobalQuota->PrintHTML(str, nullptr, nullptr, nullptr); str << "

OwnerQuota

"; - OwnerQuota->PrintHTML(str, SharedQuota.Get(), &ColorBorder); + OwnerQuota->PrintHTML(str, SharedQuota.Get(), &ColorBorder, &ColorBorderOccupancy); } ui32 ColorFlagLimit(TOwner owner, NKikimrBlobStorage::TPDiskSpaceColor::E color) { diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_color_limits.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_color_limits.h index d0d58b4b1157..0337910644da 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_color_limits.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_color_limits.h @@ -26,7 +26,7 @@ struct TDiskColor { } double CalculateOccupancy(i64 total) const { - return (double)CalculateQuota(total) / total; + return 1 - (double)CalculateQuota(total) / total; } }; @@ -92,15 +92,15 @@ struct TColorLimits { double GetOccupancyForColor(NKikimrBlobStorage::TPDiskSpaceColor::E color, i64 total) { switch (color) { - case NKikimrBlobStorage::TPDiskSpaceColor::GREEN: return Cyan.CalculateOccupancy(total); - case NKikimrBlobStorage::TPDiskSpaceColor::CYAN: return LightYellow.CalculateOccupancy(total); - case NKikimrBlobStorage::TPDiskSpaceColor::LIGHT_YELLOW: return Yellow.CalculateOccupancy(total); - case NKikimrBlobStorage::TPDiskSpaceColor::YELLOW: return LightOrange.CalculateOccupancy(total); - case NKikimrBlobStorage::TPDiskSpaceColor::LIGHT_ORANGE: return PreOrange.CalculateOccupancy(total); - case NKikimrBlobStorage::TPDiskSpaceColor::PRE_ORANGE: return Orange.CalculateOccupancy(total); - case NKikimrBlobStorage::TPDiskSpaceColor::ORANGE: return Red.CalculateOccupancy(total); - case NKikimrBlobStorage::TPDiskSpaceColor::RED: return Black.CalculateOccupancy(total); - case NKikimrBlobStorage::TPDiskSpaceColor::BLACK: return 1.0; + case NKikimrBlobStorage::TPDiskSpaceColor::GREEN: return 0.0; + case NKikimrBlobStorage::TPDiskSpaceColor::CYAN: return Cyan.CalculateOccupancy(total); + case NKikimrBlobStorage::TPDiskSpaceColor::LIGHT_YELLOW: return LightYellow.CalculateOccupancy(total); + case 
NKikimrBlobStorage::TPDiskSpaceColor::YELLOW: return Yellow.CalculateOccupancy(total); + case NKikimrBlobStorage::TPDiskSpaceColor::LIGHT_ORANGE: return LightOrange.CalculateOccupancy(total); + case NKikimrBlobStorage::TPDiskSpaceColor::PRE_ORANGE: return PreOrange.CalculateOccupancy(total); + case NKikimrBlobStorage::TPDiskSpaceColor::ORANGE: return Orange.CalculateOccupancy(total); + case NKikimrBlobStorage::TPDiskSpaceColor::RED: return Red.CalculateOccupancy(total); + case NKikimrBlobStorage::TPDiskSpaceColor::BLACK: return Black.CalculateOccupancy(total); case NKikimrBlobStorage::TPDiskSpaceColor_E_TPDiskSpaceColor_E_INT_MIN_SENTINEL_DO_NOT_USE_: case NKikimrBlobStorage::TPDiskSpaceColor_E_TPDiskSpaceColor_E_INT_MAX_SENTINEL_DO_NOT_USE_: diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_config.h b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_config.h index d221cfba4b9f..3b51e4cab2c5 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_config.h +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_config.h @@ -298,6 +298,7 @@ struct TPDiskConfig : public TThrRefBase { str << " OrangeLogChunksMultiplier# " << OrangeLogChunksMultiplier << x; str << " WarningLogChunksMultiplier# " << WarningLogChunksMultiplier << x; str << " YellowLogChunksMultiplier# " << YellowLogChunksMultiplier << x; + str << " SpaceColorBorder# " << SpaceColorBorder << x; str << "}"; return str.Str(); } diff --git a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.cpp b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.cpp index fc0c597a99ca..2ac80067655a 100644 --- a/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.cpp +++ b/ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.cpp @@ -1477,7 +1477,7 @@ void TPDisk::WhiteboardReport(TWhiteboardReport &whiteboardReport) { TGuard guard(StateMutex); const ui64 totalSize = Format.DiskSize; const ui64 availableSize = (ui64)Format.ChunkSize * Keeper.GetFreeChunkCount(); - + if (*Mon.PDiskBriefState != TPDiskMon::TPDisk::Error) { 
*Mon.FreeSpaceBytes = availableSize; *Mon.UsedSpaceBytes = totalSize - availableSize; @@ -1487,7 +1487,7 @@ void TPDisk::WhiteboardReport(TWhiteboardReport &whiteboardReport) { *Mon.UsedSpaceBytes = 32_KB; *Mon.TotalSpaceBytes = 32_KB; } - + NKikimrWhiteboard::TPDiskStateInfo& pdiskState = reportResult->PDiskState->Record; pdiskState.SetPDiskId(PDiskId); pdiskState.SetPath(Cfg->GetDevicePath()); @@ -1499,6 +1499,7 @@ void TPDisk::WhiteboardReport(TWhiteboardReport &whiteboardReport) { pdiskState.SetSystemSize(Format.ChunkSize * (Keeper.GetOwnerHardLimit(OwnerSystemLog) + Keeper.GetOwnerHardLimit(OwnerSystemReserve))); pdiskState.SetLogUsedSize(Format.ChunkSize * (Keeper.GetOwnerHardLimit(OwnerCommonStaticLog) - Keeper.GetOwnerFree(OwnerCommonStaticLog))); pdiskState.SetLogTotalSize(Format.ChunkSize * Keeper.GetOwnerHardLimit(OwnerCommonStaticLog)); + pdiskState.SetNumActiveSlots(TotalOwners); if (ExpectedSlotCount) { pdiskState.SetExpectedSlotCount(ExpectedSlotCount); } diff --git a/ydb/core/blobstorage/ut_blobstorage/gc.cpp b/ydb/core/blobstorage/ut_blobstorage/gc.cpp new file mode 100644 index 000000000000..80c779093259 --- /dev/null +++ b/ydb/core/blobstorage/ut_blobstorage/gc.cpp @@ -0,0 +1,22 @@ +#include + +Y_UNIT_TEST_SUITE(GarbageCollection) { + Y_UNIT_TEST(EmptyGcCmd) { + TEnvironmentSetup env({ + .Erasure = TBlobStorageGroupType::Erasure4Plus2Block, + }); + auto& runtime = env.Runtime; + + env.CreateBoxAndPool(1, 1); + auto info = env.GetGroupInfo(env.GetGroups().front()); + + auto ev = std::make_unique(1u, 1u, 1u, 0u, false, 0u, 0u, nullptr, nullptr, + TInstant::Max(), true); + const TActorId edge = runtime->AllocateEdgeActor(1, __FILE__, __LINE__); + runtime->WrapInActorContext(edge, [&] { + SendToBSProxy(edge, info->GroupID, ev.release()); + }); + auto res = env.WaitForEdgeActorEvent(edge); + UNIT_ASSERT_VALUES_EQUAL(res->Get()->Status, NKikimrProto::ERROR); + } +} diff --git a/ydb/core/blobstorage/ut_blobstorage/ya.make 
b/ydb/core/blobstorage/ut_blobstorage/ya.make index 0edf9906ed95..ba965e9e83ca 100644 --- a/ydb/core/blobstorage/ut_blobstorage/ya.make +++ b/ydb/core/blobstorage/ut_blobstorage/ya.make @@ -22,6 +22,7 @@ SRCS( ds_proxy_lwtrace.cpp encryption.cpp extra_block_checks.cpp + gc.cpp gc_quorum_3dc.cpp get.cpp group_reconfiguration.cpp diff --git a/ydb/core/blobstorage/vdisk/common/vdisk_config.h b/ydb/core/blobstorage/vdisk/common/vdisk_config.h index 31e30788eeb1..73b1d27daaa1 100644 --- a/ydb/core/blobstorage/vdisk/common/vdisk_config.h +++ b/ydb/core/blobstorage/vdisk/common/vdisk_config.h @@ -218,6 +218,7 @@ namespace NKikimr { TDuration WhiteboardUpdateInterval; bool EnableVDiskCooldownTimeout; TControlWrapper EnableVPatch = true; + TControlWrapper DefaultHugeGarbagePerMille; ///////////// COST METRICS SETTINGS //////////////// bool UseCostTracker = true; diff --git a/ydb/core/blobstorage/vdisk/common/vdisk_events.h b/ydb/core/blobstorage/vdisk/common/vdisk_events.h index f31c02db3bcf..98473e08648b 100644 --- a/ydb/core/blobstorage/vdisk/common/vdisk_events.h +++ b/ydb/core/blobstorage/vdisk/common/vdisk_events.h @@ -1585,7 +1585,7 @@ namespace NKikimr { if (deadline != TInstant::Max()) { this->Record.MutableMsgQoS()->SetDeadlineSeconds((ui32)deadline.Seconds()); } - this->Record.MutableMsgQoS()->SetExtQueueId(HandleClassToQueueId(NKikimrBlobStorage::AsyncBlob)); + this->Record.MutableMsgQoS()->SetExtQueueId(NKikimrBlobStorage::PutAsyncBlob); } bool GetIgnoreBlock() const { @@ -1965,6 +1965,25 @@ namespace NKikimr { } Record.MutableMsgQoS()->SetExtQueueId(NKikimrBlobStorage::EVDiskQueueId::GetFastRead); } + + TString ToString() const { + return ToString(this->Record); + } + + static TString ToString(const NKikimrBlobStorage::TEvVPatchStart &record) { + TStringStream str; + TLogoBlobID originalId = LogoBlobIDFromLogoBlobID(record.GetOriginalBlobId()); + TLogoBlobID patchedId = LogoBlobIDFromLogoBlobID(record.GetPatchedBlobId()); + str << "{TEvVPatchStart"; + str << " 
OriginalBlobId# " << originalId.ToString(); + str << " PatchedBlobId# " << patchedId.ToString(); + if (record.HasMsgQoS()) { + str << " "; + TEvBlobStorage::TEvVPut::OutMsgQos(record.GetMsgQoS(), str); + } + str << "}"; + return str.Str(); + } }; struct TEvBlobStorage::TEvVPatchFoundParts @@ -2010,6 +2029,25 @@ namespace NKikimr { Record.SetStatus(status); } + TString ToString() const { + return ToString(this->Record); + } + + static TString ToString(const NKikimrBlobStorage::TEvVPatchFoundParts &record) { + TStringStream str; + TLogoBlobID originalId = LogoBlobIDFromLogoBlobID(record.GetOriginalBlobId()); + TLogoBlobID patchedId = LogoBlobIDFromLogoBlobID(record.GetPatchedBlobId()); + str << "{TEvVPatchFoundParts"; + str << " OriginalBlobId# " << originalId.ToString(); + str << " PatchedBlobId# " << patchedId.ToString(); + if (record.HasMsgQoS()) { + str << " "; + TEvBlobStorage::TEvVPut::OutMsgQos(record.GetMsgQoS(), str); + } + str << "}"; + return str.Str(); + } + void MakeError(NKikimrProto::EReplyStatus status, const TString& errorReason, const NKikimrBlobStorage::TEvVPatchStart &request) { Record.SetErrorReason(errorReason); @@ -2099,6 +2137,25 @@ namespace NKikimr { } return result; } + + TString ToString() const { + return ToString(this->Record); + } + + static TString ToString(const NKikimrBlobStorage::TEvVPatchDiff &record) { + TStringStream str; + TLogoBlobID originalId = LogoBlobIDFromLogoBlobID(record.GetOriginalPartBlobId()); + TLogoBlobID patchedId = LogoBlobIDFromLogoBlobID(record.GetPatchedPartBlobId()); + str << "{TEvVPatchDiff"; + str << " OriginalBlobId# " << originalId.ToString(); + str << " PatchedBlobId# " << patchedId.ToString(); + if (record.HasMsgQoS()) { + str << " "; + TEvBlobStorage::TEvVPut::OutMsgQos(record.GetMsgQoS(), str); + } + str << "}"; + return str.Str(); + } }; @@ -2144,6 +2201,25 @@ namespace NKikimr { } return result; } + + TString ToString() const { + return ToString(this->Record); + } + + static TString ToString(const 
NKikimrBlobStorage::TEvVPatchXorDiff &record) { + TStringStream str; + TLogoBlobID originalId = LogoBlobIDFromLogoBlobID(record.GetOriginalPartBlobId()); + TLogoBlobID patchedId = LogoBlobIDFromLogoBlobID(record.GetPatchedPartBlobId()); + str << "{TEvVPatchXorDiff"; + str << " OriginalBlobId# " << originalId.ToString(); + str << " PatchedBlobId# " << patchedId.ToString(); + if (record.HasMsgQoS()) { + str << " "; + TEvBlobStorage::TEvVPut::OutMsgQos(record.GetMsgQoS(), str); + } + str << "}"; + return str.Str(); + } }; struct TEvBlobStorage::TEvVPatchXorDiffResult diff --git a/ydb/core/blobstorage/vdisk/defrag/defrag_actor.cpp b/ydb/core/blobstorage/vdisk/defrag/defrag_actor.cpp index fa7014b64837..54a7d2cfaf53 100644 --- a/ydb/core/blobstorage/vdisk/defrag/defrag_actor.cpp +++ b/ydb/core/blobstorage/vdisk/defrag/defrag_actor.cpp @@ -16,12 +16,14 @@ namespace NKikimr { //////////////////////////////////////////////////////////////////////////// TDefragCtx::TDefragCtx( const TIntrusivePtr &vctx, + const TIntrusivePtr &vconfig, const std::shared_ptr &hugeBlobCtx, const TPDiskCtxPtr &pdiskCtx, const TActorId &skeletonId, const TActorId &hugeKeeperId, bool runDefrageBySchedule) : VCtx(vctx) + , VCfg(vconfig) , HugeBlobCtx(hugeBlobCtx) , PDiskCtx(pdiskCtx) , SkeletonId(skeletonId) @@ -48,7 +50,8 @@ namespace NKikimr { bool HugeHeapDefragmentationRequired( const TOutOfSpaceState& oos, ui32 hugeCanBeFreedChunks, - ui32 hugeTotalChunks) { + ui32 hugeTotalChunks, + double defaultPercent) { if (hugeCanBeFreedChunks < 10) return false; @@ -56,11 +59,14 @@ namespace NKikimr { double percentOfGarbage = static_cast(hugeCanBeFreedChunks) / hugeTotalChunks; if (oos.GetLocalColor() > TSpaceColor::CYAN) { - return percentOfGarbage >= 0.02; + // For anything worse than CYAN + return percentOfGarbage >= Min(0.02, defaultPercent); } else if (oos.GetLocalColor() > TSpaceColor::GREEN) { - return percentOfGarbage >= 0.15; + // For CYAN + return percentOfGarbage >= Min(0.15, 
defaultPercent); } else { - return percentOfGarbage >= 0.30; + // For GREEN + return percentOfGarbage >= Min(0.30, defaultPercent); } } @@ -113,7 +119,8 @@ namespace NKikimr { const auto& oos = DCtx->VCtx->GetOutOfSpaceState(); Y_ABORT_UNLESS(usefulChunks <= totalChunks); const ui32 canBeFreedChunks = totalChunks - usefulChunks; - if (HugeHeapDefragmentationRequired(oos, canBeFreedChunks, totalChunks)) { + double defaultPercent = DCtx->VCfg->DefaultHugeGarbagePerMille / 1000.0; + if (HugeHeapDefragmentationRequired(oos, canBeFreedChunks, totalChunks, defaultPercent)) { TChunksToDefrag chunksToDefrag = calcStat.GetChunksToDefrag(DCtx->MaxChunksToDefrag); Y_ABORT_UNLESS(chunksToDefrag); STLOG(PRI_INFO, BS_VDISK_DEFRAG, BSVDD03, VDISKP(DCtx->VCtx->VDiskLogPrefix, "scan finished"), diff --git a/ydb/core/blobstorage/vdisk/defrag/defrag_actor.h b/ydb/core/blobstorage/vdisk/defrag/defrag_actor.h index f59ecee374c6..08c451b094e7 100644 --- a/ydb/core/blobstorage/vdisk/defrag/defrag_actor.h +++ b/ydb/core/blobstorage/vdisk/defrag/defrag_actor.h @@ -18,6 +18,7 @@ namespace NKikimr { //////////////////////////////////////////////////////////////////////////// struct TDefragCtx { const TIntrusivePtr VCtx; + const TIntrusivePtr VCfg; const std::shared_ptr HugeBlobCtx; const TPDiskCtxPtr PDiskCtx; const TActorId SkeletonId; @@ -30,6 +31,7 @@ namespace NKikimr { TDefragCtx( const TIntrusivePtr &vctx, + const TIntrusivePtr &vconfig, const std::shared_ptr &hugeBlobCtx, const TPDiskCtxPtr &pdiskCtx, const TActorId &skeletonId, @@ -45,7 +47,8 @@ namespace NKikimr { bool HugeHeapDefragmentationRequired( const TOutOfSpaceState& oos, ui32 hugeCanBeFreedChunks, - ui32 hugeTotalChunks); + ui32 hugeTotalChunks, + double defaultPercent); //////////////////////////////////////////////////////////////////////////// // VDISK DEFRAG ACTOR CREATOR diff --git a/ydb/core/blobstorage/vdisk/defrag/defrag_actor_ut.cpp b/ydb/core/blobstorage/vdisk/defrag/defrag_actor_ut.cpp index 
ebcfec56195b..fa059a1d2c8e 100644 --- a/ydb/core/blobstorage/vdisk/defrag/defrag_actor_ut.cpp +++ b/ydb/core/blobstorage/vdisk/defrag/defrag_actor_ut.cpp @@ -15,21 +15,21 @@ namespace NKikimr { TOutOfSpaceState oos(1, 0); ui32 hugeCanBeFreedChunks = 9; ui32 hugeUsedChunks = 20; - bool defrag = HugeHeapDefragmentationRequired(oos, hugeCanBeFreedChunks, hugeUsedChunks); + bool defrag = HugeHeapDefragmentationRequired(oos, hugeCanBeFreedChunks, hugeUsedChunks, 0.30); UNIT_ASSERT(!defrag); } { TOutOfSpaceState oos(1, 0); ui32 hugeCanBeFreedChunks = 200; ui32 hugeUsedChunks = 1000; - bool defrag = HugeHeapDefragmentationRequired(oos, hugeCanBeFreedChunks, hugeUsedChunks); + bool defrag = HugeHeapDefragmentationRequired(oos, hugeCanBeFreedChunks, hugeUsedChunks, 0.30); UNIT_ASSERT(!defrag); } { TOutOfSpaceState oos(1, 0); ui32 hugeCanBeFreedChunks = 301; ui32 hugeUsedChunks = 1000; - bool defrag = HugeHeapDefragmentationRequired(oos, hugeCanBeFreedChunks, hugeUsedChunks); + bool defrag = HugeHeapDefragmentationRequired(oos, hugeCanBeFreedChunks, hugeUsedChunks, 0.30); UNIT_ASSERT(defrag); } } diff --git a/ydb/core/blobstorage/vdisk/defrag/defrag_quantum.cpp b/ydb/core/blobstorage/vdisk/defrag/defrag_quantum.cpp index 1e1753bf6a99..9f3cb4006e1b 100644 --- a/ydb/core/blobstorage/vdisk/defrag/defrag_quantum.cpp +++ b/ydb/core/blobstorage/vdisk/defrag/defrag_quantum.cpp @@ -95,7 +95,7 @@ namespace NKikimr { Compact(); auto hugeStat = GetHugeStat(); - Y_ABORT_UNLESS(hugeStat.LockedChunks.size() < 100); + Y_DEBUG_ABORT_UNLESS(hugeStat.LockedChunks.size() < 100); } Send(ParentActorId, new TEvDefragQuantumResult(std::move(stat))); diff --git a/ydb/core/blobstorage/vdisk/hullop/blobstorage_hull.cpp b/ydb/core/blobstorage/vdisk/hullop/blobstorage_hull.cpp index fedaca042103..a1eeb6af94fb 100644 --- a/ydb/core/blobstorage/vdisk/hullop/blobstorage_hull.cpp +++ b/ydb/core/blobstorage/vdisk/hullop/blobstorage_hull.cpp @@ -413,6 +413,13 @@ namespace NKikimr { if (!CheckGC(ctx, record)) 
return {NKikimrProto::ERROR, 0, false}; // record has duplicates + if (!collect && !record.KeepSize() && !record.DoNotKeepSize()) { + LOG_ERROR_S(ctx, NKikimrServices::BS_HULLRECS, HullDs->HullCtx->VCtx->VDiskLogPrefix + << "Db# Barriers ValidateGCCmd: empty garbage collection command" + << " TabletId# " << tabletID); + return {NKikimrProto::ERROR, "empty garbage collection command"}; + } + auto blockStatus = THullDbRecovery::IsBlocked(record); switch (blockStatus.Status) { case TBlocksCache::EStatus::OK: diff --git a/ydb/core/blobstorage/vdisk/repl/blobstorage_repl.cpp b/ydb/core/blobstorage/vdisk/repl/blobstorage_repl.cpp index bdec548c93e9..45f7f4250a02 100644 --- a/ydb/core/blobstorage/vdisk/repl/blobstorage_repl.cpp +++ b/ydb/core/blobstorage/vdisk/repl/blobstorage_repl.cpp @@ -425,8 +425,8 @@ namespace NKikimr { // no more blobs to replicate; replication will not resume State = Finished; ReplCtx->MonGroup.ReplUnreplicatedVDisks() = 0; - ReplCtx->MonGroup.ReplUnreplicatedPhantoms() = 1; - ReplCtx->MonGroup.ReplUnreplicatedNonPhantoms() = 1; + ReplCtx->MonGroup.ReplUnreplicatedPhantoms() = 0; + ReplCtx->MonGroup.ReplUnreplicatedNonPhantoms() = 0; ReplCtx->MonGroup.ReplWorkUnitsRemaining() = 0; ReplCtx->MonGroup.ReplWorkUnitsDone() = 0; ReplCtx->MonGroup.ReplItemsRemaining() = 0; diff --git a/ydb/core/blobstorage/vdisk/repl/query_donor.h b/ydb/core/blobstorage/vdisk/repl/query_donor.h index ffd59a535d3b..dbdc02b8a880 100644 --- a/ydb/core/blobstorage/vdisk/repl/query_donor.h +++ b/ydb/core/blobstorage/vdisk/repl/query_donor.h @@ -68,10 +68,9 @@ namespace NKikimr { } if (action) { - const TActorId temp(actorId); LOG_DEBUG_S(*TlsActivationContext, NKikimrServices::BS_VDISK_GET, SelfId() << " sending " << query->ToString() - << " to " << temp); - Send(actorId, query.release()); + << " to " << actorId); + Send(actorId, query.release(), IEventHandle::FlagTrackDelivery); } else { PassAway(); } @@ -116,6 +115,7 @@ namespace NKikimr { STRICT_STFUNC(StateFunc, 
hFunc(TEvBlobStorage::TEvVGetResult, Handle); + cFunc(TEvents::TSystem::Undelivered, Step); cFunc(TEvents::TSystem::Poison, PassAway); ) }; diff --git a/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeleton.cpp b/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeleton.cpp index 043b5300e308..c7a7ff970a4c 100644 --- a/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeleton.cpp +++ b/ydb/core/blobstorage/vdisk/skeleton/blobstorage_skeleton.cpp @@ -225,12 +225,19 @@ namespace NKikimr { //////////////////////////////////////////////////////////////////////// void Handle(TEvBlobStorage::TEvVMovedPatch::TPtr &ev, const TActorContext &ctx) { + LOG_DEBUG_S(ctx, BS_VDISK_PATCH, VCtx->VDiskLogPrefix << "TEvVMovedPatch: receive request;" + << " Event# " << ev->Get()->ToString()); if (!CheckIfWriteAllowed(ev, ctx)) { + LOG_DEBUG_S(ctx, BS_VDISK_PATCH, VCtx->VDiskLogPrefix << "TEvVMovedPatch: is not allowed;" + << " Event# " << ev->Get()->ToString()); return; } const bool postpone = OverloadHandler->PostponeEvent(ev); if (!postpone) { PrivateHandle(ev, ctx); + } else { + LOG_DEBUG_S(ctx, BS_VDISK_PATCH, VCtx->VDiskLogPrefix << "TEvVMovedPatch: is postponned;" + << " Event# " << ev->Get()->ToString()); } } @@ -270,11 +277,16 @@ namespace NKikimr { void Handle(TEvBlobStorage::TEvVPatchStart::TPtr &ev, const TActorContext &ctx) { if (!CheckIfWriteAllowed(ev, ctx)) { + LOG_DEBUG_S(ctx, BS_VDISK_PATCH, VCtx->VDiskLogPrefix << "TEvVPatchStart: receive request;" + << " Event# " << ev->Get()->ToString()); return; } const bool postpone = OverloadHandler->PostponeEvent(ev); if (!postpone) { PrivateHandle(ev, ctx); + } else { + LOG_DEBUG_S(ctx, BS_VDISK_PATCH, VCtx->VDiskLogPrefix << "TEvVPatchStart: postponned;" + << " Event# " << ev->Get()->ToString()); } } @@ -307,24 +319,32 @@ namespace NKikimr { template void HandleVPatchDiffResending(TEvDiffPtr &ev, const TActorContext &ctx) { if (!CheckIfWriteAllowed(ev, ctx)) { + LOG_DEBUG_S(ctx, BS_VDISK_PATCH, VCtx->VDiskLogPrefix << 
"TEvVPatch: is not allowed;" + << " Event# " << ev->Get()->ToString()); return; } if constexpr (std::is_same_v) { LOG_DEBUG_S(ctx, BS_VDISK_PATCH, VCtx->VDiskLogPrefix << "TEvVPatch: recieve diff;" << " Event# " << ev->Get()->ToString()); IFaceMonGroup->PatchDiffMsgs()++; - } - if constexpr (std::is_same_v) { + } else if constexpr (std::is_same_v) { LOG_DEBUG_S(ctx, BS_VDISK_PATCH, VCtx->VDiskLogPrefix << "TEvVPatch: recieve xor diff;" << " Event# " << ev->Get()->ToString()); IFaceMonGroup->PatchXorDiffMsgs()++; + } else { + LOG_ERROR_S(ctx, BS_VDISK_PATCH, VCtx->VDiskLogPrefix << "TEvVPatch: UNKNOWN diff;" + << " Event# " << ev->Get()->ToString()); } TLogoBlobID patchedBlobId = LogoBlobIDFromLogoBlobID(ev->Get()->Record.GetPatchedPartBlobId()).FullID(); auto it = VPatchActors.find(patchedBlobId); if (it != VPatchActors.end()) { TActivationContext::Send(ev->Forward(it->second)); + LOG_DEBUG_S(ctx, BS_VDISK_PATCH, VCtx->VDiskLogPrefix << "TEvVPatch: diff sent to actor;" + << " Event# " << ev->Get()->ToString()); } else { ReplyError(NKikimrProto::ERROR, "VPatchActor doesn't exist", ev, ctx, TAppData::TimeProvider->Now()); + LOG_DEBUG_S(ctx, BS_VDISK_PATCH, VCtx->VDiskLogPrefix << "TEvVPatch: diff didn't send to actor; actor didn't exist" + << " Event# " << ev->Get()->ToString()); } } @@ -1789,7 +1809,7 @@ namespace NKikimr { } void StartDefrag(const TActorContext &ctx) { - auto defragCtx = std::make_shared(VCtx, HugeBlobCtx, PDiskCtx, ctx.SelfID, + auto defragCtx = std::make_shared(VCtx, Config, HugeBlobCtx, PDiskCtx, ctx.SelfID, Db->HugeKeeperID, true); DefragId = ctx.Register(CreateDefragActor(defragCtx, GInfo)); ActiveActors.Insert(DefragId, __FILE__, __LINE__, ctx, NKikimrServices::BLOBSTORAGE); // keep forever diff --git a/ydb/core/blobstorage/vdisk/skeleton/skeleton_vmovedpatch_actor.cpp b/ydb/core/blobstorage/vdisk/skeleton/skeleton_vmovedpatch_actor.cpp index db94713be486..a0b22acc05a0 100644 --- 
a/ydb/core/blobstorage/vdisk/skeleton/skeleton_vmovedpatch_actor.cpp +++ b/ydb/core/blobstorage/vdisk/skeleton/skeleton_vmovedpatch_actor.cpp @@ -9,8 +9,6 @@ namespace NKikimr { class TVMovedPatchActor : public TActorBootstrapped { friend TActorBootstrapped; - static constexpr ui64 SubRequestDurationMs = 1000; - ui32 OriginalGroupId; ui32 PatchedGroupId; TLogoBlobID OriginalId; @@ -29,6 +27,7 @@ namespace NKikimr { TActorId LeaderId; TOutOfSpaceStatus OOSStatus; + TInstant Deadline = TInstant::Zero(); NLWTrace::TOrbit Orbit; @@ -58,6 +57,10 @@ namespace NKikimr { OriginalId = LogoBlobIDFromLogoBlobID(record.GetOriginalBlobId()); Y_ABORT_UNLESS(record.HasPatchedBlobId()); PatchedId = LogoBlobIDFromLogoBlobID(record.GetPatchedBlobId()); + Deadline = TInstant::Seconds(record.GetMsgQoS().GetDeadlineSeconds()); + if (record.HasMsgQoS() && record.GetMsgQoS().HasDeadlineSeconds()) { + Deadline = TInstant::Seconds(record.GetMsgQoS().GetDeadlineSeconds()); + } DiffCount = record.DiffsSize(); Diffs.reset(new TEvBlobStorage::TEvPatch::TDiff[DiffCount]); @@ -96,6 +99,12 @@ namespace NKikimr { << " ErrorReason# " << ErrorReason << " Marker# BSVSP01"); } + LOG_DEBUG_S(ctx, NKikimrServices::BS_VDISK_PATCH, VCtx->VDiskLogPrefix + << "Send result TEvVMovedPatch: " << errorSubMsg << ';' + << " OriginalBlobId# " << OriginalId + << " PatchedBlobId# " << PatchedId + << " ErrorReason# " << ErrorReason + << " Marker# BSVSP01"); SendVDiskResponse(ctx, Event->Sender, vMovedPatchResult.release(), Event->Cookie, VCtx); PassAway(); } @@ -108,6 +117,10 @@ namespace NKikimr { } void Handle(TEvBlobStorage::TEvGetResult::TPtr &ev, const TActorContext &ctx) { + LOG_DEBUG_S(ctx, NKikimrServices::BS_VDISK_PATCH, VCtx->VDiskLogPrefix + << "Receive Get ub TEvVMovedPatch: " + << " OriginalBlobId# " << OriginalId + << " PatchedBlobId# " << PatchedId); TEvBlobStorage::TEvGetResult *result = ev->Get(); Orbit = std::move(result->Orbit); @@ -138,15 +151,18 @@ namespace NKikimr { Buffer =
result->Responses[0].Buffer.ConvertToString(); ApplyDiffs(); - TInstant deadline = TActivationContext::Now() + TDuration::MilliSeconds(SubRequestDurationMs); // We have chosen UserData as PutHandleClass on purpose. // If VMovedPatch and Put were AsyncWrite, it would become a deadlock // because the put subrequest may not send and the moved patch request will end by timeout. - std::unique_ptr put = std::make_unique(PatchedId, Buffer, deadline, + std::unique_ptr put = std::make_unique(PatchedId, Buffer, Deadline, NKikimrBlobStorage::UserData, TEvBlobStorage::TEvPut::TacticDefault); put->Orbit = std::move(Orbit); + LOG_DEBUG_S(ctx, NKikimrServices::BS_VDISK_PATCH, VCtx->VDiskLogPrefix + << "Send Put ub TEvVMovedPatch: " + << " OriginalBlobId# " << OriginalId + << " PatchedBlobId# " << PatchedId); SendToBSProxy(SelfId(), PatchedGroupId, put.release(), OriginalId.Hash()); } @@ -156,6 +172,11 @@ namespace NKikimr { ui32 originalIdHash = OriginalId.Hash(); + LOG_DEBUG_S(ctx, NKikimrServices::BS_VDISK_PATCH, VCtx->VDiskLogPrefix + << "Receive Put ub TEvVMovedPatch: " + << " OriginalBlobId# " << OriginalId + << " PatchedBlobId# " << PatchedId); + constexpr auto errorSubMsg = "failed on VPut"; if (ev->Cookie != originalIdHash) { ErrorReason = "Couldn't put the patched blob; Received TEvPutResult with wrong cookie"; @@ -173,11 +194,20 @@ namespace NKikimr { } void Bootstrap() { - TInstant deadline = TActivationContext::Now() + TDuration::MilliSeconds(SubRequestDurationMs); + if (Deadline && Deadline < TActivationContext::Now()) { + SendResponseAndDie(TActivationContext::AsActorContext(), NKikimrProto::DEADLINE); + return; + } + std::unique_ptr get = std::make_unique(OriginalId, 0, - OriginalId.BlobSize(), deadline, NKikimrBlobStorage::AsyncRead); + OriginalId.BlobSize(), Deadline, NKikimrBlobStorage::AsyncRead); get->Orbit = std::move(Event->Get()->Orbit); + LOG_DEBUG_S(TActivationContext::AsActorContext(), NKikimrServices::BS_VDISK_PATCH, VCtx->VDiskLogPrefix + << "Send Get 
ub TEvVMovedPatch: " + << " OriginalBlobId# " << OriginalId + << " PatchedBlobId# " << PatchedId); + SendToBSProxy(SelfId(), OriginalGroupId, get.release(), PatchedId.Hash()); Become(&TThis::StateWait); } diff --git a/ydb/core/blobstorage/vdisk/skeleton/skeleton_vpatch_actor.cpp b/ydb/core/blobstorage/vdisk/skeleton/skeleton_vpatch_actor.cpp index 5c4325e85ced..05f6f035277a 100644 --- a/ydb/core/blobstorage/vdisk/skeleton/skeleton_vpatch_actor.cpp +++ b/ydb/core/blobstorage/vdisk/skeleton/skeleton_vpatch_actor.cpp @@ -306,7 +306,7 @@ namespace NKikimr::NPrivate { void SendVPatchResult(NKikimrProto::EReplyStatus status, bool forceEnd = false) { STLOG(PRI_INFO, BS_VDISK_PATCH, BSVSP07, - VDiskLogPrefix << " TEvVPatch: send patch result;", + VDiskLogPrefix << " TEvVPatch: " << (forceEnd ? "received force end;" : "send patch result;"), (OriginalBlobId, OriginalBlobId), (PatchedBlobId, PatchedBlobId), (OriginalPartId, (ui32)OriginalPartId), diff --git a/ydb/core/change_exchange/change_sender_common_ops.h b/ydb/core/change_exchange/change_sender_common_ops.h index 8c9f45a1c698..f2c02c0b595b 100644 --- a/ydb/core/change_exchange/change_sender_common_ops.h +++ b/ydb/core/change_exchange/change_sender_common_ops.h @@ -336,7 +336,7 @@ class TBaseChangeSender { Y_ABORT_UNLESS(it != Broadcasting.end()); auto& broadcast = it->second; - if (broadcast.Partitions.contains(partitionId)) { + if (broadcast.CompletedPartitions.contains(partitionId)) { return false; } @@ -413,8 +413,10 @@ class TBaseChangeSender { } TActorId GetChangeServer() const { return ChangeServer; } - void CreateSenders(const TVector& partitionIds, bool partitioningChanged = true) { - if (partitioningChanged) { + +private: + void CreateSendersImpl(const TVector& partitionIds) { + if (partitionIds) { CreateMissingSenders(partitionIds); } else { RecreateSenders(GonePartitions); @@ -427,6 +429,16 @@ class TBaseChangeSender { } } +protected: + void CreateSenders(const TVector& partitionIds) { + 
Y_ABORT_UNLESS(partitionIds); + CreateSendersImpl(partitionIds); + } + + void CreateSenders() { + CreateSendersImpl({}); + } + void KillSenders() { for (const auto& [_, sender] : std::exchange(Senders, {})) { if (sender.ActorId) { diff --git a/ydb/core/change_exchange/util.cpp b/ydb/core/change_exchange/util.cpp new file mode 100644 index 000000000000..c4c0516e0e94 --- /dev/null +++ b/ydb/core/change_exchange/util.cpp @@ -0,0 +1,15 @@ +#include "util.h" + +namespace NKikimr::NChangeExchange { + +TVector MakePartitionIds(const TVector& partitions) { + TVector result(::Reserve(partitions.size())); + + for (const auto& partition : partitions) { + result.push_back(partition.ShardId); + } + + return result; +} + +} diff --git a/ydb/core/change_exchange/util.h b/ydb/core/change_exchange/util.h new file mode 100644 index 000000000000..f8ba146fdeaf --- /dev/null +++ b/ydb/core/change_exchange/util.h @@ -0,0 +1,9 @@ +#pragma once + +#include + +namespace NKikimr::NChangeExchange { + +TVector MakePartitionIds(const TVector& partitions); + +} diff --git a/ydb/core/change_exchange/ya.make b/ydb/core/change_exchange/ya.make index b95ab2178442..680c246118ea 100644 --- a/ydb/core/change_exchange/ya.make +++ b/ydb/core/change_exchange/ya.make @@ -4,6 +4,7 @@ SRCS( change_exchange.cpp change_record.cpp change_sender_monitoring.cpp + util.cpp ) GENERATE_ENUM_SERIALIZATION(change_record.h) diff --git a/ydb/core/client/server/msgbus_server_pq_metacache.cpp b/ydb/core/client/server/msgbus_server_pq_metacache.cpp index 619fc4577981..78d316db4a1c 100644 --- a/ydb/core/client/server/msgbus_server_pq_metacache.cpp +++ b/ydb/core/client/server/msgbus_server_pq_metacache.cpp @@ -218,6 +218,7 @@ class TPersQueueMetaCacheActor : public TActorBootstrappedRecord.MutableRequest()->SetType(NKikimrKqp::QUERY_TYPE_SQL_DML); req->Record.MutableRequest()->SetKeepSession(false); req->Record.MutableRequest()->SetDatabase(NKikimr::NPQ::GetDatabaseFromConfig(AppData(ctx)->PQConfig)); + 
req->Record.MutableRequest()->SetUsePublicResponseDataFormat(true); req->Record.MutableRequest()->MutableQueryCachePolicy()->set_keep_in_cache(true); req->Record.MutableRequest()->MutableTxControl()->mutable_begin_tx()->mutable_serializable_read_write(); @@ -274,9 +275,14 @@ class TPersQueueMetaCacheActor : public TActorBootstrappedGet()->Record.GetRef(); - Y_ABORT_UNLESS(record.GetResponse().GetResults().size() == 1); - const auto& rr = record.GetResponse().GetResults(0).GetValue().GetStruct(0); - ui64 newVersion = rr.ListSize() == 0 ? 0 : rr.GetList(0).GetStruct(0).GetOptional().GetInt64(); + Y_VERIFY(record.GetResponse().YdbResultsSize() == 1); + NYdb::TResultSetParser parser(record.GetResponse().GetYdbResults(0)); + + ui64 newVersion = 0; + if (parser.RowsCount() != 0) { + parser.TryNextRow(); + newVersion = *parser.ColumnParser(0).GetOptionalInt64(); + } LastVersionUpdate = ctx.Now(); if (newVersion > CurrentTopicsVersion || CurrentTopicsVersion == 0 || SkipVersionCheck) { @@ -293,17 +299,18 @@ class TPersQueueMetaCacheActor : public TActorBootstrappedGet()->Record.GetRef(); - Y_ABORT_UNLESS(record.GetResponse().GetResults().size() == 1); + Y_VERIFY(record.GetResponse().YdbResultsSize() == 1); TString path, dc; - const auto& rr = record.GetResponse().GetResults(0).GetValue().GetStruct(0); - for (const auto& row : rr.GetList()) { - - path = row.GetStruct(0).GetOptional().GetText(); - dc = row.GetStruct(1).GetOptional().GetText(); + NYdb::TResultSetParser parser(record.GetResponse().GetYdbResults(0)); + const ui32 rowCount = parser.RowsCount(); + while (parser.TryNextRow()) { + path = *parser.ColumnParser(0).GetOptionalUtf8(); + dc = *parser.ColumnParser(1).GetOptionalUtf8(); NewTopics.emplace_back(decltype(NewTopics)::value_type{path, dc}); } - if (rr.ListSize() > 0) { + + if (rowCount > 0) { LastTopicKey = {path, dc}; return RunQuery(EQueryType::EGetTopics, ctx); } else { @@ -710,7 +717,7 @@ class TPersQueueMetaCacheActor : public TActorBootstrapped); + 
DynamicNodesMapping.reset(new THashMap); } while(!NodesMappingWaiters.empty()) { ctx.Send(NodesMappingWaiters.front(), diff --git a/ydb/core/client/server/msgbus_server_pq_metarequest_ut.cpp b/ydb/core/client/server/msgbus_server_pq_metarequest_ut.cpp index c0f92246a02a..c9c8c77ce265 100644 --- a/ydb/core/client/server/msgbus_server_pq_metarequest_ut.cpp +++ b/ydb/core/client/server/msgbus_server_pq_metarequest_ut.cpp @@ -169,7 +169,7 @@ class TMessageBusServerPersQueueRequestTestBase: public TTestBase { static int version = 0; ++version; - THolder request(new TEvPersQueue::TEvUpdateConfig()); + auto request = MakeHolder(); for (size_t i : partitions) { request->Record.MutableTabletConfig()->AddPartitionIds(i); } diff --git a/ydb/core/cms/api_adapters.cpp b/ydb/core/cms/api_adapters.cpp index 3e141a741122..2b2d37258423 100644 --- a/ydb/core/cms/api_adapters.cpp +++ b/ydb/core/cms/api_adapters.cpp @@ -54,11 +54,33 @@ namespace { } } + Ydb::Maintenance::ActionState::ActionReason ConvertReason(NKikimrCms::TAction::TIssue::EType cmsActionIssueType) { + using EIssueType = NKikimrCms::TAction::TIssue; + switch (cmsActionIssueType) { + case EIssueType::UNKNOWN: + return Ydb::Maintenance::ActionState::ACTION_REASON_UNSPECIFIED; + case EIssueType::GENERIC: + return Ydb::Maintenance::ActionState::ACTION_REASON_GENERIC; + case EIssueType::TOO_MANY_UNAVAILABLE_VDISKS: + return Ydb::Maintenance::ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS; + case EIssueType::TOO_MANY_UNAVAILABLE_STATE_STORAGE_RINGS: + return Ydb::Maintenance::ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_STATE_STORAGE_RINGS; + case EIssueType::DISABLED_NODES_LIMIT_REACHED: + return Ydb::Maintenance::ActionState::ACTION_REASON_DISABLED_NODES_LIMIT_REACHED; + case EIssueType::TENANT_DISABLED_NODES_LIMIT_REACHED: + return Ydb::Maintenance::ActionState::ACTION_REASON_TENANT_DISABLED_NODES_LIMIT_REACHED; + case EIssueType::SYS_TABLETS_NODE_LIMIT_REACHED: + return 
Ydb::Maintenance::ActionState::ACTION_REASON_SYS_TABLETS_NODE_LIMIT_REACHED; + } + return Ydb::Maintenance::ActionState::ACTION_REASON_UNSPECIFIED; + } + void ConvertAction(const NKikimrCms::TAction& cmsAction, Ydb::Maintenance::ActionState& actionState) { ConvertAction(cmsAction, *actionState.mutable_action()->mutable_lock_action()); // FIXME: specify action_uid actionState.set_status(Ydb::Maintenance::ActionState::ACTION_STATUS_PENDING); - actionState.set_reason(Ydb::Maintenance::ActionState::ACTION_REASON_UNSPECIFIED); // FIXME: specify + actionState.set_reason(ConvertReason(cmsAction.GetIssue().GetType())); + actionState.set_reason_details(cmsAction.GetIssue().GetMessage()); } void ConvertActionUid(const TString& taskUid, const TString& permissionId, diff --git a/ydb/core/cms/cluster_info.h b/ydb/core/cms/cluster_info.h index de2c85473518..19fea924fe1a 100644 --- a/ydb/core/cms/cluster_info.h +++ b/ydb/core/cms/cluster_info.h @@ -37,13 +37,6 @@ using TClusterInfoPtr = TIntrusivePtr; struct TCmsState; using TCmsStatePtr = TIntrusivePtr; -struct TErrorInfo { - NKikimrCms::TStatus::ECode Code = NKikimrCms::TStatus::ALLOW; - TString Reason; - TInstant Deadline; - ui64 RollbackPoint = 0; -}; - /** * Structure to hold info about issued permission. A set of * all issued permissions is a part of CMS persistent state. 
diff --git a/ydb/core/cms/cms.cpp b/ydb/core/cms/cms.cpp index 5c1cf97ab814..2b459c6925cb 100644 --- a/ydb/core/cms/cms.cpp +++ b/ydb/core/cms/cms.cpp @@ -36,6 +36,38 @@ namespace NKikimr::NCms { using namespace NNodeWhiteboard; using namespace NKikimrCms; +namespace { + +constexpr size_t MAX_ISSUES_TO_STORE = 100; + +TAction::TIssue ConvertIssue(const TReason& reason) { + TAction::TIssue issue; + switch (reason.GetType()) { + case TReason::EType::Generic: + issue.SetType(TAction::TIssue::GENERIC); + break; + case TReason::EType::TooManyUnavailableVDisks: + issue.SetType(TAction::TIssue::TOO_MANY_UNAVAILABLE_VDISKS); + break; + case TReason::EType::TooManyUnavailableStateStorageRings: + issue.SetType(TAction::TIssue::TOO_MANY_UNAVAILABLE_STATE_STORAGE_RINGS); + break; + case TReason::EType::DisabledNodesLimitReached: + issue.SetType(TAction::TIssue::DISABLED_NODES_LIMIT_REACHED); + break; + case TReason::EType::TenantDisabledNodesLimitReached: + issue.SetType(TAction::TIssue::TENANT_DISABLED_NODES_LIMIT_REACHED); + break; + case TReason::EType::SysTabletsNodeLimitReached: + issue.SetType(TAction::TIssue::SYS_TABLETS_NODE_LIMIT_REACHED); + break; + } + issue.SetMessage(reason.GetMessage()); + return issue; +} + +} // anonymous namespace + void TCms::DefaultSignalTabletActive(const TActorContext &) { // must be empty @@ -326,6 +358,8 @@ bool TCms::CheckPermissionRequest(const TPermissionRequest &request, }; auto point = ClusterInfo->PushRollbackPoint(); + size_t storedIssues = 0; + size_t processedActions = 0; for (const auto &action : request.GetActions()) { TDuration permissionDuration = State->Config.DefaultPermissionDuration; if (request.HasDuration()) @@ -352,28 +386,40 @@ bool TCms::CheckPermissionRequest(const TPermissionRequest &request, auto *permission = response.AddPermissions(); permission->MutableAction()->CopyFrom(action); + permission->MutableAction()->ClearIssue(); permission->SetDeadline(error.Deadline.GetValue()); AddPermissionExtensions(action, 
*permission); ClusterInfo->AddTempLocks(action, &ctx); } else { LOG_DEBUG(ctx, NKikimrServices::CMS, "Result: %s (reason: %s)", - ToString(error.Code).data(), error.Reason.data()); + ToString(error.Code).data(), error.Reason.GetMessage().data()); if (CodesRate[response.GetStatus().GetCode()] > CodesRate[error.Code]) { response.MutableStatus()->SetCode(error.Code); - response.MutableStatus()->SetReason(error.Reason); + response.MutableStatus()->SetReason(error.Reason.GetMessage()); if (error.Code == TStatus::DISALLOW_TEMP || error.Code == TStatus::ERROR_TEMP) response.SetDeadline(error.Deadline.GetValue()); } + if (schedule) { + auto *scheduledAction = scheduled.AddActions(); + scheduledAction->CopyFrom(action); + + // Limit stored issues to avoid overloading the local database + if (storedIssues < MAX_ISSUES_TO_STORE) { + *scheduledAction->MutableIssue() = ConvertIssue(error.Reason); + ++storedIssues; + } else { + scheduledAction->ClearIssue(); + } + } + if (!allowPartial) break; - - if (schedule) - scheduled.AddActions()->CopyFrom(action); } + ++processedActions; } ClusterInfo->RollbackLocks(point); @@ -396,9 +442,21 @@ bool TCms::CheckPermissionRequest(const TPermissionRequest &request, if (schedule && response.GetStatus().GetCode() != TStatus::ALLOW_PARTIAL) { if (response.GetStatus().GetCode() == TStatus::DISALLOW_TEMP || response.GetStatus().GetCode() == TStatus::ERROR_TEMP) - scheduled.MutableActions()->CopyFrom(request.GetActions()); - else + { + if (!allowPartial) { + // Only the first problem action was scheduled during + // the actions check loop. Merge it with rest actions. 
+ Y_ABORT_UNLESS(scheduled.ActionsSize() == 1); + TAction::TIssue issue = std::move(*scheduled.MutableActions()->begin()->MutableIssue()); + scheduled.MutableActions()->CopyFrom(request.GetActions()); + for (auto &action : *scheduled.MutableActions()) { + action.ClearIssue(); + } + *scheduled.MutableActions(processedActions)->MutableIssue() = std::move(issue); + } + } else { scheduled.ClearActions(); + } } return response.GetStatus().GetCode() == TStatus::ALLOW @@ -505,6 +563,12 @@ bool TCms::CheckEvictVDisks(const TAction &action, TErrorInfo &error) const { return false; } + if (State->Config.SentinelConfig.EvictVDisksStatus.Empty()) { + error.Code = TStatus::ERROR; + error.Reason = "Evict vdisks is disabled in Sentinel (self heal)"; + return false; + } + switch (action.GetType()) { case TAction::RESTART_SERVICES: case TAction::SHUTDOWN_HOST: @@ -701,12 +765,15 @@ bool TCms::TryToLockStateStorageReplica(const TAction& action, case MODE_MAX_AVAILABILITY: if (restartRings + lockedRings > 1) { error.Code = TStatus::DISALLOW_TEMP; - error.Reason = TStringBuilder() << "Too many unavailable state storage rings" - << ". Restarting rings: " - << (currentRingState == TStateStorageRingInfo::Restart ? restartRings : restartRings - 1) - << ". Temporary (for a 2 minutes) locked rings: " - << (currentRingState == TStateStorageRingInfo::Locked ? lockedRings + 1 : lockedRings) - << ". Maximum allowed number of unavailable rings for this mode: " << 1; + error.Reason = TReason( + TStringBuilder() << "Too many unavailable state storage rings" + << ". Restarting rings: " + << (currentRingState == TStateStorageRingInfo::Restart ? restartRings : restartRings - 1) + << ". Temporary (for a 2 minutes) locked rings: " + << (currentRingState == TStateStorageRingInfo::Locked ? lockedRings + 1 : lockedRings) + << ". 
Maximum allowed number of unavailable rings for this mode: " << 1, + TReason::EType::TooManyUnavailableStateStorageRings + ); error.Deadline = defaultDeadline; return false; } @@ -714,13 +781,16 @@ bool TCms::TryToLockStateStorageReplica(const TAction& action, case MODE_KEEP_AVAILABLE: if (restartRings + lockedRings + disabledRings > (nToSelect - 1) / 2) { error.Code = TStatus::DISALLOW_TEMP; - error.Reason = TStringBuilder() << "Too many unavailable state storage rings" - << ". Restarting rings: " - << (currentRingState == TStateStorageRingInfo::Restart ? restartRings : restartRings - 1) - << ". Temporary (for a 2 minutes) locked rings: " - << (currentRingState == TStateStorageRingInfo::Locked ? lockedRings + 1 : lockedRings) - << ". Disabled rings: " << disabledRings - << ". Maximum allowed number of unavailable rings for this mode: " << (nToSelect - 1) / 2; + error.Reason = TReason( + TStringBuilder() << "Too many unavailable state storage rings" + << ". Restarting rings: " + << (currentRingState == TStateStorageRingInfo::Restart ? restartRings : restartRings - 1) + << ". Temporary (for a 2 minutes) locked rings: " + << (currentRingState == TStateStorageRingInfo::Locked ? lockedRings + 1 : lockedRings) + << ". Disabled rings: " << disabledRings + << ". 
Maximum allowed number of unavailable rings for this mode: " << (nToSelect - 1) / 2, + TReason::EType::TooManyUnavailableStateStorageRings + ); error.Deadline = defaultDeadline; return false; } @@ -1484,6 +1554,13 @@ void TCms::CheckAndEnqueueRequest(TEvCms::TEvPermissionRequest::TPtr &ev, const ev, TStatus::WRONG_REQUEST, "Priority value is out of range", ctx); } + for (const auto &action : rec.GetActions()) { + if (action.HasIssue()) { + return ReplyWithError( + ev, TStatus::WRONG_REQUEST, TStringBuilder() << "Action issue is read-only", ctx); + } + } + EnqueueRequest(ev.Release(), ctx); } diff --git a/ydb/core/cms/cms_maintenance_api_ut.cpp b/ydb/core/cms/cms_maintenance_api_ut.cpp index a1f49f8616b8..151461525408 100644 --- a/ydb/core/cms/cms_maintenance_api_ut.cpp +++ b/ydb/core/cms/cms_maintenance_api_ut.cpp @@ -69,6 +69,32 @@ Y_UNIT_TEST_SUITE(TMaintenanceApiTest) { ) ); } + + Y_UNIT_TEST(ActionReason) { + TCmsTestEnv env(8); + + auto response = env.CheckMaintenanceTaskCreate("task-1", Ydb::StatusIds::SUCCESS, + MakeActionGroup( + MakeLockAction(env.GetNodeId(0), TDuration::Minutes(10)) + ), + MakeActionGroup( + MakeLockAction(env.GetNodeId(1), TDuration::Minutes(10)) + ) + ); + + UNIT_ASSERT_VALUES_EQUAL(response.action_group_states().size(), 2); + UNIT_ASSERT_VALUES_EQUAL(response.action_group_states(0).action_states().size(), 1); + const auto &a1 = response.action_group_states(0).action_states(0); + UNIT_ASSERT_VALUES_EQUAL(a1.status(), ActionState::ACTION_STATUS_PERFORMED); + UNIT_ASSERT_VALUES_EQUAL(a1.reason(), ActionState::ACTION_REASON_OK); + UNIT_ASSERT(a1.reason_details().empty()); + + UNIT_ASSERT_VALUES_EQUAL(response.action_group_states(1).action_states().size(), 1); + const auto &a2 = response.action_group_states(1).action_states(0); + UNIT_ASSERT_VALUES_EQUAL(a2.status(), ActionState::ACTION_STATUS_PENDING); + UNIT_ASSERT_VALUES_EQUAL(a2.reason(), ActionState::ACTION_REASON_TOO_MANY_UNAVAILABLE_VDISKS); + 
UNIT_ASSERT(a2.reason_details().Contains("too many unavailable vdisks")); + } } } // namespace NKikimr::NCmsTest diff --git a/ydb/core/cms/cms_ut.cpp b/ydb/core/cms/cms_ut.cpp index 54279556431d..d9050fd8eb7e 100644 --- a/ydb/core/cms/cms_ut.cpp +++ b/ydb/core/cms/cms_ut.cpp @@ -631,6 +631,104 @@ Y_UNIT_TEST_SUITE(TCmsTest) { env.CheckListRequests("user1", 0); } + Y_UNIT_TEST(ActionIssue) + { + TCmsTestEnv env(16); + + // Acquire lock on one node + auto rec = env.CheckPermissionRequest + ("user", false, false, true, true, TStatus::ALLOW, + MakeAction(TAction::SHUTDOWN_HOST, env.GetNodeId(0), 60000000)); + UNIT_ASSERT_VALUES_EQUAL(rec.PermissionsSize(), 1); + UNIT_ASSERT(!rec.GetPermissions(0).GetAction().HasIssue()); + + auto pid = rec.GetPermissions(0).GetId(); + + // Schedule request + rec = env.CheckPermissionRequest + ("user", false, false, true, true, TStatus::DISALLOW_TEMP, + MakeAction(TAction::SHUTDOWN_HOST, env.GetNodeId(9), 60000000), + MakeAction(TAction::SHUTDOWN_HOST, env.GetNodeId(1), 60000000)); + UNIT_ASSERT_VALUES_EQUAL(rec.PermissionsSize(), 0); + + auto rid = rec.GetRequestId(); + + // Get scheduled request + auto scheduledRec = env.CheckGetRequest("user", rid); + UNIT_ASSERT_VALUES_EQUAL(scheduledRec.RequestsSize(), 1); + UNIT_ASSERT_VALUES_EQUAL(scheduledRec.GetRequests(0).ActionsSize(), 2); + auto action1 = scheduledRec.GetRequests(0).GetActions(0); + UNIT_ASSERT(!action1.HasIssue()); + auto action2 = scheduledRec.GetRequests(0).GetActions(1); + UNIT_ASSERT(action2.HasIssue()); + UNIT_ASSERT_VALUES_EQUAL(action2.GetIssue().GetType(), TAction::TIssue::TOO_MANY_UNAVAILABLE_VDISKS); + + // Try to check request + env.CheckRequest("user", rid, false, TStatus::DISALLOW_TEMP); + + // Get scheduled request + scheduledRec = env.CheckGetRequest("user", rid); + UNIT_ASSERT_VALUES_EQUAL(scheduledRec.RequestsSize(), 1); + UNIT_ASSERT_VALUES_EQUAL(scheduledRec.GetRequests(0).ActionsSize(), 2); + action1 = scheduledRec.GetRequests(0).GetActions(0); + 
UNIT_ASSERT(!action1.HasIssue()); + action2 = scheduledRec.GetRequests(0).GetActions(1); + UNIT_ASSERT(action2.HasIssue()); + UNIT_ASSERT_VALUES_EQUAL(action2.GetIssue().GetType(), TAction::TIssue::TOO_MANY_UNAVAILABLE_VDISKS); + + // Done with permission + env.CheckDonePermission("user", pid); + + // Try to check request + rec = env.CheckRequest("user", rid, false, TStatus::ALLOW, 2); + UNIT_ASSERT(!rec.GetPermissions(0).GetAction().HasIssue()); + UNIT_ASSERT(!rec.GetPermissions(1).GetAction().HasIssue()); + + env.CheckGetRequest("user", rid, false, TStatus::WRONG_REQUEST); + } + + Y_UNIT_TEST(ActionIssuePartialPermissions) + { + TCmsTestEnv env(8); + + // Schedule request + auto rec = env.CheckPermissionRequest + ("user", true, false, true, true, TStatus::ALLOW_PARTIAL, + MakeAction(TAction::SHUTDOWN_HOST, env.GetNodeId(0), 60000000), + MakeAction(TAction::SHUTDOWN_HOST, env.GetNodeId(1), 60000000)); + UNIT_ASSERT_VALUES_EQUAL(rec.PermissionsSize(), 1); + UNIT_ASSERT(!rec.GetPermissions(0).GetAction().HasIssue()); + + auto pid = rec.GetPermissions(0).GetId(); + auto rid = rec.GetRequestId(); + + // Get scheduled request + auto scheduledRec = env.CheckGetRequest("user", rid); + UNIT_ASSERT_VALUES_EQUAL(scheduledRec.RequestsSize(), 1); + UNIT_ASSERT_VALUES_EQUAL(scheduledRec.GetRequests(0).ActionsSize(), 1); + auto action = scheduledRec.GetRequests(0).GetActions(0); + UNIT_ASSERT_VALUES_EQUAL(action.GetIssue().GetType(), TAction::TIssue::TOO_MANY_UNAVAILABLE_VDISKS); + + // Try to check request + env.CheckRequest("user", rid, false, TStatus::DISALLOW_TEMP); + + // Get scheduled request + scheduledRec = env.CheckGetRequest("user", rid); + UNIT_ASSERT_VALUES_EQUAL(scheduledRec.RequestsSize(), 1); + UNIT_ASSERT_VALUES_EQUAL(scheduledRec.GetRequests(0).ActionsSize(), 1); + action = scheduledRec.GetRequests(0).GetActions(0); + UNIT_ASSERT_VALUES_EQUAL(action.GetIssue().GetType(), TAction::TIssue::TOO_MANY_UNAVAILABLE_VDISKS); + + // Done with permission + 
env.CheckDonePermission("user", pid); + + // Try to check request + rec = env.CheckRequest("user", rid, false, TStatus::ALLOW, 1); + UNIT_ASSERT(!rec.GetPermissions(0).GetAction().HasIssue()); + + env.CheckGetRequest("user", rid, false, TStatus::WRONG_REQUEST); + } + Y_UNIT_TEST(WalleTasks) { TCmsTestEnv env(24, 4); @@ -1808,9 +1906,94 @@ Y_UNIT_TEST_SUITE(TCmsTest) { env.CheckRejectRequest("user", request3.GetRequestId()); } + Y_UNIT_TEST(AllVDisksEvictionInRack) + { + auto opts = TTestEnvOpts(8) + .WithSentinel() + .WithNodeLocationCallback([](ui32 nodeId) { + NActorsInterconnect::TNodeLocation location; + location.SetRack(ToString(nodeId / 2 + 1)); + return TNodeLocation(location); // Node = [0, 1, 2, 3, 4, 5, 6, 7] + // Rack = [1, 1, 2, 2, 3, 3, 4, 4] + }); + TCmsTestEnv env(opts); + env.SetLogPriority(NKikimrServices::CMS, NLog::PRI_DEBUG); + + // Evict all VDisks from rack 1 + auto request1 = env.CheckPermissionRequest( + MakePermissionRequest(TRequestOptions("user").WithEvictVDisks(), + MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(0), 600000000, "storage") + ), + TStatus::DISALLOW_TEMP // ok, waiting for move VDisks + ); + auto request2 = env.CheckPermissionRequest( + MakePermissionRequest(TRequestOptions("user").WithEvictVDisks(), + MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(1), 600000000, "storage") + ), + TStatus::DISALLOW_TEMP // ok, waiting for move VDisks + ); + + // Check that FAULTY BSC requests are sent + env.CheckBSCUpdateRequests({ env.GetNodeId(0), env.GetNodeId(1) }, NKikimrBlobStorage::FAULTY); + + // "Move" VDisks from rack 1 + auto& node1 = TFakeNodeWhiteboardService::Info[env.GetNodeId(0)]; + node1.VDisksMoved = true; + node1.VDiskStateInfo.clear(); + auto& node2 = TFakeNodeWhiteboardService::Info[env.GetNodeId(1)]; + node2.VDisksMoved = true; + node2.VDiskStateInfo.clear(); + env.RegenerateBSConfig(TFakeNodeWhiteboardService::Config.MutableResponse()->MutableStatus(0)->MutableBaseConfig(), opts); + + auto permission1 = 
env.CheckRequest("user", request1.GetRequestId(), false, TStatus::ALLOW, 1); + auto permission2 = env.CheckRequest("user", request2.GetRequestId(), false, TStatus::ALLOW, 1); + env.CheckDonePermission("user", permission1.GetPermissions(0).GetId()); + env.CheckDonePermission("user", permission2.GetPermissions(0).GetId()); + } + + Y_UNIT_TEST(DisabledEvictVDisks) + { + auto opts = TTestEnvOpts(8).WithSentinel(); + TCmsTestEnv env(opts); + env.SetLogPriority(NKikimrServices::CMS, NLog::PRI_DEBUG); + + // Make transition faster for tests purposes + auto cmsConfig = env.GetCmsConfig(); + cmsConfig.MutableSentinelConfig()->SetDefaultStateLimit(1); + env.SetCmsConfig(cmsConfig); + + // Evict VDisks + auto request = env.CheckPermissionRequest( + MakePermissionRequest(TRequestOptions("user").WithEvictVDisks(), + MakeAction(TAction::RESTART_SERVICES, env.GetNodeId(0), 600000000, "storage") + ), + TStatus::DISALLOW_TEMP // ok, waiting for move VDisks + ); + + // Check that FAULTY BSC request is sent + env.CheckBSCUpdateRequests({ env.GetNodeId(0) }, NKikimrBlobStorage::FAULTY); + + // Disable VDisks eviction + cmsConfig.MutableSentinelConfig()->SetEvictVDisksStatus(NKikimrCms::TCmsConfig::TSentinelConfig::DISABLED); + env.SetCmsConfig(cmsConfig); + + // Check that ACTIVE BSC request is sent + env.CheckBSCUpdateRequests({ env.GetNodeId(0) }, NKikimrBlobStorage::ACTIVE); + + // Check that CMS returns ERROR when VDisks eviction is disabled + env.CheckRequest("user", request.GetRequestId(), false, TStatus::ERROR, 0); + + // Enable VDisks eviction again + cmsConfig.MutableSentinelConfig()->SetEvictVDisksStatus(NKikimrCms::TCmsConfig::TSentinelConfig::FAULTY); + env.SetCmsConfig(cmsConfig); + + // Check that FAULTY BSC request is sent again + env.CheckBSCUpdateRequests({ env.GetNodeId(0) }, NKikimrBlobStorage::FAULTY); + } + Y_UNIT_TEST(EmergencyDuringRollingRestart) { - TCmsTestEnv env(TTestEnvOpts(8).WithEnableCMSRequestPriorities()); + TCmsTestEnv env(8); // Start rolling 
restart auto rollingRestart = env.CheckPermissionRequest @@ -1839,7 +2022,7 @@ Y_UNIT_TEST_SUITE(TCmsTest) { Y_UNIT_TEST(ScheduledEmergencyDuringRollingRestart) { - TCmsTestEnv env(TTestEnvOpts(8).WithEnableCMSRequestPriorities()); + TCmsTestEnv env(8); // Start rolling restart auto rollingRestart = env.CheckPermissionRequest @@ -1871,7 +2054,7 @@ Y_UNIT_TEST_SUITE(TCmsTest) { Y_UNIT_TEST(WalleRequestDuringRollingRestart) { - TCmsTestEnv env(TTestEnvOpts(8).WithEnableCMSRequestPriorities()); + TCmsTestEnv env(8); // Start rolling restart auto rollingRestart = env.CheckPermissionRequest @@ -1899,7 +2082,7 @@ Y_UNIT_TEST_SUITE(TCmsTest) { Y_UNIT_TEST(ScheduledWalleRequestDuringRollingRestart) { - TCmsTestEnv env(TTestEnvOpts(8).WithEnableCMSRequestPriorities()); + TCmsTestEnv env(8); // Start rolling restart auto rollingRestart = env.CheckPermissionRequest @@ -1930,7 +2113,7 @@ Y_UNIT_TEST_SUITE(TCmsTest) { Y_UNIT_TEST(EnableCMSRequestPrioritiesFeatureFlag) { - TCmsTestEnv env(8); + TCmsTestEnv env(TTestEnvOpts(8).WithoutEnableCMSRequestPriorities()); // Start rolling restart with specified priority auto rollingRestart = env.CheckPermissionRequest ("user", true, false, true, true, -80, TStatus::WRONG_REQUEST, @@ -1943,7 +2126,7 @@ Y_UNIT_TEST_SUITE(TCmsTest) { Y_UNIT_TEST(SamePriorityRequest) { - TCmsTestEnv env(TTestEnvOpts(8).WithEnableCMSRequestPriorities()); + TCmsTestEnv env(8); // Start rolling restart auto rollingRestart = env.CheckPermissionRequest @@ -1973,7 +2156,7 @@ Y_UNIT_TEST_SUITE(TCmsTest) { Y_UNIT_TEST(SamePriorityRequest2) { - TCmsTestEnv env(TTestEnvOpts(8).WithEnableCMSRequestPriorities()); + TCmsTestEnv env(8); // Start rolling restart auto rollingRestart = env.CheckPermissionRequest @@ -2003,7 +2186,7 @@ Y_UNIT_TEST_SUITE(TCmsTest) { Y_UNIT_TEST(PriorityRange) { - TCmsTestEnv env(TTestEnvOpts(8).WithEnableCMSRequestPriorities()); + TCmsTestEnv env(8); const TString expectedReason = "Priority value is out of range"; @@ -2024,7 +2207,7 @@ 
Y_UNIT_TEST_SUITE(TCmsTest) { Y_UNIT_TEST(WalleTasksDifferentPriorities) { - TCmsTestEnv env(TTestEnvOpts(8).WithEnableCMSRequestPriorities()); + TCmsTestEnv env(8); // Without node limits NKikimrCms::TCmsConfig config; diff --git a/ydb/core/cms/cms_ut_common.cpp b/ydb/core/cms/cms_ut_common.cpp index b57c54dcc01c..4a0f6715e431 100644 --- a/ydb/core/cms/cms_ut_common.cpp +++ b/ydb/core/cms/cms_ut_common.cpp @@ -29,6 +29,17 @@ const bool ENABLE_DETAILED_CMS_LOG = true; const bool ENABLE_DETAILED_CMS_LOG = false; #endif +#define COMMA , +Y_DECLARE_OUT_SPEC(, std::map>, o, value) { + std::vector pairs; + for (const auto& [status, nodes] : value) { + pairs.push_back( + TStringBuilder() << status << "=" << '[' << JoinSeq(',', nodes) << ']' + ); + } + o << '[' << JoinSeq(',', pairs) << ']'; +}; + namespace NKikimr { namespace NCmsTest { @@ -391,7 +402,7 @@ static NKikimrConfig::TBootstrap GenerateBootstrapConfig(TTestActorRuntime &runt return res; } -static void SetupServices(TTestActorRuntime &runtime, const TTestEnvOpts &options) { +static void SetupServices(TTestBasicRuntime &runtime, const TTestEnvOpts &options) { const ui32 domainsNum = 1; const ui32 disksInDomain = 1; @@ -503,6 +514,7 @@ static void SetupServices(TTestActorRuntime &runtime, const TTestEnvOpts &option ), 0); + runtime.LocationCallback = options.NodeLocationCallback; runtime.Initialize(app.Unwrap()); auto dnsConfig = new TDynamicNameserviceConfig(); dnsConfig->MaxStaticNodeId = 1000; @@ -868,6 +880,39 @@ TCmsTestEnv::CheckRequest(const TString &user, return rec; } +void TCmsTestEnv::CheckBSCUpdateRequests(std::set expectedNodes, + NKikimrBlobStorage::EDriveStatus expectedStatus) +{ + using TBSCRequests = std::map>; + + TBSCRequests expectedRequests = { {expectedStatus, expectedNodes} }; + TBSCRequests actualRequests; + + TDispatchOptions options; + options.FinalEvents.emplace_back([&](IEventHandle& ev) { + if (ev.GetTypeRewrite() == TEvBlobStorage::TEvControllerConfigRequest::EventType) { + const 
auto& request = ev.Get()->Record; + bool foundUpdateDriveCommand = false; + for (const auto& command : request.GetRequest().GetCommand()) { + if (command.HasUpdateDriveStatus()) { + foundUpdateDriveCommand = true; + const auto& update = command.GetUpdateDriveStatus(); + actualRequests[update.GetStatus()].insert(update.GetHostKey().GetNodeId()); + } + } + return foundUpdateDriveCommand; + } + return false; + }); + DispatchEvents(options, TDuration::Minutes(1)); + + UNIT_ASSERT_C( + actualRequests == expectedRequests, + TStringBuilder() << "Sentinel sent wrong update requests to BSC: " + << "expected# " << expectedRequests + << ", actual# " << actualRequests + ); +} void TCmsTestEnv::CheckWalleStoreTaskIsFailed(NCms::TEvCms::TEvStoreWalleTask* req) { diff --git a/ydb/core/cms/cms_ut_common.h b/ydb/core/cms/cms_ut_common.h index eafa2624ba97..c719133702f5 100644 --- a/ydb/core/cms/cms_ut_common.h +++ b/ydb/core/cms/cms_ut_common.h @@ -92,6 +92,9 @@ struct TTestEnvOpts { bool EnableCMSRequestPriorities; bool EnableSingleCompositeActionGroup; + using TNodeLocationCallback = std::function; + TNodeLocationCallback NodeLocationCallback; + TTestEnvOpts() = default; TTestEnvOpts(ui32 nodeCount, @@ -107,7 +110,7 @@ struct TTestEnvOpts { , UseMirror3dcErasure(false) , AdvanceCurrentTime(false) , EnableSentinel(false) - , EnableCMSRequestPriorities(false) + , EnableCMSRequestPriorities(true) , EnableSingleCompositeActionGroup(true) { } @@ -122,10 +125,16 @@ struct TTestEnvOpts { return *this; } - TTestEnvOpts& WithEnableCMSRequestPriorities() { - EnableCMSRequestPriorities = true; + TTestEnvOpts& WithoutEnableCMSRequestPriorities() { + EnableCMSRequestPriorities = false; + return *this; + } + + TTestEnvOpts& WithNodeLocationCallback(TNodeLocationCallback nodeLocationCallback) { + NodeLocationCallback = nodeLocationCallback; return *this; } + }; class TCmsTestEnv : public TTestBasicRuntime { @@ -323,6 +332,8 @@ class TCmsTestEnv : public TTestBasicRuntime { return 
CheckRequest(user, id, dry, NKikimrCms::MODE_MAX_AVAILABILITY, res, count); } + void CheckBSCUpdateRequests(std::set expectedNodes, NKikimrBlobStorage::EDriveStatus expectedStatus); + void CheckWalleStoreTaskIsFailed(NCms::TEvCms::TEvStoreWalleTask *req); template diff --git a/ydb/core/cms/config.h b/ydb/core/cms/config.h index 6eeb5407111a..4f2f6a7259b9 100644 --- a/ydb/core/cms/config.h +++ b/ydb/core/cms/config.h @@ -1,12 +1,14 @@ #pragma once #include "pdisk_state.h" +#include "pdisk_status.h" #include #include #include #include +#include namespace NKikimr::NCms { @@ -30,6 +32,8 @@ struct TCmsSentinelConfig { ui32 RoomRatio; ui32 RackRatio; + TMaybeFail EvictVDisksStatus; + void Serialize(NKikimrCms::TCmsConfig::TSentinelConfig &config) const { config.SetEnable(Enable); config.SetDryRun(DryRun); @@ -45,6 +49,7 @@ struct TCmsSentinelConfig { config.SetRackRatio(RackRatio); SaveStateLimits(config); + SaveEvictVDisksStatus(config); } void Deserialize(const NKikimrCms::TCmsConfig::TSentinelConfig &config) { @@ -63,6 +68,8 @@ struct TCmsSentinelConfig { auto newStateLimits = LoadStateLimits(config); StateLimits.swap(newStateLimits); + + EvictVDisksStatus = LoadEvictVDisksStatus(config); } void SaveStateLimits(NKikimrCms::TCmsConfig::TSentinelConfig &config) const { @@ -129,6 +136,31 @@ struct TCmsSentinelConfig { return stateLimits; } + + static TMaybeFail LoadEvictVDisksStatus(const NKikimrCms::TCmsConfig::TSentinelConfig &config) { + using EEvictVDisksStatus = NKikimrCms::TCmsConfig::TSentinelConfig; + switch (config.GetEvictVDisksStatus()) { + case EEvictVDisksStatus::UNKNOWN: + case EEvictVDisksStatus::FAULTY: + return EPDiskStatus::FAULTY; + case EEvictVDisksStatus::DISABLED: + return Nothing(); + } + return EPDiskStatus::FAULTY; + } + + void SaveEvictVDisksStatus(NKikimrCms::TCmsConfig::TSentinelConfig &config) const { + using EEvictVDisksStatus = NKikimrCms::TCmsConfig::TSentinelConfig; + + if (EvictVDisksStatus.Empty()) { + 
config.SetEvictVDisksStatus(EEvictVDisksStatus::DISABLED); + return; + } + + if (*EvictVDisksStatus == EPDiskStatus::FAULTY) { + config.SetEvictVDisksStatus(EEvictVDisksStatus::FAULTY); + } + } }; struct TCmsLogConfig { diff --git a/ydb/core/cms/console/configs_dispatcher.cpp b/ydb/core/cms/console/configs_dispatcher.cpp index 9688829f0f9b..3c0cc91dcb00 100644 --- a/ydb/core/cms/console/configs_dispatcher.cpp +++ b/ydb/core/cms/console/configs_dispatcher.cpp @@ -927,7 +927,7 @@ void TConfigsDispatcher::Handle(TEvConsole::TEvConfigSubscriptionNotification::T if (subscription->Yaml && YamlConfigEnabled) { ReplaceConfigItems(YamlProtoConfig, trunc, FilterKinds(subscription->Kinds), BaseConfig); } else { - Y_FOR_EACH_BIT(kind, kinds) { + Y_FOR_EACH_BIT(kind, FilterKinds(kinds)) { if (affectedKinds.contains(kind)) { hasAffectedKinds = true; } @@ -941,15 +941,15 @@ void TConfigsDispatcher::Handle(TEvConsole::TEvConfigSubscriptionNotification::T ReplaceConfigItems(ev->Get()->Record.GetConfig(), trunc, FilterKinds(kinds), BaseConfig); } - if (hasAffectedKinds || !CompareConfigs(subscription->CurrentConfig.Config, trunc) || CurrentStateFunc() == &TThis::StateInit) { + if (hasAffectedKinds || !CompareConfigs(subscription->CurrentConfig.Config, trunc, FilterKinds(kinds)) || CurrentStateFunc() == &TThis::StateInit) { subscription->UpdateInProcess = MakeHolder(); subscription->UpdateInProcess->Record.MutableConfig()->CopyFrom(trunc); subscription->UpdateInProcess->Record.SetLocal(true); - Y_FOR_EACH_BIT(kind, kinds) { + Y_FOR_EACH_BIT(kind, FilterKinds(kinds)) { subscription->UpdateInProcess->Record.AddItemKinds(kind); } subscription->UpdateInProcessCookie = ++NextRequestCookie; - subscription->UpdateInProcessConfigVersion = FilterVersion(ev->Get()->Record.GetConfig().GetVersion(), kinds); + subscription->UpdateInProcessConfigVersion = FilterVersion(ev->Get()->Record.GetConfig().GetVersion(), FilterKinds(kinds)); if (YamlConfigEnabled) { UpdateYamlVersion(subscription); @@ 
-1046,6 +1046,12 @@ void TConfigsDispatcher::Handle(TEvConfigsDispatcher::TEvSetConfigSubscriptionRe "SetConfigSubscriptionRequest handler"); Y_UNUSED(nonYamlKinds); auto kinds = KindsToBitMap(ev->Get()->ConfigItemKinds); + + auto truncKinds = FilterKinds(kinds); + if (truncKinds.Empty() && !kinds.Empty()) { + return; + } + auto subscriberActor = ev->Get()->Subscriber ? ev->Get()->Subscriber : ev->Sender; auto subscription = FindSubscription(kinds); diff --git a/ydb/core/cms/console/console__replace_yaml_config.cpp b/ydb/core/cms/console/console__replace_yaml_config.cpp index 1d6840692492..1e510997c812 100644 --- a/ydb/core/cms/console/console__replace_yaml_config.cpp +++ b/ydb/core/cms/console/console__replace_yaml_config.cpp @@ -1,5 +1,6 @@ #include "console_configs_manager.h" #include "console_configs_provider.h" +#include "console_audit.h" #include #include @@ -16,6 +17,7 @@ class TConfigsManager::TTxReplaceYamlConfig : public TTransactionBaseGet()->Record.GetRequest().config()) + , Peer(ev->Get()->Record.GetPeerName()) , Sender(ev->Sender) , UserSID(NACLib::TUserToken(ev->Get()->Record.GetUserToken()).GetUserSID()) , Force(force) @@ -146,6 +148,7 @@ class TConfigsManager::TTxReplaceYamlConfig : public TTransactionBaseRecord.AddIssues(); issue->set_severity(NYql::TSeverityIds::S_ERROR); issue->set_message(ex.what()); + ErrorReason = ex.what(); Response = MakeHolder(Sender, ctx.SelfID, ev.Release()); } @@ -159,6 +162,14 @@ class TConfigsManager::TTxReplaceYamlConfig : public TTransactionBaseYamlConfig, + /* newConfig = */ Config, + /* reason = */ {}, + /* success = */ true); + Self->YamlVersion = Version + 1; Self->YamlConfig = UpdatedConfig; Self->YamlDropped = false; @@ -167,6 +178,14 @@ class TConfigsManager::TTxReplaceYamlConfig : public TTransactionBase(Self->YamlConfig); ctx.Send(Self->ConfigsProvider, resp.Release()); + } else if (Error && !DryRun) { + AuditLogReplaceConfigTransaction( + /* peer = */ Peer, + /* userSID = */ UserSID, + /* oldConfig = */ 
Self->YamlConfig, + /* newConfig = */ Config, + /* reason = */ ErrorReason, + /* success = */ false); } Self->TxProcessor->TxCompleted(this, ctx); @@ -174,6 +193,7 @@ class TConfigsManager::TTxReplaceYamlConfig : public TTransactionBase Response; bool Error = false; + TString ErrorReason; bool Modify = false; TSimpleSharedPtr UnknownFieldsCollector = nullptr; ui32 Version; diff --git a/ydb/core/cms/console/console_audit.cpp b/ydb/core/cms/console/console_audit.cpp new file mode 100644 index 000000000000..8ea7d2e6b9cb --- /dev/null +++ b/ydb/core/cms/console/console_audit.cpp @@ -0,0 +1,34 @@ +#include "console_audit.h" + +#include +#include + +namespace NKikimr::NConsole { + +void AuditLogReplaceConfigTransaction( + const TString& peer, + const TString& userSID, + const TString& oldConfig, + const TString& newConfig, + const TString& reason, + bool success) +{ + static const TString COMPONENT_NAME = "console"; + + static const TString EMPTY_VALUE = "{none}"; + + auto peerName = NKikimr::NAddressClassifier::ExtractAddress(peer); + + AUDIT_LOG( + AUDIT_PART("component", COMPONENT_NAME) + AUDIT_PART("remote_address", (!peerName.empty() ? peerName : EMPTY_VALUE)) + AUDIT_PART("subject", (!userSID.empty() ? userSID : EMPTY_VALUE)) + AUDIT_PART("status", TString(success ? 
"SUCCESS" : "ERROR")) + AUDIT_PART("reason", reason, !reason.empty()) + AUDIT_PART("operation", TString("REPLACE DYNCONFIG")) + AUDIT_PART("old_config", oldConfig) + AUDIT_PART("new_config", newConfig) + ); +} + +} // namespace NKikimr::NConsole diff --git a/ydb/core/cms/console/console_audit.h b/ydb/core/cms/console/console_audit.h new file mode 100644 index 000000000000..028176090129 --- /dev/null +++ b/ydb/core/cms/console/console_audit.h @@ -0,0 +1,15 @@ +#pragma once + +#include + +namespace NKikimr::NConsole { + +void AuditLogReplaceConfigTransaction( + const TString& peer, + const TString& userSID, + const TString& oldConfig, + const TString& newConfig, + const TString& reason, + bool success); + +} // namespace NKikimr::NConsole diff --git a/ydb/core/cms/console/console_configs_manager.cpp b/ydb/core/cms/console/console_configs_manager.cpp index b9495819e6f6..6906c6b307cf 100644 --- a/ydb/core/cms/console/console_configs_manager.cpp +++ b/ydb/core/cms/console/console_configs_manager.cpp @@ -1,6 +1,7 @@ #include "console_configs_manager.h" #include "configs_dispatcher.h" +#include "console_audit.h" #include "console_configs_provider.h" #include "console_impl.h" #include "http.h" @@ -974,4 +975,24 @@ void TConfigsManager::ScheduleLogCleanup(const TActorContext &ctx) LogCleanupTimerCookieHolder.Get()); } +void TConfigsManager::HandleUnauthorized(TEvConsole::TEvReplaceYamlConfigRequest::TPtr &ev, const TActorContext &) { + AuditLogReplaceConfigTransaction( + /* peer = */ ev->Get()->Record.GetPeerName(), + /* userSID = */ ev->Get()->Record.GetUserToken(), + /* oldConfig = */ YamlConfig, + /* newConfig = */ ev->Get()->Record.GetRequest().config(), + /* reason = */ "Unauthorized.", + /* success = */ false); +} + +void TConfigsManager::HandleUnauthorized(TEvConsole::TEvSetYamlConfigRequest::TPtr &ev, const TActorContext &) { + AuditLogReplaceConfigTransaction( + /* peer = */ ev->Get()->Record.GetPeerName(), + /* userSID = */ ev->Get()->Record.GetUserToken(), + /* 
oldConfig = */ YamlConfig, + /* newConfig = */ ev->Get()->Record.GetRequest().config(), + /* reason = */ "Unauthorized.", + /* success = */ false); +} + } // namespace NKikimr::NConsole diff --git a/ydb/core/cms/console/console_configs_manager.h b/ydb/core/cms/console/console_configs_manager.h index 63feebe774f7..7884c6eb5341 100644 --- a/ydb/core/cms/console/console_configs_manager.h +++ b/ydb/core/cms/console/console_configs_manager.h @@ -152,6 +152,8 @@ class TConfigsManager : public TActorBootstrapped { void Handle(TEvInterconnect::TEvNodesInfo::TPtr &ev, const TActorContext &ctx); void Handle(TEvConsole::TEvReplaceYamlConfigRequest::TPtr & ev, const TActorContext & ctx); void Handle(TEvConsole::TEvSetYamlConfigRequest::TPtr & ev, const TActorContext & ctx); + void HandleUnauthorized(TEvConsole::TEvReplaceYamlConfigRequest::TPtr & ev, const TActorContext & ctx); + void HandleUnauthorized(TEvConsole::TEvSetYamlConfigRequest::TPtr & ev, const TActorContext & ctx); void Handle(TEvConsole::TEvDropConfigRequest::TPtr & ev, const TActorContext & ctx); void Handle(TEvPrivate::TEvStateLoaded::TPtr &ev, const TActorContext &ctx); void Handle(TEvPrivate::TEvCleanupSubscriptions::TPtr &ev, const TActorContext &ctx); @@ -160,9 +162,16 @@ class TConfigsManager : public TActorBootstrapped { template void HandleWithRights(T &ev, const TActorContext &ctx) { + constexpr bool HasHandleUnauthorized = requires(T &ev) { + HandleUnauthorized(ev, ctx); + }; + if (CheckRights(ev->Get()->Record.GetUserToken())) { Handle(ev, ctx); } else { + if constexpr (HasHandleUnauthorized) { + HandleUnauthorized(ev, ctx); + } auto req = MakeHolder(); ctx.Send(ev->Sender, req.Release()); } diff --git a/ydb/core/cms/console/ya.make b/ydb/core/cms/console/ya.make index 6fecc0620508..57092fca8ea4 100644 --- a/ydb/core/cms/console/ya.make +++ b/ydb/core/cms/console/ya.make @@ -11,6 +11,8 @@ SRCS( configs_dispatcher.h console.cpp console.h + console_audit.cpp + console_audit.h console_configs_manager.cpp 
console_configs_manager.h console_configs_provider.cpp diff --git a/ydb/core/cms/erasure_checkers.cpp b/ydb/core/cms/erasure_checkers.cpp index 8d1b257ec1a2..1d753ba32cf0 100644 --- a/ydb/core/cms/erasure_checkers.cpp +++ b/ydb/core/cms/erasure_checkers.cpp @@ -77,10 +77,13 @@ bool TErasureCounterBase::CheckForMaxAvailability(TClusterInfoPtr info, TErrorIn } error.Code = TStatus::DISALLOW_TEMP; - error.Reason = TStringBuilder() << "Issue in affected group with id '" << GroupId << "'" + error.Reason = TReason( + TStringBuilder() << "Issue in affected group with id '" << GroupId << "'" << ": too many unavailable vdisks" << ". Locked: " << DumpVDisksInfo(Locked, info) - << ". Down: " << DumpVDisksInfo(Down, info); + << ". Down: " << DumpVDisksInfo(Down, info), + TReason::EType::TooManyUnavailableVDisks + ); error.Deadline = defaultDeadline; return false; } @@ -150,10 +153,13 @@ bool TDefaultErasureCounter::CheckForKeepAvailability(TClusterInfoPtr info, TErr } error.Code = TStatus::DISALLOW_TEMP; - error.Reason = TStringBuilder() << "Issue in affected group with id '" << GroupId << "'" + error.Reason = TReason( + TStringBuilder() << "Issue in affected group with id '" << GroupId << "'" << ": too many unavailable vdisks" << ". Locked: " << DumpVDisksInfo(Locked, info) - << ". Down: " << DumpVDisksInfo(Down, info); + << ". Down: " << DumpVDisksInfo(Down, info), + TReason::EType::TooManyUnavailableVDisks + ); error.Deadline = defaultDeadline; return false; } @@ -191,20 +197,26 @@ bool TMirror3dcCounter::CheckForKeepAvailability(TClusterInfoPtr info, TErrorInf if (DataCenterDisabledNodes.size() > 2) { error.Code = TStatus::DISALLOW_TEMP; - error.Reason = TStringBuilder() << "Issue in affected group with id '" << GroupId << "'" + error.Reason = TReason( + TStringBuilder() << "Issue in affected group with id '" << GroupId << "'" << ": too many unavailable vdisks" << ". Number of data centers with unavailable vdisks: " << DataCenterDisabledNodes.size() << ". 
Locked: " << DumpVDisksInfo(Locked, info) - << ". Down: " << DumpVDisksInfo(Down, info); + << ". Down: " << DumpVDisksInfo(Down, info), + TReason::EType::TooManyUnavailableVDisks + ); error.Deadline = defaultDeadline; return false; } error.Code = TStatus::DISALLOW_TEMP; - error.Reason = TStringBuilder() << "Issue in affected group with id '" << GroupId << "'" + error.Reason = TReason( + TStringBuilder() << "Issue in affected group with id '" << GroupId << "'" << ": too many unavailable vdisks" << ". Locked: " << DumpVDisksInfo(Locked, info) - << ". Down: " << DumpVDisksInfo(Down, info); + << ". Down: " << DumpVDisksInfo(Down, info), + TReason::EType::TooManyUnavailableVDisks + ); error.Deadline = defaultDeadline; return false; diff --git a/ydb/core/cms/error_info.h b/ydb/core/cms/error_info.h new file mode 100644 index 000000000000..c6346f133330 --- /dev/null +++ b/ydb/core/cms/error_info.h @@ -0,0 +1,59 @@ +#pragma once + +#include "defs.h" + +namespace NKikimr::NCms { + +class TReason { +public: + // Must be sync with proto enum + enum class EType { + Generic, + TooManyUnavailableVDisks, + TooManyUnavailableStateStorageRings, + DisabledNodesLimitReached, + TenantDisabledNodesLimitReached, + SysTabletsNodeLimitReached, + }; + + TReason(const TString &message, EType type = EType::Generic) + : Message(message) + , Type(type) + {} + + TReason(const char* message, EType type = EType::Generic) + : Message(message) + , Type(type) + {} + + TReason() = default; + + operator TString() const { + return Message; + } + + const TString& GetMessage() const { + return Message; + } + + EType GetType() const { + return Type; + } + +private: + TString Message; + EType Type = EType::Generic; +}; + +struct TErrorInfo { + NKikimrCms::TStatus::ECode Code = NKikimrCms::TStatus::ALLOW; + TReason Reason; + TInstant Deadline; + ui64 RollbackPoint = 0; +}; + +} // namespace NKikimr::NCms + +Y_DECLARE_OUT_SPEC(inline, NKikimr::NCms::TReason, stream, value) { + stream << value.GetMessage(); 
+} diff --git a/ydb/core/cms/json_proxy_proto.h b/ydb/core/cms/json_proxy_proto.h index 8079eb6971e2..d65d419f4495 100644 --- a/ydb/core/cms/json_proxy_proto.h +++ b/ydb/core/cms/json_proxy_proto.h @@ -80,6 +80,8 @@ class TJsonProxyProto : public TActorBootstrapped { return ReplyWithTypeDescription(*NKikimrConfig::TImmediateControlsConfig::TVDiskControls::descriptor(), ctx); else if (name == ".NKikimrConfig.TImmediateControlsConfig.TTabletControls") return ReplyWithTypeDescription(*NKikimrConfig::TImmediateControlsConfig::TTabletControls::descriptor(), ctx); + else if (name == ".NKikimrConfig.TImmediateControlsConfig.TBlobStorageControllerControls") + return ReplyWithTypeDescription(*NKikimrConfig::TImmediateControlsConfig::TBlobStorageControllerControls::descriptor(), ctx); } ctx.Send(RequestEvent->Sender, diff --git a/ydb/core/cms/node_checkers.cpp b/ydb/core/cms/node_checkers.cpp index 51306ab3efb6..b478063a6d98 100644 --- a/ydb/core/cms/node_checkers.cpp +++ b/ydb/core/cms/node_checkers.cpp @@ -87,7 +87,7 @@ const THashMap& TNodesCounterBase::GetNodeToSta return NodeToState; } -bool TNodesLimitsCounterBase::TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, TString& reason) const { +bool TNodesLimitsCounterBase::TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, TReason& reason) const { Y_ABORT_UNLESS(NodeToState.contains(nodeId)); auto nodeState = NodeToState.at(nodeId); @@ -126,28 +126,34 @@ bool TNodesLimitsCounterBase::TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabili const auto disabledNodes = LockedNodesCount + DownNodesCount + 1; if (DisabledNodesLimit > 0 && disabledNodes > DisabledNodesLimit) { - reason = TStringBuilder() << ReasonPrefix(nodeId) + reason = TReason( + TStringBuilder() << ReasonPrefix(nodeId) << ": too many unavailable nodes." 
<< " Locked: " << LockedNodesCount << ", down: " << DownNodesCount - << ", limit: " << DisabledNodesLimit; + << ", limit: " << DisabledNodesLimit, + DisabledNodesLimitReachedReasonType() + ); return false; } if (DisabledNodesRatioLimit > 0 && (disabledNodes * 100 > NodeToState.size() * DisabledNodesRatioLimit)) { - reason = TStringBuilder() << ReasonPrefix(nodeId) + reason = TReason( + TStringBuilder() << ReasonPrefix(nodeId) << ": too many unavailable nodes." << " Locked: " << LockedNodesCount << ", down: " << DownNodesCount << ", total: " << NodeToState.size() - << ", limit: " << DisabledNodesRatioLimit << "%"; + << ", limit: " << DisabledNodesRatioLimit << "%", + DisabledNodesLimitReachedReasonType() + ); return false; } return true; } -bool TSysTabletsNodesCounter::TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, TString& reason) const { +bool TSysTabletsNodesCounter::TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, TReason& reason) const { Y_ABORT_UNLESS(NodeToState.contains(nodeId)); auto nodeState = NodeToState.at(nodeId); @@ -198,12 +204,15 @@ bool TSysTabletsNodesCounter::TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabili Y_ABORT("Unknown availability mode"); } - reason = TStringBuilder() << "Cannot lock node '" << nodeId << "'" + reason = TReason( + TStringBuilder() << "Cannot lock node '" << nodeId << "'" << ": tablet '" << NKikimrConfig::TBootstrap_ETabletType_Name(TabletType) << "'" << " has too many unavailable nodes." 
<< " Locked: " << LockedNodesCount << ", down: " << DownNodesCount - << ", limit: " << limit; + << ", limit: " << limit, + TReason::EType::SysTabletsNodeLimitReached + ); return false; } diff --git a/ydb/core/cms/node_checkers.h b/ydb/core/cms/node_checkers.h index a71066cfe2df..55b606e5ce58 100644 --- a/ydb/core/cms/node_checkers.h +++ b/ydb/core/cms/node_checkers.h @@ -1,6 +1,7 @@ #pragma once #include "defs.h" +#include "error_info.h" #include #include @@ -39,7 +40,7 @@ class INodesChecker { virtual void LockNode(ui32 nodeId) = 0; virtual void UnlockNode(ui32 nodeId) = 0; - virtual bool TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, TString& reason) const = 0; + virtual bool TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, TReason& reason) const = 0; }; /** @@ -80,7 +81,13 @@ class TNodesLimitsCounterBase : public TNodesCounterBase { ui32 DisabledNodesLimit; ui32 DisabledNodesRatioLimit; - virtual TString ReasonPrefix(ui32 nodeId) const = 0; + virtual TString ReasonPrefix(ui32 nodeId) const { + return TStringBuilder() << "Cannot lock node '" << nodeId << "'"; + } + + virtual TReason::EType DisabledNodesLimitReachedReasonType() const { + return TReason::EType::DisabledNodesLimitReached; + }; public: explicit TNodesLimitsCounterBase(ui32 disabledNodesLimit, ui32 disabledNodesRatioLimit) @@ -94,7 +101,7 @@ class TNodesLimitsCounterBase : public TNodesCounterBase { DisabledNodesRatioLimit = ratioLimit; } - bool TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, TString& reason) const override final; + bool TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, TReason& reason) const override final; }; class TTenantLimitsCounter : public TNodesLimitsCounterBase { @@ -106,6 +113,10 @@ class TTenantLimitsCounter : public TNodesLimitsCounterBase { return TStringBuilder() << "Cannot lock node '" << nodeId << "' of tenant '" << TenantName << "'"; } + TReason::EType DisabledNodesLimitReachedReasonType() const override final 
{ + return TReason::EType::TenantDisabledNodesLimitReached; + } + public: explicit TTenantLimitsCounter(const TString& tenantName, ui32 disabledNodesLimit, ui32 disabledNodesRatioLimit) : TNodesLimitsCounterBase(disabledNodesLimit, disabledNodesRatioLimit) @@ -115,11 +126,6 @@ class TTenantLimitsCounter : public TNodesLimitsCounterBase { }; class TClusterLimitsCounter : public TNodesLimitsCounterBase { -protected: - TString ReasonPrefix(ui32 nodeId) const override final { - return TStringBuilder() << "Cannot lock node '" << nodeId << "'"; - } - public: explicit TClusterLimitsCounter(ui32 disabledNodesLimit, ui32 disabledNodesRatioLimit) : TNodesLimitsCounterBase(disabledNodesLimit, disabledNodesRatioLimit) @@ -143,7 +149,7 @@ class TSysTabletsNodesCounter : public TNodesCounterBase { { } - bool TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, TString& reason) const override final; + bool TryToLockNode(ui32 nodeId, NKikimrCms::EAvailabilityMode mode, TReason& reason) const override final; }; } // namespace NKikimr::NCms diff --git a/ydb/core/cms/pdisk_status.h b/ydb/core/cms/pdisk_status.h new file mode 100644 index 000000000000..b36a86cec46c --- /dev/null +++ b/ydb/core/cms/pdisk_status.h @@ -0,0 +1,9 @@ +#pragma once + +#include + +namespace NKikimr::NCms { + +using EPDiskStatus = NKikimrBlobStorage::EDriveStatus; + +} // namespace NKikimr::NCms diff --git a/ydb/core/cms/sentinel.cpp b/ydb/core/cms/sentinel.cpp index 3ab0d3cf883d..c99e236d4a16 100644 --- a/ydb/core/cms/sentinel.cpp +++ b/ydb/core/cms/sentinel.cpp @@ -125,6 +125,10 @@ void TPDiskStatusComputer::SetForcedStatus(EPDiskStatus status) { ForcedStatus = status; } +bool TPDiskStatusComputer::HasForcedStatus() const { + return ForcedStatus.Defined(); +} + void TPDiskStatusComputer::ResetForcedStatus() { ForcedStatus.Clear(); } @@ -196,6 +200,7 @@ void TPDiskStatus::DisallowChanging() { TPDiskInfo::TPDiskInfo(EPDiskStatus initialStatus, const ui32& defaultStateLimit, const TLimitsMap& 
stateLimits) : TPDiskStatus(initialStatus, defaultStateLimit, stateLimits) + , ActualStatus(initialStatus) { Touch(); } @@ -890,15 +895,15 @@ class TSentinel: public TActorBootstrapped { continue; } - if (it->second.HasFaultyMarker()) { - info.SetForcedStatus(EPDiskStatus::FAULTY); + if (it->second.HasFaultyMarker() && Config.EvictVDisksStatus.Defined()) { + info.SetForcedStatus(*Config.EvictVDisksStatus); } else { info.ResetForcedStatus(); } all.AddPDisk(id); if (info.IsChanged()) { - if (info.IsNewStatusGood()) { + if (info.IsNewStatusGood() || info.HasForcedStatus()) { alwaysAllowed.insert(id); } else { changed.AddPDisk(id); diff --git a/ydb/core/cms/sentinel_impl.h b/ydb/core/cms/sentinel_impl.h index 8622050ff1d8..52cc9d8af030 100644 --- a/ydb/core/cms/sentinel_impl.h +++ b/ydb/core/cms/sentinel_impl.h @@ -3,8 +3,7 @@ #include "defs.h" #include "pdiskid.h" #include "pdisk_state.h" - -#include +#include "pdisk_status.h" #include #include @@ -12,7 +11,6 @@ namespace NKikimr::NCms::NSentinel { -using EPDiskStatus = NKikimrBlobStorage::EDriveStatus; using TLimitsMap = TMap; class TPDiskStatusComputer { @@ -29,6 +27,7 @@ class TPDiskStatusComputer { void Reset(); void SetForcedStatus(EPDiskStatus status); + bool HasForcedStatus() const; void ResetForcedStatus(); private: @@ -84,7 +83,7 @@ struct TPDiskInfo using EIgnoreReason = NKikimrCms::TPDiskInfo::EIgnoreReason; EPDiskStatus ActualStatus = EPDiskStatus::ACTIVE; - EPDiskStatus PrevStatus = EPDiskStatus::ACTIVE; + EPDiskStatus PrevStatus = EPDiskStatus::UNKNOWN; TInstant LastStatusChange; bool StatusChangeFailed = false; // means that this pdisk status change last time was the reason of whole request failure diff --git a/ydb/core/cms/ya.make b/ydb/core/cms/ya.make index d3edcc7908e7..1d4c9845dc5f 100644 --- a/ydb/core/cms/ya.make +++ b/ydb/core/cms/ya.make @@ -30,6 +30,7 @@ SRCS( downtime.cpp erasure_checkers.h erasure_checkers.cpp + error_info.h http.cpp http.h info_collector.cpp diff --git 
a/ydb/core/config/init/init.cpp b/ydb/core/config/init/init.cpp index 9890ccabc480..10dcde834b5d 100644 --- a/ydb/core/config/init/init.cpp +++ b/ydb/core/config/init/init.cpp @@ -48,8 +48,8 @@ class TDefaultProtoConfigFileProvider static bool IsFileReadable(const fs::path& p) { std::error_code ec; // For noexcept overload usage. auto perms = fs::status(p, ec).permissions(); - if ((perms & fs::perms::owner_read) != fs::perms::none && - (perms & fs::perms::group_read) != fs::perms::none && + if ((perms & fs::perms::owner_read) != fs::perms::none || + (perms & fs::perms::group_read) != fs::perms::none || (perms & fs::perms::others_read) != fs::perms::none ) { return true; @@ -228,6 +228,7 @@ class TDefaultNodeBrokerClient const TGrpcSslSettings& grpcSettings, const TString addr, const NYdb::NDiscovery::TNodeRegistrationSettings& settings, + const TString& nodeRegistrationToken, const IEnv& env) { TCommandConfig::TServerEndpoint endpoint = TCommandConfig::ParseServerAddress(addr); @@ -242,7 +243,9 @@ class TDefaultNodeBrokerClient config.UseClientCertificate(certificate.c_str(), privateKey.c_str()); } } - config.SetAuthToken(BUILTIN_ACL_ROOT); + if (nodeRegistrationToken) { + config.SetAuthToken(nodeRegistrationToken); + } config.SetEndpoint(endpoint.Address); auto connection = NYdb::TDriver(config); @@ -313,6 +316,7 @@ class TDefaultNodeBrokerClient const TGrpcSslSettings& grpcSettings, const TVector& addrs, const NYdb::NDiscovery::TNodeRegistrationSettings& settings, + const TString& nodeRegistrationToken, const IEnv& env, IInitLogger& logger) { @@ -326,6 +330,7 @@ class TDefaultNodeBrokerClient grpcSettings, addr, settings, + nodeRegistrationToken, env); if (result.IsSuccess()) { logger.Out() << "Success. 
Registered via discovery service as " << result.GetNodeId() << Endl; @@ -387,6 +392,7 @@ class TDefaultNodeBrokerClient grpcSettings, addrs, newRegSettings, + regSettings.NodeRegistrationToken, env, logger); diff --git a/ydb/core/config/init/init.h b/ydb/core/config/init/init.h index bb81c5461369..815bce93b8d0 100644 --- a/ydb/core/config/init/init.h +++ b/ydb/core/config/init/init.h @@ -118,6 +118,7 @@ struct TNodeRegistrationSettings { bool FixedNodeID; ui32 InterconnectPort; NActors::TNodeLocation Location; + TString NodeRegistrationToken; }; class INodeRegistrationResult { diff --git a/ydb/core/config/init/init_impl.h b/ydb/core/config/init/init_impl.h index c2a8a9fee2fd..bbe418c82e93 100644 --- a/ydb/core/config/init/init_impl.h +++ b/ydb/core/config/init/init_impl.h @@ -1119,7 +1119,7 @@ class TInitialConfiguratorImpl } void FillData(const NConfig::TCommonAppOptions& cf) { - if (cf.TenantName && ScopeId.IsEmpty()) { + if (!cf.TenantName && ScopeId.IsEmpty()) { const TString myDomain = DeduceNodeDomain(cf, AppConfig); for (const auto& domain : AppConfig.GetDomainsConfig().GetDomain()) { if (domain.GetName() == myDomain) { @@ -1247,6 +1247,7 @@ class TInitialConfiguratorImpl cf.FixedNodeID, cf.InterconnectPort, cf.CreateNodeLocation(), + AppConfig.GetAuthConfig().GetNodeRegistrationToken(), }; auto result = NodeBrokerClient.RegisterDynamicNode(cf.GrpcSslSettings, addrs, settings, Env, Logger); diff --git a/ydb/core/control/immediate_control_board_impl.cpp b/ydb/core/control/immediate_control_board_impl.cpp index 06f6dab1a832..796eaea6455f 100644 --- a/ydb/core/control/immediate_control_board_impl.cpp +++ b/ydb/core/control/immediate_control_board_impl.cpp @@ -16,7 +16,7 @@ bool TControlBoard::RegisterLocalControl(TControlWrapper control, TString name) } bool TControlBoard::RegisterSharedControl(TControlWrapper& control, TString name) { - auto& ptr = Board.InsertIfAbsent(name, control.Control); + auto ptr = Board.InsertIfAbsent(name, control.Control); if 
(control.Control == ptr) { return true; } else { diff --git a/ydb/core/driver_lib/run/kikimr_services_initializers.cpp b/ydb/core/driver_lib/run/kikimr_services_initializers.cpp index f482af16e987..2721d8cd6821 100644 --- a/ydb/core/driver_lib/run/kikimr_services_initializers.cpp +++ b/ydb/core/driver_lib/run/kikimr_services_initializers.cpp @@ -7,7 +7,7 @@ #include #include -#include "ydb/core/audit/audit_log.h" +#include "ydb/core/audit/audit_log_service.h" #include #include @@ -186,6 +186,9 @@ #include #include +#include +#include + #include #include @@ -240,6 +243,29 @@ #include +#ifndef KIKIMR_DISABLE_S3_OPS +#include +#endif + +namespace { + +#ifndef KIKIMR_DISABLE_S3_OPS +struct TAwsApiGuard { + TAwsApiGuard() { + Aws::InitAPI(Options); + } + + ~TAwsApiGuard() { + Aws::ShutdownAPI(Options); + } + +private: + Aws::SDKOptions Options; +}; +#endif + +} + namespace NKikimr { namespace NKikimrServicesInitializers { @@ -303,6 +329,7 @@ void AddExecutorPool( TBasicExecutorPoolConfig basic; basic.PoolId = poolId; basic.PoolName = poolConfig.GetName(); + basic.UseRingQueue = systemConfig.HasUseRingQueue() && systemConfig.GetUseRingQueue(); if (poolConfig.HasMaxAvgPingDeviation()) { auto poolGroup = counters->GetSubgroup("execpool", basic.PoolName); auto &poolInfo = cpuManager.PingInfoByPool[poolId]; @@ -1645,7 +1672,7 @@ void TSecurityServicesInitializer::InitializeServices(NActors::TActorSystemSetup .AuthConfig = Config.GetAuthConfig(), .CertificateAuthValues = { .ClientCertificateAuthorization = Config.GetClientCertificateAuthorization(), - .ServerCertificateFilePath = grpcConfig.GetCert(), + .ServerCertificateFilePath = grpcConfig.HasPathToCertificateFile() ? 
grpcConfig.GetPathToCertificateFile() : grpcConfig.GetCert(), .Domain = Config.GetAuthConfig().GetCertificateAuthenticationDomain() } }; @@ -1981,7 +2008,8 @@ TPersQueueL2CacheInitializer::TPersQueueL2CacheInitializer(const TKikimrRunConfi {} void TPersQueueL2CacheInitializer::InitializeServices(NActors::TActorSystemSetup* setup, const NKikimr::TAppData* appData) { - static const ui64 DEFAULT_PQ_L2_MAX_SIZE_MB = 8 * 1024; + static const ui64 DEFAULT_PQ_L2_MAX_SIZE_MB = + NKikimrNodeLimits::TNodeLimitsConfig_TPersQueueNodeConfig::default_instance().GetSharedCacheSizeMb(); static const TDuration DEFAULT_PQ_L2_KEEP_TIMEOUT = TDuration::Seconds(10); NPQ::TCacheL2Parameters params; @@ -2149,7 +2177,7 @@ void TKqpServiceInitializer::InitializeServices(NActors::TActorSystemSetup* setu // Create resource manager auto rm = NKqp::CreateKqpResourceManagerActor(Config.GetTableServiceConfig().GetResourceManager(), nullptr, - {}, kqpProxySharedResources); + {}, kqpProxySharedResources, NodeId); setup->LocalServices.push_back(std::make_pair( NKqp::MakeKqpRmServiceID(NodeId), TActorSetupCmd(rm, TMailboxType::HTSwap, appData->UserPoolId))); @@ -2179,6 +2207,26 @@ void TKqpServiceInitializer::InitializeServices(NActors::TActorSystemSetup* setu } } +TGroupedMemoryLimiterInitializer::TGroupedMemoryLimiterInitializer(const TKikimrRunConfig& runConfig) + : IKikimrServicesInitializer(runConfig) { +} + +void TGroupedMemoryLimiterInitializer::InitializeServices(NActors::TActorSystemSetup* setup, const NKikimr::TAppData* appData) { + NOlap::NGroupedMemoryManager::TConfig serviceConfig; + Y_ABORT_UNLESS(serviceConfig.DeserializeFromProto(Config.GetGroupedMemoryLimiterConfig())); + + if (serviceConfig.IsEnabled()) { + TIntrusivePtr<::NMonitoring::TDynamicCounters> tabletGroup = GetServiceCounters(appData->Counters, "tablets"); + TIntrusivePtr<::NMonitoring::TDynamicCounters> countersGroup = tabletGroup->GetSubgroup("type", "TX_GROUPED_MEMORY_LIMITER"); + + auto service = 
NOlap::NGroupedMemoryManager::TScanMemoryLimiterOperator::CreateService(serviceConfig, countersGroup); + + setup->LocalServices.push_back(std::make_pair( + NOlap::NGroupedMemoryManager::TScanMemoryLimiterOperator::MakeServiceId(NodeId), + TActorSetupCmd(service, TMailboxType::HTSwap, appData->UserPoolId))); + } +} + TCompDiskLimiterInitializer::TCompDiskLimiterInitializer(const TKikimrRunConfig& runConfig) : IKikimrServicesInitializer(runConfig) { } @@ -2792,5 +2840,18 @@ void TGraphServiceInitializer::InitializeServices(NActors::TActorSystemSetup* se TActorSetupCmd(NGraph::CreateGraphService(appData->TenantName), TMailboxType::HTSwap, appData->UserPoolId)); } +#ifndef KIKIMR_DISABLE_S3_OPS +TAwsApiInitializer::TAwsApiInitializer(IGlobalObjectStorage& globalObjects) + : GlobalObjects(globalObjects) +{ +} + +void TAwsApiInitializer::InitializeServices(NActors::TActorSystemSetup* setup, const NKikimr::TAppData* appData) { + Y_UNUSED(setup); + Y_UNUSED(appData); + GlobalObjects.AddGlobalObject(std::make_shared()); +} +#endif + } // namespace NKikimrServicesInitializers } // namespace NKikimr diff --git a/ydb/core/driver_lib/run/kikimr_services_initializers.h b/ydb/core/driver_lib/run/kikimr_services_initializers.h index cd2060c52aec..04f30522186f 100644 --- a/ydb/core/driver_lib/run/kikimr_services_initializers.h +++ b/ydb/core/driver_lib/run/kikimr_services_initializers.h @@ -397,6 +397,12 @@ class TCompDiskLimiterInitializer: public IKikimrServicesInitializer { void InitializeServices(NActors::TActorSystemSetup* setup, const NKikimr::TAppData* appData) override; }; +class TGroupedMemoryLimiterInitializer: public IKikimrServicesInitializer { +public: + TGroupedMemoryLimiterInitializer(const TKikimrRunConfig& runConfig); + void InitializeServices(NActors::TActorSystemSetup* setup, const NKikimr::TAppData* appData) override; +}; + class TCompConveyorInitializer: public IKikimrServicesInitializer { public: TCompConveyorInitializer(const TKikimrRunConfig& runConfig); @@ 
-612,5 +618,16 @@ class TGraphServiceInitializer : public IKikimrServicesInitializer { void InitializeServices(NActors::TActorSystemSetup* setup, const NKikimr::TAppData* appData) override; }; +#ifndef KIKIMR_DISABLE_S3_OPS +class TAwsApiInitializer : public IServiceInitializer { + IGlobalObjectStorage& GlobalObjects; + +public: + TAwsApiInitializer(IGlobalObjectStorage& globalObjects); + + void InitializeServices(NActors::TActorSystemSetup* setup, const NKikimr::TAppData* appData) override; +}; +#endif + } // namespace NKikimrServicesInitializers } // namespace NKikimr diff --git a/ydb/core/driver_lib/run/run.cpp b/ydb/core/driver_lib/run/run.cpp index 5996acf0166d..2f2a61a50517 100644 --- a/ydb/core/driver_lib/run/run.cpp +++ b/ydb/core/driver_lib/run/run.cpp @@ -122,6 +122,7 @@ #include #include #include +#include #include @@ -495,8 +496,11 @@ static TString ReadFile(const TString& fileName) { } void TKikimrRunner::InitializeGracefulShutdown(const TKikimrRunConfig& runConfig) { - Y_UNUSED(runConfig); GracefulShutdownSupported = true; + const auto& config = runConfig.AppConfig.GetShutdownConfig(); + if (config.HasMinDelayBeforeShutdownSeconds()) { + MinDelayBeforeShutdown = TDuration::Seconds(config.GetMinDelayBeforeShutdownSeconds()); + } } void TKikimrRunner::InitializeKqpController(const TKikimrRunConfig& runConfig) { @@ -597,6 +601,8 @@ void TKikimrRunner::InitializeGRpc(const TKikimrRunConfig& runConfig) { names["keyvalue"] = &hasKeyValue; TServiceCfg hasReplication = services.empty(); names["replication"] = &hasReplication; + TServiceCfg hasView = services.empty(); + names["view"] = &hasView; std::unordered_set enabled; for (const auto& name : services) { @@ -872,6 +878,11 @@ void TKikimrRunner::InitializeGRpc(const TKikimrRunConfig& runConfig) { grpcRequestProxies[0], hasReplication.IsRlAllowed())); } + if (hasView) { + server.AddService(new NGRpcService::TGRpcViewService(ActorSystem.Get(), Counters, + grpcRequestProxies[0], hasView.IsRlAllowed())); + } + 
if (ModuleFactories) { for (const auto& service : ModuleFactories->GrpcServiceFactory.Create(enabled, disabled, ActorSystem.Get(), Counters, grpcRequestProxies[0])) { server.AddService(service); @@ -1126,6 +1137,10 @@ void TKikimrRunner::InitializeAppData(const TKikimrRunConfig& runConfig) AppData->MetadataCacheConfig.CopyFrom(runConfig.AppConfig.GetMetadataCacheConfig()); } + if (runConfig.AppConfig.HasReplicationConfig()) { + AppData->ReplicationConfig = runConfig.AppConfig.GetReplicationConfig(); + } + // setup resource profiles AppData->ResourceProfiles = new TResourceProfiles; if (runConfig.AppConfig.GetBootstrapConfig().ResourceProfilesSize()) @@ -1183,6 +1198,8 @@ void TKikimrRunner::InitializeLogSettings(const TKikimrRunConfig& runConfig) ); LogSettings->ClusterName = logConfig.HasClusterName() ? logConfig.GetClusterName() : ""; + LogSettings->TenantName = runConfig.TenantName; + LogSettings->NodeId = runConfig.NodeId; if (logConfig.GetFormat() == "full") { LogSettings->Format = NLog::TSettings::PLAIN_FULL_FORMAT; @@ -1550,6 +1567,10 @@ TIntrusivePtr TKikimrRunner::CreateServiceInitializers sil->AddServiceInitializer(new TCompDiskLimiterInitializer(runConfig)); } + if (serviceMask.EnableGroupedMemoryLimiter) { + sil->AddServiceInitializer(new TGroupedMemoryLimiterInitializer(runConfig)); + } + if (serviceMask.EnableScanConveyor) { sil->AddServiceInitializer(new TScanConveyorInitializer(runConfig)); } @@ -1647,6 +1668,12 @@ TIntrusivePtr TKikimrRunner::CreateServiceInitializers sil->AddServiceInitializer(new TGraphServiceInitializer(runConfig)); } +#ifndef KIKIMR_DISABLE_S3_OPS + if (serviceMask.EnableAwsService) { + sil->AddServiceInitializer(new TAwsApiInitializer(*this)); + } +#endif + return sil; } @@ -1694,6 +1721,7 @@ void TKikimrRunner::KikimrStop(bool graceful) { ActorSystem->Send(new IEventHandle(NGRpcService::CreateGrpcPublisherServiceActorId(), {}, new TEvents::TEvPoisonPill)); } + THPTimer timer; TIntrusivePtr drainProgress(new TDrainProgress()); 
if (AppData->FeatureFlags.GetEnableDrainOnShutdown() && GracefulShutdownSupported && ActorSystem) { drainProgress->OnSend(); @@ -1727,6 +1755,12 @@ void TKikimrRunner::KikimrStop(bool graceful) { } } + // Wait for a minimum delay to make sure that clients forget about this node + auto passedTime = TDuration::Seconds(timer.Passed()); + if (MinDelayBeforeShutdown > passedTime) { + Sleep(MinDelayBeforeShutdown - passedTime); + } + if (ActorSystem) { ActorSystem->BroadcastToProxies([](const TActorId& proxyId) { return new IEventHandle(proxyId, {}, new TEvInterconnect::TEvTerminate); diff --git a/ydb/core/driver_lib/run/run.h b/ydb/core/driver_lib/run/run.h index a6c9283fc43d..eaa4ff1f91a6 100644 --- a/ydb/core/driver_lib/run/run.h +++ b/ydb/core/driver_lib/run/run.h @@ -42,6 +42,7 @@ class TKikimrRunner : public virtual TThrRefBase, private IGlobalObjectStorage { bool EnabledGrpcService = false; bool GracefulShutdownSupported = false; + TDuration MinDelayBeforeShutdown; THolder SqsHttp; THolder YdbDriver; diff --git a/ydb/core/driver_lib/run/service_mask.h b/ydb/core/driver_lib/run/service_mask.h index b99356bd3cff..044557229c6b 100644 --- a/ydb/core/driver_lib/run/service_mask.h +++ b/ydb/core/driver_lib/run/service_mask.h @@ -78,6 +78,8 @@ union TBasicKikimrServicesMask { bool EnableDatabaseMetadataCache:1; bool EnableGraphService:1; bool EnableCompDiskLimiter:1; + bool EnableGroupedMemoryLimiter:1; + bool EnableAwsService:1; }; struct { diff --git a/ydb/core/driver_lib/run/ya.make b/ydb/core/driver_lib/run/ya.make index 2433ec8d5b2d..ae98d77faa3a 100644 --- a/ydb/core/driver_lib/run/ya.make +++ b/ydb/core/driver_lib/run/ya.make @@ -1,5 +1,15 @@ LIBRARY(run) +IF (OS_WINDOWS) + CFLAGS( + -DKIKIMR_DISABLE_S3_OPS + ) +ELSE() + PEERDIR( + contrib/libs/aws-sdk-cpp/aws-cpp-sdk-core + ) +ENDIF() + SRCS( auto_config_initializer.cpp config.cpp @@ -111,6 +121,7 @@ PEERDIR( ydb/core/tx/coordinator ydb/core/tx/conveyor/service ydb/core/tx/limiter/service + 
ydb/core/tx/limiter/grouped_memory/usage ydb/core/tx/datashard ydb/core/tx/long_tx_service ydb/core/tx/long_tx_service/public @@ -161,6 +172,7 @@ PEERDIR( ydb/services/persqueue_v1 ydb/services/rate_limiter ydb/services/replication + ydb/services/view ydb/services/ydb ) diff --git a/ydb/core/driver_lib/version/version.cpp b/ydb/core/driver_lib/version/version.cpp index adc22dfeeca6..85effd8a57d0 100644 --- a/ydb/core/driver_lib/version/version.cpp +++ b/ydb/core/driver_lib/version/version.cpp @@ -26,7 +26,8 @@ TCompatibilityInfo::TCompatibilityInfo() { auto current = MakeCurrent(); - // bool success = CompleteFromTag(current); + bool success = CompleteFromTag(current); + Y_UNUSED(success); // Y_ABORT_UNLESS(success); CurrentCompatibilityInfo.CopyFrom(current); @@ -72,12 +73,13 @@ const TStored* TCompatibilityInfo::GetDefault(TComponentId componentId) const { // obsolete version control TMaybe VERSION = NActors::TInterconnectProxyCommon::TVersionInfo{ // version of this binary - "trunk", + "stable-24-3", // compatible versions; must include all compatible old ones, including this one; version verification occurs on both // peers and connection is accepted if at least one of peers accepts the version of the other peer { - "trunk" + "stable-24-2", + "stable-24-3" } }; diff --git a/ydb/core/engine/mkql_keys.cpp b/ydb/core/engine/mkql_keys.cpp index 93d70dcfcbb3..d282ccf5f40f 100644 --- a/ydb/core/engine/mkql_keys.cpp +++ b/ydb/core/engine/mkql_keys.cpp @@ -51,14 +51,6 @@ NScheme::TTypeInfo UnpackTypeInfo(NKikimr::NMiniKQL::TType *type, bool &isOption } } - -template -TCell MakeCell(const NUdf::TUnboxedValuePod& value) { - static_assert(TCell::CanInline(sizeof(T)), "Can't inline data in cell."); - const auto v = value.Get(); - return TCell(reinterpret_cast(&v), sizeof(v)); -} - THolder ExtractKeyTuple(const TTableId& tableId, TTupleLiteral* tuple, const TVector& columns, TKeyDesc::ERowOperation rowOperation, bool requireStaticKey, const TTypeEnvironment& env) { diff 
--git a/ydb/core/engine/mkql_keys.h b/ydb/core/engine/mkql_keys.h index 517120748a77..b51e789d0182 100644 --- a/ydb/core/engine/mkql_keys.h +++ b/ydb/core/engine/mkql_keys.h @@ -45,6 +45,13 @@ THolder ExtractTableKey(TCallable& callable, const TTableStrings& stri TVector> ExtractTableKeys(TExploringNodeVisitor& explorer, const TTypeEnvironment& env); TTableId ExtractTableId(const TRuntimeNode& node); +template +TCell MakeCell(const NUdf::TUnboxedValuePod& value) { + static_assert(TCell::CanInline(sizeof(T)), "Can't inline data in cell."); + const auto v = value.Get(); + return TCell(reinterpret_cast(&v), sizeof(v)); +} + TCell MakeCell(NScheme::TTypeInfo type, const NUdf::TUnboxedValuePod& value, const TTypeEnvironment& env, bool copy = true, i32 typmod = -1, TMaybe* error = {}); diff --git a/ydb/core/engine/mkql_proto.cpp b/ydb/core/engine/mkql_proto.cpp index 98c622608f7d..4dd266740a83 100644 --- a/ydb/core/engine/mkql_proto.cpp +++ b/ydb/core/engine/mkql_proto.cpp @@ -223,6 +223,19 @@ bool CellsFromTuple(const NKikimrMiniKQL::TType* tupleType, } break; } + case NScheme::NTypeIds::Decimal: + { + if (v.HasLow128() && v.HasHi128()) { + NYql::NDecimal::TInt128 int128 = NYql::NDecimal::FromProto(v); + auto &data = memoryOwner.emplace_back(); + data.resize(sizeof(NYql::NDecimal::TInt128)); + std::memcpy(data.Detach(), &int128, sizeof(NYql::NDecimal::TInt128)); + c = TCell(data); + } else { + CHECK_OR_RETURN_ERROR(false, Sprintf("Cannot parse value of type Decimal in tuple at position %" PRIu32, i)); + } + break; + } default: CHECK_OR_RETURN_ERROR(false, Sprintf("Unsupported typeId %" PRIu16 " at index %" PRIu32, typeId, i)); break; @@ -328,6 +341,13 @@ bool CellToValue(NScheme::TTypeInfo type, const TCell& c, NKikimrMiniKQL::TValue val.MutableOptional()->SetText(c.Data(), c.Size()); break; + case NScheme::NTypeIds::Decimal: { + const auto loHi = c.AsValue>(); + val.MutableOptional()->SetLow128(loHi.first); + val.MutableOptional()->SetHi128(loHi.second); + break; + } + 
case NScheme::NTypeIds::Pg: { auto convert = NPg::PgNativeTextFromNativeBinary(c.AsBuf(), type.GetTypeDesc()); if (convert.Error) { diff --git a/ydb/core/formats/arrow/accessor/abstract/constructor.cpp b/ydb/core/formats/arrow/accessor/abstract/constructor.cpp new file mode 100644 index 000000000000..51c2e86cadf7 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/abstract/constructor.cpp @@ -0,0 +1,11 @@ +#include "constructor.h" +#include + +namespace NKikimr::NArrow::NAccessor { + +TConstructorContainer TConstructorContainer::GetDefaultConstructor() { + static std::shared_ptr result = std::make_shared(); + return result; +} + +} diff --git a/ydb/core/formats/arrow/accessor/abstract/constructor.h b/ydb/core/formats/arrow/accessor/abstract/constructor.h new file mode 100644 index 000000000000..aa99260e097a --- /dev/null +++ b/ydb/core/formats/arrow/accessor/abstract/constructor.h @@ -0,0 +1,75 @@ +#pragma once + +#include +#include +#include +#include + +#include + +namespace NKikimr::NArrow::NAccessor { + +class IConstructor { +public: + using TFactory = NObjectFactory::TObjectFactory; + using TProto = NKikimrArrowAccessorProto::TConstructor; + +private: + virtual TConclusion> DoConstruct( + const std::shared_ptr& originalData, const TChunkConstructionData& externalInfo) const = 0; + virtual TConclusion> DoConstructDefault( + const TChunkConstructionData& externalInfo) const = 0; + virtual NKikimrArrowAccessorProto::TConstructor DoSerializeToProto() const = 0; + virtual bool DoDeserializeFromProto(const NKikimrArrowAccessorProto::TConstructor& proto) = 0; + virtual std::shared_ptr DoGetExpectedSchema(const std::shared_ptr& resultColumn) const = 0; + virtual TString DoDebugString() const { + return ""; + } + +public: + virtual ~IConstructor() = default; + + TString DebugString() const { + return TStringBuilder() << GetClassName() << ":" << DoDebugString(); + } + + TConclusion> Construct( + const std::shared_ptr& originalData, const TChunkConstructionData& 
externalInfo) const { + return DoConstruct(originalData, externalInfo); + } + + TConclusion> ConstructDefault(const TChunkConstructionData& externalInfo) const { + return DoConstructDefault(externalInfo); + } + + bool DeserializeFromProto(const NKikimrArrowAccessorProto::TConstructor& proto) { + return DoDeserializeFromProto(proto); + } + + NKikimrArrowAccessorProto::TConstructor SerializeToProto() const { + return DoSerializeToProto(); + } + + void SerializeToProto(NKikimrArrowAccessorProto::TConstructor& proto) const { + proto = DoSerializeToProto(); + } + + std::shared_ptr GetExpectedSchema(const std::shared_ptr& resultColumn) const { + AFL_VERIFY(resultColumn); + return DoGetExpectedSchema(resultColumn); + } + + virtual TString GetClassName() const = 0; +}; + +class TConstructorContainer: public NBackgroundTasks::TInterfaceProtoContainer { +private: + using TBase = NBackgroundTasks::TInterfaceProtoContainer; + +public: + using TBase::TBase; + + static TConstructorContainer GetDefaultConstructor(); +}; + +} // namespace NKikimr::NArrow::NAccessor diff --git a/ydb/core/formats/arrow/accessor/abstract/request.cpp b/ydb/core/formats/arrow/accessor/abstract/request.cpp new file mode 100644 index 000000000000..08bc3ee5c87d --- /dev/null +++ b/ydb/core/formats/arrow/accessor/abstract/request.cpp @@ -0,0 +1,16 @@ +#include "request.h" + +namespace NKikimr::NArrow::NAccessor { + +TConclusionStatus TRequestedConstructorContainer::DeserializeFromRequest(NYql::TFeaturesExtractor& features) { + const std::optional className = features.Extract("DATA_ACCESSOR_CONSTRUCTOR.CLASS_NAME"); + if (!className) { + return TConclusionStatus::Success(); + } + if (!TBase::Initialize(*className)) { + return TConclusionStatus::Fail("don't know anything about class_name=" + *className); + } + return TBase::GetObjectPtr()->DeserializeFromRequest(features); +} + +} diff --git a/ydb/core/formats/arrow/accessor/abstract/request.h b/ydb/core/formats/arrow/accessor/abstract/request.h new file 
mode 100644 index 000000000000..42fbf3551623 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/abstract/request.h @@ -0,0 +1,57 @@ +#pragma once +#include "constructor.h" + +#include +#include +#include + +#include + +namespace NKikimr::NArrow::NAccessor { + +class IRequestedConstructor { +public: + using TFactory = NObjectFactory::TObjectFactory; + using TProto = NKikimrArrowAccessorProto::TRequestedConstructor; +private: + virtual TConclusion DoBuildConstructor() const = 0; + virtual NKikimrArrowAccessorProto::TRequestedConstructor DoSerializeToProto() const = 0; + virtual bool DoDeserializeFromProto(const NKikimrArrowAccessorProto::TRequestedConstructor& proto) = 0; + virtual TConclusionStatus DoDeserializeFromRequest(NYql::TFeaturesExtractor& features) = 0; + +public: + virtual ~IRequestedConstructor() = default; + + NKikimrArrowAccessorProto::TRequestedConstructor SerializeToProto() const { + return DoSerializeToProto(); + } + + void SerializeToProto(NKikimrArrowAccessorProto::TRequestedConstructor& proto) const { + proto = DoSerializeToProto(); + } + + bool DeserializeFromProto(const NKikimrArrowAccessorProto::TRequestedConstructor& proto) { + return DoDeserializeFromProto(proto); + } + + TConclusionStatus DeserializeFromRequest(NYql::TFeaturesExtractor& features) { + return DoDeserializeFromRequest(features); + } + + TConclusion BuildConstructor() const { + return DoBuildConstructor(); + } + + virtual TString GetClassName() const = 0; +}; + +class TRequestedConstructorContainer: public NBackgroundTasks::TInterfaceProtoContainer { +private: + using TBase = NBackgroundTasks::TInterfaceProtoContainer; + +public: + using TBase::TBase; + TConclusionStatus DeserializeFromRequest(NYql::TFeaturesExtractor& features); +}; + +} // namespace NKikimr::NArrow::NAccessor diff --git a/ydb/core/formats/arrow/accessor/abstract/ya.make b/ydb/core/formats/arrow/accessor/abstract/ya.make new file mode 100644 index 000000000000..c40f1f297c18 --- /dev/null +++ 
b/ydb/core/formats/arrow/accessor/abstract/ya.make @@ -0,0 +1,17 @@ +LIBRARY() + +PEERDIR( + contrib/libs/apache/arrow + ydb/library/conclusion + ydb/services/metadata/abstract + ydb/library/formats/arrow/accessor/abstract + ydb/library/formats/arrow/accessor/common + ydb/library/formats/arrow/protos +) + +SRCS( + constructor.cpp + request.cpp +) + +END() diff --git a/ydb/core/formats/arrow/accessor/composite_serial/accessor.cpp b/ydb/core/formats/arrow/accessor/composite_serial/accessor.cpp new file mode 100644 index 000000000000..d804e92efdd7 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/composite_serial/accessor.cpp @@ -0,0 +1,46 @@ +#include "accessor.h" + +namespace NKikimr::NArrow::NAccessor { + +namespace { +class TSerializedChunkAccessor { +private: + const std::vector& Chunks; + const std::shared_ptr& Loader; + std::optional& Result; + +public: + TSerializedChunkAccessor(const std::vector& chunks, const std::shared_ptr& loader, + std::optional& result) + : Chunks(chunks) + , Loader(loader) + , Result(result) { + } + ui64 GetChunksCount() const { + return Chunks.size(); + } + ui64 GetChunkLength(const ui32 idx) const { + return Chunks[idx].GetRecordsCount(); + } + void OnArray(const ui32 chunkIdx, const ui32 startPosition) const { + Result = IChunkedArray::TLocalChunkedArrayAddress(Chunks[chunkIdx].GetArrayVerified(Loader), startPosition, chunkIdx); + } +}; +} // namespace + +IChunkedArray::TLocalDataAddress TDeserializeChunkedArray::DoGetLocalData( + const std::optional& /*chunkCurrent*/, const ui64 /*position*/) const { + AFL_VERIFY(false); + return IChunkedArray::TLocalDataAddress(nullptr, 0, 0); +} + +IChunkedArray::TLocalChunkedArrayAddress TDeserializeChunkedArray::DoGetLocalChunkedArray( + const std::optional& chunkCurrent, const ui64 position) const { + std::optional result; + TSerializedChunkAccessor accessor(Chunks, Loader, result); + SelectChunk(chunkCurrent, position, accessor); + AFL_VERIFY(result); + return *result; +} + +} // namespace 
NKikimr::NArrow::NAccessor diff --git a/ydb/core/formats/arrow/accessor/composite_serial/accessor.h b/ydb/core/formats/arrow/accessor/composite_serial/accessor.h new file mode 100644 index 000000000000..ac7e0193d7bd --- /dev/null +++ b/ydb/core/formats/arrow/accessor/composite_serial/accessor.h @@ -0,0 +1,78 @@ +#pragma once +#include +#include + +namespace NKikimr::NArrow::NAccessor { + +class TDeserializeChunkedArray: public NArrow::NAccessor::IChunkedArray { +private: + using TBase = NArrow::NAccessor::IChunkedArray; + +public: + class TChunk { + private: + YDB_READONLY(ui32, RecordsCount, 0); + std::shared_ptr PredefinedArray; + const TString Data; + + public: + TChunk(const std::shared_ptr& predefinedArray) + : PredefinedArray(predefinedArray) { + AFL_VERIFY(PredefinedArray); + RecordsCount = PredefinedArray->GetRecordsCount(); + } + + TChunk(const ui32 recordsCount, const TString& data) + : RecordsCount(recordsCount) + , Data(data) { + } + + std::shared_ptr GetArrayVerified(const std::shared_ptr& loader) const { + if (PredefinedArray) { + return PredefinedArray; + } + return loader->ApplyVerified(Data, RecordsCount); + } + }; + +private: + std::shared_ptr Loader; + std::vector Chunks; + +protected: + virtual TLocalChunkedArrayAddress DoGetLocalChunkedArray( + const std::optional& chunkCurrent, const ui64 position) const override; + virtual TLocalDataAddress DoGetLocalData(const std::optional& chunkCurrent, const ui64 position) const override; + + virtual std::vector DoSplitBySizes( + const TColumnSaver& /*saver*/, const TString& /*fullSerializedData*/, const std::vector& /*splitSizes*/) override { + AFL_VERIFY(false); + return {}; + } + + virtual std::shared_ptr DoGetScalar(const ui32 /*index*/) const override { + AFL_VERIFY(false)("problem", "cannot use method"); + return nullptr; + } + virtual std::optional DoGetRawSize() const override { + return {}; + } + virtual std::shared_ptr DoGetMaxScalar() const override { + AFL_VERIFY(false); + return nullptr; + } 
+ virtual std::shared_ptr DoGetChunkedArray() const override { + AFL_VERIFY(false); + return nullptr; + } + +public: + TDeserializeChunkedArray(const ui64 recordsCount, const std::shared_ptr& loader, std::vector&& chunks) + : TBase(recordsCount, NArrow::NAccessor::IChunkedArray::EType::SerializedChunkedArray, loader->GetField()->type()) + , Loader(loader) + , Chunks(std::move(chunks)) { + AFL_VERIFY(Loader); + } +}; + +} // namespace NKikimr::NArrow::NAccessor diff --git a/ydb/core/formats/arrow/accessor/composite_serial/ya.make b/ydb/core/formats/arrow/accessor/composite_serial/ya.make new file mode 100644 index 000000000000..e8095e990285 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/composite_serial/ya.make @@ -0,0 +1,14 @@ +LIBRARY() + +PEERDIR( + contrib/libs/apache/arrow + ydb/library/formats/arrow/accessor/abstract + ydb/core/formats/arrow/common + ydb/core/formats/arrow/save_load +) + +SRCS( + accessor.cpp +) + +END() diff --git a/ydb/core/formats/arrow/accessor/plain/accessor.cpp b/ydb/core/formats/arrow/accessor/plain/accessor.cpp new file mode 100644 index 000000000000..c606f2e1952b --- /dev/null +++ b/ydb/core/formats/arrow/accessor/plain/accessor.cpp @@ -0,0 +1,89 @@ +#include "accessor.h" + +#include +#include +#include + +namespace NKikimr::NArrow::NAccessor { + +std::optional TTrivialArray::DoGetRawSize() const { + return NArrow::GetArrayDataSize(Array); +} + +std::vector TTrivialArray::DoSplitBySizes( + const TColumnSaver& saver, const TString& fullSerializedData, const std::vector& splitSizes) { + auto schema = std::make_shared(arrow::FieldVector({ std::make_shared("f", GetDataType()) })); + auto chunks = NArrow::NSplitter::TSimpleSplitter(saver).SplitBySizes( + arrow::RecordBatch::Make(schema, GetRecordsCount(), { Array }), fullSerializedData, splitSizes); + std::vector result; + for (auto&& i : chunks) { + AFL_VERIFY(i.GetSlicedBatch()->num_columns() == 1); + result.emplace_back(std::make_shared(i.GetSlicedBatch()->column(0)), 
i.GetSerializedChunk()); + } + return result; +} + +std::shared_ptr TTrivialArray::DoGetMaxScalar() const { + auto minMaxPos = NArrow::FindMinMaxPosition(Array); + return NArrow::TStatusValidator::GetValid(Array->GetScalar(minMaxPos.second)); +} + +namespace { +class TChunkAccessor { +private: + std::shared_ptr ChunkedArray; + std::optional* Result; + +public: + TChunkAccessor(const std::shared_ptr& chunkedArray, std::optional& result) + : ChunkedArray(chunkedArray) + , Result(&result) { + } + ui64 GetChunksCount() const { + return (ui64)ChunkedArray->num_chunks(); + } + ui64 GetChunkLength(const ui32 idx) const { + return (ui64)ChunkedArray->chunk(idx)->length(); + } + void OnArray(const ui32 idx, const ui32 startPosition) const { + const auto& arr = ChunkedArray->chunk(idx); + *Result = IChunkedArray::TLocalDataAddress(arr, startPosition, idx); + } +}; + +} // namespace + +IChunkedArray::TLocalDataAddress TTrivialChunkedArray::DoGetLocalData( + const std::optional& chunkCurrent, const ui64 position) const { + std::optional result; + TChunkAccessor accessor(Array, result); + SelectChunk(chunkCurrent, position, accessor); + AFL_VERIFY(result); + return *result; +} + +std::optional TTrivialChunkedArray::DoGetRawSize() const { + ui64 result = 0; + for (auto&& i : Array->chunks()) { + result += NArrow::GetArrayDataSize(i); + } + return result; +} + +std::shared_ptr TTrivialChunkedArray::DoGetMaxScalar() const { + std::shared_ptr result; + for (auto&& i : Array->chunks()) { + if (!i->length()) { + continue; + } + auto minMaxPos = NArrow::FindMinMaxPosition(i); + auto scalarCurrent = NArrow::TStatusValidator::GetValid(i->GetScalar(minMaxPos.second)); + if (!result || ScalarCompare(result, scalarCurrent) < 0) { + result = scalarCurrent; + } + } + + return result; +} + +} // namespace NKikimr::NArrow::NAccessor diff --git a/ydb/core/formats/arrow/accessor/plain/accessor.h b/ydb/core/formats/arrow/accessor/plain/accessor.h new file mode 100644 index 
000000000000..a00826161c40 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/plain/accessor.h @@ -0,0 +1,77 @@ +#pragma once +#include +#include + +namespace NKikimr::NArrow::NAccessor { + +class TTrivialArray: public IChunkedArray { +private: + using TBase = IChunkedArray; + const std::shared_ptr Array; + +protected: + virtual std::optional DoGetRawSize() const override; + + virtual TLocalDataAddress DoGetLocalData(const std::optional& /*chunkCurrent*/, const ui64 /*position*/) const override { + return TLocalDataAddress(Array, 0, 0); + } + virtual std::shared_ptr DoGetChunkedArray() const override { + return std::make_shared(Array); + } + virtual std::shared_ptr DoGetScalar(const ui32 index) const override { + return NArrow::TStatusValidator::GetValid(Array->GetScalar(index)); + } + virtual std::shared_ptr DoGetMaxScalar() const override; + virtual std::vector DoSplitBySizes( + const TColumnSaver& saver, const TString& fullSerializedData, const std::vector& splitSizes) override; + + virtual TLocalChunkedArrayAddress DoGetLocalChunkedArray( + const std::optional& /*chunkCurrent*/, const ui64 /*position*/) const override { + AFL_VERIFY(false); + return TLocalChunkedArrayAddress(nullptr, TCommonChunkAddress(0, GetRecordsCount(), 0)); + } + +public: + TTrivialArray(const std::shared_ptr& data) + : TBase(data->length(), EType::Array, data->type()) + , Array(data) { + } +}; + +class TTrivialChunkedArray: public IChunkedArray { +private: + using TBase = IChunkedArray; + const std::shared_ptr Array; + +protected: + virtual TLocalDataAddress DoGetLocalData(const std::optional& chunkCurrent, const ui64 position) const override; + virtual std::shared_ptr DoGetChunkedArray() const override { + return Array; + } + virtual std::optional DoGetRawSize() const override; + virtual std::shared_ptr DoGetScalar(const ui32 index) const override { + auto chunk = GetChunkSlow(index); + return 
NArrow::TStatusValidator::GetValid(chunk.GetArray()->GetScalar(chunk.GetAddress().GetLocalIndex(index))); + } + virtual std::vector DoSplitBySizes( + const TColumnSaver& /*saver*/, const TString& /*fullSerializedData*/, const std::vector& /*splitSizes*/) override { + AFL_VERIFY(false); + return {}; + } + + virtual std::shared_ptr DoGetMaxScalar() const override; + + virtual TLocalChunkedArrayAddress DoGetLocalChunkedArray( + const std::optional& /*chunkCurrent*/, const ui64 /*position*/) const override { + AFL_VERIFY(false); + return TLocalChunkedArrayAddress(nullptr, TCommonChunkAddress(0, 0, 0)); + } + +public: + TTrivialChunkedArray(const std::shared_ptr& data) + : TBase(data->length(), EType::ChunkedArray, data->type()) + , Array(data) { + } +}; + +} // namespace NKikimr::NArrow::NAccessor diff --git a/ydb/core/formats/arrow/accessor/plain/constructor.cpp b/ydb/core/formats/arrow/accessor/plain/constructor.cpp new file mode 100644 index 000000000000..3ecf41502b33 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/plain/constructor.cpp @@ -0,0 +1,33 @@ +#include "accessor.h" +#include "constructor.h" + +#include +#include +#include + +namespace NKikimr::NArrow::NAccessor::NPlain { + +TConclusion> TConstructor::DoConstruct( + const std::shared_ptr& originalData, const TChunkConstructionData& /*externalInfo*/) const { + AFL_VERIFY(originalData->num_columns() == 1)("count", originalData->num_columns())("schema", originalData->schema()->ToString()); + return std::make_shared(originalData->column(0)); +} + +TConclusion> TConstructor::DoConstructDefault(const TChunkConstructionData& externalInfo) const { + return std::make_shared( + NArrow::TThreadSimpleArraysCache::Get(externalInfo.GetColumnType(), externalInfo.GetDefaultValue(), externalInfo.GetRecordsCount())); +} + +NKikimrArrowAccessorProto::TConstructor TConstructor::DoSerializeToProto() const { + return NKikimrArrowAccessorProto::TConstructor(); +} + +bool TConstructor::DoDeserializeFromProto(const 
NKikimrArrowAccessorProto::TConstructor& /*proto*/) { + return true; +} + +std::shared_ptr TConstructor::DoGetExpectedSchema(const std::shared_ptr& resultColumn) const { + return std::make_shared(arrow::FieldVector({ resultColumn })); +} + +} // namespace NKikimr::NArrow::NAccessor::NPlain diff --git a/ydb/core/formats/arrow/accessor/plain/constructor.h b/ydb/core/formats/arrow/accessor/plain/constructor.h new file mode 100644 index 000000000000..57c366689eb0 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/plain/constructor.h @@ -0,0 +1,28 @@ +#pragma once +#include +#include + +namespace NKikimr::NArrow::NAccessor::NPlain { + +class TConstructor: public IConstructor { +public: + static TString GetClassNameStatic() { + return TGlobalConst::PlainDataAccessorName; + } + +private: + static inline auto Registrator = TFactory::TRegistrator(GetClassNameStatic()); + virtual TConclusion> DoConstruct( + const std::shared_ptr& originalData, const TChunkConstructionData& externalInfo) const override; + virtual NKikimrArrowAccessorProto::TConstructor DoSerializeToProto() const override; + virtual bool DoDeserializeFromProto(const NKikimrArrowAccessorProto::TConstructor& proto) override; + virtual std::shared_ptr DoGetExpectedSchema(const std::shared_ptr& resultColumn) const override; + virtual TConclusion> DoConstructDefault(const TChunkConstructionData& externalInfo) const override; + +public: + virtual TString GetClassName() const override { + return GetClassNameStatic(); + } +}; + +} // namespace NKikimr::NArrow::NAccessor::NPlain diff --git a/ydb/core/formats/arrow/accessor/plain/request.cpp b/ydb/core/formats/arrow/accessor/plain/request.cpp new file mode 100644 index 000000000000..05a6ab128165 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/plain/request.cpp @@ -0,0 +1,22 @@ +#include "request.h" +#include "constructor.h" + +namespace NKikimr::NArrow::NAccessor::NPlain { + +NKikimrArrowAccessorProto::TRequestedConstructor 
TRequestedConstuctor::DoSerializeToProto() const { + return NKikimrArrowAccessorProto::TRequestedConstructor(); +} + +bool TRequestedConstuctor::DoDeserializeFromProto(const NKikimrArrowAccessorProto::TRequestedConstructor& /*proto*/) { + return true; +} + +NKikimr::TConclusionStatus TRequestedConstuctor::DoDeserializeFromRequest(NYql::TFeaturesExtractor& /*features*/) { + return TConclusionStatus::Success(); +} + +NKikimr::TConclusion TRequestedConstuctor::DoBuildConstructor() const { + return std::make_shared(); +} + +} diff --git a/ydb/core/formats/arrow/accessor/plain/request.h b/ydb/core/formats/arrow/accessor/plain/request.h new file mode 100644 index 000000000000..19a8390f2df2 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/plain/request.h @@ -0,0 +1,26 @@ +#pragma once +#include +#include + +namespace NKikimr::NArrow::NAccessor::NPlain { + +class TRequestedConstuctor: public IRequestedConstructor { +public: + static TString GetClassNameStatic() { + return TGlobalConst::PlainDataAccessorName; + } + +private: + static inline auto Registrator = TFactory::TRegistrator(GetClassNameStatic()); + virtual TConclusion DoBuildConstructor() const override; + virtual NKikimrArrowAccessorProto::TRequestedConstructor DoSerializeToProto() const override; + virtual bool DoDeserializeFromProto(const NKikimrArrowAccessorProto::TRequestedConstructor& /*proto*/) override; + virtual TConclusionStatus DoDeserializeFromRequest(NYql::TFeaturesExtractor& /*features*/) override; + +public: + virtual TString GetClassName() const override { + return GetClassNameStatic(); + } +}; + +} // namespace NKikimr::NArrow::NAccessor::NPlain diff --git a/ydb/core/formats/arrow/accessor/plain/ya.make b/ydb/core/formats/arrow/accessor/plain/ya.make new file mode 100644 index 000000000000..5b3dd0bf6081 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/plain/ya.make @@ -0,0 +1,15 @@ +LIBRARY() + +PEERDIR( + ydb/core/formats/arrow/accessor/abstract + ydb/library/formats/arrow + 
ydb/library/formats/arrow/protos +) + +SRCS( + accessor.cpp + GLOBAL constructor.cpp + GLOBAL request.cpp +) + +END() diff --git a/ydb/core/formats/arrow/accessor/sparsed/accessor.cpp b/ydb/core/formats/arrow/accessor/sparsed/accessor.cpp new file mode 100644 index 000000000000..62c796b811d1 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/sparsed/accessor.cpp @@ -0,0 +1,280 @@ +#include "accessor.h" + +#include +#include +#include +#include + +namespace NKikimr::NArrow::NAccessor { + +TSparsedArray::TSparsedArray(const IChunkedArray& defaultArray, const std::shared_ptr& defaultValue) + : TBase(defaultArray.GetRecordsCount(), EType::SparsedArray, defaultArray.GetDataType()) + , DefaultValue(defaultValue) { + if (DefaultValue) { + AFL_VERIFY(DefaultValue->type->id() == defaultArray.GetDataType()->id()); + } + std::optional current; + std::shared_ptr records; + ui32 sparsedRecordsCount = 0; + AFL_VERIFY(SwitchType(GetDataType()->id(), [&](const auto& type) { + using TWrap = std::decay_t; + using TScalar = typename arrow::TypeTraits::ScalarType; + using TArray = typename arrow::TypeTraits::ArrayType; + using TBuilder = typename arrow::TypeTraits::BuilderType; + auto builderValue = NArrow::MakeBuilder(GetDataType()); + TBuilder* builderValueImpl = (TBuilder*)builderValue.get(); + auto builderIndex = NArrow::MakeBuilder(arrow::uint32()); + arrow::UInt32Builder* builderIndexImpl = (arrow::UInt32Builder*)builderIndex.get(); + auto scalar = static_pointer_cast(DefaultValue); + for (ui32 pos = 0; pos < GetRecordsCount();) { + current = defaultArray.GetChunk(current, pos); + auto typedArray = static_pointer_cast(current->GetArray()); + for (ui32 i = 0; i < typedArray->length(); ++i) { + std::optional isDefault; + if (scalar) { + if constexpr (arrow::has_string_view()) { + isDefault = arrow::util::string_view((char*)scalar->value->data(), scalar->value->size()) == typedArray->GetView(i); + } else if constexpr (arrow::has_c_type()) { + isDefault = scalar->value == 
typedArray->Value(i); + } else { + AFL_VERIFY(false)("type", GetDataType()->ToString()); + } + } else { + isDefault = typedArray->IsNull(i); + } + if (!*isDefault) { + if constexpr (arrow::has_string_view()) { + NArrow::TStatusValidator::Validate(builderValueImpl->Append(typedArray->GetView(i))); + NArrow::TStatusValidator::Validate(builderIndexImpl->Append(pos + i)); + ++sparsedRecordsCount; + } else if constexpr (arrow::has_c_type()) { + NArrow::TStatusValidator::Validate(builderValueImpl->Append(typedArray->Value(i))); + NArrow::TStatusValidator::Validate(builderIndexImpl->Append(pos + i)); + ++sparsedRecordsCount; + } else { + AFL_VERIFY(false)("type", GetDataType()->ToString()); + } + } + } + pos = current->GetAddress().GetGlobalFinishPosition(); + AFL_VERIFY(pos <= GetRecordsCount()); + } + std::vector> columns = { NArrow::TStatusValidator::GetValid(builderIndex->Finish()), + NArrow::TStatusValidator::GetValid(builderValue->Finish()) }; + records = arrow::RecordBatch::Make(BuildSchema(GetDataType()), sparsedRecordsCount, columns); + AFL_VERIFY_DEBUG(records->ValidateFull().ok()); + return true; + })); + AFL_VERIFY(records); + Records.emplace_back(0, GetRecordsCount(), records, DefaultValue); +} + +std::vector TSparsedArray::DoSplitBySizes( + const TColumnSaver& saver, const TString& fullSerializedData, const std::vector& splitSizes) { + AFL_VERIFY(Records.size() == 1)("size", Records.size()); + auto chunks = NArrow::NSplitter::TSimpleSplitter(saver).SplitBySizes(Records.front().GetRecords(), fullSerializedData, splitSizes); + + std::vector result; + ui32 idx = 0; + ui32 startIdx = 0; + for (auto&& i : chunks) { + AFL_VERIFY(i.GetSlicedBatch()->num_columns() == 2); + AFL_VERIFY(i.GetSlicedBatch()->column(0)->type()->id() == arrow::uint32()->id()); + auto UI32Column = static_pointer_cast(i.GetSlicedBatch()->column(0)); + ui32 nextStartIdx = NArrow::NAccessor::TSparsedArray::GetLastIndex(i.GetSlicedBatch()) + 1; + if (idx + 1 == chunks.size()) { + nextStartIdx = 
GetRecordsCount(); + } + std::shared_ptr batch; + { + std::unique_ptr builder = NArrow::MakeBuilder(arrow::uint32()); + arrow::UInt32Builder* builderImpl = (arrow::UInt32Builder*)builder.get(); + for (ui32 rowIdx = 0; rowIdx < UI32Column->length(); ++rowIdx) { + TStatusValidator::Validate(builderImpl->Append(UI32Column->Value(rowIdx) - startIdx)); + } + auto colIndex = TStatusValidator::GetValid(builder->Finish()); + batch = arrow::RecordBatch::Make( + i.GetSlicedBatch()->schema(), i.GetSlicedBatch()->num_rows(), { colIndex, i.GetSlicedBatch()->column(1) }); + } + + ++idx; + { + TBuilder builder(DefaultValue, GetDataType()); + builder.AddChunk(nextStartIdx - startIdx, batch); + result.emplace_back(builder.Finish(), saver.Apply(batch)); + } + startIdx = nextStartIdx; + } + + return result; +} + +std::shared_ptr TSparsedArray::DoGetMaxScalar() const { + std::shared_ptr result; + for (auto&& i : Records) { + auto scalarCurrent = i.GetMaxScalar(); + if (!scalarCurrent) { + continue; + } + if (!result || ScalarCompare(result, scalarCurrent) < 0) { + result = scalarCurrent; + } + } + return result; +} + +ui32 TSparsedArray::GetLastIndex(const std::shared_ptr& batch) { + AFL_VERIFY(batch); + AFL_VERIFY(batch->num_rows()); + auto c = batch->GetColumnByName("index"); + AFL_VERIFY(c)("schema", batch->schema()->ToString()); + AFL_VERIFY(c->type_id() == arrow::uint32()->id())("type", c->type()->ToString()); + auto ui32Column = static_pointer_cast(c); + return ui32Column->Value(ui32Column->length() - 1); +} + +namespace { +static thread_local THashMap> SimpleBatchesCache; +} + +NKikimr::NArrow::NAccessor::TSparsedArrayChunk TSparsedArray::MakeDefaultChunk( + const std::shared_ptr& defaultValue, const std::shared_ptr& type, const ui32 recordsCount) { + auto it = SimpleBatchesCache.find(type->ToString()); + if (it == SimpleBatchesCache.end()) { + it = SimpleBatchesCache.emplace(type->ToString(), NArrow::MakeEmptyBatch(BuildSchema(type))).first; + 
AFL_VERIFY(it->second->ValidateFull().ok()); + } + return TSparsedArrayChunk(0, recordsCount, it->second, defaultValue); +} + +IChunkedArray::TLocalDataAddress TSparsedArrayChunk::GetChunk( + const std::optional& /*chunkCurrent*/, const ui64 position, const ui32 chunkIdx) const { + const auto predCompare = [](const ui32 position, const TInternalChunkInfo& item) { + return position < item.GetStartExt(); + }; + auto it = std::upper_bound(RemapExternalToInternal.begin(), RemapExternalToInternal.end(), position, predCompare); + AFL_VERIFY(it != RemapExternalToInternal.begin()); + --it; + if (it->GetIsDefault()) { + return IChunkedArray::TLocalDataAddress( + NArrow::TThreadSimpleArraysCache::Get(ColValue->type(), DefaultValue, it->GetSize()), StartPosition + it->GetStartExt(), chunkIdx); + } else { + return IChunkedArray::TLocalDataAddress( + ColValue->Slice(it->GetStartInt(), it->GetSize()), StartPosition + it->GetStartExt(), chunkIdx); + } +} + +std::vector> TSparsedArrayChunk::GetChunkedArray() const { + std::vector> chunks; + for (auto&& i : RemapExternalToInternal) { + if (i.GetIsDefault()) { + chunks.emplace_back(NArrow::TThreadSimpleArraysCache::Get(ColValue->type(), DefaultValue, i.GetSize())); + } else { + chunks.emplace_back(ColValue->Slice(i.GetStartInt(), i.GetSize())); + } + } + return chunks; +} + +TSparsedArrayChunk::TSparsedArrayChunk(const ui32 posStart, const ui32 recordsCount, const std::shared_ptr& records, + const std::shared_ptr& defaultValue) + : RecordsCount(recordsCount) + , StartPosition(posStart) + , Records(records) + , DefaultValue(defaultValue) { + AFL_VERIFY(records->num_columns() == 2); + ColIndex = Records->GetColumnByName("index"); + AFL_VERIFY(ColIndex); + AFL_VERIFY(ColIndex->type_id() == arrow::uint32()->id()); + UI32ColIndex = static_pointer_cast(ColIndex); + if (UI32ColIndex->length()) { + AFL_VERIFY(UI32ColIndex->Value(UI32ColIndex->length() - 1) < recordsCount)("val", UI32ColIndex->Value(UI32ColIndex->length() - 1))( + "count", 
recordsCount); + } + NotDefaultRecordsCount = UI32ColIndex->length(); + RawValues = UI32ColIndex->raw_values(); + ColValue = Records->GetColumnByName("value"); + if (DefaultValue) { + AFL_VERIFY(DefaultValue->type->id() == ColValue->type_id()); + } + ui32 nextIndex = 0; + ui32 startIndexExt = 0; + ui32 startIndexInt = 0; + for (ui32 idx = 0; idx < UI32ColIndex->length(); ++idx) { + if (nextIndex != UI32ColIndex->Value(idx)) { + if (idx - startIndexInt) { + RemapExternalToInternal.emplace_back(startIndexExt, startIndexInt, idx - startIndexInt, false); + } + RemapExternalToInternal.emplace_back(nextIndex, 0, UI32ColIndex->Value(idx) - nextIndex, true); + startIndexExt = UI32ColIndex->Value(idx); + startIndexInt = idx; + } + nextIndex = UI32ColIndex->Value(idx) + 1; + } + if (UI32ColIndex->length() > startIndexInt) { + RemapExternalToInternal.emplace_back(startIndexExt, startIndexInt, UI32ColIndex->length() - startIndexInt, false); + } + if (nextIndex != RecordsCount) { + RemapExternalToInternal.emplace_back(nextIndex, 0, RecordsCount - nextIndex, true); + } + ui32 count = 0; + for (auto&& i : RemapExternalToInternal) { + count += i.GetSize(); + } + for (ui32 i = 0; i + 1 < RemapExternalToInternal.size(); ++i) { + AFL_VERIFY(RemapExternalToInternal[i + 1].GetStartExt() == RemapExternalToInternal[i].GetStartExt() + RemapExternalToInternal[i].GetSize()); + } + AFL_VERIFY(count == RecordsCount)("count", count)("records_count", RecordsCount); + AFL_VERIFY(ColValue); +} + +ui64 TSparsedArrayChunk::GetRawSize() const { + return std::max(NArrow::GetBatchDataSize(Records), 8); +} + +std::shared_ptr TSparsedArrayChunk::GetScalar(const ui32 index) const { + AFL_VERIFY(index < RecordsCount); + for (ui32 idx = 0; idx < UI32ColIndex->length(); ++idx) { + if (UI32ColIndex->Value(idx) == index) { + return NArrow::TStatusValidator::GetValid(ColValue->GetScalar(idx)); + } + } + return DefaultValue; +} + +ui32 TSparsedArrayChunk::GetFirstIndexNotDefault() const { + if 
(UI32ColIndex->length()) { + return StartPosition + GetUI32ColIndex()->Value(0); + } else { + return StartPosition + GetRecordsCount(); + } +} + +std::shared_ptr TSparsedArrayChunk::GetMaxScalar() const { + if (!ColValue->length()) { + return DefaultValue; + } + auto minMax = NArrow::FindMinMaxPosition(ColValue); + auto currentScalar = NArrow::TStatusValidator::GetValid(ColValue->GetScalar(minMax.second)); + if (!DefaultValue || ScalarCompare(DefaultValue, currentScalar) < 0) { + return currentScalar; + } + return DefaultValue; +} + +void TSparsedArray::TBuilder::AddChunk(const ui32 recordsCount, const std::shared_ptr& data) { + AFL_VERIFY(data); + AFL_VERIFY(recordsCount); + AFL_VERIFY(data->num_rows() <= recordsCount)("rows", data->num_rows())("count", recordsCount); + AFL_VERIFY(data->num_columns() == 2)("count", data->num_columns()); + AFL_VERIFY(data->column(0)->type_id() == arrow::uint32()->id())("type", data->column(0)->type()->ToString()); + AFL_VERIFY_DEBUG(data->schema()->field(0)->name() == "index")("name", data->schema()->field(0)->name()); + if (data->num_rows()) { + auto* arr = static_cast(data->column(0).get()); + AFL_VERIFY(arr->Value(arr->length() - 1) < recordsCount)("val", arr->Value(arr->length() - 1))("count", recordsCount); + } + Chunks.emplace_back(RecordsCount, recordsCount, data, DefaultValue); + RecordsCount += recordsCount; +} + +} // namespace NKikimr::NArrow::NAccessor diff --git a/ydb/core/formats/arrow/accessor/sparsed/accessor.h b/ydb/core/formats/arrow/accessor/sparsed/accessor.h new file mode 100644 index 000000000000..040224962239 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/sparsed/accessor.h @@ -0,0 +1,189 @@ +#pragma once +#include + +#include +#include + +#include +#include +#include + +namespace NKikimr::NArrow::NAccessor { + +class TSparsedArrayChunk: public TMoveOnly { +private: + YDB_READONLY(ui32, RecordsCount, 0); + YDB_READONLY(ui32, StartPosition, 0); + YDB_READONLY_DEF(std::shared_ptr, Records); + 
std::shared_ptr DefaultValue; + + std::shared_ptr ColIndex; + const ui32* RawValues = nullptr; + ui32 NotDefaultRecordsCount = 0; + YDB_READONLY_DEF(std::shared_ptr, UI32ColIndex); + YDB_READONLY_DEF(std::shared_ptr, ColValue); + + class TInternalChunkInfo { + private: + YDB_READONLY(ui32, StartExt, 0); + YDB_READONLY(ui32, StartInt, 0); + YDB_READONLY(ui32, Size, 0); + YDB_READONLY(bool, IsDefault, false); + + public: + TInternalChunkInfo(const ui32 startExt, const ui32 startInt, const ui32 size, const bool defaultFlag) + : StartExt(startExt) + , StartInt(startInt) + , Size(size) + , IsDefault(defaultFlag) { + AFL_VERIFY(Size); + } + + bool operator<(const TInternalChunkInfo& item) const { + return StartExt < item.StartExt; + } + }; + + std::vector RemapExternalToInternal; + +public: + ui32 GetFinishPosition() const { + return StartPosition + RecordsCount; + } + + ui32 GetNotDefaultRecordsCount() const { + return NotDefaultRecordsCount; + } + + ui32 GetIndexUnsafeFast(const ui32 i) const { + return RawValues[i]; + } + + ui32 GetFirstIndexNotDefault() const; + + std::shared_ptr GetMaxScalar() const; + + std::shared_ptr GetScalar(const ui32 index) const; + + IChunkedArray::TLocalDataAddress GetChunk( + const std::optional& chunkCurrent, const ui64 position, const ui32 chunkIdx) const; + + std::vector> GetChunkedArray() const; + + TSparsedArrayChunk(const ui32 posStart, const ui32 recordsCount, const std::shared_ptr& records, + const std::shared_ptr& defaultValue); + + ui64 GetRawSize() const; +}; + +class TSparsedArray: public IChunkedArray { +private: + using TBase = IChunkedArray; + std::shared_ptr DefaultValue; + std::vector Records; + +protected: + virtual TLocalChunkedArrayAddress DoGetLocalChunkedArray( + const std::optional& /*chunkCurrent*/, const ui64 /*position*/) const override { + AFL_VERIFY(false); + return TLocalChunkedArrayAddress(nullptr, 0, 0); + } + + virtual std::shared_ptr DoGetMaxScalar() const override; + + virtual std::vector DoSplitBySizes( + 
const TColumnSaver& saver, const TString& fullSerializedData, const std::vector& splitSizes) override; + + virtual TLocalDataAddress DoGetLocalData(const std::optional& chunkCurrent, const ui64 position) const override { + ui32 currentIdx = 0; + for (ui32 i = 0; i < Records.size(); ++i) { + if (currentIdx <= position && position < currentIdx + Records[i].GetRecordsCount()) { + return Records[i].GetChunk(chunkCurrent, position - currentIdx, i); + } + currentIdx += Records[i].GetRecordsCount(); + } + AFL_VERIFY(false); + return TLocalDataAddress(nullptr, 0, 0); + } + virtual std::shared_ptr DoGetChunkedArray() const override { + std::vector> chunks; + for (auto&& i : Records) { + auto chunksLocal = i.GetChunkedArray(); + chunks.insert(chunks.end(), chunksLocal.begin(), chunksLocal.end()); + } + return std::make_shared(chunks, GetDataType()); + } + virtual std::optional DoGetRawSize() const override { + ui64 bytes = 0; + for (auto&& i : Records) { + bytes += i.GetRawSize(); + } + return bytes; + } + + TSparsedArray(std::vector&& data, const std::shared_ptr& defaultValue, + const std::shared_ptr& type, const ui32 recordsCount) + : TBase(recordsCount, EType::SparsedArray, type) + , DefaultValue(defaultValue) + , Records(std::move(data)) { + } + + static ui32 GetLastIndex(const std::shared_ptr& batch); + + static std::shared_ptr BuildSchema(const std::shared_ptr& type) { + std::vector> fields = { std::make_shared("index", arrow::uint32()), + std::make_shared("value", type) }; + return std::make_shared(fields); + } + + static TSparsedArrayChunk MakeDefaultChunk( + const std::shared_ptr& defaultValue, const std::shared_ptr& type, const ui32 recordsCount); + +public: + TSparsedArray(const IChunkedArray& defaultArray, const std::shared_ptr& defaultValue); + + TSparsedArray(const std::shared_ptr& defaultValue, const std::shared_ptr& type, const ui32 recordsCount) + : TBase(recordsCount, EType::SparsedArray, type) + , DefaultValue(defaultValue) { + 
Records.emplace_back(MakeDefaultChunk(defaultValue, type, recordsCount)); + } + + virtual std::shared_ptr DoGetScalar(const ui32 index) const override { + auto& chunk = GetSparsedChunk(index); + return chunk.GetScalar(index - chunk.GetStartPosition()); + } + + const TSparsedArrayChunk& GetSparsedChunk(const ui64 position) const { + const auto pred = [](const ui64 position, const TSparsedArrayChunk& item) { + return position < item.GetStartPosition(); + }; + auto it = std::upper_bound(Records.begin(), Records.end(), position, pred); + AFL_VERIFY(it != Records.begin()); + --it; + AFL_VERIFY(position < it->GetStartPosition() + it->GetRecordsCount()); + AFL_VERIFY(it->GetStartPosition() <= position); + return *it; + } + + class TBuilder { + private: + ui32 RecordsCount = 0; + std::vector Chunks; + std::shared_ptr DefaultValue; + std::shared_ptr Type; + + public: + TBuilder(const std::shared_ptr& defaultValue, const std::shared_ptr& type) + : DefaultValue(defaultValue) + , Type(type) { + } + + void AddChunk(const ui32 recordsCount, const std::shared_ptr& data); + + std::shared_ptr Finish() { + return std::shared_ptr(new TSparsedArray(std::move(Chunks), DefaultValue, Type, RecordsCount)); + } + }; +}; + +} // namespace NKikimr::NArrow::NAccessor diff --git a/ydb/core/formats/arrow/accessor/sparsed/constructor.cpp b/ydb/core/formats/arrow/accessor/sparsed/constructor.cpp new file mode 100644 index 000000000000..e3f45cd75327 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/sparsed/constructor.cpp @@ -0,0 +1,34 @@ +#include "accessor.h" +#include "constructor.h" + +namespace NKikimr::NArrow::NAccessor::NSparsed { + +std::shared_ptr TConstructor::DoGetExpectedSchema(const std::shared_ptr& resultColumn) const { + arrow::FieldVector fields = { std::make_shared("index", arrow::uint32()), + std::make_shared("value", resultColumn->type()) }; + return std::make_shared(fields); +} + +TConclusion> TConstructor::DoConstructDefault(const TChunkConstructionData& externalInfo) const 
{ + return std::make_shared(externalInfo.GetDefaultValue(), externalInfo.GetColumnType(), externalInfo.GetRecordsCount()); +} + +TConclusion> TConstructor::DoConstruct( + const std::shared_ptr& originalData, const TChunkConstructionData& externalInfo) const { + AFL_VERIFY(originalData->num_columns() == 2)("count", originalData->num_columns())("schema", originalData->schema()->ToString()); + NArrow::NAccessor::TSparsedArray::TBuilder builder(externalInfo.GetDefaultValue(), externalInfo.GetColumnType()); + builder.AddChunk(externalInfo.GetRecordsCount(), originalData); + return builder.Finish(); +} + +NKikimrArrowAccessorProto::TConstructor TConstructor::DoSerializeToProto() const { + NKikimrArrowAccessorProto::TConstructor result; + *result.MutableSparsed() = {}; + return result; +} + +bool TConstructor::DoDeserializeFromProto(const NKikimrArrowAccessorProto::TConstructor& /*proto*/) { + return true; +} + +} // namespace NKikimr::NArrow::NAccessor::NSparsed diff --git a/ydb/core/formats/arrow/accessor/sparsed/constructor.h b/ydb/core/formats/arrow/accessor/sparsed/constructor.h new file mode 100644 index 000000000000..0ccf5efdd70f --- /dev/null +++ b/ydb/core/formats/arrow/accessor/sparsed/constructor.h @@ -0,0 +1,28 @@ +#pragma once +#include +#include + +namespace NKikimr::NArrow::NAccessor::NSparsed { + +class TConstructor: public IConstructor { +public: + static TString GetClassNameStatic() { + return TGlobalConst::SparsedDataAccessorName; + } + +private: + static inline auto Registrator = TFactory::TRegistrator(GetClassNameStatic()); + virtual TConclusion> DoConstruct( + const std::shared_ptr& originalData, const TChunkConstructionData& externalInfo) const override; + virtual NKikimrArrowAccessorProto::TConstructor DoSerializeToProto() const override; + virtual bool DoDeserializeFromProto(const NKikimrArrowAccessorProto::TConstructor& proto) override; + virtual std::shared_ptr DoGetExpectedSchema(const std::shared_ptr& resultColumn) const override; + virtual 
TConclusion> DoConstructDefault(const TChunkConstructionData& externalInfo) const override; + +public: + virtual TString GetClassName() const override { + return GetClassNameStatic(); + } +}; + +} // namespace NKikimr::NArrow::NAccessor::NSparsed diff --git a/ydb/core/formats/arrow/accessor/sparsed/request.cpp b/ydb/core/formats/arrow/accessor/sparsed/request.cpp new file mode 100644 index 000000000000..d484341a95c0 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/sparsed/request.cpp @@ -0,0 +1,22 @@ +#include "request.h" +#include "constructor.h" + +namespace NKikimr::NArrow::NAccessor::NSparsed { + +NKikimrArrowAccessorProto::TRequestedConstructor TRequestedConstuctor::DoSerializeToProto() const { + return NKikimrArrowAccessorProto::TRequestedConstructor(); +} + +bool TRequestedConstuctor::DoDeserializeFromProto(const NKikimrArrowAccessorProto::TRequestedConstructor& /*proto*/) { + return true; +} + +NKikimr::TConclusionStatus TRequestedConstuctor::DoDeserializeFromRequest(NYql::TFeaturesExtractor& /*features*/) { + return TConclusionStatus::Success(); +} + +NKikimr::TConclusion TRequestedConstuctor::DoBuildConstructor() const { + return std::make_shared(); +} + +} diff --git a/ydb/core/formats/arrow/accessor/sparsed/request.h b/ydb/core/formats/arrow/accessor/sparsed/request.h new file mode 100644 index 000000000000..4be2d897b090 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/sparsed/request.h @@ -0,0 +1,26 @@ +#pragma once +#include +#include + +namespace NKikimr::NArrow::NAccessor::NSparsed { + +class TRequestedConstuctor: public IRequestedConstructor { +public: + static TString GetClassNameStatic() { + return TGlobalConst::SparsedDataAccessorName; + } + +private: + static inline auto Registrator = TFactory::TRegistrator(GetClassNameStatic()); + virtual TConclusion DoBuildConstructor() const override; + virtual NKikimrArrowAccessorProto::TRequestedConstructor DoSerializeToProto() const override; + virtual bool DoDeserializeFromProto(const 
NKikimrArrowAccessorProto::TRequestedConstructor& proto) override; + virtual TConclusionStatus DoDeserializeFromRequest(NYql::TFeaturesExtractor& features) override; + +public: + virtual TString GetClassName() const override { + return GetClassNameStatic(); + } +}; + +} // namespace NKikimr::NArrow::NAccessor::NSparsed diff --git a/ydb/core/formats/arrow/accessor/sparsed/ya.make b/ydb/core/formats/arrow/accessor/sparsed/ya.make new file mode 100644 index 000000000000..c68f5f84f9a6 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/sparsed/ya.make @@ -0,0 +1,15 @@ +LIBRARY() + +PEERDIR( + ydb/core/formats/arrow/accessor/abstract + ydb/library/formats/arrow + ydb/library/formats/arrow/protos +) + +SRCS( + GLOBAL constructor.cpp + GLOBAL request.cpp + accessor.cpp +) + +END() diff --git a/ydb/core/formats/arrow/accessor/ya.make b/ydb/core/formats/arrow/accessor/ya.make new file mode 100644 index 000000000000..197b97d9efe7 --- /dev/null +++ b/ydb/core/formats/arrow/accessor/ya.make @@ -0,0 +1,10 @@ +LIBRARY() + +PEERDIR( + ydb/core/formats/arrow/accessor/abstract + ydb/core/formats/arrow/accessor/plain + ydb/core/formats/arrow/accessor/composite_serial + ydb/core/formats/arrow/accessor/sparsed +) + +END() diff --git a/ydb/core/formats/arrow/arrow_filter.cpp b/ydb/core/formats/arrow/arrow_filter.cpp index 58cd7116baed..c404a016f4bd 100644 --- a/ydb/core/formats/arrow/arrow_filter.cpp +++ b/ydb/core/formats/arrow/arrow_filter.cpp @@ -1,5 +1,5 @@ #include "arrow_filter.h" -#include "switch_type.h" +#include "switch/switch_type.h" #include "common/container.h" #include "common/adapter.h" diff --git a/ydb/core/formats/arrow/arrow_helpers.cpp b/ydb/core/formats/arrow/arrow_helpers.cpp index 2ee5626e5d40..1246b01e334c 100644 --- a/ydb/core/formats/arrow/arrow_helpers.cpp +++ b/ydb/core/formats/arrow/arrow_helpers.cpp @@ -1,13 +1,14 @@ #include "arrow_helpers.h" -#include "switch_type.h" -#include "common/validation.h" +#include "switch/switch_type.h" #include 
"permutations.h" #include "common/adapter.h" #include "serializer/native.h" #include "serializer/abstract.h" #include "serializer/stream.h" -#include "simple_arrays_cache.h" +#include +#include +#include #include #include @@ -105,11 +106,6 @@ arrow::Result> MakeArrowSchema(const std::vector< return fields.status(); } -TString SerializeSchema(const arrow::Schema& schema) { - auto buffer = TStatusValidator::GetValid(arrow::ipc::SerializeSchema(schema)); - return buffer->ToString(); -} - std::shared_ptr DeserializeSchema(const TString& str) { std::shared_ptr buffer(std::make_shared(str)); arrow::io::BufferReader reader(buffer); @@ -143,148 +139,6 @@ std::shared_ptr DeserializeBatch(const TString& blob, const } } -std::shared_ptr MakeEmptyBatch(const std::shared_ptr& schema, const ui32 rowsCount) { - std::vector> columns; - columns.reserve(schema->num_fields()); - - for (auto& field : schema->fields()) { - auto result = NArrow::TThreadSimpleArraysCache::GetNull(field->type(), rowsCount); - columns.emplace_back(result); - Y_ABORT_UNLESS(result); - } - return arrow::RecordBatch::Make(schema, rowsCount, columns); -} - -std::shared_ptr CombineBatches(const std::vector>& batches) { - if (batches.empty()) { - return nullptr; - } - auto table = TStatusValidator::GetValid(arrow::Table::FromRecordBatches(batches)); - return table ? 
ToBatch(table, true) : nullptr; -} - -std::shared_ptr ToBatch(const std::shared_ptr& tableExt, const bool combine) { - if (!tableExt) { - return nullptr; - } - std::shared_ptr table; - if (combine) { - auto res = tableExt->CombineChunks(); - Y_ABORT_UNLESS(res.ok()); - table = *res; - } else { - table = tableExt; - } - std::vector> columns; - columns.reserve(table->num_columns()); - for (auto& col : table->columns()) { - AFL_VERIFY(col->num_chunks() == 1)("size", col->num_chunks())("size_bytes", GetTableDataSize(tableExt)) - ("schema", tableExt->schema()->ToString())("size_new", GetTableDataSize(table)); - columns.push_back(col->chunk(0)); - } - return arrow::RecordBatch::Make(table->schema(), table->num_rows(), columns); -} - -// Check if the permutation doesn't reorder anything -bool IsTrivial(const arrow::UInt64Array& permutation, const ui64 originalLength) { - if ((ui64)permutation.length() != originalLength) { - return false; - } - for (i64 i = 0; i < permutation.length(); ++i) { - if (permutation.Value(i) != (ui64)i) { - return false; - } - } - return true; -} - -std::shared_ptr Reorder(const std::shared_ptr& batch, - const std::shared_ptr& permutation, const bool canRemove) { - Y_ABORT_UNLESS(permutation->length() == batch->num_rows() || canRemove); - - auto res = IsTrivial(*permutation, batch->num_rows()) ? 
batch : arrow::compute::Take(batch, permutation); - Y_ABORT_UNLESS(res.ok()); - return (*res).record_batch(); -} - -THashMap> ShardingSplit(const std::shared_ptr& batch, const THashMap>& shardRows) { - AFL_VERIFY(batch); - std::shared_ptr permutation; - { - arrow::UInt64Builder builder; - Y_VERIFY_OK(builder.Reserve(batch->num_rows())); - - for (auto&& [shardId, rowIdxs]: shardRows) { - for (auto& row : rowIdxs) { - Y_VERIFY_OK(builder.Append(row)); - } - } - Y_VERIFY_OK(builder.Finish(&permutation)); - } - - auto reorderedBatch = Reorder(batch, permutation, false); - - THashMap> out; - - int offset = 0; - for (auto&& [shardId, shardRowIdxs] : shardRows) { - if (shardRowIdxs.empty()) { - continue; - } - out.emplace(shardId, reorderedBatch->Slice(offset, shardRowIdxs.size())); - offset += shardRowIdxs.size(); - } - - Y_ABORT_UNLESS(offset == batch->num_rows()); - return out; -} - -std::vector> ShardingSplit(const std::shared_ptr& batch, const std::vector>& shardRows, const ui32 numShards) { - AFL_VERIFY(batch); - std::shared_ptr permutation; - { - arrow::UInt64Builder builder; - Y_VERIFY_OK(builder.Reserve(batch->num_rows())); - - for (ui32 shardNo = 0; shardNo < numShards; ++shardNo) { - for (auto& row : shardRows[shardNo]) { - Y_VERIFY_OK(builder.Append(row)); - } - } - Y_VERIFY_OK(builder.Finish(&permutation)); - } - - auto reorderedBatch = Reorder(batch, permutation, false); - - std::vector> out(numShards); - - int offset = 0; - for (ui32 shardNo = 0; shardNo < numShards; ++shardNo) { - int length = shardRows[shardNo].size(); - if (length) { - out[shardNo] = reorderedBatch->Slice(offset, length); - offset += length; - } - } - - Y_ABORT_UNLESS(offset == batch->num_rows()); - return out; -} - -std::vector> ShardingSplit(const std::shared_ptr& batch, - const std::vector& sharding, ui32 numShards) { - AFL_VERIFY(batch); - Y_ABORT_UNLESS((size_t)batch->num_rows() == sharding.size()); - - std::vector> shardRows(numShards); - for (size_t row = 0; row < sharding.size(); 
++row) { - ui32 shardNo = sharding[row]; - Y_ABORT_UNLESS(shardNo < numShards); - shardRows[shardNo].push_back(row); - } - return ShardingSplit(batch, shardRows, numShards); -} - void DedupSortedBatch(const std::shared_ptr& batch, const std::shared_ptr& sortingKey, std::vector>& out) { @@ -322,37 +176,6 @@ void DedupSortedBatch(const std::shared_ptr& batch, Y_DEBUG_ABORT_UNLESS(NArrow::IsSortedAndUnique(out.back(), sortingKey)); } -template -static bool IsSelfSorted(const std::shared_ptr& batch) { - if (batch->num_rows() < 2) { - return true; - } - auto& columns = batch->columns(); - - for (int i = 1; i < batch->num_rows(); ++i) { - TRawReplaceKey prev(&columns, i - 1); - TRawReplaceKey current(&columns, i); - if constexpr (desc) { - if (prev < current) { - AFL_DEBUG(NKikimrServices::ARROW_HELPER)("event", "prev < current")("current", current.DebugString())("prev", prev.DebugString()); - return false; - } - } else { - if (current < prev) { - AFL_DEBUG(NKikimrServices::ARROW_HELPER)("event", "current < prev")("current", current.DebugString())("prev", prev.DebugString()); - return false; - } - } - if constexpr (uniq) { - if (prev == current) { - AFL_DEBUG(NKikimrServices::ARROW_HELPER)("event", "equal")("current", current.DebugString())("prev", prev.DebugString()); - return false; - } - } - } - return true; -} - bool IsSorted(const std::shared_ptr& batch, const std::shared_ptr& sortingKey, bool desc) { auto keyBatch = TColumnOperator().Adapt(batch, sortingKey).DetachResult(); @@ -373,267 +196,6 @@ bool IsSortedAndUnique(const std::shared_ptr& batch, } } -bool HasAllColumns(const std::shared_ptr& batch, const std::shared_ptr& schema) { - for (auto& field : schema->fields()) { - if (batch->schema()->GetFieldIndex(field->name()) < 0) { - return false; - } - } - return true; -} - -std::vector> MakeBuilders(const std::shared_ptr& schema, - size_t reserve, const std::map& sizeByColumn) { - std::vector> builders; - builders.reserve(schema->num_fields()); - - for (auto& 
field : schema->fields()) { - std::unique_ptr builder; - TStatusValidator::Validate(arrow::MakeBuilder(arrow::default_memory_pool(), field->type(), &builder)); - if (sizeByColumn.size()) { - auto it = sizeByColumn.find(field->name()); - if (it != sizeByColumn.end()) { - AFL_VERIFY(NArrow::ReserveData(*builder, it->second))("size", it->second)("field", field->name()); - } - } - - if (reserve) { - TStatusValidator::Validate(builder->Reserve(reserve)); - } - - builders.emplace_back(std::move(builder)); - - } - return builders; -} - -std::unique_ptr MakeBuilder(const std::shared_ptr& field) { - AFL_VERIFY(field); - return MakeBuilder(field->type()); -} - -std::unique_ptr MakeBuilder(const std::shared_ptr& type) { - AFL_VERIFY(type); - std::unique_ptr builder; - TStatusValidator::Validate(arrow::MakeBuilder(arrow::default_memory_pool(), type, &builder)); - return std::move(builder); -} - -std::vector> Finish(std::vector>&& builders) { - std::vector> out; - for (auto& builder : builders) { - std::shared_ptr array; - TStatusValidator::Validate(builder->Finish(&array)); - out.emplace_back(array); - } - return out; -} - -std::vector ColumnNames(const std::shared_ptr& schema) { - std::vector out; - out.reserve(schema->num_fields()); - for (int i = 0; i < schema->num_fields(); ++i) { - auto& name = schema->field(i)->name(); - out.emplace_back(TString(name.data(), name.size())); - } - return out; -} - -std::shared_ptr MakeUI64Array(ui64 value, i64 size) { - auto res = arrow::MakeArrayFromScalar(arrow::UInt64Scalar(value), size); - Y_ABORT_UNLESS(res.ok()); - return std::static_pointer_cast(*res); -} - -std::pair FindMinMaxPosition(const std::shared_ptr& array) { - if (array->length() == 0) { - return {-1, -1}; - } - - int minPos = 0; - int maxPos = 0; - SwitchType(array->type_id(), [&](const auto& type) { - using TWrap = std::decay_t; - using TArray = typename arrow::TypeTraits::ArrayType; - - auto& column = static_cast(*array); - - for (int i = 1; i < column.length(); ++i) { 
- const auto& value = column.GetView(i); - if (value < column.GetView(minPos)) { - minPos = i; - } - if (value > column.GetView(maxPos)) { - maxPos = i; - } - } - return true; - }); - return {minPos, maxPos}; -} - -std::shared_ptr MinScalar(const std::shared_ptr& type) { - std::shared_ptr out; - SwitchType(type->id(), [&](const auto& t) { - using TWrap = std::decay_t; - using T = typename TWrap::T; - using TScalar = typename arrow::TypeTraits::ScalarType; - - if constexpr (std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v) { - out = std::make_shared(arrow::Buffer::FromString(""), type); - } else if constexpr (std::is_same_v) { - std::string s(static_cast(*type).byte_width(), '\0'); - out = std::make_shared(arrow::Buffer::FromString(s), type); - } else if constexpr (std::is_same_v) { - return false; - } else if constexpr (arrow::is_temporal_type::value) { - using TCType = typename arrow::TypeTraits::CType; - out = std::make_shared(Min(), type); - } else if constexpr (arrow::has_c_type::value) { - using TCType = typename arrow::TypeTraits::CType; - out = std::make_shared(Min()); - } else { - return false; - } - return true; - }); - Y_ABORT_UNLESS(out); - return out; -} - -namespace { - -template -class TDefaultScalarValue { -public: - static constexpr T Value = 0; -}; - -template <> -class TDefaultScalarValue { -public: - static constexpr bool Value = false; -}; - -} - -std::shared_ptr DefaultScalar(const std::shared_ptr& type) { - std::shared_ptr out; - SwitchType(type->id(), [&](const auto& t) { - using TWrap = std::decay_t; - using T = typename TWrap::T; - using TScalar = typename arrow::TypeTraits::ScalarType; - - if constexpr (std::is_same_v || - std::is_same_v || - std::is_same_v || - std::is_same_v) { - out = std::make_shared(arrow::Buffer::FromString(""), type); - } else if constexpr (std::is_same_v) { - std::string s(static_cast(*type).byte_width(), '\0'); - out = std::make_shared(arrow::Buffer::FromString(s), type); - } else if 
constexpr (std::is_same_v) { - return false; - } else if constexpr (arrow::is_temporal_type::value) { - using TCType = typename arrow::TypeTraits::CType; - out = std::make_shared(TDefaultScalarValue::Value, type); - } else if constexpr (arrow::has_c_type::value) { - using TCType = typename arrow::TypeTraits::CType; - out = std::make_shared(TDefaultScalarValue::Value); - } else { - return false; - } - return true; - }); - Y_ABORT_UNLESS(out); - return out; -} - -std::shared_ptr GetScalar(const std::shared_ptr& array, int position) { - auto res = array->GetScalar(position); - Y_ABORT_UNLESS(res.ok()); - return *res; -} - -bool IsGoodScalar(const std::shared_ptr& x) { - if (!x) { - return false; - } - - return SwitchType(x->type->id(), [&](const auto& type) { - using TWrap = std::decay_t; - using TScalar = typename arrow::TypeTraits::ScalarType; - using TValue = std::decay_t(*x).value)>; - - if constexpr (arrow::has_string_view()) { - const auto& xval = static_cast(*x).value; - return xval && xval->data(); - } - if constexpr (std::is_arithmetic_v) { - return true; - } - return false; - }); -} - -bool ScalarLess(const std::shared_ptr& x, const std::shared_ptr& y) { - Y_ABORT_UNLESS(x); - Y_ABORT_UNLESS(y); - return ScalarLess(*x, *y); -} - -bool ScalarLess(const arrow::Scalar& x, const arrow::Scalar& y) { - return ScalarCompare(x, y) < 0; -} - -int ScalarCompare(const arrow::Scalar& x, const arrow::Scalar& y) { - Y_VERIFY_S(x.type->Equals(y.type), x.type->ToString() + " vs " + y.type->ToString()); - - return SwitchTypeImpl(x.type->id(), [&](const auto& type) { - using TWrap = std::decay_t; - using TScalar = typename arrow::TypeTraits::ScalarType; - using TValue = std::decay_t(x).value)>; - - if constexpr (arrow::has_string_view()) { - const auto& xval = static_cast(x).value; - const auto& yval = static_cast(y).value; - Y_ABORT_UNLESS(xval); - Y_ABORT_UNLESS(yval); - TStringBuf xBuf(reinterpret_cast(xval->data()), xval->size()); - TStringBuf 
yBuf(reinterpret_cast(yval->data()), yval->size()); - if (xBuf < yBuf) { - return -1; - } else if (yBuf < xBuf) { - return 1; - } else { - return 0; - } - } - if constexpr (std::is_arithmetic_v) { - const auto& xval = static_cast(x).value; - const auto& yval = static_cast(y).value; - if (xval < yval) { - return -1; - } else if (yval < xval) { - return 1; - } else { - return 0; - } - } - Y_ABORT_UNLESS(false); // TODO: non primitive types - return 0; - }); -} - -int ScalarCompare(const std::shared_ptr& x, const std::shared_ptr& y) { - Y_ABORT_UNLESS(x); - Y_ABORT_UNLESS(y); - return ScalarCompare(*x, *y); -} - std::shared_ptr SortBatch(const std::shared_ptr& batch, const std::shared_ptr& sortingKey, const bool andUnique) { auto sortPermutation = MakeSortPermutation(batch, sortingKey, andUnique); @@ -644,217 +206,6 @@ std::shared_ptr SortBatch(const std::shared_ptr BoolVecToArray(const std::vector& vec) { - std::shared_ptr out; - arrow::BooleanBuilder builder; - for (const auto val : vec) { - Y_ABORT_UNLESS(builder.Append(val).ok()); - } - Y_ABORT_UNLESS(builder.Finish(&out).ok()); - return out; -} - - -bool ArrayScalarsEqual(const std::shared_ptr& lhs, const std::shared_ptr& rhs) { - bool res = lhs->length() == rhs->length(); - for (int64_t i = 0; i < lhs->length() && res; ++i) { - res &= arrow::ScalarEquals(*lhs->GetScalar(i).ValueOrDie(), *rhs->GetScalar(i).ValueOrDie()); - } - return res; -} - -bool ReserveData(arrow::ArrayBuilder& builder, const size_t size) { - arrow::Status result = arrow::Status::OK(); - if (builder.type()->id() == arrow::Type::BINARY || - builder.type()->id() == arrow::Type::STRING) - { - static_assert(std::is_convertible_v&>, - "Expected StringBuilder to be BaseBinaryBuilder"); - auto& bBuilder = static_cast&>(builder); - result = bBuilder.ReserveData(size); - } - - if (!result.ok()) { - AFL_ERROR(NKikimrServices::ARROW_HELPER)("event", "ReserveData")("error", result.ToString()); - } - return result.ok(); -} - -template -bool 
MergeBatchColumnsImpl(const std::vector>& batches, std::shared_ptr& result, - const std::vector& columnsOrder, const bool orderFieldsAreNecessary, const TBuilder& builder) { - if (batches.empty()) { - result = nullptr; - return true; - } - if (batches.size() == 1) { - result = batches.front(); - return true; - } - std::vector> fields; - std::vector> columns; - std::map fieldNames; - for (auto&& i : batches) { - Y_ABORT_UNLESS(i); - for (auto&& f : i->schema()->fields()) { - if (!fieldNames.emplace(f->name(), fields.size()).second) { - AFL_ERROR(NKikimrServices::ARROW_HELPER)("event", "duplicated column")("name", f->name()); - return false; - } - fields.emplace_back(f); - } - if (i->num_rows() != batches.front()->num_rows()) { - AFL_ERROR(NKikimrServices::ARROW_HELPER)("event", "inconsistency record sizes")("i", i->num_rows())("front", batches.front()->num_rows()); - return false; - } - for (auto&& c : i->columns()) { - columns.emplace_back(c); - } - } - - Y_ABORT_UNLESS(fields.size() == columns.size()); - if (columnsOrder.size()) { - std::vector> fieldsOrdered; - std::vector> columnsOrdered; - for (auto&& i : columnsOrder) { - auto it = fieldNames.find(i); - if (orderFieldsAreNecessary) { - Y_ABORT_UNLESS(it != fieldNames.end()); - } else if (it == fieldNames.end()) { - continue; - } - fieldsOrdered.emplace_back(fields[it->second]); - columnsOrdered.emplace_back(columns[it->second]); - } - std::swap(fieldsOrdered, fields); - std::swap(columnsOrdered, columns); - } - result = builder(std::make_shared(fields), batches.front()->num_rows(), std::move(columns)); - return true; -} - -bool MergeBatchColumns(const std::vector>& batches, std::shared_ptr& result, const std::vector& columnsOrder, const bool orderFieldsAreNecessary) { - const auto builder = [](const std::shared_ptr& schema, const ui32 recordsCount, std::vector>&& columns) { - return arrow::Table::Make(schema, columns, recordsCount); - }; - - return MergeBatchColumnsImpl(batches, result, columnsOrder, 
orderFieldsAreNecessary, builder); -} - -bool MergeBatchColumns(const std::vector>& batches, std::shared_ptr& result, const std::vector& columnsOrder, const bool orderFieldsAreNecessary) { - const auto builder = [](const std::shared_ptr& schema, const ui32 recordsCount, std::vector>&& columns) { - return arrow::RecordBatch::Make(schema, recordsCount, columns); - }; - - return MergeBatchColumnsImpl(batches, result, columnsOrder, orderFieldsAreNecessary, builder); -} - -std::partial_ordering ColumnsCompare(const std::vector>& x, const ui32 xRow, const std::vector>& y, const ui32 yRow) { - return TRawReplaceKey(&x, xRow).CompareNotNull(TRawReplaceKey(&y, yRow)); -} - -NJson::TJsonValue DebugJson(std::shared_ptr array, const ui32 position) { - NJson::TJsonValue result = NJson::JSON_ARRAY; - for (auto&& i : array->columns()) { - result.AppendValue(DebugJson(i, position)); - } - return result; -} - -TString DebugString(std::shared_ptr array, const ui32 position) { - if (!array) { - return "_NO_DATA"; - } - Y_ABORT_UNLESS(position < array->length()); - TStringBuilder result; - SwitchType(array->type_id(), [&](const auto& type) { - using TWrap = std::decay_t; - using TArray = typename arrow::TypeTraits::ArrayType; - - auto& column = static_cast(*array); - if constexpr (arrow::has_string_view()) { - auto value = column.GetString(position); - result << TString(value.data(), value.size()); - } - if constexpr (arrow::has_c_type()) { - result << column.Value(position); - } - return true; - }); - return result; -} - -NJson::TJsonValue DebugJson(std::shared_ptr array, const ui32 position) { - if (!array) { - return NJson::JSON_NULL; - } - Y_ABORT_UNLESS(position < array->length()); - NJson::TJsonValue result = NJson::JSON_MAP; - SwitchType(array->type_id(), [&](const auto& type) { - using TWrap = std::decay_t; - using TArray = typename arrow::TypeTraits::ArrayType; - - auto& column = static_cast(*array); - result.InsertValue("type", typeid(TArray).name()); - if constexpr 
(arrow::has_string_view()) { - auto value = column.GetString(position); - result.InsertValue("value", TString(value.data(), value.size())); - } - if constexpr (arrow::has_c_type()) { - result.InsertValue("value", column.Value(position)); - } - return true; - }); - return result; -} - -NJson::TJsonValue DebugJson(std::shared_ptr array, const ui32 head, const ui32 tail) { - if (!array) { - return NJson::JSON_NULL; - } - NJson::TJsonValue resultFull = NJson::JSON_MAP; - resultFull.InsertValue("length", array->length()); - SwitchType(array->type_id(), [&](const auto& type) { - using TWrap = std::decay_t; - using TArray = typename arrow::TypeTraits::ArrayType; - - auto& column = static_cast(*array); - resultFull.InsertValue("type", typeid(TArray).name()); - resultFull.InsertValue("head", head); - resultFull.InsertValue("tail", tail); - auto& result = resultFull.InsertValue("data", NJson::JSON_ARRAY); - for (int i = 0; i < column.length(); ++i) { - if (i >= (int)head && i + (int)tail < column.length()) { - continue; - } - if constexpr (arrow::has_string_view()) { - auto value = column.GetString(i); - result.AppendValue(TString(value.data(), value.size())); - } - if constexpr (arrow::has_c_type()) { - result.AppendValue(column.Value(i)); - } - } - return true; - }); - return resultFull; -} - -NJson::TJsonValue DebugJson(std::shared_ptr batch, const ui32 head, const ui32 tail) { - if (!batch) { - return NJson::JSON_NULL; - } - NJson::TJsonValue result = NJson::JSON_ARRAY; - ui32 idx = 0; - for (auto&& i : batch->columns()) { - auto& jsonColumn = result.AppendValue(NJson::JSON_MAP); - jsonColumn.InsertValue("name", batch->column_name(idx)); - jsonColumn.InsertValue("data", DebugJson(i, head, tail)); - ++idx; - } - return result; -} - std::shared_ptr ReallocateBatch(std::shared_ptr original) { if (!original) { return nullptr; @@ -862,92 +213,16 @@ std::shared_ptr ReallocateBatch(std::shared_ptrschema()); } -std::shared_ptr MergeColumns(const std::vector>& batches) { - 
std::vector> columns; - std::vector> fields; - std::optional recordsCount; - std::set columnNames; - for (auto&& batch : batches) { - if (!batch) { - continue; - } - for (auto&& column : batch->columns()) { - columns.emplace_back(column); - if (!recordsCount) { - recordsCount = column->length(); - } else { - Y_ABORT_UNLESS(*recordsCount == column->length()); - } - } - for (auto&& field : batch->schema()->fields()) { - AFL_VERIFY(columnNames.emplace(field->name()).second)("field_name", field->name()); - fields.emplace_back(field); - } - } - if (columns.empty()) { - return nullptr; - } - auto schema = std::make_shared(fields); - return arrow::RecordBatch::Make(schema, *recordsCount, columns); -} - -std::vector> SliceToRecordBatches(const std::shared_ptr& t) { - std::set splitPositions; - const ui32 numRows = t->num_rows(); - for (auto&& i : t->columns()) { - ui32 pos = 0; - for (auto&& arr : i->chunks()) { - splitPositions.emplace(pos); - pos += arr->length(); - } - AFL_VERIFY(pos == t->num_rows()); - } - std::vector>> slicedData; - slicedData.resize(splitPositions.size()); - std::vector positions(splitPositions.begin(), splitPositions.end()); - for (auto&& i : t->columns()) { - for (ui32 idx = 0; idx < positions.size(); ++idx) { - auto slice = i->Slice(positions[idx], ((idx + 1 == positions.size()) ? 
numRows : positions[idx + 1]) - positions[idx]); - AFL_VERIFY(slice->num_chunks() == 1); - slicedData[idx].emplace_back(slice->chunks().front()); - } - } - std::vector> result; - ui32 count = 0; - for (auto&& i : slicedData) { - result.emplace_back(arrow::RecordBatch::Make(t->schema(), i.front()->length(), i)); - count += result.back()->num_rows(); - } - AFL_VERIFY(count == t->num_rows())("count", count)("t", t->num_rows()); - return result; -} - -std::shared_ptr ToTable(const std::shared_ptr& batch) { - if (!batch) { - return nullptr; - } - return TStatusValidator::GetValid(arrow::Table::FromRecordBatches(batch->schema(), {batch})); -} - -bool HasNulls(const std::shared_ptr& column) { - AFL_VERIFY(column); - return column->null_bitmap_data(); -} - -std::vector ConvertStrings(const std::vector& input) { - std::vector result; - for (auto&& i : input) { - result.emplace_back(i); +std::shared_ptr ReallocateBatch(const std::shared_ptr& original) { + if (!original) { + return original; } - return result; -} - -std::vector ConvertStrings(const std::vector& input) { - std::vector result; - for (auto&& i : input) { - result.emplace_back(i); + auto batches = NArrow::SliceToRecordBatches(original); + for (auto&& i : batches) { + i = NArrow::TStatusValidator::GetValid( + NArrow::NSerialization::TNativeSerializer().Deserialize(NArrow::NSerialization::TNativeSerializer().SerializeFull(i))); } - return result; + return NArrow::TStatusValidator::GetValid(arrow::Table::FromRecordBatches(batches)); } } diff --git a/ydb/core/formats/arrow/arrow_helpers.h b/ydb/core/formats/arrow/arrow_helpers.h index f6f4fd0c18a0..ea1f7a825bb2 100644 --- a/ydb/core/formats/arrow/arrow_helpers.h +++ b/ydb/core/formats/arrow/arrow_helpers.h @@ -1,5 +1,4 @@ #pragma once -#include "switch_type.h" #include "process_columns.h" #include #include @@ -9,44 +8,16 @@ #include #include #include +#include namespace NKikimr::NArrow { -using TArrayVec = std::vector>; - arrow::Result> 
GetArrowType(NScheme::TTypeInfo typeInfo); arrow::Result> GetCSVArrowType(NScheme::TTypeInfo typeId); -template -inline bool ArrayEqualValue(const std::shared_ptr& x, const std::shared_ptr& y) { - auto& arrX = static_cast(*x); - auto& arrY = static_cast(*y); - for (int i = 0; i < x->length(); ++i) { - if (arrX.Value(i) != arrY.Value(i)) { - return false; - } - } - return true; -} - -template -inline bool ArrayEqualView(const std::shared_ptr& x, const std::shared_ptr& y) { - auto& arrX = static_cast(*x); - auto& arrY = static_cast(*y); - for (int i = 0; i < x->length(); ++i) { - if (arrX.GetView(i) != arrY.GetView(i)) { - return false; - } - } - return true; -} - -struct TSortDescription; - arrow::Result MakeArrowFields(const std::vector>& columns, const std::set& notNullColumns = {}); arrow::Result> MakeArrowSchema(const std::vector>& columns, const std::set& notNullColumns = {}); -TString SerializeSchema(const arrow::Schema& schema); std::shared_ptr DeserializeSchema(const TString& str); TString SerializeBatch(const std::shared_ptr& batch, const arrow::ipc::IpcWriteOptions& options); @@ -54,28 +25,6 @@ TString SerializeBatchNoCompression(const std::shared_ptr& b std::shared_ptr DeserializeBatch(const TString& blob, const std::shared_ptr& schema); -std::shared_ptr MakeEmptyBatch(const std::shared_ptr& schema, const ui32 rowsCount = 0); -std::shared_ptr ToTable(const std::shared_ptr& batch); - -std::shared_ptr ToBatch(const std::shared_ptr& combinedTable, const bool combine); -std::shared_ptr CombineBatches(const std::vector>& batches); -std::shared_ptr MergeColumns(const std::vector>& rb); -std::vector> ShardingSplit(const std::shared_ptr& batch, const std::vector& sharding, ui32 numShards); -std::vector> ShardingSplit(const std::shared_ptr& batch, const std::vector>& shardRows, const ui32 numShards); -THashMap> ShardingSplit(const std::shared_ptr& batch, const THashMap>& shardRows); - -std::unique_ptr MakeBuilder(const std::shared_ptr& field); -std::unique_ptr 
MakeBuilder(const std::shared_ptr& type); - -std::vector> MakeBuilders(const std::shared_ptr& schema, - size_t reserve = 0, const std::map& sizeByColumn = {}); -std::vector> Finish(std::vector>&& builders); - -std::shared_ptr MakeUI64Array(ui64 value, i64 size); -std::vector ColumnNames(const std::shared_ptr& schema); -bool ReserveData(arrow::ArrayBuilder& builder, const size_t size); -bool MergeBatchColumns(const std::vector>& batches, std::shared_ptr& result, const std::vector& columnsOrder = {}, const bool orderFieldsAreNecessary = true); -bool MergeBatchColumns(const std::vector>& batches, std::shared_ptr& result, const std::vector& columnsOrder = {}, const bool orderFieldsAreNecessary = true); std::shared_ptr SortBatch(const std::shared_ptr& batch, const std::shared_ptr& sortingKey, const bool andUnique); @@ -88,33 +37,8 @@ bool IsSortedAndUnique(const std::shared_ptr& batch, void DedupSortedBatch(const std::shared_ptr& batch, const std::shared_ptr& sortingKey, std::vector>& out); -bool HasAllColumns(const std::shared_ptr& batch, const std::shared_ptr& schema); -std::pair FindMinMaxPosition(const std::shared_ptr& column); - -std::shared_ptr DefaultScalar(const std::shared_ptr& type); -std::shared_ptr MinScalar(const std::shared_ptr& type); -std::shared_ptr GetScalar(const std::shared_ptr& array, int position); -bool IsGoodScalar(const std::shared_ptr& x); -int ScalarCompare(const arrow::Scalar& x, const arrow::Scalar& y); -int ScalarCompare(const std::shared_ptr& x, const std::shared_ptr& y); -std::partial_ordering ColumnsCompare(const std::vector>& x, const ui32 xRow, const std::vector>& y, const ui32 yRow); -bool ScalarLess(const std::shared_ptr& x, const std::shared_ptr& y); -bool ScalarLess(const arrow::Scalar& x, const arrow::Scalar& y); std::shared_ptr ReallocateBatch(std::shared_ptr original); - -bool HasNulls(const std::shared_ptr& column); - -std::vector> SliceToRecordBatches(const std::shared_ptr& t); - -bool ArrayScalarsEqual(const std::shared_ptr& 
lhs, const std::shared_ptr& rhs); -std::shared_ptr BoolVecToArray(const std::vector& vec); - -NJson::TJsonValue DebugJson(std::shared_ptr array, const ui32 head, const ui32 tail); -NJson::TJsonValue DebugJson(std::shared_ptr batch, const ui32 head, const ui32 tail); - -NJson::TJsonValue DebugJson(std::shared_ptr array, const ui32 position); -TString DebugString(std::shared_ptr array, const ui32 position); -NJson::TJsonValue DebugJson(std::shared_ptr array, const ui32 position); +std::shared_ptr ReallocateBatch(const std::shared_ptr& original); } diff --git a/ydb/core/formats/arrow/common/accessor.cpp b/ydb/core/formats/arrow/common/accessor.cpp deleted file mode 100644 index 9865b2a692f7..000000000000 --- a/ydb/core/formats/arrow/common/accessor.cpp +++ /dev/null @@ -1,122 +0,0 @@ -#include "accessor.h" -#include -#include -#include -#include -#include - -namespace NKikimr::NArrow::NAccessor { - -void IChunkedArray::TReader::AppendPositionTo(arrow::ArrayBuilder& builder, const ui64 position, ui64* recordSize) const { - auto address = GetReadChunk(position); - AFL_VERIFY(NArrow::Append(builder, *address.GetArray(), address.GetPosition(), recordSize)); -} - -std::shared_ptr IChunkedArray::TReader::CopyRecord(const ui64 recordIndex) const { - auto address = GetReadChunk(recordIndex); - return NArrow::CopyRecords(address.GetArray(), {address.GetPosition()}); -} - -std::shared_ptr IChunkedArray::Slice(const ui32 offset, const ui32 count) const { - AFL_VERIFY(offset + count <= (ui64)GetRecordsCount())("offset", offset)("count", count)("length", GetRecordsCount()); - ui32 currentOffset = offset; - ui32 countLeast = count; - std::vector> chunks; - auto address = GetChunk({}, offset); - while (countLeast) { - address = GetChunk(address, currentOffset); - const ui64 internalPos = currentOffset - address.GetStartPosition(); - if (internalPos + countLeast <= (ui64)address.GetArray()->length()) { - chunks.emplace_back(address.GetArray()->Slice(internalPos, countLeast)); - 
break; - } else { - const ui32 deltaCount = address.GetArray()->length() - internalPos; - chunks.emplace_back(address.GetArray()->Slice(internalPos, deltaCount)); - AFL_VERIFY(countLeast >= deltaCount); - countLeast -= deltaCount; - currentOffset += deltaCount; - } - } - return std::make_shared(chunks, DataType); -} - -TString IChunkedArray::TReader::DebugString(const ui32 position) const { - auto address = GetReadChunk(position); - return NArrow::DebugString(address.GetArray(), address.GetPosition()); -} - -std::partial_ordering IChunkedArray::TReader::CompareColumns(const std::vector& l, const ui64 lPosition, const std::vector& r, const ui64 rPosition) { - AFL_VERIFY(l.size() == r.size()); - for (ui32 i = 0; i < l.size(); ++i) { - const TAddress lAddress = l[i].GetReadChunk(lPosition); - const TAddress rAddress = r[i].GetReadChunk(rPosition); - auto cmp = lAddress.Compare(rAddress); - if (std::is_neq(cmp)) { - return cmp; - } - } - return std::partial_ordering::equivalent; -} - -IChunkedArray::TAddress IChunkedArray::TReader::GetReadChunk(const ui64 position) const { - AFL_VERIFY(position < ChunkedArray->GetRecordsCount()); - if (CurrentChunkAddress && position < CurrentChunkAddress->GetStartPosition() + CurrentChunkAddress->GetArray()->length() && CurrentChunkAddress->GetStartPosition() <= position) { - } else { - CurrentChunkAddress = ChunkedArray->DoGetChunk(CurrentChunkAddress, position); - } - return IChunkedArray::TAddress(CurrentChunkAddress->GetArray(), position - CurrentChunkAddress->GetStartPosition(), CurrentChunkAddress->GetChunkIndex()); -} - -const std::partial_ordering IChunkedArray::TAddress::Compare(const TAddress& item) const { - return TComparator::TypedCompare(*Array, Position, *item.Array, item.Position); -} - -namespace { -class TChunkAccessor { -private: - std::shared_ptr ChunkedArray; -public: - TChunkAccessor(const std::shared_ptr& chunkedArray) - : ChunkedArray(chunkedArray) - { - - } - ui64 GetChunksCount() const { - return 
(ui64)ChunkedArray->num_chunks(); - } - ui64 GetChunkLength(const ui32 idx) const { - return (ui64)ChunkedArray->chunk(idx)->length(); - } - std::shared_ptr GetArray(const ui32 idx) const { - return ChunkedArray->chunk(idx); - } -}; - -} - -std::partial_ordering IChunkedArray::TCurrentChunkAddress::Compare(const ui64 position, const TCurrentChunkAddress& item, const ui64 itemPosition) const { - AFL_VERIFY(StartPosition <= position); - AFL_VERIFY(position < FinishPosition); - AFL_VERIFY(item.StartPosition <= itemPosition); - AFL_VERIFY(itemPosition < item.FinishPosition); - return TComparator::TypedCompare(*Array, position - StartPosition, *item.Array, itemPosition - item.StartPosition); -} - -std::shared_ptr IChunkedArray::TCurrentChunkAddress::CopyRecord(const ui64 recordIndex) const { - AFL_VERIFY(StartPosition <= recordIndex); - AFL_VERIFY(recordIndex < FinishPosition); - return NArrow::CopyRecords(Array, { recordIndex - StartPosition }); -} - -TString IChunkedArray::TCurrentChunkAddress::DebugString(const ui64 position) const { - AFL_VERIFY(position < FinishPosition); - AFL_VERIFY(StartPosition <= position); - return NArrow::DebugString(Array, position - StartPosition); -} - -IChunkedArray::TCurrentChunkAddress TTrivialChunkedArray::DoGetChunk(const std::optional& chunkCurrent, const ui64 position) const { - TChunkAccessor accessor(Array); - return SelectChunk(chunkCurrent, position, accessor); -} - -} diff --git a/ydb/core/formats/arrow/common/accessor.h b/ydb/core/formats/arrow/common/accessor.h deleted file mode 100644 index 3765d726992b..000000000000 --- a/ydb/core/formats/arrow/common/accessor.h +++ /dev/null @@ -1,216 +0,0 @@ -#pragma once -#include -#include - -#include -#include -#include - -namespace NKikimr::NArrow::NAccessor { - -class IChunkedArray { -public: - enum class EType { - Undefined, - Array, - ChunkedArray, - SerializedChunkedArray - }; - - class TCurrentChunkAddress { - private: - YDB_READONLY_DEF(std::shared_ptr, Array); - 
YDB_READONLY(ui64, StartPosition, 0); - YDB_READONLY(ui64, FinishPosition, 0); - YDB_READONLY(ui64, ChunkIndex, 0); - public: - TString DebugString(const ui64 position) const; - - ui64 GetLength() const { - return Array->length(); - } - - bool Contains(const ui64 position) const { - return position >= StartPosition && position < FinishPosition; - } - - std::shared_ptr CopyRecord(const ui64 recordIndex) const; - - std::partial_ordering Compare(const ui64 position, const TCurrentChunkAddress& item, const ui64 itemPosition) const; - - TCurrentChunkAddress(const std::shared_ptr& arr, const ui64 pos, const ui32 chunkIdx) - : Array(arr) - , StartPosition(pos) - , ChunkIndex(chunkIdx) - { - AFL_VERIFY(arr); - AFL_VERIFY(arr->length()); - FinishPosition = StartPosition + arr->length(); - } - - TString DebugString() const { - return TStringBuilder() - << "start=" << StartPosition << ";" - << "chunk_index=" << ChunkIndex << ";" - << "length=" << Array->length() << ";"; - } - }; - - class TAddress { - private: - YDB_READONLY_DEF(std::shared_ptr, Array); - YDB_READONLY(ui64, Position, 0); - YDB_READONLY(ui64, ChunkIdx, 0); - public: - bool NextPosition() { - if (Position + 1 < (ui32)Array->length()) { - ++Position; - return true; - } - return false; - } - - TAddress(const std::shared_ptr& arr, const ui64 position, const ui64 chunkIdx) - : Array(arr) - , Position(position) - , ChunkIdx(chunkIdx) - { - - } - - const std::partial_ordering Compare(const TAddress& item) const; - }; -private: - YDB_READONLY_DEF(std::shared_ptr, DataType); - YDB_READONLY(ui64, RecordsCount, 0); - YDB_READONLY(EType, Type, EType::Undefined); -protected: - virtual std::shared_ptr DoGetChunkedArray() const = 0; - virtual TCurrentChunkAddress DoGetChunk(const std::optional& chunkCurrent, const ui64 position) const = 0; - - template - TCurrentChunkAddress SelectChunk(const std::optional& chunkCurrent, const ui64 position, const TChunkAccessor& accessor) const { - if (!chunkCurrent || position >= 
chunkCurrent->GetStartPosition() + chunkCurrent->GetLength()) { - ui32 startIndex = 0; - ui64 idx = 0; - if (chunkCurrent) { - AFL_VERIFY(chunkCurrent->GetChunkIndex() + 1 < accessor.GetChunksCount()); - startIndex = chunkCurrent->GetChunkIndex() + 1; - idx = chunkCurrent->GetStartPosition() + chunkCurrent->GetLength(); - } - for (ui32 i = startIndex; i < accessor.GetChunksCount(); ++i) { - const ui64 nextIdx = idx + accessor.GetChunkLength(i); - if (idx <= position && position < nextIdx) { - return TCurrentChunkAddress(accessor.GetArray(i), idx, i); - } - idx = nextIdx; - } - } else if (position < chunkCurrent->GetStartPosition()) { - AFL_VERIFY(chunkCurrent->GetChunkIndex() > 0); - ui64 idx = chunkCurrent->GetStartPosition(); - for (i32 i = chunkCurrent->GetChunkIndex() - 1; i >= 0; --i) { - AFL_VERIFY(idx >= accessor.GetChunkLength(i))("idx", idx)("length", accessor.GetChunkLength(i)); - const ui64 nextIdx = idx - accessor.GetChunkLength(i); - if (nextIdx <= position && position < idx) { - return TCurrentChunkAddress(accessor.GetArray(i), nextIdx, i); - } - idx = nextIdx; - } - } - TStringBuilder sb; - ui64 recordsCountChunks = 0; - for (ui32 i = 0; i < accessor.GetChunksCount(); ++i) { - sb << accessor.GetChunkLength(i) << ","; - recordsCountChunks += accessor.GetChunkLength(i); - } - TStringBuilder chunkCurrentInfo; - if (chunkCurrent) { - chunkCurrentInfo << chunkCurrent->DebugString(); - } - AFL_VERIFY(recordsCountChunks == GetRecordsCount())("pos", position)("count", GetRecordsCount())("chunks_map", sb)("chunk_current", chunkCurrentInfo); - AFL_VERIFY(false)("pos", position)("count", GetRecordsCount())("chunks_map", sb)("chunk_current", chunkCurrentInfo); - return TCurrentChunkAddress(nullptr, 0, 0); - } - -public: - - class TReader { - private: - std::shared_ptr ChunkedArray; - mutable std::optional CurrentChunkAddress; - public: - TReader(const std::shared_ptr& data) - : ChunkedArray(data) - { - AFL_VERIFY(ChunkedArray); - } - - ui64 GetRecordsCount() 
const { - return ChunkedArray->GetRecordsCount(); - } - - TAddress GetReadChunk(const ui64 position) const; - static std::partial_ordering CompareColumns(const std::vector& l, const ui64 lPosition, const std::vector& r, const ui64 rPosition); - void AppendPositionTo(arrow::ArrayBuilder& builder, const ui64 position, ui64* recordSize) const; - std::shared_ptr CopyRecord(const ui64 recordIndex) const; - TString DebugString(const ui32 position) const; - }; - - std::shared_ptr GetChunkedArray() const { - return DoGetChunkedArray(); - } - virtual ~IChunkedArray() = default; - - std::shared_ptr Slice(const ui32 offset, const ui32 count) const; - - TCurrentChunkAddress GetChunk(const std::optional& chunkCurrent, const ui64 position) const { - return DoGetChunk(chunkCurrent, position); - } - - IChunkedArray(const ui64 recordsCount, const EType type, const std::shared_ptr& dataType) - : DataType(dataType) - , RecordsCount(recordsCount) - , Type(type) { - - } -}; - -class TTrivialArray: public IChunkedArray { -private: - using TBase = IChunkedArray; - const std::shared_ptr Array; -protected: - virtual TCurrentChunkAddress DoGetChunk(const std::optional& /*chunkCurrent*/, const ui64 /*position*/) const override { - return TCurrentChunkAddress(Array, 0, 0); - } - virtual std::shared_ptr DoGetChunkedArray() const override { - return std::make_shared(Array); - } - -public: - TTrivialArray(const std::shared_ptr& data) - : TBase(data->length(), EType::Array, data->type()) - , Array(data) { - - } -}; - -class TTrivialChunkedArray: public IChunkedArray { -private: - using TBase = IChunkedArray; - const std::shared_ptr Array; -protected: - virtual TCurrentChunkAddress DoGetChunk(const std::optional& chunkCurrent, const ui64 position) const override; - virtual std::shared_ptr DoGetChunkedArray() const override { - return Array; - } - -public: - TTrivialChunkedArray(const std::shared_ptr& data) - : TBase(data->length(), EType::ChunkedArray, data->type()) - , Array(data) { - - } -}; - 
-} diff --git a/ydb/core/formats/arrow/common/adapter.h b/ydb/core/formats/arrow/common/adapter.h index 543e78511146..18b2deeacc9b 100644 --- a/ydb/core/formats/arrow/common/adapter.h +++ b/ydb/core/formats/arrow/common/adapter.h @@ -1,18 +1,19 @@ #pragma once #include "container.h" -#include "accessor.h" -#include "validation.h" +#include + +#include #include -#include #include +#include #include +#include +#include #include #include -#include -#include -#include +#include namespace NKikimr::NArrow::NAdapter { @@ -27,7 +28,8 @@ class TDataBuilderPolicy { using TColumn = arrow::Array; using TAccessor = NAccessor::TTrivialArray; - [[nodiscard]] static std::shared_ptr AddColumn(const std::shared_ptr& batch, const std::shared_ptr& field, const std::shared_ptr& extCol) { + [[nodiscard]] static std::shared_ptr AddColumn(const std::shared_ptr& batch, + const std::shared_ptr& field, const std::shared_ptr& extCol) { return TStatusValidator::GetValid(batch->AddColumn(batch->num_columns(), field, extCol)); } @@ -37,7 +39,8 @@ class TDataBuilderPolicy { [[nodiscard]] static std::shared_ptr Build(const std::shared_ptr& schema, std::vector>&& columns, const ui32 count) { return arrow::RecordBatch::Make(schema, count, std::move(columns)); } - [[nodiscard]] static std::shared_ptr ApplyArrowFilter(const std::shared_ptr& batch, const std::shared_ptr& filter) { + [[nodiscard]] static std::shared_ptr ApplyArrowFilter( + const std::shared_ptr& batch, const std::shared_ptr& filter) { auto res = arrow::compute::Filter(batch, filter); Y_VERIFY_S(res.ok(), res.status().message()); Y_ABORT_UNLESS(res->kind() == arrow::Datum::RECORD_BATCH); @@ -46,7 +49,6 @@ class TDataBuilderPolicy { [[nodiscard]] static std::shared_ptr GetEmptySame(const std::shared_ptr& batch) { return batch->Slice(0, 0); } - }; template <> @@ -60,11 +62,13 @@ class TDataBuilderPolicy { [[nodiscard]] static std::shared_ptr Build(const std::shared_ptr& schema, std::vector>&& columns, const ui32 count) { return 
arrow::Table::Make(schema, std::move(columns), count); } - [[nodiscard]] static std::shared_ptr AddColumn(const std::shared_ptr& batch, const std::shared_ptr& field, const std::shared_ptr& extCol) { + [[nodiscard]] static std::shared_ptr AddColumn( + const std::shared_ptr& batch, const std::shared_ptr& field, const std::shared_ptr& extCol) { return TStatusValidator::GetValid(batch->AddColumn(batch->num_columns(), field, std::make_shared(extCol))); } - [[nodiscard]] static std::shared_ptr ApplyArrowFilter(const std::shared_ptr& batch, const std::shared_ptr& filter) { + [[nodiscard]] static std::shared_ptr ApplyArrowFilter( + const std::shared_ptr& batch, const std::shared_ptr& filter) { auto res = arrow::compute::Filter(batch, filter); Y_VERIFY_S(res.ok(), res.status().message()); Y_ABORT_UNLESS(res->kind() == arrow::Datum::TABLE); @@ -86,12 +90,14 @@ class TDataBuilderPolicy { } return std::make_shared(std::make_shared(std::move(fields)), std::move(columns)); } - [[nodiscard]] static std::shared_ptr AddColumn(const std::shared_ptr& batch, const std::shared_ptr& field, const std::shared_ptr& extCol) { + [[nodiscard]] static std::shared_ptr AddColumn(const std::shared_ptr& batch, + const std::shared_ptr& field, const std::shared_ptr& extCol) { batch->AddField(field, std::make_shared(extCol)).Validate(); return batch; } - [[nodiscard]] static std::shared_ptr ApplyArrowFilter(const std::shared_ptr& batch, const std::shared_ptr& filter) { - auto table = batch->BuildTable(); + [[nodiscard]] static std::shared_ptr ApplyArrowFilter( + const std::shared_ptr& batch, const std::shared_ptr& filter) { + auto table = batch->BuildTableVerified(); return std::make_shared(TDataBuilderPolicy::ApplyArrowFilter(table, filter)); } [[nodiscard]] static std::shared_ptr GetEmptySame(const std::shared_ptr& batch) { @@ -99,4 +105,4 @@ class TDataBuilderPolicy { } }; -} +} // namespace NKikimr::NArrow::NAdapter diff --git a/ydb/core/formats/arrow/common/container.cpp 
b/ydb/core/formats/arrow/common/container.cpp index ccf8dc71fb0c..7b159f2eef06 100644 --- a/ydb/core/formats/arrow/common/container.cpp +++ b/ydb/core/formats/arrow/common/container.cpp @@ -1,50 +1,71 @@ #include "container.h" + +#include +#include + #include -#include +#include +#include namespace NKikimr::NArrow { -NKikimr::TConclusionStatus TGeneralContainer::MergeColumnsStrictly(const TGeneralContainer& container) { - if (RecordsCount != container.RecordsCount) { - return TConclusionStatus::Fail(TStringBuilder() << "inconsistency records count in additional container: " << - container.GetSchema()->ToString() << ". expected: " << RecordsCount << ", reality: " << container.GetRecordsCount()); +TConclusionStatus TGeneralContainer::MergeColumnsStrictly(const TGeneralContainer& container) { + if (!container.RecordsCount) { + return TConclusionStatus::Success(); + } + if (!RecordsCount) { + RecordsCount = container.RecordsCount; + } + if (*RecordsCount != *container.RecordsCount) { + return TConclusionStatus::Fail(TStringBuilder() + << "inconsistency records count in additional container: " << container.GetSchema()->ToString() + << ". expected: " << RecordsCount << ", reality: " << container.GetRecordsCount()); } for (i32 i = 0; i < container.Schema->num_fields(); ++i) { auto addFieldResult = AddField(container.Schema->field(i), container.Columns[i]); - if (!addFieldResult) { + if (addFieldResult.IsFail()) { return addFieldResult; } } return TConclusionStatus::Success(); } -NKikimr::TConclusionStatus TGeneralContainer::AddField(const std::shared_ptr& f, const std::shared_ptr& data) { +TConclusionStatus TGeneralContainer::AddField(const std::shared_ptr& f, const std::shared_ptr& data) { AFL_VERIFY(f); AFL_VERIFY(data); - if (data->GetRecordsCount() != RecordsCount) { - return TConclusionStatus::Fail(TStringBuilder() << "inconsistency records count in new column: " << - f->name() << ". 
expected: " << RecordsCount << ", reality: " << data->GetRecordsCount()); + if (RecordsCount && data->GetRecordsCount() != *RecordsCount) { + return TConclusionStatus::Fail(TStringBuilder() << "inconsistency records count in new column: " << f->name() + << ". expected: " << RecordsCount << ", reality: " << data->GetRecordsCount()); } if (!data->GetDataType()->Equals(f->type())) { - return TConclusionStatus::Fail("schema and data type are not equals: " + data->GetDataType()->ToString() + " vs " + f->type()->ToString()); - } - if (Schema->GetFieldByName(f->name())) { - return TConclusionStatus::Fail("field name duplication: " + f->name()); + return TConclusionStatus::Fail( + "schema and data type are not equals: " + data->GetDataType()->ToString() + " vs " + f->type()->ToString()); } - auto resultAdd = Schema->AddField(Schema->num_fields(), f); - if (!resultAdd.ok()) { - return TConclusionStatus::Fail("internal schema error on add field: " + resultAdd.status().ToString()); + { + auto conclusion = Schema->AddField(f); + if (conclusion.IsFail()) { + return conclusion; + } } - Schema = *resultAdd; + RecordsCount = data->GetRecordsCount(); Columns.emplace_back(data); return TConclusionStatus::Success(); } -TGeneralContainer::TGeneralContainer(const std::shared_ptr& schema, std::vector>&& columns) - : Schema(schema) - , Columns(std::move(columns)) -{ - AFL_VERIFY(schema); +TConclusionStatus TGeneralContainer::AddField(const std::shared_ptr& f, const std::shared_ptr& data) { + return AddField(f, std::make_shared(data)); +} + +TConclusionStatus TGeneralContainer::AddField(const std::shared_ptr& f, const std::shared_ptr& data) { + return AddField(f, std::make_shared(data)); +} + +void TGeneralContainer::DeleteFieldsByIndex(const std::vector& idxs) { + Schema->DeleteFieldsByIndex(idxs); + NUtil::EraseItems(Columns, idxs); +} + +void TGeneralContainer::Initialize() { std::optional recordsCount; AFL_VERIFY(Schema->num_fields() == (i32)Columns.size())("schema", 
Schema->num_fields())("columns", Columns.size()); for (i32 i = 0; i < Schema->num_fields(); ++i) { @@ -54,17 +75,40 @@ TGeneralContainer::TGeneralContainer(const std::shared_ptr& schem recordsCount = Columns[i]->GetRecordsCount(); } else { AFL_VERIFY(*recordsCount == Columns[i]->GetRecordsCount()) - ("event", "inconsistency_records_count")("expect", *recordsCount)("real", Columns[i]->GetRecordsCount())("field_name", Schema->field(i)->name()); + ("event", "inconsistency_records_count")("expect", *recordsCount)("real", Columns[i]->GetRecordsCount())( + "field_name", Schema->field(i)->name()); } } AFL_VERIFY(recordsCount); + AFL_VERIFY(!RecordsCount || *RecordsCount == *recordsCount); RecordsCount = *recordsCount; } -TGeneralContainer::TGeneralContainer(const std::shared_ptr& table) { +TGeneralContainer::TGeneralContainer( + const std::vector>& fields, std::vector>&& columns) + : Schema(std::make_shared(fields)) + , Columns(std::move(columns)) { + Initialize(); +} + +TGeneralContainer::TGeneralContainer( + const std::shared_ptr& schema, std::vector>&& columns) + : Schema(std::make_shared(schema)) + , Columns(std::move(columns)) { + Initialize(); +} + +TGeneralContainer::TGeneralContainer( + const std::shared_ptr& schema, std::vector>&& columns) + : Schema(std::make_shared(schema)) + , Columns(std::move(columns)) { + Initialize(); +} + +TGeneralContainer::TGeneralContainer(const std::shared_ptr& table) + : RecordsCount(TValidator::CheckNotNull(table)->num_rows()) + , Schema(std::make_shared(TValidator::CheckNotNull(table)->schema())) { AFL_VERIFY(table); - Schema = table->schema(); - RecordsCount = table->num_rows(); for (auto&& i : table->columns()) { if (i->num_chunks() == 1) { Columns.emplace_back(std::make_shared(i->chunk(0))); @@ -72,15 +116,22 @@ TGeneralContainer::TGeneralContainer(const std::shared_ptr& table) Columns.emplace_back(std::make_shared(i)); } } + Initialize(); } -TGeneralContainer::TGeneralContainer(const std::shared_ptr& table) { 
+TGeneralContainer::TGeneralContainer(const std::shared_ptr& table) + : RecordsCount(TValidator::CheckNotNull(table)->num_rows()) + , Schema(std::make_shared(TValidator::CheckNotNull(table)->schema())) { AFL_VERIFY(table); - Schema = table->schema(); - RecordsCount = table->num_rows(); for (auto&& i : table->columns()) { Columns.emplace_back(std::make_shared(i)); } + Initialize(); +} + +TGeneralContainer::TGeneralContainer(const ui32 recordsCount) + : RecordsCount(recordsCount) + , Schema(std::make_shared()) { } std::shared_ptr TGeneralContainer::GetAccessorByNameVerified(const std::string& fieldId) const { @@ -110,14 +161,82 @@ std::shared_ptr TGeneralContainer::BuildTableOptional(const std::o if (fields.empty()) { return nullptr; } - return arrow::Table::Make(std::make_shared(fields), columns, RecordsCount); + AFL_VERIFY(RecordsCount); + return arrow::Table::Make(std::make_shared(fields), columns, *RecordsCount); } -std::shared_ptr TGeneralContainer::BuildTable(const std::optional>& columnNames /*= {}*/) const { +std::shared_ptr TGeneralContainer::BuildTableVerified(const std::optional>& columnNames /*= {}*/) const { auto result = BuildTableOptional(columnNames); AFL_VERIFY(result); AFL_VERIFY(!columnNames || result->schema()->num_fields() == (i32)columnNames->size()); return result; } +std::shared_ptr TGeneralContainer::GetAccessorByNameOptional(const std::string& fieldId) const { + int idx = Schema->GetFieldIndex(fieldId); + if (idx == -1) { + return nullptr; + } + AFL_VERIFY((ui32)idx < Columns.size())("idx", idx)("count", Columns.size()); + return Columns[idx]; +} + +TConclusionStatus TGeneralContainer::SyncSchemaTo( + const std::shared_ptr& schema, const IFieldsConstructor* defaultFieldsConstructor, const bool forceDefaults) { + std::shared_ptr schemaNew = std::make_shared(); + std::vector> columnsNew; + if (!RecordsCount) { + return TConclusionStatus::Fail("original container has not data"); + } + for (auto&& i : schema->fields()) { + const int idx = 
Schema->GetFieldIndex(i->name()); + if (idx == -1) { + if (!defaultFieldsConstructor) { + return TConclusionStatus::Fail("haven't field for sync: '" + i->name() + "'"); + } else { + schemaNew->AddField(i).Validate(); + auto defConclusion = defaultFieldsConstructor->GetDefaultColumnElementValue(i, forceDefaults); + if (defConclusion.IsFail()) { + return defConclusion; + } + columnsNew.emplace_back( + std::make_shared(NArrow::TThreadSimpleArraysCache::Get(i->type(), *defConclusion, *RecordsCount))); + } + } else { + const auto& fOwned = Schema->GetFieldVerified(idx); + if (!fOwned->type()->Equals(i->type())) { + return TConclusionStatus::Fail( + "different field types for '" + i->name() + "'. Have " + fOwned->type()->ToString() + ", need " + i->type()->ToString()); + } + schemaNew->AddField(fOwned).Validate(); + columnsNew.emplace_back(Columns[idx]); + } + } + std::swap(Schema, schemaNew); + std::swap(columnsNew, Columns); + return TConclusionStatus::Success(); } + +TString TGeneralContainer::DebugString() const { + TStringBuilder result; + if (RecordsCount) { + result << "records_count=" << *RecordsCount << ";"; + } + result << "schema=" << Schema->ToString() << ";"; + return result; +} + +TConclusion> IFieldsConstructor::GetDefaultColumnElementValue( + const std::shared_ptr& field, const bool force) const { + AFL_VERIFY(field); + auto result = DoGetDefaultColumnElementValue(field->name()); + if (result) { + return result; + } + if (force) { + return NArrow::DefaultScalar(field->type()); + } + return TConclusionStatus::Fail("have not default value for column " + field->name()); +} + +} // namespace NKikimr::NArrow diff --git a/ydb/core/formats/arrow/common/container.h b/ydb/core/formats/arrow/common/container.h index 25262d14ff4a..dacd5d62c0b0 100644 --- a/ydb/core/formats/arrow/common/container.h +++ b/ydb/core/formats/arrow/common/container.h @@ -1,8 +1,10 @@ #pragma once -#include "accessor.h" #include +#include #include +#include +#include #include #include @@ 
-12,50 +14,78 @@ namespace NKikimr::NArrow { +class IFieldsConstructor { +private: + virtual std::shared_ptr DoGetDefaultColumnElementValue(const std::string& fieldName) const = 0; +public: + TConclusion> GetDefaultColumnElementValue(const std::shared_ptr& field, const bool force) const; +}; + class TGeneralContainer { private: - YDB_READONLY(ui64, RecordsCount, 0); - YDB_READONLY_DEF(std::shared_ptr, Schema); + std::optional RecordsCount; + YDB_READONLY_DEF(std::shared_ptr, Schema); std::vector> Columns; + void Initialize(); public: - TString DebugString() const { - return TStringBuilder() - << "records_count=" << RecordsCount << ";" - << "schema=" << Schema->ToString() << ";" - ; + TGeneralContainer(const ui32 recordsCount); + + ui32 GetRecordsCount() const { + AFL_VERIFY(RecordsCount); + return *RecordsCount; + } + + TString DebugString() const; + + [[nodiscard]] TConclusionStatus SyncSchemaTo(const std::shared_ptr& schema, + const IFieldsConstructor* defaultFieldsConstructor, const bool forceDefaults); + + bool HasColumn(const std::string& name) { + return Schema->HasField(name); + } + + ui64 num_columns() const { + return Columns.size(); } ui64 num_rows() const { - return RecordsCount; + AFL_VERIFY(RecordsCount); + return *RecordsCount; } - std::shared_ptr BuildTable(const std::optional>& columnNames = {}) const; + ui32 GetColumnsCount() const { + return Columns.size(); + } + + const std::shared_ptr& GetColumnVerified(const ui32 idx) const { + AFL_VERIFY(idx < Columns.size()); + return Columns[idx]; + } + + std::shared_ptr BuildTableVerified(const std::optional>& columnNames = {}) const; std::shared_ptr BuildTableOptional(const std::optional>& columnNames = {}) const; std::shared_ptr BuildEmptySame() const; [[nodiscard]] TConclusionStatus MergeColumnsStrictly(const TGeneralContainer& container); [[nodiscard]] TConclusionStatus AddField(const std::shared_ptr& f, const std::shared_ptr& data); + [[nodiscard]] TConclusionStatus AddField(const std::shared_ptr& f, 
const std::shared_ptr& data); - TGeneralContainer(const std::shared_ptr& table); + [[nodiscard]] TConclusionStatus AddField(const std::shared_ptr& f, const std::shared_ptr& data); - TGeneralContainer(const std::shared_ptr& table); + void DeleteFieldsByIndex(const std::vector& idxs); + TGeneralContainer(const std::shared_ptr& table); + TGeneralContainer(const std::shared_ptr& table); TGeneralContainer(const std::shared_ptr& schema, std::vector>&& columns); + TGeneralContainer(const std::shared_ptr& schema, std::vector>&& columns); + TGeneralContainer(const std::vector>& fields, std::vector>&& columns); arrow::Status ValidateFull() const { return arrow::Status::OK(); } - std::shared_ptr GetAccessorByNameOptional(const std::string& fieldId) const { - for (i32 i = 0; i < Schema->num_fields(); ++i) { - if (Schema->field(i)->name() == fieldId) { - return Columns[i]; - } - } - return nullptr; - } - + std::shared_ptr GetAccessorByNameOptional(const std::string& fieldId) const; std::shared_ptr GetAccessorByNameVerified(const std::string& fieldId) const; }; diff --git a/ydb/core/formats/arrow/common/ya.make b/ydb/core/formats/arrow/common/ya.make index 61f742b09b76..fc34c380aeb0 100644 --- a/ydb/core/formats/arrow/common/ya.make +++ b/ydb/core/formats/arrow/common/ya.make @@ -5,13 +5,13 @@ PEERDIR( ydb/core/formats/arrow/switch ydb/library/actors/core ydb/library/conclusion + ydb/library/formats/arrow + ydb/core/formats/arrow/splitter ) SRCS( container.cpp - validation.cpp adapter.cpp - accessor.cpp ) END() diff --git a/ydb/core/formats/arrow/converter.cpp b/ydb/core/formats/arrow/converter.cpp index 1bd0c92e2ceb..f0a38e2c8149 100644 --- a/ydb/core/formats/arrow/converter.cpp +++ b/ydb/core/formats/arrow/converter.cpp @@ -1,5 +1,5 @@ #include "converter.h" -#include "switch_type.h" +#include "switch/switch_type.h" #include #include diff --git a/ydb/core/formats/arrow/custom_registry.cpp b/ydb/core/formats/arrow/custom_registry.cpp index 13e8dc6150a3..9d61c8bf6476 100644 --- 
a/ydb/core/formats/arrow/custom_registry.cpp +++ b/ydb/core/formats/arrow/custom_registry.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #endif namespace cp = ::arrow::compute; @@ -62,6 +63,10 @@ static void RegisterYdbCast(cp::FunctionRegistry* registry) { Y_ABORT_UNLESS(registry->AddFunction(std::make_shared()).ok()); } +static void RegisterCustomAggregates(cp::FunctionRegistry* registry) { + Y_ABORT_UNLESS(registry->AddFunction(std::make_shared(GetFunctionName(EAggregate::NumRows))).ok()); +} + static void RegisterHouseAggregates(cp::FunctionRegistry* registry) { #ifndef WIN32 try { @@ -71,6 +76,7 @@ static void RegisterHouseAggregates(cp::FunctionRegistry* registry) { Y_ABORT_UNLESS(registry->AddFunction(std::make_shared(GetHouseFunctionName(EAggregate::Max))).ok()); Y_ABORT_UNLESS(registry->AddFunction(std::make_shared(GetHouseFunctionName(EAggregate::Sum))).ok()); //Y_ABORT_UNLESS(registry->AddFunction(std::make_shared(GetHouseFunctionName(EAggregate::Avg))).ok()); + Y_ABORT_UNLESS(registry->AddFunction(std::make_shared(GetHouseFunctionName(EAggregate::NumRows))).ok()); Y_ABORT_UNLESS(registry->AddFunction(std::make_shared(GetHouseGroupByName())).ok()); } catch (const std::exception& /*ex*/) { @@ -88,6 +94,7 @@ static std::unique_ptr CreateCustomRegistry() { RegisterRound(registry.get()); RegisterArithmetic(registry.get()); RegisterYdbCast(registry.get()); + RegisterCustomAggregates(registry.get()); RegisterHouseAggregates(registry.get()); return registry; } diff --git a/ydb/core/formats/arrow/dictionary/conversion.cpp b/ydb/core/formats/arrow/dictionary/conversion.cpp index 026ae6ba5d6e..b1decbf14bb6 100644 --- a/ydb/core/formats/arrow/dictionary/conversion.cpp +++ b/ydb/core/formats/arrow/dictionary/conversion.cpp @@ -1,8 +1,8 @@ #include "conversion.h" #include -#include -#include #include +#include +#include namespace NKikimr::NArrow { @@ -131,11 +131,4 @@ bool IsDictionableArray(const std::shared_ptr& data) { return result; } -ui64 
GetDictionarySize(const std::shared_ptr& data) { - if (!data) { - return 0; - } - return GetArrayDataSize(data->dictionary()) + GetArrayDataSize(data->indices()); -} - } diff --git a/ydb/core/formats/arrow/dictionary/conversion.h b/ydb/core/formats/arrow/dictionary/conversion.h index ee044bfd514b..dfedb4aa31a0 100644 --- a/ydb/core/formats/arrow/dictionary/conversion.h +++ b/ydb/core/formats/arrow/dictionary/conversion.h @@ -7,7 +7,6 @@ namespace NKikimr::NArrow { bool IsDictionableArray(const std::shared_ptr& data); -ui64 GetDictionarySize(const std::shared_ptr& data); std::shared_ptr ArrayToDictionary(const std::shared_ptr& data); std::shared_ptr ArrayToDictionary(const std::shared_ptr& data); std::shared_ptr DictionaryToArray(const std::shared_ptr& data); diff --git a/ydb/core/formats/arrow/dictionary/object.cpp b/ydb/core/formats/arrow/dictionary/object.cpp index 4a72802b2aec..36c9fe3fc276 100644 --- a/ydb/core/formats/arrow/dictionary/object.cpp +++ b/ydb/core/formats/arrow/dictionary/object.cpp @@ -1,6 +1,6 @@ #include "object.h" -#include #include +#include #include namespace NKikimr::NArrow::NDictionary { diff --git a/ydb/core/formats/arrow/dictionary/object.h b/ydb/core/formats/arrow/dictionary/object.h index 2fd4d6a12924..09f5efebb56f 100644 --- a/ydb/core/formats/arrow/dictionary/object.h +++ b/ydb/core/formats/arrow/dictionary/object.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include namespace NKikimr::NArrow::NDictionary { diff --git a/ydb/core/formats/arrow/dictionary/ya.make b/ydb/core/formats/arrow/dictionary/ya.make index ea71f4c7dacf..35639ea43bea 100644 --- a/ydb/core/formats/arrow/dictionary/ya.make +++ b/ydb/core/formats/arrow/dictionary/ya.make @@ -3,9 +3,11 @@ LIBRARY() PEERDIR( contrib/libs/apache/arrow ydb/core/protos - ydb/core/formats/arrow/simple_builder ydb/core/formats/arrow/switch ydb/library/actors/core + ydb/library/formats/arrow/transformer + ydb/library/formats/arrow/common + ydb/library/formats/arrow/simple_builder 
) SRCS( diff --git a/ydb/core/formats/arrow/hash/calcer.cpp b/ydb/core/formats/arrow/hash/calcer.cpp index 71af0492cfea..d5fa4a8dd6a3 100644 --- a/ydb/core/formats/arrow/hash/calcer.cpp +++ b/ydb/core/formats/arrow/hash/calcer.cpp @@ -1,8 +1,8 @@ #include "calcer.h" -#include "xx_hash.h" #include #include #include +#include #include #include #include diff --git a/ydb/core/formats/arrow/hash/calcer.h b/ydb/core/formats/arrow/hash/calcer.h index d82f669fbee1..51dfe7858f8c 100644 --- a/ydb/core/formats/arrow/hash/calcer.h +++ b/ydb/core/formats/arrow/hash/calcer.h @@ -1,11 +1,11 @@ #pragma once -#include "xx_hash.h" #include -#include #include #include #include +#include +#include #include #include diff --git a/ydb/core/formats/arrow/hash/ya.make b/ydb/core/formats/arrow/hash/ya.make index 6d9a98b836a6..d7337f6b5588 100644 --- a/ydb/core/formats/arrow/hash/ya.make +++ b/ydb/core/formats/arrow/hash/ya.make @@ -2,17 +2,18 @@ LIBRARY() PEERDIR( contrib/libs/apache/arrow - ydb/core/formats/arrow/simple_builder ydb/core/formats/arrow/switch ydb/core/formats/arrow/reader ydb/library/actors/core ydb/library/services ydb/library/actors/protos + ydb/library/formats/arrow/hash + ydb/library/formats/arrow/common + ydb/library/formats/arrow/simple_builder ) SRCS( calcer.cpp - xx_hash.cpp ) END() diff --git a/ydb/core/formats/arrow/permutations.cpp b/ydb/core/formats/arrow/permutations.cpp index 623bc15c9221..c36a09779c6d 100644 --- a/ydb/core/formats/arrow/permutations.cpp +++ b/ydb/core/formats/arrow/permutations.cpp @@ -1,13 +1,13 @@ #include "permutations.h" #include "arrow_helpers.h" -#include "replace_key.h" #include "size_calcer.h" #include "hash/calcer.h" -#include #include +#include +#include #include #include @@ -15,28 +15,6 @@ namespace NKikimr::NArrow { -std::shared_ptr MakePermutation(const int size, const bool reverse) { - arrow::UInt64Builder builder; - TStatusValidator::Validate(builder.Reserve(size)); - - if (size) { - if (reverse) { - ui64 value = size - 1; - for 
(i64 i = 0; i < size; ++i, --value) { - TStatusValidator::Validate(builder.Append(value)); - } - } else { - for (i64 i = 0; i < size; ++i) { - TStatusValidator::Validate(builder.Append(i)); - } - } - } - - std::shared_ptr out; - TStatusValidator::Validate(builder.Finish(&out)); - return out; -} - std::shared_ptr MakeSortPermutation(const std::shared_ptr& batch, const std::shared_ptr& sortingKey, const bool andUnique) { auto keyBatch = TColumnOperator().VerifyIfAbsent().Adapt(batch, sortingKey).DetachResult(); auto keyColumns = std::make_shared(keyBatch->columns()); @@ -100,84 +78,6 @@ std::shared_ptr MakeSortPermutation(const std::shared_ptr -std::shared_ptr MakeFilterPermutationImpl(const std::vector& indexes) { - if (indexes.empty()) { - return {}; - } - - arrow::UInt64Builder builder; - if (!builder.Reserve(indexes.size()).ok()) { - return {}; - } - - for (auto&& i : indexes) { - TStatusValidator::Validate(builder.Append(i)); - } - std::shared_ptr out; - TStatusValidator::Validate(builder.Finish(&out)); - return out; -} - -std::shared_ptr MakeFilterPermutation(const std::vector& indexes) { - return MakeFilterPermutationImpl(indexes); -} - -std::shared_ptr MakeFilterPermutation(const std::vector& indexes) { - return MakeFilterPermutationImpl(indexes); -} - -std::shared_ptr CopyRecords(const std::shared_ptr& source, const std::vector& indexes) { - Y_ABORT_UNLESS(!!source); - auto schema = source->schema(); - std::vector> columns; - for (auto&& i : source->columns()) { - columns.emplace_back(CopyRecords(i, indexes)); - } - return arrow::RecordBatch::Make(schema, indexes.size(), columns); -} - -std::shared_ptr CopyRecords(const std::shared_ptr& source, const std::vector& indexes) { - if (!source) { - return source; - } - std::shared_ptr result; - SwitchType(source->type_id(), [&](const auto& type) { - using TWrap = std::decay_t; - using TArray = typename arrow::TypeTraits::ArrayType; - using TBuilder = typename arrow::TypeTraits::BuilderType; - auto& column = 
static_cast(*source); - - std::unique_ptr builder; - TStatusValidator::Validate(arrow::MakeBuilder(arrow::default_memory_pool(), source->type(), &builder)); - auto& builderImpl = static_cast(*builder); - - if constexpr (arrow::has_string_view::value) { - ui64 sumByIndexes = 0; - for (auto&& idx : indexes) { - Y_ABORT_UNLESS(idx < (ui64)column.length()); - sumByIndexes += column.GetView(idx).size(); - } - TStatusValidator::Validate(builderImpl.ReserveData(sumByIndexes)); - } - - TStatusValidator::Validate(builder->Reserve(indexes.size())); - - { - const ui32 arraySize = column.length(); - for (auto&& i : indexes) { - Y_ABORT_UNLESS(i < arraySize); - builderImpl.UnsafeAppend(column.GetView(i)); - } - } - - TStatusValidator::Validate(builder->Finish(&result)); - return true; - }); - Y_ABORT_UNLESS(result); - return result; -} - namespace { template @@ -213,101 +113,4 @@ bool THashConstructor::BuildHashUI64(std::shared_ptr& batch, return BuildHashUI64Impl(batch, fieldNames, hashFieldName); } -ui64 TShardedRecordBatch::GetMemorySize() const { - return NArrow::GetTableMemorySize(RecordBatch); -} - -TShardedRecordBatch::TShardedRecordBatch(const std::shared_ptr& batch) { - AFL_VERIFY(batch); - RecordBatch = TStatusValidator::GetValid(arrow::Table::FromRecordBatches(batch->schema(), {batch})); -} - - -TShardedRecordBatch::TShardedRecordBatch(const std::shared_ptr& batch) - : RecordBatch(batch) -{ - AFL_VERIFY(RecordBatch); -} - -TShardedRecordBatch::TShardedRecordBatch(const std::shared_ptr& batch, std::vector>&& splittedByShards) - : RecordBatch(batch) - , SplittedByShards(std::move(splittedByShards)) -{ - AFL_VERIFY(RecordBatch); - AFL_VERIFY(SplittedByShards.size()); -} - -std::vector> TShardingSplitIndex::Apply(const std::shared_ptr& input) { - AFL_VERIFY(input); - AFL_VERIFY(input->num_rows() == RecordsCount); - auto permutation = BuildPermutation(); - auto resultBatch = NArrow::TStatusValidator::GetValid(arrow::compute::Take(input, *permutation)).table(); - 
AFL_VERIFY(resultBatch->num_rows() == RecordsCount); - std::vector> result; - ui64 startIndex = 0; - for (auto&& i : Remapping) { - result.emplace_back(resultBatch->Slice(startIndex, i.size())); - startIndex += i.size(); - } - AFL_VERIFY(startIndex == RecordsCount); - return result; -} - -NKikimr::NArrow::TShardedRecordBatch TShardingSplitIndex::Apply(const ui32 shardsCount, const std::shared_ptr& input, const std::string& hashColumnName) { - AFL_VERIFY(input); - if (shardsCount == 1) { - return TShardedRecordBatch(input); - } - auto hashColumn = input->GetColumnByName(hashColumnName); - if (!hashColumn) { - return TShardedRecordBatch(input); - } - std::optional splitter; - if (hashColumn->type()->id() == arrow::Type::UINT64) { - splitter = TShardingSplitIndex::Build(shardsCount, *hashColumn); - } else if (hashColumn->type()->id() == arrow::Type::UINT32) { - splitter = TShardingSplitIndex::Build(shardsCount, *hashColumn); - } else if (hashColumn->type()->id() == arrow::Type::INT64) { - splitter = TShardingSplitIndex::Build(shardsCount, *hashColumn); - } else if (hashColumn->type()->id() == arrow::Type::INT32) { - splitter = TShardingSplitIndex::Build(shardsCount, *hashColumn); - } else { - Y_ABORT_UNLESS(false); - } - auto resultBatch = NArrow::TStatusValidator::GetValid(input->RemoveColumn(input->schema()->GetFieldIndex(hashColumnName))); - return TShardedRecordBatch(resultBatch, splitter->DetachRemapping()); -} - -TShardedRecordBatch TShardingSplitIndex::Apply(const ui32 shardsCount, const std::shared_ptr& input, const std::string& hashColumnName) { - return Apply(shardsCount, TStatusValidator::GetValid(arrow::Table::FromRecordBatches(input->schema(), {input})) - , hashColumnName); -} - -std::shared_ptr TShardingSplitIndex::BuildPermutation() const { - arrow::UInt64Builder builder; - Y_ABORT_UNLESS(builder.Reserve(RecordsCount).ok()); - - for (auto&& i : Remapping) { - for (auto&& idx : i) { - TStatusValidator::Validate(builder.Append(idx)); - } - } - - 
std::shared_ptr out; - Y_ABORT_UNLESS(builder.Finish(&out).ok()); - return out; -} - -std::shared_ptr ReverseRecords(const std::shared_ptr& batch) { - AFL_VERIFY(batch); - auto permutation = NArrow::MakePermutation(batch->num_rows(), true); - return NArrow::TStatusValidator::GetValid(arrow::compute::Take(batch, permutation)).record_batch(); -} - -std::shared_ptr ReverseRecords(const std::shared_ptr& batch) { - AFL_VERIFY(batch); - auto permutation = NArrow::MakePermutation(batch->num_rows(), true); - return NArrow::TStatusValidator::GetValid(arrow::compute::Take(batch, permutation)).table(); -} - } diff --git a/ydb/core/formats/arrow/permutations.h b/ydb/core/formats/arrow/permutations.h index 73a433ee52a2..f8c62fb87107 100644 --- a/ydb/core/formats/arrow/permutations.h +++ b/ydb/core/formats/arrow/permutations.h @@ -2,6 +2,7 @@ #include "arrow_helpers.h" #include +#include #include #include #include @@ -15,143 +16,6 @@ class THashConstructor { }; -class TShardedRecordBatch { -private: - YDB_READONLY_DEF(std::shared_ptr, RecordBatch); - YDB_READONLY_DEF(std::vector>, SplittedByShards); -public: - TShardedRecordBatch(const std::shared_ptr& batch); - TShardedRecordBatch(const std::shared_ptr& batch); - - void Cut(const ui32 limit) { - RecordBatch = RecordBatch->Slice(0, limit); - for (auto&& i : SplittedByShards) { - auto it = std::lower_bound(i.begin(), i.end(), limit); - if (it != i.end()) { - i.erase(it, i.end()); - } - } - } - - bool IsSharded() const { - return SplittedByShards.size() > 1; - } - - TShardedRecordBatch(const std::shared_ptr& batch, std::vector>&& splittedByShards); - - ui64 GetMemorySize() const; - - ui64 GetRecordsCount() const { - return RecordBatch->num_rows(); - } -}; - -class TShardingSplitIndex { -private: - ui32 ShardsCount = 0; - std::vector> Remapping; - ui32 RecordsCount = 0; - - template - std::vector MergeLists(const std::vector& base, const TIterator itFrom, const TIterator itTo) { - std::vector result; - result.reserve(base.size() + 
(itTo - itFrom)); - auto itBase = base.begin(); - auto itExt = itFrom; - while (itBase != base.end() && itExt != itTo) { - if (*itBase < *itExt) { - result.emplace_back(*itBase); - ++itBase; - } else { - result.emplace_back(*itExt); - ++itExt; - } - } - if (itBase == base.end()) { - result.insert(result.end(), itExt, itTo); - } else if (itExt == itTo) { - result.insert(result.end(), itBase, base.end()); - } - return result; - } - - template - void Initialize(const arrow::ChunkedArray& arrowHashArrayChunked) { - Y_ABORT_UNLESS(ShardsCount); - Remapping.resize(ShardsCount); - const ui32 expectation = arrowHashArrayChunked.length() / ShardsCount + 1; - for (auto&& i : Remapping) { - i.reserve(2 * expectation); - } - for (auto&& arrowHashArrayAbstract : arrowHashArrayChunked.chunks()) { - auto& arrowHashArray = static_cast(*arrowHashArrayAbstract); - ui64 offset = 0; - for (ui64 i = 0; i < (ui64)arrowHashArray.length(); ++i) { - const i64 v = arrowHashArray.GetView(i); - const ui32 idx = ((v < 0) ? 
(-v) : v) % ShardsCount; - Remapping[idx].emplace_back(offset + i); - } - offset += (ui64)arrowHashArray.length(); - } - std::deque*> sizeCorrection; - for (auto&& i : Remapping) { - sizeCorrection.emplace_back(&i); - } - const auto pred = [](const std::vector* l, const std::vector* r) { - return l->size() < r->size(); - }; - std::sort(sizeCorrection.begin(), sizeCorrection.end(), pred); - while (sizeCorrection.size() > 1 && sizeCorrection.back()->size() > expectation && sizeCorrection.front()->size() < expectation) { - const ui32 uselessRecords = sizeCorrection.back()->size() - expectation; - const ui32 needRecords = expectation - sizeCorrection.front()->size(); - const ui32 moveRecords = std::min(needRecords, uselessRecords); - if (moveRecords == 0) { - break; - } - *sizeCorrection.front() = MergeLists(*sizeCorrection.front(), sizeCorrection.back()->end() - moveRecords, sizeCorrection.back()->end()); - sizeCorrection.back()->resize(sizeCorrection.back()->size() - moveRecords); - if (sizeCorrection.back()->size() <= expectation) { - sizeCorrection.pop_back(); - } - if (sizeCorrection.front()->size() >= expectation) { - sizeCorrection.pop_front(); - } - } - } - - TShardingSplitIndex(const ui32 shardsCount, const arrow::ChunkedArray& arrowHashArray) - : ShardsCount(shardsCount) - , RecordsCount(arrowHashArray.length()) { - } - -public: - - std::vector> DetachRemapping() { - return std::move(Remapping); - } - - template - static TShardingSplitIndex Build(const ui32 shardsCount, const arrow::ChunkedArray& arrowHashArray) { - TShardingSplitIndex result(shardsCount, arrowHashArray); - result.Initialize(arrowHashArray); - return result; - } - - std::shared_ptr BuildPermutation() const; - - std::vector> Apply(const std::shared_ptr& input); - static TShardedRecordBatch Apply(const ui32 shardsCount, const std::shared_ptr& input, const std::string& hashColumnName); - static TShardedRecordBatch Apply(const ui32 shardsCount, const std::shared_ptr& input, const std::string& 
hashColumnName); -}; - -std::shared_ptr MakePermutation(const int size, const bool reverse = false); -std::shared_ptr MakeFilterPermutation(const std::vector& indexes); -std::shared_ptr MakeFilterPermutation(const std::vector& indexes); std::shared_ptr MakeSortPermutation(const std::shared_ptr& batch, const std::shared_ptr& sortingKey, const bool andUnique); -std::shared_ptr ReverseRecords(const std::shared_ptr& batch); -std::shared_ptr ReverseRecords(const std::shared_ptr& batch); - -std::shared_ptr CopyRecords(const std::shared_ptr& source, const std::vector& indexes); -std::shared_ptr CopyRecords(const std::shared_ptr& source, const std::vector& indexes); } diff --git a/ydb/core/formats/arrow/process_columns.cpp b/ydb/core/formats/arrow/process_columns.cpp index d8795e188055..c032d1d8006c 100644 --- a/ydb/core/formats/arrow/process_columns.cpp +++ b/ydb/core/formats/arrow/process_columns.cpp @@ -1,14 +1,45 @@ #include "process_columns.h" + #include "common/adapter.h" +#include +#include + #include namespace NKikimr::NArrow { namespace { -template -std::shared_ptr ExtractColumnsValidateImpl(const std::shared_ptr& srcBatch, - const std::vector& columnNames) { + +template +class TColumnNameAccessor { +public: + static const std::string& GetFieldName(const T& val) { + return val; + } + static TString DebugString(const std::vector& items) { + return JoinSeq(",", items); + } +}; + +template <> +class TColumnNameAccessor> { +public: + static const std::string& GetFieldName(const std::shared_ptr& val) { + return val->name(); + } + static TString DebugString(const std::vector>& items) { + TStringBuilder sb; + for (auto&& i : items) { + sb << i->name() << ","; + } + return sb; + } +}; + +template +std::shared_ptr ExtractColumnsValidateImpl( + const std::shared_ptr& srcBatch, const std::vector& columnNames) { std::vector> fields; fields.reserve(columnNames.size()); std::vector::TColumn>> columns; @@ -16,7 +47,7 @@ std::shared_ptr ExtractColumnsValidateImpl(const 
std::shared_ptr auto srcSchema = srcBatch->schema(); for (auto& name : columnNames) { - const int pos = srcSchema->GetFieldIndex(name); + const int pos = srcSchema->GetFieldIndex(TColumnNameAccessor::GetFieldName(name)); if (Y_LIKELY(pos > -1)) { fields.push_back(srcSchema->field(pos)); columns.push_back(srcBatch->column(pos)); @@ -26,46 +57,57 @@ std::shared_ptr ExtractColumnsValidateImpl(const std::shared_ptr return NAdapter::TDataBuilderPolicy::Build(std::move(fields), std::move(columns), srcBatch->num_rows()); } -template -TConclusion> AdaptColumnsImpl(const std::shared_ptr& srcBatch, - const std::shared_ptr& dstSchema) { +template +TConclusion> AdaptColumnsImpl( + const std::shared_ptr& srcBatch, const std::shared_ptr& dstSchema, TSchemaSubset* subset) { AFL_VERIFY(srcBatch); AFL_VERIFY(dstSchema); std::vector::TColumn>> columns; columns.reserve(dstSchema->num_fields()); - + std::vector> fields; + fields.reserve(dstSchema->num_fields()); + std::set fieldIdx; + ui32 idx = 0; for (auto& field : dstSchema->fields()) { const int index = srcBatch->schema()->GetFieldIndex(field->name()); if (index > -1) { + if (subset) { + fieldIdx.emplace(idx); + } columns.push_back(srcBatch->column(index)); + fields.emplace_back(field); auto srcField = srcBatch->schema()->field(index); if (field->Equals(srcField)) { - AFL_VERIFY(columns.back()->type()->Equals(field->type()))("event", "cannot_use_incoming_batch")("reason", "invalid_column_type")("column", field->name()) - ("column_type", field->type()->ToString())("incoming_type", columns.back()->type()->ToString()); + AFL_VERIFY(columns.back()->type()->Equals(field->type()))("event", "cannot_use_incoming_batch")("reason", "invalid_column_type")( + "column", field->name())("column_type", field->type()->ToString())("incoming_type", columns.back()->type()->ToString()); } else { - AFL_ERROR(NKikimrServices::ARROW_HELPER)("event", "cannot_use_incoming_batch")("reason", "invalid_column_type")("column", field->name()) - ("column_type", 
field->ToString(true))("incoming_type", srcField->ToString(true)); + AFL_ERROR(NKikimrServices::ARROW_HELPER)("event", "cannot_use_incoming_batch")("reason", "invalid_column_type")( + "column", field->name())("column_type", field->ToString(true))("incoming_type", srcField->ToString(true)); return TConclusionStatus::Fail("incompatible column types"); } - } else { - AFL_ERROR(NKikimrServices::ARROW_HELPER)("event", "not_found_column")("column", field->name()) - ("column_type", field->type()->ToString())("columns", JoinSeq(",", srcBatch->schema()->field_names())); + } else if (!subset) { + AFL_ERROR(NKikimrServices::ARROW_HELPER)("event", "not_found_column")("column", field->name())( + "column_type", field->type()->ToString())("columns", JoinSeq(",", srcBatch->schema()->field_names())); return TConclusionStatus::Fail("not found column '" + field->name() + "'"); } + ++idx; } - - return NAdapter::TDataBuilderPolicy::Build(dstSchema, std::move(columns), srcBatch->num_rows()); + if (subset) { + *subset = TSchemaSubset(fieldIdx, dstSchema->num_fields()); + } + return NAdapter::TDataBuilderPolicy::Build(std::make_shared(fields), std::move(columns), srcBatch->num_rows()); } -template +template std::shared_ptr ExtractImpl(const TColumnOperator::EExtractProblemsPolicy& policy, - const std::shared_ptr& incoming, const std::vector& columnNames) { + const std::shared_ptr& incoming, const std::vector& columnNames) { AFL_VERIFY(incoming); AFL_VERIFY(columnNames.size()); auto result = ExtractColumnsValidateImpl(incoming, columnNames); switch (policy) { case TColumnOperator::EExtractProblemsPolicy::Verify: - AFL_VERIFY((ui32)result->num_columns() == columnNames.size())("schema", incoming->schema()->ToString())("required", JoinSeq(",", columnNames)); + AFL_VERIFY((ui32)result->num_columns() == columnNames.size())("schema", incoming->schema()->ToString())( + "required", TColumnNameAccessor::DebugString(columnNames)); break; case TColumnOperator::EExtractProblemsPolicy::Null: if 
((ui32)result->num_columns() != columnNames.size()) { @@ -79,7 +121,8 @@ std::shared_ptr ExtractImpl(const TColumnOperator::EExtractProbl } template -TConclusion> ReorderImpl(const std::shared_ptr& incoming, const std::vector& columnNames) { +TConclusion> ReorderImpl( + const std::shared_ptr& incoming, const std::vector& columnNames) { AFL_VERIFY(!!incoming); AFL_VERIFY(columnNames.size()); if ((ui32)incoming->num_columns() < columnNames.size()) { @@ -96,17 +139,30 @@ TConclusion> ReorderImpl(const std::shared_ptr TColumnOperator::Extract(const std::shared_ptr& incoming, const std::vector& columnNames) { +std::shared_ptr TColumnOperator::Extract( + const std::shared_ptr& incoming, const std::vector& columnNames) { return ExtractImpl(AbsentColumnPolicy, incoming, columnNames); } -std::shared_ptr TColumnOperator::Extract(const std::shared_ptr& incoming, const std::vector& columnNames) { +std::shared_ptr TColumnOperator::Extract( + const std::shared_ptr& incoming, const std::vector& columnNames) { return ExtractImpl(AbsentColumnPolicy, incoming, columnNames); } -std::shared_ptr TColumnOperator::Extract(const std::shared_ptr& incoming, const std::vector& columnNames) { +std::shared_ptr TColumnOperator::Extract( + const std::shared_ptr& incoming, const std::vector>& columns) { + return ExtractImpl(AbsentColumnPolicy, incoming, columns); +} + +std::shared_ptr TColumnOperator::Extract( + const std::shared_ptr& incoming, const std::vector>& columns) { + return ExtractImpl(AbsentColumnPolicy, incoming, columns); +} + +std::shared_ptr TColumnOperator::Extract( + const std::shared_ptr& incoming, const std::vector& columnNames) { return ExtractImpl(AbsentColumnPolicy, incoming, columnNames); } @@ -114,28 +170,86 @@ std::shared_ptr TColumnOperator::Extract(const std::shared_ptr> TColumnOperator::Adapt(const std::shared_ptr& incoming, const std::shared_ptr& dstSchema) { - return AdaptColumnsImpl(incoming, dstSchema); +NKikimr::TConclusion> TColumnOperator::Adapt( + const 
std::shared_ptr& incoming, const std::shared_ptr& dstSchema, TSchemaSubset* subset) { + return AdaptColumnsImpl(incoming, dstSchema, subset); } -NKikimr::TConclusion> TColumnOperator::Adapt(const std::shared_ptr& incoming, const std::shared_ptr& dstSchema) { - return AdaptColumnsImpl(incoming, dstSchema); +NKikimr::TConclusion> TColumnOperator::Adapt( + const std::shared_ptr& incoming, const std::shared_ptr& dstSchema, TSchemaSubset* subset) { + return AdaptColumnsImpl(incoming, dstSchema, subset); } -NKikimr::TConclusion> TColumnOperator::Reorder(const std::shared_ptr& incoming, const std::vector& columnNames) { +NKikimr::TConclusion> TColumnOperator::Adapt( + const std::shared_ptr& incoming, const std::shared_ptr& dstSchema, TSchemaSubset* subset) { + return AdaptColumnsImpl(incoming, dstSchema, subset); +} + +NKikimr::TConclusion> TColumnOperator::Adapt( + const std::shared_ptr& incoming, const std::shared_ptr& dstSchema, TSchemaSubset* subset) { + return AdaptColumnsImpl(incoming, dstSchema, subset); +} + +NKikimr::TConclusion> TColumnOperator::Reorder( + const std::shared_ptr& incoming, const std::vector& columnNames) { return ReorderImpl(incoming, columnNames); } -NKikimr::TConclusion> TColumnOperator::Reorder(const std::shared_ptr& incoming, const std::vector& columnNames) { +NKikimr::TConclusion> TColumnOperator::Reorder( + const std::shared_ptr& incoming, const std::vector& columnNames) { return ReorderImpl(incoming, columnNames); } -NKikimr::TConclusion> TColumnOperator::Reorder(const std::shared_ptr& incoming, const std::vector& columnNames) { +NKikimr::TConclusion> TColumnOperator::Reorder( + const std::shared_ptr& incoming, const std::vector& columnNames) { return ReorderImpl(incoming, columnNames); } -NKikimr::TConclusion> TColumnOperator::Reorder(const std::shared_ptr& incoming, const std::vector& columnNames) { +NKikimr::TConclusion> TColumnOperator::Reorder( + const std::shared_ptr& incoming, const std::vector& columnNames) { return 
ReorderImpl(incoming, columnNames); } +namespace { +template +TConclusion BuildSequentialSubsetImpl( + const std::shared_ptr& srcBatch, const std::shared_ptr& dstSchema) { + AFL_VERIFY(srcBatch); + AFL_VERIFY(dstSchema); + if (dstSchema->num_fields() < srcBatch->schema()->num_fields()) { + AFL_ERROR(NKikimrServices::ARROW_HELPER)("event", "incorrect columns set: destination must been wider than source")( + "source", srcBatch->schema()->ToString())("destination", dstSchema->ToString()); + return TConclusionStatus::Fail("incorrect columns set: destination must been wider than source"); + } + std::set fieldIdx; + auto itSrc = srcBatch->schema()->fields().begin(); + auto itDst = dstSchema->fields().begin(); + while (itSrc != srcBatch->schema()->fields().end() && itDst != dstSchema->fields().end()) { + if ((*itSrc)->name() != (*itDst)->name()) { + ++itDst; + } else { + fieldIdx.emplace(itDst - dstSchema->fields().begin()); + if (!(*itDst)->Equals(*itSrc)) { + AFL_ERROR(NKikimrServices::ARROW_HELPER)("event", "cannot_use_incoming_batch")("reason", "invalid_column_type")( + "column_type", (*itDst)->ToString(true))("incoming_type", (*itSrc)->ToString(true)); + return TConclusionStatus::Fail("incompatible column types"); + } + + ++itDst; + ++itSrc; + } + } + if (itDst == dstSchema->fields().end() && itSrc != srcBatch->schema()->fields().end()) { + AFL_ERROR(NKikimrServices::ARROW_HELPER)("event", "incorrect columns order in source set")("source", srcBatch->schema()->ToString())( + "destination", dstSchema->ToString()); + return TConclusionStatus::Fail("incorrect columns order in source set"); + } + return TSchemaSubset(fieldIdx, dstSchema->num_fields()); +} +} // namespace + +TConclusion TColumnOperator::BuildSequentialSubset( + const std::shared_ptr& incoming, const std::shared_ptr& dstSchema) { + return BuildSequentialSubsetImpl(incoming, dstSchema); +} -} \ No newline at end of file +} // namespace NKikimr::NArrow diff --git a/ydb/core/formats/arrow/process_columns.h 
b/ydb/core/formats/arrow/process_columns.h index d07b106231d2..ad57af9e6647 100644 --- a/ydb/core/formats/arrow/process_columns.h +++ b/ydb/core/formats/arrow/process_columns.h @@ -5,6 +5,9 @@ namespace NKikimr::NArrow { +class TSchemaSubset; +class TSchemaLite; + class TColumnOperator { public: enum class EExtractProblemsPolicy { @@ -12,6 +15,7 @@ class TColumnOperator { Verify, Skip }; + private: EExtractProblemsPolicy AbsentColumnPolicy = EExtractProblemsPolicy::Verify; @@ -31,18 +35,35 @@ class TColumnOperator { return *this; } - std::shared_ptr Extract(const std::shared_ptr& incoming, const std::vector& columnNames); + std::shared_ptr Extract( + const std::shared_ptr& incoming, const std::vector& columnNames); std::shared_ptr Extract(const std::shared_ptr& incoming, const std::vector& columnNames); + std::shared_ptr Extract( + const std::shared_ptr& incoming, const std::vector>& columns); + std::shared_ptr Extract( + const std::shared_ptr& incoming, const std::vector>& columns); std::shared_ptr Extract(const std::shared_ptr& incoming, const std::vector& columnNames); std::shared_ptr Extract(const std::shared_ptr& incoming, const std::vector& columnNames); - TConclusion> Adapt(const std::shared_ptr& incoming, const std::shared_ptr& dstSchema); - TConclusion> Adapt(const std::shared_ptr& incoming, const std::shared_ptr& dstSchema); + TConclusion BuildSequentialSubset( + const std::shared_ptr& incoming, const std::shared_ptr& dstSchema); + + TConclusion> Adapt( + const std::shared_ptr& incoming, const std::shared_ptr& dstSchema, TSchemaSubset* subset = nullptr); + TConclusion> Adapt( + const std::shared_ptr& incoming, const std::shared_ptr& dstSchema, TSchemaSubset* subset = nullptr); + TConclusion> Adapt(const std::shared_ptr& incoming, + const std::shared_ptr& dstSchema, TSchemaSubset* subset = nullptr); + TConclusion> Adapt( + const std::shared_ptr& incoming, const std::shared_ptr& dstSchema, TSchemaSubset* subset = nullptr); - TConclusion> Reorder(const 
std::shared_ptr& incoming, const std::vector& columnNames); - TConclusion> Reorder(const std::shared_ptr& incoming, const std::vector& columnNames); - TConclusion> Reorder(const std::shared_ptr& incoming, const std::vector& columnNames); + TConclusion> Reorder( + const std::shared_ptr& incoming, const std::vector& columnNames); + TConclusion> Reorder( + const std::shared_ptr& incoming, const std::vector& columnNames); + TConclusion> Reorder( + const std::shared_ptr& incoming, const std::vector& columnNames); TConclusion> Reorder(const std::shared_ptr& incoming, const std::vector& columnNames); }; -} \ No newline at end of file +} // namespace NKikimr::NArrow diff --git a/ydb/core/formats/arrow/program.cpp b/ydb/core/formats/arrow/program.cpp index 1203f1802098..50071d8490e3 100644 --- a/ydb/core/formats/arrow/program.cpp +++ b/ydb/core/formats/arrow/program.cpp @@ -19,6 +19,18 @@ enum class AggFunctionId { AGG_MIN = 3, AGG_MAX = 4, AGG_SUM = 5, + AGG_AVG = 6, + //AGG_VAR = 7, + //AGG_COVAR = 8, + //AGG_STDDEV = 9, + //AGG_CORR = 10, + //AGG_ARG_MIN = 11, + //AGG_ARG_MAX = 12, + //AGG_COUNT_DISTINCT = 13, + //AGG_QUANTILES = 14, + //AGG_TOP_COUNT = 15, + //AGG_TOP_SUM = 16, + AGG_NUM_ROWS = 17, }; struct GroupByOptions: public arrow::compute::ScalarAggregateOptions { struct Assign { @@ -88,7 +100,7 @@ class TConstFunction : public IStepFunction { using TBase = IStepFunction; public: using TBase::TBase; - arrow::Result Call(const TAssign& assign, const TDatumBatch& batch) const override { + arrow::Result Call(const TAssign& assign, const TDatumBatch& batch) const override { Y_UNUSED(batch); return assign.GetConstant(); } @@ -398,6 +410,8 @@ const char * GetFunctionName(EAggregate op) { return "min_max"; case EAggregate::Sum: return "sum"; + case EAggregate::NumRows: + return "num_rows"; #if 0 // TODO case EAggregate::Avg: return "mean"; @@ -424,6 +438,8 @@ const char * GetHouseFunctionName(EAggregate op) { case EAggregate::Avg: return "ch.avg"; #endif + case 
EAggregate::NumRows: + return "ch.num_rows"; default: break; } @@ -448,6 +464,8 @@ CH::AggFunctionId GetHouseFunction(EAggregate op) { case EAggregate::Avg: return CH::AggFunctionId::AGG_AVG; #endif + case EAggregate::NumRows: + return CH::AggFunctionId::AGG_NUM_ROWS; default: break; } @@ -531,7 +549,7 @@ class TFilterVisitor : public arrow::ArrayVisitor { arrow::Status TDatumBatch::AddColumn(const std::string& name, arrow::Datum&& column) { - if (Schema->GetFieldIndex(name) != -1) { + if (HasColumn(name)) { return arrow::Status::Invalid("Trying to add duplicate column '" + name + "'"); } @@ -543,20 +561,27 @@ arrow::Status TDatumBatch::AddColumn(const std::string& name, arrow::Datum&& col return arrow::Status::Invalid("Wrong column length."); } - Schema = *Schema->AddField(Schema->num_fields(), field); + NewColumnIds.emplace(name, NewColumnsPtr.size()); + NewColumnsPtr.emplace_back(field); + Datums.emplace_back(column); return arrow::Status::OK(); } arrow::Result TDatumBatch::GetColumnByName(const std::string& name) const { - auto i = Schema->GetFieldIndex(name); + auto it = NewColumnIds.find(name); + if (it != NewColumnIds.end()) { + AFL_VERIFY(SchemaBase->num_fields() + it->second < Datums.size()); + return Datums[SchemaBase->num_fields() + it->second]; + } + auto i = SchemaBase->GetFieldIndex(name); if (i < 0) { return arrow::Status::Invalid("Not found column '" + name + "' or duplicate"); } return Datums[i]; } -std::shared_ptr TDatumBatch::ToTable() const { +std::shared_ptr TDatumBatch::ToTable() { std::vector> columns; columns.reserve(Datums.size()); for (auto col : Datums) { @@ -576,10 +601,10 @@ std::shared_ptr TDatumBatch::ToTable() const { AFL_VERIFY(false); } } - return arrow::Table::Make(Schema, columns, Rows); + return arrow::Table::Make(GetSchema(), columns, Rows); } -std::shared_ptr TDatumBatch::ToRecordBatch() const { +std::shared_ptr TDatumBatch::ToRecordBatch() { std::vector> columns; columns.reserve(Datums.size()); for (auto col : Datums) { @@ 
-594,7 +619,7 @@ std::shared_ptr TDatumBatch::ToRecordBatch() const { AFL_VERIFY(false); } } - return arrow::RecordBatch::Make(Schema, Rows, columns); + return arrow::RecordBatch::Make(GetSchema(), Rows, columns); } std::shared_ptr TDatumBatch::FromRecordBatch(const std::shared_ptr& batch) { @@ -603,12 +628,7 @@ std::shared_ptr TDatumBatch::FromRecordBatch(const std::shared_ptr< for (int64_t i = 0; i < batch->num_columns(); ++i) { datums.push_back(arrow::Datum(batch->column(i))); } - return std::make_shared( - TProgramStep::TDatumBatch{ - .Schema = std::make_shared(*batch->schema()), - .Datums = std::move(datums), - .Rows = batch->num_rows() - }); + return std::make_shared(std::make_shared(*batch->schema()), std::move(datums), batch->num_rows()); } std::shared_ptr TDatumBatch::FromTable(const std::shared_ptr& batch) { @@ -617,12 +637,15 @@ std::shared_ptr TDatumBatch::FromTable(const std::shared_ptrnum_columns(); ++i) { datums.push_back(arrow::Datum(batch->column(i))); } - return std::make_shared( - TProgramStep::TDatumBatch{ - .Schema = std::make_shared(*batch->schema()), - .Datums = std::move(datums), - .Rows = batch->num_rows() - }); + return std::make_shared(std::make_shared(*batch->schema()), std::move(datums), batch->num_rows()); +} + +TDatumBatch::TDatumBatch(const std::shared_ptr& schema, std::vector&& datums, const i64 rows) + : SchemaBase(schema) + , Rows(rows) + , Datums(std::move(datums)) { + AFL_VERIFY(SchemaBase); + AFL_VERIFY(Datums.size() == (ui32)SchemaBase->num_fields()); } TAssign TAssign::MakeTimestamp(const TColumnInfo& column, ui64 value) { @@ -673,6 +696,27 @@ IStepFunction::TPtr TAggregateAssign::GetFunction(arrow::compu return std::make_shared(ctx); } +TString TAggregateAssign::DebugString() const { + TStringBuilder sb; + sb << "{"; + if (Operation != EAggregate::Unspecified) { + sb << "op=" << GetFunctionName(Operation) << ";"; + } + if (Arguments.size()) { + sb << "arguments=["; + for (auto&& i : Arguments) { + sb << i.DebugString() << 
";"; + } + sb << "];"; + } + sb << "options=" << ScalarOpts.ToString() << ";"; + if (KernelFunction) { + sb << "kernel=" << KernelFunction->name() << ";"; + } + sb << "column=" << Column.DebugString() << ";"; + sb << "}"; + return sb; +} arrow::Status TProgramStep::ApplyAssignes(TDatumBatch& batch, arrow::compute::ExecContext* ctx) const { if (Assignes.empty()) { @@ -680,7 +724,7 @@ arrow::Status TProgramStep::ApplyAssignes(TDatumBatch& batch, arrow::compute::Ex } batch.Datums.reserve(batch.Datums.size() + Assignes.size()); for (auto& assign : Assignes) { - if (batch.GetColumnByName(assign.GetName()).ok()) { + if (batch.HasColumn(assign.GetName())) { return arrow::Status::Invalid("Assign to existing column '" + assign.GetName() + "'."); } @@ -703,8 +747,9 @@ arrow::Status TProgramStep::ApplyAggregates(TDatumBatch& batch, arrow::compute:: } ui32 numResultColumns = GroupBy.size() + GroupByKeys.size(); - TDatumBatch res; - res.Datums.reserve(numResultColumns); + std::vector datums; + datums.reserve(numResultColumns); + std::optional resultRecordsCount; arrow::FieldVector fields; fields.reserve(numResultColumns); @@ -715,13 +760,13 @@ arrow::Status TProgramStep::ApplyAggregates(TDatumBatch& batch, arrow::compute:: if (!funcResult.ok()) { return funcResult.status(); } - res.Datums.push_back(*funcResult); - fields.emplace_back(std::make_shared(assign.GetName(), res.Datums.back().type())); + datums.push_back(*funcResult); + fields.emplace_back(std::make_shared(assign.GetName(), datums.back().type())); } - res.Rows = 1; + resultRecordsCount = 1; } else { CH::GroupByOptions funcOpts; - funcOpts.schema = batch.Schema; + funcOpts.schema = batch.GetSchema(); funcOpts.assigns.reserve(numResultColumns); funcOpts.has_nullable_key = false; @@ -759,19 +804,18 @@ arrow::Status TProgramStep::ApplyAggregates(TDatumBatch& batch, arrow::compute:: return arrow::Status::Invalid("No expected column in GROUP BY result."); } fields.emplace_back(std::make_shared(assign.result_column, 
column->type())); - res.Datums.push_back(column); + datums.push_back(column); } - res.Rows = gbBatch->num_rows(); + resultRecordsCount = gbBatch->num_rows(); } - - res.Schema = std::make_shared(std::move(fields)); - batch = std::move(res); + AFL_VERIFY(resultRecordsCount); + batch = TDatumBatch(std::make_shared(std::move(fields)), std::move(datums), *resultRecordsCount); return arrow::Status::OK(); } arrow::Status TProgramStep::MakeCombinedFilter(TDatumBatch& batch, NArrow::TColumnFilter& result) const { - TFilterVisitor filterVisitor(batch.Rows); + TFilterVisitor filterVisitor(batch.GetRecordsCount()); for (auto& colName : Filters) { auto column = batch.GetColumnByName(colName.GetColumnName()); if (!column.ok()) { @@ -821,13 +865,13 @@ arrow::Status TProgramStep::ApplyFilters(TDatumBatch& batch) const { } } std::vector filterDatums; - for (int64_t i = 0; i < batch.Schema->num_fields(); ++i) { - if (batch.Datums[i].is_arraylike() && (allColumns || neededColumns.contains(batch.Schema->field(i)->name()))) { + for (int64_t i = 0; i < batch.GetSchema()->num_fields(); ++i) { + if (batch.Datums[i].is_arraylike() && (allColumns || neededColumns.contains(batch.GetSchema()->field(i)->name()))) { filterDatums.emplace_back(&batch.Datums[i]); } } - bits.Apply(batch.Rows, filterDatums); - batch.Rows = bits.GetFilteredCount().value_or(batch.Rows); + bits.Apply(batch.GetRecordsCount(), filterDatums); + batch.SetRecordsCount(bits.GetFilteredCount().value_or(batch.GetRecordsCount())); return arrow::Status::OK(); } @@ -838,15 +882,14 @@ arrow::Status TProgramStep::ApplyProjection(TDatumBatch& batch) const { std::vector> newFields; std::vector newDatums; for (size_t i = 0; i < Projection.size(); ++i) { - int schemaFieldIndex = batch.Schema->GetFieldIndex(Projection[i].GetColumnName()); + int schemaFieldIndex = batch.GetSchema()->GetFieldIndex(Projection[i].GetColumnName()); if (schemaFieldIndex == -1) { return arrow::Status::Invalid("Could not find column " + 
Projection[i].GetColumnName() + " in record batch schema."); } - newFields.push_back(batch.Schema->field(schemaFieldIndex)); + newFields.push_back(batch.GetSchema()->field(schemaFieldIndex)); newDatums.push_back(batch.Datums[schemaFieldIndex]); } - batch.Schema = std::make_shared(std::move(newFields)); - batch.Datums = std::move(newDatums); + batch = TDatumBatch(std::make_shared(std::move(newFields)), std::move(newDatums), batch.GetRecordsCount()); return arrow::Status::OK(); } @@ -919,14 +962,10 @@ std::set TProgramStep::GetColumnsInUsage(const bool originalOnly/* } arrow::Result> TProgramStep::BuildFilter(const std::shared_ptr& t) const { - return BuildFilter(t->BuildTable(GetColumnsInUsage(true))); -} - -arrow::Result> TProgramStep::BuildFilter(const std::shared_ptr& t) const { if (Filters.empty()) { return nullptr; } - std::vector> batches = NArrow::SliceToRecordBatches(t); + std::vector> batches = NArrow::SliceToRecordBatches(t->BuildTableVerified(GetColumnsInUsage(true))); NArrow::TColumnFilter fullLocal = NArrow::TColumnFilter::BuildAllowFilter(); for (auto&& rb : batches) { auto datumBatch = TDatumBatch::FromRecordBatch(rb); @@ -938,7 +977,7 @@ arrow::Result> TProgramStep::BuildFilter( } NArrow::TColumnFilter local = NArrow::TColumnFilter::BuildAllowFilter(); NArrow::TStatusValidator::Validate(MakeCombinedFilter(*datumBatch, local)); - AFL_VERIFY(local.Size() == datumBatch->Rows)("local", local.Size())("datum", datumBatch->Rows); + AFL_VERIFY(local.Size() == datumBatch->GetRecordsCount())("local", local.Size())("datum", datumBatch->GetRecordsCount()); fullLocal.Append(local); } AFL_VERIFY(fullLocal.Size() == t->num_rows())("filter", fullLocal.Size())("t", t->num_rows()); @@ -946,18 +985,18 @@ arrow::Result> TProgramStep::BuildFilter( } const std::set& TProgramStep::GetFilterOriginalColumnIds() const { - AFL_VERIFY(IsFilterOnly()); +// AFL_VERIFY(IsFilterOnly()); return FilterOriginalColumnIds; } std::set TProgram::GetEarlyFilterColumns() const { std::set 
result; for (ui32 i = 0; i < Steps.size(); ++i) { + auto stepFields = Steps[i]->GetColumnsInUsage(true); + result.insert(stepFields.begin(), stepFields.end()); if (!Steps[i]->IsFilterOnly()) { break; } - auto stepFields = Steps[i]->GetColumnsInUsage(); - result.insert(stepFields.begin(), stepFields.end()); } return result; } diff --git a/ydb/core/formats/arrow/program.h b/ydb/core/formats/arrow/program.h index dfb22116158b..2b953b55e070 100644 --- a/ydb/core/formats/arrow/program.h +++ b/ydb/core/formats/arrow/program.h @@ -21,6 +21,7 @@ enum class EAggregate { Max = 4, Sum = 5, //Avg = 6, + NumRows = 7, }; } @@ -37,15 +38,47 @@ const char * GetHouseFunctionName(EAggregate op); inline const char * GetHouseGroupByName() { return "ch.group_by"; } EOperation ValidateOperation(EOperation op, ui32 argsSize); -struct TDatumBatch { - std::shared_ptr Schema; - std::vector Datums; +class TDatumBatch { +private: + std::shared_ptr SchemaBase; + THashMap NewColumnIds; + std::vector> NewColumnsPtr; int64_t Rows = 0; +public: + std::vector Datums; + + ui64 GetRecordsCount() const { + return Rows; + } + + void SetRecordsCount(const ui64 value) { + Rows = value; + } + + TDatumBatch(const std::shared_ptr& schema, std::vector&& datums, const i64 rows); + + const std::shared_ptr& GetSchema() { + if (NewColumnIds.size()) { + std::vector> fields = SchemaBase->fields(); + fields.insert(fields.end(), NewColumnsPtr.begin(), NewColumnsPtr.end()); + SchemaBase = std::make_shared(fields); + NewColumnIds.clear(); + NewColumnsPtr.clear(); + } + return SchemaBase; + } + arrow::Status AddColumn(const std::string& name, arrow::Datum&& column); arrow::Result GetColumnByName(const std::string& name) const; - std::shared_ptr ToTable() const; - std::shared_ptr ToRecordBatch() const; + bool HasColumn(const std::string& name) const { + if (NewColumnIds.contains(name)) { + return true; + } + return SchemaBase->GetFieldIndex(name) > -1; + } + std::shared_ptr ToTable(); + std::shared_ptr ToRecordBatch(); 
static std::shared_ptr FromRecordBatch(const std::shared_ptr& batch); static std::shared_ptr FromTable(const std::shared_ptr& batch); }; @@ -291,6 +324,7 @@ class TAggregateAssign { const arrow::compute::ScalarAggregateOptions* GetOptions() const { return &ScalarOpts; } IStepFunction::TPtr GetFunction(arrow::compute::ExecContext* ctx) const; + TString DebugString() const; private: TColumnInfo Column; @@ -340,10 +374,18 @@ class TProgramStep { sb << "];"; } if (GroupBy.size()) { - sb << "group_by_count=" << GroupBy.size() << "; "; + sb << "group_by_assignes=["; + for (auto&& i : GroupBy) { + sb << i.DebugString() << ";"; + } + sb << "];"; } if (GroupByKeys.size()) { - sb << "group_by_keys_count=" << GroupByKeys.size() << ";"; + sb << "group_by_keys=["; + for (auto&& i : GroupByKeys) { + sb << i.DebugString() << ";"; + } + sb << "];"; } sb << "projections=["; @@ -405,7 +447,6 @@ class TProgramStep { return Filters.size() && (!GroupBy.size() && !GroupByKeys.size()); } - [[nodiscard]] arrow::Result> BuildFilter(const std::shared_ptr& t) const; [[nodiscard]] arrow::Result> BuildFilter(const std::shared_ptr& t) const; }; diff --git a/ydb/core/formats/arrow/protos/ya.make b/ydb/core/formats/arrow/protos/ya.make deleted file mode 100644 index f01aa064fbcc..000000000000 --- a/ydb/core/formats/arrow/protos/ya.make +++ /dev/null @@ -1,11 +0,0 @@ -PROTO_LIBRARY() - -SRCS( - ssa.proto -) - -PEERDIR( - -) - -END() diff --git a/ydb/core/formats/arrow/reader/batch_iterator.h b/ydb/core/formats/arrow/reader/batch_iterator.h index 48497a53c452..d3bb365d5706 100644 --- a/ydb/core/formats/arrow/reader/batch_iterator.h +++ b/ydb/core/formats/arrow/reader/batch_iterator.h @@ -44,7 +44,8 @@ class TBatchIterator { TBatchIterator(TRWSortableBatchPosition&& keyColumns) : ControlPointFlag(true) - , KeyColumns(std::move(keyColumns)) { + , KeyColumns(std::move(keyColumns)) + { } diff --git a/ydb/core/formats/arrow/reader/merger.cpp b/ydb/core/formats/arrow/reader/merger.cpp index 
ddae86c1ed28..16b9733ad4c0 100644 --- a/ydb/core/formats/arrow/reader/merger.cpp +++ b/ydb/core/formats/arrow/reader/merger.cpp @@ -1,15 +1,16 @@ #include "merger.h" #include "result_builder.h" +#include #include namespace NKikimr::NArrow::NMerger { -void TMergePartialStream::PutControlPoint(const TSortableBatchPosition& point) { +void TMergePartialStream::PutControlPoint(const TSortableBatchPosition& point, const bool deepCopy) { AFL_VERIFY(point.IsSameSortingSchema(SortSchema))("point", point.DebugJson())("schema", SortSchema->ToString()); Y_ABORT_UNLESS(point.IsReverseSort() == Reverse); Y_ABORT_UNLESS(++ControlPoints == 1); - SortHeap.Push(TBatchIterator(point.BuildRWPosition())); + SortHeap.Push(TBatchIterator(point.BuildRWPosition(false, deepCopy))); } void TMergePartialStream::RemoveControlPoint() { @@ -65,7 +66,7 @@ bool TMergePartialStream::DrainToControlPoint(TRecordBatchBuilder& builder, cons } bool TMergePartialStream::DrainCurrentTo(TRecordBatchBuilder& builder, const TSortableBatchPosition& readTo, const bool includeFinish, std::optional* lastResultPosition) { - PutControlPoint(readTo); + PutControlPoint(readTo, false); return DrainToControlPoint(builder, includeFinish, lastResultPosition); } @@ -185,13 +186,16 @@ void TMergePartialStream::DrainCurrentPosition(TRecordBatchBuilder* builder, std SortHeap.CleanFinished(); } -std::vector> TMergePartialStream::DrainAllParts(const std::map& positions, +std::vector> TMergePartialStream::DrainAllParts(const TIntervalPositions& positions, const std::vector>& resultFields) { std::vector> result; for (auto&& i : positions) { TRecordBatchBuilder indexesBuilder(resultFields); - DrainCurrentTo(indexesBuilder, i.first, i.second); + if (SortHeap.Empty() || i.GetPosition().Compare(SortHeap.Current().GetKeyColumns()) == std::partial_ordering::less) { + continue; + } + DrainCurrentTo(indexesBuilder, i.GetPosition(), i.IsIncludedToLeftInterval()); result.emplace_back(indexesBuilder.Finalize()); if 
(result.back()->num_rows() == 0) { result.pop_back(); diff --git a/ydb/core/formats/arrow/reader/merger.h b/ydb/core/formats/arrow/reader/merger.h index 196edcd09e3c..c30aba0f384f 100644 --- a/ydb/core/formats/arrow/reader/merger.h +++ b/ydb/core/formats/arrow/reader/merger.h @@ -37,6 +37,9 @@ class TMergePartialStream { void DrainCurrentPosition(TRecordBatchBuilder* builder, std::shared_ptr* resultScanData, ui64* resultPosition); void CheckSequenceInDebug(const TRWSortableBatchPosition& nextKeyColumnsPosition); + bool DrainCurrentTo(TRecordBatchBuilder& builder, const TSortableBatchPosition& readTo, const bool includeFinish, + std::optional* lastResultPosition = nullptr); + public: TMergePartialStream(std::shared_ptr sortSchema, std::shared_ptr dataSchema, const bool reverse, const std::vector& versionColumnNames) : SortSchema(sortSchema) @@ -49,6 +52,7 @@ class TMergePartialStream { Y_ABORT_UNLESS(!DataSchema || DataSchema->num_fields()); } + void PutControlPoint(const TSortableBatchPosition& point, const bool deepCopy); void SkipToLowerBound(const TSortableBatchPosition& pos, const bool include); void SetPossibleSameVersion(const bool value) { @@ -67,8 +71,6 @@ class TMergePartialStream { return TStringBuilder() << "sort_heap=" << SortHeap.DebugJson(); } - void PutControlPoint(const TSortableBatchPosition& point); - void RemoveControlPoint(); bool ControlPointEnriched() const { @@ -92,9 +94,8 @@ class TMergePartialStream { void DrainAll(TRecordBatchBuilder& builder); std::shared_ptr SingleSourceDrain(const TSortableBatchPosition& readTo, const bool includeFinish, std::optional* lastResultPosition = nullptr); - bool DrainCurrentTo(TRecordBatchBuilder& builder, const TSortableBatchPosition& readTo, const bool includeFinish, std::optional* lastResultPosition = nullptr); bool DrainToControlPoint(TRecordBatchBuilder& builder, const bool includeFinish, std::optional* lastResultPosition = nullptr); - std::vector> DrainAllParts(const std::map& positions, + std::vector> 
DrainAllParts(const TIntervalPositions& positions, const std::vector>& resultFields); }; diff --git a/ydb/core/formats/arrow/reader/position.cpp b/ydb/core/formats/arrow/reader/position.cpp index 6431d180d130..b728405769d7 100644 --- a/ydb/core/formats/arrow/reader/position.cpp +++ b/ydb/core/formats/arrow/reader/position.cpp @@ -1,4 +1,7 @@ #include "position.h" + +#include + #include namespace NKikimr::NArrow::NMerger { @@ -15,11 +18,13 @@ NJson::TJsonValue TSortableBatchPosition::DebugJson() const { return result; } -std::optional TSortableBatchPosition::FindPosition(TRWSortableBatchPosition& position, const ui64 posStartExt, const ui64 posFinishExt, const TSortableBatchPosition& forFound, const bool greater) { +std::optional TSortableBatchPosition::FindPosition(TRWSortableBatchPosition& position, + const ui64 posStartExt, const ui64 posFinishExt, const TSortableBatchPosition& forFound, const bool greater) { ui64 posStart = posStartExt; ui64 posFinish = posFinishExt; + auto guard = position.CreateAsymmetricAccessGuard(); { - AFL_VERIFY(position.InitPosition(posStart)); + AFL_VERIFY(guard.InitSortingPosition(posStart)); auto cmp = position.Compare(forFound); if (cmp == std::partial_ordering::greater) { return TFoundPosition::Greater(posStart); @@ -28,7 +33,7 @@ std::optional TSortableBatchPosition::Fi } } { - AFL_VERIFY(position.InitPosition(posFinish)); + AFL_VERIFY(guard.InitSortingPosition(posFinish)); auto cmp = position.Compare(forFound); if (cmp == std::partial_ordering::less) { return TFoundPosition::Less(posFinish); @@ -37,7 +42,7 @@ std::optional TSortableBatchPosition::Fi } } while (posFinish > posStart + 1) { - Y_ABORT_UNLESS(position.InitPosition(0.5 * (posStart + posFinish))); + AFL_VERIFY(guard.InitSortingPosition(0.5 * (posStart + posFinish))); const auto comparision = position.Compare(forFound); if (comparision == std::partial_ordering::less) { posStart = position.Position; @@ -47,17 +52,18 @@ std::optional TSortableBatchPosition::Fi return 
TFoundPosition::Equal(position.Position); } } - Y_ABORT_UNLESS(posFinish != posStart); + AFL_VERIFY(posFinish != posStart); if (greater) { - Y_ABORT_UNLESS(position.InitPosition(posFinish)); + AFL_VERIFY(guard.InitSortingPosition(posFinish)); return TFoundPosition::Greater(posFinish); } else { - Y_ABORT_UNLESS(position.InitPosition(posStart)); + AFL_VERIFY(guard.InitSortingPosition(posStart)); return TFoundPosition::Less(posStart); } } -std::optional TSortableBatchPosition::FindPosition(const std::shared_ptr& batch, const TSortableBatchPosition& forFound, const bool greater, const std::optional includedStartPosition) { +std::optional TSortableBatchPosition::FindPosition(const std::shared_ptr& batch, + const TSortableBatchPosition& forFound, const bool greater, const std::optional includedStartPosition) { if (!batch || !batch->num_rows()) { return {}; } @@ -74,11 +80,14 @@ std::optional TSortableBatchPosition::Fi return FindPosition(position, posStart, posFinish, forFound, greater); } -NKikimr::NArrow::NMerger::TRWSortableBatchPosition TSortableBatchPosition::BuildRWPosition() const { - return TRWSortableBatchPosition(Position, RecordsCount, ReverseSort, Sorting->BuildCopy(Position), Data ? Data->BuildCopy(Position) : nullptr); +NKikimr::NArrow::NMerger::TRWSortableBatchPosition TSortableBatchPosition::BuildRWPosition(const bool needData, const bool deepCopy) const { + return TRWSortableBatchPosition(Position, RecordsCount, ReverseSort, + deepCopy ? Sorting->BuildCopy(Position) : Sorting, + (needData && Data) ? (deepCopy ? 
Data->BuildCopy(Position) : Data) : nullptr); } -NKikimr::NArrow::NMerger::TRWSortableBatchPosition TSortableBatchPosition::BuildRWPosition(std::shared_ptr batch, const ui32 position) const { +NKikimr::NArrow::NMerger::TRWSortableBatchPosition TSortableBatchPosition::BuildRWPosition( + std::shared_ptr batch, const ui32 position) const { std::vector dataColumns; if (Data) { dataColumns = Data->GetFieldNames(); @@ -98,7 +107,8 @@ TSortableBatchPosition::TFoundPosition TRWSortableBatchPosition::SkipToLower(con return *pos; } -TSortableScanData::TSortableScanData(const ui64 position, const std::shared_ptr& batch, const std::vector& columns) { +TSortableScanData::TSortableScanData( + const ui64 position, const std::shared_ptr& batch, const std::vector& columns) { for (auto&& i : columns) { auto c = batch->GetAccessorByNameOptional(i); AFL_VERIFY(c)("column_name", i)("columns", JoinSeq(",", columns))("batch", batch->DebugString()); @@ -110,7 +120,8 @@ TSortableScanData::TSortableScanData(const ui64 position, const std::shared_ptr< BuildPosition(position); } -TSortableScanData::TSortableScanData(const ui64 position, const std::shared_ptr& batch, const std::vector& columns) { +TSortableScanData::TSortableScanData( + const ui64 position, const std::shared_ptr& batch, const std::vector& columns) { for (auto&& i : columns) { auto c = batch->GetColumnByName(i); AFL_VERIFY(c)("column_name", i)("columns", JoinSeq(",", columns)); @@ -134,10 +145,11 @@ TSortableScanData::TSortableScanData(const ui64 position, const std::shared_ptr< BuildPosition(position); } -void TSortableScanData::AppendPositionTo(const std::vector>& builders, const ui64 position, ui64* recordSize) const { +void TSortableScanData::AppendPositionTo( + const std::vector>& builders, const ui64 position, ui64* recordSize) const { AFL_VERIFY(builders.size() == PositionAddress.size()); for (ui32 i = 0; i < PositionAddress.size(); ++i) { - AFL_VERIFY(NArrow::Append(*builders[i], *PositionAddress[i].GetArray(), position 
- PositionAddress[i].GetStartPosition(), recordSize)); + AFL_VERIFY(NArrow::Append(*builders[i], *PositionAddress[i].GetArray(), PositionAddress[i].GetAddress().GetLocalIndex(position), recordSize)); } } @@ -148,9 +160,9 @@ void TSortableScanData::BuildPosition(const ui64 position) { StartPosition = 0; LastInit = position; for (auto&& i : Columns) { - PositionAddress.emplace_back(i->GetChunk({}, position)); - StartPosition = std::max(StartPosition, PositionAddress.back().GetStartPosition()); - FinishPosition = std::min(FinishPosition, PositionAddress.back().GetFinishPosition()); + PositionAddress.emplace_back(i->GetChunkSlow(position)); + StartPosition = std::max(StartPosition, PositionAddress.back().GetAddress().GetGlobalStartPosition()); + FinishPosition = std::min(FinishPosition, PositionAddress.back().GetAddress().GetGlobalFinishPosition()); if (!recordsCount) { recordsCount = i->GetRecordsCount(); } else { @@ -166,18 +178,19 @@ void TSortableScanData::BuildPosition(const ui64 position) { bool TSortableScanData::InitPosition(const ui64 position) { AFL_VERIFY(position < RecordsCount); if (position < FinishPosition && StartPosition <= position) { - return false; + return true; } LastInit = position; ui32 idx = 0; FinishPosition = Max(); StartPosition = 0; for (auto&& i : PositionAddress) { - if (!i.Contains(position)) { - i = Columns[idx]->GetChunk(i, position); + if (!i.GetAddress().Contains(position)) { + i = Columns[idx]->GetChunk(i.GetAddress(), position); } - StartPosition = std::max(StartPosition, i.GetStartPosition()); - FinishPosition = std::min(FinishPosition, i.GetFinishPosition()); + StartPosition = std::max(StartPosition, i.GetAddress().GetGlobalStartPosition()); + FinishPosition = std::min(FinishPosition, i.GetAddress().GetGlobalFinishPosition()); + AFL_VERIFY(i.GetAddress().Contains(position)); ++idx; } AFL_VERIFY(StartPosition < FinishPosition); @@ -212,14 +225,13 @@ void TCursor::AppendPositionTo(const 
std::vectortype()->Equals(PositionAddress[i].GetArray()->type())); - AFL_VERIFY(NArrow::Append(*builders[i], *PositionAddress[i].GetArray(), Position - PositionAddress[i].GetStartPosition(), recordSize)); + AFL_VERIFY(NArrow::Append(*builders[i], *PositionAddress[i].GetArray(), PositionAddress[i].GetAddress().GetLocalIndex(Position), recordSize)); } } TCursor::TCursor(const std::shared_ptr& table, const ui64 position, const std::vector& columns) - : Position(position) -{ + : Position(position) { PositionAddress = TSortableScanData(position, table, columns).GetPositionAddress(); } -} +} // namespace NKikimr::NArrow::NMerger diff --git a/ydb/core/formats/arrow/reader/position.h b/ydb/core/formats/arrow/reader/position.h index 8a6e15fd79ac..78233e50b4a5 100644 --- a/ydb/core/formats/arrow/reader/position.h +++ b/ydb/core/formats/arrow/reader/position.h @@ -1,10 +1,9 @@ #pragma once -#include #include #include -#include #include +#include #include #include @@ -22,12 +21,12 @@ class TSortableScanData; class TCursor { private: YDB_READONLY(ui64, Position, 0); - std::vector PositionAddress; + std::vector PositionAddress; public: TCursor() = default; TCursor(const std::shared_ptr& table, const ui64 position, const std::vector& columns); - TCursor(const ui64 position, const std::vector& addresses) + TCursor(const ui64 position, const std::vector& addresses) : Position(position) , PositionAddress(addresses) { @@ -64,7 +63,7 @@ class TCursor { class TSortableScanData { private: ui64 RecordsCount = 0; - YDB_READONLY_DEF(std::vector, PositionAddress); + YDB_READONLY_DEF(std::vector, PositionAddress); YDB_READONLY_DEF(std::vector>, Columns); YDB_READONLY_DEF(std::vector>, Fields); ui64 StartPosition = 0; @@ -87,19 +86,18 @@ class TSortableScanData { BuildPosition(position); } - const NAccessor::IChunkedArray::TCurrentChunkAddress& GetPositionAddress(const ui32 colIdx) const { + const NAccessor::IChunkedArray::TFullDataAddress& GetPositionAddress(const ui32 colIdx) const { 
AFL_VERIFY(colIdx < PositionAddress.size()); return PositionAddress[colIdx]; } ui32 GetPositionInChunk(const ui32 colIdx, const ui32 pos) const { AFL_VERIFY(colIdx < PositionAddress.size()); - AFL_VERIFY(pos >= PositionAddress[colIdx].GetStartPosition()); - return pos - PositionAddress[colIdx].GetStartPosition(); + return PositionAddress[colIdx].GetAddress().GetLocalIndex(pos); } - std::shared_ptr BuildCopy(const ui64 position) const { - return std::make_shared(position, RecordsCount, Columns, Fields); + std::shared_ptr BuildCopy(const ui64 /*position*/) const { + return std::make_shared(*this); } TCursor BuildCursor(const ui64 position) const { @@ -109,8 +107,8 @@ class TSortableScanData { auto addresses = PositionAddress; ui32 idx = 0; for (auto&& i : addresses) { - if (!i.Contains(position)) { - i = Columns[idx]->GetChunk(i, position); + if (!i.GetAddress().Contains(position)) { + i = Columns[idx]->GetChunk(i.GetAddress(), position); } ++idx; } @@ -129,15 +127,15 @@ class TSortableScanData { } else { for (ui32 idx = 0; idx < PositionAddress.size(); ++idx) { std::partial_ordering cmp = std::partial_ordering::equivalent; - const bool containsSelf = PositionAddress[idx].Contains(position); - const bool containsItem = item.PositionAddress[idx].Contains(itemPosition); + const bool containsSelf = PositionAddress[idx].GetAddress().Contains(position); + const bool containsItem = item.PositionAddress[idx].GetAddress().Contains(itemPosition); if (containsSelf && containsItem) { cmp = PositionAddress[idx].Compare(position, item.PositionAddress[idx], itemPosition); } else if (containsSelf) { - auto temporaryAddress = item.Columns[idx]->GetChunk(item.PositionAddress[idx], itemPosition); + auto temporaryAddress = item.Columns[idx]->GetChunk(item.PositionAddress[idx].GetAddress(), itemPosition); cmp = PositionAddress[idx].Compare(position, temporaryAddress, itemPosition); } else if (containsItem) { - auto temporaryAddress = Columns[idx]->GetChunk(PositionAddress[idx], 
position); + auto temporaryAddress = Columns[idx]->GetChunk(PositionAddress[idx].GetAddress(), position); cmp = temporaryAddress.Compare(position, item.PositionAddress[idx], itemPosition); } else { AFL_VERIFY(false); @@ -153,7 +151,7 @@ class TSortableScanData { void AppendPositionTo(const std::vector>& builders, const ui64 position, ui64* recordSize) const; - bool InitPosition(const ui64 position); + [[nodiscard]] bool InitPosition(const ui64 position); std::shared_ptr Slice(const ui64 offset, const ui64 count) const { std::vector> slicedArrays; @@ -210,6 +208,17 @@ class TSortableBatchPosition { bool ReverseSort = false; std::shared_ptr Sorting; std::shared_ptr Data; + + TSortableBatchPosition(const i64 position, const i64 recordsCount, const bool reverseSort, const std::shared_ptr& sorting, + const std::shared_ptr& data) + : Position(position) + , RecordsCount(recordsCount) + , ReverseSort(reverseSort) + , Sorting(sorting) + , Data(data) { + AFL_VERIFY(IsAvailablePosition(Position)); + } + public: TSortableBatchPosition() = default; @@ -221,7 +230,7 @@ class TSortableBatchPosition { return RecordsCount; } - std::shared_ptr GetSorting() const { + const std::shared_ptr& GetSorting() const { return Sorting; } @@ -240,16 +249,6 @@ class TSortableBatchPosition { return Sorting->GetFields(); } - TSortableBatchPosition(const i64 position, const i64 recordsCount, const bool reverseSort, const std::shared_ptr& sorting, const std::shared_ptr& data) - : Position(position) - , RecordsCount(recordsCount) - , ReverseSort(reverseSort) - , Sorting(sorting) - , Data(data) - { - - } - TSortableBatchPosition(const TRWSortableBatchPosition& source) = delete; TSortableBatchPosition(TRWSortableBatchPosition& source) = delete; TSortableBatchPosition(TRWSortableBatchPosition&& source) = delete; @@ -258,7 +257,7 @@ class TSortableBatchPosition { TSortableBatchPosition operator= (TRWSortableBatchPosition& source) = delete; TSortableBatchPosition operator= (TRWSortableBatchPosition&& 
source) = delete; - TRWSortableBatchPosition BuildRWPosition() const; + TRWSortableBatchPosition BuildRWPosition(const bool needData, const bool deepCopy) const; std::shared_ptr SliceData(const ui64 offset, const ui64 count) const { AFL_VERIFY(Data); @@ -316,7 +315,12 @@ class TSortableBatchPosition { } }; - static std::optional FindPosition(const std::shared_ptr& batch, const TSortableBatchPosition& forFound, const bool needGreater, const std::optional includedStartPosition); + [[nodiscard]] bool IsAvailablePosition(const i64 position) const { + return 0 <= position && position < RecordsCount; + } + + static std::optional FindPosition(const std::shared_ptr& batch, const TSortableBatchPosition& forFound, + const bool needGreater, const std::optional includedStartPosition); static std::optional FindPosition(TRWSortableBatchPosition& position, const ui64 posStart, const ui64 posFinish, const TSortableBatchPosition& forFound, const bool greater); const TSortableScanData& GetData() const { @@ -404,29 +408,151 @@ class TSortableBatchPosition { }; +class TIntervalPosition { +private: + TSortableBatchPosition Position; + bool LeftIntervalInclude; +public: + const TSortableBatchPosition& GetPosition() const { + return Position; + } + bool IsIncludedToLeftInterval() const { + return LeftIntervalInclude; + } + TIntervalPosition(TSortableBatchPosition&& position, const bool leftIntervalInclude) + : Position(std::move(position)) + , LeftIntervalInclude(leftIntervalInclude) { + + } + + TIntervalPosition(const TSortableBatchPosition& position, const bool leftIntervalInclude) + : Position(position) + , LeftIntervalInclude(leftIntervalInclude) { + + } + + bool operator<(const TIntervalPosition& item) const { + std::partial_ordering cmp = Position.Compare(item.Position); + if (cmp == std::partial_ordering::equivalent) { + return (LeftIntervalInclude ? 1 : 0) < (item.LeftIntervalInclude ? 
1 : 0); + } + return cmp == std::partial_ordering::less; + } + + NJson::TJsonValue DebugJson() const { + NJson::TJsonValue result = NJson::JSON_MAP; + result.InsertValue("position", Position.DebugJson()); + result.InsertValue("include", LeftIntervalInclude); + return result; + } +}; + +class TIntervalPositions { +private: + std::vector Positions; +public: + bool IsEmpty() const { + return Positions.empty(); + } + + std::vector::const_iterator begin() const { + return Positions.begin(); + } + + std::vector::const_iterator end() const { + return Positions.end(); + } + + void InsertPosition(TIntervalPosition&& intervalPosition) { + Positions.emplace_back(std::move(intervalPosition)); + ui32 index = Positions.size() - 1; + while (index >= 1 && Positions[index] < Positions[index - 1]) { + std::swap(Positions[index], Positions[index - 1]); + index = index - 1; + } + } + + void InsertPosition(TSortableBatchPosition&& position, const bool includePositionToLeftInterval) { + TIntervalPosition intervalPosition(std::move(position), includePositionToLeftInterval); + InsertPosition(std::move(intervalPosition)); + } + + void InsertPosition(const TSortableBatchPosition& position, const bool includePositionToLeftInterval) { + TIntervalPosition intervalPosition(position, includePositionToLeftInterval); + InsertPosition(std::move(intervalPosition)); + } + + void AddPosition(TIntervalPosition&& intervalPosition) { + if (Positions.size()) { + AFL_VERIFY(Positions.back() < intervalPosition)("back", Positions.back().DebugJson())("pos", intervalPosition.DebugJson()); + } + Positions.emplace_back(std::move(intervalPosition)); + } + + void AddPosition(TSortableBatchPosition&& position, const bool includePositionToLeftInterval) { + TIntervalPosition intervalPosition(std::move(position), includePositionToLeftInterval); + AddPosition(std::move(intervalPosition)); + } + + void AddPosition(const TSortableBatchPosition& position, const bool includePositionToLeftInterval) { + TIntervalPosition 
intervalPosition(position, includePositionToLeftInterval); + AddPosition(std::move(intervalPosition)); + } +}; + class TRWSortableBatchPosition: public TSortableBatchPosition, public TMoveOnly { private: using TBase = TSortableBatchPosition; public: using TBase::TBase; - bool NextPosition(const i64 delta) { + [[nodiscard]] bool NextPosition(const i64 delta) { return InitPosition(Position + delta); } - bool InitPosition(const i64 position) { - if (position < RecordsCount && position >= 0) { - Sorting->InitPosition(position); - if (Data) { - Data->InitPosition(position); + [[nodiscard]] bool InitPosition(const i64 position) { + if (!IsAvailablePosition(position)) { + return false; + } + AFL_VERIFY(Sorting->InitPosition(position))("pos", position)("count", RecordsCount); + if (Data) { + AFL_VERIFY(Data->InitPosition(position))("pos", position)("count", RecordsCount); + } + Position = position; + return true; + } + + class TAsymmetricPositionGuard: TNonCopyable { + private: + TRWSortableBatchPosition& Owner; + public: + TAsymmetricPositionGuard(TRWSortableBatchPosition& owner) + : Owner(owner) + { + } + + [[nodiscard]] bool InitSortingPosition(const i64 position) { + if (!Owner.IsAvailablePosition(position)) { + return false; } - Position = position; + AFL_VERIFY(Owner.Sorting->InitPosition(position)); + Owner.Position = position; return true; - } else { - return false; } + ~TAsymmetricPositionGuard() { + if (Owner.IsAvailablePosition(Owner.Position)) { + if (Owner.Data) { + AFL_VERIFY(Owner.Data->InitPosition(Owner.Position)); + } + } + } + }; + + TAsymmetricPositionGuard CreateAsymmetricAccessGuard() { + return TAsymmetricPositionGuard(*this); } + TSortableBatchPosition::TFoundPosition SkipToLower(const TSortableBatchPosition& forFound); // (-inf, it1), [it1, it2), [it2, it3), ..., [itLast, +inf) diff --git a/ydb/core/formats/arrow/reader/result_builder.cpp b/ydb/core/formats/arrow/reader/result_builder.cpp index deb4fe3e1427..9b412902b1ed 100644 --- 
a/ydb/core/formats/arrow/reader/result_builder.cpp +++ b/ydb/core/formats/arrow/reader/result_builder.cpp @@ -1,9 +1,8 @@ #include "result_builder.h" -#include - #include #include +#include #include @@ -64,7 +63,7 @@ std::shared_ptr TRecordBatchBuilder::Finalize() { for (auto&& i : Builders) { columns.emplace_back(NArrow::TStatusValidator::GetValid(i->Finish())); } - auto result = arrow::RecordBatch::Make(schema, columns.front()->length(), columns); + auto result = arrow::RecordBatch::Make(schema, columns.front()->length(), std::move(columns)); #ifndef NDEBUG NArrow::TStatusValidator::Validate(result->ValidateFull()); #endif diff --git a/ydb/core/formats/arrow/reader/ya.make b/ydb/core/formats/arrow/reader/ya.make index d57bb4e501ca..8dba6acf2efb 100644 --- a/ydb/core/formats/arrow/reader/ya.make +++ b/ydb/core/formats/arrow/reader/ya.make @@ -2,11 +2,11 @@ LIBRARY() PEERDIR( contrib/libs/apache/arrow - ydb/core/formats/arrow/simple_builder ydb/core/formats/arrow/switch ydb/core/formats/arrow/common ydb/library/actors/core ydb/library/services + ydb/library/formats/arrow ) SRCS( diff --git a/ydb/core/formats/arrow/save_load/loader.cpp b/ydb/core/formats/arrow/save_load/loader.cpp new file mode 100644 index 000000000000..c9328f751d4a --- /dev/null +++ b/ydb/core/formats/arrow/save_load/loader.cpp @@ -0,0 +1,68 @@ +#include "loader.h" + +#include + +namespace NKikimr::NArrow::NAccessor { + +TString TColumnLoader::DebugString() const { + TStringBuilder result; + result << "accessor_constructor:" << AccessorConstructor->DebugString() << ";"; + result << "result_field:" << ResultField->ToString() << ";"; + if (Transformer) { + result << "transformer:" << Transformer->DebugString() << ";"; + } + result << "serializer:" << Serializer->DebugString() << ";"; + return result; +} + +TColumnLoader::TColumnLoader(NTransformation::ITransformer::TPtr transformer, const NSerialization::TSerializerContainer& serializer, + const TConstructorContainer& accessorConstructor, const 
std::shared_ptr& resultField, + const std::shared_ptr& defaultValue, const ui32 columnId) + : Serializer(serializer) + , Transformer(transformer) + , AccessorConstructor(accessorConstructor) + , ResultField(resultField) + , DefaultValue(defaultValue) + , ColumnId(columnId) { + AFL_VERIFY(!!AccessorConstructor); + AFL_VERIFY(ResultField); + AFL_VERIFY(Serializer); +} + +const std::shared_ptr& TColumnLoader::GetField() const { + return ResultField; +} + +arrow::Result> TColumnLoader::Apply(const TString& data) const { + Y_ABORT_UNLESS(Serializer); + arrow::Result> columnArray = + Transformer ? Serializer->Deserialize(data) : Serializer->Deserialize(data, AccessorConstructor->GetExpectedSchema(ResultField)); + if (!columnArray.ok()) { + return columnArray; + } + if (Transformer) { + return Transformer->Transform(*columnArray); + } else { + return columnArray; + } +} + +std::shared_ptr TColumnLoader::ApplyRawVerified(const TString& data) const { + return TStatusValidator::GetValid(Apply(data)); +} + +std::shared_ptr TColumnLoader::ApplyVerified(const TString& dataStr, const ui32 recordsCount) const { + auto data = TStatusValidator::GetValid(Apply(dataStr)); + return BuildAccessor(data, TChunkConstructionData(recordsCount, DefaultValue, ResultField->type())); +} + +std::shared_ptr TColumnLoader::BuildAccessor( + const std::shared_ptr& batch, const TChunkConstructionData& chunkData) const { + return AccessorConstructor->Construct(batch, chunkData).DetachResult(); +} + +std::shared_ptr TColumnLoader::BuildDefaultAccessor(const ui32 recordsCount) const { + return AccessorConstructor->ConstructDefault(TChunkConstructionData(recordsCount, DefaultValue, ResultField->type())).DetachResult(); +} + +} // namespace NKikimr::NArrow::NAccessor diff --git a/ydb/core/formats/arrow/save_load/loader.h b/ydb/core/formats/arrow/save_load/loader.h new file mode 100644 index 000000000000..2d3119ac3fa8 --- /dev/null +++ b/ydb/core/formats/arrow/save_load/loader.h @@ -0,0 +1,56 @@ +#pragma 
once +#include +#include + +#include +#include + +#include + +namespace NKikimr::NArrow::NAccessor { + +class TColumnLoader { +private: + NSerialization::TSerializerContainer Serializer; + NTransformation::ITransformer::TPtr Transformer; + YDB_READONLY_DEF(NAccessor::TConstructorContainer, AccessorConstructor); + YDB_READONLY_DEF(std::shared_ptr, ResultField); + YDB_READONLY_DEF(std::shared_ptr, DefaultValue); + const ui32 ColumnId; + + arrow::Result> Apply(const TString& data) const; + std::shared_ptr BuildAccessor( + const std::shared_ptr& batch, const TChunkConstructionData& chunkData) const; + +public: + std::shared_ptr BuildDefaultAccessor(const ui32 recordsCount) const; + + bool IsEqualTo(const TColumnLoader& item) const { + if (!!Transformer != !!item.Transformer) { + return false; + } else if (!!Transformer && !Transformer->IsEqualTo(*item.Transformer)) { + return false; + } + if (!Serializer.IsEqualTo(item.Serializer)) { + return false; + } + return true; + } + + TString DebugString() const; + + TColumnLoader(NTransformation::ITransformer::TPtr transformer, const NSerialization::TSerializerContainer& serializer, + const NAccessor::TConstructorContainer& accessorConstructor, const std::shared_ptr& resultField, + const std::shared_ptr& defaultValue, const ui32 columnId); + + ui32 GetColumnId() const { + return ColumnId; + } + + const std::shared_ptr& GetField() const; + + std::shared_ptr ApplyVerified(const TString& data, const ui32 expectedRecordsCount) const; + std::shared_ptr ApplyRawVerified(const TString& data) const; +}; + +} // namespace NKikimr::NArrow::NAccessor diff --git a/ydb/core/formats/arrow/save_load/saver.cpp b/ydb/core/formats/arrow/save_load/saver.cpp new file mode 100644 index 000000000000..95adebc76471 --- /dev/null +++ b/ydb/core/formats/arrow/save_load/saver.cpp @@ -0,0 +1,38 @@ +#include "saver.h" + +namespace NKikimr::NArrow::NAccessor { + +TColumnSaver::TColumnSaver(NArrow::NTransformation::ITransformer::TPtr transformer, const 
NArrow::NSerialization::TSerializerContainer serializer) + : Transformer(transformer) + , Serializer(serializer) +{ + Y_ABORT_UNLESS(Serializer); +} + +bool TColumnSaver::IsHardPacker() const { + return Serializer->IsHardPacker(); +} + +TString TColumnSaver::Apply(std::shared_ptr data, std::shared_ptr field) const { + auto schema = std::make_shared(arrow::FieldVector{field}); + auto batch = arrow::RecordBatch::Make(schema, data->length(), {data}); + return Apply(batch); +} + +TString TColumnSaver::Apply(const std::shared_ptr& data) const { + Y_ABORT_UNLESS(Serializer); + NArrow::NSerialization::TSerializerContainer serializer = Serializer; + if (SerializerBySizeUpperBorder.size()) { + auto it = SerializerBySizeUpperBorder.lower_bound(data->num_rows()); + if (it != SerializerBySizeUpperBorder.end()) { + serializer = it->second; + } + } + if (Transformer) { + return serializer->SerializeFull(Transformer->Transform(data)); + } else { + return serializer->SerializePayload(data); + } +} + +} \ No newline at end of file diff --git a/ydb/core/formats/arrow/save_load/saver.h b/ydb/core/formats/arrow/save_load/saver.h new file mode 100644 index 000000000000..dd9feb4114f3 --- /dev/null +++ b/ydb/core/formats/arrow/save_load/saver.h @@ -0,0 +1,38 @@ +#pragma once +#include + +#include +#include + +#include +#include + +namespace NKikimr::NArrow::NAccessor { + +class TColumnSaver { +private: + NArrow::NTransformation::ITransformer::TPtr Transformer; + YDB_READONLY_DEF(NArrow::NSerialization::TSerializerContainer, Serializer); + std::map SerializerBySizeUpperBorder; + +public: + TColumnSaver() = default; + TColumnSaver(NArrow::NTransformation::ITransformer::TPtr transformer, const NArrow::NSerialization::TSerializerContainer serializer); + + void AddSerializerWithBorder(const ui32 upperBorder, const NArrow::NSerialization::TSerializerContainer& serializer) { + if (Serializer.IsCompatibleForExchange(serializer)) { + AFL_VERIFY(SerializerBySizeUpperBorder.emplace(upperBorder, 
serializer).second); + } else { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "cannot_add_serializer")("reason", "incompatible_serializers")( + "border", upperBorder); + } + } + + bool IsHardPacker() const; + + TString Apply(std::shared_ptr data, std::shared_ptr field) const; + + TString Apply(const std::shared_ptr& data) const; +}; + +} // namespace NKikimr::NArrow::NAccessor diff --git a/ydb/core/formats/arrow/save_load/ya.make b/ydb/core/formats/arrow/save_load/ya.make new file mode 100644 index 000000000000..7947aa1ab826 --- /dev/null +++ b/ydb/core/formats/arrow/save_load/ya.make @@ -0,0 +1,19 @@ +LIBRARY() + +SRCS( + saver.cpp + loader.cpp +) + +PEERDIR( + ydb/library/actors/core + contrib/libs/apache/arrow + ydb/library/accessor + ydb/library/conclusion + ydb/library/formats/arrow/transformer + ydb/library/formats/arrow/common + ydb/core/formats/arrow/transformer + ydb/core/formats/arrow/serializer +) + +END() diff --git a/ydb/core/formats/arrow/serializer/abstract.cpp b/ydb/core/formats/arrow/serializer/abstract.cpp index 6347cd765b9b..8010197ae409 100644 --- a/ydb/core/formats/arrow/serializer/abstract.cpp +++ b/ydb/core/formats/arrow/serializer/abstract.cpp @@ -21,8 +21,12 @@ NKikimr::TConclusionStatus TSerializerContainer::DeserializeFromRequest(NYql::TF return TBase::GetObjectPtr()->DeserializeFromRequest(features); } -std::shared_ptr TSerializerContainer::GetDefaultSerializer() { +std::shared_ptr TSerializerContainer::GetDefaultSerializer() { return std::make_shared(); } +std::shared_ptr TSerializerContainer::GetFastestSerializer() { + return std::make_shared(arrow::Compression::UNCOMPRESSED); +} + } diff --git a/ydb/core/formats/arrow/serializer/abstract.h b/ydb/core/formats/arrow/serializer/abstract.h index 1c8d9963dd7e..9811aaaf0f20 100644 --- a/ydb/core/formats/arrow/serializer/abstract.h +++ b/ydb/core/formats/arrow/serializer/abstract.h @@ -4,9 +4,9 @@ #include #include #include -#include #include +#include #include #include @@ -146,6 
+146,7 @@ class TSerializerContainer: public NBackgroundTasks::TInterfaceProtoContainer GetDefaultSerializer(); + static std::shared_ptr GetFastestSerializer(); TConclusionStatus DeserializeFromProto(const NKikimrSchemeOp::TCompressionOptions& proto); diff --git a/ydb/core/formats/arrow/serializer/native.cpp b/ydb/core/formats/arrow/serializer/native.cpp index 7b422a8c1cb1..4b90286001d2 100644 --- a/ydb/core/formats/arrow/serializer/native.cpp +++ b/ydb/core/formats/arrow/serializer/native.cpp @@ -2,10 +2,10 @@ #include "stream.h" #include "parsing.h" #include -#include #include #include +#include #include #include diff --git a/ydb/core/formats/arrow/serializer/ya.make b/ydb/core/formats/arrow/serializer/ya.make index bf7e091ab4bf..8c9fb49fe08f 100644 --- a/ydb/core/formats/arrow/serializer/ya.make +++ b/ydb/core/formats/arrow/serializer/ya.make @@ -2,9 +2,9 @@ LIBRARY() PEERDIR( contrib/libs/apache/arrow - ydb/core/formats/arrow/common ydb/services/metadata/abstract ydb/library/actors/core + ydb/library/formats/arrow/common ydb/core/protos ) diff --git a/ydb/core/formats/arrow/simple_arrays_cache.h b/ydb/core/formats/arrow/simple_arrays_cache.h deleted file mode 100644 index e527e44a0b08..000000000000 --- a/ydb/core/formats/arrow/simple_arrays_cache.h +++ /dev/null @@ -1,45 +0,0 @@ -#pragma once -#include "size_calcer.h" - -#include - -#include -#include -#include -#include - -namespace NKikimr::NArrow { - -class TThreadSimpleArraysCache { -private: - THashMap> Arrays; - const ui64 MaxOneArrayMemorySize = 10 * 1024 * 1024; - - template - std::shared_ptr InitializePosition(const TString& key, const ui32 recordsCountExt, const TInitializeActor actor) { - const ui32 recordsCount = (recordsCountExt < 1024) ? 
1024 : recordsCountExt; - auto it = Arrays.find(key); - if (it == Arrays.end() || it->second->length() < recordsCount) { - auto arrNew = actor(recordsCount); - if (NArrow::GetArrayMemorySize(arrNew->data()) < MaxOneArrayMemorySize) { - if (it == Arrays.end()) { - it = Arrays.emplace(key, arrNew).first; - } else { - it->second = arrNew; - } - } else { - AFL_VERIFY(recordsCountExt == recordsCount)("ext", recordsCountExt)("count", recordsCount); - return arrNew; - } - } - return it->second->Slice(0, recordsCountExt); - } - - std::shared_ptr GetNullImpl(const std::shared_ptr& type, const ui32 recordsCount); - std::shared_ptr GetConstImpl(const std::shared_ptr& type, const std::shared_ptr& scalar, const ui32 recordsCount); -public: - static std::shared_ptr GetNull(const std::shared_ptr& type, const ui32 recordsCount); - static std::shared_ptr GetConst(const std::shared_ptr& type, const std::shared_ptr& scalar, const ui32 recordsCount); - static std::shared_ptr Get(const std::shared_ptr& type, const std::shared_ptr& scalar, const ui32 recordsCount); -}; -} diff --git a/ydb/core/formats/arrow/simple_builder/filler.cpp b/ydb/core/formats/arrow/simple_builder/filler.cpp deleted file mode 100644 index f6168701ddbe..000000000000 --- a/ydb/core/formats/arrow/simple_builder/filler.cpp +++ /dev/null @@ -1,17 +0,0 @@ -#include "filler.h" -#include - -namespace NKikimr::NArrow::NConstruction { - -TStringPoolFiller::TStringPoolFiller(const ui32 poolSize, const ui32 strLen) { - for (ui32 i = 0; i < poolSize; ++i) { - Data.emplace_back(NUnitTest::RandomString(strLen, i)); - } -} - -arrow::util::string_view TStringPoolFiller::GetValue(const ui32 idx) const { - const TString& str = Data[(2 + 7 * idx) % Data.size()]; - return arrow::util::string_view(str.data(), str.size()); -} - -} diff --git a/ydb/core/formats/arrow/simple_builder/filler.h b/ydb/core/formats/arrow/simple_builder/filler.h deleted file mode 100644 index e86e7a6c2139..000000000000 --- 
a/ydb/core/formats/arrow/simple_builder/filler.h +++ /dev/null @@ -1,129 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include - -namespace NKikimr::NArrow::NConstruction { - -template -class TIntSeqFiller { -public: - using TValue = TArrowInt; -private: - using CType = typename TArrowInt::c_type; - const CType Delta; -public: - CType GetValue(const CType idx) const { - return Delta + idx; - } - TIntSeqFiller(const CType delta = 0) - : Delta(delta) { - - } -}; - -template -class TIntConstFiller { -public: - using TValue = TArrowInt; -private: - using CType = typename TArrowInt::c_type; - const CType Value; -public: - CType GetValue(const CType /*idx*/) const { - return Value; - } - TIntConstFiller(const CType value) - : Value(value) { - - } -}; - -class TStringPoolFiller { -private: - std::vector Data; -public: - using TValue = arrow::StringType; - arrow::util::string_view GetValue(const ui32 idx) const; - - TStringPoolFiller(const ui32 poolSize, const ui32 strLen); -}; - -template -class TLinearArrayAccessor { -private: - using TArray = typename arrow::TypeTraits::ArrayType; - const TArray& Data; -public: - using TValue = TValueExt; - auto GetValue(const ui32 idx) const { - return Data.Value(idx); - } - - TLinearArrayAccessor(const arrow::Array& data) - : Data(static_cast(data)) { - } -}; - -template -class TBinaryArrayAccessor { -private: - using TArray = typename arrow::TypeTraits::ArrayType; - const TArray& Data; -public: - using TValue = TValueExt; - const char* GetValueView(const ui32 idx) const { - return Data.GetView(idx).data(); - } - - TBinaryArrayAccessor(const arrow::Array& data) - : Data(static_cast(data)) { - } -}; - -template -class TDictionaryArrayAccessor { -private: - using TDictionary = typename arrow::TypeTraits::ArrayType; - const TDictionary& Dictionary; - const TIndices& Indices; -public: - using TValue = TDictionaryValue; - auto GetValue(const ui32 idx) const { - return Dictionary.Value(Indices.Value(idx)); - } - - 
TDictionaryArrayAccessor(const TDictionary& dictionary, const TIndices& indices) - : Dictionary(dictionary) - , Indices(indices) { - } -}; - -template -class TBinaryDictionaryArrayAccessor { -private: - using TDictionary = typename arrow::TypeTraits::ArrayType; - const TDictionary& Dictionary; - const TIndices& Indices; - std::vector DictionaryStrings; -public: - using TValue = TDictionaryValue; - const char* GetValueView(const ui32 idx) const { - return DictionaryStrings[Indices.Value(idx)].data(); - } - - TBinaryDictionaryArrayAccessor(const TDictionary& dictionary, const TIndices& indices) - : Dictionary(dictionary) - , Indices(indices) - { - DictionaryStrings.reserve(Dictionary.length()); - for (i64 idx = 0; idx < Dictionary.length(); ++idx) { - auto sView = Dictionary.Value(idx); - DictionaryStrings.emplace_back(TString(sView.data(), sView.size())); - } - } -}; - -} diff --git a/ydb/core/formats/arrow/size_calcer.cpp b/ydb/core/formats/arrow/size_calcer.cpp index d44018faaa77..a79f52eb5799 100644 --- a/ydb/core/formats/arrow/size_calcer.cpp +++ b/ydb/core/formats/arrow/size_calcer.cpp @@ -1,5 +1,5 @@ #include "size_calcer.h" -#include "switch_type.h" +#include "switch/switch_type.h" #include "arrow_helpers.h" #include "dictionary/conversion.h" #include @@ -50,203 +50,16 @@ TConclusion> SplitByBlobSize(const std::shared_ptr return result; } -ui32 TRowSizeCalculator::GetRowBitWidth(const ui32 row) const { - Y_ABORT_UNLESS(Prepared); - ui32 result = CommonSize; - for (auto&& c : BinaryColumns) { - result += GetBitWidthAligned(c->GetView(row).size() * 8); - } - for (auto&& c : StringColumns) { - result += GetBitWidthAligned(c->GetView(row).size() * 8); - } - return result; -} - -bool TRowSizeCalculator::InitBatch(const std::shared_ptr& batch) { - Batch = batch; - CommonSize = 0; - BinaryColumns.clear(); - StringColumns.clear(); - Prepared = false; - for (ui32 i = 0; i < (ui32)Batch->num_columns(); ++i) { - auto fSize = 
std::dynamic_pointer_cast(Batch->column(i)->type()); - if (fSize) { - CommonSize += GetBitWidthAligned(fSize->bit_width()); - } else { - auto c = Batch->column(i); - if (c->type()->id() == arrow::Type::BINARY) { - const arrow::BinaryArray& viewArray = static_cast(*c); - BinaryColumns.emplace_back(&viewArray); - } else if (c->type()->id() == arrow::Type::STRING) { - const arrow::StringArray& viewArray = static_cast(*c); - StringColumns.emplace_back(&viewArray); - } else { - return false; - } - } - } - Prepared = true; - return true; -} - -ui32 TRowSizeCalculator::GetRowBytesSize(const ui32 row) const { - const ui32 bitsWidth = GetRowBitWidth(row); - ui32 result = bitsWidth / 8; - if (bitsWidth % 8) { - ++result; - } - return result; -} - -ui64 GetArrayMemorySize(const std::shared_ptr& data) { - if (!data) { - return 0; - } - ui64 result = 0; - for (auto&& i : data->buffers) { - if (i) { - result += i->capacity(); - } - } - for (auto&& i : data->child_data) { - for (auto&& b : i->buffers) { - if (b) { - result += b->capacity(); - } - } - } - if (data->dictionary) { - for (auto&& b : data->dictionary->buffers) { - if (b) { - result += b->capacity(); - } - } - } - return result; -} - - -ui64 GetBatchDataSize(const std::shared_ptr& batch) { - if (!batch) { - return 0; - } - ui64 bytes = 0; - for (auto& column : batch->columns()) { - bytes += GetArrayDataSize(column); - } - return bytes; -} - -ui64 GetBatchMemorySize(const std::shared_ptr& batch) { - if (!batch) { - return 0; - } - ui64 bytes = 0; - for (auto& column : batch->column_data()) { - bytes += GetArrayMemorySize(column); - } - return bytes; -} - -ui64 GetTableMemorySize(const std::shared_ptr& batch) { - if (!batch) { - return 0; - } - ui64 bytes = 0; - for (auto& column : batch->columns()) { - for (auto&& chunk : column->chunks()) { - bytes += GetArrayMemorySize(chunk->data()); - } - } - return bytes; -} - -ui64 GetTableDataSize(const std::shared_ptr& batch) { - if (!batch) { - return 0; - } - ui64 bytes = 0; - 
for (auto& column : batch->columns()) { - for (auto&& chunk : column->chunks()) { - bytes += GetArrayDataSize(chunk); - } - } - return bytes; -} - -template -ui64 GetArrayDataSizeImpl(const std::shared_ptr& column) { - return sizeof(typename TType::c_type) * column->length(); -} - -template <> -ui64 GetArrayDataSizeImpl(const std::shared_ptr& column) { - return column->length() * 8; // Special value for empty lines -} - -template <> -ui64 GetArrayDataSizeImpl(const std::shared_ptr& column) { - auto typedColumn = std::static_pointer_cast(column); - return typedColumn->total_values_length() + sizeof(arrow::StringArray::offset_type) * column->length(); -} - -template <> -ui64 GetArrayDataSizeImpl(const std::shared_ptr& column) { - auto typedColumn = std::static_pointer_cast(column); - return typedColumn->total_values_length() + sizeof(arrow::LargeStringArray::offset_type) * column->length(); -} - -template <> -ui64 GetArrayDataSizeImpl(const std::shared_ptr& column) { - auto typedColumn = std::static_pointer_cast(column); - return typedColumn->total_values_length() + sizeof(arrow::BinaryArray::offset_type) * column->length(); -} - -template <> -ui64 GetArrayDataSizeImpl(const std::shared_ptr& column) { - auto typedColumn = std::static_pointer_cast(column); - return typedColumn->total_values_length() + sizeof(arrow::LargeBinaryArray::offset_type) * column->length(); -} - -template <> -ui64 GetArrayDataSizeImpl(const std::shared_ptr& column) { - auto typedColumn = std::static_pointer_cast(column); - return typedColumn->byte_width() * typedColumn->length(); -} - -template <> -ui64 GetArrayDataSizeImpl(const std::shared_ptr& column) { - return sizeof(ui64) * 2 * column->length(); -} - -ui64 GetArrayDataSize(const std::shared_ptr& column) { - auto type = column->type(); - if (type->id() == arrow::Type::DICTIONARY) { - auto dictArray = static_pointer_cast(column); - return GetDictionarySize(dictArray); - } - ui64 bytes = 0; - bool success = SwitchTypeWithNull(type->id(), 
[&](TTypeWrapper typeHolder) { - Y_UNUSED(typeHolder); - bytes = GetArrayDataSizeImpl(column); - return true; - }); - - // Add null bit mask overhead if any. - if (HasNulls(column)) { - bytes += column->length() / 8 + 1; - } - - Y_DEBUG_ABORT_UNLESS(success, "Unsupported arrow type %s", type->ToString().data()); - return bytes; -} - NKikimr::NArrow::TSerializedBatch TSerializedBatch::Build(std::shared_ptr batch, const TBatchSplitttingContext& context) { - std::optional specialKeys; + std::optional specialKeysPayload; + std::optional specialKeysFull; if (context.GetFieldsForSpecialKeys().size()) { - specialKeys = TFirstLastSpecialKeys(batch, context.GetFieldsForSpecialKeys()); + TFirstLastSpecialKeys specialKeys(batch, context.GetFieldsForSpecialKeys()); + specialKeysPayload = specialKeys.SerializePayloadToString(); + specialKeysFull = specialKeys.SerializeFullToString(); } - return TSerializedBatch(NArrow::SerializeSchema(*batch->schema()), NArrow::SerializeBatchNoCompression(batch), batch->num_rows(), NArrow::GetBatchDataSize(batch), specialKeys); + return TSerializedBatch(NArrow::SerializeBatchNoCompression(batch), batch->num_rows(), + NArrow::GetBatchDataSize(batch), specialKeysPayload, specialKeysFull); } TConclusionStatus TSerializedBatch::BuildWithLimit(std::shared_ptr batch, const TBatchSplitttingContext& context, std::optional& sbL, std::optional& sbR) { @@ -290,7 +103,7 @@ TConclusion> TSerializedBatch::BuildWithLimit(std: } TString TSerializedBatch::DebugString() const { - return TStringBuilder() << "(data_size=" << Data.size() << ";schema_data_size=" << SchemaData.size() << ";rows_count=" << RowsCount << ";raw_bytes=" << RawBytes << ";)"; + return TStringBuilder() << "(data_size=" << Data.size() << ";rows_count=" << RowsCount << ";raw_bytes=" << RawBytes << ";)"; } } diff --git a/ydb/core/formats/arrow/size_calcer.h b/ydb/core/formats/arrow/size_calcer.h index 410d5517bc86..00e55f9ca20b 100644 --- a/ydb/core/formats/arrow/size_calcer.h +++ 
b/ydb/core/formats/arrow/size_calcer.h @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -11,43 +12,6 @@ namespace NKikimr::NArrow { -class TRowSizeCalculator { -private: - std::shared_ptr Batch; - ui32 CommonSize = 0; - std::vector BinaryColumns; - std::vector StringColumns; - bool Prepared = false; - const ui32 AlignBitsCount = 1; - - ui32 GetBitWidthAligned(const ui32 bitWidth) const { - if (AlignBitsCount == 1) { - return bitWidth; - } - ui32 result = bitWidth / AlignBitsCount; - if (bitWidth % AlignBitsCount) { - result += 1; - } - result *= AlignBitsCount; - return result; - } - -public: - - ui64 GetApproxSerializeSize(const ui64 dataSize) const { - return Max(dataSize * 1.05, dataSize + Batch->num_columns() * 8); - } - - TRowSizeCalculator(const ui32 alignBitsCount) - : AlignBitsCount(alignBitsCount) - { - - } - bool InitBatch(const std::shared_ptr& batch); - ui32 GetRowBitWidth(const ui32 row) const; - ui32 GetRowBytesSize(const ui32 row) const; -}; - class TBatchSplitttingContext { private: YDB_ACCESSOR(ui64, SizeLimit, 6 * 1024 * 1024); @@ -70,23 +34,29 @@ class TBatchSplitttingContext { class TSerializedBatch { private: - YDB_READONLY_DEF(TString, SchemaData); YDB_READONLY_DEF(TString, Data); YDB_READONLY(ui32, RowsCount, 0); YDB_READONLY(ui32, RawBytes, 0); - std::optional SpecialKeys; + std::optional SpecialKeysFull; + std::optional SpecialKeysPayload; + public: size_t GetSize() const { return Data.size(); } - const TFirstLastSpecialKeys& GetSpecialKeysSafe() const { - AFL_VERIFY(SpecialKeys); - return *SpecialKeys; + const TString& GetSpecialKeysPayloadSafe() const { + AFL_VERIFY(SpecialKeysPayload); + return *SpecialKeysPayload; + } + + const TString& GetSpecialKeysFullSafe() const { + AFL_VERIFY(SpecialKeysFull); + return *SpecialKeysFull; } bool HasSpecialKeys() const { - return !!SpecialKeys; + return !!SpecialKeysFull; } TString DebugString() const; @@ -95,27 +65,17 @@ class TSerializedBatch { static TConclusionStatus 
BuildWithLimit(std::shared_ptr batch, const TBatchSplitttingContext& context, std::optional& sbL, std::optional& sbR); static TSerializedBatch Build(std::shared_ptr batch, const TBatchSplitttingContext& context); - TSerializedBatch(TString&& schemaData, TString&& data, const ui32 rowsCount, const ui32 rawBytes, const std::optional& specialKeys) - : SchemaData(schemaData) - , Data(data) + TSerializedBatch(TString&& data, const ui32 rowsCount, const ui32 rawBytes, + const std::optional& specialKeysPayload, const std::optional& specialKeysFull) + : Data(data) , RowsCount(rowsCount) , RawBytes(rawBytes) - , SpecialKeys(specialKeys) - { - + , SpecialKeysFull(specialKeysFull) + , SpecialKeysPayload(specialKeysPayload) { + AFL_VERIFY(!!SpecialKeysPayload == !!SpecialKeysFull); } }; TConclusion> SplitByBlobSize(const std::shared_ptr& batch, const TBatchSplitttingContext& context); -// Return size in bytes including size of bitmap mask -ui64 GetBatchDataSize(const std::shared_ptr& batch); -ui64 GetTableDataSize(const std::shared_ptr& batch); -// Return size in bytes including size of bitmap mask -ui64 GetArrayMemorySize(const std::shared_ptr& data); -ui64 GetBatchMemorySize(const std::shared_ptr&batch); -ui64 GetTableMemorySize(const std::shared_ptr& batch); -// Return size in bytes *not* including size of bitmap mask -ui64 GetArrayDataSize(const std::shared_ptr& column); - } diff --git a/ydb/core/formats/arrow/special_keys.cpp b/ydb/core/formats/arrow/special_keys.cpp index 0b97fb3f25ed..0745fad0e559 100644 --- a/ydb/core/formats/arrow/special_keys.cpp +++ b/ydb/core/formats/arrow/special_keys.cpp @@ -27,12 +27,12 @@ NKikimr::NArrow::TReplaceKey TSpecialKeys::GetKeyByIndex(const ui32 position, co } } -TString TSpecialKeys::SerializeToString() const { - return NArrow::NSerialization::TSerializerContainer::GetDefaultSerializer()->SerializeFull(Data); +TString TSpecialKeys::SerializePayloadToString() const { + return 
NArrow::NSerialization::TSerializerContainer::GetFastestSerializer()->SerializePayload(Data); } -TString TSpecialKeys::SerializeToStringDataOnlyNoCompression() const { - return NArrow::SerializeBatchNoCompression(Data); +TString TSpecialKeys::SerializeFullToString() const { + return NArrow::NSerialization::TSerializerContainer::GetFastestSerializer()->SerializeFull(Data); } ui64 TSpecialKeys::GetMemoryBytes() const { @@ -50,13 +50,17 @@ TFirstLastSpecialKeys::TFirstLastSpecialKeys(const std::shared_ptr indexes = {0}; - if (batch->num_rows() > 1) { - indexes.emplace_back(batch->num_rows() - 1); - } + if (keyBatch->num_rows() <= 2) { + Data = keyBatch; + } else { + std::vector indexes = { 0 }; + if (batch->num_rows() > 1) { + indexes.emplace_back(batch->num_rows() - 1); + } - Data = NArrow::CopyRecords(keyBatch, indexes); - Y_ABORT_UNLESS(Data->num_rows() == 1 || Data->num_rows() == 2); + Data = NArrow::CopyRecords(keyBatch, indexes); + Y_ABORT_UNLESS(Data->num_rows() == 1 || Data->num_rows() == 2); + } } TMinMaxSpecialKeys::TMinMaxSpecialKeys(std::shared_ptr batch, const std::shared_ptr& schema) { diff --git a/ydb/core/formats/arrow/special_keys.h b/ydb/core/formats/arrow/special_keys.h index d56e658fbb68..7559b4a5f7fb 100644 --- a/ydb/core/formats/arrow/special_keys.h +++ b/ydb/core/formats/arrow/special_keys.h @@ -1,5 +1,8 @@ #pragma once -#include + +#include "arrow_helpers.h" + +#include #include namespace NKikimr::NArrow { @@ -22,8 +25,6 @@ class TSpecialKeys { public: ui64 GetMemoryBytes() const; - TString SerializeToStringDataOnlyNoCompression() const; - TSpecialKeys(const TString& data, const std::shared_ptr& schema) { Data = NArrow::DeserializeBatch(data, schema); Y_ABORT_UNLESS(Data); @@ -34,7 +35,8 @@ class TSpecialKeys { Y_ABORT_UNLESS(DeserializeFromString(data)); } - TString SerializeToString() const; + TString SerializePayloadToString() const; + TString SerializeFullToString() const; ui64 GetMemorySize() const; }; diff --git 
a/ydb/core/formats/arrow/splitter/scheme_info.cpp b/ydb/core/formats/arrow/splitter/scheme_info.cpp new file mode 100644 index 000000000000..35a8fcc5c035 --- /dev/null +++ b/ydb/core/formats/arrow/splitter/scheme_info.cpp @@ -0,0 +1,13 @@ +#include "scheme_info.h" + +namespace NKikimr::NArrow::NSplitter { + +NAccessor::TColumnSaver ISchemaDetailInfo::GetColumnSaver(const ui32 columnId) const { + auto saver = DoGetColumnSaver(columnId); + if (OverrideSerializer) { + saver.AddSerializerWithBorder(Max(), *OverrideSerializer); + } + return saver; +} + +} diff --git a/ydb/core/formats/arrow/splitter/scheme_info.h b/ydb/core/formats/arrow/splitter/scheme_info.h new file mode 100644 index 000000000000..0bb30e97300a --- /dev/null +++ b/ydb/core/formats/arrow/splitter/scheme_info.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include + +#include +#include + +namespace NKikimr::NArrow::NSplitter { + +class ISchemaDetailInfo { +private: + YDB_ACCESSOR_DEF(std::optional, OverrideSerializer); + +protected: + virtual NAccessor::TColumnSaver DoGetColumnSaver(const ui32 columnId) const = 0; + +public: + using TPtr = std::shared_ptr; + virtual ~ISchemaDetailInfo() = default; + virtual ui32 GetColumnId(const std::string& fieldName) const = 0; + NAccessor::TColumnSaver GetColumnSaver(const ui32 columnId) const; + virtual std::shared_ptr GetField(const ui32 columnId) const = 0; + virtual std::optional GetColumnSerializationStats(const ui32 columnId) const = 0; + virtual bool NeedMinMaxForColumn(const ui32 columnId) const = 0; + virtual bool IsSortedColumn(const ui32 columnId) const = 0; + virtual std::optional GetBatchSerializationStats(const std::shared_ptr& rb) const = 0; +}; +} // namespace NKikimr::NArrow::NSplitter diff --git a/ydb/core/formats/arrow/splitter/simple.cpp b/ydb/core/formats/arrow/splitter/simple.cpp new file mode 100644 index 000000000000..a113084b531b --- /dev/null +++ b/ydb/core/formats/arrow/splitter/simple.cpp @@ -0,0 +1,200 @@ +#include "simple.h" + +#include 
+ +#include +#include + +namespace NKikimr::NArrow::NSplitter { + +std::vector TSimpleSplitter::Split(const std::shared_ptr& data, const std::shared_ptr& field, const ui32 maxBlobSize) const { + AFL_VERIFY(data); + AFL_VERIFY(field); + auto schema = std::make_shared(arrow::FieldVector{field}); + auto batch = arrow::RecordBatch::Make(schema, data->length(), {data}); + return Split(batch, maxBlobSize); +} + +class TSplitChunk { +private: + std::shared_ptr Data; + YDB_READONLY_DEF(std::optional, Result); + ui32 SplitFactor = 0; + ui32 Iterations = 0; + ui32 MaxBlobSize = 8 * 1024 * 1024; + NAccessor::TColumnSaver ColumnSaver; + +public: + TSplitChunk(const ui32 baseSplitFactor, const ui32 maxBlobSize, const std::shared_ptr& data, + const NAccessor::TColumnSaver& columnSaver) + : Data(data) + , SplitFactor(baseSplitFactor) + , MaxBlobSize(maxBlobSize) + , ColumnSaver(columnSaver) + { + AFL_VERIFY(Data && Data->num_rows()); + AFL_VERIFY(SplitFactor); + } + + TSplitChunk(const ui32 baseSplitFactor, const ui32 maxBlobSize, const std::shared_ptr& data, TString&& serializedData, + const NAccessor::TColumnSaver& columnSaver) + : Data(data) + , Result(TSaverSplittedChunk(data, std::move(serializedData))) + , SplitFactor(baseSplitFactor) + , MaxBlobSize(maxBlobSize) + , ColumnSaver(columnSaver) + { + AFL_VERIFY(Data && Data->num_rows()); + AFL_VERIFY(SplitFactor); + } + + std::vector Split() { + while (true) { + AFL_VERIFY(!Result); + AFL_VERIFY(++Iterations < 100); + AFL_VERIFY(SplitFactor <= Data->num_rows())("factor", SplitFactor)("records", Data->num_rows())("iteration", Iterations)( + "size", NArrow::GetBatchDataSize(Data)); + bool found = false; + std::vector result; + if (SplitFactor == 1) { + TString blob = ColumnSaver.Apply(Data); + if (blob.size() < MaxBlobSize) { + Result = TSaverSplittedChunk(Data, std::move(blob)); + found = true; + result.emplace_back(*this); + } else { + TBatchSerializationStat stats(blob.size(), Data->num_rows(), 
NArrow::GetBatchDataSize(Data)); + SplitFactor = stats.PredictOptimalSplitFactor(Data->num_rows(), MaxBlobSize).value_or(1); + if (SplitFactor == 1) { + SplitFactor = 2; + } + AFL_VERIFY(Data->num_rows() > 1); + } + } else { + TLinearSplitInfo linearSplitting = TSimpleSplitter::GetLinearSplittingByMax(Data->num_rows(), Data->num_rows() / SplitFactor); + TStringBuilder sb; + std::optional badStartPosition; + ui32 badBatchRecordsCount = 0; + ui64 badBatchSerializedSize = 0; + ui32 badBatchCount = 0; + for (auto it = linearSplitting.StartIterator(); it.IsValid(); it.Next()) { + auto slice = Data->Slice(it.GetPosition(), it.GetCurrentPackSize()); + TString blob = ColumnSaver.Apply(slice); + if (blob.size() >= MaxBlobSize) { + if (!badStartPosition) { + badStartPosition = it.GetPosition(); + } + badBatchSerializedSize += blob.size(); + badBatchRecordsCount += it.GetCurrentPackSize(); + ++badBatchCount; + Y_ABORT_UNLESS(!linearSplitting.IsMinimalGranularity()); + } else { + if (badStartPosition) { + AFL_VERIFY(badBatchRecordsCount && badBatchCount)("count", badBatchCount)("records", badBatchRecordsCount); + auto badSlice = Data->Slice(*badStartPosition, badBatchRecordsCount); + TBatchSerializationStat stats(badBatchSerializedSize, badBatchRecordsCount, Max()); + result.emplace_back(std::max(stats.PredictOptimalSplitFactor(badBatchRecordsCount, MaxBlobSize).value_or(1), badBatchCount) + 1, MaxBlobSize, badSlice, ColumnSaver); + badStartPosition = {}; + badBatchRecordsCount = 0; + badBatchCount = 0; + badBatchSerializedSize = 0; + } + found = true; + result.emplace_back(1, MaxBlobSize, slice, std::move(blob), ColumnSaver); + } + } + if (badStartPosition) { + auto badSlice = Data->Slice(*badStartPosition, badBatchRecordsCount); + TBatchSerializationStat stats(badBatchSerializedSize, badBatchRecordsCount, Max()); + result.emplace_back(std::max(stats.PredictOptimalSplitFactor(badBatchRecordsCount, MaxBlobSize).value_or(1), badBatchCount) + 1, MaxBlobSize, badSlice, 
ColumnSaver); + } + ++SplitFactor; + } + if (found) { + return result; + } + } + AFL_VERIFY(false); + return {}; + } +}; + +std::vector TSimpleSplitter::Split(const std::shared_ptr& data, const ui32 maxBlobSize) const { + AFL_VERIFY(data->num_rows()); + TSplitChunk baseChunk(Stats ? Stats->PredictOptimalSplitFactor(data->num_rows(), maxBlobSize).value_or(1) : 1, maxBlobSize, data, ColumnSaver); + std::vector chunks = {baseChunk}; + for (auto it = chunks.begin(); it != chunks.end(); ) { + AFL_VERIFY(chunks.size() < 100); + if (!!it->GetResult()) { + ++it; + continue; + } + std::vector splitted = it->Split(); + if (splitted.size() == 1) { + *it = splitted.front(); + } else { + it = chunks.insert(it, splitted.begin(), splitted.end()); + chunks.erase(it + splitted.size()); + } + } + std::vector result; + for (auto&& i : chunks) { + AFL_VERIFY(i.GetResult()); + result.emplace_back(*i.GetResult()); + } + return result; +} + +std::vector TSimpleSplitter::SplitByRecordsCount(std::shared_ptr data, const std::vector& recordsCount) const { + std::vector result; + ui64 position = 0; + for (auto&& i : recordsCount) { + auto subData = data->Slice(position, i); + result.emplace_back(subData, ColumnSaver.Apply(subData)); + position += i; + } + Y_ABORT_UNLESS(position == (ui64)data->num_rows()); + return result; +} + +std::vector TSimpleSplitter::SplitBySizes(std::shared_ptr data, const TString& dataSerialization, const std::vector& splitPartSizesExt) const { + auto splitPartSizesLocal = splitPartSizesExt; + Y_ABORT_UNLESS(data); + { + ui32 sumSizes = 0; + for (auto&& i : splitPartSizesExt) { + sumSizes += i; + } + Y_ABORT_UNLESS(sumSizes <= dataSerialization.size()); + + if (sumSizes < dataSerialization.size()) { + splitPartSizesLocal.emplace_back(dataSerialization.size() - sumSizes); + } + } + std::vector recordsCount; + i64 remainedRecordsCount = data->num_rows(); + const double rowsPerByte = 1.0 * data->num_rows() / dataSerialization.size(); + i32 remainedParts = 
splitPartSizesLocal.size(); + for (ui32 idx = 0; idx < splitPartSizesLocal.size(); ++idx) { + AFL_VERIFY(remainedRecordsCount >= remainedParts)("remained_records_count", remainedRecordsCount) + ("remained_parts", remainedParts)("idx", idx)("size", splitPartSizesLocal.size())("sizes", JoinSeq(",", splitPartSizesLocal))("data_size", dataSerialization.size()); + --remainedParts; + i64 expectedRecordsCount = rowsPerByte * splitPartSizesLocal[idx]; + if (expectedRecordsCount < 1) { + expectedRecordsCount = 1; + } else if (remainedRecordsCount < expectedRecordsCount + remainedParts) { + expectedRecordsCount = remainedRecordsCount - remainedParts; + } + if (idx + 1 == splitPartSizesLocal.size()) { + expectedRecordsCount = remainedRecordsCount; + } + Y_ABORT_UNLESS(expectedRecordsCount); + recordsCount.emplace_back(expectedRecordsCount); + remainedRecordsCount -= expectedRecordsCount; + Y_ABORT_UNLESS(remainedRecordsCount >= 0); + } + Y_ABORT_UNLESS(remainedRecordsCount == 0); + return SplitByRecordsCount(data, recordsCount); +} + +} diff --git a/ydb/core/formats/arrow/splitter/simple.h b/ydb/core/formats/arrow/splitter/simple.h new file mode 100644 index 000000000000..1405d3a6dc20 --- /dev/null +++ b/ydb/core/formats/arrow/splitter/simple.h @@ -0,0 +1,120 @@ +#pragma once +#include +#include "scheme_info.h" + +namespace NKikimr::NArrow::NSplitter { + +class TSaverSplittedChunk { +private: + YDB_READONLY_DEF(std::shared_ptr, SlicedBatch); + YDB_READONLY_DEF(TString, SerializedChunk); +public: + ui32 GetRecordsCount() const { + return SlicedBatch->num_rows(); + } + + TSaverSplittedChunk(const std::shared_ptr& batch, TString&& serializedChunk) + : SlicedBatch(batch) + , SerializedChunk(std::move(serializedChunk)) { + Y_ABORT_UNLESS(SlicedBatch); + Y_ABORT_UNLESS(SlicedBatch->num_rows()); + } +}; + +class TLinearSplitInfo { +private: + YDB_READONLY(ui64, PacksCount, 0); + YDB_READONLY(ui64, PackSize, 0); + YDB_READONLY(ui64, ObjectsCount, 0); +public: + bool 
IsMinimalGranularity() const { + return PackSize == 1; + } + + TLinearSplitInfo(const ui64 packsCount, const ui64 packSize, const ui64 objectsCount) + : PacksCount(packsCount) + , PackSize(packSize) + , ObjectsCount(objectsCount) + { + AFL_VERIFY(objectsCount >= packsCount)("objects_count", objectsCount)("packs_count", packsCount); + AFL_VERIFY(PackSize); + AFL_VERIFY(PacksCount); + } + + class TIterator { + private: + const TLinearSplitInfo& Owner; + YDB_READONLY(ui64, Position, 0); + YDB_READONLY(ui64, CurrentPackSize, 0); + ui64 PackIdx = 0; + void InitPack() { + CurrentPackSize = (PackIdx + 1 == Owner.GetPacksCount()) ? Owner.ObjectsCount - Position : Owner.GetPackSize(); + } + public: + explicit TIterator(const TLinearSplitInfo& owner) + : Owner(owner) + { + InitPack(); + } + + bool IsValid() const { + if (Position < Owner.GetObjectsCount() && PackIdx < Owner.GetPacksCount()) { + return true; + } else { + Y_ABORT_UNLESS(Position == Owner.GetObjectsCount() && PackIdx == Owner.GetPacksCount()); + return false; + } + } + + bool Next() { + Y_ABORT_UNLESS(IsValid()); + Position += CurrentPackSize; + ++PackIdx; + InitPack(); + return IsValid(); + } + }; + + TIterator StartIterator() const { + return TIterator(*this); + } +}; + +class TSimpleSplitter { +private: + NAccessor::TColumnSaver ColumnSaver; + YDB_ACCESSOR_DEF(std::optional, Stats); +public: + explicit TSimpleSplitter(const NAccessor::TColumnSaver& columnSaver) + : ColumnSaver(columnSaver) + { + + } + + static TLinearSplitInfo GetOptimalLinearSplitting(const ui64 objectsCount, const i64 optimalPackSizeExt) { + const i64 optimalPackSize = optimalPackSizeExt ? 
optimalPackSizeExt : 1; + const ui32 countPacksMax = std::max(1, (ui32)floor(1.0 * objectsCount / optimalPackSize)); + const ui32 countPacksMin = std::max(1, (ui32)ceil(1.0 * objectsCount / optimalPackSize)); + const ui32 stepPackMax = objectsCount / countPacksMin; + const ui32 stepPackMin = objectsCount / countPacksMax; + if (std::abs(optimalPackSize - stepPackMax) > std::abs(optimalPackSize - stepPackMin)) { + return TLinearSplitInfo(countPacksMax, stepPackMin, objectsCount); + } else { + return TLinearSplitInfo(countPacksMin, stepPackMax, objectsCount); + } + } + + static TLinearSplitInfo GetLinearSplittingByMax(const ui64 objectsCount, const ui64 maxPackSizeExt) { + const ui64 maxPackSize = maxPackSizeExt ? maxPackSizeExt : 1; + const ui32 countPacksMax = std::max(1, (ui32)floor(1.0 * objectsCount / maxPackSize)); + const ui32 stepPackMin = objectsCount / countPacksMax; + return TLinearSplitInfo(countPacksMax, stepPackMin, objectsCount); + } + + std::vector Split(const std::shared_ptr& data, const std::shared_ptr& field, const ui32 maxBlobSize) const; + std::vector Split(const std::shared_ptr& data, const ui32 maxBlobSize) const; + std::vector SplitByRecordsCount(std::shared_ptr data, const std::vector& recordsCount) const; + std::vector SplitBySizes(std::shared_ptr data, const TString& dataSerialization, const std::vector& splitPartSizesExt) const; +}; + +} diff --git a/ydb/core/formats/arrow/splitter/ya.make b/ydb/core/formats/arrow/splitter/ya.make new file mode 100644 index 000000000000..70db86e75d4d --- /dev/null +++ b/ydb/core/formats/arrow/splitter/ya.make @@ -0,0 +1,17 @@ +LIBRARY() + +SRCS( + simple.cpp + scheme_info.cpp +) + +PEERDIR( + contrib/libs/apache/arrow + ydb/library/actors/core + ydb/library/conclusion + ydb/library/formats/arrow/splitter + ydb/library/formats/arrow/common + ydb/core/formats/arrow/serializer +) + +END() diff --git a/ydb/core/formats/arrow/ssa_program_optimizer.cpp b/ydb/core/formats/arrow/ssa_program_optimizer.cpp index 
f55f63110f7b..ff1e5a5cb38c 100644 --- a/ydb/core/formats/arrow/ssa_program_optimizer.cpp +++ b/ydb/core/formats/arrow/ssa_program_optimizer.cpp @@ -1,5 +1,7 @@ #include "ssa_program_optimizer.h" +#include + namespace NKikimr::NSsa { namespace { @@ -11,7 +13,8 @@ void ReplaceCountAll(TProgram& program) { Y_ABORT_UNLESS(step); for (auto& groupBy : step->MutableGroupBy()) { - if (groupBy.GetOperation() == EAggregate::Count && groupBy.GetArguments().empty()) { + if (groupBy.GetOperation() == EAggregate::NumRows) { + AFL_VERIFY(groupBy.GetArguments().empty()); if (step->GetGroupByKeys().size()) { groupBy.MutableArguments().push_back(step->GetGroupByKeys()[0]); } else { diff --git a/ydb/core/formats/arrow/ssa_runtime_version.h b/ydb/core/formats/arrow/ssa_runtime_version.h index 500074420fda..cdf3f6bdc09d 100644 --- a/ydb/core/formats/arrow/ssa_runtime_version.h +++ b/ydb/core/formats/arrow/ssa_runtime_version.h @@ -19,7 +19,7 @@ namespace NKikimr::NSsa { // Bump this version every time incompatible runtime functions are introduced. 
#ifndef SSA_RUNTIME_VERSION -#define SSA_RUNTIME_VERSION 5U +#define SSA_RUNTIME_VERSION 4U #endif // History: diff --git a/ydb/core/formats/arrow/switch/switch_type.h b/ydb/core/formats/arrow/switch/switch_type.h index 78a9dfa04360..383ad1567c4b 100644 --- a/ydb/core/formats/arrow/switch/switch_type.h +++ b/ydb/core/formats/arrow/switch/switch_type.h @@ -1,7 +1,7 @@ #pragma once #include #include -#include +#include #include #include @@ -13,106 +13,6 @@ extern "C" { namespace NKikimr::NArrow { -template -struct TTypeWrapper -{ - using T = TType; -}; - -template -TResult SwitchTypeImpl(arrow::Type::type typeId, TFunc&& f) { - switch (typeId) { - case arrow::Type::NA: { - if constexpr (EnableNull) { - return f(TTypeWrapper()); - } - break; - } - case arrow::Type::BOOL: - return f(TTypeWrapper()); - case arrow::Type::UINT8: - return f(TTypeWrapper()); - case arrow::Type::INT8: - return f(TTypeWrapper()); - case arrow::Type::UINT16: - return f(TTypeWrapper()); - case arrow::Type::INT16: - return f(TTypeWrapper()); - case arrow::Type::UINT32: - return f(TTypeWrapper()); - case arrow::Type::INT32: - return f(TTypeWrapper()); - case arrow::Type::UINT64: - return f(TTypeWrapper()); - case arrow::Type::INT64: - return f(TTypeWrapper()); - case arrow::Type::HALF_FLOAT: - return f(TTypeWrapper()); - case arrow::Type::FLOAT: - return f(TTypeWrapper()); - case arrow::Type::DOUBLE: - return f(TTypeWrapper()); - case arrow::Type::STRING: - return f(TTypeWrapper()); - case arrow::Type::BINARY: - return f(TTypeWrapper()); - case arrow::Type::FIXED_SIZE_BINARY: - return f(TTypeWrapper()); - case arrow::Type::DATE32: - return f(TTypeWrapper()); - case arrow::Type::DATE64: - return f(TTypeWrapper()); - case arrow::Type::TIMESTAMP: - return f(TTypeWrapper()); - case arrow::Type::TIME32: - return f(TTypeWrapper()); - case arrow::Type::TIME64: - return f(TTypeWrapper()); - case arrow::Type::INTERVAL_MONTHS: - return f(TTypeWrapper()); - case arrow::Type::DECIMAL: - return 
f(TTypeWrapper()); - case arrow::Type::DURATION: - return f(TTypeWrapper()); - case arrow::Type::LARGE_STRING: - return f(TTypeWrapper()); - case arrow::Type::LARGE_BINARY: - return f(TTypeWrapper()); - case arrow::Type::DECIMAL256: - case arrow::Type::DENSE_UNION: - case arrow::Type::DICTIONARY: - case arrow::Type::EXTENSION: - case arrow::Type::FIXED_SIZE_LIST: - case arrow::Type::INTERVAL_DAY_TIME: - case arrow::Type::LARGE_LIST: - case arrow::Type::LIST: - case arrow::Type::MAP: - case arrow::Type::MAX_ID: - case arrow::Type::SPARSE_UNION: - case arrow::Type::STRUCT: - break; - } - - return defaultValue; -} - -template -bool SwitchType(arrow::Type::type typeId, TFunc&& f) { - return SwitchTypeImpl(typeId, std::move(f)); -} - -template -bool SwitchTypeWithNull(arrow::Type::type typeId, TFunc&& f) { - return SwitchType(typeId, std::move(f)); -} - -template -bool SwitchArrayType(const arrow::Datum& column, TFunc&& f) { - auto type = column.type(); - Y_ABORT_UNLESS(type); - return SwitchType(type->id(), std::forward(f)); -} - /** * @brief Function to switch yql type correctly and uniformly converting it to arrow type using callback * @@ -227,74 +127,4 @@ inline bool IsPrimitiveYqlType(const NScheme::TTypeInfo& typeInfo) { return false; } -template -bool Append(arrow::ArrayBuilder& builder, const typename T::c_type& value) { - using TBuilder = typename arrow::TypeTraits::BuilderType; - - TStatusValidator::Validate(static_cast(builder).Append(value)); - return true; -} - -template -bool Append(arrow::ArrayBuilder& builder, arrow::util::string_view value) { - using TBuilder = typename arrow::TypeTraits::BuilderType; - - TStatusValidator::Validate(static_cast(builder).Append(value)); - return true; -} - -template -bool Append(arrow::ArrayBuilder& builder, const typename T::c_type* values, size_t size) { - using TBuilder = typename arrow::NumericBuilder; - - TStatusValidator::Validate(static_cast(builder).AppendValues(values, size)); - return true; -} - -template -bool 
Append(arrow::ArrayBuilder& builder, const std::vector& values) { - using TBuilder = typename arrow::NumericBuilder; - - TStatusValidator::Validate(static_cast(builder).AppendValues(values.data(), values.size())); - return true; -} - -template -[[nodiscard]] bool Append(T& builder, const arrow::Array& array, int position, ui64* recordSize = nullptr) { - Y_DEBUG_ABORT_UNLESS(builder.type()->id() == array.type_id()); - return SwitchType(array.type_id(), [&](const auto& type) { - using TWrap = std::decay_t; - using TArray = typename arrow::TypeTraits::ArrayType; - using TBuilder = typename arrow::TypeTraits::BuilderType; - - auto& typedArray = static_cast(array); - auto& typedBuilder = static_cast(builder); - - if (typedArray.IsNull(position)) { - TStatusValidator::Validate(typedBuilder.AppendNull()); - if (recordSize) { - *recordSize += 4; - } - return true; - } else { - if constexpr (!arrow::has_string_view::value) { - TStatusValidator::Validate(typedBuilder.Append(typedArray.GetView(position))); - if (recordSize) { - *recordSize += sizeof(typedArray.GetView(position)); - } - return true; - } - if constexpr (arrow::has_string_view::value) { - TStatusValidator::Validate(typedBuilder.Append(typedArray.GetView(position))); - if (recordSize) { - *recordSize += typedArray.GetView(position).size(); - } - return true; - } - } - Y_ABORT_UNLESS(false, "unpredictable variant"); - return false; - }); -} - } diff --git a/ydb/core/formats/arrow/switch/ya.make b/ydb/core/formats/arrow/switch/ya.make index e11e5e070ca6..622e9bf2a604 100644 --- a/ydb/core/formats/arrow/switch/ya.make +++ b/ydb/core/formats/arrow/switch/ya.make @@ -4,11 +4,11 @@ PEERDIR( contrib/libs/apache/arrow ydb/core/scheme_types ydb/library/actors/core + ydb/library/formats/arrow/switch ) SRCS( switch_type.cpp - compare.cpp ) END() diff --git a/ydb/core/formats/arrow/transformer/dictionary.h b/ydb/core/formats/arrow/transformer/dictionary.h index da0c13a5189a..4229c0ed8071 100644 --- 
a/ydb/core/formats/arrow/transformer/dictionary.h +++ b/ydb/core/formats/arrow/transformer/dictionary.h @@ -1,5 +1,5 @@ #pragma once -#include "abstract.h" +#include namespace NKikimr::NArrow::NTransformation { diff --git a/ydb/core/formats/arrow/transformer/ya.make b/ydb/core/formats/arrow/transformer/ya.make index 3a1c0c4c12ab..8ca15c923dd9 100644 --- a/ydb/core/formats/arrow/transformer/ya.make +++ b/ydb/core/formats/arrow/transformer/ya.make @@ -3,12 +3,11 @@ LIBRARY() PEERDIR( contrib/libs/apache/arrow ydb/core/formats/arrow/dictionary + ydb/library/formats/arrow/transformer ) SRCS( - abstract.cpp dictionary.cpp - composite.cpp ) END() diff --git a/ydb/core/formats/arrow/ut/ut_arrow.cpp b/ydb/core/formats/arrow/ut/ut_arrow.cpp index da620d70fa30..b12fc5561b12 100644 --- a/ydb/core/formats/arrow/ut/ut_arrow.cpp +++ b/ydb/core/formats/arrow/ut/ut_arrow.cpp @@ -185,11 +185,6 @@ struct TDataRow { } }; - -std::shared_ptr GetColumn(const arrow::Table& table, int i, int chunk = 0) { - return table.column(i)->chunk(chunk); -} - std::shared_ptr GetColumn(const arrow::RecordBatch& batch, int i) { return batch.column(i); } @@ -526,22 +521,6 @@ bool CheckSorted(const std::shared_ptr& batch, bool desc = f } Y_UNIT_TEST_SUITE(ArrowTest) { - Y_UNIT_TEST(Basic) { - std::vector rows = TestRows(); - - std::shared_ptr table = TDataRowTableBuilder::Build(rows); - - auto expectedSchema = TDataRow::MakeArrowSchema(); - UNIT_ASSERT_EQUAL(expectedSchema->Equals(*table->schema()), true); - - std::vector readRows = ToVector(table); - - UNIT_ASSERT_EQUAL(rows.size(), readRows.size()); - for (size_t i = 0; i < rows.size(); ++i) { - UNIT_ASSERT_EQUAL(rows[i], readRows[i]); - } - } - Y_UNIT_TEST(BatchBuilder) { std::vector rows = TestRows(); diff --git a/ydb/core/formats/arrow/ut/ut_dictionary.cpp b/ydb/core/formats/arrow/ut/ut_dictionary.cpp index c3df2c6a30f0..02a9dc9b3e69 100644 --- a/ydb/core/formats/arrow/ut/ut_dictionary.cpp +++ b/ydb/core/formats/arrow/ut/ut_dictionary.cpp @@ -1,10 
+1,10 @@ #include #include #include -#include -#include -#include #include +#include +#include +#include Y_UNIT_TEST_SUITE(Dictionary) { diff --git a/ydb/core/formats/arrow/ut/ut_hash.cpp b/ydb/core/formats/arrow/ut/ut_hash.cpp index 3255d430352d..4c64c74895cb 100644 --- a/ydb/core/formats/arrow/ut/ut_hash.cpp +++ b/ydb/core/formats/arrow/ut/ut_hash.cpp @@ -1,7 +1,7 @@ #include #include -#include #include +#include Y_UNIT_TEST_SUITE(Hash) { diff --git a/ydb/core/formats/arrow/ut/ya.make b/ydb/core/formats/arrow/ut/ya.make index c4c993ec3220..54fa4d357730 100644 --- a/ydb/core/formats/arrow/ut/ya.make +++ b/ydb/core/formats/arrow/ut/ya.make @@ -5,6 +5,7 @@ SIZE(SMALL) PEERDIR( contrib/libs/apache/arrow ydb/library/arrow_kernels + ydb/library/formats/arrow/simple_builder ydb/core/base # for NYql::NUdf alloc stuff used in binary_json @@ -26,7 +27,6 @@ SRCS( ut_arrow.cpp ut_program_step.cpp ut_dictionary.cpp - ut_size_calcer.cpp ut_column_filter.cpp ut_hash.cpp ) diff --git a/ydb/core/formats/arrow/ya.make b/ydb/core/formats/arrow/ya.make index d4bf1f8529c2..fa66d3a97154 100644 --- a/ydb/core/formats/arrow/ya.make +++ b/ydb/core/formats/arrow/ya.make @@ -7,16 +7,19 @@ LIBRARY() PEERDIR( contrib/libs/apache/arrow ydb/core/scheme + ydb/core/formats/arrow/accessor ydb/core/formats/arrow/serializer - ydb/core/formats/arrow/simple_builder ydb/core/formats/arrow/dictionary ydb/core/formats/arrow/transformer ydb/core/formats/arrow/reader + ydb/core/formats/arrow/save_load + ydb/core/formats/arrow/splitter ydb/core/formats/arrow/hash ydb/library/actors/core ydb/library/arrow_kernels ydb/library/binary_json ydb/library/dynumber + ydb/library/formats/arrow ydb/library/services ydb/library/yql/core/arrow_kernels/request ) @@ -44,14 +47,11 @@ SRCS( converter.cpp converter.h custom_registry.cpp - input_stream.h permutations.cpp program.cpp - replace_key.cpp size_calcer.cpp ssa_program_optimizer.cpp special_keys.cpp - simple_arrays_cache.cpp process_columns.cpp ) diff --git 
a/ydb/core/fq/libs/compute/ydb/control_plane/database_monitoring.cpp b/ydb/core/fq/libs/compute/ydb/control_plane/database_monitoring.cpp index f752bef3160e..33cec012a0f5 100644 --- a/ydb/core/fq/libs/compute/ydb/control_plane/database_monitoring.cpp +++ b/ydb/core/fq/libs/compute/ydb/control_plane/database_monitoring.cpp @@ -1,6 +1,8 @@ #include #include +#include + #include #include @@ -24,17 +26,9 @@ namespace NFq { class TComputeDatabaseMonitoringActor : public NActors::TActorBootstrapped { struct TCounters { ::NMonitoring::TDynamicCounterPtr Counters; - struct TCommonMetrics { - ::NMonitoring::TDynamicCounters::TCounterPtr Ok; - ::NMonitoring::TDynamicCounters::TCounterPtr Error; - ::NMonitoring::THistogramPtr LatencyMs; - }; - - TCommonMetrics CpuLoadRequest; - ::NMonitoring::TDynamicCounters::TCounterPtr InstantLoadPercentage; - ::NMonitoring::TDynamicCounters::TCounterPtr AverageLoadPercentage; - ::NMonitoring::TDynamicCounters::TCounterPtr QuotedLoadPercentage; - ::NMonitoring::TDynamicCounters::TCounterPtr AvailableLoadPercentage; + ::NMonitoring::TDynamicCounterPtr SubComponent; + + ::NMonitoring::THistogramPtr CpuLoadRequestLatencyMs; ::NMonitoring::TDynamicCounters::TCounterPtr TargetLoadPercentage; ::NMonitoring::TDynamicCounters::TCounterPtr PendingQueueSize; ::NMonitoring::TDynamicCounters::TCounterPtr PendingQueueOverload; @@ -48,21 +42,11 @@ class TComputeDatabaseMonitoringActor : public NActors::TActorBootstrappedGetSubgroup("component", "ComputeDatabaseMonitoring"); - auto subComponent = component->GetSubgroup("subcomponent", "CpuLoadRequest"); - RegisterCommonMetrics(CpuLoadRequest, subComponent); - InstantLoadPercentage = subComponent->GetCounter("InstantLoadPercentage", false); - AverageLoadPercentage = subComponent->GetCounter("AverageLoadPercentage", false); - QuotedLoadPercentage = subComponent->GetCounter("QuotedLoadPercentage", false); - AvailableLoadPercentage = subComponent->GetCounter("AvailableLoadPercentage", false); - 
TargetLoadPercentage = subComponent->GetCounter("TargetLoadPercentage", false); - PendingQueueSize = subComponent->GetCounter("PendingQueueSize", false); - PendingQueueOverload = subComponent->GetCounter("PendingQueueOverload", true); - } - - void RegisterCommonMetrics(TCommonMetrics& metrics, ::NMonitoring::TDynamicCounterPtr subComponent) { - metrics.Ok = subComponent->GetCounter("Ok", true); - metrics.Error = subComponent->GetCounter("Error", true); - metrics.LatencyMs = subComponent->GetHistogram("LatencyMs", GetLatencyHistogramBuckets()); + SubComponent = component->GetSubgroup("subcomponent", "CpuLoadRequest"); + CpuLoadRequestLatencyMs = SubComponent->GetHistogram("LatencyMs", GetLatencyHistogramBuckets()); + TargetLoadPercentage = SubComponent->GetCounter("TargetLoadPercentage", false); + PendingQueueSize = SubComponent->GetCounter("PendingQueueSize", false); + PendingQueueOverload = SubComponent->GetCounter("PendingQueueOverload", true); } static ::NMonitoring::IHistogramCollectorPtr GetLatencyHistogramBuckets() { @@ -75,15 +59,19 @@ class TComputeDatabaseMonitoringActor : public NActors::TActorBootstrapped(GetDuration(config.GetAverageLoadInterval(), TDuration::Seconds(10)), TDuration::Seconds(1))) , MaxClusterLoad(std::min(config.GetMaxClusterLoadPercentage(), 100) / 100.0) - , DefaultQueryLoad(config.GetDefaultQueryLoadPercentage() ? std::min(config.GetDefaultQueryLoadPercentage(), 100) / 100.0 : 0.1) , PendingQueueSize(config.GetPendingQueueSize()) , Strict(config.GetStrict()) - , CpuNumber(config.GetCpuNumber()) + , CpuQuotaManager( + GetDuration(config.GetMonitoringRequestDelay(), TDuration::Seconds(1)), + std::max(GetDuration(config.GetAverageLoadInterval(), TDuration::Seconds(10)), TDuration::Seconds(1)), + TDuration::Zero(), + config.GetDefaultQueryLoadPercentage() ? 
std::min(config.GetDefaultQueryLoadPercentage(), 100) / 100.0 : 0.1, + config.GetStrict(), + config.GetCpuNumber(), + Counters.SubComponent + ) { - *Counters.AvailableLoadPercentage = 100; *Counters.TargetLoadPercentage = static_cast(MaxClusterLoad * 100); } @@ -105,8 +93,8 @@ class TComputeDatabaseMonitoringActor : public NActors::TActorBootstrapped(InstantLoad, AverageLoad); - if (!Ready) { + auto response = std::make_unique(CpuQuotaManager.GetInstantLoad(), CpuQuotaManager.GetAverageLoad()); + if (!CpuQuotaManager.CheckLoadIsOutdated()) { response->Issues.AddIssue("CPU Load is unavailable"); } Send(ev->Sender, response.release(), 0, ev->Cookie); @@ -114,45 +102,20 @@ class TComputeDatabaseMonitoringActor : public NActors::TActorBootstrappedGet(); - - auto now = TInstant::Now(); - if (!response.Issues) { - auto delta = now - LastCpuLoad; - LastCpuLoad = now; - - if (response.CpuNumber) { - CpuNumber = response.CpuNumber; - } - - InstantLoad = response.InstantLoad; - // exponential moving average - if (!Ready || delta >= AverageLoadInterval) { - AverageLoad = InstantLoad; - QuotedLoad = InstantLoad; - } else { - auto ratio = static_cast(delta.GetValue()) / AverageLoadInterval.GetValue(); - AverageLoad = (1 - ratio) * AverageLoad + ratio * InstantLoad; - QuotedLoad = (1 - ratio) * QuotedLoad + ratio * InstantLoad; - } - Ready = true; - Counters.CpuLoadRequest.Ok->Inc(); - *Counters.InstantLoadPercentage = static_cast(InstantLoad * 100); - *Counters.AverageLoadPercentage = static_cast(AverageLoad * 100); - CheckPendingQueue(); - *Counters.QuotedLoadPercentage = static_cast(QuotedLoad * 100); - } else { + if (response.Issues) { LOG_E("CPU Load Request FAILED: " << response.Issues.ToOneLineString()); - Counters.CpuLoadRequest.Error->Inc(); - CheckLoadIsOutdated(); } - Counters.CpuLoadRequest.LatencyMs->Collect((now - StartCpuLoad).MilliSeconds()); + Counters.CpuLoadRequestLatencyMs->Collect((TInstant::Now() - StartCpuLoad).MilliSeconds()); + + 
CpuQuotaManager.UpdateCpuLoad(response.InstantLoad, response.CpuNumber, !response.Issues); + CheckPendingQueue(); // TODO: make load pulling reactive // 1. Long period (i.e. AverageLoadInterval/2) when idle (no requests) // 2. Active pulling when busy - if (MonitoringRequestDelay) { - Schedule(MonitoringRequestDelay, new NActors::TEvents::TEvWakeup()); + if (auto delay = CpuQuotaManager.GetMonitoringRequestDelay()) { + Schedule(delay, new NActors::TEvents::TEvWakeup()); } else { SendCpuLoadRequest(); } @@ -164,48 +127,24 @@ class TComputeDatabaseMonitoringActor : public NActors::TActorBootstrapped 1.0) { Send(ev->Sender, new TEvYdbCompute::TEvCpuQuotaResponse(-1, NYdb::EStatus::OVERLOADED, NYql::TIssues{NYql::TIssue{TStringBuilder{} << "Incorrect quota value (exceeds 1.0) " << request.Quota}}), 0, ev->Cookie); } else { - if (!request.Quota) { - request.Quota = DefaultQueryLoad; - } - CheckLoadIsOutdated(); - if (MaxClusterLoad > 0.0 && ((!Ready && Strict) || QuotedLoad >= MaxClusterLoad)) { - if (PendingQueue.size() >= PendingQueueSize) { - Send(ev->Sender, new TEvYdbCompute::TEvCpuQuotaResponse(-1, NYdb::EStatus::OVERLOADED, NYql::TIssues{ - NYql::TIssue{TStringBuilder{} - << "Cluster is overloaded, current quoted load " << static_cast(QuotedLoad * 100) - << "%, average load " << static_cast(AverageLoad * 100) << "%" - }}), 0, ev->Cookie); + auto response = CpuQuotaManager.RequestCpuQuota(request.Quota, MaxClusterLoad); + CheckPendingQueue(); + if (response.Status == NYdb::EStatus::OVERLOADED && PendingQueue.size() < PendingQueueSize) { + PendingQueue.push(ev); + Counters.PendingQueueSize->Inc(); + } else { + if (response.Status == NYdb::EStatus::OVERLOADED) { Counters.PendingQueueOverload->Inc(); - } else { - PendingQueue.push(ev); - Counters.PendingQueueSize->Inc(); } - } else { - QuotedLoad += request.Quota; - *Counters.QuotedLoadPercentage = static_cast(QuotedLoad * 100); - Send(ev->Sender, new TEvYdbCompute::TEvCpuQuotaResponse(QuotedLoad * 100), 0, 
ev->Cookie); + Send(ev->Sender, new TEvYdbCompute::TEvCpuQuotaResponse(response.CurrentLoad, response.Status, response.Issues), 0, ev->Cookie); } } } void Handle(TEvYdbCompute::TEvCpuQuotaAdjust::TPtr& ev) { - if (CpuNumber) { - auto& request = *ev.Get()->Get(); - if (request.Duration && request.Duration < AverageLoadInterval / 2 && request.Quota <= 1.0) { - auto load = (request.CpuSecondsConsumed * 1000 / request.Duration.MilliSeconds()) / CpuNumber; - auto quota = request.Quota ? request.Quota : DefaultQueryLoad; - if (quota > load) { - auto adjustment = (quota - load) / 2; - if (QuotedLoad > adjustment) { - QuotedLoad -= adjustment; - } else { - QuotedLoad = 0.0; - } - CheckPendingQueue(); - *Counters.QuotedLoadPercentage = static_cast(QuotedLoad * 100); - } - } - } + auto& request = *ev.Get()->Get(); + CpuQuotaManager.AdjustCpuQuota(request.Quota, request.Duration, request.CpuSecondsConsumed); + CheckPendingQueue(); } void SendCpuLoadRequest() { @@ -215,57 +154,51 @@ class TComputeDatabaseMonitoringActor : public NActors::TActorBootstrapped AverageLoadInterval) { - Ready = false; - QuotedLoad = 0.0; - if (Strict) { - while (PendingQueue.size()) { - auto& ev = PendingQueue.front(); - Send(ev->Sender, new TEvYdbCompute::TEvCpuQuotaResponse(-1, NYdb::EStatus::OVERLOADED, NYql::TIssues{NYql::TIssue{TStringBuilder{} << "Cluster load info is not available"}}), 0, ev->Cookie); - PendingQueue.pop(); - Counters.PendingQueueSize->Dec(); - } + if (Strict && !CpuQuotaManager.CheckLoadIsOutdated()) { + while (PendingQueue.size()) { + auto& ev = PendingQueue.front(); + Send(ev->Sender, new TEvYdbCompute::TEvCpuQuotaResponse(-1, NYdb::EStatus::OVERLOADED, NYql::TIssues{NYql::TIssue{TStringBuilder{} << "Cluster load info is not available"}}), 0, ev->Cookie); + PendingQueue.pop(); + Counters.PendingQueueSize->Dec(); } } } void CheckPendingQueue() { + CheckLoadIsOutdated(); + auto now = TInstant::Now(); - while (QuotedLoad < MaxClusterLoad && PendingQueue.size()) { + while 
(PendingQueue.size()) { auto& ev = PendingQueue.front(); auto& request = *ev.Get()->Get(); if (request.Deadline && now >= request.Deadline) { Send(ev->Sender, new TEvYdbCompute::TEvCpuQuotaResponse(-1, NYdb::EStatus::CANCELLED, NYql::TIssues{ NYql::TIssue{TStringBuilder{} << "Deadline reached " << request.Deadline}}), 0, ev->Cookie); } else { - QuotedLoad += request.Quota; - Send(ev->Sender, new TEvYdbCompute::TEvCpuQuotaResponse(QuotedLoad * 100), 0, ev->Cookie); + auto response = CpuQuotaManager.RequestCpuQuota(request.Quota, MaxClusterLoad); + if (response.Status == NYdb::EStatus::OVERLOADED) { + break; + } + + Send(ev->Sender, new TEvYdbCompute::TEvCpuQuotaResponse(response.CurrentLoad, response.Status, response.Issues), 0, ev->Cookie); } + PendingQueue.pop(); Counters.PendingQueueSize->Dec(); } } private: - TInstant StartCpuLoad; - TInstant LastCpuLoad; TActorId MonitoringClientActorId; TCounters Counters; - - double InstantLoad = 0.0; - double AverageLoad = 0.0; - double QuotedLoad = 0.0; - bool Ready = false; - - const TDuration MonitoringRequestDelay; - const TDuration AverageLoadInterval; const double MaxClusterLoad; - const double DefaultQueryLoad; const ui32 PendingQueueSize; const bool Strict; - ui32 CpuNumber = 0; + NKikimr::NKqp::NWorkload::TCpuQuotaManager CpuQuotaManager; TQueue PendingQueue; + + TInstant StartCpuLoad; }; std::unique_ptr CreateDatabaseMonitoringActor(const NActors::TActorId& monitoringClientActorId, NFq::NConfig::TLoadControlConfig config, const ::NMonitoring::TDynamicCounterPtr& counters) { diff --git a/ydb/core/fq/libs/compute/ydb/control_plane/ya.make b/ydb/core/fq/libs/compute/ydb/control_plane/ya.make index a77f4292a98a..fcfb13f095ef 100644 --- a/ydb/core/fq/libs/compute/ydb/control_plane/ya.make +++ b/ydb/core/fq/libs/compute/ydb/control_plane/ya.make @@ -18,6 +18,7 @@ PEERDIR( ydb/core/fq/libs/compute/ydb/synchronization_service ydb/core/fq/libs/control_plane_storage/proto ydb/core/fq/libs/quota_manager/proto + 
ydb/core/kqp/workload_service/common ydb/core/protos ydb/library/db_pool/protos ydb/library/yql/public/issue diff --git a/ydb/core/grpc_services/grpc_request_check_actor.h b/ydb/core/grpc_services/grpc_request_check_actor.h index 54fe15aca653..5e944d63e16e 100644 --- a/ydb/core/grpc_services/grpc_request_check_actor.h +++ b/ydb/core/grpc_services/grpc_request_check_actor.h @@ -312,6 +312,7 @@ class TGrpcRequestCheckActor SetTokenAndDie(); break; case Ydb::StatusIds::TIMEOUT: + case Ydb::StatusIds::CANCELLED: Counters_->IncDatabaseRateLimitedCounter(); LOG_INFO(*TlsActivationContext, NKikimrServices::GRPC_SERVER, "Throughput limit exceeded"); ReplyOverloadedAndDie(MakeIssue(NKikimrIssues::TIssuesIds::YDB_RESOURCE_USAGE_LIMITED, "Throughput limit exceeded")); @@ -331,7 +332,8 @@ class TGrpcRequestCheckActor } }; - req.mutable_operation_params()->mutable_operation_timeout()->set_nanos(200000000); // same as cloud-go serverless proxy + req.mutable_operation_params()->mutable_operation_timeout()->set_seconds(10); + req.mutable_operation_params()->mutable_cancel_after()->set_nanos(200000000); // same as cloud-go serverless proxy NKikimr::NRpcService::RateLimiterAcquireUseSameMailbox( std::move(req), diff --git a/ydb/core/grpc_services/grpc_request_proxy.cpp b/ydb/core/grpc_services/grpc_request_proxy.cpp index 5a9c0771480f..cf4eafb7e395 100644 --- a/ydb/core/grpc_services/grpc_request_proxy.cpp +++ b/ydb/core/grpc_services/grpc_request_proxy.cpp @@ -420,9 +420,22 @@ void TGRpcRequestProxyImpl::HandleUndelivery(TEvents::TEvUndelivered::TPtr& ev) bool TGRpcRequestProxyImpl::IsAuthStateOK(const IRequestProxyCtx& ctx) { const auto& state = ctx.GetAuthState(); - return state.State == NYdbGrpc::TAuthState::AS_OK || - state.State == NYdbGrpc::TAuthState::AS_FAIL && state.NeedAuth == false || - state.NeedAuth == false && !ctx.GetYdbToken(); + if (state.State == NYdbGrpc::TAuthState::AS_OK) { + return true; + } + + const bool authorizationParamsAreSet = ctx.GetYdbToken() || 
!ctx.FindClientCertPropertyValues().empty(); + if (!state.NeedAuth && !authorizationParamsAreSet) { + return true; + } + + if (!state.NeedAuth && state.State == NYdbGrpc::TAuthState::AS_FAIL) { + if (AppData()->EnforceUserTokenCheckRequirement && authorizationParamsAreSet) { + return false; + } + return true; + } + return false; } void TGRpcRequestProxyImpl::MaybeStartTracing(IRequestProxyCtx& ctx) { diff --git a/ydb/core/grpc_services/grpc_request_proxy_simple.cpp b/ydb/core/grpc_services/grpc_request_proxy_simple.cpp index 3ad80fbbf542..c2274c3d7be3 100644 --- a/ydb/core/grpc_services/grpc_request_proxy_simple.cpp +++ b/ydb/core/grpc_services/grpc_request_proxy_simple.cpp @@ -172,9 +172,22 @@ void TGRpcRequestProxySimple::HandleUndelivery(TEvents::TEvUndelivered::TPtr& ev bool TGRpcRequestProxySimple::IsAuthStateOK(const IRequestProxyCtx& ctx) { const auto& state = ctx.GetAuthState(); - return state.State == NYdbGrpc::TAuthState::AS_OK || - state.State == NYdbGrpc::TAuthState::AS_FAIL && state.NeedAuth == false || - state.NeedAuth == false && !ctx.GetYdbToken(); + if (state.State == NYdbGrpc::TAuthState::AS_OK) { + return true; + } + + const bool authorizationParamsAreSet = ctx.GetYdbToken() || !ctx.FindClientCertPropertyValues().empty(); + if (!state.NeedAuth && !authorizationParamsAreSet) { + return true; + } + + if (!state.NeedAuth && state.State == NYdbGrpc::TAuthState::AS_FAIL) { + if (AppData()->EnforceUserTokenCheckRequirement && authorizationParamsAreSet) { + return false; + } + return true; + } + return false; } template diff --git a/ydb/core/grpc_services/local_rate_limiter.cpp b/ydb/core/grpc_services/local_rate_limiter.cpp index d77fd26f9845..993c71e246e8 100644 --- a/ydb/core/grpc_services/local_rate_limiter.cpp +++ b/ydb/core/grpc_services/local_rate_limiter.cpp @@ -23,6 +23,7 @@ TActorId RateLimiterAcquireUseSameMailbox( onSuccess(); break; case Ydb::StatusIds::TIMEOUT: + case Ydb::StatusIds::CANCELLED: onTimeout(); break; default: @@ -32,7 +33,8 
@@ TActorId RateLimiterAcquireUseSameMailbox( }; Ydb::RateLimiter::AcquireResourceRequest request; - SetDuration(duration, *request.mutable_operation_params()->mutable_operation_timeout()); + SetDuration(duration * 10, *request.mutable_operation_params()->mutable_operation_timeout()); + SetDuration(duration, *request.mutable_operation_params()->mutable_cancel_after()); request.set_coordination_node_path(fullPath.CoordinationNode); request.set_resource_path(fullPath.ResourcePath); request.set_required(required); @@ -72,6 +74,7 @@ TActorId RateLimiterAcquireUseSameMailbox( onSuccess(); break; case Ydb::StatusIds::TIMEOUT: + case Ydb::StatusIds::CANCELLED: onTimeout(); break; default: @@ -82,7 +85,8 @@ TActorId RateLimiterAcquireUseSameMailbox( const auto& rlPath = maybeRlPath.GetRef(); Ydb::RateLimiter::AcquireResourceRequest request; - SetDuration(duration, *request.mutable_operation_params()->mutable_operation_timeout()); + SetDuration(duration * 10, *request.mutable_operation_params()->mutable_operation_timeout()); + SetDuration(duration, *request.mutable_operation_params()->mutable_cancel_after()); request.set_coordination_node_path(rlPath.CoordinationNode); request.set_resource_path(rlPath.ResourcePath); request.set_required(required); diff --git a/ydb/core/grpc_services/local_rpc/local_rpc.h b/ydb/core/grpc_services/local_rpc/local_rpc.h index 53771041766a..eb4b4a8deee2 100644 --- a/ydb/core/grpc_services/local_rpc/local_rpc.h +++ b/ydb/core/grpc_services/local_rpc/local_rpc.h @@ -153,7 +153,12 @@ class TLocalRpcCtx : public TLocalRpcCtxImpl { if (key == NYdb::YDB_DATABASE_HEADER) { return GetDatabaseName(); } - return TMaybe{}; + auto valueIt = PeerMeta.find(key); + return valueIt == PeerMeta.end() ? 
Nothing() : TMaybe(valueIt->second); + } + + void PutPeerMeta(const TString& key, const TString& value) { + PeerMeta.insert_or_assign(key, value); } TVector FindClientCert() const override { @@ -278,6 +283,7 @@ class TLocalRpcCtx : public TLocalRpcCtxImpl { const bool InternalCall; TIntrusiveConstPtr InternalToken; const TString EmptySerializedTokenMessage_; + TMap PeerMeta; google::protobuf::Arena Arena; }; @@ -318,6 +324,41 @@ NThreading::TFuture DoLocalRpc(typename TRpc::TRequest return DoLocalRpc(std::move(proto), database, token, Nothing(), actorSystem, internalCall); } +template +NThreading::TFuture DoLocalRpc( + typename TRpc::TRequest&& proto, + const TString& database, + const TMaybe& token, + const TMaybe& requestType, + TActorSystem* actorSystem, + const TMap& peerMeta, + bool internalCall = false +) +{ + auto promise = NThreading::NewPromise(); + + SetRequestSyncOperationMode(proto); + + using TCbWrapper = TPromiseWrapper; + auto req = new TLocalRpcCtx( + std::move(proto), + TCbWrapper(promise), + database, + token, + requestType, + internalCall + ); + + for (const auto& [key, value] : peerMeta) { + req->PutPeerMeta(key, value); + } + + auto actor = TRpc::CreateRpcActor(req); + actorSystem->Register(actor, TMailboxType::HTSwap, actorSystem->AppData()->UserPoolId); + + return promise.GetFuture(); +} + template TActorId DoLocalRpcSameMailbox(typename TRpc::TRequest&& proto, std::function&& cb, const TString& database, const TMaybe& token, const TMaybe& requestType, diff --git a/ydb/core/grpc_services/query/rpc_execute_query.cpp b/ydb/core/grpc_services/query/rpc_execute_query.cpp index 0f8ab413a3cb..a885167e9061 100644 --- a/ydb/core/grpc_services/query/rpc_execute_query.cpp +++ b/ydb/core/grpc_services/query/rpc_execute_query.cpp @@ -392,7 +392,7 @@ class TExecuteQueryRPC : public TActorBootstrapped { bool hasTrailingMessage = false; auto& kqpResponse = record.GetResponse(); - if (kqpResponse.GetYdbResults().size() > 1) { + if 
(kqpResponse.GetYdbResults().size() > 1 && QueryAction != NKikimrKqp::QUERY_ACTION_EXPLAIN) { auto issue = MakeIssue(NKikimrIssues::TIssuesIds::DEFAULT_ERROR, "Unexpected trailing message with multiple result sets."); ReplyFinishStream(Ydb::StatusIds::INTERNAL_ERROR, issue); diff --git a/ydb/core/grpc_services/query/rpc_execute_script.cpp b/ydb/core/grpc_services/query/rpc_execute_script.cpp index 1c5efad67097..7f1502b35574 100644 --- a/ydb/core/grpc_services/query/rpc_execute_script.cpp +++ b/ydb/core/grpc_services/query/rpc_execute_script.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -71,27 +72,28 @@ std::tuple FillKqpRequest( return {Ydb::StatusIds::SUCCESS, {}}; } -class TExecuteScriptRPC : public TActorBootstrapped { +class TExecuteScriptRPC : public TRpcRequestActor { public: + using TRpcRequestActorBase = TRpcRequestActor; + static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::GRPC_REQ; } - TExecuteScriptRPC(TEvExecuteScriptRequest* request) - : Request_(request) + TExecuteScriptRPC(IRequestNoOpCtx* request) + : TRpcRequestActorBase(request) {} void Bootstrap() { NYql::TIssues issues; - const auto& request = *Request_->GetProtoRequest(); + const auto& request = GetProtoRequest(); - if (request.operation_params().operation_mode() == Ydb::Operations::OperationParams::SYNC) { + if (request->operation_params().operation_mode() == Ydb::Operations::OperationParams::SYNC) { issues.AddIssue("ExecuteScript must be asyncronous operation"); return Reply(Ydb::StatusIds::BAD_REQUEST, issues); } - AuditContextAppend(Request_.get(), request); - + AuditContextAppend(Request.Get(), request); Ydb::StatusIds::StatusCode status = Ydb::StatusIds::SUCCESS; if (auto scriptRequest = MakeScriptRequest(issues, status)) { if (Send(NKqp::MakeKqpProxyID(SelfId().NodeId()), scriptRequest.Release())) { @@ -122,14 +124,14 @@ class TExecuteScriptRPC : public TActorBootstrapped { } THolder 
MakeScriptRequest(NYql::TIssues& issues, Ydb::StatusIds::StatusCode& status) const { - const auto* req = Request_->GetProtoRequest(); - const auto traceId = Request_->GetTraceId(); + const auto* req = GetProtoRequest(); + const auto traceId = Request->GetTraceId(); auto ev = MakeHolder(); - SetAuthToken(ev, *Request_); - SetDatabase(ev, *Request_); - SetRlPath(ev, *Request_); + SetAuthToken(ev, *Request); + SetDatabase(ev, *Request); + SetRlPath(ev, *Request); if (traceId) { ev->Record.SetTraceId(traceId.GetRef()); @@ -162,12 +164,9 @@ class TExecuteScriptRPC : public TActorBootstrapped { result.set_status(status); - AuditContextAppend(Request_.get(), *Request_->GetProtoRequest(), result); - - TString serializedResult; - Y_PROTOBUF_SUPPRESS_NODISCARD result.SerializeToString(&serializedResult); + AuditContextAppend(Request.Get(), GetProtoRequest(), result); - Request_->SendSerializedResult(std::move(serializedResult), status); + TProtoResponseHelper::SendProtoResponse(result, status, Request); PassAway(); } @@ -177,9 +176,6 @@ class TExecuteScriptRPC : public TActorBootstrapped { result.set_ready(true); Reply(status, std::move(result), issues); } - -private: - std::unique_ptr Request_; }; } // namespace @@ -193,6 +189,11 @@ void DoExecuteScript(std::unique_ptr p, const IFacilityProvider f.RegisterActor(new TExecuteScriptRPC(req)); } +} // namespace NQuery + +template<> +IActor* TEvExecuteScriptRequest::CreateRpcActor(IRequestNoOpCtx* msg) { + return new TExecuteScriptRPC(msg); } } // namespace NKikimr::NGRpcService diff --git a/ydb/core/grpc_services/query/rpc_fetch_script_results.cpp b/ydb/core/grpc_services/query/rpc_fetch_script_results.cpp index e25877095cc3..80d815723b8a 100644 --- a/ydb/core/grpc_services/query/rpc_fetch_script_results.cpp +++ b/ydb/core/grpc_services/query/rpc_fetch_script_results.cpp @@ -35,7 +35,7 @@ class TFetchScriptResultsRPC : public TRpcRequestActorSendSerializedResult(std::move(serializedResult), status); + 
TProtoResponseHelper::SendProtoResponse(result, status, Request); PassAway(); } @@ -154,4 +151,9 @@ void DoFetchScriptResults(std::unique_ptr p, const IFacilityPro } +template<> +IActor* TEvFetchScriptResultsRequest::CreateRpcActor(IRequestNoOpCtx* msg) { + return new TFetchScriptResultsRPC(msg); +} + } // namespace NKikimr::NGRpcService diff --git a/ydb/core/grpc_services/query/rpc_kqp_tx.cpp b/ydb/core/grpc_services/query/rpc_kqp_tx.cpp index 4f92c00faff7..d71caa0f428e 100644 --- a/ydb/core/grpc_services/query/rpc_kqp_tx.cpp +++ b/ydb/core/grpc_services/query/rpc_kqp_tx.cpp @@ -124,6 +124,7 @@ class TBeginTransactionRPC : public TActorBootstrapped { if (kqpResponse.HasTxMeta()) { beginTxResult->mutable_tx_meta()->set_id(kqpResponse.GetTxMeta().id()); } + *beginTxResult->mutable_issues() = issueMessage; } Reply(record.GetYdbStatus(), beginTxResult); @@ -168,7 +169,7 @@ class TFinishTransactionRPC : public TActorBootstrapped private: virtual std::pair GetReqData() const = 0; virtual void Fill(NKikimrKqp::TQueryRequest* req) const = 0; - virtual NProtoBuf::Message* CreateResult(Ydb::StatusIds::StatusCode status) const = 0; + virtual NProtoBuf::Message* CreateResult(Ydb::StatusIds::StatusCode status, const NYql::TIssues& issues) const = 0; void StateWork(TAutoPtr& ev) { try { @@ -218,15 +219,15 @@ class TFinishTransactionRPC : public TActorBootstrapped const auto& record = ev->Get()->Record.GetRef(); FillCommonKqpRespFields(record, Request.get()); + NYql::TIssues issues; if (record.HasResponse()) { const auto& kqpResponse = record.GetResponse(); const auto& issueMessage = kqpResponse.GetQueryIssues(); - NYql::TIssues issues; NYql::IssuesFromMessage(issueMessage, issues); Request->RaiseIssues(issues); } - Reply(record.GetYdbStatus(), CreateResult(record.GetYdbStatus())); + Reply(record.GetYdbStatus(), CreateResult(record.GetYdbStatus(), issues)); } void InternalError(const TString& message) { @@ -271,9 +272,10 @@ class TCommitTransactionRPC : public 
TFinishTransactionRPC { req->MutableTxControl()->set_commit_tx(true); } - NProtoBuf::Message* CreateResult(Ydb::StatusIds::StatusCode status) const override { + NProtoBuf::Message* CreateResult(Ydb::StatusIds::StatusCode status, const NYql::TIssues& issues) const override { auto result = TEvCommitTransactionRequest::AllocateResult(Request); result->set_status(status); + NYql::IssuesToMessage(issues, result->mutable_issues()); return result; } }; @@ -293,9 +295,10 @@ class TRollbackTransactionRPC : public TFinishTransactionRPC { req->SetAction(NKikimrKqp::QUERY_ACTION_ROLLBACK_TX); } - NProtoBuf::Message* CreateResult(Ydb::StatusIds::StatusCode status) const override { + NProtoBuf::Message* CreateResult(Ydb::StatusIds::StatusCode status, const NYql::TIssues& issues) const override { auto result = TEvRollbackTransactionRequest::AllocateResult(Request); result->set_status(status); + NYql::IssuesToMessage(issues, result->mutable_issues()); return result; } }; diff --git a/ydb/core/grpc_services/rpc_alter_table.cpp b/ydb/core/grpc_services/rpc_alter_table.cpp index b18ade6d3f0b..290a129fdcbc 100644 --- a/ydb/core/grpc_services/rpc_alter_table.cpp +++ b/ydb/core/grpc_services/rpc_alter_table.cpp @@ -109,12 +109,12 @@ class TAlterTableRPC : public TRpcSchemeRequestActorServices.SchemeCache, ctx); } - void PrepareAlterUserAttrubutes() { + void GetProxyServices() { using namespace NTxProxy; Send(MakeTxProxyID(), new TEvTxUserProxy::TEvGetProxyServicesRequest); } @@ -222,13 +222,38 @@ class TAlterTableRPC : public TRpcSchemeRequestActor(ev)->Request->ResultSet.emplace_back(); - entry.Operation = NSchemeCache::TSchemeCacheNavigate::OpTable; entry.Path = paths; } Send(schemeCache, ev); } + void Navigate(const TTableId& pathId) { + DatabaseName = Request_->GetDatabaseName() + .GetOrElse(DatabaseFromDomain(AppData())); + + auto ev = CreateNavigateForPath(DatabaseName); + { + auto& entry = static_cast(ev)->Request->ResultSet.emplace_back(); + entry.RequestType = 
NSchemeCache::TSchemeCacheNavigate::TEntry::ERequestType::ByTableId; + entry.TableId = pathId; + entry.ShowPrivatePath = true; + entry.Operation = NSchemeCache::TSchemeCacheNavigate::OpList; + } + + Send(MakeSchemeCacheID(), ev); + } + + static bool IsChangefeedOperation(EOp type) { + switch (type) { + case EOp::AddChangefeed: + case EOp::DropChangefeed: + return true; + default: + return false; + } + } + void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev, const TActorContext& ctx) { TXLOG_D("Handle TEvTxProxySchemeCache::TEvNavigateKeySetResult" << ", errors# " << ev->Get()->Request.Get()->ErrorCount); @@ -251,13 +276,48 @@ class TAlterTableRPC : public TRpcSchemeRequestActorResultSet.empty()); + const auto& entry = resp->ResultSet.back(); + + switch (entry.Kind) { + case NSchemeCache::TSchemeCacheNavigate::KindTable: + case NSchemeCache::TSchemeCacheNavigate::KindColumnTable: + case NSchemeCache::TSchemeCacheNavigate::KindExternalTable: + case NSchemeCache::TSchemeCacheNavigate::KindExternalDataSource: + case NSchemeCache::TSchemeCacheNavigate::KindView: + break; // table + case NSchemeCache::TSchemeCacheNavigate::KindIndex: + if (IsChangefeedOperation(OpType)) { + break; + } + [[fallthrough]]; + default: + Request_->RaiseIssue(MakeIssue(NKikimrIssues::TIssuesIds::GENERIC_RESOLVE_ERROR, TStringBuilder() + << "Unable to nagivate: " << JoinPath(entry.Path) << " status: PathNotTable")); + return Reply(Ydb::StatusIds::SCHEME_ERROR, ctx); + } + switch (OpType) { case EOp::AddIndex: return AlterTableAddIndexOp(resp, ctx); case EOp::Attribute: - Y_ABORT_UNLESS(!resp->ResultSet.empty()); ResolvedPathId = resp->ResultSet.back().TableId.PathId; return AlterTable(ctx); + case EOp::AddChangefeed: + case EOp::DropChangefeed: + if (entry.Kind != NSchemeCache::TSchemeCacheNavigate::KindIndex) { + AlterTable(ctx); + } else if (auto list = entry.ListNodeEntry) { + if (list->Children.size() != 1) { + return Reply(Ydb::StatusIds::SCHEME_ERROR, ctx); + } + + const 
auto& child = list->Children.at(0); + AlterTable(ctx, CanonizePath(ChildPath(NKikimr::SplitPath(GetProtoRequest()->path()), child.Name))); + } else { + Navigate(entry.TableId); + } + break; default: TXLOG_E("Got unexpected cache response"); return Reply(Ydb::StatusIds::INTERNAL_ERROR, ctx); @@ -351,13 +411,14 @@ class TAlterTableRPC : public TRpcSchemeRequestActor& overridePath = {}) { const auto req = GetProtoRequest(); std::unique_ptr proposeRequest = CreateProposeTransaction(); auto modifyScheme = proposeRequest->Record.MutableTransaction()->MutableModifyScheme(); + modifyScheme->SetAllowAccessToPrivatePaths(overridePath.Defined()); Ydb::StatusIds::StatusCode code; TString error; - if (!BuildAlterTableModifyScheme(req, modifyScheme, Profiles, ResolvedPathId, code, error)) { + if (!BuildAlterTableModifyScheme(overridePath.GetOrElse(req->path()), req, modifyScheme, Profiles, ResolvedPathId, code, error)) { NYql::TIssues issues; issues.AddIssue(NYql::TIssue(error)); return Reply(code, issues, ctx); diff --git a/ydb/core/grpc_services/rpc_create_table.cpp b/ydb/core/grpc_services/rpc_create_table.cpp index 556004f466d0..88fed4de46d3 100644 --- a/ydb/core/grpc_services/rpc_create_table.cpp +++ b/ydb/core/grpc_services/rpc_create_table.cpp @@ -181,8 +181,28 @@ class TCreateTableRPC : public TRpcSchemeRequestActorcolumns()) { + switch (column.default_value_case()) { + case Ydb::Table::ColumnMeta::kFromSequence: { + auto* seqDesc = modifyScheme->MutableCreateIndexedTable()->MutableSequenceDescription()->Add(); + if (!FillSequenceDescription(*seqDesc, column.from_sequence(), code, error)) { + NYql::TIssues issues; + issues.AddIssue(NYql::TIssue(error)); + return Reply(code, issues, ctx); + } + hasSerial = true; + break; + } + default: break; + } + } + NKikimrSchemeOp::TTableDescription* tableDesc = nullptr; - if (req->indexesSize()) { + if (req->indexesSize() || hasSerial) { modifyScheme->SetOperationType(NKikimrSchemeOp::EOperationType::ESchemeOpCreateIndexedTable); 
tableDesc = modifyScheme->MutableCreateIndexedTable()->MutableTableDescription(); } else { @@ -192,9 +212,6 @@ class TCreateTableRPC : public TRpcSchemeRequestActorSetName(name); - StatusIds::StatusCode code = StatusIds::SUCCESS; - TString error; - if (!FillColumnDescription(*tableDesc, req->columns(), code, error)) { NYql::TIssues issues; issues.AddIssue(NYql::TIssue(error)); diff --git a/ydb/core/grpc_services/rpc_describe_table.cpp b/ydb/core/grpc_services/rpc_describe_table.cpp index 05f481e60c89..c4fa00af373b 100644 --- a/ydb/core/grpc_services/rpc_describe_table.cpp +++ b/ydb/core/grpc_services/rpc_describe_table.cpp @@ -1,11 +1,11 @@ -#include "service_table.h" -#include - #include "rpc_calls.h" #include "rpc_scheme_base.h" - #include "service_table.h" -#include "rpc_common/rpc_common.h" + +#include +#include +#include +#include #include #include #include @@ -22,6 +22,20 @@ using TEvDescribeTableRequest = TGrpcRequestOperationCall { using TBase = TRpcSchemeRequestActor; + TString OverrideName; + + static bool ShowPrivatePath(const TString& path) { + if (AppData()->AllowPrivateTableDescribeForTest) { + return true; + } + + if (path.EndsWith("/indexImplTable")) { + return true; + } + + return false; + } + public: TDescribeTableRPC(IRequestOpCtx* msg) : TBase(msg) {} @@ -29,18 +43,63 @@ class TDescribeTableRPC : public TRpcSchemeRequestActorpath(); + const auto paths = NKikimr::SplitPath(path); + if (paths.empty()) { + Request_->RaiseIssue(NYql::TIssue("Invalid path")); + return Reply(Ydb::StatusIds::BAD_REQUEST, ctx); + } + + auto navigate = MakeHolder(); + navigate->DatabaseName = CanonizePath(Request_->GetDatabaseName().GetOrElse("")); + auto& entry = navigate->ResultSet.emplace_back(); + entry.Path = paths; + entry.Operation = NSchemeCache::TSchemeCacheNavigate::OpList; + entry.SyncVersion = true; + entry.ShowPrivatePath = ShowPrivatePath(path); + + Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(navigate)); 
Become(&TDescribeTableRPC::StateWork); } private: void StateWork(TAutoPtr& ev) { switch (ev->GetTypeRewrite()) { + HFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, Handle); HFunc(NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult, Handle); default: TBase::StateWork(ev); } } + void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev, const TActorContext& ctx) { + auto* navigate = ev->Get()->Request.Get(); + + Y_ABORT_UNLESS(navigate->ResultSet.size() == 1); + const auto& entry = navigate->ResultSet.front(); + + if (navigate->ErrorCount > 0) { + switch (entry.Status) { + case NSchemeCache::TSchemeCacheNavigate::EStatus::PathErrorUnknown: + case NSchemeCache::TSchemeCacheNavigate::EStatus::RootUnknown: + return Reply(Ydb::StatusIds::SCHEME_ERROR, ctx); + default: + return Reply(Ydb::StatusIds::UNAVAILABLE, ctx); + } + } + + if (entry.Kind == NSchemeCache::TSchemeCacheNavigate::KindIndex) { + auto list = entry.ListNodeEntry; + if (!list || list->Children.size() != 1) { + return Reply(Ydb::StatusIds::SCHEME_ERROR, ctx); + } + + OverrideName = entry.Path.back(); + SendProposeRequest(CanonizePath(ChildPath(entry.Path, list->Children.at(0).Name)), ctx); + } else { + SendProposeRequest(GetProtoRequest()->path(), ctx); + } + } + void Handle(NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult::TPtr& ev, const TActorContext& ctx) { const auto& record = ev->Get()->GetRecord(); const auto status = record.GetStatus(); @@ -53,9 +112,10 @@ class TDescribeTableRPC : public TRpcSchemeRequestActorset_name(pathDescription.GetSelf().GetName()); - selfEntry->set_type(static_cast(pathDescription.GetSelf().GetPathType())); ConvertDirectoryEntry(pathDescription.GetSelf(), selfEntry, true); + if (OverrideName) { + selfEntry->set_name(OverrideName); + } if (pathDescription.HasColumnTableDescription()) { const auto& tableDescription = pathDescription.GetColumnTableDescription(); @@ -82,6 +142,14 @@ class TDescribeTableRPC : public 
TRpcSchemeRequestActorRaiseIssue(NYql::TIssue(error)); + return Reply(Ydb::StatusIds::INTERNAL_ERROR, ctx); + } + describeTableResult.mutable_primary_key()->CopyFrom(tableDescription.GetKeyColumnNames()); try { @@ -92,7 +160,14 @@ class TDescribeTableRPC : public TRpcSchemeRequestActorRaiseIssue(NYql::ExceptionToIssue(ex)); + return Reply(Ydb::StatusIds::INTERNAL_ERROR, ctx); + } + FillChangefeedDescription(describeTableResult, tableDescription); if (GetProtoRequest()->include_table_stats()) { @@ -129,9 +204,8 @@ class TDescribeTableRPC : public TRpcSchemeRequestActorpath(); std::unique_ptr navigateRequest(new TEvTxUserProxy::TEvNavigate()); SetAuthToken(navigateRequest, *Request_); @@ -146,10 +220,12 @@ class TDescribeTableRPC : public TRpcSchemeRequestActorMutableOptions()->SetReturnPartitionStats(true); } - if (AppData(ctx)->AllowPrivateTableDescribeForTest || path.EndsWith("/indexImplTable")) { - record->MutableOptions()->SetShowPrivateTable(true); + if (req->include_set_val()) { + record->MutableOptions()->SetReturnSetVal(true); } + record->MutableOptions()->SetShowPrivateTable(ShowPrivatePath(path)); + ctx.Send(MakeTxProxyID(), navigateRequest.release()); } }; diff --git a/ydb/core/grpc_services/rpc_execute_data_query.cpp b/ydb/core/grpc_services/rpc_execute_data_query.cpp index f5b7e87043ff..19f392a0d310 100644 --- a/ydb/core/grpc_services/rpc_execute_data_query.cpp +++ b/ydb/core/grpc_services/rpc_execute_data_query.cpp @@ -185,9 +185,8 @@ class TExecuteDataQueryRPC : public TRpcKqpRequestActormutable_result_sets()->Swap(record.MutableResponse()->MutableYdbResults()); - } else { - NKqp::ConvertKqpQueryResultsToDbResult(kqpResponse, queryResult); } + ConvertQueryStats(kqpResponse, queryResult); if (kqpResponse.HasTxMeta()) { queryResult->mutable_tx_meta()->CopyFrom(kqpResponse.GetTxMeta()); diff --git a/ydb/core/grpc_services/rpc_execute_yql_script.cpp b/ydb/core/grpc_services/rpc_execute_yql_script.cpp index 72b8350d8b74..53fd5f6ff146 100644 --- 
a/ydb/core/grpc_services/rpc_execute_yql_script.cpp +++ b/ydb/core/grpc_services/rpc_execute_yql_script.cpp @@ -97,7 +97,11 @@ class TExecuteYqlScriptRPC : public TRpcKqpRequestActor(Request_); try { - NKqp::ConvertKqpQueryResultsToDbResult(kqpResponse, queryResult); + const auto& results = kqpResponse.GetYdbResults(); + for (const auto& result : results) { + queryResult->add_result_sets()->CopyFrom(result); + } + } catch (const std::exception& ex) { NYql::TIssues issues; issues.AddIssue(NYql::ExceptionToIssue(ex)); diff --git a/ydb/core/grpc_services/rpc_export.cpp b/ydb/core/grpc_services/rpc_export.cpp index 6bf1b5c713ad..59dac9c6fda4 100644 --- a/ydb/core/grpc_services/rpc_export.cpp +++ b/ydb/core/grpc_services/rpc_export.cpp @@ -41,6 +41,7 @@ class TExportRPC: public TRpcOperationRequestActor, if (this->UserToken) { ev->Record.SetUserSID(this->UserToken->GetUserSID()); } + ev->Record.SetPeerName(this->Request->GetPeerName()); auto& createExport = *ev->Record.MutableRequest(); *createExport.MutableOperationParams() = request.operation_params(); diff --git a/ydb/core/grpc_services/rpc_export_base.h b/ydb/core/grpc_services/rpc_export_base.h index c85ca5b5e7db..0950aa815270 100644 --- a/ydb/core/grpc_services/rpc_export_base.h +++ b/ydb/core/grpc_services/rpc_export_base.h @@ -46,12 +46,16 @@ struct TExportConv { } if (exprt.HasStartTime()) { - *operation.mutable_start_time() = exprt.GetStartTime(); + *operation.mutable_create_time() = exprt.GetStartTime(); } if (exprt.HasEndTime()) { *operation.mutable_end_time() = exprt.GetEndTime(); } + if (exprt.HasUserSID()) { + operation.set_created_by(exprt.GetUserSID()); + } + using namespace Ydb::Export; switch (exprt.GetSettingsCase()) { case NKikimrExport::TExport::kExportToYtSettings: diff --git a/ydb/core/grpc_services/rpc_import.cpp b/ydb/core/grpc_services/rpc_import.cpp index 174e9df84cb9..1cf09ad123c0 100644 --- a/ydb/core/grpc_services/rpc_import.cpp +++ b/ydb/core/grpc_services/rpc_import.cpp @@ -39,6 +39,7 @@ 
class TImportRPC: public TRpcOperationRequestActor, if (this->UserToken) { ev->Record.SetUserSID(this->UserToken->GetUserSID()); } + ev->Record.SetPeerName(this->Request->GetPeerName()); auto& createImport = *ev->Record.MutableRequest(); createImport.MutableOperationParams()->CopyFrom(request.operation_params()); diff --git a/ydb/core/grpc_services/rpc_import_base.h b/ydb/core/grpc_services/rpc_import_base.h index 16d88e0bb3c9..be700af4f026 100644 --- a/ydb/core/grpc_services/rpc_import_base.h +++ b/ydb/core/grpc_services/rpc_import_base.h @@ -43,12 +43,16 @@ struct TImportConv { } if (import.HasStartTime()) { - *operation.mutable_start_time() = import.GetStartTime(); + *operation.mutable_create_time() = import.GetStartTime(); } if (import.HasEndTime()) { *operation.mutable_end_time() = import.GetEndTime(); } + if (import.HasUserSID()) { + operation.set_created_by(import.GetUserSID()); + } + using namespace Ydb::Import; switch (import.GetSettingsCase()) { case NKikimrImport::TImport::kImportFromS3Settings: diff --git a/ydb/core/grpc_services/rpc_load_rows.cpp b/ydb/core/grpc_services/rpc_load_rows.cpp index e780de7eea05..4b5cab1c5b1c 100644 --- a/ydb/core/grpc_services/rpc_load_rows.cpp +++ b/ydb/core/grpc_services/rpc_load_rows.cpp @@ -75,9 +75,12 @@ bool ConvertArrowToYdbPrimitive(const arrow::DataType& type, Ydb::Type& toType) case arrow::Type::DURATION: toType.set_type_id(Ydb::Type::INTERVAL); return true; - case arrow::Type::DECIMAL: - // TODO - return false; + case arrow::Type::DECIMAL: { + Ydb::DecimalType* decimalType = toType.mutable_decimal_type(); + decimalType->set_precision(22); + decimalType->set_scale(9); + return true; + } case arrow::Type::NA: case arrow::Type::HALF_FLOAT: case arrow::Type::FIXED_SIZE_BINARY: diff --git a/ydb/core/grpc_services/rpc_login.cpp b/ydb/core/grpc_services/rpc_login.cpp index 0f527fae42fb..f259181d2e8b 100644 --- a/ydb/core/grpc_services/rpc_login.cpp +++ b/ydb/core/grpc_services/rpc_login.cpp @@ -63,6 +63,7 @@ class 
TLoginRPC : public TRpcRequestActor { PipeClient = RegisterWithSameMailbox(pipe); THolder request = MakeHolder(); request.Get()->Record = CreateLoginRequest(Credentials, AppData()->AuthConfig); + request.Get()->Record.SetPeerName(Request->GetPeerName()); NTabletPipe::SendData(SelfId(), PipeClient, request.Release()); return; } diff --git a/ydb/core/grpc_services/rpc_object_storage.cpp b/ydb/core/grpc_services/rpc_object_storage.cpp index 8ed0134084b6..2fd91b4a11cd 100644 --- a/ydb/core/grpc_services/rpc_object_storage.cpp +++ b/ydb/core/grpc_services/rpc_object_storage.cpp @@ -144,6 +144,19 @@ bool CellFromTuple(NScheme::TTypeInfo type, } break; } + case NScheme::NTypeIds::Decimal: + { + if (tupleValue.Haslow_128()) { + NYql::NDecimal::TInt128 int128 = NYql::NDecimal::FromHalfs(tupleValue.Getlow_128(), tupleValue.Gethigh_128()); + auto &data = memoryOwner.emplace_back(); + data.resize(sizeof(NYql::NDecimal::TInt128)); + std::memcpy(data.Detach(), &int128, sizeof(NYql::NDecimal::TInt128)); + c = TCell(data); + } else { + CHECK_OR_RETURN_ERROR(false, Sprintf("Cannot parse value of type Decimal in tuple at position %" PRIu32, position)); + } + break; + } default: CHECK_OR_RETURN_ERROR(false, Sprintf("Unsupported typeId %" PRIu16 " at index %" PRIu32, typeId, position)); break; diff --git a/ydb/core/grpc_services/rpc_rate_limiter_api.cpp b/ydb/core/grpc_services/rpc_rate_limiter_api.cpp index 288527ae1e95..4ce11d417c4d 100644 --- a/ydb/core/grpc_services/rpc_rate_limiter_api.cpp +++ b/ydb/core/grpc_services/rpc_rate_limiter_api.cpp @@ -594,11 +594,18 @@ class TAcquireRateLimiterResourceRPC : public TRateLimiterRequestcoordination_node_path(), GetProtoRequest()->resource_path()), 0, 0); TBase::OnOperationTimeout(ctx); } + // Do nothing here, because quoter service replies after "cancel after" time passes. 
+ void OnCancelOperation(const TActorContext& ctx) { + Y_UNUSED(ctx); + } + STFUNC(StateFunc) { switch (ev->GetTypeRewrite()) { hFunc(TEvQuota::TEvClearance, Handle); @@ -637,22 +644,37 @@ class TAcquireRateLimiterResourceRPC : public TRateLimiterRequestGet()->Result) { case TEvQuota::TEvClearance::EResult::Success: Reply(StatusIds::SUCCESS, TActivationContext::AsActorContext()); - break; + break; case TEvQuota::TEvClearance::EResult::UnknownResource: Reply(StatusIds::BAD_REQUEST, TActivationContext::AsActorContext()); - break; + break; case TEvQuota::TEvClearance::EResult::Deadline: - Reply(StatusIds::TIMEOUT, TActivationContext::AsActorContext()); - break; + Reply(QuoterDeadlineStatusCode(), TActivationContext::AsActorContext()); + break; default: Reply(StatusIds::INTERNAL_ERROR, TActivationContext::AsActorContext()); } diff --git a/ydb/core/grpc_services/rpc_replication.cpp b/ydb/core/grpc_services/rpc_replication.cpp index 83c28cc04845..8b75b72a08d0 100644 --- a/ydb/core/grpc_services/rpc_replication.cpp +++ b/ydb/core/grpc_services/rpc_replication.cpp @@ -11,6 +11,8 @@ #include +#include + namespace NKikimr::NGRpcService { using namespace Ydb; @@ -138,9 +140,18 @@ class TDescribeReplicationRPC: public TRpcSchemeRequestActor +#include +#include +#include +#include +#include + +namespace NKikimr::NGRpcService { + +using namespace Ydb; + +using TEvDescribeView = TGrpcRequestOperationCall; + +class TDescribeViewRPC : public TRpcSchemeRequestActor { + using TBase = TRpcSchemeRequestActor; + +public: + using TBase::TBase; + + void Bootstrap() { + DescribeScheme(); + } + + void PassAway() override { + TBase::PassAway(); + } + +private: + void DescribeScheme() { + auto ev = std::make_unique(); + SetAuthToken(ev, *Request_); + SetDatabase(ev.get(), *Request_); + ev->Record.MutableDescribePath()->SetPath(GetProtoRequest()->path()); + + Send(MakeTxProxyID(), ev.release()); + Become(&TDescribeViewRPC::StateDescribeScheme); + } + + STATEFN(StateDescribeScheme) { + switch 
(ev->GetTypeRewrite()) { + HFunc(NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult, Handle); + default: + return TBase::StateWork(ev); + } + } + + void Handle(NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult::TPtr& ev, const TActorContext& ctx) { + const auto& record = ev->Get()->GetRecord(); + const auto& desc = record.GetPathDescription(); + + if (record.HasReason()) { + Request_->RaiseIssue(NYql::TIssue(record.GetReason())); + } + + switch (record.GetStatus()) { + case NKikimrScheme::StatusSuccess: + if (desc.GetSelf().GetPathType() != NKikimrSchemeOp::EPathTypeView) { + auto message = TStringBuilder() << "Expected a view, but got: " << desc.GetSelf().GetPathType(); + Request_->RaiseIssue(NYql::TIssue(message)); + return Reply(StatusIds::SCHEME_ERROR, ctx); + } + + ConvertDirectoryEntry(desc.GetSelf(), Result_.mutable_self(), true); + Result_.set_query_text(desc.GetViewDescription().GetQueryText()); + + return ReplyWithResult(StatusIds::SUCCESS, Result_, ctx); + + case NKikimrScheme::StatusPathDoesNotExist: + case NKikimrScheme::StatusSchemeError: + return Reply(StatusIds::SCHEME_ERROR, ctx); + + case NKikimrScheme::StatusAccessDenied: + return Reply(StatusIds::UNAUTHORIZED, ctx); + + case NKikimrScheme::StatusNotAvailable: + return Reply(StatusIds::UNAVAILABLE, ctx); + + default: + return Reply(StatusIds::GENERIC_ERROR, ctx); + } + } + +private: + View::DescribeViewResult Result_; +}; + +void DoDescribeView(std::unique_ptr p, const IFacilityProvider& f) { + f.RegisterActor(new TDescribeViewRPC(p.release())); +} + +} diff --git a/ydb/core/grpc_services/rpc_whoami.cpp b/ydb/core/grpc_services/rpc_whoami.cpp index 137c4360b0c5..d0096a50e444 100644 --- a/ydb/core/grpc_services/rpc_whoami.cpp +++ b/ydb/core/grpc_services/rpc_whoami.cpp @@ -21,24 +21,29 @@ class TWhoAmIRPC : public TActorBootstrapped { : Request(request) {} - void Bootstrap(const TActorContext& ctx) { + void Bootstrap() { //TODO: Do we realy realy need to make call to the ticket parser 
here??? //we have done it already in grpc_request_proxy auto req = dynamic_cast(Request.get()); Y_ABORT_UNLESS(req, "Unexpected request type for TWhoAmIRPC"); - TMaybe authToken = req->GetYdbToken(); - if (authToken) { - TMaybe database = Request->GetDatabaseName(); - ctx.Send(MakeTicketParserID(), new TEvTicketParser::TEvAuthorizeTicket({ - .Database = database ? database.GetRef() : TString(), - .Ticket = authToken.GetRef(), - .PeerName = Request->GetPeerName() - })); - Become(&TThis::StateWaitForTicket); + TString ticket; + if (TMaybe authToken = req->GetYdbToken()) { + ticket = authToken.GetRef(); + } else if (TVector clientCert = Request->FindClientCert(); !clientCert.empty()) { + ticket = TString(clientCert.front()); } else { ReplyError("No token provided"); PassAway(); + return; } + + TMaybe database = Request->GetDatabaseName(); + Send(MakeTicketParserID(), new TEvTicketParser::TEvAuthorizeTicket({ + .Database = database ? database.GetRef() : TString(), + .Ticket = ticket, + .PeerName = Request->GetPeerName() + })); + Become(&TThis::StateWaitForTicket); } STFUNC(StateWaitForTicket) { diff --git a/ydb/core/grpc_services/service_view.h b/ydb/core/grpc_services/service_view.h new file mode 100644 index 000000000000..54021e0f3b18 --- /dev/null +++ b/ydb/core/grpc_services/service_view.h @@ -0,0 +1,12 @@ +#pragma once + +#include + +namespace NKikimr::NGRpcService { + +class IRequestOpCtx; +class IFacilityProvider; + +void DoDescribeView(std::unique_ptr p, const IFacilityProvider& f); + +} diff --git a/ydb/core/grpc_services/service_ymq.h b/ydb/core/grpc_services/service_ymq.h new file mode 100644 index 000000000000..6ccf8137df2f --- /dev/null +++ b/ydb/core/grpc_services/service_ymq.h @@ -0,0 +1,30 @@ +#pragma once +#include + +namespace NActors { +struct TActorId; +} + +namespace NKikimr { +namespace NGRpcService { + +class IRequestOpCtx; +class IFacilityProvider; + +void DoYmqGetQueueUrlRequest(std::unique_ptr p, const IFacilityProvider& f); +void 
DoYmqCreateQueueRequest(std::unique_ptr p, const IFacilityProvider& f); +void DoYmqSendMessageRequest(std::unique_ptr p, const IFacilityProvider& f); +void DoYmqReceiveMessageRequest(std::unique_ptr p, const IFacilityProvider& f); +void DoYmqGetQueueAttributesRequest(std::unique_ptr p, const IFacilityProvider& f); +void DoYmqListQueuesRequest(std::unique_ptr p, const IFacilityProvider& f); +void DoYmqDeleteMessageRequest(std::unique_ptr p, const IFacilityProvider& f); +void DoYmqPurgeQueueRequest(std::unique_ptr p, const IFacilityProvider& f); +void DoYmqDeleteQueueRequest(std::unique_ptr p, const IFacilityProvider& f); +void DoYmqChangeMessageVisibilityRequest(std::unique_ptr p, const IFacilityProvider& f); +void DoYmqSetQueueAttributesRequest(std::unique_ptr p, const IFacilityProvider& f); +void DoYmqSendMessageBatchRequest(std::unique_ptr p, const IFacilityProvider& f); +void DoYmqDeleteMessageBatchRequest(std::unique_ptr p, const IFacilityProvider& f); +void DoYmqChangeMessageVisibilityBatchRequest(std::unique_ptr p, const IFacilityProvider& f); +void DoYmqListDeadLetterSourceQueuesRequest(std::unique_ptr p, const IFacilityProvider& f); +} +} diff --git a/ydb/core/grpc_services/ya.make b/ydb/core/grpc_services/ya.make index 63344c31d9fb..0f58e4c2c20c 100644 --- a/ydb/core/grpc_services/ya.make +++ b/ydb/core/grpc_services/ya.make @@ -74,6 +74,7 @@ SRCS( rpc_stream_execute_yql_script.cpp rpc_whoami.cpp rpc_object_storage.cpp + rpc_view.cpp table_settings.cpp rpc_common/rpc_common_kqp_session.cpp diff --git a/ydb/core/health_check/health_check.cpp b/ydb/core/health_check/health_check.cpp index 46460e4de841..1f1e595d0679 100644 --- a/ydb/core/health_check/health_check.cpp +++ b/ydb/core/health_check/health_check.cpp @@ -32,6 +32,9 @@ #include #include +#include +#include + static decltype(auto) make_vslot_tuple(const NKikimrBlobStorage::TVSlotId& id) { return std::make_tuple(id.GetNodeId(), id.GetPDiskId(), id.GetVSlotId()); } @@ -56,16 +59,21 @@ struct hash { } 
#define BLOG_CRIT(stream) LOG_CRIT_S(*TlsActivationContext, NKikimrServices::HEALTH, stream) +#define BLOG_D(stream) LOG_DEBUG_S(*TlsActivationContext, NKikimrServices::HEALTH, stream) namespace NKikimr { using NNodeWhiteboard::TNodeId; -using NNodeWhiteboard::TTabletId; namespace NHealthCheck { using namespace NActors; using namespace Ydb; +using namespace NSchemeCache; +using namespace NSchemeShard; +using namespace NSysView; +using namespace NConsole; +using NNodeWhiteboard::TTabletId; void RemoveUnrequestedEntries(Ydb::Monitoring::SelfCheckResult& result, const Ydb::Monitoring::SelfCheckRequest& request) { if (!request.return_verbose_status()) { @@ -117,11 +125,13 @@ class TSelfCheckRequest : public TActorBootstrapped { TActorId Sender; THolder Request; ui64 Cookie; + NWilson::TSpan Span; - TSelfCheckRequest(const TActorId& sender, THolder request, ui64 cookie) + TSelfCheckRequest(const TActorId& sender, THolder request, ui64 cookie, NWilson::TTraceId&& traceId) : Sender(sender) , Request(std::move(request)) , Cookie(cookie) + , Span(TComponentTracingLevels::TTablet::Basic, std::move(traceId), "health_check", NWilson::EFlags::AUTO_END) {} using TGroupId = ui32; @@ -179,21 +189,34 @@ class TSelfCheckRequest : public TActorBootstrapped { int Count = 1; TStackVec Identifiers; - TNodeTabletStateCount(const NKikimrHive::TTabletInfo& info, const TTabletStateSettings& settings) { - Type = info.tablettype(); - Leader = info.followerid() == 0; + static ETabletState GetState(const NKikimrHive::TTabletInfo& info, const TTabletStateSettings& settings) { if (info.volatilestate() == NKikimrHive::TABLET_VOLATILE_STATE_STOPPED) { - State = ETabletState::Stopped; - } else if (!settings.IsHiveSynchronizationPeriod - && info.volatilestate() != NKikimrHive::TABLET_VOLATILE_STATE_RUNNING - && TInstant::MilliSeconds(info.lastalivetimestamp()) < settings.AliveBarrier - && info.tabletbootmode() == NKikimrHive::TABLET_BOOT_MODE_DEFAULT) { - State = ETabletState::Dead; - } else if 
(info.restartsperperiod() >= settings.MaxRestartsPerPeriod) { - State = ETabletState::RestartsTooOften; - } else { - State = ETabletState::Good; + return ETabletState::Stopped; + } + ETabletState state = (info.restartsperperiod() >= settings.MaxRestartsPerPeriod) ? ETabletState::RestartsTooOften : ETabletState::Good; + if (info.volatilestate() == NKikimrHive::TABLET_VOLATILE_STATE_RUNNING) { + return state; + } + if (info.tabletbootmode() != NKikimrHive::TABLET_BOOT_MODE_DEFAULT) { + return state; + } + if (info.lastalivetimestamp() != 0 && TInstant::MilliSeconds(info.lastalivetimestamp()) < settings.AliveBarrier) { + // Tablet is not alive for a long time + // We should report it as dead unless it's just waiting to be created + if (info.generation() == 0 && info.volatilestate() == NKikimrHive::TABLET_VOLATILE_STATE_BOOTING && !info.inwaitqueue()) { + return state; + } + return ETabletState::Dead; } + return state; + + } + + TNodeTabletStateCount(const NKikimrHive::TTabletInfo& info, const TTabletStateSettings& settings) + : Type(info.tablettype()) + , State(GetState(info, settings)) + , Leader(info.followerid() == 0) + { } bool operator ==(const TNodeTabletStateCount& o) const { @@ -248,6 +271,7 @@ class TSelfCheckRequest : public TActorBootstrapped { struct TGroupState { TString ErasureSpecies; std::vector VSlots; + ui32 Generation; }; struct TSelfCheckResult { @@ -437,6 +461,167 @@ class TSelfCheckRequest : public TActorBootstrapped { } }; + template + struct TRequestResponse { + std::variant, TString> Response; + NWilson::TSpan Span; + + TRequestResponse() = default; + TRequestResponse(NWilson::TSpan&& span) + : Span(std::move(span)) + {} + + TRequestResponse(const TRequestResponse&) = delete; + TRequestResponse(TRequestResponse&&) = default; + TRequestResponse& operator =(const TRequestResponse&) = delete; + TRequestResponse& operator =(TRequestResponse&&) = default; + + void Set(std::unique_ptr&& response) { + constexpr bool hasErrorCheck = requires(const 
std::unique_ptr& r) {TSelfCheckRequest::IsSuccess(r);}; + if constexpr (hasErrorCheck) { + if (!TSelfCheckRequest::IsSuccess(response)) { + Error(TSelfCheckRequest::GetError(response)); + return; + } + } + if (!IsDone()) { + Span.EndOk(); + } + Response = std::move(response); + } + + void Set(TAutoPtr>&& response) { + Set(std::unique_ptr(response->Release().Release())); + } + + bool Error(const TString& error) { + if (!IsDone()) { + Span.EndError(error); + Response = error; + return true; + } + return false; + } + + bool IsOk() const { + return std::holds_alternative>(Response); + } + + bool IsError() const { + return std::holds_alternative(Response); + } + + bool IsDone() const { + return Response.index() != 0; + } + + explicit operator bool() const { + return IsOk(); + } + + T* Get() { + return std::get>(Response).get(); + } + + const T* Get() const { + return std::get>(Response).get(); + } + + T& GetRef() { + return *Get(); + } + + const T& GetRef() const { + return *Get(); + } + + T* operator ->() { + return Get(); + } + + const T* operator ->() const { + return Get(); + } + + T& operator *() { + return GetRef(); + } + + const T& operator *() const { + return GetRef(); + } + + TString GetError() const { + return std::get(Response); + } + + void Event(const TString& name) { + if (Span) { + Span.Event(name); + } + } + }; + + static bool IsSuccess(const std::unique_ptr& ev) { + return (ev->Request->ResultSet.size() > 0) && (std::find_if(ev->Request->ResultSet.begin(), ev->Request->ResultSet.end(), + [](const auto& entry) { + return entry.Status == NSchemeCache::TSchemeCacheNavigate::EStatus::Ok; + }) != ev->Request->ResultSet.end()); + } + + static TString GetError(const std::unique_ptr& ev) { + if (ev->Request->ResultSet.size() == 0) { + return "empty response"; + } + for (const auto& entry : ev->Request->ResultSet) { + if (entry.Status != NSchemeCache::TSchemeCacheNavigate::EStatus::Ok) { + switch (entry.Status) { + case 
NSchemeCache::TSchemeCacheNavigate::EStatus::Ok: + return "Ok"; + case NSchemeCache::TSchemeCacheNavigate::EStatus::Unknown: + return "Unknown"; + case NSchemeCache::TSchemeCacheNavigate::EStatus::RootUnknown: + return "RootUnknown"; + case NSchemeCache::TSchemeCacheNavigate::EStatus::PathErrorUnknown: + return "PathErrorUnknown"; + case NSchemeCache::TSchemeCacheNavigate::EStatus::PathNotTable: + return "PathNotTable"; + case NSchemeCache::TSchemeCacheNavigate::EStatus::PathNotPath: + return "PathNotPath"; + case NSchemeCache::TSchemeCacheNavigate::EStatus::TableCreationNotComplete: + return "TableCreationNotComplete"; + case NSchemeCache::TSchemeCacheNavigate::EStatus::LookupError: + return "LookupError"; + case NSchemeCache::TSchemeCacheNavigate::EStatus::RedirectLookupError: + return "RedirectLookupError"; + case NSchemeCache::TSchemeCacheNavigate::EStatus::AccessDenied: + return "AccessDenied"; + default: + return ::ToString(static_cast(entry.Status)); + } + } + } + return "no error"; + } + + static bool IsSuccess(const std::unique_ptr& ev) { + return ev->GetRecord().status() == NKikimrScheme::StatusSuccess; + } + + static TString GetError(const std::unique_ptr& ev) { + return NKikimrScheme::EStatus_Name(ev->GetRecord().status()); + } + + static bool IsSuccess(const std::unique_ptr& ev) { + const auto& operation(ev->Record.GetResponse().operation()); + return operation.ready() && operation.status() == Ydb::StatusIds::SUCCESS; + } + + static TString GetError(const std::unique_ptr& ev) { + const auto& operation(ev->Record.GetResponse().operation()); + return Ydb::StatusIds_StatusCode_Name(operation.status()); + } + TString FilterDatabase; THashMap FilterDomainKey; TVector PipeClients; @@ -447,21 +632,23 @@ class TSelfCheckRequest : public TActorBootstrapped { TTabletId RootSchemeShardId; TTabletId RootHiveId; THashMap TenantByPath; - THashMap> DescribeByPath; + THashMap> DescribeByPath; THashMap> PathsByPoolName; + THashMap> TenantStatusByPath; THashMap 
DatabaseStatusByPath; THashMap> TenantStateByPath; - THashMap> NavigateResult; - THashMap> HiveDomainStats; - THashMap> HiveNodeStats; - THashMap> HiveInfo; - THolder NodesInfo; + THashMap NavigateResult; + THashMap> HiveDomainStats; + THashMap> HiveNodeStats; + THashMap> HiveInfo; + std::optional> ListTenants; + std::optional> NodesInfo; THashMap MergedNodeInfo; - std::optional StoragePools; - std::optional Groups; - std::optional VSlots; - std::optional PDisks; - bool RequestedStorageConfig = false; + std::optional> StoragePools; + std::optional> Groups; + std::optional> VSlots; + std::optional> PDisks; + std::optional> NodeWardenStorageConfig; THashSet UnknownStaticGroups; THashSet NodeIds; @@ -474,7 +661,7 @@ class TSelfCheckRequest : public TActorBootstrapped { THashMap DatabaseState; THashMap SharedDatabases; - THashMap> NodeSystemState; + THashMap> NodeSystemState; THashMap MergedNodeSystemState; std::unordered_map PDisksMap; @@ -491,15 +678,15 @@ class TSelfCheckRequest : public TActorBootstrapped { THashSet UnavailableStorageNodes; THashSet UnavailableComputeNodes; - THashMap> NodeVDiskState; + THashMap> NodeVDiskState; TList VDisksAppended; std::unordered_map MergedVDiskState; - THashMap> NodePDiskState; + THashMap> NodePDiskState; TList PDisksAppended; std::unordered_map MergedPDiskState; - THashMap> NodeBSGroupState; + THashMap> NodeBSGroupState; TList BSGroupAppended; std::unordered_map MergedBSGroupState; @@ -600,9 +787,9 @@ class TSelfCheckRequest : public TActorBootstrapped { TTenantInfo& tenant = TenantByPath[DomainPath]; tenant.Name = DomainPath; RequestSchemeCacheNavigate(DomainPath); - RequestListTenants(); + ListTenants = RequestListTenants(); } else if (FilterDatabase != DomainPath) { - RequestTenantStatus(FilterDatabase); + TenantStatusByPath[FilterDatabase] = RequestTenantStatus(FilterDatabase); } else { TTenantInfo& tenant = TenantByPath[DomainPath]; tenant.Name = DomainPath; @@ -614,14 +801,14 @@ class TSelfCheckRequest : public 
TActorBootstrapped { TabletRequests.TabletStates[RootHiveId].Database = DomainPath; TabletRequests.TabletStates[RootHiveId].Type = TTabletTypes::Hive; //RequestHiveDomainStats(RootHiveId); - RequestHiveNodeStats(RootHiveId); - RequestHiveInfo(RootHiveId); + HiveNodeStats[RootHiveId] = RequestHiveNodeStats(RootHiveId); + HiveInfo[RootHiveId] = RequestHiveInfo(RootHiveId); } if (RootSchemeShardId && !IsSpecificDatabaseFilter()) { TabletRequests.TabletStates[RootSchemeShardId].Database = DomainPath; TabletRequests.TabletStates[RootSchemeShardId].Type = TTabletTypes::SchemeShard; - RequestDescribe(RootSchemeShardId, DomainPath); + DescribeByPath[DomainPath] = RequestDescribe(RootSchemeShardId, DomainPath); } if (BsControllerId) { @@ -630,7 +817,9 @@ class TSelfCheckRequest : public TActorBootstrapped { RequestBsController(); } - Send(GetNameserviceActorId(), new TEvInterconnect::TEvListNodes()); + + NodesInfo = TRequestResponse(Span.CreateChild(TComponentTracingLevels::TTablet::Detailed, "TEvInterconnect::TEvListNodes")); + Send(GetNameserviceActorId(), new TEvInterconnect::TEvListNodes(), 0/*flags*/, 0/*cookie*/, Span.GetTraceId()); ++Requests; Become(&TThis::StateWait); @@ -639,15 +828,16 @@ class TSelfCheckRequest : public TActorBootstrapped { } bool HaveAllBSControllerInfo() { - return StoragePools && Groups && VSlots && PDisks; + return StoragePools && StoragePools->IsOk() && Groups && Groups->IsOk() && VSlots && VSlots->IsOk() && PDisks && PDisks->IsOk(); } bool NeedWhiteboardInfoForGroup(TGroupId groupId) { - return !HaveAllBSControllerInfo() && IsStaticGroup(groupId); + return UnknownStaticGroups.contains(groupId) || (!HaveAllBSControllerInfo() && IsStaticGroup(groupId)); } void Handle(TEvNodeWardenStorageConfig::TPtr ev) { - if (const NKikimrBlobStorage::TStorageConfig& config = *ev->Get()->Config; config.HasBlobStorageConfig()) { + NodeWardenStorageConfig->Set(std::move(ev)); + if (const NKikimrBlobStorage::TStorageConfig& config = 
*NodeWardenStorageConfig->Get()->Config; config.HasBlobStorageConfig()) { if (const auto& bsConfig = config.GetBlobStorageConfig(); bsConfig.HasServiceSet()) { const auto& staticConfig = bsConfig.GetServiceSet(); for (const NKikimrBlobStorage::TNodeWardenServiceSet_TPDisk& pDisk : staticConfig.pdisks()) { @@ -678,6 +868,7 @@ class TSelfCheckRequest : public TActorBootstrapped { auto groupId = vDisk.GetVDiskID().GetGroupID(); if (NeedWhiteboardInfoForGroup(groupId)) { + BLOG_D("Requesting whiteboard for group " << groupId); RequestStorageNode(vDisk.GetVDiskLocation().GetNodeID()); } } @@ -737,10 +928,11 @@ class TSelfCheckRequest : public TActorBootstrapped { } } - void RequestTabletPipe(TTabletId tabletId, - const TString& key, - IEventBase* payload, - std::optional requestId = std::nullopt) { + template + [[nodiscard]] TRequestResponse RequestTabletPipe(TTabletId tabletId, + IEventBase* payload, + std::optional requestId = std::nullopt) { + TString key = TypeName(*payload); ui64 cookie; if (requestId) { cookie = *requestId; @@ -748,6 +940,10 @@ class TSelfCheckRequest : public TActorBootstrapped { } else { cookie = TabletRequests.MakeRequest(tabletId, key); } + TRequestResponse response(Span.CreateChild(TComponentTracingLevels::TTablet::Detailed, key)); + if (Span) { + response.Span.Attribute("tablet_id", ::ToString(tabletId)); + } TTabletRequestsState::TTabletState& requestState(TabletRequests.TabletStates[tabletId]); if (!requestState.TabletPipe) { requestState.TabletPipe = RegisterWithSameMailbox(NTabletPipe::CreateClient( @@ -756,93 +952,123 @@ class TSelfCheckRequest : public TActorBootstrapped { NTabletPipe::TClientRetryPolicy::WithRetries())); PipeClients.emplace_back(requestState.TabletPipe); } - NTabletPipe::SendData(SelfId(), requestState.TabletPipe, payload, cookie); + NTabletPipe::SendData(SelfId(), requestState.TabletPipe, payload, cookie, response.Span.GetTraceId()); ++Requests; + return response; } - void RequestDescribe(TTabletId schemeShardId, 
const TString& path) { + [[nodiscard]] TRequestResponse RequestDescribe(TTabletId schemeShardId, const TString& path) { THolder request = MakeHolder(); NKikimrSchemeOp::TDescribePath& record = request->Record; record.SetPath(path); record.MutableOptions()->SetReturnPartitioningInfo(false); record.MutableOptions()->SetReturnPartitionConfig(false); record.MutableOptions()->SetReturnChildren(false); - RequestTabletPipe(schemeShardId, "TEvDescribeScheme:" + path, request.Release()); + auto response = RequestTabletPipe(schemeShardId, request.Release()); + if (response.Span) { + response.Span.Attribute("path", path); + } + return response; } - void RequestHiveInfo(TTabletId hiveId) { + [[nodiscard]] TRequestResponse RequestHiveInfo(TTabletId hiveId) { THolder request = MakeHolder(); request->Record.SetReturnFollowers(true); - RequestTabletPipe(hiveId, "TEvRequestHiveInfo", request.Release()); + return RequestTabletPipe(hiveId, request.Release()); } - void RequestHiveDomainStats(TTabletId hiveId) { + [[nodiscard]] TRequestResponse RequestHiveDomainStats(TTabletId hiveId) { THolder request = MakeHolder(); request->Record.SetReturnFollowers(true); request->Record.SetReturnMetrics(true); - RequestTabletPipe(hiveId, "TEvRequestHiveDomainStats", request.Release()); + return RequestTabletPipe(hiveId, request.Release()); } - void RequestHiveNodeStats(TTabletId hiveId) { + [[nodiscard]] TRequestResponse RequestHiveNodeStats(TTabletId hiveId) { THolder request = MakeHolder(); - RequestTabletPipe(hiveId, "TEvRequestHiveNodeStats", request.Release()); + return RequestTabletPipe(hiveId, request.Release()); } - void RequestTenantStatus(const TString& path) { + [[nodiscard]] TRequestResponse RequestTenantStatus(const TString& path) { THolder request = MakeHolder(); request->Record.MutableRequest()->set_path(path); - RequestTabletPipe(ConsoleId, "TEvGetTenantStatusRequest:" + path, request.Release()); + auto response = RequestTabletPipe(ConsoleId, request.Release()); + if 
(response.Span) { + response.Span.Attribute("path", path); + } + return response; } - void RequestListTenants() { + [[nodiscard]] TRequestResponse RequestListTenants() { THolder request = MakeHolder(); - RequestTabletPipe(ConsoleId, "TEvListTenantsRequest", request.Release()); + return RequestTabletPipe(ConsoleId, request.Release()); } void RequestBsController() { THolder requestPools = MakeHolder(); - RequestTabletPipe(BsControllerId, "TEvGetStoragePoolsRequest", requestPools.Release(), TTabletRequestsState::RequestStoragePools); + StoragePools = RequestTabletPipe(BsControllerId, requestPools.Release(), TTabletRequestsState::RequestStoragePools); THolder requestGroups = MakeHolder(); - RequestTabletPipe(BsControllerId, "TEvGetGroupsRequest", requestGroups.Release(), TTabletRequestsState::RequestGroups); + Groups = RequestTabletPipe(BsControllerId, requestGroups.Release(), TTabletRequestsState::RequestGroups); THolder requestVSlots = MakeHolder(); - RequestTabletPipe(BsControllerId, "TEvGetVSlotsRequest", requestVSlots.Release(), TTabletRequestsState::RequestVSlots); + VSlots = RequestTabletPipe(BsControllerId, requestVSlots.Release(), TTabletRequestsState::RequestVSlots); THolder requestPDisks = MakeHolder(); - RequestTabletPipe(BsControllerId, "TEvGetPDisksRequest", requestPDisks.Release(), TTabletRequestsState::RequestPDisks); + PDisks = RequestTabletPipe(BsControllerId, requestPDisks.Release(), TTabletRequestsState::RequestPDisks); } + THashMap> NavigateKeySet; + void RequestSchemeCacheNavigate(const TString& path) { - THolder request = MakeHolder(); - NSchemeCache::TSchemeCacheNavigate::TEntry entry; + ui64 cookie = NavigateKeySet.size(); + THolder request = MakeHolder(); + request->Cookie = cookie; + TSchemeCacheNavigate::TEntry& entry = request->ResultSet.emplace_back(); entry.Path = NKikimr::SplitPath(path); - entry.Operation = NSchemeCache::TSchemeCacheNavigate::EOp::OpPath; - request->ResultSet.emplace_back(entry); - Send(MakeSchemeCacheID(), new 
TEvTxProxySchemeCache::TEvNavigateKeySet(request.Release())); + entry.Operation = TSchemeCacheNavigate::EOp::OpPath; + TRequestResponse response(Span.CreateChild(TComponentTracingLevels::TTablet::Detailed, TypeName(*request.Get()))); + if (Span) { + response.Span.Attribute("path", path); + } + NavigateKeySet.emplace(cookie, std::move(response)); + Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(request.Release()), 0/*flags*/, 0/*cookie*/, response.Span.GetTraceId()); ++Requests; } void RequestSchemeCacheNavigate(const TPathId& pathId) { + ui64 cookie = NavigateKeySet.size(); THolder request = MakeHolder(); - NSchemeCache::TSchemeCacheNavigate::TEntry entry; + request->Cookie = cookie; + NSchemeCache::TSchemeCacheNavigate::TEntry& entry = request->ResultSet.emplace_back(); entry.TableId.PathId = pathId; entry.RequestType = NSchemeCache::TSchemeCacheNavigate::TEntry::ERequestType::ByTableId; entry.RedirectRequired = false; entry.Operation = NSchemeCache::TSchemeCacheNavigate::EOp::OpPath; - request->ResultSet.emplace_back(entry); - Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(request.Release())); + TRequestResponse response(Span.CreateChild(TComponentTracingLevels::TTablet::Detailed, TypeName(*request.Get()))); + if (Span) { + response.Span.Attribute("path_id", pathId.ToString()); + } + NavigateKeySet.emplace(cookie, std::move(response)); + Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(request.Release()), 0/*flags*/, 0/*cookie*/, response.Span.GetTraceId()); ++Requests; } template - void RequestNodeWhiteboard(TNodeId nodeId) { + [[nodiscard]] TRequestResponse::Type> RequestNodeWhiteboard(TNodeId nodeId) { TActorId whiteboardServiceId = NNodeWhiteboard::MakeNodeWhiteboardServiceId(nodeId); auto request = MakeHolder(); - Send(whiteboardServiceId, request.Release(), IEventHandle::FlagTrackDelivery, nodeId); + TRequestResponse::Type> 
response(Span.CreateChild(TComponentTracingLevels::TTablet::Detailed, TypeName(*request.Get()))); + if (response.Span) { + response.Span.Attribute("target_node_id", nodeId); + } + Send(whiteboardServiceId, request.Release(), IEventHandle::FlagTrackDelivery, nodeId, response.Span.GetTraceId()); + return response; } void RequestGenericNode(TNodeId nodeId) { - if (NodeIds.emplace(nodeId).second) { - Send(TlsActivationContext->ActorSystem()->InterconnectProxy(nodeId), new TEvents::TEvSubscribe()); - RequestNodeWhiteboard(nodeId); + if (NodeSystemState.count(nodeId) == 0) { + if (NodeIds.insert(nodeId).second) { + Send(TlsActivationContext->ActorSystem()->InterconnectProxy(nodeId), new TEvents::TEvSubscribe()); + } + NodeSystemState.emplace(nodeId, RequestNodeWhiteboard(nodeId)); ++Requests; } } @@ -856,36 +1082,47 @@ class TSelfCheckRequest : public TActorBootstrapped { void RequestStorageNode(TNodeId nodeId) { if (StorageNodeIds.emplace(nodeId).second) { RequestGenericNode(nodeId); - RequestNodeWhiteboard(nodeId); - ++Requests; - RequestNodeWhiteboard(nodeId); - ++Requests; - RequestNodeWhiteboard(nodeId); - ++Requests; + if (NodeVDiskState.count(nodeId) == 0) { + NodeVDiskState.emplace(nodeId, RequestNodeWhiteboard(nodeId)); + ++Requests; + } + if (NodePDiskState.count(nodeId) == 0) { + NodePDiskState.emplace(nodeId, RequestNodeWhiteboard(nodeId)); + ++Requests; + } + if (NodeBSGroupState.count(nodeId) == 0) { + NodeBSGroupState.emplace(nodeId, RequestNodeWhiteboard(nodeId)); + ++Requests; + } } } - void RequestStorageConfig() { - if (!RequestedStorageConfig) { - Send(MakeBlobStorageNodeWardenID(SelfId().NodeId()), new TEvNodeWardenQueryStorageConfig(false)); - RequestedStorageConfig = true; - ++Requests; - } + [[nodiscard]] TRequestResponse RequestStorageConfig() { + TRequestResponse response(Span.CreateChild(TComponentTracingLevels::TTablet::Detailed, TypeName())); + Send(MakeBlobStorageNodeWardenID(SelfId().NodeId()), new TEvNodeWardenQueryStorageConfig(false), 
0/*flags*/, 0/*cookie*/, response.Span.GetTraceId()); + ++Requests; + return response; } void Handle(TEvPrivate::TEvRetryNodeWhiteboard::TPtr& ev) { - switch (ev->Get()->EventId) { + auto eventId = ev->Get()->EventId; + auto nodeId = ev->Get()->NodeId; + switch (eventId) { case NNodeWhiteboard::TEvWhiteboard::EvSystemStateRequest: - RequestNodeWhiteboard(ev->Get()->NodeId); + NodeSystemState.erase(nodeId); + NodeSystemState[nodeId] = RequestNodeWhiteboard(nodeId); break; case NNodeWhiteboard::TEvWhiteboard::EvVDiskStateRequest: - RequestNodeWhiteboard(ev->Get()->NodeId); + NodeVDiskState.erase(nodeId); + NodeVDiskState[nodeId] = RequestNodeWhiteboard(nodeId); break; case NNodeWhiteboard::TEvWhiteboard::EvPDiskStateRequest: - RequestNodeWhiteboard(ev->Get()->NodeId); + NodePDiskState.erase(nodeId); + NodePDiskState[nodeId] = RequestNodeWhiteboard(nodeId); break; case NNodeWhiteboard::TEvWhiteboard::EvBSGroupStateRequest: - RequestNodeWhiteboard(ev->Get()->NodeId); + NodeBSGroupState.erase(nodeId); + NodeBSGroupState[nodeId] = RequestNodeWhiteboard(nodeId); break; default: RequestDone("unsupported event scheduled"); @@ -904,37 +1141,34 @@ class TSelfCheckRequest : public TActorBootstrapped { void Handle(TEvents::TEvUndelivered::TPtr& ev) { ui32 nodeId = ev.Get()->Cookie; + TString error = "Undelivered"; if (ev->Get()->SourceType == NNodeWhiteboard::TEvWhiteboard::EvSystemStateRequest) { - if (NodeIds.count(nodeId) != 0 && NodeSystemState.count(nodeId) == 0) { + if (NodeSystemState.count(nodeId) && NodeSystemState[nodeId].Error(error)) { if (!RetryRequestNodeWhiteboard(nodeId)) { - NodeSystemState.emplace(nodeId, nullptr); RequestDone("undelivered of TEvSystemStateRequest"); UnavailableComputeNodes.insert(nodeId); } } } if (ev->Get()->SourceType == NNodeWhiteboard::TEvWhiteboard::EvVDiskStateRequest) { - if (StorageNodeIds.count(nodeId) != 0 && NodeVDiskState.count(nodeId) == 0) { + if (NodeVDiskState.count(nodeId) && NodeVDiskState[nodeId].Error(error)) { if 
(!RetryRequestNodeWhiteboard(nodeId)) { - NodeVDiskState.emplace(nodeId, nullptr); RequestDone("undelivered of TEvVDiskStateRequest"); UnavailableStorageNodes.insert(nodeId); } } } if (ev->Get()->SourceType == NNodeWhiteboard::TEvWhiteboard::EvPDiskStateRequest) { - if (StorageNodeIds.count(nodeId) != 0 && NodePDiskState.count(nodeId) == 0) { + if (NodePDiskState.count(nodeId) && NodePDiskState[nodeId].Error(error)) { if (!RetryRequestNodeWhiteboard(nodeId)) { - NodePDiskState.emplace(nodeId, nullptr); RequestDone("undelivered of TEvPDiskStateRequest"); UnavailableStorageNodes.insert(nodeId); } } } if (ev->Get()->SourceType == NNodeWhiteboard::TEvWhiteboard::EvBSGroupStateRequest) { - if (StorageNodeIds.count(nodeId) != 0 && NodeBSGroupState.count(nodeId) == 0) { + if (NodeBSGroupState.count(nodeId) && NodeBSGroupState[nodeId].Error(error)) { if (!RetryRequestNodeWhiteboard(nodeId)) { - NodeBSGroupState.emplace(nodeId, nullptr); RequestDone("undelivered of TEvBSGroupStateRequest"); } } @@ -943,30 +1177,27 @@ class TSelfCheckRequest : public TActorBootstrapped { void Disconnected(TEvInterconnect::TEvNodeDisconnected::TPtr& ev) { ui32 nodeId = ev->Get()->NodeId; - if (NodeIds.count(nodeId) != 0 && NodeSystemState.count(nodeId) == 0) { + TString error = "NodeDisconnected"; + if (NodeSystemState.count(nodeId) && NodeSystemState[nodeId].Error(error)) { if (!RetryRequestNodeWhiteboard(nodeId)) { - NodeSystemState.emplace(nodeId, nullptr); RequestDone("node disconnected with TEvSystemStateRequest"); UnavailableComputeNodes.insert(nodeId); } } - if (StorageNodeIds.count(nodeId) != 0 && NodeVDiskState.count(nodeId) == 0) { + if (NodeVDiskState.count(nodeId) && NodeVDiskState[nodeId].Error(error)) { if (!RetryRequestNodeWhiteboard(nodeId)) { - NodeVDiskState.emplace(nodeId, nullptr); RequestDone("node disconnected with TEvVDiskStateRequest"); UnavailableStorageNodes.insert(nodeId); } } - if (StorageNodeIds.count(nodeId) != 0 && NodePDiskState.count(nodeId) == 0) { + if 
(NodePDiskState.count(nodeId) && NodePDiskState[nodeId].Error(error)) { if (!RetryRequestNodeWhiteboard(nodeId)) { - NodePDiskState.emplace(nodeId, nullptr); RequestDone("node disconnected with TEvPDiskStateRequest"); UnavailableStorageNodes.insert(nodeId); } } - if (StorageNodeIds.count(nodeId) != 0 && NodeBSGroupState.count(nodeId) == 0) { + if (NodeBSGroupState.count(nodeId) && NodeBSGroupState[nodeId].Error(error)) { if (!RetryRequestNodeWhiteboard(nodeId)) { - NodeBSGroupState.emplace(nodeId, nullptr); RequestDone("node disconnected with TEvBSGroupStateRequest"); } } @@ -989,11 +1220,15 @@ class TSelfCheckRequest : public TActorBootstrapped { void HandleTimeout(TEvents::TEvWakeup::TPtr& ev) { switch (ev->Get()->Tag) { case TimeoutBSC: + Span.Event("TimeoutBSC"); if (!HaveAllBSControllerInfo()) { - RequestStorageConfig(); + if (!NodeWardenStorageConfig) { + NodeWardenStorageConfig = RequestStorageConfig(); + } } break; case TimeoutFinal: + Span.Event("TimeoutFinal"); ReplyAndPassAway(); break; } @@ -1010,8 +1245,8 @@ class TSelfCheckRequest : public TActorBootstrapped { void Handle(TEvInterconnect::TEvNodesInfo::TPtr& ev) { bool needComputeFromStaticNodes = !IsSpecificDatabaseFilter(); - NodesInfo = ev->Release(); - for (const auto& ni : NodesInfo->Nodes) { + NodesInfo->Set(std::move(ev)); + for (const auto& ni : NodesInfo->Get()->Nodes) { MergedNodeInfo[ni.NodeId] = &ni; if (IsStaticNode(ni.NodeId) && needComputeFromStaticNodes) { DatabaseState[DomainPath].ComputeNodeIds.push_back(ni.NodeId); @@ -1026,44 +1261,46 @@ class TSelfCheckRequest : public TActorBootstrapped { } bool NeedWhiteboardForStaticGroupsWithUnknownStatus() { - return RequestedStorageConfig && !IsSpecificDatabaseFilter(); + return NodeWardenStorageConfig && !IsSpecificDatabaseFilter(); } void Handle(NSysView::TEvSysView::TEvGetStoragePoolsResponse::TPtr& ev) { TabletRequests.CompleteRequest(TTabletRequestsState::RequestStoragePools); - StoragePools = std::move(ev->Get()->Record); + 
StoragePools->Set(std::move(ev)); AggregateBSControllerState(); RequestDone("TEvGetStoragePoolsRequest"); } void Handle(NSysView::TEvSysView::TEvGetGroupsResponse::TPtr& ev) { TabletRequests.CompleteRequest(TTabletRequestsState::RequestGroups); - Groups = std::move(ev->Get()->Record); + Groups->Set(std::move(ev)); AggregateBSControllerState(); RequestDone("TEvGetGroupsRequest"); } void Handle(NSysView::TEvSysView::TEvGetVSlotsResponse::TPtr& ev) { TabletRequests.CompleteRequest(TTabletRequestsState::RequestVSlots); - VSlots = std::move(ev->Get()->Record); + VSlots->Set(std::move(ev)); AggregateBSControllerState(); RequestDone("TEvGetVSlotsRequest"); } void Handle(NSysView::TEvSysView::TEvGetPDisksResponse::TPtr& ev) { TabletRequests.CompleteRequest(TTabletRequestsState::RequestPDisks); - PDisks = std::move(ev->Get()->Record); + PDisks->Set(std::move(ev)); AggregateBSControllerState(); RequestDone("TEvGetPDisksRequest"); } void Handle(NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult::TPtr& ev) { TabletRequests.CompleteRequest(ev->Cookie); - if (ev->Get()->GetRecord().status() == NKikimrScheme::StatusSuccess) { - TString path = ev->Get()->GetRecord().path(); + TString path = ev->Get()->GetRecord().path(); + auto& response = DescribeByPath[path]; + response.Set(std::move(ev)); + if (response.IsOk()) { TDatabaseState& state(DatabaseState[path]); state.Path = path; - for (const auto& storagePool : ev->Get()->GetRecord().pathdescription().domaindescription().storagepools()) { + for (const auto& storagePool : response.Get()->GetRecord().pathdescription().domaindescription().storagepools()) { TString storagePoolName = storagePool.name(); state.StoragePoolNames.emplace(storagePoolName); PathsByPoolName[storagePoolName].emplace(path); // no poolId in TEvDescribeSchemeResult, so it's neccesary to keep poolNames instead @@ -1073,18 +1310,19 @@ class TSelfCheckRequest : public TActorBootstrapped { state.StoragePools.emplace(0); // static group has poolId = 0 
StoragePoolState[0].Name = STATIC_STORAGE_POOL_NAME; } - state.StorageUsage = ev->Get()->GetRecord().pathdescription().domaindescription().diskspaceusage().tables().totalsize(); - state.StorageQuota = ev->Get()->GetRecord().pathdescription().domaindescription().databasequotas().data_size_hard_quota(); - - DescribeByPath[path] = ev->Release(); + state.StorageUsage = response.Get()->GetRecord().pathdescription().domaindescription().diskspaceusage().tables().totalsize(); + state.StorageQuota = response.Get()->GetRecord().pathdescription().domaindescription().databasequotas().data_size_hard_quota(); } RequestDone("TEvDescribeSchemeResult"); } void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { - if (ev->Get()->Request->ResultSet.size() == 1 && ev->Get()->Request->ResultSet.begin()->Status == NSchemeCache::TSchemeCacheNavigate::EStatus::Ok) { - auto domainInfo = ev->Get()->Request->ResultSet.begin()->DomainInfo; - TString path = CanonizePath(ev->Get()->Request->ResultSet.begin()->Path); + TRequestResponse& response = NavigateKeySet[ev->Get()->Request->Cookie]; + response.Set(std::move(ev)); + if (response.IsOk()) { + auto domainInfo = response.Get()->Request->ResultSet.begin()->DomainInfo; + TString path = CanonizePath(response.Get()->Request->ResultSet.begin()->Path); + NavigateResult[path] = response.Get()->Request->Cookie; if (domainInfo->IsServerless()) { if (NeedHealthCheckForServerless(domainInfo)) { if (SharedDatabases.emplace(domainInfo->ResourcesDomainKey, path).second) { @@ -1105,11 +1343,14 @@ class TSelfCheckRequest : public TActorBootstrapped { TabletRequests.TabletStates[hiveId].Database = path; TabletRequests.TabletStates[hiveId].Type = TTabletTypes::Hive; //RequestHiveDomainStats(hiveId); - RequestHiveNodeStats(hiveId); - RequestHiveInfo(hiveId); + if (HiveNodeStats.count(hiveId) == 0) { + HiveNodeStats[hiveId] = RequestHiveNodeStats(hiveId); + } + if (HiveInfo.count(hiveId) == 0) { + HiveInfo[hiveId] = RequestHiveInfo(hiveId); + } } 
FilterDomainKey[TSubDomainKey(domainInfo->DomainKey.OwnerId, domainInfo->DomainKey.LocalPathId)] = path; - NavigateResult[path] = std::move(ev->Get()->Request); TTabletId schemeShardId = domainInfo->Params.GetSchemeShard(); if (!schemeShardId) { schemeShardId = RootSchemeShardId; @@ -1117,7 +1358,9 @@ class TSelfCheckRequest : public TActorBootstrapped { TabletRequests.TabletStates[schemeShardId].Database = path; TabletRequests.TabletStates[schemeShardId].Type = TTabletTypes::SchemeShard; } - RequestDescribe(schemeShardId, path); + if (DescribeByPath.count(path) == 0) { + DescribeByPath[path] = RequestDescribe(schemeShardId, path); + } } RequestDone("TEvNavigateKeySetResult"); } @@ -1129,53 +1372,62 @@ class TSelfCheckRequest : public TActorBootstrapped { void Handle(TEvHive::TEvResponseHiveDomainStats::TPtr& ev) { TTabletId hiveId = TabletRequests.CompleteRequest(ev->Cookie); - for (const NKikimrHive::THiveDomainStats& hiveStat : ev->Get()->Record.GetDomainStats()) { + auto& response = HiveDomainStats[hiveId]; + response.Set(std::move(ev)); + for (const NKikimrHive::THiveDomainStats& hiveStat : response.Get()->Record.GetDomainStats()) { for (TNodeId nodeId : hiveStat.GetNodeIds()) { RequestComputeNode(nodeId); } } - HiveDomainStats[hiveId] = std::move(ev->Release()); RequestDone("TEvResponseHiveDomainStats"); } void Handle(TEvHive::TEvResponseHiveNodeStats::TPtr& ev) { TTabletId hiveId = TabletRequests.CompleteRequest(ev->Cookie); + auto& response = HiveNodeStats[hiveId]; + response.Set(std::move(ev)); TInstant aliveBarrier = TInstant::Now() - TDuration::Minutes(5); - for (const NKikimrHive::THiveNodeStats& hiveStat : ev->Get()->Record.GetNodeStats()) { + for (const NKikimrHive::THiveNodeStats& hiveStat : response.Get()->Record.GetNodeStats()) { if (!hiveStat.HasLastAliveTimestamp() || TInstant::MilliSeconds(hiveStat.GetLastAliveTimestamp()) > aliveBarrier) { RequestComputeNode(hiveStat.GetNodeId()); } } - HiveNodeStats[hiveId] = std::move(ev->Release()); 
RequestDone("TEvResponseHiveNodeStats"); } void Handle(TEvHive::TEvResponseHiveInfo::TPtr& ev) { TTabletId hiveId = TabletRequests.CompleteRequest(ev->Cookie); - HiveInfo[hiveId] = std::move(ev->Release()); + HiveInfo[hiveId].Set(std::move(ev)); RequestDone("TEvResponseHiveInfo"); } void Handle(NConsole::TEvConsole::TEvGetTenantStatusResponse::TPtr& ev) { TabletRequests.CompleteRequest(ev->Cookie); auto& operation(ev->Get()->Record.GetResponse().operation()); - if (operation.ready() && operation.status() == Ydb::StatusIds::SUCCESS) { + if (operation.ready()) { Ydb::Cms::GetDatabaseStatusResult getTenantStatusResult; operation.result().UnpackTo(&getTenantStatusResult); TString path = getTenantStatusResult.path(); - DatabaseStatusByPath[path] = std::move(getTenantStatusResult); - DatabaseState[path]; - RequestSchemeCacheNavigate(path); + auto& response = TenantStatusByPath[path]; + response.Set(std::move(ev)); + if (response.IsOk()) { + Ydb::Cms::GetDatabaseStatusResult getTenantStatusResult; + operation.result().UnpackTo(&getTenantStatusResult); + DatabaseStatusByPath[path] = getTenantStatusResult; + DatabaseState[path]; + RequestSchemeCacheNavigate(path); + } } RequestDone("TEvGetTenantStatusResponse"); } void Handle(NConsole::TEvConsole::TEvListTenantsResponse::TPtr& ev) { TabletRequests.CompleteRequest(ev->Cookie); + ListTenants->Set(std::move(ev)); Ydb::Cms::ListDatabasesResult listTenantsResult; - ev->Get()->Record.GetResponse().operation().result().UnpackTo(&listTenantsResult); + ListTenants->Get()->Record.GetResponse().operation().result().UnpackTo(&listTenantsResult); for (const TString& path : listTenantsResult.paths()) { - RequestTenantStatus(path); + TenantStatusByPath[path] = RequestTenantStatus(path); DatabaseState[path]; } RequestDone("TEvListTenantsResponse"); @@ -1183,20 +1435,18 @@ class TSelfCheckRequest : public TActorBootstrapped { void Handle(NNodeWhiteboard::TEvWhiteboard::TEvSystemStateResponse::TPtr& ev) { TNodeId nodeId = ev.Get()->Cookie; - 
if (NodeSystemState.count(nodeId) == 0) { - auto& nodeSystemState(NodeSystemState[nodeId]); - nodeSystemState = ev->Release(); - for (NKikimrWhiteboard::TSystemStateInfo& state : *nodeSystemState->Record.MutableSystemStateInfo()) { - state.set_nodeid(nodeId); - MergedNodeSystemState[nodeId] = &state; - } - RequestDone("TEvSystemStateResponse"); + auto& nodeSystemState(NodeSystemState[nodeId]); + nodeSystemState.Set(std::move(ev)); + for (NKikimrWhiteboard::TSystemStateInfo& state : *nodeSystemState->Record.MutableSystemStateInfo()) { + state.set_nodeid(nodeId); + MergedNodeSystemState[nodeId] = &state; } + RequestDone("TEvSystemStateResponse"); } static const int HIVE_SYNCHRONIZATION_PERIOD_MS = 10000; - bool IsHiveSynchronizationPeriod(NKikimrHive::TEvResponseHiveInfo& hiveInfo) { + bool IsHiveSynchronizationPeriod(const NKikimrHive::TEvResponseHiveInfo& hiveInfo) { return hiveInfo.GetResponseTimestamp() < hiveInfo.GetStartTimeTimestamp() + HIVE_SYNCHRONIZATION_PERIOD_MS; } @@ -1204,7 +1454,7 @@ class TSelfCheckRequest : public TActorBootstrapped { TNodeTabletState::TTabletStateSettings settings; settings.AliveBarrier = TInstant::Now() - TDuration::Minutes(5); for (const auto& [hiveId, hiveResponse] : HiveInfo) { - if (hiveResponse) { + if (hiveResponse.IsOk()) { settings.IsHiveSynchronizationPeriod = IsHiveSynchronizationPeriod(hiveResponse->Record); for (const NKikimrHive::TTabletInfo& hiveTablet : hiveResponse->Record.GetTablets()) { TSubDomainKey tenantId = TSubDomainKey(hiveTablet.GetObjectDomain()); @@ -1283,22 +1533,27 @@ class TSelfCheckRequest : public TActorBootstrapped { if (!HaveAllBSControllerInfo()) { return; } - for (const auto& group : Groups->GetEntries()) { + for (const auto& group : Groups->Get()->Record.GetEntries()) { auto groupId = group.GetKey().GetGroupId(); auto poolId = group.GetInfo().GetStoragePoolId(); - GroupState[groupId].ErasureSpecies = group.GetInfo().GetErasureSpeciesV2(); + auto& groupState = GroupState[groupId]; + 
groupState.ErasureSpecies = group.GetInfo().GetErasureSpeciesV2(); + groupState.Generation = group.GetInfo().GetGeneration(); StoragePoolState[poolId].Groups.emplace(groupId); } - for (const auto& vSlot : VSlots->GetEntries()) { + for (const auto& vSlot : VSlots->Get()->Record.GetEntries()) { auto vSlotId = GetVSlotId(vSlot.GetKey()); - GroupState[vSlot.GetInfo().GetGroupId()].VSlots.push_back(&vSlot); + auto groupStateIt = GroupState.find(vSlot.GetInfo().GetGroupId()); + if (groupStateIt != GroupState.end() && vSlot.GetInfo().GetGroupGeneration() == groupStateIt->second.Generation) { + groupStateIt->second.VSlots.push_back(&vSlot); + } } - for (const auto& pool : StoragePools->GetEntries()) { // there is no specific pool for static group here + for (const auto& pool : StoragePools->Get()->Record.GetEntries()) { // there is no specific pool for static group here ui64 poolId = pool.GetKey().GetStoragePoolId(); TString storagePoolName = pool.GetInfo().GetName(); StoragePoolState[poolId].Name = storagePoolName; } - for (const auto& pDisk : PDisks->GetEntries()) { + for (const auto& pDisk : PDisks->Get()->Record.GetEntries()) { auto pDiskId = GetPDiskId(pDisk.GetKey()); PDisksMap.emplace(pDiskId, &pDisk); } @@ -1308,9 +1563,12 @@ class TSelfCheckRequest : public TActorBootstrapped { // it should not be trusted Ydb::Monitoring::StorageGroupStatus staticGroupStatus; FillGroupStatus(0, staticGroupStatus, {nullptr}); + BLOG_D("Static group status is " << staticGroupStatus.overall()); if (staticGroupStatus.overall() != Ydb::Monitoring::StatusFlag::GREEN) { UnknownStaticGroups.emplace(0); - RequestStorageConfig(); + if (!NodeWardenStorageConfig) { + NodeWardenStorageConfig = RequestStorageConfig(); + } } } @@ -1712,12 +1970,9 @@ class TSelfCheckRequest : public TActorBootstrapped { ETags::PDiskState); } switch (status->number()) { - case NKikimrBlobStorage::ACTIVE: { - context.ReportStatus(Ydb::Monitoring::StatusFlag::GREEN); - break; - } + case NKikimrBlobStorage::ACTIVE: 
case NKikimrBlobStorage::INACTIVE: { - context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "PDisk is inactive", ETags::PDiskState); + context.ReportStatus(Ydb::Monitoring::StatusFlag::GREEN); break; } case NKikimrBlobStorage::FAULTY: @@ -1741,6 +1996,8 @@ class TSelfCheckRequest : public TActorBootstrapped { } } + // do not propagate RED status to vdisk - so that vdisk is not considered down when computing group status + context.OverallStatus = MinStatus(context.OverallStatus, Ydb::Monitoring::StatusFlag::ORANGE); storagePDiskStatus.set_overall(context.GetOverallStatus()); } @@ -1797,6 +2054,13 @@ class TSelfCheckRequest : public TActorBootstrapped { storageVDiskStatus.set_id(GetVSlotId(vSlot->GetKey())); + if (!vSlot->GetInfo().HasStatusV2()) { + // this should mean that BSC recently restarted and does not have accurate data yet - we should not report to avoid false positives + context.ReportStatus(Ydb::Monitoring::StatusFlag::GREEN); + storageVDiskStatus.set_overall(context.GetOverallStatus()); + return; + } + const auto *descriptor = NKikimrBlobStorage::EVDiskStatus_descriptor(); auto status = descriptor->FindValueByName(vSlot->GetInfo().GetStatusV2()); if (!status) { // this case is not expected because becouse bsc assignes status according EVDiskStatus enum @@ -1816,16 +2080,12 @@ class TSelfCheckRequest : public TActorBootstrapped { storageVDiskStatus.set_overall(context.GetOverallStatus()); return; } - case NKikimrBlobStorage::INIT_PENDING: { // initialization in process - context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, TStringBuilder() << "VDisk is being initialized", ETags::VDiskState); - storageVDiskStatus.set_overall(context.GetOverallStatus()); - return; - } case NKikimrBlobStorage::REPLICATING: { // the disk accepts queries, but not all the data was replicated context.ReportStatus(Ydb::Monitoring::StatusFlag::BLUE, TStringBuilder() << "Replication in progress", ETags::VDiskState); 
storageVDiskStatus.set_overall(context.GetOverallStatus()); return; } + case NKikimrBlobStorage::INIT_PENDING: case NKikimrBlobStorage::READY: { // the disk is fully operational and does not affect group fault tolerance context.ReportStatus(Ydb::Monitoring::StatusFlag::GREEN); } @@ -1839,53 +2099,47 @@ class TSelfCheckRequest : public TActorBootstrapped { void Handle(NNodeWhiteboard::TEvWhiteboard::TEvVDiskStateResponse::TPtr& ev) { TNodeId nodeId = ev.Get()->Cookie; - if (NodeVDiskState.count(nodeId) == 0) { - auto& nodeVDiskState(NodeVDiskState[nodeId]); - nodeVDiskState = ev->Release(); - for (NKikimrWhiteboard::TVDiskStateInfo& state : *nodeVDiskState->Record.MutableVDiskStateInfo()) { - state.set_nodeid(nodeId); - auto id = GetVDiskId(state.vdiskid()); - MergedVDiskState[id] = &state; - } - RequestDone("TEvVDiskStateResponse"); + auto& nodeVDiskState(NodeVDiskState[nodeId]); + nodeVDiskState.Set(std::move(ev)); + for (NKikimrWhiteboard::TVDiskStateInfo& state : *nodeVDiskState->Record.MutableVDiskStateInfo()) { + state.set_nodeid(nodeId); + auto id = GetVDiskId(state.vdiskid()); + MergedVDiskState[id] = &state; } + RequestDone("TEvVDiskStateResponse"); } void Handle(NNodeWhiteboard::TEvWhiteboard::TEvPDiskStateResponse::TPtr& ev) { TNodeId nodeId = ev.Get()->Cookie; - if (NodePDiskState.count(nodeId) == 0) { - auto& nodePDiskState(NodePDiskState[nodeId]); - nodePDiskState = ev->Release(); - for (NKikimrWhiteboard::TPDiskStateInfo& state : *nodePDiskState->Record.MutablePDiskStateInfo()) { - state.set_nodeid(nodeId); - auto id = GetPDiskId(state); - MergedPDiskState[id] = &state; - } - RequestDone("TEvPDiskStateResponse"); + auto& nodePDiskState(NodePDiskState[nodeId]); + nodePDiskState.Set(std::move(ev)); + for (NKikimrWhiteboard::TPDiskStateInfo& state : *nodePDiskState->Record.MutablePDiskStateInfo()) { + state.set_nodeid(nodeId); + auto id = GetPDiskId(state); + MergedPDiskState[id] = &state; } + RequestDone("TEvPDiskStateResponse"); } void 
Handle(NNodeWhiteboard::TEvWhiteboard::TEvBSGroupStateResponse::TPtr& ev) { ui64 nodeId = ev.Get()->Cookie; - if (NodeBSGroupState.count(nodeId) == 0) { - auto& nodeBSGroupState(NodeBSGroupState[nodeId]); - nodeBSGroupState = ev->Release(); - for (NKikimrWhiteboard::TBSGroupStateInfo& state : *nodeBSGroupState->Record.MutableBSGroupStateInfo()) { - state.set_nodeid(nodeId); - TString storagePoolName = state.storagepoolname(); - TGroupID groupId(state.groupid()); - const NKikimrWhiteboard::TBSGroupStateInfo*& current(MergedBSGroupState[state.groupid()]); - if (current == nullptr || current->GetGroupGeneration() < state.GetGroupGeneration()) { - current = &state; - } - if (storagePoolName.empty() && groupId.ConfigurationType() != EGroupConfigurationType::Static) { - continue; - } - StoragePoolStateByName[storagePoolName].Groups.emplace(state.groupid()); - StoragePoolStateByName[storagePoolName].Name = storagePoolName; + auto& nodeBSGroupState(NodeBSGroupState[nodeId]); + nodeBSGroupState.Set(std::move(ev)); + for (NKikimrWhiteboard::TBSGroupStateInfo& state : *nodeBSGroupState->Record.MutableBSGroupStateInfo()) { + state.set_nodeid(nodeId); + TString storagePoolName = state.storagepoolname(); + TGroupID groupId(state.groupid()); + const NKikimrWhiteboard::TBSGroupStateInfo*& current(MergedBSGroupState[state.groupid()]); + if (current == nullptr || current->GetGroupGeneration() < state.GetGroupGeneration()) { + current = &state; + } + if (storagePoolName.empty() && groupId.ConfigurationType() != EGroupConfigurationType::Static) { + continue; } - RequestDone("TEvBSGroupStateResponse"); + StoragePoolStateByName[storagePoolName].Groups.emplace(state.groupid()); + StoragePoolStateByName[storagePoolName].Name = storagePoolName; } + RequestDone("TEvBSGroupStateResponse"); } void FillPDiskStatusWithWhiteboard(const TString& pDiskId, const NKikimrWhiteboard::TPDiskStateInfo& pDiskInfo, Ydb::Monitoring::StoragePDiskStatus& storagePDiskStatus, TSelfCheckContext context) { @@ 
-1999,9 +2253,9 @@ class TSelfCheckRequest : public TActorBootstrapped { switch (vDiskInfo.GetVDiskState()) { case NKikimrWhiteboard::EVDiskState::OK: + case NKikimrWhiteboard::EVDiskState::Initial: context.ReportStatus(Ydb::Monitoring::StatusFlag::GREEN); break; - case NKikimrWhiteboard::EVDiskState::Initial: case NKikimrWhiteboard::EVDiskState::SyncGuidRecovery: context.IssueRecords.clear(); context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, @@ -2081,7 +2335,7 @@ class TSelfCheckRequest : public TActorBootstrapped { ++DisksColors[status]; switch (status) { case Ydb::Monitoring::StatusFlag::BLUE: // disk is good, but not available - case Ydb::Monitoring::StatusFlag::YELLOW: // disk is initializing, not currently available + // No yellow or orange status here - this is intentional - they are used when a disk is running out of space, but is currently available case Ydb::Monitoring::StatusFlag::RED: // disk is bad, probably not available case Ydb::Monitoring::StatusFlag::GREY: // the status is absent, the disk is not available IncrementFor(realm); @@ -2097,7 +2351,7 @@ class TSelfCheckRequest : public TActorBootstrapped { if (ErasureSpecies == NONE) { if (FailedDisks > 0) { context.ReportStatus(Ydb::Monitoring::StatusFlag::RED, "Group failed", ETags::GroupState, {ETags::VDiskState}); - } else if (DisksColors[Ydb::Monitoring::StatusFlag::YELLOW] > 0) { + } else if (DisksColors[Ydb::Monitoring::StatusFlag::YELLOW] > 0 || DisksColors[Ydb::Monitoring::StatusFlag::ORANGE] > 0) { context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Group degraded", ETags::GroupState, {ETags::VDiskState}); } } else if (ErasureSpecies == BLOCK_4_2) { @@ -2111,7 +2365,7 @@ class TSelfCheckRequest : public TActorBootstrapped { } else { context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Group degraded", ETags::GroupState, {ETags::VDiskState}); } - } else if (DisksColors[Ydb::Monitoring::StatusFlag::YELLOW] > 0) { + } else if (DisksColors[Ydb::Monitoring::StatusFlag::YELLOW] 
> 0 || DisksColors[Ydb::Monitoring::StatusFlag::ORANGE] > 0) { context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Group degraded", ETags::GroupState, {ETags::VDiskState}); } } else if (ErasureSpecies == MIRROR_3_DC) { @@ -2125,7 +2379,7 @@ class TSelfCheckRequest : public TActorBootstrapped { } else { context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Group degraded", ETags::GroupState, {ETags::VDiskState}); } - } else if (DisksColors[Ydb::Monitoring::StatusFlag::YELLOW] > 0) { + } else if (DisksColors[Ydb::Monitoring::StatusFlag::YELLOW] > 0 || DisksColors[Ydb::Monitoring::StatusFlag::ORANGE] > 0) { context.ReportStatus(Ydb::Monitoring::StatusFlag::YELLOW, "Group degraded", ETags::GroupState, {ETags::VDiskState}); } } @@ -2169,7 +2423,7 @@ class TSelfCheckRequest : public TActorBootstrapped { context.OverallStatus = MinStatus(context.OverallStatus, Ydb::Monitoring::StatusFlag::YELLOW); checker.ReportStatus(context); - + BLOG_D("Group " << groupId << " has status " << context.GetOverallStatus()); storageGroupStatus.set_overall(context.GetOverallStatus()); } @@ -2727,6 +2981,7 @@ class TSelfCheckRequest : public TActorBootstrapped { } void ReplyAndPassAway() { + Span.Event("ReplyAndPassAway"); THolder response = MakeHolder(); Ydb::Monitoring::SelfCheckResult& result = response->Result; @@ -2987,7 +3242,7 @@ class THealthCheckService : public TActorBootstrapped { } void Handle(TEvSelfCheckRequest::TPtr& ev) { - Register(new TSelfCheckRequest(ev->Sender, ev.Get()->Release(), ev->Cookie)); + Register(new TSelfCheckRequest(ev->Sender, ev.Get()->Release(), ev->Cookie, std::move(ev->TraceId))); } std::shared_ptr GRpcClientLow; diff --git a/ydb/core/health_check/health_check_ut.cpp b/ydb/core/health_check/health_check_ut.cpp index fa869c265be3..eefa30345e57 100644 --- a/ydb/core/health_check/health_check_ut.cpp +++ b/ydb/core/health_check/health_check_ut.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -58,12 +59,35 @@ 
Y_UNIT_TEST_SUITE(THealthCheckTest) { const int GROUP_START_ID = 0x80000000; const int VCARD_START_ID = 55; + const int PDISK_START_ID = 42; + const int DEFAULT_GROUP_GENERATION = 3; const TPathId SUBDOMAIN_KEY = {7000000000, 1}; const TPathId SERVERLESS_DOMAIN_KEY = {7000000000, 2}; const TPathId SHARED_DOMAIN_KEY = {7000000000, 3}; const TString STORAGE_POOL_NAME = "/Root:test"; + struct TTestVSlotInfo { + std::optional Status; + ui32 Generation = DEFAULT_GROUP_GENERATION; + NKikimrBlobStorage::EDriveStatus PDiskStatus = NKikimrBlobStorage::ACTIVE; + + TTestVSlotInfo(std::optional status = NKikimrBlobStorage::READY, + ui32 generation = DEFAULT_GROUP_GENERATION) + : Status(status) + , Generation(generation) + { + } + + TTestVSlotInfo(NKikimrBlobStorage::EVDiskStatus status, NKikimrBlobStorage::EDriveStatus pDiskStatus = NKikimrBlobStorage::ACTIVE) + : Status(status) + , PDiskStatus(pDiskStatus) + { + } + }; + + using TVDisks = TVector; + void ChangeDescribeSchemeResult(TEvSchemeShard::TEvDescribeSchemeResult::TPtr* ev, ui64 size = 20000000, ui64 quota = 90000000) { auto record = (*ev)->Get()->MutableRecord(); auto pool = record->mutable_pathdescription()->mutable_domaindescription()->add_storagepools(); @@ -150,6 +174,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { entry->mutable_key()->set_groupid(groupId); entry->mutable_info()->set_erasurespeciesv2(NHealthCheck::TSelfCheckRequest::BLOCK_4_2); entry->mutable_info()->set_storagepoolid(poolId); + entry->mutable_info()->set_generation(DEFAULT_GROUP_GENERATION); }; if (addStatic) { @@ -163,24 +188,33 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { } void AddVSlotsToSysViewResponse(NSysView::TEvSysView::TEvGetVSlotsResponse::TPtr* ev, size_t groupCount, - const TVector& vdiskStatuses) { + const TVDisks& vslots, ui32 groupStartId = GROUP_START_ID, + bool withPdisk = false) { auto& record = (*ev)->Get()->Record; auto entrySample = record.entries(0); record.clear_entries(); - auto groupId = GROUP_START_ID; + auto groupId = 
groupStartId; const auto *descriptor = NKikimrBlobStorage::EVDiskStatus_descriptor(); for (size_t i = 0; i < groupCount; ++i) { auto vslotId = VCARD_START_ID; - for (auto status: vdiskStatuses) { + auto pdiskId = PDISK_START_ID; + for (const auto& vslot : vslots) { auto* entry = record.add_entries(); entry->CopyFrom(entrySample); entry->mutable_key()->set_vslotid(vslotId); + if (withPdisk) { + entry->mutable_key()->set_pdiskid(pdiskId); + } entry->mutable_info()->set_groupid(groupId); entry->mutable_info()->set_failrealm(vslotId); - entry->mutable_info()->set_statusv2(descriptor->FindValueByNumber(status)->name()); + if (vslot.Status) { + entry->mutable_info()->set_statusv2(descriptor->FindValueByNumber(*vslot.Status)->name()); + } + entry->mutable_info()->set_groupgeneration(vslot.Generation); entry->mutable_info()->set_vdisk(vslotId); ++vslotId; + ++pdiskId; } ++groupId; } @@ -194,8 +228,26 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { entry->mutable_info()->set_name(STORAGE_POOL_NAME); } + void AddPDisksToSysViewResponse(NSysView::TEvSysView::TEvGetPDisksResponse::TPtr* ev, const TVDisks& vslots, double occupancy) { + auto& record = (*ev)->Get()->Record; + auto entrySample = record.entries(0); + record.clear_entries(); + auto pdiskId = PDISK_START_ID; + const size_t totalSize = 3'200'000'000'000ull; + const auto *descriptor = NKikimrBlobStorage::EDriveStatus_descriptor(); + for (const auto& vslot : vslots) { + auto* entry = record.add_entries(); + entry->CopyFrom(entrySample); + entry->mutable_key()->set_pdiskid(pdiskId); + entry->mutable_info()->set_totalsize(totalSize); + entry->mutable_info()->set_availablesize((1 - occupancy) * totalSize); + entry->mutable_info()->set_statusv2(descriptor->FindValueByNumber(vslot.PDiskStatus)->name()); + ++pdiskId; + } + } + void AddGroupVSlotInControllerConfigResponseWithStaticGroup(TEvBlobStorage::TEvControllerConfigResponse::TPtr* ev, - const NKikimrBlobStorage::TGroupStatus::E groupStatus, const TVector& vdiskStatuses) + const 
NKikimrBlobStorage::TGroupStatus::E groupStatus, const TVDisks& vslots) { auto& pbRecord = (*ev)->Get()->Record; auto pbConfig = pbRecord.mutable_response()->mutable_status(0)->mutable_baseconfig(); @@ -216,6 +268,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { staticGroup->set_storagepoolid(0); staticGroup->set_operatingstatus(groupStatus); staticGroup->set_erasurespecies(NHealthCheck::TSelfCheckRequest::BLOCK_4_2); + staticGroup->set_groupgeneration(DEFAULT_GROUP_GENERATION); auto group = pbConfig->add_group(); group->CopyFrom(groupSample); @@ -223,11 +276,12 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { group->set_storagepoolid(1); group->set_operatingstatus(groupStatus); group->set_erasurespecies(NHealthCheck::TSelfCheckRequest::BLOCK_4_2); + group->set_groupgeneration(DEFAULT_GROUP_GENERATION); group->clear_vslotid(); auto vslotId = VCARD_START_ID; - for (auto status: vdiskStatuses) { + for (const auto& vslotInfo : vslots) { auto vslot = pbConfig->add_vslot(); vslot->CopyFrom(vslotSample); vslot->set_vdiskidx(vslotId); @@ -239,8 +293,11 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { slotId->CopyFrom(vslotIdSample); slotId->set_vslotid(vslotId); - const auto *descriptor = NKikimrBlobStorage::EVDiskStatus_descriptor(); - vslot->set_status(descriptor->FindValueByNumber(status)->name()); + if (vslotInfo.Status) { + const auto *descriptor = NKikimrBlobStorage::EVDiskStatus_descriptor(); + vslot->set_status(descriptor->FindValueByNumber(*vslotInfo.Status)->name()); + } + vslot->set_groupgeneration(vslotInfo.Generation); vslotId++; } @@ -252,13 +309,13 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { sPool->set_name(STORAGE_POOL_NAME); }; - void AddVSlotInVDiskStateResponse(TEvWhiteboard::TEvVDiskStateResponse::TPtr* ev, int groupCount, int vslotCount) { + void AddVSlotInVDiskStateResponse(TEvWhiteboard::TEvVDiskStateResponse::TPtr* ev, int groupCount, int vslotCount, ui32 groupStartId = GROUP_START_ID) { auto& pbRecord = (*ev)->Get()->Record; auto sample = pbRecord.vdiskstateinfo(0); 
pbRecord.clear_vdiskstateinfo(); - auto groupId = GROUP_START_ID; + auto groupId = groupStartId; for (int i = 0; i < groupCount; i++) { auto slotId = VCARD_START_ID; for (int j = 0; j < vslotCount; j++) { @@ -273,6 +330,12 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { } } + void ChangeGroupStateResponse(NNodeWhiteboard::TEvWhiteboard::TEvBSGroupStateResponse::TPtr* ev) { + for (auto& groupInfo : *(*ev)->Get()->Record.mutable_bsgroupstateinfo()) { + groupInfo.set_erasurespecies(NHealthCheck::TSelfCheckRequest::BLOCK_4_2); + } + } + void SetLongHostValue(TEvInterconnect::TEvNodesInfo::TPtr* ev) { TString host(1000000, 'a'); auto& pbRecord = (*ev)->Get()->Nodes; @@ -323,7 +386,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { } case NSysView::TEvSysView::EvGetVSlotsResponse: { auto* x = reinterpret_cast(&ev); - AddVSlotsToSysViewResponse(x, groupNumber, {vdiscPerGroupNumber, NKikimrBlobStorage::EVDiskStatus::ERROR}); + AddVSlotsToSysViewResponse(x, groupNumber, TVDisks{vdiscPerGroupNumber, NKikimrBlobStorage::EVDiskStatus::ERROR}); break; } case NSysView::TEvSysView::EvGetGroupsResponse: { @@ -383,7 +446,7 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { CheckHcResult(result, groupNumber, vdiscPerGroupNumber, isMergeRecords); } - Ydb::Monitoring::SelfCheckResult RequestHcWithVdisks(const NKikimrBlobStorage::TGroupStatus::E groupStatus, const TVector& vdiskStatuses) { + Ydb::Monitoring::SelfCheckResult RequestHcWithVdisks(const NKikimrBlobStorage::TGroupStatus::E groupStatus, const TVDisks& vdisks, bool forStaticGroup = false, double occupancy = 0) { TPortManager tp; ui16 port = tp.GetPort(2134); ui16 grpcPort = tp.GetPort(2135); @@ -413,12 +476,21 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { } case TEvBlobStorage::EvControllerConfigResponse: { auto *x = reinterpret_cast(&ev); - AddGroupVSlotInControllerConfigResponseWithStaticGroup(x, groupStatus, vdiskStatuses); + AddGroupVSlotInControllerConfigResponseWithStaticGroup(x, groupStatus, vdisks); break; } case 
NSysView::TEvSysView::EvGetVSlotsResponse: { auto* x = reinterpret_cast(&ev); - AddVSlotsToSysViewResponse(x, 1, vdiskStatuses); + if (forStaticGroup) { + AddVSlotsToSysViewResponse(x, 1, vdisks, 0, true); + } else { + AddVSlotsToSysViewResponse(x, 1, vdisks, GROUP_START_ID, true); + } + break; + } + case NSysView::TEvSysView::EvGetPDisksResponse: { + auto* x = reinterpret_cast(&ev); + AddPDisksToSysViewResponse(x, vdisks, occupancy); break; } case NSysView::TEvSysView::EvGetGroupsResponse: { @@ -431,6 +503,19 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { AddStoragePoolsToSysViewResponse(x); break; } + case NNodeWhiteboard::TEvWhiteboard::EvVDiskStateResponse: { + auto *x = reinterpret_cast(&ev); + if (forStaticGroup) { + AddVSlotInVDiskStateResponse(x, 1, vdisks.size(), 0); + } else { + AddVSlotInVDiskStateResponse(x, 1, vdisks.size()); + } + break; + } + case NNodeWhiteboard::TEvWhiteboard::EvBSGroupStateResponse: { + auto* x = reinterpret_cast(&ev); + ChangeGroupStateResponse(x); + } } return TTestActorRuntime::EEventAction::PROCESS; @@ -444,10 +529,12 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { return runtime.GrabEdgeEvent(handle)->Result; } - void CheckHcResultHasIssuesWithStatus(Ydb::Monitoring::SelfCheckResult& result, const TString& type, const Ydb::Monitoring::StatusFlag::Status expectingStatus, ui32 total) { + void CheckHcResultHasIssuesWithStatus(Ydb::Monitoring::SelfCheckResult& result, const TString& type, + const Ydb::Monitoring::StatusFlag::Status expectingStatus, ui32 total, + std::string_view pool = "/Root:test") { int issuesCount = 0; for (const auto& issue_log : result.Getissue_log()) { - if (issue_log.type() == type && issue_log.location().storage().pool().name() == "/Root:test" && issue_log.status() == expectingStatus) { + if (issue_log.type() == type && issue_log.location().storage().pool().name() == pool && issue_log.status() == expectingStatus) { issuesCount++; } } @@ -570,25 +657,75 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { } 
Y_UNIT_TEST(YellowGroupIssueWhenPartialGroupStatus) { - auto result = RequestHcWithVdisks(NKikimrBlobStorage::TGroupStatus::PARTIAL, {NKikimrBlobStorage::ERROR}); + auto result = RequestHcWithVdisks(NKikimrBlobStorage::TGroupStatus::PARTIAL, TVDisks{NKikimrBlobStorage::ERROR}); CheckHcResultHasIssuesWithStatus(result, "STORAGE_GROUP", Ydb::Monitoring::StatusFlag::YELLOW, 1); } Y_UNIT_TEST(BlueGroupIssueWhenPartialGroupStatusAndReplicationDisks) { - auto result = RequestHcWithVdisks(NKikimrBlobStorage::TGroupStatus::PARTIAL, {NKikimrBlobStorage::REPLICATING}); + auto result = RequestHcWithVdisks(NKikimrBlobStorage::TGroupStatus::PARTIAL, TVDisks{NKikimrBlobStorage::REPLICATING}); CheckHcResultHasIssuesWithStatus(result, "STORAGE_GROUP", Ydb::Monitoring::StatusFlag::BLUE, 1); } Y_UNIT_TEST(OrangeGroupIssueWhenDegradedGroupStatus) { - auto result = RequestHcWithVdisks(NKikimrBlobStorage::TGroupStatus::DEGRADED, {2, NKikimrBlobStorage::ERROR}); + auto result = RequestHcWithVdisks(NKikimrBlobStorage::TGroupStatus::DEGRADED, TVDisks{2, NKikimrBlobStorage::ERROR}); CheckHcResultHasIssuesWithStatus(result, "STORAGE_GROUP", Ydb::Monitoring::StatusFlag::ORANGE, 1); } Y_UNIT_TEST(RedGroupIssueWhenDisintegratedGroupStatus) { - auto result = RequestHcWithVdisks(NKikimrBlobStorage::TGroupStatus::DISINTEGRATED, {3, NKikimrBlobStorage::ERROR}); + auto result = RequestHcWithVdisks(NKikimrBlobStorage::TGroupStatus::DISINTEGRATED, TVDisks{3, NKikimrBlobStorage::ERROR}); CheckHcResultHasIssuesWithStatus(result, "STORAGE_GROUP", Ydb::Monitoring::StatusFlag::RED, 1); } + Y_UNIT_TEST(StaticGroupIssue) { + auto result = RequestHcWithVdisks(NKikimrBlobStorage::TGroupStatus::PARTIAL, TVDisks{NKikimrBlobStorage::ERROR}, /*forStatic*/ true); + Cerr << result.ShortDebugString() << Endl; + CheckHcResultHasIssuesWithStatus(result, "STORAGE_GROUP", Ydb::Monitoring::StatusFlag::YELLOW, 1, "static"); + } + + Y_UNIT_TEST(GreenStatusWhenCreatingGroup) { + std::optional emptyStatus; + auto result = 
RequestHcWithVdisks(NKikimrBlobStorage::TGroupStatus::PARTIAL, TVDisks{8, emptyStatus}); + Cerr << result.ShortDebugString() << Endl; + UNIT_ASSERT_VALUES_EQUAL(result.self_check_result(), Ydb::Monitoring::SelfCheck::GOOD); + } + + Y_UNIT_TEST(GreenStatusWhenInitPending) { + auto result = RequestHcWithVdisks(NKikimrBlobStorage::TGroupStatus::PARTIAL, TVDisks{8, NKikimrBlobStorage::INIT_PENDING}); + Cerr << result.ShortDebugString() << Endl; + UNIT_ASSERT_VALUES_EQUAL(result.self_check_result(), Ydb::Monitoring::SelfCheck::GOOD); + } + + Y_UNIT_TEST(IgnoreOtherGenerations) { + TVDisks vdisks; + vdisks.emplace_back(NKikimrBlobStorage::ERROR, DEFAULT_GROUP_GENERATION - 1); + vdisks.emplace_back(NKikimrBlobStorage::READY, DEFAULT_GROUP_GENERATION); + vdisks.emplace_back(NKikimrBlobStorage::ERROR, DEFAULT_GROUP_GENERATION + 1); + auto result = RequestHcWithVdisks(NKikimrBlobStorage::TGroupStatus::PARTIAL, vdisks); + Cerr << result.ShortDebugString() << Endl; + UNIT_ASSERT_VALUES_EQUAL(result.self_check_result(), Ydb::Monitoring::SelfCheck::GOOD); + } + + Y_UNIT_TEST(YellowGroupIssueOnYellowSpace) { + auto result = RequestHcWithVdisks(NKikimrBlobStorage::TGroupStatus::PARTIAL, TVDisks{3, NKikimrBlobStorage::READY}, false, 0.9); + Cerr << result.ShortDebugString() << Endl; + CheckHcResultHasIssuesWithStatus(result, "STORAGE_GROUP", Ydb::Monitoring::StatusFlag::YELLOW, 1); + CheckHcResultHasIssuesWithStatus(result, "STORAGE_GROUP", Ydb::Monitoring::StatusFlag::RED, 0); + } + + Y_UNIT_TEST(RedGroupIssueOnRedSpace) { + auto result = RequestHcWithVdisks(NKikimrBlobStorage::TGroupStatus::PARTIAL, TVDisks{3, NKikimrBlobStorage::READY}, false, 0.95); + Cerr << result.ShortDebugString() << Endl; + CheckHcResultHasIssuesWithStatus(result, "STORAGE_GROUP", Ydb::Monitoring::StatusFlag::RED, 1); + } + + Y_UNIT_TEST(YellowIssueReadyVDisksOnFaultyPDisks) { + auto result = RequestHcWithVdisks(NKikimrBlobStorage::TGroupStatus::PARTIAL, TVDisks{3, {NKikimrBlobStorage::READY, 
NKikimrBlobStorage::FAULTY}}); + Cerr << result.ShortDebugString() << Endl; + CheckHcResultHasIssuesWithStatus(result, "STORAGE_GROUP", Ydb::Monitoring::StatusFlag::YELLOW, 1); + CheckHcResultHasIssuesWithStatus(result, "STORAGE_GROUP", Ydb::Monitoring::StatusFlag::ORANGE, 0); + CheckHcResultHasIssuesWithStatus(result, "STORAGE_GROUP", Ydb::Monitoring::StatusFlag::RED, 0); + } + /* HC currently infers group status on its own, so it's never unknown Y_UNIT_TEST(RedGroupIssueWhenUnknownGroupStatus) { auto result = RequestHcWithVdisks(NKikimrBlobStorage::TGroupStatus::UNKNOWN, {}); @@ -768,14 +905,12 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { } case TEvBlobStorage::EvControllerConfigResponse: { auto *x = reinterpret_cast(&ev); - TVector vdiskStatuses = { NKikimrBlobStorage::EVDiskStatus::READY }; - AddGroupVSlotInControllerConfigResponseWithStaticGroup(x, NKikimrBlobStorage::TGroupStatus::FULL, vdiskStatuses); + AddGroupVSlotInControllerConfigResponseWithStaticGroup(x, NKikimrBlobStorage::TGroupStatus::FULL, TVDisks(1)); break; } case NSysView::TEvSysView::EvGetVSlotsResponse: { auto* x = reinterpret_cast(&ev); - TVector vdiskStatuses = { NKikimrBlobStorage::EVDiskStatus::READY }; - AddVSlotsToSysViewResponse(x, 1, vdiskStatuses); + AddVSlotsToSysViewResponse(x, 1, TVDisks(1)); break; } case NSysView::TEvSysView::EvGetGroupsResponse: { @@ -870,14 +1005,12 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { } case TEvBlobStorage::EvControllerConfigResponse: { auto *x = reinterpret_cast(&ev); - TVector vdiskStatuses = { NKikimrBlobStorage::EVDiskStatus::READY }; - AddGroupVSlotInControllerConfigResponseWithStaticGroup(x, NKikimrBlobStorage::TGroupStatus::FULL, vdiskStatuses); + AddGroupVSlotInControllerConfigResponseWithStaticGroup(x, NKikimrBlobStorage::TGroupStatus::FULL, TVDisks(1)); break; } case NSysView::TEvSysView::EvGetVSlotsResponse: { auto* x = reinterpret_cast(&ev); - TVector vdiskStatuses = { NKikimrBlobStorage::EVDiskStatus::READY }; - AddVSlotsToSysViewResponse(x, 1, 
vdiskStatuses); + AddVSlotsToSysViewResponse(x, 1, TVDisks(1)); break; } case NSysView::TEvSysView::EvGetGroupsResponse: { @@ -983,14 +1116,12 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { } case TEvBlobStorage::EvControllerConfigResponse: { auto *x = reinterpret_cast(&ev); - TVector vdiskStatuses = { NKikimrBlobStorage::EVDiskStatus::READY }; - AddGroupVSlotInControllerConfigResponseWithStaticGroup(x, NKikimrBlobStorage::TGroupStatus::FULL, vdiskStatuses); + AddGroupVSlotInControllerConfigResponseWithStaticGroup(x, NKikimrBlobStorage::TGroupStatus::FULL, TVDisks(1)); break; } case NSysView::TEvSysView::EvGetVSlotsResponse: { auto* x = reinterpret_cast(&ev); - TVector vdiskStatuses = { NKikimrBlobStorage::EVDiskStatus::READY }; - AddVSlotsToSysViewResponse(x, 1, vdiskStatuses); + AddVSlotsToSysViewResponse(x, 1, TVDisks(1)); break; } case NSysView::TEvSysView::EvGetGroupsResponse: { @@ -1085,14 +1216,12 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { } case TEvBlobStorage::EvControllerConfigResponse: { auto *x = reinterpret_cast(&ev); - TVector vdiskStatuses = { NKikimrBlobStorage::EVDiskStatus::READY }; - AddGroupVSlotInControllerConfigResponseWithStaticGroup(x, NKikimrBlobStorage::TGroupStatus::FULL, vdiskStatuses); + AddGroupVSlotInControllerConfigResponseWithStaticGroup(x, NKikimrBlobStorage::TGroupStatus::FULL, TVDisks(1)); break; } case NSysView::TEvSysView::EvGetVSlotsResponse: { auto* x = reinterpret_cast(&ev); - TVector vdiskStatuses = { NKikimrBlobStorage::EVDiskStatus::READY }; - AddVSlotsToSysViewResponse(x, 1, vdiskStatuses); + AddVSlotsToSysViewResponse(x, 1, TVDisks(1)); break; } case NSysView::TEvSysView::EvGetGroupsResponse: { @@ -1180,14 +1309,12 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { } case TEvBlobStorage::EvControllerConfigResponse: { auto *x = reinterpret_cast(&ev); - TVector vdiskStatuses = { NKikimrBlobStorage::EVDiskStatus::READY }; - AddGroupVSlotInControllerConfigResponseWithStaticGroup(x, NKikimrBlobStorage::TGroupStatus::FULL, vdiskStatuses); + 
AddGroupVSlotInControllerConfigResponseWithStaticGroup(x, NKikimrBlobStorage::TGroupStatus::FULL, TVDisks(1)); break; } case NSysView::TEvSysView::EvGetVSlotsResponse: { auto* x = reinterpret_cast(&ev); - TVector vdiskStatuses = { NKikimrBlobStorage::EVDiskStatus::READY }; - AddVSlotsToSysViewResponse(x, 1, vdiskStatuses); + AddVSlotsToSysViewResponse(x, 1, TVDisks(1)); break; } case NSysView::TEvSysView::EvGetGroupsResponse: { @@ -1293,14 +1420,12 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { } case TEvBlobStorage::EvControllerConfigResponse: { auto *x = reinterpret_cast(&ev); - TVector vdiskStatuses = { NKikimrBlobStorage::EVDiskStatus::READY }; - AddGroupVSlotInControllerConfigResponseWithStaticGroup(x, NKikimrBlobStorage::TGroupStatus::FULL, vdiskStatuses); + AddGroupVSlotInControllerConfigResponseWithStaticGroup(x, NKikimrBlobStorage::TGroupStatus::FULL, TVDisks(1)); break; } case NSysView::TEvSysView::EvGetVSlotsResponse: { auto* x = reinterpret_cast(&ev); - TVector vdiskStatuses = { NKikimrBlobStorage::EVDiskStatus::READY }; - AddVSlotsToSysViewResponse(x, 1, vdiskStatuses); + AddVSlotsToSysViewResponse(x, 1, TVDisks(1)); break; } case NSysView::TEvSysView::EvGetGroupsResponse: { @@ -1439,14 +1564,12 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { } case TEvBlobStorage::EvControllerConfigResponse: { auto *x = reinterpret_cast(&ev); - TVector vdiskStatuses = { NKikimrBlobStorage::EVDiskStatus::READY }; - AddGroupVSlotInControllerConfigResponseWithStaticGroup(x, NKikimrBlobStorage::TGroupStatus::FULL, vdiskStatuses); + AddGroupVSlotInControllerConfigResponseWithStaticGroup(x, NKikimrBlobStorage::TGroupStatus::FULL, TVDisks(1)); break; } case NSysView::TEvSysView::EvGetVSlotsResponse: { auto* x = reinterpret_cast(&ev); - TVector vdiskStatuses = { NKikimrBlobStorage::EVDiskStatus::READY }; - AddVSlotsToSysViewResponse(x, 1, vdiskStatuses); + AddVSlotsToSysViewResponse(x, 1, TVDisks(1)); break; } case NSysView::TEvSysView::EvGetGroupsResponse: { @@ -1592,14 +1715,12 @@ 
Y_UNIT_TEST_SUITE(THealthCheckTest) { } case TEvBlobStorage::EvControllerConfigResponse: { auto *x = reinterpret_cast(&ev); - TVector vdiskStatuses = { NKikimrBlobStorage::EVDiskStatus::READY }; - AddGroupVSlotInControllerConfigResponseWithStaticGroup(x, NKikimrBlobStorage::TGroupStatus::FULL, vdiskStatuses); + AddGroupVSlotInControllerConfigResponseWithStaticGroup(x, NKikimrBlobStorage::TGroupStatus::FULL, TVDisks(1)); break; } case NSysView::TEvSysView::EvGetVSlotsResponse: { auto* x = reinterpret_cast(&ev); - TVector vdiskStatuses = { NKikimrBlobStorage::EVDiskStatus::READY }; - AddVSlotsToSysViewResponse(x, 1, vdiskStatuses); + AddVSlotsToSysViewResponse(x, 1, TVDisks(1)); break; } case NSysView::TEvSysView::EvGetGroupsResponse: { @@ -1713,123 +1834,128 @@ Y_UNIT_TEST_SUITE(THealthCheckTest) { UNIT_ASSERT_VALUES_EQUAL(database_status.storage().pools()[0].id(), "static"); } - void HiveSyncTest(bool syncPeriod) { + Y_UNIT_TEST(ShardsLimit999) { + ShardsQuotaTest(999, 1000, 1, Ydb::Monitoring::StatusFlag::RED); + } + + Y_UNIT_TEST(ShardsLimit995) { + ShardsQuotaTest(995, 1000, 1, Ydb::Monitoring::StatusFlag::ORANGE); + } + + Y_UNIT_TEST(ShardsLimit905) { + ShardsQuotaTest(905, 1000, 1, Ydb::Monitoring::StatusFlag::YELLOW); + } + + Y_UNIT_TEST(ShardsLimit800) { + ShardsQuotaTest(805, 1000, 0, Ydb::Monitoring::StatusFlag::GREEN); + } + + Y_UNIT_TEST(ShardsNoLimit) { + ShardsQuotaTest(105, 0, 0, Ydb::Monitoring::StatusFlag::GREEN); + } + + bool HasDeadTabletIssue(const Ydb::Monitoring::SelfCheckResult& result) { + for (const auto& issue_log : result.issue_log()) { + if (issue_log.level() == 4 && issue_log.type() == "TABLET") { + return true; + } + } + return false; + } + + Y_UNIT_TEST(TestTabletIsDead) { TPortManager tp; ui16 port = tp.GetPort(2134); ui16 grpcPort = tp.GetPort(2135); auto settings = TServerSettings(port) - .SetNodeCount(1) + .SetNodeCount(2) .SetDynamicNodeCount(1) .SetUseRealThreads(false) .SetDomainName("Root"); TServer server(settings); 
server.EnableGRpc(grpcPort); + TClient client(settings); - TTestActorRuntime& runtime = *server.GetRuntime(); - ui32 dynNodeId = runtime.GetNodeId(1); + TTestActorRuntime* runtime = server.GetRuntime(); + TActorId sender = runtime->AllocateEdgeActor(); - auto observerFunc = [&](TAutoPtr& ev) { - switch (ev->GetTypeRewrite()) { - case TEvHive::EvResponseHiveInfo: { - auto *x = reinterpret_cast(&ev); - auto& record = (*x)->Get()->Record; - record.SetStartTimeTimestamp(0); - if (syncPeriod) { - record.SetResponseTimestamp(NHealthCheck::TSelfCheckRequest::HIVE_SYNCHRONIZATION_PERIOD_MS / 2); - } else { - record.SetResponseTimestamp(NHealthCheck::TSelfCheckRequest::HIVE_SYNCHRONIZATION_PERIOD_MS * 2); - } - auto *tablet = record.MutableTablets()->Add(); - tablet->SetTabletID(1); - tablet->SetNodeID(dynNodeId); - tablet->SetTabletType(NKikimrTabletBase::TTabletTypes::DataShard); - tablet->SetVolatileState(NKikimrHive::TABLET_VOLATILE_STATE_BOOTING); - tablet->MutableObjectDomain()->SetSchemeShard(SUBDOMAIN_KEY.OwnerId); - tablet->MutableObjectDomain()->SetPathId(SUBDOMAIN_KEY.LocalPathId); - break; - } - case TEvHive::EvResponseHiveNodeStats: { - auto *x = reinterpret_cast(&ev); - auto &record = (*x)->Get()->Record; - auto *nodeStats = record.MutableNodeStats()->Add(); - nodeStats->SetNodeId(dynNodeId); - nodeStats->MutableNodeDomain()->SetSchemeShard(SUBDOMAIN_KEY.OwnerId); - nodeStats->MutableNodeDomain()->SetPathId(SUBDOMAIN_KEY.LocalPathId); - break; - } - case NConsole::TEvConsole::EvGetTenantStatusResponse: { - auto *x = reinterpret_cast(&ev); - ChangeGetTenantStatusResponse(x, "/Root/database"); - break; - } - case TEvTxProxySchemeCache::EvNavigateKeySetResult: { - auto *x = reinterpret_cast(&ev); - TSchemeCacheNavigate::TEntry& entry((*x)->Get()->Request->ResultSet.front()); - entry.Status = TSchemeCacheNavigate::EStatus::Ok; - entry.Kind = TSchemeCacheNavigate::EKind::KindExtSubdomain; - entry.Path = {"Root", "database"}; - entry.DomainInfo = 
MakeIntrusive(SUBDOMAIN_KEY, SUBDOMAIN_KEY); + server.SetupDynamicLocalService(2, "Root"); + server.StartPQTablets(1); + server.DestroyDynamicLocalService(2); + runtime->AdvanceCurrentTime(TDuration::Minutes(5)); - break; - } - } + TAutoPtr handle; + runtime->Send(new IEventHandle(NHealthCheck::MakeHealthCheckID(), sender, new NHealthCheck::TEvSelfCheckRequest(), 0)); + auto result = runtime->GrabEdgeEvent(handle)->Result; + Cerr << result.ShortDebugString(); - return TTestActorRuntime::EEventAction::PROCESS; - }; - runtime.SetObserverFunc(observerFunc); + UNIT_ASSERT(HasDeadTabletIssue(result)); + } - TActorId sender = runtime.AllocateEdgeActor(); - TAutoPtr handle; + Y_UNIT_TEST(TestBootingTabletIsNotDead) { + TPortManager tp; + ui16 port = tp.GetPort(2134); + ui16 grpcPort = tp.GetPort(2135); + auto settings = TServerSettings(port) + .SetNodeCount(2) + .SetDynamicNodeCount(1) + .SetUseRealThreads(false) + .SetDomainName("Root"); + TServer server(settings); + server.EnableGRpc(grpcPort); - auto *request = new NHealthCheck::TEvSelfCheckRequest; - request->Request.set_return_verbose_status(true); - request->Database = "/Root/database"; - runtime.Send(new IEventHandle(NHealthCheck::MakeHealthCheckID(), sender, request, 0)); - const auto result = runtime.GrabEdgeEvent(handle)->Result; + TClient client(settings); - Cerr << result.ShortDebugString() << Endl; + TTestActorRuntime* runtime = server.GetRuntime(); + TActorId sender = runtime->AllocateEdgeActor(); - UNIT_ASSERT_VALUES_EQUAL(result.database_status_size(), 1); + auto blockBoot = runtime->AddObserver([](auto&& ev) { ev.Reset(); }); - bool deadTabletIssueFoundInResult = false; - for (const auto &issue_log : result.issue_log()) { - if (issue_log.level() == 4 && issue_log.type() == "TABLET") { - UNIT_ASSERT_VALUES_EQUAL(issue_log.location().compute().tablet().id().size(), 1); - UNIT_ASSERT_VALUES_EQUAL(issue_log.location().compute().tablet().type(), "DataShard"); - deadTabletIssueFoundInResult = true; - } - } + 
server.SetupDynamicLocalService(2, "Root"); + server.StartPQTablets(1, false); + runtime->AdvanceCurrentTime(TDuration::Minutes(5)); - UNIT_ASSERT_VALUES_EQUAL(syncPeriod, !deadTabletIssueFoundInResult); - } + TAutoPtr handle; + runtime->Send(new IEventHandle(NHealthCheck::MakeHealthCheckID(), sender, new NHealthCheck::TEvSelfCheckRequest(), 0)); + auto result = runtime->GrabEdgeEvent(handle)->Result; + Cerr << result.ShortDebugString(); - Y_UNIT_TEST(HiveSyncPeriodIgnoresTabletsState) { - HiveSyncTest(true); + UNIT_ASSERT(!HasDeadTabletIssue(result)); } - Y_UNIT_TEST(AfterHiveSyncPeriodReportsTabletsState) { - HiveSyncTest(false); - } + Y_UNIT_TEST(TestReBootingTabletIsDead) { + TPortManager tp; + ui16 port = tp.GetPort(2134); + ui16 grpcPort = tp.GetPort(2135); + auto settings = TServerSettings(port) + .SetNodeCount(2) + .SetDynamicNodeCount(2) + .SetUseRealThreads(false) + .SetDomainName("Root"); + TServer server(settings); + server.EnableGRpc(grpcPort); - Y_UNIT_TEST(ShardsLimit999) { - ShardsQuotaTest(999, 1000, 1, Ydb::Monitoring::StatusFlag::RED); - } + TClient client(settings); - Y_UNIT_TEST(ShardsLimit995) { - ShardsQuotaTest(995, 1000, 1, Ydb::Monitoring::StatusFlag::ORANGE); - } + TTestActorRuntime* runtime = server.GetRuntime(); + runtime->SetLogPriority(NKikimrServices::HIVE, NActors::NLog::PRI_TRACE); + TActorId sender = runtime->AllocateEdgeActor(); - Y_UNIT_TEST(ShardsLimit905) { - ShardsQuotaTest(905, 1000, 1, Ydb::Monitoring::StatusFlag::YELLOW); - } - Y_UNIT_TEST(ShardsLimit800) { - ShardsQuotaTest(805, 1000, 0, Ydb::Monitoring::StatusFlag::GREEN); - } + server.SetupDynamicLocalService(2, "Root"); + server.StartPQTablets(1, true); + server.SetupDynamicLocalService(3, "Root"); + auto blockBoot = runtime->AddObserver([](auto&& ev) { ev.Reset(); }); + server.DestroyDynamicLocalService(2); + runtime->AdvanceCurrentTime(TDuration::Minutes(5)); - Y_UNIT_TEST(ShardsNoLimit) { - ShardsQuotaTest(105, 0, 0, Ydb::Monitoring::StatusFlag::GREEN); + TAutoPtr 
handle; + runtime->Send(new IEventHandle(NHealthCheck::MakeHealthCheckID(), sender, new NHealthCheck::TEvSelfCheckRequest(), 0)); + auto result = runtime->GrabEdgeEvent(handle)->Result; + Cerr << result.ShortDebugString(); + + UNIT_ASSERT(HasDeadTabletIssue(result)); } } } diff --git a/ydb/core/http_proxy/events.h b/ydb/core/http_proxy/events.h index 3c6ee15db123..501b35881bed 100644 --- a/ydb/core/http_proxy/events.h +++ b/ydb/core/http_proxy/events.h @@ -148,6 +148,40 @@ namespace NKikimr::NHttpProxy { }; }; + enum TEv { + EvYmqCloudAuthResponse + }; + + struct TEvYmqCloudAuthResponse: public TEventLocal< + TEvYmqCloudAuthResponse, + EvYmqCloudAuthResponse> { + struct TError { + TString ErrorCode; + ui32 HttpStatusCode; + TString Message; + }; + + bool IsSuccess; + + TString CloudId; + TString FolderId; + TString Sid; + + TMaybe Error; + + TEvYmqCloudAuthResponse(const TString& cloudId, const TString& folderId, const TString& sid) + : IsSuccess(true) + , CloudId(cloudId) + , FolderId(folderId) + , Sid(sid) + , Error(Nothing()) + {} + + TEvYmqCloudAuthResponse(TError& error) + : IsSuccess(false) + , Error(error) + {} + }; inline TActorId MakeAccessServiceID() { static const char x[12] = "accss_srvce"; @@ -184,6 +218,11 @@ namespace NKikimr::NHttpProxy { return TActorId(0, TStringBuf(x, 12)); } + inline TActorId MakeFolderServiceID() { + static const char x[12] = "folder_svc"; + return TActorId(0, TStringBuf(x, 12)); + } + #define LOG_SP_ERROR_S(actorCtxOrSystem, component, stream) LOG_ERROR_S(actorCtxOrSystem, component, LogPrefix() << " " << stream) #define LOG_SP_WARN_S(actorCtxOrSystem, component, stream) LOG_WARN_S(actorCtxOrSystem, component, LogPrefix() << " " << stream) #define LOG_SP_INFO_S(actorCtxOrSystem, component, stream) LOG_INFO_S(actorCtxOrSystem, component, LogPrefix() << " " << stream) diff --git a/ydb/core/http_proxy/http_req.cpp b/ydb/core/http_proxy/http_req.cpp index f7aee26f521d..d7db194ec8e5 100644 --- a/ydb/core/http_proxy/http_req.cpp +++ 
b/ydb/core/http_proxy/http_req.cpp @@ -45,6 +45,9 @@ #include #include +#include +#include + #include #include @@ -55,6 +58,15 @@ #include +#include +#include + +#include + +#include + +#include +#include namespace NKikimr::NHttpProxy { @@ -158,6 +170,7 @@ namespace NKikimr::NHttpProxy { } constexpr TStringBuf IAM_HEADER = "x-yacloud-subjecttoken"; + constexpr TStringBuf SECURITY_TOKEN_HEADER = "x-amz-security-token"; constexpr TStringBuf AUTHORIZATION_HEADER = "authorization"; constexpr TStringBuf REQUEST_ID_HEADER = "x-request-id"; constexpr TStringBuf REQUEST_ID_HEADER_EXT = "x-amzn-requestid"; @@ -166,11 +179,22 @@ namespace NKikimr::NHttpProxy { constexpr TStringBuf REQUEST_TARGET_HEADER = "x-amz-target"; constexpr TStringBuf REQUEST_CONTENT_TYPE_HEADER = "content-type"; constexpr TStringBuf CRC32_HEADER = "x-amz-crc32"; - static const TString CREDENTIAL_PARAM = "credential"; + constexpr TStringBuf CREDENTIAL_PARAM = "Credential"; + template - class THttpRequestProcessor : public IHttpRequestProcessor { + class TBaseHttpRequestProcessor : public IHttpRequestProcessor { public: + TBaseHttpRequestProcessor(TString method, TProtoCall protoCall) + : Method(method) + , ProtoCall(protoCall) + { + } + + const TString& Name() const override { + return Method; + } + enum TRequestState { StateIdle, StateAuthentication, @@ -179,34 +203,415 @@ namespace NKikimr::NHttpProxy { StateGrpcRequest, StateFinished }; + protected: + TString Method; + TProtoCall ProtoCall; + }; - enum TEv { - EvRequest, - EvResponse, - EvResult - }; - + template + class TYmqHttpRequestProcessor : public TBaseHttpRequestProcessor{ + using TProcessorBase = TBaseHttpRequestProcessor; public: - THttpRequestProcessor(TString method, TProtoCall protoCall) - : Method(method) - , ProtoCall(protoCall) + TYmqHttpRequestProcessor( + TString method, + TProtoCall protoCall, + std::function queueUrlExtractor) + : TProcessorBase(method, protoCall) + , QueueUrlExtractor(queueUrlExtractor) { } - const TString& 
Name() const override { - return Method; + void Execute(THttpRequestContext&& context, THolder signature, const TActorContext& ctx) override { + ctx.Register( + new TYmqHttpRequestActor( + std::move(context), + std::move(signature), + TProcessorBase::ProtoCall, + TProcessorBase::Method, + QueueUrlExtractor + ) + ); + } + + private: + class TYmqHttpRequestActor : public NActors::TActorBootstrapped { + public: + using TBase = NActors::TActorBootstrapped; + + TYmqHttpRequestActor( + THttpRequestContext&& httpContext, + THolder&& signature, + TProtoCall protoCall, + const TString& method, + std::function queueUrlExtractor) + : HttpContext(std::move(httpContext)) + , Signature(std::move(signature)) + , ProtoCall(protoCall) + , Method(method) + , QueueUrlExtractor(queueUrlExtractor) + { + } + + TStringBuilder LogPrefix() const { + return HttpContext.LogPrefix(); + } + + private: + STFUNC(StateWork) + { + switch (ev->GetTypeRewrite()) { + HFunc(TEvents::TEvWakeup, HandleTimeout); + HFunc(TEvServerlessProxy::TEvGrpcRequestResult, HandleGrpcResponse); + HFunc(TEvYmqCloudAuthResponse, HandleYmqCloudAuthorizationResponse); + default: + HandleUnexpectedEvent(ev); + break; + } + } + + void SendGrpcRequestNoDriver(const TActorContext& ctx) { + RequestState = TProcessorBase::TRequestState::StateGrpcRequest; + LOG_SP_INFO_S(ctx, NKikimrServices::HTTP_PROXY, + "sending grpc request to '" << HttpContext.DiscoveryEndpoint << + "' database: '" << HttpContext.DatabasePath << + "' iam token size: " << HttpContext.IamToken.size()); + TMap peerMetadata { + {NYmq::V1::FOLDER_ID, FolderId}, + {NYmq::V1::CLOUD_ID, CloudId ? 
CloudId : HttpContext.UserName }, + {NYmq::V1::USER_SID, UserSid}, + {NYmq::V1::REQUEST_ID, HttpContext.RequestId}, + {NYmq::V1::SECURITY_TOKEN, HttpContext.SecurityToken}, + }; + RpcFuture = NRpcService::DoLocalRpc( + std::move(Request), + HttpContext.DatabasePath, + HttpContext.SerializedUserToken, + Nothing(), + ctx.ActorSystem(), + peerMetadata + ); + RpcFuture.Subscribe( + [actorId = ctx.SelfID, actorSystem = ctx.ActorSystem()] + (const NThreading::TFuture& future) { + auto& response = future.GetValueSync(); + auto result = MakeHolder(); + Y_ABORT_UNLESS(response.operation().ready()); + if (response.operation().status() == Ydb::StatusIds::SUCCESS) { + TProtoResult rs; + response.operation().result().UnpackTo(&rs); + result->Message = MakeHolder(rs); + } + NYql::TIssues issues; + NYql::IssuesFromMessage(response.operation().issues(), issues); + result->Status = MakeHolder( + NYdb::EStatus(response.operation().status()), + std::move(issues) + ); + actorSystem->Send(actorId, result.Release()); + } + ); + return; + } + + void HandleUnexpectedEvent(const TAutoPtr& ev) { + Y_UNUSED(ev); + } + + void ReplyWithError( + const TActorContext& ctx, + NYdb::EStatus status, + const TString& errorText, + size_t issueCode = ISSUE_CODE_GENERIC) { + HttpContext.ResponseData.Status = status; + HttpContext.ResponseData.ErrorText = errorText; + + ReplyToHttpContext(ctx, issueCode); + + ctx.Send(AuthActor, new TEvents::TEvPoisonPill()); + + TBase::Die(ctx); + } + + void ReplyWithError( + const TActorContext& ctx, + ui32 httpStatusCode, + const TString& ymqStatusCode, + const TString& errorText) { + HttpContext.ResponseData.IsYmq = true; + HttpContext.ResponseData.Status = NYdb::EStatus::STATUS_UNDEFINED; + HttpContext.ResponseData.YmqHttpCode = httpStatusCode; + HttpContext.ResponseData.YmqStatusCode = ymqStatusCode; + HttpContext.ResponseData.ErrorText = errorText; + + ReplyToHttpContext(ctx); + + ctx.Send(AuthActor, new TEvents::TEvPoisonPill()); + + TBase::Die(ctx); + } + + void 
ReplyToHttpContext(const TActorContext& ctx, std::optional issueCode = std::nullopt) { + if (issueCode.has_value()) { + HttpContext.DoReply(ctx, issueCode.value()); + } else { + HttpContext.DoReply(ctx); + } + } + + void HandleGrpcResponse(TEvServerlessProxy::TEvGrpcRequestResult::TPtr ev, + const TActorContext& ctx) { + if (ev->Get()->Status->IsSuccess()) { + LOG_SP_DEBUG_S( + ctx, + NKikimrServices::HTTP_PROXY, + "Got succesfult GRPC response."; + ); + ProtoToJson( + *ev->Get()->Message, + HttpContext.ResponseData.Body, + HttpContext.ContentType == MIME_CBOR + ); + HttpContext.ResponseData.IsYmq = true; + HttpContext.ResponseData.YmqHttpCode = 200; + ReplyToHttpContext(ctx); + } else { + auto retryClass = NYdb::NTopic::GetRetryErrorClass(ev->Get()->Status->GetStatus()); + + switch (retryClass) { + case ERetryErrorClass::ShortRetry: + case ERetryErrorClass::LongRetry: + LOG_SP_DEBUG_S( + ctx, + NKikimrServices::HTTP_PROXY, + "Retrying failed GRPC response" + ); + RetryCounter.Click(); + if (RetryCounter.HasAttemps()) { + return SendGrpcRequestNoDriver(ctx); + } + case ERetryErrorClass::NoRetry: + TString errorText; + TStringOutput stringOutput(errorText); + + ev->Get()->Status->GetIssues().PrintTo(stringOutput); + + RetryCounter.Void(); + + auto issues = ev->Get()->Status->GetIssues(); + auto errorAndCode = issues.Empty() + ? std::make_tuple( + NSQS::NErrors::INTERNAL_FAILURE.ErrorCode, + NSQS::NErrors::INTERNAL_FAILURE.HttpStatusCode) + : NKikimr::NSQS::TErrorClass::GetErrorAndCode(issues.begin()->GetCode()); + + LOG_SP_DEBUG_S( + ctx, + NKikimrServices::HTTP_PROXY, + "Not retrying GRPC response." 
+ << " Code: " << get<1>(errorAndCode) + << ", Error: " << get<0>(errorAndCode); + ); + + return ReplyWithError( + ctx, + get<1>(errorAndCode), + get<0>(errorAndCode), + issues.begin()->GetMessage() + ); + } + } + TBase::Die(ctx); + } + + void HandleTimeout(TEvents::TEvWakeup::TPtr ev, const TActorContext& ctx) { + Y_UNUSED(ev); + return ReplyWithError(ctx, NYdb::EStatus::TIMEOUT, "Request hasn't been completed by deadline"); + } + + void HandleYmqCloudAuthorizationResponse(TEvYmqCloudAuthResponse::TPtr ev, const TActorContext& ctx) { + if (ev->Get()->IsSuccess) { + LOG_SP_DEBUG_S( + ctx, + NKikimrServices::HTTP_PROXY, + TStringBuilder() << "Got cloud auth response." + << " FolderId: " << ev->Get()->FolderId + << " CloudId: " << ev->Get()->CloudId + << " UserSid: " << ev->Get()->Sid; + ); + FolderId = ev->Get()->FolderId; + CloudId = ev->Get()->CloudId; + UserSid = ev->Get()->Sid; + SendGrpcRequestNoDriver(ctx); + } else { + LOG_SP_DEBUG_S( + ctx, + NKikimrServices::HTTP_PROXY, + TStringBuilder() << "Got cloud auth response." 
+ << " HttpStatusCode: " << ev->Get()->Error->HttpStatusCode + << " ErrorCode: " << ev->Get()->Error->ErrorCode + << " Message: " << ev->Get()->Error->Message; + ); + ReplyWithError( + ctx, + ev->Get()->Error->HttpStatusCode, + ev->Get()->Error->ErrorCode, + ev->Get()->Error->Message + ); + } + } + + public: + void Bootstrap(const TActorContext& ctx) { + StartTime = ctx.Now(); + try { + HttpContext.RequestBodyToProto(&Request); + auto queueUrl = QueueUrlExtractor(Request); + if (!queueUrl.empty()) { + auto cloudIdAndResourceId = NKikimr::NYmq::CloudIdAndResourceIdFromQueueUrl(queueUrl); + if(cloudIdAndResourceId.Empty()) { + return ReplyWithError(ctx, NYdb::EStatus::BAD_REQUEST, "Invalid queue url"); + } + CloudId = cloudIdAndResourceId.Get()->first; + ResourceId = cloudIdAndResourceId.Get()->second; + } + } catch (const NKikimr::NSQS::TSQSException& e) { + NYds::EErrorCodes issueCode = NYds::EErrorCodes::OK; + if (e.ErrorClass.ErrorCode == "MissingParameter") { + issueCode = NYds::EErrorCodes::MISSING_PARAMETER; + } else if (e.ErrorClass.ErrorCode == "InvalidQueryParameter" + || e.ErrorClass.ErrorCode == "MalformedQueryString") { + issueCode = NYds::EErrorCodes::INVALID_ARGUMENT; + } + return ReplyWithError(ctx, NYdb::EStatus::BAD_REQUEST, e.what(), static_cast(issueCode)); + } catch (const std::exception& e) { + LOG_SP_WARN_S( + ctx, + NKikimrServices::HTTP_PROXY, + "got new request with incorrect json from [" << HttpContext.SourceAddress << "] " + ); + return ReplyWithError( + ctx, + NYdb::EStatus::BAD_REQUEST, + e.what(), + static_cast(NYds::EErrorCodes::INVALID_ARGUMENT) + ); + } + + LOG_SP_INFO_S( + ctx, + NKikimrServices::HTTP_PROXY, + "got new request from [" << HttpContext.SourceAddress << "]" + ); + + if (!HttpContext.ServiceConfig.GetHttpConfig().GetYandexCloudMode()) { + SendGrpcRequestNoDriver(ctx); + } else { + auto requestHolder = MakeHolder(); + NSQS::EAction action = NSQS::EAction::Unknown; + if (Method == "CreateQueue") { + action = 
NSQS::EAction::CreateQueue; + } else if (Method == "GetQueueUrl") { + action = NSQS::EAction::GetQueueUrl; + } else if (Method == "SendMessage") { + action = NSQS::EAction::SendMessage; + } else if (Method == "ReceiveMessage") { + action = NSQS::EAction::ReceiveMessage; + } else if (Method == "GetQueueAttributes") { + action = NSQS::EAction::GetQueueAttributes; + } else if (Method == "ListQueues") { + action = NSQS::EAction::ListQueues; + } else if (Method == "DeleteMessage") { + action = NSQS::EAction::DeleteMessage; + } else if (Method == "PurgeQueue") { + action = NSQS::EAction::PurgeQueue; + } else if (Method == "DeleteQueue") { + action = NSQS::EAction::DeleteQueue; + } else if (Method == "ChangeMessageVisibility") { + action = NSQS::EAction::ChangeMessageVisibility; + } else if (Method == "SetQueueAttributes") { + action = NSQS::EAction::SetQueueAttributes; + } else if (Method == "SendMessageBatch") { + action = NSQS::EAction::SendMessageBatch; + }else if (Method == "DeleteMessageBatch") { + action = NSQS::EAction::DeleteMessageBatch; + } else if (Method == "ChangeMessageVisibilityBatch") { + action = NSQS::EAction::ChangeMessageVisibilityBatch; + } else if (Method == "ListDeadLetterSourceQueues") { + action = NSQS::EAction::ListDeadLetterSourceQueues; + } + + requestHolder->SetRequestId(HttpContext.RequestId); + + NSQS::TAuthActorData data { + .SQSRequest = std::move(requestHolder), + .UserSidCallback = [](const TString& userSid) { Y_UNUSED(userSid); }, + .EnableQueueLeader = true, + .Action = action, + .ExecutorPoolID = PoolId, + .CloudID = CloudId, + .ResourceID = ResourceId, + .Counters = nullptr, + .AWSSignature = std::move(HttpContext.GetSignature()), + .IAMToken = HttpContext.IamToken, + .FolderID = HttpContext.FolderId + }; + + auto authRequestProxy = MakeHolder( + std::move(data), + "", + ctx.SelfID); + + ctx.RegisterWithSameMailbox(authRequestProxy.Release()); + } + + ctx.Schedule(RequestTimeout, new TEvents::TEvWakeup()); + + 
TBase::Become(&TYmqHttpRequestActor::StateWork); + } + + private: + TInstant StartTime; + typename TProcessorBase::TRequestState RequestState = TProcessorBase::TRequestState::StateIdle; + TProtoRequest Request; + TDuration RequestTimeout = TDuration::Seconds(60); + ui32 PoolId; + THttpRequestContext HttpContext; + THolder Signature; + THolder>> Future; + NThreading::TFuture RpcFuture; + THolder> DiscoveryFuture; + TProtoCall ProtoCall; + TString Method; + std::function QueueUrlExtractor; + TRetryCounter RetryCounter; + TActorId AuthActor; + bool InputCountersReported = false; + TString FolderId; + TString CloudId; + TString ResourceId; + TString UserSid; + }; + + std::function QueueUrlExtractor; + }; + + template + class THttpRequestProcessor : public TBaseHttpRequestProcessor{ + using TProcessorBase = TBaseHttpRequestProcessor; + public: + THttpRequestProcessor(TString method, TProtoCall protoCall) : TProcessorBase(method, protoCall) + { } void Execute(THttpRequestContext&& context, THolder signature, const TActorContext& ctx) override { ctx.Register(new THttpRequestActor( std::move(context), std::move(signature), - ProtoCall, Method)); + TProcessorBase::ProtoCall, TProcessorBase::Method)); } private: - class THttpRequestActor : public NActors::TActorBootstrapped { public: using TBase = NActors::TActorBootstrapped; @@ -244,7 +649,7 @@ namespace NKikimr::NHttpProxy { void SendYdbDriverRequest(const TActorContext& ctx) { Y_ABORT_UNLESS(HttpContext.Driver); - RequestState = StateAuthorization; + RequestState = TProcessorBase::TRequestState::StateAuthorization; auto request = MakeHolder(); request->DatabasePath = HttpContext.DatabasePath; @@ -253,7 +658,7 @@ namespace NKikimr::NHttpProxy { } void CreateClient(const TActorContext& ctx) { - RequestState = StateListEndpoints; + RequestState = TProcessorBase::TRequestState::StateListEndpoints; LOG_SP_INFO_S(ctx, NKikimrServices::HTTP_PROXY, "create client to '" << HttpContext.DiscoveryEndpoint << "' database: '" << 
HttpContext.DatabasePath << @@ -282,7 +687,7 @@ namespace NKikimr::NHttpProxy { } void SendGrpcRequestNoDriver(const TActorContext& ctx) { - RequestState = StateGrpcRequest; + RequestState = TProcessorBase::TRequestState::StateGrpcRequest; LOG_SP_INFO_S(ctx, NKikimrServices::HTTP_PROXY, "sending grpc request to '" << HttpContext.DiscoveryEndpoint << "' database: '" << HttpContext.DatabasePath << @@ -310,7 +715,7 @@ namespace NKikimr::NHttpProxy { } void SendGrpcRequest(const TActorContext& ctx) { - RequestState = StateGrpcRequest; + RequestState = TProcessorBase::TRequestState::StateGrpcRequest; LOG_SP_INFO_S(ctx, NKikimrServices::HTTP_PROXY, "sending grpc request to '" << HttpContext.DiscoveryEndpoint << "' database: '" << HttpContext.DatabasePath << @@ -588,7 +993,7 @@ namespace NKikimr::NHttpProxy { private: TInstant StartTime; - TRequestState RequestState = StateIdle; + typename TProcessorBase::TRequestState RequestState = TProcessorBase::TRequestState::StateIdle; TProtoRequest Request; TDuration RequestTimeout = TDuration::Seconds(60); ui32 PoolId; @@ -606,62 +1011,100 @@ namespace NKikimr::NHttpProxy { TActorId AuthActor; bool InputCountersReported = false; }; - - private: - TString Method; - - struct TAccessKeySignature { - TString AccessKeyId; - TString SignedString; - TString Signature; - TString Region; - TInstant SignedAt; - }; - - TProtoCall ProtoCall; }; + template + TString ExtractQueueName(TProtoRequest& request) { + return request.GetQueueUrl(); + }; void THttpRequestProcessors::Initialize() { - #define DECLARE_PROCESSOR(name) Name2Processor[#name] = MakeHolder> \ (#name, &Ydb::DataStreams::V1::DataStreamsService::Stub::Async##name); - DECLARE_PROCESSOR(PutRecords); - DECLARE_PROCESSOR(CreateStream); - DECLARE_PROCESSOR(ListStreams); - DECLARE_PROCESSOR(DeleteStream); - DECLARE_PROCESSOR(UpdateStream); - DECLARE_PROCESSOR(DescribeStream); - DECLARE_PROCESSOR(ListShards); - DECLARE_PROCESSOR(PutRecord); - DECLARE_PROCESSOR(GetRecords); - 
DECLARE_PROCESSOR(GetShardIterator); - DECLARE_PROCESSOR(DescribeLimits); - DECLARE_PROCESSOR(DescribeStreamSummary); - DECLARE_PROCESSOR(DecreaseStreamRetentionPeriod); - DECLARE_PROCESSOR(IncreaseStreamRetentionPeriod); - DECLARE_PROCESSOR(UpdateShardCount); - DECLARE_PROCESSOR(UpdateStreamMode); - DECLARE_PROCESSOR(RegisterStreamConsumer); - DECLARE_PROCESSOR(DeregisterStreamConsumer); - DECLARE_PROCESSOR(DescribeStreamConsumer); - DECLARE_PROCESSOR(ListStreamConsumers); - DECLARE_PROCESSOR(AddTagsToStream); - DECLARE_PROCESSOR(DisableEnhancedMonitoring); - DECLARE_PROCESSOR(EnableEnhancedMonitoring); - DECLARE_PROCESSOR(ListTagsForStream); - DECLARE_PROCESSOR(MergeShards); - DECLARE_PROCESSOR(RemoveTagsFromStream); - DECLARE_PROCESSOR(SplitShard); - DECLARE_PROCESSOR(StartStreamEncryption); - DECLARE_PROCESSOR(StopStreamEncryption); - #undef DECLARE_PROCESSOR + + DECLARE_DATASTREAMS_PROCESSOR(PutRecords); + DECLARE_DATASTREAMS_PROCESSOR(CreateStream); + DECLARE_DATASTREAMS_PROCESSOR(ListStreams); + DECLARE_DATASTREAMS_PROCESSOR(DeleteStream); + DECLARE_DATASTREAMS_PROCESSOR(UpdateStream); + DECLARE_DATASTREAMS_PROCESSOR(DescribeStream); + DECLARE_DATASTREAMS_PROCESSOR(ListShards); + DECLARE_DATASTREAMS_PROCESSOR(PutRecord); + DECLARE_DATASTREAMS_PROCESSOR(GetRecords); + DECLARE_DATASTREAMS_PROCESSOR(GetShardIterator); + DECLARE_DATASTREAMS_PROCESSOR(DescribeLimits); + DECLARE_DATASTREAMS_PROCESSOR(DescribeStreamSummary); + DECLARE_DATASTREAMS_PROCESSOR(DecreaseStreamRetentionPeriod); + DECLARE_DATASTREAMS_PROCESSOR(IncreaseStreamRetentionPeriod); + DECLARE_DATASTREAMS_PROCESSOR(UpdateShardCount); + DECLARE_DATASTREAMS_PROCESSOR(UpdateStreamMode); + DECLARE_DATASTREAMS_PROCESSOR(RegisterStreamConsumer); + DECLARE_DATASTREAMS_PROCESSOR(DeregisterStreamConsumer); + DECLARE_DATASTREAMS_PROCESSOR(DescribeStreamConsumer); + DECLARE_DATASTREAMS_PROCESSOR(ListStreamConsumers); + DECLARE_DATASTREAMS_PROCESSOR(AddTagsToStream); + 
DECLARE_DATASTREAMS_PROCESSOR(DisableEnhancedMonitoring); + DECLARE_DATASTREAMS_PROCESSOR(EnableEnhancedMonitoring); + DECLARE_DATASTREAMS_PROCESSOR(ListTagsForStream); + DECLARE_DATASTREAMS_PROCESSOR(MergeShards); + DECLARE_DATASTREAMS_PROCESSOR(RemoveTagsFromStream); + DECLARE_DATASTREAMS_PROCESSOR(SplitShard); + DECLARE_DATASTREAMS_PROCESSOR(StartStreamEncryption); + DECLARE_DATASTREAMS_PROCESSOR(StopStreamEncryption); + #undef DECLARE_DATASTREAMS_PROCESSOR + + + #define DECLARE_YMQ_PROCESSOR_QUEUE_UNKNOWN(name) Name2YmqProcessor[#name] = MakeHolder> \ + (#name, &Ydb::Ymq::V1::YmqService::Stub::AsyncYmq##name, [](Ydb::Ymq::V1::name##Request&){return "";}); + DECLARE_YMQ_PROCESSOR_QUEUE_UNKNOWN(GetQueueUrl); + DECLARE_YMQ_PROCESSOR_QUEUE_UNKNOWN(CreateQueue); + DECLARE_YMQ_PROCESSOR_QUEUE_UNKNOWN(ListQueues); + #undef DECLARE_YMQ_PROCESSOR_QUEUE_UNKNOWN + + #define DECLARE_YMQ_PROCESSOR_QUEUE_KNOWN(name) Name2YmqProcessor[#name] = MakeHolder> \ + (#name, &Ydb::Ymq::V1::YmqService::Stub::AsyncYmq##name, [](Ydb::Ymq::V1::name##Request& request){return request.Getqueue_url();}); + DECLARE_YMQ_PROCESSOR_QUEUE_KNOWN(SendMessage); + DECLARE_YMQ_PROCESSOR_QUEUE_KNOWN(ReceiveMessage); + DECLARE_YMQ_PROCESSOR_QUEUE_KNOWN(GetQueueAttributes); + DECLARE_YMQ_PROCESSOR_QUEUE_KNOWN(DeleteMessage); + DECLARE_YMQ_PROCESSOR_QUEUE_KNOWN(PurgeQueue); + DECLARE_YMQ_PROCESSOR_QUEUE_KNOWN(DeleteQueue); + DECLARE_YMQ_PROCESSOR_QUEUE_KNOWN(ChangeMessageVisibility); + DECLARE_YMQ_PROCESSOR_QUEUE_KNOWN(SetQueueAttributes); + DECLARE_YMQ_PROCESSOR_QUEUE_KNOWN(SendMessageBatch); + DECLARE_YMQ_PROCESSOR_QUEUE_KNOWN(DeleteMessageBatch); + DECLARE_YMQ_PROCESSOR_QUEUE_KNOWN(ChangeMessageVisibilityBatch); + DECLARE_YMQ_PROCESSOR_QUEUE_KNOWN(ListDeadLetterSourceQueues); + #undef DECLARE_YMQ_PROCESSOR_QUEUE_KNOWN + } + + void SetApiVersionDisabledErrorText(THttpRequestContext& context) { + context.ResponseData.ErrorText = (TStringBuilder() << context.ApiVersion << " is disabled"); } bool 
THttpRequestProcessors::Execute(const TString& name, THttpRequestContext&& context, THolder signature, const TActorContext& ctx) { - if (auto proc = Name2Processor.find(name); proc != Name2Processor.end()) { + THashMap>* Name2Processor; + if (context.ApiVersion == "AmazonSQS") { + if (!context.ServiceConfig.GetHttpConfig().GetYmqEnabled()) { + context.ResponseData.IsYmq = true; + context.ResponseData.YmqHttpCode = 400; + SetApiVersionDisabledErrorText(context); + } + Name2Processor = &Name2YmqProcessor; + } else { + if (!context.ServiceConfig.GetHttpConfig().GetDataStreamsEnabled()) { + context.ResponseData.Status = NYdb::EStatus::BAD_REQUEST; + SetApiVersionDisabledErrorText(context); + } + Name2Processor = &Name2DataStreamsProcessor; + } + + if (auto proc = Name2Processor->find(name); proc != Name2Processor->end()) { proc->second->Execute(std::move(context), std::move(signature), ctx); return true; } @@ -705,10 +1148,15 @@ namespace NKikimr::NHttpProxy { SourceAddress = address; } - DatabasePath = Request->URL; + DatabasePath = Request->URL.Before('?'); if (DatabasePath == "/") { DatabasePath = ""; } + auto params = TCgiParameters(Request->URL.After('?')); + if (auto it = params.Find("folderId"); it != params.end()) { + FolderId = it->second; + } + //TODO: find out databaseId ParseHeaders(Request->Headers); } @@ -775,13 +1223,23 @@ namespace NKikimr::NHttpProxy { LOG_SP_INFO_S(ctx, NKikimrServices::HTTP_PROXY, "reply with status: " << ResponseData.Status << " message: " << ResponseData.ErrorText); - ResponseData.Body.SetType(NJson::JSON_MAP); ResponseData.Body["message"] = ResponseData.ErrorText; - ResponseData.Body["__type"] = MapToException(ResponseData.Status, MethodName, issueCode).first; + if (ResponseData.IsYmq) { + ResponseData.Body["__type"] = ResponseData.YmqStatusCode; + } else { + ResponseData.Body["__type"] = MapToException(ResponseData.Status, MethodName, issueCode).first; + } } - auto [errorName, httpCode] = MapToException(ResponseData.Status, 
MethodName, issueCode); + TString errorName; + ui32 httpCode; + if (ResponseData.IsYmq) { + httpCode = ResponseData.YmqHttpCode; + errorName = ResponseData.YmqStatusCode; + } else { + std::tie(errorName, httpCode) = MapToException(ResponseData.Status, MethodName, issueCode); + } auto response = createResponse( Request, TStringBuilder() << (ui32)httpCode, @@ -793,15 +1251,42 @@ namespace NKikimr::NHttpProxy { ctx.Send(Sender, new NHttp::TEvHttpProxy::TEvHttpOutgoingResponse(response)); } + TMaybe ExtractUserName(const TStringBuf& authorizationHeader) { + const size_t spacePos = authorizationHeader.find(' '); + if (spacePos == TString::npos) { + return Nothing(); + } + auto restOfHeader = authorizationHeader.substr(spacePos + 1); + if (restOfHeader.StartsWith(CREDENTIAL_PARAM)) { + const size_t equalsPos = restOfHeader.find('='); + if (equalsPos == TString::npos) { + return Nothing(); + } + const size_t slashPos = restOfHeader.find('/'); + if (slashPos == TString::npos || slashPos < equalsPos) { + return Nothing(); + } + return restOfHeader.substr(equalsPos + 1, slashPos - equalsPos - 1); + } + return Nothing(); + } + void THttpRequestContext::ParseHeaders(TStringBuf str) { TString sourceReqId; NHttp::THeaders headers(str); for (const auto& header : headers.Headers) { if (AsciiEqualsIgnoreCase(header.first, IAM_HEADER)) { IamToken = header.second; + } else if(AsciiEqualsIgnoreCase(header.first, SECURITY_TOKEN_HEADER)) { + SecurityToken = header.second; } else if (AsciiEqualsIgnoreCase(header.first, AUTHORIZATION_HEADER)) { if (header.second.StartsWith("Bearer ")) { IamToken = header.second; + } else { + auto userName = ExtractUserName(header.second); + if (userName.Defined()) { + UserName = userName.GetRef(); + } } } else if (AsciiEqualsIgnoreCase(header.first, REQUEST_ID_HEADER)) { sourceReqId = header.second; diff --git a/ydb/core/http_proxy/http_req.h b/ydb/core/http_proxy/http_req.h index 10b0c84f4d4b..e7dbe4d2e9e9 100644 --- a/ydb/core/http_proxy/http_req.h +++ 
b/ydb/core/http_proxy/http_req.h @@ -52,9 +52,12 @@ class TRetryCounter { struct THttpResponseData { + bool IsYmq = false; NYdb::EStatus Status{NYdb::EStatus::SUCCESS}; NJson::TJsonValue Body; TString ErrorText{"OK"}; + TString YmqStatusCode; + ui32 YmqHttpCode; TString DumpBody(MimeTypes contentType); }; @@ -85,7 +88,9 @@ struct THttpRequestContext { TString ApiVersion; // used once MimeTypes ContentType{MIME_UNKNOWN}; TString IamToken; + TString SecurityToken; TString SerializedUserToken; + TString UserName; TStringBuilder LogPrefix() const { return TStringBuilder() << "http request [" << MethodName << "] requestId [" << RequestId << "]"; @@ -119,7 +124,8 @@ class THttpRequestProcessors { const TActorContext& ctx); private: - THashMap> Name2Processor; + THashMap> Name2DataStreamsProcessor; + THashMap> Name2YmqProcessor; }; NActors::IActor* CreateAccessServiceActor(const NKikimrConfig::TServerlessProxyConfig& config); diff --git a/ydb/core/http_proxy/http_service.cpp b/ydb/core/http_proxy/http_service.cpp index 7ac573c3be2b..968979070a47 100644 --- a/ydb/core/http_proxy/http_service.cpp +++ b/ydb/core/http_proxy/http_service.cpp @@ -103,7 +103,8 @@ namespace NKikimr::NHttpProxy { try { auto signature = context.GetSignature(); - Processors->Execute(context.MethodName, std::move(context), std::move(signature), ctx); + auto methodName = context.MethodName; + Processors->Execute(std::move(methodName), std::move(context), std::move(signature), ctx); } catch (const NKikimr::NSQS::TSQSException& e) { context.ResponseData.Status = NYdb::EStatus::BAD_REQUEST; context.ResponseData.ErrorText = e.what(); diff --git a/ydb/core/http_proxy/json_proto_conversion.h b/ydb/core/http_proxy/json_proto_conversion.h index 13f30e46c087..61e3ff14c20f 100644 --- a/ydb/core/http_proxy/json_proto_conversion.h +++ b/ydb/core/http_proxy/json_proto_conversion.h @@ -9,6 +9,8 @@ #include #include #include +#include +#include #include @@ -21,11 +23,9 @@ inline TString ProxyFieldNameConverter(const 
google::protobuf::FieldDescriptor& class TYdsProtoToJsonPrinter : public NProtobufJson::TProto2JsonPrinter { public: - TYdsProtoToJsonPrinter(const google::protobuf::Reflection* reflection, - const NProtobufJson::TProto2JsonConfig& config, + TYdsProtoToJsonPrinter(const NProtobufJson::TProto2JsonConfig& config, bool skipBase64Encode) : NProtobufJson::TProto2JsonPrinter(config) - , ProtoReflection(reflection) , SkipBase64Encode(skipBase64Encode) {} @@ -42,9 +42,9 @@ class TYdsProtoToJsonPrinter : public NProtobufJson::TProto2JsonPrinter { void PrintField(const NProtoBuf::Message& proto, const NProtoBuf::FieldDescriptor& field, NProtobufJson::IJsonOutput& json, TStringBuf key = {}) override { - if (field.options().HasExtension(Ydb::DataStreams::V1::FieldTransformer)) { - if (field.options().GetExtension(Ydb::DataStreams::V1::FieldTransformer) == - Ydb::DataStreams::V1::TRANSFORM_BASE64) { + if (field.options().HasExtension(Ydb::FieldTransformation::FieldTransformer)) { + if (field.options().GetExtension(Ydb::FieldTransformation::FieldTransformer) == + Ydb::FieldTransformation::TRANSFORM_BASE64) { Y_ENSURE(field.cpp_type() == google::protobuf::FieldDescriptor::CPPTYPE_STRING, "Base64 is only supported for strings"); if (!key) { @@ -59,20 +59,21 @@ class TYdsProtoToJsonPrinter : public NProtobufJson::TProto2JsonPrinter { return Base64Encode(str); }; + auto* reflection = proto.GetReflection(); if (field.is_repeated()) { - for (int i = 0, endI = ProtoReflection->FieldSize(proto, &field); i < endI; ++i) { + for (int i = 0, endI = reflection->FieldSize(proto, &field); i < endI; ++i) { PrintStringValue(field, TStringBuf(), - maybeBase64Encode(proto.GetReflection()->GetRepeatedString(proto, &field, i)), json); + maybeBase64Encode(reflection->GetRepeatedString(proto, &field, i)), json); } } else { PrintStringValue(field, key, - maybeBase64Encode(proto.GetReflection()->GetString(proto, &field)), json); + maybeBase64Encode(reflection->GetString(proto, &field)), json); } return; 
} - if (field.options().GetExtension(Ydb::DataStreams::V1::FieldTransformer) == - Ydb::DataStreams::V1::TRANSFORM_DOUBLE_S_TO_INT_MS) { + if (field.options().GetExtension(Ydb::FieldTransformation::FieldTransformer) == + Ydb::FieldTransformation::TRANSFORM_DOUBLE_S_TO_INT_MS) { Y_ENSURE(field.cpp_type() == google::protobuf::FieldDescriptor::CPPTYPE_INT64, "Double S to Int MS is only supported for int64 timestamps"); @@ -80,20 +81,21 @@ class TYdsProtoToJsonPrinter : public NProtobufJson::TProto2JsonPrinter { key = MakeKey(field); } + auto* reflection = proto.GetReflection(); if (field.is_repeated()) { - for (int i = 0, endI = ProtoReflection->FieldSize(proto, &field); i < endI; ++i) { - double value = proto.GetReflection()->GetRepeatedInt64(proto, &field, i) / 1000.0; + for (int i = 0, endI = reflection->FieldSize(proto, &field); i < endI; ++i) { + double value = reflection->GetRepeatedInt64(proto, &field, i) / 1000.0; PrintDoubleValue(TStringBuf(), value, json); } } else { - double value = proto.GetReflection()->GetInt64(proto, &field) / 1000.0; + double value = reflection->GetInt64(proto, &field) / 1000.0; PrintDoubleValue(key, value, json); } return; } - if (field.options().GetExtension(Ydb::DataStreams::V1::FieldTransformer) == - Ydb::DataStreams::V1::TRANSFORM_EMPTY_TO_NOTHING) { + if (field.options().GetExtension(Ydb::FieldTransformation::FieldTransformer) == + Ydb::FieldTransformation::TRANSFORM_EMPTY_TO_NOTHING) { Y_ENSURE(field.cpp_type() == google::protobuf::FieldDescriptor::CPPTYPE_STRING, "Empty to nothing is only supported for strings"); @@ -101,19 +103,20 @@ class TYdsProtoToJsonPrinter : public NProtobufJson::TProto2JsonPrinter { key = MakeKey(field); } + auto* reflection = proto.GetReflection(); if (field.is_repeated()) { - for (int i = 0, endI = ProtoReflection->FieldSize(proto, &field); i < endI; ++i) { - auto value = proto.GetReflection()->GetRepeatedString(proto, &field, i); + for (int i = 0, endI = reflection->FieldSize(proto, &field); i < endI; 
++i) { + auto value = reflection->GetRepeatedString(proto, &field, i); if (!value.empty()) { PrintStringValue(field, TStringBuf(), - proto.GetReflection()->GetRepeatedString(proto, &field, i), json); + reflection->GetRepeatedString(proto, &field, i), json); } } } else { - auto value = proto.GetReflection()->GetString(proto, &field); + auto value = reflection->GetString(proto, &field); if (!value.empty()) { PrintStringValue(field, key, - proto.GetReflection()->GetString(proto, &field), json); + reflection->GetString(proto, &field), json); } } return; @@ -124,7 +127,6 @@ class TYdsProtoToJsonPrinter : public NProtobufJson::TProto2JsonPrinter { } private: - const google::protobuf::Reflection* ProtoReflection = nullptr; bool SkipBase64Encode; }; @@ -133,12 +135,90 @@ inline void ProtoToJson(const NProtoBuf::Message& resp, NJson::TJsonValue& value .SetFormatOutput(false) .SetMissingSingleKeyMode(NProtobufJson::TProto2JsonConfig::MissingKeyDefault) .SetNameGenerator(ProxyFieldNameConverter) + .SetMapAsObject(true) .SetEnumMode(NProtobufJson::TProto2JsonConfig::EnumName); - TYdsProtoToJsonPrinter printer(resp.GetReflection(), config, skipBase64Encode); + TYdsProtoToJsonPrinter printer(config, skipBase64Encode); printer.Print(resp, *NProtobufJson::CreateJsonMapOutput(value)); } -inline void JsonToProto(const NJson::TJsonValue& jsonValue, NProtoBuf::Message* message, ui32 depth = 0) { +template +inline void AddJsonObjectToProtoAsMap( + const google::protobuf::FieldDescriptor* fieldDescriptor, + const google::protobuf::Reflection* reflection, + grpc::protobuf::Message* message, + const JSON& jsonObject, + ui32 depth, + std::function extractMap, + std::function valueToString, + std::function jsonObjectToMessage +) { + const auto& protoMap = reflection->GetMutableRepeatedFieldRef(message, fieldDescriptor); + for (const auto& [key, value] : extractMap(jsonObject)) { + std::unique_ptr mapEntry( + google::protobuf::MessageFactory::generated_factory() + 
->GetPrototype(fieldDescriptor->message_type()) + ->New(message->GetArena()) + ); + mapEntry + ->GetReflection() + ->SetString(mapEntry.get(), fieldDescriptor->message_type()->field(0), key); + + auto valueField = fieldDescriptor->message_type()->field(1); + if (valueField->cpp_type() == google::protobuf::FieldDescriptor::CPPTYPE_MESSAGE) { + auto *msg = mapEntry->GetReflection()->MutableMessage(mapEntry.get(), valueField); + jsonObjectToMessage(value, msg, depth); + } else if (valueField->cpp_type() == google::protobuf::FieldDescriptor::CPPTYPE_STRING) { + mapEntry->GetReflection()->SetString(mapEntry.get(), valueField, valueToString(value)); + } else { + throw NKikimr::NSQS::TSQSException(NKikimr::NSQS::NErrors::INVALID_PARAMETER_VALUE) + << "Only String and Object can be converted to protobuf map"; + } + protoMap.Add(std::move(*mapEntry)); + } +} + +void JsonToProto(const NJson::TJsonValue& jsonValue, NProtoBuf::Message* message, ui32 depth = 0); + +inline void AddJsonObjectToProtoAsMap( + const google::protobuf::FieldDescriptor* fieldDescriptor, + const google::protobuf::Reflection* reflection, + grpc::protobuf::Message* message, + const NJson::TJsonValue& jsonObject, + ui32 depth +) { + AddJsonObjectToProtoAsMap( + fieldDescriptor, + reflection, + message, + jsonObject, + depth, + [](auto& json) { return json.GetMap(); }, + [](auto& value) -> const TString { return value.GetString(); }, + [](auto& json, auto message, auto depth) { JsonToProto(json, message, depth); } + ); +} +void NlohmannJsonToProto(const nlohmann::json& jsonValue, NProtoBuf::Message* message, ui32 depth = 0); + +inline void AddJsonObjectToProtoAsMap( + const google::protobuf::FieldDescriptor* fieldDescriptor, + const google::protobuf::Reflection* reflection, + grpc::protobuf::Message* message, + const nlohmann::basic_json<>& jsonObject, + ui32 depth +) { + AddJsonObjectToProtoAsMap, std::map>>( + fieldDescriptor, + reflection, + message, + jsonObject, + depth, + [](auto& json) { return 
json.template get>>(); }, + [](auto& value) -> const TString { return value.template get(); }, + [](auto& json, auto message, auto depth) { NlohmannJsonToProto(json, message, depth); } + ); +} + +inline void JsonToProto(const NJson::TJsonValue& jsonValue, NProtoBuf::Message* message, ui32 depth) { Y_ENSURE(depth < 101, "Json depth is > 100"); Y_ENSURE_EX( !jsonValue.IsNull(), @@ -155,28 +235,28 @@ inline void JsonToProto(const NJson::TJsonValue& jsonValue, NProtoBuf::Message* "Unexpected json key: " << key ); Y_ENSURE(fieldDescriptor, "Unexpected json key: " + key); - auto transformer = Ydb::DataStreams::V1::TRANSFORM_NONE; - if (fieldDescriptor->options().HasExtension(Ydb::DataStreams::V1::FieldTransformer)) { - transformer = fieldDescriptor->options().GetExtension(Ydb::DataStreams::V1::FieldTransformer); + auto transformer = Ydb::FieldTransformation::TRANSFORM_NONE; + if (fieldDescriptor->options().HasExtension(Ydb::FieldTransformation::FieldTransformer)) { + transformer = fieldDescriptor->options().GetExtension(Ydb::FieldTransformation::FieldTransformer); } if (value.IsArray()) { Y_ENSURE(fieldDescriptor->is_repeated()); for (auto& elem : value.GetArray()) { switch (transformer) { - case Ydb::DataStreams::V1::TRANSFORM_BASE64: { + case Ydb::FieldTransformation::TRANSFORM_BASE64: { Y_ENSURE(fieldDescriptor->cpp_type() == google::protobuf::FieldDescriptor::CPPTYPE_STRING, "Base64 transformer is only applicable to strings"); reflection->AddString(message, fieldDescriptor, Base64Decode(elem.GetString())); break; } - case Ydb::DataStreams::V1::TRANSFORM_DOUBLE_S_TO_INT_MS: { + case Ydb::FieldTransformation::TRANSFORM_DOUBLE_S_TO_INT_MS: { reflection->AddInt64(message, fieldDescriptor, elem.GetDouble() * 1000); break; } - case Ydb::DataStreams::V1::TRANSFORM_EMPTY_TO_NOTHING: - case Ydb::DataStreams::V1::TRANSFORM_NONE: { + case Ydb::FieldTransformation::TRANSFORM_EMPTY_TO_NOTHING: + case Ydb::FieldTransformation::TRANSFORM_NONE: { switch (fieldDescriptor->cpp_type()) 
{ case google::protobuf::FieldDescriptor::CPPTYPE_INT32: reflection->AddInt32(message, fieldDescriptor, elem.GetInteger()); @@ -233,19 +313,19 @@ inline void JsonToProto(const NJson::TJsonValue& jsonValue, NProtoBuf::Message* } } else { switch (transformer) { - case Ydb::DataStreams::V1::TRANSFORM_BASE64: { + case Ydb::FieldTransformation::TRANSFORM_BASE64: { Y_ENSURE(fieldDescriptor->cpp_type() == google::protobuf::FieldDescriptor::CPPTYPE_STRING, "Base64 transformer is applicable only to strings"); reflection->SetString(message, fieldDescriptor, Base64Decode(value.GetString())); break; } - case Ydb::DataStreams::V1::TRANSFORM_DOUBLE_S_TO_INT_MS: { + case Ydb::FieldTransformation::TRANSFORM_DOUBLE_S_TO_INT_MS: { reflection->SetInt64(message, fieldDescriptor, value.GetDouble() * 1000); break; } - case Ydb::DataStreams::V1::TRANSFORM_EMPTY_TO_NOTHING: - case Ydb::DataStreams::V1::TRANSFORM_NONE: { + case Ydb::FieldTransformation::TRANSFORM_EMPTY_TO_NOTHING: + case Ydb::FieldTransformation::TRANSFORM_NONE: { switch (fieldDescriptor->cpp_type()) { case google::protobuf::FieldDescriptor::CPPTYPE_INT32: reflection->SetInt32(message, fieldDescriptor, value.GetInteger()); @@ -286,8 +366,12 @@ inline void JsonToProto(const NJson::TJsonValue& jsonValue, NProtoBuf::Message* reflection->SetString(message, fieldDescriptor, value.GetString()); break; case google::protobuf::FieldDescriptor::CPPTYPE_MESSAGE: { - auto *msg = reflection->MutableMessage(message, fieldDescriptor); - JsonToProto(value, msg, depth + 1); + if (fieldDescriptor->is_map()) { + AddJsonObjectToProtoAsMap(fieldDescriptor, reflection, message, value, depth + 1); + } else { + auto *msg = reflection->MutableMessage(message, fieldDescriptor); + JsonToProto(value, msg, depth + 1); + } break; } default: @@ -301,7 +385,7 @@ inline void JsonToProto(const NJson::TJsonValue& jsonValue, NProtoBuf::Message* } } -inline void NlohmannJsonToProto(const nlohmann::json& jsonValue, NProtoBuf::Message* message, ui32 depth = 0) 
{ +inline void NlohmannJsonToProto(const nlohmann::json& jsonValue, NProtoBuf::Message* message, ui32 depth) { Y_ENSURE(depth < 101, "Json depth is > 100"); Y_ENSURE_EX( !jsonValue.is_null(), @@ -313,16 +397,16 @@ inline void NlohmannJsonToProto(const nlohmann::json& jsonValue, NProtoBuf::Mess for (const auto& [key, value] : jsonValue.get>()) { auto* fieldDescriptor = desc->FindFieldByName(NNaming::CamelToSnakeCase(key.c_str())); Y_ENSURE(fieldDescriptor, "Unexpected json key: " + key); - auto transformer = Ydb::DataStreams::V1::TRANSFORM_NONE; - if (fieldDescriptor->options().HasExtension(Ydb::DataStreams::V1::FieldTransformer)) { - transformer = fieldDescriptor->options().GetExtension(Ydb::DataStreams::V1::FieldTransformer); + auto transformer = Ydb::FieldTransformation::TRANSFORM_NONE; + if (fieldDescriptor->options().HasExtension(Ydb::FieldTransformation::FieldTransformer)) { + transformer = fieldDescriptor->options().GetExtension(Ydb::FieldTransformation::FieldTransformer); } if (value.is_array()) { Y_ENSURE(fieldDescriptor->is_repeated()); for (auto& elem : value) { switch (transformer) { - case Ydb::DataStreams::V1::TRANSFORM_BASE64: { + case Ydb::FieldTransformation::TRANSFORM_BASE64: { Y_ENSURE(fieldDescriptor->cpp_type() == google::protobuf::FieldDescriptor::CPPTYPE_STRING, "Base64 transformer is only applicable to strings"); @@ -333,12 +417,12 @@ inline void NlohmannJsonToProto(const nlohmann::json& jsonValue, NProtoBuf::Mess } break; } - case Ydb::DataStreams::V1::TRANSFORM_DOUBLE_S_TO_INT_MS: { + case Ydb::FieldTransformation::TRANSFORM_DOUBLE_S_TO_INT_MS: { reflection->AddInt64(message, fieldDescriptor, elem.get() * 1000); break; } - case Ydb::DataStreams::V1::TRANSFORM_EMPTY_TO_NOTHING: - case Ydb::DataStreams::V1::TRANSFORM_NONE: { + case Ydb::FieldTransformation::TRANSFORM_EMPTY_TO_NOTHING: + case Ydb::FieldTransformation::TRANSFORM_NONE: { switch (fieldDescriptor->cpp_type()) { case google::protobuf::FieldDescriptor::CPPTYPE_INT32: 
reflection->AddInt32(message, fieldDescriptor, elem.get()); @@ -395,7 +479,7 @@ inline void NlohmannJsonToProto(const nlohmann::json& jsonValue, NProtoBuf::Mess } } else { switch (transformer) { - case Ydb::DataStreams::V1::TRANSFORM_BASE64: { + case Ydb::FieldTransformation::TRANSFORM_BASE64: { Y_ENSURE(fieldDescriptor->cpp_type() == google::protobuf::FieldDescriptor::CPPTYPE_STRING, "Base64 transformer is applicable only to strings"); @@ -406,12 +490,12 @@ inline void NlohmannJsonToProto(const nlohmann::json& jsonValue, NProtoBuf::Mess } break; } - case Ydb::DataStreams::V1::TRANSFORM_DOUBLE_S_TO_INT_MS: { + case Ydb::FieldTransformation::TRANSFORM_DOUBLE_S_TO_INT_MS: { reflection->SetInt64(message, fieldDescriptor, value.get() * 1000); break; } - case Ydb::DataStreams::V1::TRANSFORM_EMPTY_TO_NOTHING: - case Ydb::DataStreams::V1::TRANSFORM_NONE: { + case Ydb::FieldTransformation::TRANSFORM_EMPTY_TO_NOTHING: + case Ydb::FieldTransformation::TRANSFORM_NONE: { switch (fieldDescriptor->cpp_type()) { case google::protobuf::FieldDescriptor::CPPTYPE_INT32: reflection->SetInt32(message, fieldDescriptor, value.get()); @@ -452,8 +536,12 @@ inline void NlohmannJsonToProto(const nlohmann::json& jsonValue, NProtoBuf::Mess reflection->SetString(message, fieldDescriptor, value.get()); break; case google::protobuf::FieldDescriptor::CPPTYPE_MESSAGE: { - auto *msg = reflection->MutableMessage(message, fieldDescriptor); - NlohmannJsonToProto(value, msg, depth + 1); + if (fieldDescriptor->is_map()) { + AddJsonObjectToProtoAsMap(fieldDescriptor, reflection, message, value, depth); + } else { + auto *msg = reflection->MutableMessage(message, fieldDescriptor); + NlohmannJsonToProto(value, msg, depth + 1); + } break; } default: diff --git a/ydb/core/http_proxy/ut/datastreams_fixture.h b/ydb/core/http_proxy/ut/datastreams_fixture.h index 77bfab022d81..5a4c98fcd68e 100644 --- a/ydb/core/http_proxy/ut/datastreams_fixture.h +++ b/ydb/core/http_proxy/ut/datastreams_fixture.h @@ -33,6 +33,9 
@@ #include +#include +#include + using TJMap = NJson::TJsonValue::TMapType; using TJVector = NJson::TJsonValue::TArray; @@ -178,6 +181,18 @@ class THttpProxyTestMock : public NUnitTest::TBaseFixture { } + static NJson::TJsonValue CreateSqsGetQueueUrlRequest() { + NJson::TJsonValue record; + record["QueueName"] = "ExampleQueueName"; + return record; + } + + static NJson::TJsonValue CreateSqsCreateQueueRequest() { + NJson::TJsonValue record; + record["QueueName"] = "ExampleQueueName"; + return record; + } + THttpResult SendHttpRequestRaw(const TString& handler, const TString& target, const IOutputStream::TPart& body, const TString& authorizationStr, const TString& contentType = "application/json") { @@ -321,6 +336,34 @@ class THttpProxyTestMock : public NUnitTest::TBaseFixture { } private: + TMaybe RunYqlDataQuery(TString query) { + TString endpoint = TStringBuilder() << "localhost:" << KikimrGrpcPort; + auto driverConfig = NYdb::TDriverConfig() + .SetEndpoint(endpoint) + .SetLog(CreateLogBackend("cerr", ELogPriority::TLOG_DEBUG)); + NYdb::TDriver driver(driverConfig); + auto tableClient = NYdb::NTable::TTableClient(driver); + + TMaybe resultSet; + + auto operationResult = tableClient.RetryOperationSync([&](NYdb::NTable::TSession session) { + NYdb::TParamsBuilder paramsBuilder; + auto queryResult = session.ExecuteDataQuery( + query, + NYdb::NTable::TTxControl::BeginTx(NYdb::NTable::TTxSettings::SerializableRW()).CommitTx(), + paramsBuilder.Build() + ).GetValueSync(); + + if (queryResult.IsSuccess() && queryResult.GetResultSets().size() > 0) { + resultSet = queryResult.GetResultSet(0); + } + return queryResult; + }); + + Y_ABORT_UNLESS(operationResult.IsSuccess()); + return resultSet; + } + void InitKikimr() { AuthFactory = std::make_shared(); NKikimrConfig::TAppConfig appConfig; @@ -331,6 +374,10 @@ class THttpProxyTestMock : public NUnitTest::TBaseFixture { appConfig.MutablePQConfig()->AddValidWriteSpeedLimitsKbPerSec(1_KB); 
appConfig.MutablePQConfig()->MutableBillingMeteringConfig()->SetEnabled(true); + appConfig.MutableSqsConfig()->SetEnableSqs(true); + appConfig.MutableSqsConfig()->SetYandexCloudMode(true); + appConfig.MutableSqsConfig()->SetEnableDeadLetterQueues(true); + auto limit = appConfig.MutablePQConfig()->AddValidRetentionLimits(); limit->SetMinPeriodSeconds(0); limit->SetMaxPeriodSeconds(TDuration::Days(1).Seconds()); @@ -356,6 +403,7 @@ class THttpProxyTestMock : public NUnitTest::TBaseFixture { server->ServerSettings->SetUseRealThreads(false); KikimrServer = THolder(server); + KikimrGrpcPort = KikimrServer->ServerSettings->GrpcPort; ActorRuntime = KikimrServer->GetRuntime(); @@ -375,14 +423,211 @@ class THttpProxyTestMock : public NUnitTest::TBaseFixture { acl.AddAccess(NACLib::EAccessType::Allow, NACLib::GenericFull, "proxy_sa@as"); client.ModifyACL("/", "Root", acl.SerializeAsString()); + + client.MkDir("/Root", "SQS"); + + client.CreateTable("/Root/SQS", + "Name: \".Queues\"" + "Columns { Name: \"Account\" Type: \"Utf8\"}" + "Columns { Name: \"QueueName\" Type: \"Utf8\"}" + "Columns { Name: \"QueueId\" Type: \"String\"}" + "Columns { Name: \"QueueState\" Type: \"Uint64\"}" + "Columns { Name: \"FifoQueue\" Type: \"Bool\"}" + "Columns { Name: \"DeadLetterQueue\" Type: \"Bool\"}" + "Columns { Name: \"CreatedTimestamp\" Type: \"Uint64\"}" + "Columns { Name: \"Shards\" Type: \"Uint64\"}" + "Columns { Name: \"Partitions\" Type: \"Uint64\"}" + "Columns { Name: \"MasterTabletId\" Type: \"Uint64\"}" + "Columns { Name: \"CustomQueueName\" Type: \"Utf8\"}" + "Columns { Name: \"FolderId\" Type: \"Utf8\"}" + "Columns { Name: \"Version\" Type: \"Uint64\"}" + "Columns { Name: \"DlqName\" Type: \"Utf8\"}" + "Columns { Name: \"TablesFormat\" Type: \"Uint32\"}" + "KeyColumnNames: [\"Account\", \"QueueName\"]" + ); + + client.CreateTable("/Root/SQS", + "Name: \".RemovedQueues\"" + "Columns { Name: \"RemoveTimestamp\" Type: \"Uint64\"}" + "Columns { Name: \"QueueIdNumber\" Type: 
\"Uint64\"}" + "Columns { Name: \"Account\" Type: \"Utf8\"}" + "Columns { Name: \"QueueName\" Type: \"Utf8\"}" + "Columns { Name: \"FifoQueue\" Type: \"Bool\"}" + "Columns { Name: \"Shards\" Type: \"Uint32\"}" + "Columns { Name: \"CustomQueueName\" Type: \"Utf8\"}" + "Columns { Name: \"FolderId\" Type: \"Utf8\"}" + "Columns { Name: \"TablesFormat\" Type: \"Uint32\"}" + "Columns { Name: \"StartProcessTimestamp\" Type: \"Uint64\"}" + "Columns { Name: \"NodeProcess\" Type: \"Uint32\"}" + "KeyColumnNames: [\"RemoveTimestamp\", \"QueueIdNumber\"]" + ); + + client.MkDir("/Root/SQS", ".STD"); + client.CreateTable("/Root/SQS/.STD", + "Name: \"Messages\"" + "Columns { Name: \"QueueIdNumberAndShardHash\" Type: \"Uint64\"}" + "Columns { Name: \"QueueIdNumber\" Type: \"Uint64\"}" + "Columns { Name: \"Shard\" Type: \"Uint32\"}" + "Columns { Name: \"Offset\" Type: \"Uint64\"}" + "Columns { Name: \"RandomId\" Type: \"Uint64\"}" + "Columns { Name: \"SentTimestamp\" Type: \"Uint64\"}" + "Columns { Name: \"DelayDeadline\" Type: \"Uint64\"}" + "KeyColumnNames: [\"QueueIdNumberAndShardHash\", \"QueueIdNumber\", \"Shard\", \"Offset\"]" + ); + + client.MkDir("/Root/SQS", ".FIFO"); + client.CreateTable("/Root/SQS/.FIFO", + "Name: \"Messages\"" + "Columns { Name: \"QueueIdNumberHash\" Type: \"Uint64\"}" + "Columns { Name: \"QueueIdNumber\" Type: \"Uint64\"}" + "Columns { Name: \"Offset\" Type: \"Uint64\"}" + "Columns { Name: \"RandomId\" Type: \"Uint64\"}" + "Columns { Name: \"GroupId\" Type: \"String\"}" + "Columns { Name: \"NextOffset\" Type: \"Uint64\"}" + "Columns { Name: \"NextRandomId\" Type: \"Uint64\"}" + "Columns { Name: \"ReceiveCount\" Type: \"Uint32\"}" + "Columns { Name: \"FirstReceiveTimestamp\" Type: \"Uint64\"}" + "Columns { Name: \"SentTimestamp\" Type: \"Uint64\"}" + "KeyColumnNames: [\"QueueIdNumberHash\", \"QueueIdNumber\", \"Offset\"]" + ); + + client.CreateTable("/Root/SQS", + "Name: \".Settings\"" + "Columns { Name: \"Account\" Type: \"Utf8\"}" + "Columns { Name: 
\"Name\" Type: \"Utf8\"}" + "Columns { Name: \"Value\" Type: \"Utf8\"}" + "KeyColumnNames: [\"Account\", \"Name\"]" + ); + + client.CreateTable("/Root/SQS", + "Name: \".AtomicCounter\"" + "Columns { Name: \"counter_key\" Type: \"Uint64\"}" + "Columns { Name: \"value\" Type: \"Uint64\"}" + "KeyColumnNames: [\"counter_key\"]" + ); + RunYqlDataQuery("INSERT INTO `/Root/SQS/.AtomicCounter` (counter_key, value) VALUES (0, 0)"); + + auto attributesTable= "Name: \"Attributes\"" + "Columns { Name: \"QueueIdNumberHash\" Type: \"Uint64\"}" + "Columns { Name: \"QueueIdNumber\" Type: \"Uint64\"}" + "Columns { Name: \"ContentBasedDeduplication\" Type: \"Bool\"}" + "Columns { Name: \"DelaySeconds\" Type: \"Uint64\"}" + "Columns { Name: \"FifoQueue\" Type: \"Bool\"}" + "Columns { Name: \"MaximumMessageSize\" Type: \"Uint64\"}" + "Columns { Name: \"MessageRetentionPeriod\" Type: \"Uint64\"}" + "Columns { Name: \"ReceiveMessageWaitTime\" Type: \"Uint64\"}" + "Columns { Name: \"VisibilityTimeout\" Type: \"Uint64\"}" + "Columns { Name: \"DlqName\" Type: \"Utf8\"}" + "Columns { Name: \"DlqArn\" Type: \"Utf8\"}" + "Columns { Name: \"MaxReceiveCount\" Type: \"Uint64\"}" + "Columns { Name: \"ShowDetailedCountersDeadline\" Type: \"Uint64\"}" + "KeyColumnNames: [\"QueueIdNumberHash\", \"QueueIdNumber\"]"; + client.CreateTable("/Root/SQS/.STD", attributesTable); + client.CreateTable("/Root/SQS/.FIFO", attributesTable); + + client.CreateTable("/Root/SQS", + "Name: \".Events\"" + "Columns { Name: \"Account\" Type: \"Utf8\"}" + "Columns { Name: \"QueueName\" Type: \"Utf8\"}" + "Columns { Name: \"EventType\" Type: \"Uint64\"}" + "Columns { Name: \"CustomQueueName\" Type: \"Utf8\"}" + "Columns { Name: \"EventTimestamp\" Type: \"Uint64\"}" + "Columns { Name: \"FolderId\" Type: \"Utf8\"}" + "KeyColumnNames: [\"Account\", \"QueueName\", \"EventType\"]" + ); + + auto stateTableCommon = + "Name: \"State\"" + "Columns { Name: \"QueueIdNumberHash\" Type: \"Uint64\"}" + "Columns { Name: 
\"QueueIdNumber\" Type: \"Uint64\"}" + "Columns { Name: \"CleanupTimestamp\" Type: \"Uint64\"}" + "Columns { Name: \"CreatedTimestamp\" Type: \"Uint64\"}" + "Columns { Name: \"LastModifiedTimestamp\" Type: \"Uint64\"}" + "Columns { Name: \"RetentionBoundary\" Type: \"Uint64\"}" + "Columns { Name: \"InflyCount\" Type: \"Int64\"}" + "Columns { Name: \"MessageCount\" Type: \"Int64\"}" + "Columns { Name: \"ReadOffset\" Type: \"Uint64\"}" + "Columns { Name: \"WriteOffset\" Type: \"Uint64\"}" + "Columns { Name: \"CleanupVersion\" Type: \"Uint64\"}" + "Columns { Name: \"InflyVersion\" Type: \"Uint64\"}"; + client.CreateTable("/Root/SQS/.STD", + TStringBuilder() + << stateTableCommon + << "Columns { Name: \"Shard\" Type: \"Uint32\"}" + << "KeyColumnNames: [\"QueueIdNumberHash\", \"QueueIdNumber\", \"Shard\"]" + ); + client.CreateTable("/Root/SQS/.FIFO", + TStringBuilder() + << stateTableCommon + << "KeyColumnNames: [\"QueueIdNumberHash\", \"QueueIdNumber\"]" + ); + + + client.CreateTable("/Root/SQS/.STD", + "Name: \"Infly\"" + "Columns { Name: \"QueueIdNumberAndShardHash\" Type: \"Uint64\"}" + "Columns { Name: \"QueueIdNumber\" Type: \"Uint64\"}" + "Columns { Name: \"Shard\" Type: \"Uint32\"}" + "Columns { Name: \"Offset\" Type: \"Uint64\"}" + "Columns { Name: \"RandomId\" Type: \"Uint64\"}" + "Columns { Name: \"LoadId\" Type: \"Uint64\"}" + "Columns { Name: \"FirstReceiveTimestamp\" Type: \"Uint64\"}" + "Columns { Name: \"LockTimestamp\" Type: \"Uint64\"}" + "Columns { Name: \"ReceiveCount\" Type: \"Uint32\"}" + "Columns { Name: \"SentTimestamp\" Type: \"Uint64\"}" + "Columns { Name: \"VisibilityDeadline\" Type: \"Uint64\"}" + "Columns { Name: \"DelayDeadline\" Type: \"Uint64\"}" + "KeyColumnNames: [\"QueueIdNumberAndShardHash\", \"QueueIdNumber\", \"Shard\", \"Offset\"]" + ); + + auto sentTimestampIdxCommonColumns= + "Columns { Name: \"QueueIdNumberAndShardHash\" Type: \"Uint64\"}" + "Columns { Name: \"QueueIdNumber\" Type: \"Uint64\"}" + "Columns { Name: \"Shard\" Type: 
\"Uint32\"}" + "Columns { Name: \"SentTimestamp\" Type: \"Uint64\"}" + "Columns { Name: \"Offset\" Type: \"Uint64\"}" + "Columns { Name: \"RandomId\" Type: \"Uint64\"}" + "Columns { Name: \"DelayDeadline\" Type: \"Uint64\"}"; + auto sendTimestampIdsKeys = "KeyColumnNames: [\"QueueIdNumberAndShardHash\", \"QueueIdNumber\", \"Shard\", \"SentTimestamp\", \"Offset\"]"; + client.CreateTable("/Root/SQS/.STD", + TStringBuilder() + << "Name: \"SentTimestampIdx\"" + << sentTimestampIdxCommonColumns + << sendTimestampIdsKeys + ); + client.CreateTable("/Root/SQS/.FIFO", + TStringBuilder() + << "Name: \"SentTimestampIdx\"" + << "Columns { Name: \"GroupId\" Type: \"String\"}" + << sentTimestampIdxCommonColumns + << sendTimestampIdsKeys + ); + + client.CreateTable("/Root/SQS/.STD", + "Name: \"MessageData\"" + "Columns { Name: \"QueueIdNumberAndShardHash\" Type: \"Uint64\"}" + "Columns { Name: \"QueueIdNumber\" Type: \"Uint64\"}" + "Columns { Name: \"Shard\" Type: \"Uint32\"}" + "Columns { Name: \"RandomId\" Type: \"Uint64\"}" + "Columns { Name: \"Offset\" Type: \"Uint64\"}" + "Columns { Name: \"Attributes\" Type: \"String\"}" + "Columns { Name: \"Data\" Type: \"String\"}" + "Columns { Name: \"MessageId\" Type: \"String\"}" + "Columns { Name: \"SenderId\" Type: \"String\"}" + "KeyColumnNames: [\"QueueIdNumberAndShardHash\", \"QueueIdNumber\", \"Shard\", \"RandomId\", \"Offset\"]" + ); } void InitAccessServiceService() { // Service Account Service Mock grpc::ServerBuilder builder; AccessServiceMock.AuthenticateData["kinesis"].Response.mutable_subject()->mutable_service_account()->set_id("Service1_id"); + AccessServiceMock.AuthenticateData["kinesis"].Response.mutable_subject()->mutable_service_account()->set_folder_id("folder4"); // AccessServiceMock.AuthenticateData["proxy_sa@builtin"].Response.mutable_subject()->mutable_service_account()->set_id("Service1_id"); + 
AccessServiceMock.AuthenticateData["sqs"].Response.mutable_subject()->mutable_service_account()->set_id("Service1_id"); + AccessServiceMock.AuthenticateData["sqs"].Response.mutable_subject()->mutable_service_account()->set_folder_id("folder4"); + AccessServiceMock.AuthorizeData["AKIDEXAMPLE-ydb.databases.list-folder4"].Response.mutable_subject()->mutable_service_account()->set_id("Service1_id"); AccessServiceMock.AuthorizeData["proxy_sa@builtin-ydb.databases.list-folder4"].Response.mutable_subject()->mutable_service_account()->set_id("Service1_id"); @@ -403,6 +648,8 @@ class THttpProxyTestMock : public NUnitTest::TBaseFixture { config.MutableHttpConfig()->SetAccessServiceEndpoint(TStringBuilder() << "127.0.0.1:" << AccessServicePort); config.SetTestMode(true); config.MutableHttpConfig()->SetPort(HttpServicePort); + config.MutableHttpConfig()->SetYandexCloudMode(true); + config.MutableHttpConfig()->SetYmqEnabled(true); std::shared_ptr credentialsProviderFactory = NYdb::CreateOAuthCredentialsProviderFactory("proxy_sa@builtin"); @@ -436,6 +683,9 @@ class THttpProxyTestMock : public NUnitTest::TBaseFixture { TActorId actorId = as->Register(CreateAccessServiceActor(config)); as->RegisterLocalService(MakeAccessServiceID(), actorId); + actorId = as->Register(CreateAccessServiceActor(config)); + as->RegisterLocalService(NSQS::MakeSqsAccessServiceID(), actorId); + actorId = as->Register(CreateIamTokenServiceActor(config)); as->RegisterLocalService(MakeIamTokenServiceID(), actorId); @@ -445,6 +695,24 @@ class THttpProxyTestMock : public NUnitTest::TBaseFixture { actorId = as->Register(CreateMetricsActor(TMetricsSettings{Counters})); as->RegisterLocalService(MakeMetricsServiceID(), actorId); + NKikimrProto::NFolderService::TFolderServiceConfig folderServiceConfig; + folderServiceConfig.SetEnable(false); + actorId = as->Register(NKikimr::NFolderService::CreateFolderServiceActor(folderServiceConfig, "cloud4")); + as->RegisterLocalService(NFolderService::FolderServiceActorId(), 
actorId); + + actorId = as->Register(NKikimr::NFolderService::CreateFolderServiceActor(folderServiceConfig, "cloud4")); + as->RegisterLocalService(NSQS::MakeSqsFolderServiceID(), actorId); + + for (ui32 i = 0; i < ActorRuntime->GetNodeCount(); i++) { + auto nodeId = ActorRuntime->GetNodeId(i); + + actorId = as->Register(NSQS::CreateSqsService()); + as->RegisterLocalService(NSQS::MakeSqsServiceID(nodeId), actorId); + + actorId = as->Register(NSQS::CreateSqsProxyService()); + as->RegisterLocalService(NSQS::MakeSqsProxyServiceID(nodeId), actorId); + } + actorId = as->Register(NHttp::CreateHttpProxy()); as->RegisterLocalService(MakeHttpServerServiceID(), actorId); @@ -452,6 +720,7 @@ class THttpProxyTestMock : public NUnitTest::TBaseFixture { httpProxyConfig.Config = config; httpProxyConfig.CredentialsProvider = credentialsProvider; httpProxyConfig.UseSDK = GetEnv("INSIDE_YDB").empty(); + actorId = as->Register(NKikimr::NHttpProxy::CreateHttpProxy(httpProxyConfig)); as->RegisterLocalService(MakeHttpProxyID(), actorId); @@ -482,4 +751,5 @@ class THttpProxyTestMock : public NUnitTest::TBaseFixture { ui16 IamTokenServicePort = 0; ui16 DatabaseServicePort = 0; ui16 MonPort = 0; + ui16 KikimrGrpcPort = 0; }; diff --git a/ydb/core/http_proxy/ut/http_proxy_ut.h b/ydb/core/http_proxy/ut/http_proxy_ut.h index 3845f09a9cca..39ea72747ccc 100644 --- a/ydb/core/http_proxy/ut/http_proxy_ut.h +++ b/ydb/core/http_proxy/ut/http_proxy_ut.h @@ -3,11 +3,16 @@ #include "library/cpp/json/writer/json_value.h" #include "library/cpp/testing/unittest/registar.h" +#include +#include + extern TString Name_; extern bool ForceFork_; extern TString FormAuthorizationStr(const TString& region); extern NJson::TJsonValue CreateCreateStreamRequest(); extern NJson::TJsonValue CreateDescribeStreamRequest(); +extern NJson::TJsonValue CreateSqsGetQueueUrlRequest(); +extern NJson::TJsonValue CreateSqsCreateQueueRequest(); extern struct THttpResult httpResult; extern THttpResult SendHttpRequest( @@ -1525,4 
+1530,801 @@ Y_UNIT_TEST_SUITE(TestHttpProxy) { UNIT_ASSERT_VALUES_EQUAL(res.Description, "MissingParameter"); } } + + Y_UNIT_TEST_F(TestCreateQueue, THttpProxyTestMock) { + auto req = CreateSqsCreateQueueRequest(); + auto res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(req), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + NJson::TJsonValue json; + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + TString resultQueueUrl = GetByPath(json, "QueueUrl"); + UNIT_ASSERT(resultQueueUrl.EndsWith("ExampleQueueName")); + } + + Y_UNIT_TEST_F(TestCreateQueueWithSameNameAndSameParams, THttpProxyTestMock) { + auto req = CreateSqsCreateQueueRequest(); + auto res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(req), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + NJson::TJsonValue json; + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + TString resultQueueUrl = GetByPath(json, "QueueUrl"); + UNIT_ASSERT(resultQueueUrl.EndsWith("ExampleQueueName")); + + req = CreateSqsCreateQueueRequest(); + res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(req), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + resultQueueUrl = GetByPath(json, "QueueUrl"); + UNIT_ASSERT(resultQueueUrl.EndsWith("ExampleQueueName")); + } + + Y_UNIT_TEST_F(TestCreateQueueWithSameNameAndDifferentParams, THttpProxyTestMock) { + auto req = CreateSqsCreateQueueRequest(); + NJson::TJsonMap attributes = NJson::TJsonMap({std::pair("MessageRetentionPeriod", "60")}); + req["Attributes"] = attributes; + auto res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(req), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + NJson::TJsonValue json; + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + TString resultQueueUrl = GetByPath(json, "QueueUrl"); + 
UNIT_ASSERT(resultQueueUrl.EndsWith("ExampleQueueName")); + + req = CreateSqsCreateQueueRequest(); + attributes = NJson::TJsonMap({std::pair("MessageRetentionPeriod", "61")}); + req["Attributes"] = attributes; + res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(req), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 400); + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + TString resultType = GetByPath(json, "__type"); + UNIT_ASSERT_VALUES_EQUAL(resultType, "ValidationError"); + } + + Y_UNIT_TEST_F(TestCreateQueueWithBadQueueName, THttpProxyTestMock) { + auto req = CreateSqsCreateQueueRequest(); + req["QueueName"] = "B@d_queue_name"; + auto res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(req), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 400); + NJson::TJsonValue json; + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + TString resultType = GetByPath(json, "__type"); + UNIT_ASSERT_VALUES_EQUAL(resultType, "InvalidParameterValue"); + } + + Y_UNIT_TEST_F(TestCreateQueueWithEmptyName, THttpProxyTestMock) { + NJson::TJsonValue req; + auto res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(req), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 400); + NJson::TJsonValue json; + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + TString resultType = GetByPath(json, "__type"); + UNIT_ASSERT_VALUES_EQUAL(resultType, "MissingParameter"); + } + + Y_UNIT_TEST_F(TestCreateQueueWithWrongBody, THttpProxyTestMock) { + NJson::TJsonValue req; + req["wrongField"] = "foobar"; + auto res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(req), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 400); + NJson::TJsonValue json; + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + TString resultType = GetByPath(json, "__type"); + UNIT_ASSERT_VALUES_EQUAL(resultType, "InvalidArgumentException"); + } + 
+    // GetQueueUrl for a queue that was never created must fail with NonExistentQueue.
+    Y_UNIT_TEST_F(TestGetQueueUrlOfNotExistingQueue, THttpProxyTestMock) {
+        auto req = CreateSqsGetQueueUrlRequest();
+        req["QueueName"] = "not-existing-queue";
+        auto res = SendHttpRequest("/Root", "AmazonSQS.GetQueueUrl", std::move(req), FormAuthorizationStr("ru-central1"));
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 400);
+
+        NJson::TJsonValue json;
+        UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json));
+        TString resultType = GetByPath(json, "__type");
+        UNIT_ASSERT_VALUES_EQUAL(resultType, "AWS.SimpleQueueService.NonExistentQueue");
+        TString resultMessage = GetByPath(json, "message");
+        UNIT_ASSERT_VALUES_EQUAL(resultMessage, "The specified queue doesn't exist.");
+    }
+
+    // Same request as above, but sent with an X-YaCloud-SubjectToken header and a folderId query parameter.
+    Y_UNIT_TEST_F(TestGetQueueUrlWithIAM, THttpProxyTestMock) {
+        auto req = CreateSqsGetQueueUrlRequest();
+        req["QueueName"] = "not-existing-queue";
+        auto res = SendHttpRequest("/Root?folderId=XXX", "AmazonSQS.GetQueueUrl", std::move(req), "X-YaCloud-SubjectToken: Bearer proxy_sa@builtin");
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 400);
+
+        NJson::TJsonValue json;
+        UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json));
+        TString resultType = GetByPath(json, "__type");
+        UNIT_ASSERT_VALUES_EQUAL(resultType, "AWS.SimpleQueueService.NonExistentQueue");
+        TString resultMessage = GetByPath(json, "message");
+        UNIT_ASSERT_VALUES_EQUAL(resultMessage, "The specified queue doesn't exist.");
+    }
+
+    // SendMessage must succeed and return SequenceNumber, MD5OfMessageBody and MessageId.
+    Y_UNIT_TEST_F(TestSendMessage, THttpProxyTestMock) {
+        auto createQueueReq = CreateSqsCreateQueueRequest();
+        auto res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(createQueueReq), FormAuthorizationStr("ru-central1"));
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+        NJson::TJsonValue json;
+        UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json));
+        TString resultQueueUrl = GetByPath(json, "QueueUrl");
+        UNIT_ASSERT(resultQueueUrl.EndsWith("ExampleQueueName"));
+
+        NJson::TJsonValue sendMessageReq;
+        sendMessageReq["QueueUrl"] = resultQueueUrl;
+        auto body = "MessageBody-0";
+        sendMessageReq["MessageBody"] = body;
+        sendMessageReq["MessageDeduplicationId"] = "MessageDeduplicationId-0";
+        sendMessageReq["MessageGroupId"] = "MessageGroupId-0";
+
+        res = SendHttpRequest("/Root", "AmazonSQS.SendMessage", std::move(sendMessageReq), FormAuthorizationStr("ru-central1"));
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+        UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json));
+        UNIT_ASSERT(!GetByPath(json, "SequenceNumber").empty());
+        UNIT_ASSERT(!GetByPath(json, "MD5OfMessageBody").empty());
+        UNIT_ASSERT(!GetByPath(json, "MessageId").empty());
+    }
+
+    // A message that was sent must eventually be returned by ReceiveMessage with the same body.
+    Y_UNIT_TEST_F(TestReceiveMessage, THttpProxyTestMock) {
+        auto createQueueReq = CreateSqsCreateQueueRequest();
+        auto res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", createQueueReq, FormAuthorizationStr("ru-central1"));
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+        NJson::TJsonValue json;
+        UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json));
+        TString resultQueueUrl = GetByPath(json, "QueueUrl");
+        UNIT_ASSERT(resultQueueUrl.EndsWith("ExampleQueueName"));
+
+        NJson::TJsonValue sendMessageReq;
+        sendMessageReq["QueueUrl"] = resultQueueUrl;
+        auto body0 = "MessageBody-0";
+        sendMessageReq["MessageBody"] = body0;
+
+        res = SendHttpRequest("/Root", "AmazonSQS.SendMessage", sendMessageReq, FormAuthorizationStr("ru-central1"));
+        // Check the status first so that a failed request is reported as such, not as a missing field.
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+        UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json));
+        UNIT_ASSERT(!GetByPath(json, "MD5OfMessageBody").empty());
+
+        NJson::TJsonValue receiveMessageReq;
+        receiveMessageReq["QueueUrl"] = resultQueueUrl;
+        // Poll for up to 20 attempts, one second apart, until the response body is not an empty object.
+        for (int i = 0; i < 20; ++i) {
+            res = SendHttpRequest("/Root", "AmazonSQS.ReceiveMessage", receiveMessageReq, FormAuthorizationStr("ru-central1"));
+            if (res.Body != "{}") {
+                break;
+            }
+            std::this_thread::sleep_for(std::chrono::milliseconds(1000));
+        }
+
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+        UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json));
+        UNIT_ASSERT_VALUES_EQUAL(json["Messages"].GetArray().size(), 1);
+        UNIT_ASSERT_VALUES_EQUAL(json["Messages"][0]["Body"], body0);
+    }
+
+    Y_UNIT_TEST_F(TestReceiveMessageWithAttributes, THttpProxyTestMock) {
+        // Test if we process AttributeNames, MessageSystemAttributeNames, MessageAttributeNames correctly.
+
+        auto createQueueReq = CreateSqsCreateQueueRequest();
+        auto res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", createQueueReq, FormAuthorizationStr("ru-central1"));
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+        NJson::TJsonValue json;
+        UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json));
+        TString resultQueueUrl = GetByPath(json, "QueueUrl");
+        UNIT_ASSERT(resultQueueUrl.EndsWith("ExampleQueueName"));
+
+        // Sends one message with the given body and checks that the request succeeded.
+        auto sendMessage = [this, resultQueueUrl](const TString& body) {
+            NJson::TJsonValue sendMessageReq;
+            sendMessageReq["QueueUrl"] = resultQueueUrl;
+            sendMessageReq["MessageBody"] = body;
+
+            auto res = SendHttpRequest("/Root", "AmazonSQS.SendMessage", sendMessageReq, FormAuthorizationStr("ru-central1"));
+            UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+            NJson::TJsonValue json;
+            UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json));
+            UNIT_ASSERT(!GetByPath(json, "MD5OfMessageBody").empty());
+        };
+
+        TString body = "MessageBody-0";
+        sendMessage(body);
+
+        // Polls ReceiveMessage with the given request until a non-empty response arrives,
+        // checks that exactly one message with the expected body came back and returns the parsed response.
+        auto receiveMessage = [this](NJson::TJsonValue request, const TString& expectedBody) -> NJson::TJsonValue {
+            request["VisibilityTimeout"] = 0; // Keep the message visible for next ReceiveMessage requests.
+            THttpResult res;
+            for (int i = 0; i < 20; ++i) {
+                res = SendHttpRequest("/Root", "AmazonSQS.ReceiveMessage", request, FormAuthorizationStr("ru-central1"));
+                if (res.Body != "{}") {
+                    break;
+                }
+                std::this_thread::sleep_for(std::chrono::milliseconds(1000));
+            }
+
+            UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+            NJson::TJsonValue json;
+            UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json));
+            UNIT_ASSERT_VALUES_EQUAL(json["Messages"].GetArray().size(), 1);
+            UNIT_ASSERT_VALUES_EQUAL(json["Messages"][0]["Body"], expectedBody);
+            return json;
+        };
+
+        {
+            // Request SentTimestamp message system attribute using deprecated AttributeNames field.
+            NJson::TJsonValue receiveMessageReq;
+            receiveMessageReq["QueueUrl"] = resultQueueUrl;
+            receiveMessageReq["AttributeNames"] = NJson::TJsonArray{"SentTimestamp"};
+            json = receiveMessage(receiveMessageReq, body);
+            UNIT_ASSERT(!json["Messages"][0]["Attributes"]["SentTimestamp"].GetString().empty());
+        }
+
+        {
+            // Request SentTimestamp message system attribute using MessageSystemAttributeNames field.
+            NJson::TJsonValue receiveMessageReq;
+            receiveMessageReq["QueueUrl"] = resultQueueUrl;
+            receiveMessageReq["MessageSystemAttributeNames"] = NJson::TJsonArray{"SentTimestamp"};
+            json = receiveMessage(receiveMessageReq, body);
+            UNIT_ASSERT(!json["Messages"][0]["Attributes"]["SentTimestamp"].GetString().empty());
+        }
+
+        {
+            // Request All message system attributes using deprecated AttributeNames field.
+            NJson::TJsonValue receiveMessageReq;
+            receiveMessageReq["QueueUrl"] = resultQueueUrl;
+            receiveMessageReq["AttributeNames"] = NJson::TJsonArray{"All"};
+            json = receiveMessage(receiveMessageReq, body);
+            UNIT_ASSERT(!json["Messages"][0]["Attributes"]["SentTimestamp"].GetString().empty());
+        }
+
+        {
+            // Request All message system attributes using MessageSystemAttributeNames field.
+            NJson::TJsonValue receiveMessageReq;
+            receiveMessageReq["QueueUrl"] = resultQueueUrl;
+            receiveMessageReq["MessageSystemAttributeNames"] = NJson::TJsonArray{"All"};
+            json = receiveMessage(receiveMessageReq, body);
+            UNIT_ASSERT(!json["Messages"][0]["Attributes"]["SentTimestamp"].GetString().empty());
+        }
+    }
+
+    // GetQueueAttributes must return the DelaySeconds value set at creation, whether it is
+    // requested by name, as part of an explicit list, or via "All".
+    Y_UNIT_TEST_F(TestGetQueueAttributes, THttpProxyTestMock) {
+        auto createQueueReq = CreateSqsCreateQueueRequest();
+        NJson::TJsonValue attributes;
+        attributes["DelaySeconds"] = "1";
+        createQueueReq["Attributes"] = attributes;
+
+        auto res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(createQueueReq), FormAuthorizationStr("ru-central1"));
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+        NJson::TJsonValue json;
+        UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json));
+        TString resultQueueUrl = GetByPath(json, "QueueUrl");
+        UNIT_ASSERT(resultQueueUrl.EndsWith("ExampleQueueName"));
+
+        {
+            // Single attribute requested by name.
+            NJson::TJsonValue getQueueAttributes;
+            getQueueAttributes["QueueUrl"] = resultQueueUrl;
+            NJson::TJsonArray attributeNames = {"DelaySeconds"};
+            getQueueAttributes["AttributeNames"] = attributeNames;
+
+            res = SendHttpRequest("/Root", "AmazonSQS.GetQueueAttributes", std::move(getQueueAttributes), FormAuthorizationStr("ru-central1"));
+            UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+            NJson::TJsonValue resultJson;
+            UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &resultJson));
+            UNIT_ASSERT_VALUES_EQUAL(resultJson["Attributes"]["DelaySeconds"], "1");
+        }
+
+        {
+            // Explicit list of attribute names.
+            NJson::TJsonValue getQueueAttributes;
+            getQueueAttributes["QueueUrl"] = resultQueueUrl;
+            NJson::TJsonArray attributeNames = {
+                "ApproximateNumberOfMessages",
+                "ApproximateNumberOfMessagesDelayed",
+                "ApproximateNumberOfMessagesNotVisible",
+                "CreatedTimestamp",
+                "DelaySeconds",
+                "MaximumMessageSize",
+                "MessageRetentionPeriod",
+                "ReceiveMessageWaitTimeSeconds",
+                "RedrivePolicy",
+                "VisibilityTimeout",
+                "FifoQueue",
+                "ContentBasedDeduplication",
+                "QueueArn"
+            };
+            getQueueAttributes["AttributeNames"] = attributeNames;
+
+            res = SendHttpRequest("/Root", "AmazonSQS.GetQueueAttributes", std::move(getQueueAttributes), FormAuthorizationStr("ru-central1"));
+            UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+            NJson::TJsonValue resultJson;
+            UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &resultJson));
+            UNIT_ASSERT_VALUES_EQUAL(resultJson["Attributes"]["DelaySeconds"], "1");
+        }
+
+        {
+            // The "All" wildcard.
+            NJson::TJsonValue getQueueAttributes;
+            getQueueAttributes["QueueUrl"] = resultQueueUrl;
+            NJson::TJsonArray attributeNames = {"All"};
+            getQueueAttributes["AttributeNames"] = attributeNames;
+
+            res = SendHttpRequest("/Root", "AmazonSQS.GetQueueAttributes", std::move(getQueueAttributes), FormAuthorizationStr("ru-central1"));
+            UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+            NJson::TJsonValue resultJson;
+            UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &resultJson));
+            UNIT_ASSERT_VALUES_EQUAL(resultJson["Attributes"]["DelaySeconds"], "1");
+        }
+    }
+
+    // ListQueues with a name prefix must return exactly the queue created by this test.
+    Y_UNIT_TEST_F(TestListQueues, THttpProxyTestMock) {
+        auto createQueueReq = CreateSqsCreateQueueRequest();
+        auto res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(createQueueReq), FormAuthorizationStr("ru-central1"));
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+        NJson::TJsonValue json;
+        UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json));
+        TString resultQueueUrl = GetByPath(json, "QueueUrl");
+        UNIT_ASSERT(resultQueueUrl.EndsWith("ExampleQueueName"));
+
+        NJson::TJsonValue listQueuesReq;
+        listQueuesReq["QueueNamePrefix"] = "Ex";
+        res = SendHttpRequest("/Root", "AmazonSQS.ListQueues", std::move(listQueuesReq), FormAuthorizationStr("ru-central1"));
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+        // The response is indexed by the "QueueUrls" key below, i.e. it is a JSON object, so parse it into a generic value.
+        NJson::TJsonValue result;
+        UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &result));
+        UNIT_ASSERT_VALUES_EQUAL(result["QueueUrls"].GetArray().size(), 1);
+        UNIT_ASSERT_VALUES_EQUAL(result["QueueUrls"][0], resultQueueUrl);
+    }
+
+    // After DeleteMessage with a valid receipt handle the queue must return no messages.
+    Y_UNIT_TEST_F(TestDeleteMessage, THttpProxyTestMock) {
+        auto createQueueReq = CreateSqsCreateQueueRequest();
+        auto res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(createQueueReq), FormAuthorizationStr("ru-central1"));
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+        NJson::TJsonValue json;
+        UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json));
+        TString resultQueueUrl = GetByPath(json, "QueueUrl");
+        UNIT_ASSERT(resultQueueUrl.EndsWith("ExampleQueueName"));
+
+        NJson::TJsonValue sendMessageReq;
+        sendMessageReq["QueueUrl"] = resultQueueUrl;
+        auto body = "MessageBody-0";
+        sendMessageReq["MessageBody"] = body;
+
+        res = SendHttpRequest("/Root", "AmazonSQS.SendMessage", std::move(sendMessageReq), FormAuthorizationStr("ru-central1"));
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+        UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json));
+
+        // Poll until the sent message becomes available.
+        for (int i = 0; i < 20; ++i) {
+            NJson::TJsonValue receiveMessageReq;
+            receiveMessageReq["QueueUrl"] = resultQueueUrl;
+            res = SendHttpRequest("/Root", "AmazonSQS.ReceiveMessage", std::move(receiveMessageReq), FormAuthorizationStr("ru-central1"));
+            if (res.Body != "{}") {
+                break;
+            }
+            std::this_thread::sleep_for(std::chrono::milliseconds(1000));
+        }
+
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+        UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json));
+        UNIT_ASSERT_VALUES_EQUAL(json["Messages"].GetArray().size(), 1);
+        UNIT_ASSERT_VALUES_EQUAL(json["Messages"][0]["Body"], body);
+
+        auto receiptHandle = json["Messages"][0]["ReceiptHandle"].GetString();
+        UNIT_ASSERT(!receiptHandle.Empty());
+
+        NJson::TJsonValue deleteMessageReq;
+        deleteMessageReq["QueueUrl"] = resultQueueUrl;
+        deleteMessageReq["ReceiptHandle"] = receiptHandle;
+
+        res = SendHttpRequest("/Root", "AmazonSQS.DeleteMessage", std::move(deleteMessageReq), FormAuthorizationStr("ru-central1"));
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+
+        NJson::TJsonValue receiveMessageReq;
+        receiveMessageReq["QueueUrl"] = resultQueueUrl;
+        res = SendHttpRequest("/Root", "AmazonSQS.ReceiveMessage", std::move(receiveMessageReq), FormAuthorizationStr("ru-central1"));
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+        UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json));
+        UNIT_ASSERT_VALUES_EQUAL(json["Messages"].GetArray().size(), 0);
+    }
+
+    // After PurgeQueue a subsequent ReceiveMessage must return no messages.
+    Y_UNIT_TEST_F(TestPurgeQueue, THttpProxyTestMock) {
+        auto createQueueReq = CreateSqsCreateQueueRequest();
+        auto res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(createQueueReq), FormAuthorizationStr("ru-central1"));
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+        NJson::TJsonValue json;
+        UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json));
+        TString resultQueueUrl = GetByPath(json, "QueueUrl");
+        UNIT_ASSERT(resultQueueUrl.EndsWith("ExampleQueueName"));
+
+        NJson::TJsonValue sendMessageReq;
+        sendMessageReq["QueueUrl"] = resultQueueUrl;
+        auto body = "MessageBody-0";
+        sendMessageReq["MessageBody"] = body;
+
+        res = SendHttpRequest("/Root", "AmazonSQS.SendMessage", std::move(sendMessageReq), FormAuthorizationStr("ru-central1"));
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+
+        NJson::TJsonValue purgeQueueReq;
+        purgeQueueReq["QueueUrl"] = resultQueueUrl;
+
+        res = SendHttpRequest("/Root", "AmazonSQS.PurgeQueue", std::move(purgeQueueReq), FormAuthorizationStr("ru-central1"));
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+
+        NJson::TJsonValue receiveMessageReq;
+        receiveMessageReq["QueueUrl"] = resultQueueUrl;
+        res = SendHttpRequest("/Root", "AmazonSQS.ReceiveMessage", std::move(receiveMessageReq), FormAuthorizationStr("ru-central1"));
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+        UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json));
+        UNIT_ASSERT_VALUES_EQUAL(json["Messages"].GetArray().size(), 0);
+    }
+
+    // After DeleteQueue, GetQueueUrl must eventually start failing with NonExistentQueue.
+    Y_UNIT_TEST_F(TestDeleteQueue, THttpProxyTestMock) {
+        auto req = CreateSqsCreateQueueRequest();
+        auto res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(req), FormAuthorizationStr("ru-central1"));
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+        NJson::TJsonValue json;
+        UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json));
+        TString resultQueueUrl = GetByPath(json, "QueueUrl");
+
+        NJson::TJsonValue deleteQueueReq;
+        deleteQueueReq["QueueUrl"] = resultQueueUrl;
+        res = SendHttpRequest("/Root", "AmazonSQS.DeleteQueue", std::move(deleteQueueReq), FormAuthorizationStr("ru-central1"));
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+
+        // Poll for up to 61 attempts, one second apart, until GetQueueUrl stops returning 200.
+        for (int i = 0; i < 61; ++i) {
+            req = CreateSqsGetQueueUrlRequest();
+            res = SendHttpRequest("/Root", "AmazonSQS.GetQueueUrl", std::move(req), FormAuthorizationStr("ru-central1"));
+            if (res.HttpCode != 200) {
+                break;
+            }
+            std::this_thread::sleep_for(std::chrono::milliseconds(1000));
+        }
+
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 400);
+        UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json));
+        UNIT_ASSERT_VALUES_EQUAL(GetByPath(json, "__type"), "AWS.SimpleQueueService.NonExistentQueue");
+    }
+
+    // SetQueueAttributes must change DelaySeconds, and GetQueueAttributes must report the new value.
+    Y_UNIT_TEST_F(TestSetQueueAttributes, THttpProxyTestMock) {
+        auto createQueueReq = CreateSqsCreateQueueRequest();
+        NJson::TJsonValue attributes;
+        attributes["DelaySeconds"] = "1";
+        createQueueReq["Attributes"] = attributes;
+        auto res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(createQueueReq), FormAuthorizationStr("ru-central1"));
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+        NJson::TJsonValue json;
+        UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json));
+
+        TString resultQueueUrl = GetByPath(json, "QueueUrl");
+
+        NJson::TJsonValue setQueueAttributes;
+        setQueueAttributes["QueueUrl"] = resultQueueUrl;
+        attributes = {};
+        attributes["DelaySeconds"] = "2";
+        setQueueAttributes["Attributes"] = attributes;
+
+        res = SendHttpRequest("/Root", "AmazonSQS.SetQueueAttributes", std::move(setQueueAttributes), FormAuthorizationStr("ru-central1"));
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+
+        NJson::TJsonValue getQueueAttributes;
+        getQueueAttributes["QueueUrl"] = resultQueueUrl;
+        NJson::TJsonArray attributeNames = {"DelaySeconds"};
+        getQueueAttributes["AttributeNames"] = attributeNames;
+
+        res = SendHttpRequest("/Root", "AmazonSQS.GetQueueAttributes", std::move(getQueueAttributes), FormAuthorizationStr("ru-central1"));
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+        NJson::TJsonValue resultJson;
+        UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &resultJson));
+        UNIT_ASSERT_VALUES_EQUAL(resultJson["Attributes"]["DelaySeconds"], "2");
+    }
+
+    // SendMessageBatch with two entries must report both as successful, with per-entry MD5s and ids.
+    Y_UNIT_TEST_F(TestSendMessageBatch, THttpProxyTestMock) {
+        auto createQueueReq = CreateSqsCreateQueueRequest();
+        auto res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(createQueueReq), FormAuthorizationStr("ru-central1"));
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+        NJson::TJsonValue json;
+        UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json));
+        TString resultQueueUrl = GetByPath(json, "QueueUrl");
+        UNIT_ASSERT(resultQueueUrl.EndsWith("ExampleQueueName"));
+
+        NJson::TJsonValue message0;
+        message0["Id"] = "Id-0";
+        message0["MessageBody"] = "MessageBody-0";
+        message0["MessageDeduplicationId"] = "MessageDeduplicationId-0";
+
+        NJson::TJsonValue delaySeconds;
+        delaySeconds["StringValue"] = "1";
+        delaySeconds["DataType"] = "String";
+
+        NJson::TJsonValue attributes;
+        attributes["DelaySeconds"] = delaySeconds;
+
+        message0["MessageAttributes"] = attributes;
+
+        NJson::TJsonValue message1;
+        message1["Id"] = "Id-1";
+        message1["MessageBody"] = "MessageBody-1";
+        message1["MessageDeduplicationId"] = "MessageDeduplicationId-1";
+
+        NJson::TJsonArray entries = {message0, message1};
+
+        NJson::TJsonValue sendMessageBatchReq;
+        sendMessageBatchReq["QueueUrl"] = resultQueueUrl;
+        sendMessageBatchReq["Entries"] = entries;
+
+        res = SendHttpRequest("/Root", "AmazonSQS.SendMessageBatch", std::move(sendMessageBatchReq), FormAuthorizationStr("ru-central1"));
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+        UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json));
+        UNIT_ASSERT_VALUES_EQUAL(json["Successful"].GetArray().size(), 2);
+        const auto& successful0 = json["Successful"][0];
+        UNIT_ASSERT_VALUES_EQUAL(successful0["Id"], "Id-0");
+        UNIT_ASSERT(!GetByPath(successful0, "MD5OfMessageAttributes").empty());
+        UNIT_ASSERT(!GetByPath(successful0, "MD5OfMessageBody").empty());
+        UNIT_ASSERT(!GetByPath(successful0, "MessageId").empty());
+
+        // The follow-up ReceiveMessage result used to be ignored; at least require the request itself to succeed.
+        NJson::TJsonValue receiveMessageReq;
+        receiveMessageReq["QueueUrl"] = resultQueueUrl;
+        res = SendHttpRequest("/Root", "AmazonSQS.ReceiveMessage", std::move(receiveMessageReq), FormAuthorizationStr("ru-central1"));
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+    }
+
+    // Two messages sent in a batch are received, deleted with DeleteMessageBatch, and must not be returned again.
+    Y_UNIT_TEST_F(TestDeleteMessageBatch, THttpProxyTestMock) {
+        auto createQueueReq = CreateSqsCreateQueueRequest();
+        auto res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(createQueueReq), FormAuthorizationStr("ru-central1"));
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+        NJson::TJsonValue json;
+        UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json));
+        TString resultQueueUrl = GetByPath(json, "QueueUrl");
+        UNIT_ASSERT(resultQueueUrl.EndsWith("ExampleQueueName"));
+
+        NJson::TJsonValue message0;
+        message0["Id"] = "Id-0";
+        message0["MessageBody"] = "MessageBody-0";
+        message0["MessageDeduplicationId"] = "MessageDeduplicationId-0";
+
+        NJson::TJsonValue message1;
+        message1["Id"] = "Id-1";
+        message1["MessageBody"] = "MessageBody-1";
+        message1["MessageDeduplicationId"] = "MessageDeduplicationId-1";
+
+        NJson::TJsonArray entries = {message0, message1};
+
+        NJson::TJsonValue sendMessageBatchReq;
+        sendMessageBatchReq["QueueUrl"] = resultQueueUrl;
+        sendMessageBatchReq["Entries"] = entries;
+
+        res = SendHttpRequest("/Root", "AmazonSQS.SendMessageBatch", std::move(sendMessageBatchReq), FormAuthorizationStr("ru-central1"));
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+        UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json));
+        UNIT_ASSERT_VALUES_EQUAL(json["Successful"].GetArray().size(), 2);
+
+        // Collect both messages; they may arrive in one response or in two separate ones.
+        TVector<NJson::TJsonValue> messages;
+        for (int i = 0; i < 20; ++i) {
+            NJson::TJsonValue receiveMessageReq;
+            receiveMessageReq["QueueUrl"] = resultQueueUrl;
+            res = SendHttpRequest("/Root", "AmazonSQS.ReceiveMessage", std::move(receiveMessageReq), FormAuthorizationStr("ru-central1"));
+            if (res.Body != "{}") {
+                // Fail on an unparsable body instead of silently reusing the previous contents of json.
+                UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json));
+                if (json["Messages"].GetArray().size() == 2) {
+                    messages.push_back(json["Messages"][0]);
+                    messages.push_back(json["Messages"][1]);
+                    break;
+                }
+                if (json["Messages"].GetArray().size() == 1) {
+                    messages.push_back(json["Messages"][0]);
+                    if (messages.size() == 2) {
+                        break;
+                    }
+                }
+            }
+            std::this_thread::sleep_for(std::chrono::milliseconds(1000));
+        }
+
+        UNIT_ASSERT_VALUES_EQUAL(messages.size(), 2);
+
+        auto receiptHandle0 = messages[0]["ReceiptHandle"].GetString();
+        UNIT_ASSERT(!receiptHandle0.Empty());
+        auto receiptHandle1 = messages[1]["ReceiptHandle"].GetString();
+        UNIT_ASSERT(!receiptHandle1.Empty());
+
+        NJson::TJsonValue deleteMessageBatchReq;
+        deleteMessageBatchReq["QueueUrl"] = resultQueueUrl;
+
+        NJson::TJsonValue entry0;
+        entry0["Id"] = "Id-0";
+        entry0["ReceiptHandle"] = receiptHandle0;
+
+        NJson::TJsonValue entry1;
+        entry1["Id"] = "Id-1";
+        entry1["ReceiptHandle"] = receiptHandle1;
+
+        NJson::TJsonArray deleteEntries = {entry0, entry1};
+        deleteMessageBatchReq["Entries"] = deleteEntries;
+
+        res = SendHttpRequest("/Root", "AmazonSQS.DeleteMessageBatch", std::move(deleteMessageBatchReq), FormAuthorizationStr("ru-central1"));
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+        UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json));
+        UNIT_ASSERT_VALUES_EQUAL(json["Successful"].GetArray().size(), 2);
+        UNIT_ASSERT_VALUES_EQUAL(json["Successful"][0]["Id"], "Id-0");
+        UNIT_ASSERT_VALUES_EQUAL(json["Successful"][1]["Id"], "Id-1");
+
+        NJson::TJsonValue receiveMessageReq;
+        receiveMessageReq["QueueUrl"] = resultQueueUrl;
+        res = SendHttpRequest("/Root", "AmazonSQS.ReceiveMessage", std::move(receiveMessageReq), FormAuthorizationStr("ru-central1"));
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+        UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json));
+        UNIT_ASSERT_VALUES_EQUAL(json["Messages"].GetArray().size(), 0);
+    }
+
+    // A queue whose RedrivePolicy points at a dead-letter queue must be listed by
+    // ListDeadLetterSourceQueues called on that dead-letter queue.
+    Y_UNIT_TEST_F(TestListDeadLetterSourceQueues, THttpProxyTestMock) {
+        auto createQueueReq = CreateSqsCreateQueueRequest();
+        auto res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(createQueueReq), FormAuthorizationStr("ru-central1"));
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+        NJson::TJsonValue json;
+        UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json));
+
+        TString resultQueueUrl = GetByPath(json, "QueueUrl");
+
+        // Build the dead-letter queue from its own request object: createQueueReq has already been
+        // passed through std::move above and must not be reused.
+        auto createDlqReq = CreateSqsCreateQueueRequest();
+        createDlqReq["QueueName"] = "DlqName";
+        res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(createDlqReq), FormAuthorizationStr("ru-central1"));
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+        UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json));
+
+        TString dlqUrl = GetByPath(json, "QueueUrl");
+
+        NJson::TJsonValue getQueueAttributes;
+        getQueueAttributes["QueueUrl"] = dlqUrl;
+        NJson::TJsonArray attributeNames = {"QueueArn"};
+        getQueueAttributes["AttributeNames"] = attributeNames;
+        res = SendHttpRequest("/Root", "AmazonSQS.GetQueueAttributes", std::move(getQueueAttributes), FormAuthorizationStr("ru-central1"));
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+        UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json));
+
+        TString dlqArn = GetByPath(json["Attributes"], "QueueArn");
+
+        // Attach the dead-letter queue to the source queue through its RedrivePolicy attribute.
+        NJson::TJsonValue setQueueAttributes;
+        setQueueAttributes["QueueUrl"] = resultQueueUrl;
+        NJson::TJsonValue attributes = {};
+        auto redrivePolicy = TStringBuilder()
+            << "{\"deadLetterTargetArn\" : \"" << dlqArn << "\", \"maxReceiveCount\" : 100}";
+        attributes["RedrivePolicy"] = redrivePolicy;
+        setQueueAttributes["Attributes"] = attributes;
+
+        res = SendHttpRequest("/Root", "AmazonSQS.SetQueueAttributes", std::move(setQueueAttributes), FormAuthorizationStr("ru-central1"));
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+
+        NJson::TJsonValue listDeadLetterSourceQueues;
+        listDeadLetterSourceQueues["QueueUrl"] = dlqUrl;
+        res = SendHttpRequest("/Root", "AmazonSQS.ListDeadLetterSourceQueues", std::move(listDeadLetterSourceQueues), FormAuthorizationStr("ru-central1"));
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+        UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json));
+        UNIT_ASSERT_VALUES_EQUAL(json["QueueUrls"][0], resultQueueUrl);
+    }
+
+    // ChangeMessageVisibility with a valid receipt handle must succeed.
+    Y_UNIT_TEST_F(TestChangeMessageVisibility, THttpProxyTestMock) {
+        auto createQueueReq = CreateSqsCreateQueueRequest();
+        auto res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(createQueueReq), FormAuthorizationStr("ru-central1"));
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+        NJson::TJsonValue json;
+        UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json));
+        TString resultQueueUrl = GetByPath(json, "QueueUrl");
+
+        NJson::TJsonValue sendMessageReq;
+        sendMessageReq["QueueUrl"] = resultQueueUrl;
+        auto body = "MessageBody-0";
+        sendMessageReq["MessageBody"] = body;
+
+        res = SendHttpRequest("/Root", "AmazonSQS.SendMessage", std::move(sendMessageReq), FormAuthorizationStr("ru-central1"));
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+
+        // Poll until the sent message becomes available.
+        for (int i = 0; i < 20; ++i) {
+            NJson::TJsonValue receiveMessageReq;
+            receiveMessageReq["QueueUrl"] = resultQueueUrl;
+            res = SendHttpRequest("/Root", "AmazonSQS.ReceiveMessage", std::move(receiveMessageReq), FormAuthorizationStr("ru-central1"));
+            if (res.Body != "{}") {
+                break;
+            }
+            std::this_thread::sleep_for(std::chrono::milliseconds(1000));
+        }
+
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+
+        UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json));
+
+        auto receiptHandle = json["Messages"][0]["ReceiptHandle"].GetString();
+        UNIT_ASSERT(!receiptHandle.Empty());
+
+        NJson::TJsonValue changeMessageVisibility;
+        changeMessageVisibility["QueueUrl"] = resultQueueUrl;
+        changeMessageVisibility["ReceiptHandle"] = receiptHandle;
+        changeMessageVisibility["VisibilityTimeout"] = 1;
+
+        res = SendHttpRequest(
+            "/Root",
+            "AmazonSQS.ChangeMessageVisibility",
+            std::move(changeMessageVisibility),
+            FormAuthorizationStr("ru-central1")
+        );
+
+        UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200);
+    }
+
+    
Y_UNIT_TEST_F(TestChangeMessageVisibilityBatch, THttpProxyTestMock) { + auto createQueueReq = CreateSqsCreateQueueRequest(); + auto res = SendHttpRequest("/Root", "AmazonSQS.CreateQueue", std::move(createQueueReq), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + NJson::TJsonValue json; + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + TString resultQueueUrl = GetByPath(json, "QueueUrl"); + UNIT_ASSERT(resultQueueUrl.EndsWith("ExampleQueueName")); + + NJson::TJsonValue message0; + message0["Id"] = "Id-0"; + message0["MessageBody"] = "MessageBody-0"; + message0["MessageDeduplicationId"] = "MessageDeduplicationId-0"; + + NJson::TJsonValue message1; + message1["Id"] = "Id-1"; + message1["MessageBody"] = "MessageBody-1"; + message1["MessageDeduplicationId"] = "MessageDeduplicationId-1"; + + NJson::TJsonArray entries = {message0, message1}; + + NJson::TJsonValue sendMessageBatchReq; + sendMessageBatchReq["QueueUrl"] = resultQueueUrl; + sendMessageBatchReq["Entries"] = entries; + + res = SendHttpRequest("/Root", "AmazonSQS.SendMessageBatch", std::move(sendMessageBatchReq), FormAuthorizationStr("ru-central1")); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + UNIT_ASSERT(json["Successful"].GetArray().size() == 2); + + TVector messages; + for (int i = 0; i < 20; ++i) { + NJson::TJsonValue receiveMessageReq; + receiveMessageReq["QueueUrl"] = resultQueueUrl; + res = SendHttpRequest("/Root", "AmazonSQS.ReceiveMessage", std::move(receiveMessageReq), FormAuthorizationStr("ru-central1")); + if (res.Body != TString("{}")) { + NJson::ReadJsonTree(res.Body, &json); + if (json["Messages"].GetArray().size() == 2) { + messages.push_back(json["Messages"][0]); + messages.push_back(json["Messages"][1]); + break; + } + if (json["Messages"].GetArray().size() == 1) { + messages.push_back(json["Messages"][0]); + if (messages.size() == 2) { + break; + } + } + } + 
std::this_thread::sleep_for(std::chrono::milliseconds(1000)); + } + + UNIT_ASSERT_VALUES_EQUAL(messages.size(), 2); + + auto receiptHandle0 = messages[0]["ReceiptHandle"].GetString(); + UNIT_ASSERT(!receiptHandle0.Empty()); + auto receiptHandle1 = messages[1]["ReceiptHandle"].GetString(); + UNIT_ASSERT(!receiptHandle1.Empty()); + + + NJson::TJsonValue changeMessageVisibilityBatchReq; + changeMessageVisibilityBatchReq["QueueUrl"] = resultQueueUrl; + + NJson::TJsonValue entry0; + entry0["Id"] = "Id-0"; + entry0["ReceiptHandle"] = receiptHandle0; + entry0["VisibilityTimeout"] = 1; + + NJson::TJsonValue entry1; + entry1["Id"] = "Id-1"; + entry1["ReceiptHandle"] = receiptHandle1; + entry1["VisibilityTimeout"] = 2; + + NJson::TJsonArray changeVisibilityEntries = {entry0, entry1}; + changeMessageVisibilityBatchReq["Entries"] = changeVisibilityEntries; + + res = SendHttpRequest( + "/Root", "AmazonSQS.ChangeMessageVisibilityBatch", + std::move(changeMessageVisibilityBatchReq), + FormAuthorizationStr("ru-central1") + ); + UNIT_ASSERT_VALUES_EQUAL(res.HttpCode, 200); + UNIT_ASSERT(NJson::ReadJsonTree(res.Body, &json)); + UNIT_ASSERT_VALUES_EQUAL(json["Successful"].GetArray().size(), 2); + UNIT_ASSERT_VALUES_EQUAL(json["Successful"][0]["Id"], "Id-0"); + UNIT_ASSERT_VALUES_EQUAL(json["Successful"][1]["Id"], "Id-1"); + } } // Y_UNIT_TEST_SUITE(TestHttpProxy) diff --git a/ydb/core/http_proxy/ut/json_proto_conversion_ut.cpp b/ydb/core/http_proxy/ut/json_proto_conversion_ut.cpp index fb986daf9284..dd117d0535cf 100644 --- a/ydb/core/http_proxy/ut/json_proto_conversion_ut.cpp +++ b/ydb/core/http_proxy/ut/json_proto_conversion_ut.cpp @@ -1,5 +1,6 @@ #include #include "json_proto_conversion.h" +#include Y_UNIT_TEST_SUITE(JsonProtoConversion) { @@ -164,4 +165,72 @@ Y_UNIT_TEST(NlohmannJsonToProtoArray) { } } + +Y_UNIT_TEST(JsonToProtoMap) { + { + Ydb::Ymq::V1::CreateQueueRequest message; + + NJson::TJsonValue jsonObject; + jsonObject["QueueName"] = "SampleQueueName"; + + NJson::TJsonMap 
attributes; + attributes["DelaySeconds"] = "900"; + attributes["MaximumMessageSize"] = "1024"; + + jsonObject["Attributes"] = attributes; + + NKikimr::NHttpProxy::JsonToProto(jsonObject, &message); + + UNIT_ASSERT_VALUES_EQUAL(message.queue_name(), "SampleQueueName"); + UNIT_ASSERT_VALUES_EQUAL(message.attributes().find("DelaySeconds")->second, "900"); + UNIT_ASSERT_VALUES_EQUAL(message.attributes().find("MaximumMessageSize")->second, "1024"); + } +} + +Y_UNIT_TEST(ProtoMapToJson) { + { + Ydb::Ymq::V1::GetQueueAttributesResult message; + message.mutable_attributes()->insert({google::protobuf::MapPair("DelaySeconds", "900")}); + message.mutable_attributes()->insert({google::protobuf::MapPair("MaximumMessageSize", "1024")}); + + NJson::TJsonValue jsonObject; + NKikimr::NHttpProxy::ProtoToJson(message, jsonObject, false); + + UNIT_ASSERT_VALUES_EQUAL(jsonObject.GetMap().find("Attributes")->second.GetMap().size(), 2); + UNIT_ASSERT_VALUES_EQUAL(jsonObject.GetMap().find("Attributes")->second.GetMap().find("DelaySeconds")->second.GetString(), "900"); + UNIT_ASSERT_VALUES_EQUAL(jsonObject.GetMap().find("Attributes")->second.GetMap().find("MaximumMessageSize")->second.GetString(), "1024"); + } +} + +Y_UNIT_TEST(ProtoMapToJson_ReceiveMessageResult) { + // Test using ReceiveMessageResult that has a repeated field with TRANSFORM_BASE64. + // Before fix it failed on messages with attributes. 
+ { + Ydb::Ymq::V1::ReceiveMessageResult message; + message.add_messages()->mutable_message_attributes()->insert({google::protobuf::MapPair("a", {})}); + + NJson::TJsonValue jsonObject; + NKikimr::NHttpProxy::ProtoToJson(message, jsonObject, false); + } +} + +Y_UNIT_TEST(NlohmannJsonToProtoMap) { + { + nlohmann::json jsonObject; + jsonObject["QueueName"] = "SampleQueueName"; + + nlohmann::json attributes; + attributes["DelaySeconds"] = "900"; + attributes["MaximumMessageSize"] = "1024"; + jsonObject["Attributes"] = attributes; + nlohmann::json record; + + Ydb::Ymq::V1::CreateQueueRequest message; + NKikimr::NHttpProxy::NlohmannJsonToProto(jsonObject, &message); + + UNIT_ASSERT_VALUES_EQUAL(message.queue_name(), "SampleQueueName"); + UNIT_ASSERT_VALUES_EQUAL(message.attributes().find("DelaySeconds")->second, "900"); + UNIT_ASSERT_VALUES_EQUAL(message.attributes().find("MaximumMessageSize")->second, "1024"); + } +} } // Y_UNIT_TEST_SUITE(JsonProtoConversion) diff --git a/ydb/core/http_proxy/ut/ya.make b/ydb/core/http_proxy/ut/ya.make index ac00c02ab912..bf2a6a040af1 100644 --- a/ydb/core/http_proxy/ut/ya.make +++ b/ydb/core/http_proxy/ut/ya.make @@ -25,6 +25,7 @@ PEERDIR( ydb/services/kesus ydb/services/persqueue_cluster_discovery ydb/services/ydb + ydb/services/ymq ) SRCS( diff --git a/ydb/core/http_proxy/ya.make b/ydb/core/http_proxy/ya.make index 2839751963bd..24857f5ccb2b 100644 --- a/ydb/core/http_proxy/ya.make +++ b/ydb/core/http_proxy/ya.make @@ -42,6 +42,7 @@ PEERDIR( ydb/public/sdk/cpp/client/iam_private ydb/services/datastreams ydb/services/persqueue_v1/actors + ydb/services/ymq ydb/public/api/grpc ydb/public/api/protos ) diff --git a/ydb/core/kafka_proxy/actors/kafka_alter_configs_actor.cpp b/ydb/core/kafka_proxy/actors/kafka_alter_configs_actor.cpp index f498a4ee0925..55600858669f 100644 --- a/ydb/core/kafka_proxy/actors/kafka_alter_configs_actor.cpp +++ b/ydb/core/kafka_proxy/actors/kafka_alter_configs_actor.cpp @@ -34,14 +34,14 @@ class 
TAlterConfigsActor : public TAlterTopicActor userToken, TString topicPath, TString databaseName, std::optional retentionMs, std::optional retentionBytes) : TAlterTopicActor( - requester, + requester, userToken, topicPath, databaseName) @@ -54,12 +54,12 @@ class TAlterConfigsActor : public TAlterTopicActorDatabasePath, convertedRetentions.Ms, - convertedRetentions.Bytes + convertedRetentions.Bytes )); InflyTopics++; @@ -201,7 +201,7 @@ void TKafkaAlterConfigsActor::Reply(const TActorContext& ctx) { responseResource.ErrorCode = INVALID_REQUEST; response->Responses.push_back(responseResource); responseStatus = INVALID_REQUEST; - } + } Send(Context->ConnectionId, new TEvKafka::TEvResponse(CorrelationId, response, responseStatus)); diff --git a/ydb/core/kafka_proxy/actors/kafka_create_partitions_actor.cpp b/ydb/core/kafka_proxy/actors/kafka_create_partitions_actor.cpp index 6e13bd61cc7a..521f8598f9b3 100644 --- a/ydb/core/kafka_proxy/actors/kafka_create_partitions_actor.cpp +++ b/ydb/core/kafka_proxy/actors/kafka_create_partitions_actor.cpp @@ -215,13 +215,13 @@ class TCreatePartitionsActor : public TAlterTopicActor userToken, TString topicPath, TString databaseName, ui32 partitionsNumber) : TAlterTopicActor( - requester, + requester, userToken, topicPath, databaseName) @@ -234,12 +234,12 @@ class TCreatePartitionsActor : public TAlterTopicActorResults.push_back(responseTopic); responseStatus = INVALID_REQUEST; - } + } Send(Context->ConnectionId, new TEvKafka::TEvResponse(CorrelationId, response, responseStatus)); Die(ctx); diff --git a/ydb/core/kafka_proxy/actors/kafka_create_topics_actor.cpp b/ydb/core/kafka_proxy/actors/kafka_create_topics_actor.cpp index 3fad0055a1b1..44af7beb5ff7 100644 --- a/ydb/core/kafka_proxy/actors/kafka_create_topics_actor.cpp +++ b/ydb/core/kafka_proxy/actors/kafka_create_topics_actor.cpp @@ -16,7 +16,7 @@ class TCreateTopicActor : public NKikimr::NGRpcProxy::V1::TPQGrpcSchemaBase userToken, TString topicPath, TString databaseName, @@ -78,13 
+78,13 @@ class TCreateTopicActor : public NKikimr::NGRpcProxy::V1::TPQGrpcSchemaBase, std::optional>( convertedRetentions.Ms, - convertedRetentions.Bytes + convertedRetentions.Bytes ); ctx.Register(new TCreateTopicActor( @@ -202,7 +202,7 @@ void TKafkaCreateTopicsActor::Bootstrap(const NActors::TActorContext& ctx) { Context->DatabasePath, topic.NumPartitions, convertedRetentions.Ms, - convertedRetentions.Bytes + convertedRetentions.Bytes )); InflyTopics++; @@ -243,7 +243,7 @@ void TKafkaCreateTopicsActor::Reply(const TActorContext& ctx) { responseTopic.ErrorMessage = TopicNamesToResponses[topicName]->Message; } - auto addConfigIfRequired = [this, &topicName, &responseTopic](std::optional configValue, TString configName) { + auto addConfigIfRequired = [this, &topicName, &responseTopic](std::optional configValue, TString configName) { if (configValue.has_value()) { TCreateTopicsResponseData::TCreatableTopicResult::TCreatableTopicConfigs config; config.Name = configName; @@ -271,7 +271,7 @@ void TKafkaCreateTopicsActor::Reply(const TActorContext& ctx) { responseTopic.ErrorMessage = "Duplicate topic in request."; response->Topics.push_back(responseTopic); responseStatus = INVALID_REQUEST; - } + } Send(Context->ConnectionId, new TEvKafka::TEvResponse(CorrelationId, response, responseStatus)); diff --git a/ydb/core/kafka_proxy/actors/kafka_fetch_actor.cpp b/ydb/core/kafka_proxy/actors/kafka_fetch_actor.cpp index 4968aac5f210..648d38d273e2 100644 --- a/ydb/core/kafka_proxy/actors/kafka_fetch_actor.cpp +++ b/ydb/core/kafka_proxy/actors/kafka_fetch_actor.cpp @@ -35,7 +35,7 @@ void TKafkaFetchActor::SendFetchRequests(const TActorContext& ctx) { TVector partPQRequests; PrepareFetchRequestData(topicIndex, partPQRequests); auto ruPerRequest = topicIndex == 0 && Context->Config.GetMeteringV2Enabled(); - NKikimr::NPQ::TFetchRequestSettings request(Context->DatabasePath, partPQRequests, FetchRequestData->MaxWaitMs, FetchRequestData->MaxBytes, Context->RlContext, 
*Context->UserToken, ruPerRequest); + NKikimr::NPQ::TFetchRequestSettings request(Context->DatabasePath, partPQRequests, FetchRequestData->MaxWaitMs, FetchRequestData->MaxBytes, Context->RlContext, *Context->UserToken, 0, ruPerRequest); auto fetchActor = NKikimr::NPQ::CreatePQFetchRequestActor(request, NKikimr::MakeSchemeCacheID(), ctx.SelfID); auto actorId = ctx.Register(fetchActor); PendingResponses++; diff --git a/ydb/core/kafka_proxy/actors/kafka_offset_fetch_actor.cpp b/ydb/core/kafka_proxy/actors/kafka_offset_fetch_actor.cpp index 96893aaa59ce..7f494d25e1b3 100644 --- a/ydb/core/kafka_proxy/actors/kafka_offset_fetch_actor.cpp +++ b/ydb/core/kafka_proxy/actors/kafka_offset_fetch_actor.cpp @@ -28,7 +28,8 @@ struct PartitionOffsets { class TTopicOffsetActor: public NKikimr::NGRpcProxy::V1::TPQInternalSchemaActor, - public NKikimr::NGRpcProxy::V1::TDescribeTopicActorImpl { + public NKikimr::NGRpcProxy::V1::TDescribeTopicActorImpl, + public NKikimr::NGRpcProxy::V1::TCdcStreamCompatible { using TBase = NKikimr::NGRpcProxy::V1::TPQInternalSchemaActor; @@ -36,7 +37,7 @@ class TTopicOffsetActor: public NKikimr::NGRpcProxy::V1::TPQInternalSchemaActor< public: TTopicOffsetActor(std::shared_ptr> consumers, const NKikimr::NGRpcProxy::V1::TLocalRequestBase& request, - const TActorId& requester, + const TActorId& requester, std::shared_ptr> partitions, const TString& originalTopicName, const TString& userSID) diff --git a/ydb/core/kafka_proxy/actors/kafka_produce_actor.cpp b/ydb/core/kafka_proxy/actors/kafka_produce_actor.cpp index 062e74853a18..3f7d50f7dc58 100644 --- a/ydb/core/kafka_proxy/actors/kafka_produce_actor.cpp +++ b/ydb/core/kafka_proxy/actors/kafka_produce_actor.cpp @@ -5,6 +5,7 @@ #include #include +#include namespace NKafka { @@ -262,6 +263,7 @@ THolder Convert(const TProduceRequestData:: for (const auto& record : batch->Records) { NKikimrPQClient::TDataChunk proto; + proto.set_codec(NPersQueueCommon::RAW); for(auto& h : record.Headers) { auto res = 
proto.AddMessageMeta(); if (h.Key) { diff --git a/ydb/core/kafka_proxy/actors/kafka_produce_actor.h b/ydb/core/kafka_proxy/actors/kafka_produce_actor.h index f1eea1c150de..1473ba616bf4 100644 --- a/ydb/core/kafka_proxy/actors/kafka_produce_actor.h +++ b/ydb/core/kafka_proxy/actors/kafka_produce_actor.h @@ -19,7 +19,7 @@ using namespace NKikimrClient; // Each request can contain data for writing to several topics, and in each topic to several partitions. // When a request to write to an unknown topic arrives, the actor changes the state to Init until it receives // information about all the topics needed to process the request. -// +// // Requests are processed in parallel, but it is guaranteed that the recording order will be preserved. // The order of responses to requests is also guaranteed. // diff --git a/ydb/core/kafka_proxy/actors/kafka_sasl_auth_actor.cpp b/ydb/core/kafka_proxy/actors/kafka_sasl_auth_actor.cpp index 8a0fafc5de3f..f011e4ba248d 100644 --- a/ydb/core/kafka_proxy/actors/kafka_sasl_auth_actor.cpp +++ b/ydb/core/kafka_proxy/actors/kafka_sasl_auth_actor.cpp @@ -134,12 +134,13 @@ bool TKafkaSaslAuthActor::TryParseAuthDataTo(TKafkaSaslAuthActor::TAuthData& aut auto password = tokens[2]; size_t atPos = userAndDatabase.rfind('@'); if (atPos == TString::npos) { - SendResponseAndDie(EKafkaErrors::SASL_AUTHENTICATION_FAILED, "Database not provided.", "", ctx); - return false; + authData.UserName = ""; + authData.Database = userAndDatabase; + } else { + authData.UserName = userAndDatabase.substr(0, atPos); + authData.Database = userAndDatabase.substr(atPos + 1); } - authData.UserName = userAndDatabase.substr(0, atPos); - authData.Database = userAndDatabase.substr(atPos + 1); authData.Password = password; return true; } diff --git a/ydb/core/kafka_proxy/actors/kafka_topic_offsets_actor.h b/ydb/core/kafka_proxy/actors/kafka_topic_offsets_actor.h index 08db21ce59c4..c284a9d36510 100644 --- a/ydb/core/kafka_proxy/actors/kafka_topic_offsets_actor.h +++ 
b/ydb/core/kafka_proxy/actors/kafka_topic_offsets_actor.h @@ -14,7 +14,8 @@ namespace NKafka { class TTopicOffsetsActor : public NKikimr::NGRpcProxy::V1::TPQInternalSchemaActor - , public NKikimr::NGRpcProxy::V1::TDescribeTopicActorImpl { + , public NKikimr::NGRpcProxy::V1::TDescribeTopicActorImpl + , public NKikimr::NGRpcProxy::V1::TCdcStreamCompatible { using TBase = TPQInternalSchemaActor& ev); void HandleCacheNavigateResponse(NKikimr::TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) override; - + virtual void ApplyResponse(TTabletInfo&, NKikimr::TEvPersQueue::TEvReadSessionsInfoResponse::TPtr&, const TActorContext&) override { Y_ABORT(); } + bool ApplyResponse(NKikimr::TEvPersQueue::TEvGetPartitionsLocationResponse::TPtr&, const TActorContext&) override { Y_ABORT(); } void ApplyResponse(TTabletInfo& tabletInfo, NKikimr::TEvPersQueue::TEvStatusResponse::TPtr& ev, const TActorContext& ctx) override; - + void Reply(const TActorContext&) override; void RaiseError(const TString& error, const Ydb::PersQueue::ErrorCode::ErrorCode errorCode, const Ydb::StatusIds::StatusCode status, const TActorContext&) override; diff --git a/ydb/core/kafka_proxy/kafka_events.h b/ydb/core/kafka_proxy/kafka_events.h index f3bacc32b395..14855d40c1d4 100644 --- a/ydb/core/kafka_proxy/kafka_events.h +++ b/ydb/core/kafka_proxy/kafka_events.h @@ -208,8 +208,8 @@ struct TGetOffsetsRequest : public NKikimr::NGRpcProxy::V1::TLocalRequestBase { TVector PartitionIds; }; -struct TEvTopicOffsetsResponse : public NActors::TEventLocal - , public NKikimr::NGRpcProxy::V1::TEvPQProxy::TLocalResponseBase +struct TEvTopicOffsetsResponse : public NActors::TEventLocal + , public NKikimr::NGRpcProxy::V1::TLocalResponseBase { TEvTopicOffsetsResponse() {} @@ -217,8 +217,8 @@ struct TEvTopicOffsetsResponse : public NActors::TEventLocal Partitions; }; -struct TEvCommitedOffsetsResponse : public NActors::TEventLocal - , public NKikimr::NGRpcProxy::V1::TEvPQProxy::TLocalResponseBase +struct 
TEvCommitedOffsetsResponse : public NActors::TEventLocal + , public NKikimr::NGRpcProxy::V1::TLocalResponseBase { TEvCommitedOffsetsResponse() {} @@ -228,8 +228,8 @@ struct TEvCommitedOffsetsResponse : public NActors::TEventLocal>> PartitionIdToOffsets; }; -struct TEvTopicModificationResponse : public NActors::TEventLocal - , public NKikimr::NGRpcProxy::V1::TEvPQProxy::TLocalResponseBase +struct TEvTopicModificationResponse : public NActors::TEventLocal + , public NKikimr::NGRpcProxy::V1::TLocalResponseBase { enum EStatus { OK, diff --git a/ydb/core/kafka_proxy/ut/ut_protocol.cpp b/ydb/core/kafka_proxy/ut/ut_protocol.cpp index 1a07bf9b505b..dbca66e624a3 100644 --- a/ydb/core/kafka_proxy/ut/ut_protocol.cpp +++ b/ydb/core/kafka_proxy/ut/ut_protocol.cpp @@ -1000,6 +1000,11 @@ Y_UNIT_TEST_SUITE(KafkaProtocol) { TString topicName = "/Root/topic-0-test"; TString shortTopicName = "topic-0-test"; TString notExistsTopicName = "/Root/not-exists"; + + TString tableName = "/Root/table-0-test"; + TString feedName = "feed"; + TString feedPath = tableName + "/" + feedName; + ui64 minActivePartitions = 10; TString key = "record-key"; @@ -1207,6 +1212,60 @@ Y_UNIT_TEST_SUITE(KafkaProtocol) { } } + { + NYdb::NTable::TTableClient tableClient(*testServer.Driver); + tableClient.RetryOperationSync([&](TSession session) + { + NYdb::NTable::TTableBuilder builder; + builder.AddNonNullableColumn("key", NYdb::EPrimitiveType::Int64).SetPrimaryKeyColumn("key"); + builder.AddNonNullableColumn("value", NYdb::EPrimitiveType::Int64); + + auto createResult = session.CreateTable(tableName, builder.Build()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL(createResult.IsTransportError(), false); + Cerr << createResult.GetIssues().ToString() << "\n"; + UNIT_ASSERT_VALUES_EQUAL(createResult.GetStatus(), EStatus::SUCCESS); + + auto alterResult = session.AlterTable(tableName, NYdb::NTable::TAlterTableSettings() + .AppendAddChangefeeds(NYdb::NTable::TChangefeedDescription(feedName, + 
NYdb::NTable::EChangefeedMode::Updates, + NYdb::NTable::EChangefeedFormat::Json)) + ).ExtractValueSync(); + Cerr << alterResult.GetIssues().ToString() << "\n"; + UNIT_ASSERT_VALUES_EQUAL(alterResult.IsTransportError(), false); + UNIT_ASSERT_VALUES_EQUAL(alterResult.GetStatus(), EStatus::SUCCESS); + return alterResult; + } + ); + + TValueBuilder rows; + rows.BeginList(); + rows.AddListItem() + .BeginStruct() + .AddMember("key").Int64(1) + .AddMember("value").Int64(2) + .EndStruct(); + rows.EndList(); + + auto upsertResult = tableClient.BulkUpsert(tableName, rows.Build()).GetValueSync(); + UNIT_ASSERT_EQUAL(upsertResult.GetStatus(), EStatus::SUCCESS); + } + + { + // Check CDC + std::vector>> topics {{feedPath, {0}}}; + auto msg = client.Fetch(topics); + UNIT_ASSERT_VALUES_EQUAL(msg->Responses.size(), 1); + UNIT_ASSERT_VALUES_EQUAL(msg->Responses[0].Partitions.size(), 1); + UNIT_ASSERT_VALUES_EQUAL(msg->Responses[0].Partitions[0].ErrorCode, static_cast(EKafkaErrors::NONE_ERROR)); + UNIT_ASSERT_VALUES_EQUAL(msg->Responses[0].Partitions[0].Records.has_value(), true); + UNIT_ASSERT_VALUES_EQUAL(msg->Responses[0].Partitions[0].Records->Records.size(), 1); + auto record = msg->Responses[0].Partitions[0].Records->Records[0]; + + auto data = record.Value.value(); + auto dataStr = TString(data.data(), data.size()); + UNIT_ASSERT_VALUES_EQUAL(dataStr, "{\"update\":{\"value\":2},\"key\":[1]}"); + } + } // Y_UNIT_TEST(FetchScenario) Y_UNIT_TEST(BalanceScenario) { @@ -2300,4 +2359,53 @@ Y_UNIT_TEST_SUITE(KafkaProtocol) { Sleep(TDuration::Seconds(1)); } + + Y_UNIT_TEST(LoginWithApiKeyWithoutAt) { + TInsecureTestServer testServer; + + TString topicName = "/Root/topic-0-test"; + + NYdb::NTopic::TTopicClient pqClient(*testServer.Driver); + { + auto result = + pqClient + .CreateTopic(topicName, + NYdb::NTopic::TCreateTopicSettings() + .PartitioningSettings(10, 100) + .BeginAddConsumer("consumer-0").EndAddConsumer()) + .ExtractValueSync(); + 
UNIT_ASSERT_VALUES_EQUAL(result.IsTransportError(), false); + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS); + } + + auto settings = NTopic::TReadSessionSettings() + .AppendTopics(NTopic::TTopicReadSettings(topicName)) + .ConsumerName("consumer-0"); + auto topicReader = pqClient.CreateReadSession(settings); + + TTestClient client(testServer.Port); + + { + auto msg = client.ApiVersions(); + + UNIT_ASSERT_VALUES_EQUAL(msg->ErrorCode, static_cast(EKafkaErrors::NONE_ERROR)); + UNIT_ASSERT_VALUES_EQUAL(msg->ApiKeys.size(), 18u); + } + + { + auto msg = client.SaslHandshake(); + + UNIT_ASSERT_VALUES_EQUAL(msg->ErrorCode, static_cast(EKafkaErrors::NONE_ERROR)); + UNIT_ASSERT_VALUES_EQUAL(msg->Mechanisms.size(), 1u); + UNIT_ASSERT_VALUES_EQUAL(*msg->Mechanisms[0], "PLAIN"); + } + + { + auto msg = client.SaslAuthenticate("/Root", "ApiKey-value-valid"); + Cerr << msg->ErrorMessage << "\n"; + UNIT_ASSERT_VALUES_EQUAL(msg->ErrorCode, static_cast(EKafkaErrors::NONE_ERROR)); + } + + Sleep(TDuration::Seconds(1)); + } } // Y_UNIT_TEST_SUITE(KafkaProtocol) diff --git a/ydb/core/keyvalue/keyvalue_state.cpp b/ydb/core/keyvalue/keyvalue_state.cpp index 8135c86c2a2b..5ec47557f1fa 100644 --- a/ydb/core/keyvalue/keyvalue_state.cpp +++ b/ydb/core/keyvalue/keyvalue_state.cpp @@ -3092,6 +3092,9 @@ void TKeyValueState::RegisterRequestActor(const TActorContext &ctx, THolderRefCountsIncr.emplace_back(patch.PatchedBlobId, true); + + LOG_INFO_S(ctx, NKikimrServices::KEYVALUE, "KeyValue# " << TabletId + << " PatchedKey# " << patch.PatchedKey << " BlobId# " << patch.PatchedBlobId); }; for (auto& write : intermediate->Writes) { diff --git a/ydb/core/kqp/common/compilation/events.h b/ydb/core/kqp/common/compilation/events.h index 062148adf752..7b43ccd4e31d 100644 --- a/ydb/core/kqp/common/compilation/events.h +++ b/ydb/core/kqp/common/compilation/events.h @@ -76,7 +76,8 @@ struct TEvRecompileRequest: public TEventLocal& query, bool isQueryActionPrepare, TInstant deadline, 
TKqpDbCountersPtr dbCounters, const TGUCSettings::TPtr& gUCSettings, const TMaybe& applicationName, std::shared_ptr> intrestedInResult, const TIntrusivePtr& userRequestContext, - NLWTrace::TOrbit orbit = {}, TKqpTempTablesState::TConstPtr tempTablesState = nullptr, TMaybe queryAst = Nothing()) + NLWTrace::TOrbit orbit = {}, TKqpTempTablesState::TConstPtr tempTablesState = nullptr, TMaybe queryAst = Nothing(), + bool split = false, NYql::TExprContext* splitCtx = nullptr, NYql::TExprNode::TPtr splitExpr = nullptr) : UserToken(userToken) , Uid(uid) , Query(query) @@ -90,6 +91,9 @@ struct TEvRecompileRequest: public TEventLocal> IntrestedInResult; TMaybe QueryAst; + bool Split = false; + + NYql::TExprContext* SplitCtx = nullptr; + NYql::TExprNode::TPtr SplitExpr = nullptr; }; struct TEvCompileResponse: public TEventLocal { diff --git a/ydb/core/kqp/common/compilation/result.cpp b/ydb/core/kqp/common/compilation/result.cpp index 292cd0d03c4b..02672db8189a 100644 --- a/ydb/core/kqp/common/compilation/result.cpp +++ b/ydb/core/kqp/common/compilation/result.cpp @@ -2,4 +2,11 @@ namespace NKikimr::NKqp { +std::shared_ptr TKqpCompileResult::GetAst() const { + if (QueryAst) { + return QueryAst->Ast; + } + return nullptr; +} + } // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/common/compilation/result.h b/ydb/core/kqp/common/compilation/result.h index e5ab7cfc7d6a..1f74b7f8f045 100644 --- a/ydb/core/kqp/common/compilation/result.h +++ b/ydb/core/kqp/common/compilation/result.h @@ -1,5 +1,6 @@ #pragma once #include +#include #include #include #include @@ -14,24 +15,26 @@ struct TKqpCompileResult { using TConstPtr = std::shared_ptr; TKqpCompileResult(const TString& uid, const Ydb::StatusIds::StatusCode& status, const NYql::TIssues& issues, - ETableReadType maxReadType, TMaybe query = {}, std::shared_ptr ast = {}, + ETableReadType maxReadType, TMaybe query = {}, TMaybe queryAst = {}, bool needToSplit = false, const TMaybe& commandTagName = {}) : Status(status) , 
Issues(issues) , Query(std::move(query)) , Uid(uid) , MaxReadType(maxReadType) - , Ast(std::move(ast)) + , QueryAst(std::move(queryAst)) , NeedToSplit(needToSplit) , CommandTagName(commandTagName) {} static std::shared_ptr Make(const TString& uid, const Ydb::StatusIds::StatusCode& status, const NYql::TIssues& issues, ETableReadType maxReadType, TMaybe query = {}, - std::shared_ptr ast = {}, bool needToSplit = false, const TMaybe& commandTagName = {}) + TMaybe queryAst = {}, bool needToSplit = false, const TMaybe& commandTagName = {}) { - return std::make_shared(uid, status, issues, maxReadType, std::move(query), std::move(ast), needToSplit, commandTagName); + return std::make_shared(uid, status, issues, maxReadType, std::move(query), std::move(queryAst), needToSplit, commandTagName); } + std::shared_ptr GetAst() const; + Ydb::StatusIds::StatusCode Status; NYql::TIssues Issues; @@ -40,7 +43,7 @@ struct TKqpCompileResult { ETableReadType MaxReadType; bool AllowCache = true; - std::shared_ptr Ast; + TMaybe QueryAst; bool NeedToSplit = false; TMaybe CommandTagName = {}; diff --git a/ydb/core/kqp/common/events/query.h b/ydb/core/kqp/common/events/query.h index 93f3ebb5b0dd..b1d01c8b5159 100644 --- a/ydb/core/kqp/common/events/query.h +++ b/ydb/core/kqp/common/events/query.h @@ -1,4 +1,5 @@ #pragma once +#include #include #include #include @@ -69,7 +70,9 @@ struct TEvQueryRequest: public NActors::TEventLocalSetUsePublicResponseDataFormat(true); + } bool IsSerializable() const override { return true; @@ -342,6 +345,14 @@ struct TEvQueryRequest: public NActors::TEventLocal GetPoolConfig() const { + return PoolConfig; + } + mutable NKikimrKqp::TEvQueryRequest Record; private: @@ -370,6 +381,7 @@ struct TEvQueryRequest: public NActors::TEventLocal UserRequestContext; TDuration ProgressStatsPeriod; + std::optional PoolConfig; }; struct TEvDataQueryStreamPart: public TEventPB { - TEvCleanupRequest(const TString& database, const TString& sessionId, const TString& poolId) + 
TEvCleanupRequest(const TString& database, const TString& sessionId, const TString& poolId, TDuration duration, TDuration cpuConsumed) : Database(database) , SessionId(sessionId) , PoolId(poolId) + , Duration(duration) + , CpuConsumed(cpuConsumed) {} const TString Database; const TString SessionId; const TString PoolId; + const TDuration Duration; + const TDuration CpuConsumed; }; struct TEvCleanupResponse : public NActors::TEventLocal { @@ -62,4 +66,18 @@ struct TEvCleanupResponse : public NActors::TEventLocal { + TEvUpdatePoolInfo(const TString& database, const TString& poolId, const std::optional& config, const std::optional& securityObject) + : Database(database) + , PoolId(poolId) + , Config(config) + , SecurityObject(securityObject) + {} + + const TString Database; + const TString PoolId; + const std::optional Config; + const std::optional SecurityObject; +}; + } // NKikimr::NKqp::NWorkload diff --git a/ydb/core/kqp/common/kqp.h b/ydb/core/kqp/common/kqp.h index 14785394faa1..e0bccb2e5027 100644 --- a/ydb/core/kqp/common/kqp.h +++ b/ydb/core/kqp/common/kqp.h @@ -33,18 +33,9 @@ namespace NKikimr::NKqp { -void ConvertKqpQueryResultToDbResult(const NKikimrMiniKQL::TResult& from, Ydb::ResultSet* to); - TString ScriptExecutionRunnerActorIdString(const NActors::TActorId& actorId); bool ScriptExecutionRunnerActorIdFromString(const TString& executionId, TActorId& actorId); -template -inline void ConvertKqpQueryResultsToDbResult(const TFrom& from, TTo* to) { - const auto& results = from.GetResults(); - for (const auto& result : results) { - ConvertKqpQueryResultToDbResult(result, to->add_result_sets()); - } -} class TKqpRequestInfo { public: @@ -80,7 +71,7 @@ class IQueryReplayBackend : public TNonCopyable { /// Accepts query text virtual void Collect(const TString& queryData) = 0; - virtual bool IsNull() { return false; } + virtual bool IsNull() { return false; } virtual ~IQueryReplayBackend() {}; diff --git a/ydb/core/kqp/common/kqp_event_impl.cpp 
b/ydb/core/kqp/common/kqp_event_impl.cpp index cadd44a1c89e..5396e7c0d6f3 100644 --- a/ydb/core/kqp/common/kqp_event_impl.cpp +++ b/ydb/core/kqp/common/kqp_event_impl.cpp @@ -90,6 +90,7 @@ void TEvKqp::TEvQueryRequest::PrepareRemote() const { Record.MutableRequest()->SetPoolId(PoolId); } + Record.MutableRequest()->SetUsePublicResponseDataFormat(true); Record.MutableRequest()->SetSessionId(SessionId); Record.MutableRequest()->SetAction(QueryAction); Record.MutableRequest()->SetType(QueryType); diff --git a/ydb/core/kqp/common/kqp_timeouts.cpp b/ydb/core/kqp/common/kqp_timeouts.cpp index 4389a3e3241d..1a59fbef4d13 100644 --- a/ydb/core/kqp/common/kqp_timeouts.cpp +++ b/ydb/core/kqp/common/kqp_timeouts.cpp @@ -19,9 +19,10 @@ ui64 GetDefaultQueryTimeoutMs(NKikimrKqp::EQueryType queryType, case NKikimrKqp::QUERY_TYPE_SQL_DML: case NKikimrKqp::QUERY_TYPE_PREPARED_DML: case NKikimrKqp::QUERY_TYPE_AST_DML: + return queryLimits.GetDataQueryTimeoutMs(); case NKikimrKqp::QUERY_TYPE_SQL_GENERIC_QUERY: case NKikimrKqp::QUERY_TYPE_SQL_GENERIC_CONCURRENT_QUERY: - return queryLimits.GetDataQueryTimeoutMs(); + return queryServiceConfig.GetQueryTimeoutDefaultSeconds() * 1000; case NKikimrKqp::QUERY_TYPE_SQL_GENERIC_SCRIPT: return queryServiceConfig.GetScriptOperationTimeoutDefaultSeconds() diff --git a/ydb/core/kqp/common/kqp_tx.cpp b/ydb/core/kqp/common/kqp_tx.cpp index e3209f08dec9..519995c69179 100644 --- a/ydb/core/kqp/common/kqp_tx.cpp +++ b/ydb/core/kqp/common/kqp_tx.cpp @@ -7,24 +7,33 @@ namespace NKqp { using namespace NYql; -TIssue GetLocksInvalidatedIssue(const TKqpTransactionContext& txCtx, const TMaybe& invalidatedLock) { +NYql::TIssue GetLocksInvalidatedIssue(const TKqpTransactionContext& txCtx, const TKikimrPathId& pathId) { TStringBuilder message; message << "Transaction locks invalidated."; - TMaybe tableName; - if (invalidatedLock) { - TKikimrPathId id(invalidatedLock->GetSchemeShard(), invalidatedLock->GetPathId()); - auto table = txCtx.TableByIdMap.FindPtr(id); - 
if (table) { - tableName = *table; + if (pathId.OwnerId() != 0) { + auto table = txCtx.TableByIdMap.FindPtr(pathId); + if (!table) { + return YqlIssue(TPosition(), TIssuesIds::KIKIMR_LOCKS_INVALIDATED, message << " Unknown table."); } + return YqlIssue(TPosition(), TIssuesIds::KIKIMR_LOCKS_INVALIDATED, message << " Table: " << *table); + } else { + // Olap tables don't return SchemeShard in locks, thus we use tableId here. + for (const auto& [candidateId, table] : txCtx.TableByIdMap) { + if (candidateId.TableId() == pathId.TableId()) { + return YqlIssue(TPosition(), TIssuesIds::KIKIMR_LOCKS_INVALIDATED, message << " Table: " << table); + } + } + return YqlIssue(TPosition(), TIssuesIds::KIKIMR_LOCKS_INVALIDATED, message << " Unknown table."); } +} - if (tableName) { - message << " Table: " << *tableName; - } - - return YqlIssue(TPosition(), TIssuesIds::KIKIMR_LOCKS_INVALIDATED, message); +TIssue GetLocksInvalidatedIssue(const TKqpTransactionContext& txCtx, const TKqpTxLock& invalidatedLock) { + return GetLocksInvalidatedIssue( + txCtx, + TKikimrPathId( + invalidatedLock.GetSchemeShard(), + invalidatedLock.GetPathId())); } std::pair> MergeLocks(const NKikimrMiniKQL::TType& type, const NKikimrMiniKQL::TValue& value, @@ -166,6 +175,13 @@ bool NeedSnapshot(const TKqpTransactionContext& txCtx, const NYql::TKikimrConfig for (const auto &input : stage.GetInputs()) { hasStreamLookup |= input.GetTypeCase() == NKqpProto::TKqpPhyConnection::kStreamLookup; } + + for (const auto &tableOp : stage.GetTableOps()) { + if (tableOp.GetTypeCase() == NKqpProto::TKqpPhyTableOperation::kReadOlapRange) { + // always need snapshot for OLAP reads + return true; + } + } } } @@ -188,6 +204,11 @@ bool NeedSnapshot(const TKqpTransactionContext& txCtx, const NYql::TKikimrConfig return false; } + // We need snapshot for stream lookup, because it's used for dependent reads + if (hasStreamLookup) { + return true; + } + // We need snapshot when there are multiple table read phases, most // likely it involves 
multiple tables and we would have to use a // distributed commit otherwise. Taking snapshot helps as avoid TLI diff --git a/ydb/core/kqp/common/kqp_tx.h b/ydb/core/kqp/common/kqp_tx.h index 289c7b6650b2..a668c0ea4977 100644 --- a/ydb/core/kqp/common/kqp_tx.h +++ b/ydb/core/kqp/common/kqp_tx.h @@ -434,6 +434,7 @@ class TTransactionsCache { } }; +NYql::TIssue GetLocksInvalidatedIssue(const TKqpTransactionContext& txCtx, const NYql::TKikimrPathId& pathId); std::pair> MergeLocks(const NKikimrMiniKQL::TType& type, const NKikimrMiniKQL::TValue& value, TKqpTransactionContext& txCtx); diff --git a/ydb/core/kqp/common/kqp_user_request_context.h b/ydb/core/kqp/common/kqp_user_request_context.h index 1d5a966bd0fb..1aa4a0574d67 100644 --- a/ydb/core/kqp/common/kqp_user_request_context.h +++ b/ydb/core/kqp/common/kqp_user_request_context.h @@ -15,7 +15,7 @@ namespace NKikimr::NKqp { TString CurrentExecutionId; TString CustomerSuppliedId; TString PoolId; - NResourcePool::TPoolSettings PoolConfig; + std::optional PoolConfig; TUserRequestContext() = default; diff --git a/ydb/core/kqp/common/kqp_yql.h b/ydb/core/kqp/common/kqp_yql.h index f1a52fc7a897..dfd4bdbc4963 100644 --- a/ydb/core/kqp/common/kqp_yql.h +++ b/ydb/core/kqp/common/kqp_yql.h @@ -48,6 +48,7 @@ constexpr TStringBuf KqpTableSinkName = "KqpTableSinkName"; static constexpr std::string_view TKqpStreamLookupStrategyName = "LookupRows"sv; static constexpr std::string_view TKqpStreamLookupJoinStrategyName = "LookupJoinRows"sv; +static constexpr std::string_view TKqpStreamLookupSemiJoinStrategyName = "LookupSemiJoinRows"sv; struct TKqpReadTableSettings { static constexpr TStringBuf SkipNullKeysSettingName = "SkipNullKeys"; diff --git a/ydb/core/kqp/common/simple/kqp_event_ids.h b/ydb/core/kqp/common/simple/kqp_event_ids.h index de1be6b95043..b0002f332bd2 100644 --- a/ydb/core/kqp/common/simple/kqp_event_ids.h +++ b/ydb/core/kqp/common/simple/kqp_event_ids.h @@ -174,6 +174,7 @@ struct TKqpWorkloadServiceEvents { 
EvContinueRequest, EvCleanupRequest, EvCleanupResponse, + EvUpdatePoolInfo, }; }; diff --git a/ydb/core/kqp/common/simple/query_id.cpp b/ydb/core/kqp/common/simple/query_id.cpp index 849c384e969a..b2066bbd8b88 100644 --- a/ydb/core/kqp/common/simple/query_id.cpp +++ b/ydb/core/kqp/common/simple/query_id.cpp @@ -4,6 +4,7 @@ #include #include +#include #include @@ -74,4 +75,25 @@ bool TKqpQueryId::operator==(const TKqpQueryId& other) const { return true; } +TString TKqpQueryId::SerializeToString() const { + TStringBuilder result = TStringBuilder() << "{" + << "Cluster: " << Cluster << ", " + << "Database: " << Database << ", " + << "UserSid: " << UserSid << ", " + << "Text: " << EscapeC(Text) << ", " + << "Settings: " << Settings.SerializeToString() << ", "; + if (QueryParameterTypes) { + result << "QueryParameterTypes: ["; + for (const auto& param : *QueryParameterTypes) { + result << "name: " << param.first << ", type: " << param.second.ShortDebugString(); + } + result << "], "; + } else { + result << "QueryParameterTypes: , "; + } + + result << "GUCSettings: " << GUCSettings.SerializeToString() << "}"; + return result; +} + } // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/common/simple/query_id.h b/ydb/core/kqp/common/simple/query_id.h index 3470333b5072..7dd7ef0dc5a7 100644 --- a/ydb/core/kqp/common/simple/query_id.h +++ b/ydb/core/kqp/common/simple/query_id.h @@ -44,6 +44,8 @@ struct TKqpQueryId { GUCSettings.GetHash()); return THash()(tuple); } + + TString SerializeToString() const; }; } // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/common/simple/settings.h b/ydb/core/kqp/common/simple/settings.h index f6227f45602f..70b7c4aaa285 100644 --- a/ydb/core/kqp/common/simple/settings.h +++ b/ydb/core/kqp/common/simple/settings.h @@ -3,7 +3,9 @@ #include #include +#include #include +#include #include @@ -39,6 +41,14 @@ struct TKqpQuerySettings { auto tuple = std::make_tuple(DocumentApiRestricted, IsInternalCall, QueryType, Syntax); return THash()(tuple); } 
+ + TString SerializeToString() const { + TStringBuilder result = TStringBuilder() << "{" + << "DocumentApiRestricted: " << DocumentApiRestricted << ", " + << "IsInternalCall: " << IsInternalCall << ", " + << "QueryType: " << QueryType << "}"; + return result; + } }; } // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/compile_service/kqp_compile_actor.cpp b/ydb/core/kqp/compile_service/kqp_compile_actor.cpp index 776d2c534583..4e496d6bd276 100644 --- a/ydb/core/kqp/compile_service/kqp_compile_actor.cpp +++ b/ydb/core/kqp/compile_service/kqp_compile_actor.cpp @@ -146,7 +146,8 @@ class TKqpCompileActor : public TActorBootstrapped { .SetIsEnableExternalDataSources(AppData(ctx)->FeatureFlags.GetEnableExternalDataSources()) .SetIsEnablePgConstsToParams(Config->EnablePgConstsToParams) .SetApplicationName(ApplicationName) - .SetQueryParameters(QueryId.QueryParameterTypes); + .SetQueryParameters(QueryId.QueryParameterTypes) + .SetIsEnablePgSyntax(AppData(ctx)->FeatureFlags.GetEnablePgSyntax()); return ParseStatements(QueryId.Text, QueryId.Settings.Syntax, QueryId.IsSql(), settingsBuilder, PerStatementResult); } @@ -172,11 +173,8 @@ class TKqpCompileActor : public TActorBootstrapped { } void StartSplitting(const TActorContext &ctx) { - YQL_ENSURE(PerStatementResult); - const auto prepareSettings = PrepareCompilationSettings(ctx); - - auto result = KqpHost->SplitQuery(QueryId.Text, prepareSettings); + auto result = KqpHost->SplitQuery(QueryRef, prepareSettings); Become(&TKqpCompileActor::CompileState); ReplySplitResult(ctx, std::move(result)); @@ -275,13 +273,12 @@ class TKqpCompileActor : public TActorBootstrapped { Config->FeatureFlags = AppData(ctx)->FeatureFlags; KqpHost = CreateKqpHost(Gateway, QueryId.Cluster, QueryId.Database, Config, ModuleResolverState->ModuleResolver, - FederatedQuerySetup, UserToken, GUCSettings, ApplicationName, AppData(ctx)->FunctionRegistry, + FederatedQuerySetup, UserToken, GUCSettings, QueryServiceConfig, ApplicationName, 
AppData(ctx)->FunctionRegistry, false, false, std::move(TempTablesState), nullptr, SplitCtx); IKqpHost::TPrepareSettings prepareSettings; prepareSettings.DocumentApiRestricted = QueryId.Settings.DocumentApiRestricted; prepareSettings.IsInternalCall = QueryId.Settings.IsInternalCall; - prepareSettings.PerStatementResult = PerStatementResult; switch (QueryId.Settings.Syntax) { case Ydb::Query::Syntax::SYNTAX_YQL_V1: @@ -382,9 +379,9 @@ class TKqpCompileActor : public TActorBootstrapped { void ReplyError(Ydb::StatusIds::StatusCode status, const TIssues& issues) { if (!KqpCompileResult) { - KqpCompileResult = TKqpCompileResult::Make(Uid, status, issues, ETableReadType::Other, std::move(QueryId)); + KqpCompileResult = TKqpCompileResult::Make(Uid, status, issues, ETableReadType::Other, std::move(QueryId), std::move(QueryAst)); } else { - KqpCompileResult = TKqpCompileResult::Make(Uid, status, issues, ETableReadType::Other, std::move(KqpCompileResult->Query)); + KqpCompileResult = TKqpCompileResult::Make(Uid, status, issues, ETableReadType::Other, std::move(KqpCompileResult->Query), std::move(KqpCompileResult->QueryAst)); } Reply(); @@ -453,16 +450,12 @@ class TKqpCompileActor : public TActorBootstrapped { } void FillCompileResult(std::unique_ptr preparingQuery, NKikimrKqp::EQueryType queryType, - bool allowCache) { + bool allowCache, bool success) { auto preparedQueryHolder = std::make_shared( - preparingQuery.release(), AppData()->FunctionRegistry); + preparingQuery.release(), AppData()->FunctionRegistry, !success); preparedQueryHolder->MutableLlvmSettings().Fill(Config, queryType); KqpCompileResult->PreparedQuery = preparedQueryHolder; KqpCompileResult->AllowCache = CanCacheQuery(KqpCompileResult->PreparedQuery->GetPhysicalQuery()) && allowCache; - - if (QueryAst) { - KqpCompileResult->Ast = QueryAst->Ast; - } } void Handle(TEvKqp::TEvContinueProcess::TPtr &ev, const TActorContext &ctx) { @@ -481,7 +474,7 @@ class TKqpCompileActor : public TActorBootstrapped { if 
(kqpResult.NeedToSplit) { KqpCompileResult = TKqpCompileResult::Make( - Uid, status, kqpResult.Issues(), ETableReadType::Other, std::move(QueryId), {}, true); + Uid, status, kqpResult.Issues(), ETableReadType::Other, std::move(QueryId), std::move(QueryAst), true); Reply(); return; } @@ -499,12 +492,12 @@ class TKqpCompileActor : public TActorBootstrapped { auto queryType = QueryId.Settings.QueryType; - KqpCompileResult = TKqpCompileResult::Make(Uid, status, kqpResult.Issues(), maxReadType, std::move(QueryId)); + KqpCompileResult = TKqpCompileResult::Make(Uid, status, kqpResult.Issues(), maxReadType, std::move(QueryId), std::move(QueryAst)); KqpCompileResult->CommandTagName = kqpResult.CommandTagName; if (status == Ydb::StatusIds::SUCCESS) { YQL_ENSURE(kqpResult.PreparingQuery); - FillCompileResult(std::move(kqpResult.PreparingQuery), queryType, kqpResult.AllowCache); + FillCompileResult(std::move(kqpResult.PreparingQuery), queryType, kqpResult.AllowCache, true); auto now = TInstant::Now(); auto duration = now - StartTime; @@ -515,7 +508,7 @@ class TKqpCompileActor : public TActorBootstrapped { << ", duration: " << duration); } else { if (kqpResult.PreparingQuery) { - FillCompileResult(std::move(kqpResult.PreparingQuery), queryType, kqpResult.AllowCache); + FillCompileResult(std::move(kqpResult.PreparingQuery), queryType, kqpResult.AllowCache, false); } LOG_ERROR_S(ctx, NKikimrServices::KQP_COMPILE_ACTOR, "Compilation failed" diff --git a/ydb/core/kqp/compile_service/kqp_compile_computation_pattern_service.cpp b/ydb/core/kqp/compile_service/kqp_compile_computation_pattern_service.cpp index 6cdde2f174d4..ed8159b8591a 100644 --- a/ydb/core/kqp/compile_service/kqp_compile_computation_pattern_service.cpp +++ b/ydb/core/kqp/compile_service/kqp_compile_computation_pattern_service.cpp @@ -62,7 +62,7 @@ class TKqpCompileComputationPatternService : public TActorBootstrappedPattern->Compile({}, nullptr); - 
patternCache->NotifyPatternCompiled(patternToCompile.SerializedProgram, patternToCompile.Entry); + patternCache->NotifyPatternCompiled(patternToCompile.SerializedProgram); patternToCompile.Entry = nullptr; Counters->CompiledComputationPatterns->Inc(); diff --git a/ydb/core/kqp/compile_service/kqp_compile_service.cpp b/ydb/core/kqp/compile_service/kqp_compile_service.cpp index ff52fe2277f9..2f6582b5dff4 100644 --- a/ydb/core/kqp/compile_service/kqp_compile_service.cpp +++ b/ydb/core/kqp/compile_service/kqp_compile_service.cpp @@ -41,21 +41,25 @@ class TKqpQueryCache { YQL_ENSURE(compileResult->PreparedQuery); auto queryIt = QueryIndex.emplace(query, compileResult->Uid); + if (!queryIt.second) { + EraseByUid(compileResult->Uid); + QueryIndex.erase(query); + } Y_ENSURE(queryIt.second); } void InsertAst(const TKqpCompileResult::TConstPtr& compileResult) { Y_ENSURE(compileResult->Query); - Y_ENSURE(compileResult->Ast); + Y_ENSURE(compileResult->GetAst()); - AstIndex.emplace(GetQueryIdWithAst(*compileResult->Query, *compileResult->Ast), compileResult->Uid); + AstIndex.emplace(GetQueryIdWithAst(*compileResult->Query, *compileResult->GetAst()), compileResult->Uid); } bool Insert(const TKqpCompileResult::TConstPtr& compileResult, bool isEnableAstCache, bool isPerStatementExecution) { if (!isPerStatementExecution) { InsertQuery(compileResult); } - if (isEnableAstCache && compileResult->Ast) { + if (isEnableAstCache && compileResult->GetAst()) { InsertAst(compileResult); } @@ -72,8 +76,8 @@ class TKqpQueryCache { auto queryId = *removedItem->Value.CompileResult->Query; QueryIndex.erase(queryId); - if (removedItem->Value.CompileResult->Ast) { - AstIndex.erase(GetQueryIdWithAst(queryId, *removedItem->Value.CompileResult->Ast)); + if (removedItem->Value.CompileResult->GetAst()) { + AstIndex.erase(GetQueryIdWithAst(queryId, *removedItem->Value.CompileResult->GetAst())); } auto indexIt = Index.find(*removedItem); if (indexIt != Index.end()) { @@ -186,8 +190,8 @@ class 
TKqpQueryCache { Y_ABORT_UNLESS(item->Value.CompileResult->Query); auto queryId = *item->Value.CompileResult->Query; QueryIndex.erase(queryId); - if (item->Value.CompileResult->Ast) { - AstIndex.erase(GetQueryIdWithAst(queryId, *item->Value.CompileResult->Ast)); + if (item->Value.CompileResult->GetAst()) { + AstIndex.erase(GetQueryIdWithAst(queryId, *item->Value.CompileResult->GetAst())); } Index.erase(it); @@ -323,6 +327,8 @@ struct TKqpCompileRequest { NYql::TExprContext* SplitCtx; NYql::TExprNode::TPtr SplitExpr; + bool FindInCache = true; + bool IsIntrestedInResult() const { return IntrestedInResult->load(); } @@ -528,6 +534,8 @@ class TKqpCompileService : public TActorBootstrapped { bool enableCreateTableAs = TableServiceConfig.GetEnableCreateTableAs(); auto blockChannelsMode = TableServiceConfig.GetBlockChannelsMode(); + bool enableImplicitQueryParameterTypes = TableServiceConfig.GetEnableImplicitQueryParameterTypes(); + auto mkqlHeavyLimit = TableServiceConfig.GetResourceManager().GetMkqlHeavyProgramMemoryLimit(); bool enableQueryServiceSpilling = TableServiceConfig.GetEnableQueryServiceSpilling(); @@ -558,7 +566,8 @@ class TKqpCompileService : public TActorBootstrapped { TableServiceConfig.GetExtractPredicateRangesLimit() != rangesLimit || TableServiceConfig.GetResourceManager().GetMkqlHeavyProgramMemoryLimit() != mkqlHeavyLimit || TableServiceConfig.GetIdxLookupJoinPointsLimit() != idxLookupPointsLimit || - TableServiceConfig.GetEnableQueryServiceSpilling() != enableQueryServiceSpilling) { + TableServiceConfig.GetEnableQueryServiceSpilling() != enableQueryServiceSpilling || + TableServiceConfig.GetEnableImplicitQueryParameterTypes() != enableImplicitQueryParameterTypes) { QueryCache.Clear(); @@ -611,6 +620,7 @@ class TKqpCompileService : public TActorBootstrapped { << ", queryUid: " << (request.Uid ? *request.Uid : "") << ", queryText: \"" << (request.Query ? 
EscapeC(request.Query->Text) : "") << "\"" << ", keepInCache: " << request.KeepInCache + << ", split: " << request.Split << *request.UserRequestContext); *Counters->CompileQueryCacheSize = QueryCache.Size(); @@ -668,6 +678,8 @@ class TKqpCompileService : public TActorBootstrapped { Y_ENSURE(query.UserSid == userSid); } + LOG_DEBUG_S(ctx, NKikimrServices::KQP_COMPILE_SERVICE, "Try to find query by queryId, queryId: " << query.SerializeToString()); + auto compileResult = QueryCache.FindByQuery(query, request.KeepInCache); if (HasTempTablesNameClashes(compileResult, request.TempTablesState)) { compileResult = nullptr; @@ -697,7 +709,7 @@ class TKqpCompileService : public TActorBootstrapped { request.Deadline, ev->Get()->Split ? ECompileActorAction::SPLIT - : TableServiceConfig.GetEnableAstCache() + : (TableServiceConfig.GetEnableAstCache() && !request.QueryAst) ? ECompileActorAction::PARSE : ECompileActorAction::COMPILE); TKqpCompileRequest compileRequest(ev->Sender, CreateGuidAsString(), std::move(*request.Query), @@ -754,19 +766,37 @@ class TKqpCompileService : public TActorBootstrapped { } if (compileResult || request.Query) { - QueryCache.EraseByUid(request.Uid); - Counters->ReportCompileRequestCompile(dbCounters); NWilson::TSpan compileServiceSpan(TWilsonKqp::CompileService, ev->Get() ? std::move(ev->TraceId) : NWilson::TTraceId(), "CompileService"); - TKqpCompileSettings compileSettings(true, request.IsQueryActionPrepare, false, request.Deadline, TableServiceConfig.GetEnableAstCache() ? ECompileActorAction::PARSE : ECompileActorAction::COMPILE); - TKqpCompileRequest compileRequest(ev->Sender, request.Uid, request.Query ? *request.Query : *compileResult->Query, + TKqpCompileSettings compileSettings( + true, + request.IsQueryActionPrepare, + false, + request.Deadline, + ev->Get()->Split + ? ECompileActorAction::SPLIT + : (TableServiceConfig.GetEnableAstCache() && !request.QueryAst) + ? 
ECompileActorAction::PARSE + : ECompileActorAction::COMPILE); + auto query = request.Query ? *request.Query : *compileResult->Query; + if (compileResult) { + query.UserSid = compileResult->Query->UserSid; + if (query != *compileResult->Query) { + LOG_WARN_S(ctx, NKikimrServices::KQP_COMPILE_SERVICE, "queryId in recompile request and queryId in cache are different" + << ", queryId in request: " << query.SerializeToString() + << ", queryId in cache: " << compileResult->Query->SerializeToString() + ); + } + } + TKqpCompileRequest compileRequest(ev->Sender, request.Uid, compileResult ? *compileResult->Query : *request.Query, compileSettings, request.UserToken, dbCounters, request.GUCSettings, request.ApplicationName, ev->Cookie, std::move(ev->Get()->IntrestedInResult), ev->Get()->UserRequestContext, ev->Get() ? std::move(ev->Get()->Orbit) : NLWTrace::TOrbit(), std::move(compileServiceSpan), std::move(ev->Get()->TempTablesState)); + compileRequest.FindInCache = false; if (TableServiceConfig.GetEnableAstCache() && request.QueryAst) { return CompileByAst(*request.QueryAst, compileRequest, ctx); @@ -824,6 +854,7 @@ class TKqpCompileService : public TActorBootstrapped { if (compileResult->NeedToSplit) { Reply(compileRequest.Sender, compileResult, compileStats, ctx, compileRequest.Cookie, std::move(compileRequest.Orbit), std::move(compileRequest.CompileServiceSpan), (CollectDiagnostics ? 
ev->Get()->ReplayMessageUserView : std::nullopt)); + ProcessQueue(ctx); return; } @@ -835,7 +866,7 @@ class TKqpCompileService : public TActorBootstrapped { try { if (compileResult->Status == Ydb::StatusIds::SUCCESS) { if (!hasTempTablesNameClashes) { - UpdateQueryCache(compileResult, keepInCache, compileRequest.CompileSettings.IsQueryActionPrepare, isPerStatementExecution); + UpdateQueryCache(ctx, compileResult, keepInCache, compileRequest.CompileSettings.IsQueryActionPrepare, isPerStatementExecution); } if (ev->Get()->ReplayMessage && !QueryReplayBackend->IsNull()) { @@ -917,15 +948,21 @@ class TKqpCompileService : public TActorBootstrapped { return compileResult->PreparedQuery->HasTempTables(tempTablesState, withSessionId); } - void UpdateQueryCache(TKqpCompileResult::TConstPtr compileResult, bool keepInCache, bool isQueryActionPrepare, bool isPerStatementExecution) { + void UpdateQueryCache(const TActorContext& ctx, TKqpCompileResult::TConstPtr compileResult, bool keepInCache, bool isQueryActionPrepare, bool isPerStatementExecution) { if (QueryCache.FindByUid(compileResult->Uid, false)) { QueryCache.Replace(compileResult); } else if (keepInCache) { + if (compileResult->Query) { + LOG_DEBUG_S(ctx, NKikimrServices::KQP_COMPILE_SERVICE, "Insert query into compile cache, queryId: " << compileResult->Query->SerializeToString()); + if (QueryCache.FindByQuery(*compileResult->Query, keepInCache)) { + LOG_ERROR_S(ctx, NKikimrServices::KQP_COMPILE_SERVICE, "Trying to insert query into compile cache when it is already there"); + } + } if (QueryCache.Insert(compileResult, TableServiceConfig.GetEnableAstCache(), isPerStatementExecution)) { Counters->CompileQueryCacheEvicted->Inc(); } if (compileResult->Query && isQueryActionPrepare) { - if (InsertPreparingQuery(compileResult, true, isPerStatementExecution)) { + if (InsertPreparingQuery(ctx, compileResult, true, isPerStatementExecution)) { Counters->CompileQueryCacheEvicted->Inc(); }; } @@ -936,9 +973,11 @@ class 
TKqpCompileService : public TActorBootstrapped { YQL_ENSURE(queryAst.Ast); YQL_ENSURE(queryAst.Ast->IsOk()); YQL_ENSURE(queryAst.Ast->Root); + LOG_DEBUG_S(ctx, NKikimrServices::KQP_COMPILE_SERVICE, "Try to find query by ast, queryId: " << compileRequest.Query.SerializeToString() + << ", ast: " << queryAst.Ast->Root->ToString()); auto compileResult = QueryCache.FindByAst(compileRequest.Query, *queryAst.Ast, compileRequest.CompileSettings.KeepInCache); - if (HasTempTablesNameClashes(compileResult, compileRequest.TempTablesState)) { + if (!compileRequest.FindInCache || HasTempTablesNameClashes(compileResult, compileRequest.TempTablesState)) { compileResult = nullptr; } @@ -949,7 +988,7 @@ class TKqpCompileService : public TActorBootstrapped { << ", sender: " << compileRequest.Sender << ", queryUid: " << compileResult->Uid); - compileResult->Ast->PgAutoParamValues = std::move(queryAst.Ast->PgAutoParamValues); + compileResult->GetAst()->PgAutoParamValues = std::move(queryAst.Ast->PgAutoParamValues); ReplyFromCache(compileRequest.Sender, compileResult, ctx, compileRequest.Cookie, std::move(compileRequest.Orbit), std::move(compileRequest.CompileServiceSpan)); return; @@ -961,7 +1000,6 @@ class TKqpCompileService : public TActorBootstrapped { compileRequest.Orbit, compileRequest.Query.UserSid); - compileRequest.CompileSettings.Action = ECompileActorAction::COMPILE; compileRequest.QueryAst = std::move(queryAst); if (!RequestsQueue.Enqueue(std::move(compileRequest))) { @@ -994,6 +1032,7 @@ class TKqpCompileService : public TActorBootstrapped { return; } + compileRequest.CompileSettings.Action = ECompileActorAction::COMPILE; CompileByAst(astStatements.front(), compileRequest, ctx); } @@ -1004,7 +1043,7 @@ class TKqpCompileService : public TActorBootstrapped { } private: - bool InsertPreparingQuery(const TKqpCompileResult::TConstPtr& compileResult, bool keepInCache, bool isPerStatementExecution) { + bool InsertPreparingQuery(const TActorContext& ctx, const 
TKqpCompileResult::TConstPtr& compileResult, bool keepInCache, bool isPerStatementExecution) { YQL_ENSURE(compileResult->Query); auto query = *compileResult->Query; @@ -1023,12 +1062,13 @@ class TKqpCompileService : public TActorBootstrapped { if (QueryCache.FindByQuery(query, keepInCache)) { return false; } - if (compileResult->Ast && QueryCache.FindByAst(query, *compileResult->Ast, keepInCache)) { + if (compileResult->GetAst() && QueryCache.FindByAst(query, *compileResult->GetAst(), keepInCache)) { return false; } - auto newCompileResult = TKqpCompileResult::Make(CreateGuidAsString(), compileResult->Status, compileResult->Issues, compileResult->MaxReadType, std::move(query), compileResult->Ast); + auto newCompileResult = TKqpCompileResult::Make(CreateGuidAsString(), compileResult->Status, compileResult->Issues, compileResult->MaxReadType, std::move(query), compileResult->QueryAst); newCompileResult->AllowCache = compileResult->AllowCache; newCompileResult->PreparedQuery = compileResult->PreparedQuery; + LOG_DEBUG_S(ctx, NKikimrServices::KQP_COMPILE_SERVICE, "Insert preparing query with params, queryId: " << query.SerializeToString()); return QueryCache.Insert(newCompileResult, TableServiceConfig.GetEnableAstCache(), isPerStatementExecution); } diff --git a/ydb/core/kqp/compute_actor/kqp_compute_actor.cpp b/ydb/core/kqp/compute_actor/kqp_compute_actor.cpp index 74b0f6f14f79..758476d48339 100644 --- a/ydb/core/kqp/compute_actor/kqp_compute_actor.cpp +++ b/ydb/core/kqp/compute_actor/kqp_compute_actor.cpp @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include @@ -131,18 +131,18 @@ namespace NKikimr::NKqp { using namespace NYql::NDq; using namespace NYql::NDqProto; -IActor* CreateKqpScanComputeActor(const TActorId& executerId, ui64 txId, +IActor* CreateKqpScanComputeActor(const TActorId& executerId, ui64 txId, TMaybe lockTxId, ui32 lockNodeId, TDqTask* task, IDqAsyncIoFactory::TPtr asyncIoFactory, const NYql::NDq::TComputeRuntimeSettings& settings, 
const TComputeMemoryLimits& memoryLimits, NWilson::TTraceId traceId, TIntrusivePtr arena) { - return new NScanPrivate::TKqpScanComputeActor(executerId, txId, task, std::move(asyncIoFactory), + return new NScanPrivate::TKqpScanComputeActor(executerId, txId, lockTxId, lockNodeId, task, std::move(asyncIoFactory), settings, memoryLimits, std::move(traceId), std::move(arena)); } IActor* CreateKqpScanFetcher(const NKikimrKqp::TKqpSnapshot& snapshot, std::vector&& computeActors, const NKikimrTxDataShard::TKqpTransaction::TScanTaskMeta& meta, const NYql::NDq::TComputeRuntimeSettings& settings, - const ui64 txId, const TShardsScanningPolicy& shardsScanningPolicy, TIntrusivePtr counters, NWilson::TTraceId traceId) { - return new NScanPrivate::TKqpScanFetcherActor(snapshot, settings, std::move(computeActors), txId, meta, shardsScanningPolicy, counters, std::move(traceId)); + const ui64 txId, TMaybe lockTxId, ui32 lockNodeId, const TShardsScanningPolicy& shardsScanningPolicy, TIntrusivePtr counters, NWilson::TTraceId traceId) { + return new NScanPrivate::TKqpScanFetcherActor(snapshot, settings, std::move(computeActors), txId, lockTxId, lockNodeId, meta, shardsScanningPolicy, counters, std::move(traceId)); } } diff --git a/ydb/core/kqp/compute_actor/kqp_compute_actor.h b/ydb/core/kqp/compute_actor/kqp_compute_actor.h index 0e191e113780..9bd19c8b002d 100644 --- a/ydb/core/kqp/compute_actor/kqp_compute_actor.h +++ b/ydb/core/kqp/compute_actor/kqp_compute_actor.h @@ -50,14 +50,14 @@ IActor* CreateKqpComputeActor(const TActorId& executerId, ui64 txId, NYql::NDqPr TIntrusivePtr arena, const std::optional& federatedQuerySetup, const TGUCSettings::TPtr& GUCSettings); -IActor* CreateKqpScanComputeActor(const TActorId& executerId, ui64 txId, +IActor* CreateKqpScanComputeActor(const TActorId& executerId, ui64 txId, TMaybe lockTxId, ui32 lockNodeId, NYql::NDqProto::TDqTask* task, NYql::NDq::IDqAsyncIoFactory::TPtr asyncIoFactory, const NYql::NDq::TComputeRuntimeSettings& settings, const 
NYql::NDq::TComputeMemoryLimits& memoryLimits, NWilson::TTraceId traceId, TIntrusivePtr arena); IActor* CreateKqpScanFetcher(const NKikimrKqp::TKqpSnapshot& snapshot, std::vector&& computeActors, const NKikimrTxDataShard::TKqpTransaction::TScanTaskMeta& meta, const NYql::NDq::TComputeRuntimeSettings& settings, - const ui64 txId, const TShardsScanningPolicy& shardsScanningPolicy, TIntrusivePtr counters, NWilson::TTraceId traceId); + const ui64 txId, TMaybe lockTxId, ui32 lockNodeId, const TShardsScanningPolicy& shardsScanningPolicy, TIntrusivePtr counters, NWilson::TTraceId traceId); NYql::NDq::IDqAsyncIoFactory::TPtr CreateKqpAsyncIoFactory( TIntrusivePtr counters, diff --git a/ydb/core/kqp/compute_actor/kqp_compute_actor_factory.cpp b/ydb/core/kqp/compute_actor/kqp_compute_actor_factory.cpp index 9122f611e39c..7a77406889ee 100644 --- a/ydb/core/kqp/compute_actor/kqp_compute_actor_factory.cpp +++ b/ydb/core/kqp/compute_actor/kqp_compute_actor_factory.cpp @@ -6,137 +6,180 @@ namespace NKikimr::NKqp::NComputeActor { + struct TMemoryQuotaManager : public NYql::NDq::TGuaranteeQuotaManager { TMemoryQuotaManager(std::shared_ptr resourceManager , NRm::EKqpMemoryPool memoryPool , std::shared_ptr state - , ui64 txId - , ui64 taskId + , TIntrusivePtr tx + , TIntrusivePtr task , ui64 limit , ui64 reasonableSpillingTreshold) : NYql::NDq::TGuaranteeQuotaManager(limit, limit) , ResourceManager(std::move(resourceManager)) , MemoryPool(memoryPool) , State(std::move(state)) - , TxId(txId) - , TaskId(taskId) + , Tx(std::move(tx)) + , Task(std::move(task)) , ReasonableSpillingTreshold(reasonableSpillingTreshold) { } ~TMemoryQuotaManager() override { - State->OnTaskTerminate(TxId, TaskId, Success); - ResourceManager->FreeResources(TxId, TaskId); + if (State) { + State->OnTaskTerminate(Tx->TxId, Task->TaskId, Success); + } + + ResourceManager->FreeResources(Tx, Task); } bool AllocateExtraQuota(ui64 extraSize) override { - auto result = ResourceManager->AllocateResources(TxId, TaskId, + 
auto result = ResourceManager->AllocateResources(Tx, Task, NRm::TKqpResourcesRequest{.MemoryPool = MemoryPool, .Memory = extraSize}); if (!result) { AFL_WARN(NKikimrServices::KQP_COMPUTE) ("problem", "cannot_allocate_memory") - ("tx_id", TxId) - ("task_id", TaskId) + ("tx_id", Tx->TxId) + ("task_id", Task->TaskId) ("memory", extraSize); return false; } - TotalQueryAllocationsSize = result.TotalAllocatedQueryMemory; - return true; } void FreeExtraQuota(ui64 extraSize) override { - ResourceManager->FreeResources(TxId, TaskId, - NRm::TKqpResourcesRequest{.MemoryPool = MemoryPool, .Memory = extraSize} - ); + NRm::TKqpResourcesRequest request = NRm::TKqpResourcesRequest{.MemoryPool = MemoryPool, .Memory = extraSize}; + ResourceManager->FreeResources(Tx, Task, Task->FitRequest(request)); } bool IsReasonableToUseSpilling() const override { - return TotalQueryAllocationsSize >= ReasonableSpillingTreshold; + return Tx->GetExtraMemoryAllocatedSize() >= ReasonableSpillingTreshold; + } + + TString MemoryConsumptionDetails() const override { + return Tx->ToString(); } void TerminateHandler(bool success, const NYql::TIssues& issues) { AFL_DEBUG(NKikimrServices::KQP_COMPUTE) ("problem", "finish_compute_actor") - ("tx_id", TxId)("task_id", TaskId)("success", success)("message", issues.ToOneLineString()); + ("tx_id", Tx->TxId)("task_id", Task->TaskId)("success", success)("message", issues.ToOneLineString()); Success = success; } std::shared_ptr ResourceManager; NRm::EKqpMemoryPool MemoryPool; std::shared_ptr State; - ui64 TxId; - ui64 TaskId; + TIntrusivePtr Tx; + TIntrusivePtr Task; bool Success = true; - ui64 TotalQueryAllocationsSize = 0; ui64 ReasonableSpillingTreshold = 0; }; class TKqpCaFactory : public IKqpNodeComputeActorFactory { - NKikimrConfig::TTableServiceConfig::TResourceManager Config; std::shared_ptr ResourceManager_; NYql::NDq::IDqAsyncIoFactory::TPtr AsyncIoFactory; const std::optional FederatedQuerySetup; + std::atomic MkqlLightProgramMemoryLimit = 0; + 
std::atomic MkqlHeavyProgramMemoryLimit = 0; + std::atomic MinChannelBufferSize = 0; + std::atomic ReasonableSpillingTreshold = 0; + std::atomic MinMemAllocSize = 8_MB; + std::atomic MinMemFreeSize = 32_MB; + public: TKqpCaFactory(const NKikimrConfig::TTableServiceConfig::TResourceManager& config, std::shared_ptr resourceManager, NYql::NDq::IDqAsyncIoFactory::TPtr asyncIoFactory, const std::optional federatedQuerySetup) - : Config(config) - , ResourceManager_(resourceManager) + : ResourceManager_(resourceManager) , AsyncIoFactory(asyncIoFactory) , FederatedQuerySetup(federatedQuerySetup) - {} + { + ApplyConfig(config); + } - TActorId CreateKqpComputeActor(const TActorId& executerId, ui64 txId, NYql::NDqProto::TDqTask* dqTask, - const NYql::NDq::TComputeRuntimeSettings& settings, - NWilson::TTraceId traceId, TIntrusivePtr arena, const TString& serializedGUCSettings, - TComputeStagesWithScan& computesByStage, ui64 outputChunkMaxSize, std::shared_ptr state, - NRm::EKqpMemoryPool memoryPool, ui32 numberOfTasks) + void ApplyConfig(const NKikimrConfig::TTableServiceConfig::TResourceManager& config) { + MkqlLightProgramMemoryLimit.store(config.GetMkqlLightProgramMemoryLimit()); + MkqlHeavyProgramMemoryLimit.store(config.GetMkqlHeavyProgramMemoryLimit()); + MinChannelBufferSize.store(config.GetMinChannelBufferSize()); + ReasonableSpillingTreshold.store(config.GetReasonableSpillingTreshold()); + MinMemAllocSize.store(config.GetMinMemAllocSize()); + MinMemFreeSize.store(config.GetMinMemFreeSize()); + } + + TActorStartResult CreateKqpComputeActor(TCreateArgs&& args) { NYql::NDq::TComputeMemoryLimits memoryLimits; memoryLimits.ChannelBufferSize = 0; - memoryLimits.MkqlLightProgramMemoryLimit = Config.GetMkqlLightProgramMemoryLimit(); - memoryLimits.MkqlHeavyProgramMemoryLimit = Config.GetMkqlHeavyProgramMemoryLimit(); + memoryLimits.MkqlLightProgramMemoryLimit = MkqlLightProgramMemoryLimit.load(); + memoryLimits.MkqlHeavyProgramMemoryLimit = MkqlHeavyProgramMemoryLimit.load(); 
+ memoryLimits.MinMemAllocSize = MinMemAllocSize.load(); + memoryLimits.MinMemFreeSize = MinMemFreeSize.load(); - auto estimation = EstimateTaskResources(*dqTask, Config, numberOfTasks); + auto estimation = ResourceManager_->EstimateTaskResources(*args.Task, args.NumberOfTasks); + NRm::TKqpResourcesRequest resourcesRequest; + resourcesRequest.MemoryPool = args.MemoryPool; + resourcesRequest.ExecutionUnits = 1; + resourcesRequest.Memory = memoryLimits.MkqlLightProgramMemoryLimit; + + TIntrusivePtr task = MakeIntrusive(args.Task->GetId(), args.TxInfo->CreatedAt); + + auto rmResult = ResourceManager_->AllocateResources( + args.TxInfo, task, resourcesRequest); + + if (!rmResult) { + return NRm::TKqpRMAllocateResult{rmResult}; + } { ui32 inputChannelsCount = 0; - for (auto&& i : dqTask->GetInputs()) { + for (auto&& i : args.Task->GetInputs()) { inputChannelsCount += i.ChannelsSize(); } - memoryLimits.ChannelBufferSize = std::max(estimation.ChannelBufferMemoryLimit / std::max(1, inputChannelsCount), Config.GetMinChannelBufferSize()); - memoryLimits.OutputChunkMaxSize = outputChunkMaxSize; + memoryLimits.ChannelBufferSize = std::max(estimation.ChannelBufferMemoryLimit / std::max(1, inputChannelsCount), MinChannelBufferSize.load()); + memoryLimits.OutputChunkMaxSize = args.OutputChunkMaxSize; AFL_DEBUG(NKikimrServices::KQP_COMPUTE)("event", "channel_info") ("ch_size", estimation.ChannelBufferMemoryLimit) ("ch_count", estimation.ChannelBuffersCount) ("ch_limit", memoryLimits.ChannelBufferSize) - ("inputs", dqTask->InputsSize()) + ("inputs", args.Task->InputsSize()) ("input_channels_count", inputChannelsCount); } - auto& taskOpts = dqTask->GetProgram().GetSettings(); + auto& taskOpts = args.Task->GetProgram().GetSettings(); auto limit = taskOpts.GetHasMapJoin() || taskOpts.GetHasStateAggregation() ? 
memoryLimits.MkqlHeavyProgramMemoryLimit : memoryLimits.MkqlLightProgramMemoryLimit; memoryLimits.MemoryQuotaManager = std::make_shared( ResourceManager_, - memoryPool, - std::move(state), - txId, - dqTask->GetId(), + args.MemoryPool, + std::move(args.State), + std::move(args.TxInfo), + std::move(task), limit, - Config.GetReasonableSpillingTreshold()); + ReasonableSpillingTreshold.load()); + + auto runtimeSettings = args.RuntimeSettings; + runtimeSettings.ExtraMemoryAllocationPool = args.MemoryPool; + runtimeSettings.UseSpilling = args.WithSpilling; + runtimeSettings.StatsMode = args.StatsMode; + + if (args.Deadline) { + runtimeSettings.Timeout = args.Deadline - TAppData::TimeProvider->Now(); + } + + if (args.RlPath) { + runtimeSettings.RlPath = args.RlPath; + } - auto runtimeSettings = settings; NYql::NDq::IMemoryQuotaManager::TWeakPtr memoryQuotaManager = memoryLimits.MemoryQuotaManager; runtimeSettings.TerminateHandler = [memoryQuotaManager] (bool success, const NYql::TIssues& issues) { @@ -157,29 +200,32 @@ class TKqpCaFactory : public IKqpNodeComputeActorFactory { }; ETableKind tableKind = ETableKind::Unknown; - if (dqTask->HasMetaId()) { - YQL_ENSURE(computesByStage.GetMetaById(*dqTask, meta) || dqTask->GetMeta().UnpackTo(&meta), "cannot take meta on MetaId exists in tasks"); + if (args.Task->HasMetaId()) { + YQL_ENSURE(args.ComputesByStages); + YQL_ENSURE(args.ComputesByStages->GetMetaById(*args.Task, meta) || args.Task->GetMeta().UnpackTo(&meta), "cannot take meta on MetaId exists in tasks"); tableKind = tableKindExtract(meta); - } else if (dqTask->GetMeta().UnpackTo(&meta)) { + } else if (args.Task->GetMeta().UnpackTo(&meta)) { tableKind = tableKindExtract(meta); } if (tableKind == ETableKind::Datashard || tableKind == ETableKind::Olap) { - auto& info = computesByStage.UpsertTaskWithScan(*dqTask, meta, !AppData()->FeatureFlags.GetEnableSeparationComputeActorsFromRead()); - IActor* computeActor = CreateKqpScanComputeActor(executerId, txId, dqTask, + 
YQL_ENSURE(args.ComputesByStages); + auto& info = args.ComputesByStages->UpsertTaskWithScan(*args.Task, meta, !AppData()->FeatureFlags.GetEnableSeparationComputeActorsFromRead()); + IActor* computeActor = CreateKqpScanComputeActor(args.ExecuterId, args.TxId, args.LockTxId, args.LockNodeId, args.Task, AsyncIoFactory, runtimeSettings, memoryLimits, - std::move(traceId), std::move(arena)); + std::move(args.TraceId), std::move(args.Arena)); TActorId result = TlsActivationContext->Register(computeActor); info.MutableActorIds().emplace_back(result); return result; } else { std::shared_ptr GUCSettings; - if (!serializedGUCSettings.empty()) { - GUCSettings = std::make_shared(serializedGUCSettings); + if (!args.SerializedGUCSettings.empty()) { + GUCSettings = std::make_shared(args.SerializedGUCSettings); } - IActor* computeActor = ::NKikimr::NKqp::CreateKqpComputeActor(executerId, txId, dqTask, AsyncIoFactory, - runtimeSettings, memoryLimits, std::move(traceId), std::move(arena), FederatedQuerySetup, GUCSettings); - return TlsActivationContext->Register(computeActor); + IActor* computeActor = ::NKikimr::NKqp::CreateKqpComputeActor(args.ExecuterId, args.TxId, args.Task, AsyncIoFactory, + runtimeSettings, memoryLimits, std::move(args.TraceId), std::move(args.Arena), FederatedQuerySetup, GUCSettings); + return args.ShareMailbox ? 
TlsActivationContext->AsActorContext().RegisterWithSameMailbox(computeActor) : + TlsActivationContext->AsActorContext().Register(computeActor); } } }; @@ -192,4 +238,4 @@ std::shared_ptr MakeKqpCaFactory(const NKikimrConfi return std::make_shared(config, resourceManager, asyncIoFactory, federatedQuerySetup); } -} \ No newline at end of file +} diff --git a/ydb/core/kqp/compute_actor/kqp_compute_actor_factory.h b/ydb/core/kqp/compute_actor/kqp_compute_actor_factory.h index 13f0fa11a0da..5d63a7aa6c9b 100644 --- a/ydb/core/kqp/compute_actor/kqp_compute_actor_factory.h +++ b/ydb/core/kqp/compute_actor/kqp_compute_actor_factory.h @@ -103,11 +103,33 @@ struct IKqpNodeComputeActorFactory { virtual ~IKqpNodeComputeActorFactory() = default; public: - virtual NActors::TActorId CreateKqpComputeActor(const NActors::TActorId& executerId, ui64 txId, NYql::NDqProto::TDqTask* task, - const NYql::NDq::TComputeRuntimeSettings& settings, - NWilson::TTraceId traceId, TIntrusivePtr arena, const TString& serializedGUCSettings, - TComputeStagesWithScan& computeStages, ui64 outputChunkMaxSize, std::shared_ptr state, - NKikimr::NKqp::NRm::EKqpMemoryPool memoryPool, ui32 numberOfTasks) = 0; + struct TCreateArgs { + const NActors::TActorId& ExecuterId; + const ui64 TxId; + const TMaybe LockTxId; + const ui32 LockNodeId; + NYql::NDqProto::TDqTask* Task; + TIntrusivePtr TxInfo; + const NYql::NDq::TComputeRuntimeSettings& RuntimeSettings; + NWilson::TTraceId TraceId; + TIntrusivePtr Arena; + const TString& SerializedGUCSettings; + const ui32 NumberOfTasks; + const ui64 OutputChunkMaxSize; + const NKikimr::NKqp::NRm::EKqpMemoryPool MemoryPool; + const bool WithSpilling; + const NYql::NDqProto::EDqStatsMode StatsMode; + const TInstant& Deadline; + const bool ShareMailbox; + const TMaybe& RlPath; + TComputeStagesWithScan* ComputesByStages = nullptr; + std::shared_ptr State = nullptr; + }; + + typedef std::variant TActorStartResult; + virtual TActorStartResult CreateKqpComputeActor(TCreateArgs&& 
args) = 0; + + virtual void ApplyConfig(const NKikimrConfig::TTableServiceConfig::TResourceManager& config) = 0; }; std::shared_ptr MakeKqpCaFactory(const NKikimrConfig::TTableServiceConfig::TResourceManager& config, diff --git a/ydb/core/kqp/compute_actor/kqp_compute_events.h b/ydb/core/kqp/compute_actor/kqp_compute_events.h index 6092c4a1a378..a9dd127a64b0 100644 --- a/ydb/core/kqp/compute_actor/kqp_compute_events.h +++ b/ydb/core/kqp/compute_actor/kqp_compute_events.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include #include #include @@ -11,164 +11,181 @@ namespace NKikimr::NKqp { -struct TEvKqpCompute { - struct TEvRemoteScanData : public TEventPB {}; +struct TLocksInfo { + TVector Locks; + TVector BrokenLocks; +}; - class IShardScanStats { - public: - virtual ~IShardScanStats() = default; - virtual THashMap GetMetrics() const = 0; - }; +namespace NInternalImplementation { +struct TEvRemoteScanData: public TEventPB { +}; - /* - * Scan communications. - * - * TEvScanData is intentionally preserved as a local event for performance reasons: leaf compute - * actors are communicating with shard scans using this message, so big amount of unfiltered data - * is expected. However, it is possible that after query planning datashard would migrate to other - * node. To support scans in this case we provide serialization routines. For now such remote scan - * is considered as rare event and not worth of some fast serialization, so we just use protobuf. - * - * TEvScanDataAck follows the same pattern mostly for symmetry reasons. 
- */ - struct TEvScanData : public NActors::TEventLocal { - TEvScanData(const ui32 scanId, const ui32 generation = 0) - : ScanId(scanId) - , Generation(generation) - , Finished(false) {} - - std::optional AvailablePacks; - ui32 ScanId; - ui32 Generation; - TVector Rows; - std::shared_ptr ArrowBatch; - std::vector> SplittedBatches; - - TOwnedCellVec LastKey; - TDuration CpuTime; - TDuration WaitTime; - ui32 PageFaults = 0; // number of page faults occurred when filling in this message - bool RequestedBytesLimitReached = false; - bool Finished = false; - bool PageFault = false; // page fault was the reason for sending this message - mutable THolder Remote; - std::shared_ptr StatsOnFinished; - - template - const T& GetStatsAs() const { - Y_ABORT_UNLESS(!!StatsOnFinished); - return VerifyDynamicCast(*StatsOnFinished); - } +class IShardScanStats { +public: + virtual ~IShardScanStats() = default; + virtual THashMap GetMetrics() const = 0; +}; - template - bool CheckStatsIs() const { - auto p = dynamic_cast(StatsOnFinished.get()); - return p; +/* + * Scan communications. + * + * TEvScanData is intentionally preserved as a local event for performance reasons: leaf compute + * actors are communicating with shard scans using this message, so big amount of unfiltered data + * is expected. However, it is possible that after query planning datashard would migrate to other + * node. To support scans in this case we provide serialization routines. For now such remote scan + * is considered as rare event and not worth of some fast serialization, so we just use protobuf. + * + * TEvScanDataAck follows the same pattern mostly for symmetry reasons. 
+ */ +struct TEvScanData: public NActors::TEventLocal { + TEvScanData(const ui32 scanId, const ui32 generation = 0) + : ScanId(scanId) + , Generation(generation) + , Finished(false) { + } + + std::optional AvailablePacks; + ui32 ScanId; + ui32 Generation; + TVector Rows; + std::shared_ptr ArrowBatch; + std::vector> SplittedBatches; + + TOwnedCellVec LastKey; + TDuration CpuTime; + TDuration WaitTime; + ui32 PageFaults = 0; // number of page faults occurred when filling in this message + bool RequestedBytesLimitReached = false; + bool Finished = false; + bool PageFault = false; // page fault was the reason for sending this message + mutable THolder Remote; + std::shared_ptr StatsOnFinished; + TLocksInfo LocksInfo; + + template + const T& GetStatsAs() const { + Y_ABORT_UNLESS(!!StatsOnFinished); + return VerifyDynamicCast(*StatsOnFinished); + } + + template + bool CheckStatsIs() const { + auto p = dynamic_cast(StatsOnFinished.get()); + return p; + } + + ui32 GetRowsCount() const { + if (ArrowBatch) { + return ArrowBatch->num_rows(); + } else { + return Rows.size(); } + } - ui32 GetRowsCount() const { - if (ArrowBatch) { - return ArrowBatch->num_rows(); - } else { - return Rows.size(); - } - } + bool IsEmpty() const { + return GetRowsCount() == 0; + } - bool IsEmpty() const { - return GetRowsCount() == 0; - } + bool IsSerializable() const override { + return true; + } - bool IsSerializable() const override { - return true; - } + ui32 CalculateSerializedSize() const override { + InitRemote(); + return Remote->CalculateSerializedSizeCached(); + } - ui32 CalculateSerializedSize() const override { - InitRemote(); - return Remote->CalculateSerializedSizeCached(); - } + bool SerializeToArcadiaStream(NActors::TChunkSerializer* chunker) const override { + InitRemote(); + return Remote->SerializeToArcadiaStream(chunker); + } - bool SerializeToArcadiaStream(NActors::TChunkSerializer* chunker) const override { - InitRemote(); - return Remote->SerializeToArcadiaStream(chunker); + 
NKikimrDataEvents::EDataFormat GetDataFormat() const { + if (ArrowBatch != nullptr || SplittedBatches.size()) { + return NKikimrDataEvents::FORMAT_ARROW; } - - NKikimrDataEvents::EDataFormat GetDataFormat() const { - if (ArrowBatch != nullptr || SplittedBatches.size()) { - return NKikimrDataEvents::FORMAT_ARROW; - } - return NKikimrDataEvents::FORMAT_CELLVEC; + return NKikimrDataEvents::FORMAT_CELLVEC; + } + + + static NActors::IEventBase* Load(TEventSerializedData* data) { + auto pbEv = THolder(static_cast(TEvRemoteScanData::Load(data))); + auto ev = MakeHolder(pbEv->Record.GetScanId(), pbEv->Record.GetGeneration()); + + ev->CpuTime = TDuration::MicroSeconds(pbEv->Record.GetCpuTimeUs()); + ev->WaitTime = TDuration::MilliSeconds(pbEv->Record.GetWaitTimeMs()); + ev->PageFault = pbEv->Record.GetPageFault(); + ev->PageFaults = pbEv->Record.GetPageFaults(); + ev->Finished = pbEv->Record.GetFinished(); + ev->RequestedBytesLimitReached = pbEv->Record.GetRequestedBytesLimitReached(); + ev->LastKey = TOwnedCellVec(TSerializedCellVec(pbEv->Record.GetLastKey()).GetCells()); + if (pbEv->Record.HasAvailablePacks()) { + ev->AvailablePacks = pbEv->Record.GetAvailablePacks(); } + auto rows = pbEv->Record.GetRows(); + ev->Rows.reserve(rows.size()); + for (const auto& row : rows) { + ev->Rows.emplace_back(TSerializedCellVec(row).GetCells()); + } - static NActors::IEventBase* Load(TEventSerializedData* data) { - auto pbEv = THolder(static_cast(TEvRemoteScanData::Load(data))); - auto ev = MakeHolder(pbEv->Record.GetScanId(), pbEv->Record.GetGeneration()); - - ev->CpuTime = TDuration::MicroSeconds(pbEv->Record.GetCpuTimeUs()); - ev->WaitTime = TDuration::MilliSeconds(pbEv->Record.GetWaitTimeMs()); - ev->PageFault = pbEv->Record.GetPageFault(); - ev->PageFaults = pbEv->Record.GetPageFaults(); - ev->Finished = pbEv->Record.GetFinished(); - ev->RequestedBytesLimitReached = pbEv->Record.GetRequestedBytesLimitReached(); - ev->LastKey = 
TOwnedCellVec(TSerializedCellVec(pbEv->Record.GetLastKey()).GetCells()); - if (pbEv->Record.HasAvailablePacks()) { - ev->AvailablePacks = pbEv->Record.GetAvailablePacks(); - } - - auto rows = pbEv->Record.GetRows(); - ev->Rows.reserve(rows.size()); - for (const auto& row: rows) { - ev->Rows.emplace_back(TSerializedCellVec(row).GetCells()); + if (pbEv->Record.HasArrowBatch()) { + auto batch = pbEv->Record.GetArrowBatch(); + auto schema = NArrow::DeserializeSchema(batch.GetSchema()); + ev->ArrowBatch = NArrow::TStatusValidator::GetValid(arrow::Table::FromRecordBatches({ NArrow::DeserializeBatch(batch.GetBatch(), schema) })); + } + return ev.Release(); + } + +private: + void InitRemote() const { + if (!Remote) { + Remote = MakeHolder(); + + Remote->Record.SetScanId(ScanId); + Remote->Record.SetGeneration(Generation); + Remote->Record.SetCpuTimeUs(CpuTime.MicroSeconds()); + Remote->Record.SetWaitTimeMs(WaitTime.MilliSeconds()); + Remote->Record.SetPageFaults(PageFaults); + Remote->Record.SetFinished(Finished); + Remote->Record.SetRequestedBytesLimitReached(RequestedBytesLimitReached); + Remote->Record.SetPageFaults(PageFaults); + Remote->Record.SetPageFault(PageFault); + Remote->Record.SetLastKey(TSerializedCellVec::Serialize(LastKey)); + if (AvailablePacks) { + Remote->Record.SetAvailablePacks(*AvailablePacks); } - if (pbEv->Record.HasArrowBatch()) { - auto batch = pbEv->Record.GetArrowBatch(); - auto schema = NArrow::DeserializeSchema(batch.GetSchema()); - ev->ArrowBatch = NArrow::TStatusValidator::GetValid(arrow::Table::FromRecordBatches({NArrow::DeserializeBatch(batch.GetBatch(), schema)})); + switch (GetDataFormat()) { + case NKikimrDataEvents::FORMAT_UNSPECIFIED: + case NKikimrDataEvents::FORMAT_CELLVEC: { + Remote->Record.MutableRows()->Reserve(Rows.size()); + for (const auto& row : Rows) { + Remote->Record.AddRows(TSerializedCellVec::Serialize(row)); + } + break; + } + case NKikimrDataEvents::FORMAT_ARROW: { + Y_DEBUG_ABORT_UNLESS(ArrowBatch != nullptr); + 
auto* protoArrowBatch = Remote->Record.MutableArrowBatch(); + protoArrowBatch->SetSchema(NArrow::SerializeSchema(*ArrowBatch->schema())); + protoArrowBatch->SetBatch(NArrow::SerializeBatchNoCompression(NArrow::ToBatch(ArrowBatch, true))); + break; + } } - return ev.Release(); } + } +}; - private: - void InitRemote() const { - if (!Remote) { - Remote = MakeHolder(); +} - Remote->Record.SetScanId(ScanId); - Remote->Record.SetGeneration(Generation); - Remote->Record.SetCpuTimeUs(CpuTime.MicroSeconds()); - Remote->Record.SetWaitTimeMs(WaitTime.MilliSeconds()); - Remote->Record.SetPageFaults(PageFaults); - Remote->Record.SetFinished(Finished); - Remote->Record.SetRequestedBytesLimitReached(RequestedBytesLimitReached); - Remote->Record.SetPageFaults(PageFaults); - Remote->Record.SetPageFault(PageFault); - Remote->Record.SetLastKey(TSerializedCellVec::Serialize(LastKey)); - if (AvailablePacks) { - Remote->Record.SetAvailablePacks(*AvailablePacks); - } +struct TEvKqpCompute { + using TEvRemoteScanData = NInternalImplementation::TEvRemoteScanData; - switch (GetDataFormat()) { - case NKikimrDataEvents::FORMAT_UNSPECIFIED: - case NKikimrDataEvents::FORMAT_CELLVEC: { - Remote->Record.MutableRows()->Reserve(Rows.size()); - for (const auto& row: Rows) { - Remote->Record.AddRows(TSerializedCellVec::Serialize(row)); - } - break; - } - case NKikimrDataEvents::FORMAT_ARROW: { - Y_DEBUG_ABORT_UNLESS(ArrowBatch != nullptr); - auto* protoArrowBatch = Remote->Record.MutableArrowBatch(); - protoArrowBatch->SetSchema(NArrow::SerializeSchema(*ArrowBatch->schema())); - protoArrowBatch->SetBatch(NArrow::SerializeBatchNoCompression(NArrow::ToBatch(ArrowBatch, true))); - break; - } - } - } - } - }; + using IShardScanStats = NInternalImplementation::IShardScanStats; + + using TEvScanData = NInternalImplementation::TEvScanData; struct TEvRemoteScanDataAck: public NActors::TEventPB { @@ -247,5 +264,4 @@ struct TEvKqpCompute { TKqpComputeEvents::EvKillScanTablet> {}; }; - } // namespace 
NKikimr::NKqp diff --git a/ydb/core/kqp/compute_actor/kqp_pure_compute_actor.cpp b/ydb/core/kqp/compute_actor/kqp_pure_compute_actor.cpp index 1c00eca6c792..ac8f34e1979b 100644 --- a/ydb/core/kqp/compute_actor/kqp_pure_compute_actor.cpp +++ b/ydb/core/kqp/compute_actor/kqp_pure_compute_actor.cpp @@ -133,10 +133,7 @@ STFUNC(TKqpComputeActor::StateFunc) { BaseStateFuncBody(ev); } } catch (const TMemoryLimitExceededException& e) { - InternalError(TIssuesIds::KIKIMR_PRECONDITION_FAILED, TStringBuilder() - << "Mkql memory limit exceeded, limit: " << GetMkqlMemoryLimit() - << ", host: " << HostName() - << ", canAllocateExtraMemory: " << CanAllocateExtraMemory); + TBase::OnMemoryLimitExceptionHandler(); } catch (const NMiniKQL::TKqpEnsureFail& e) { InternalError((TIssuesIds::EIssueCode) e.GetCode(), e.GetMessage()); } catch (const yexception& e) { diff --git a/ydb/core/kqp/compute_actor/kqp_scan_compute_actor.cpp b/ydb/core/kqp/compute_actor/kqp_scan_compute_actor.cpp index b144be2b09b0..907b5c03d671 100644 --- a/ydb/core/kqp/compute_actor/kqp_scan_compute_actor.cpp +++ b/ydb/core/kqp/compute_actor/kqp_scan_compute_actor.cpp @@ -23,13 +23,15 @@ static constexpr TDuration RL_MAX_BATCH_DELAY = TDuration::Seconds(50); } // anonymous namespace -TKqpScanComputeActor::TKqpScanComputeActor(const TActorId& executerId, ui64 txId, NDqProto::TDqTask* task, - IDqAsyncIoFactory::TPtr asyncIoFactory, +TKqpScanComputeActor::TKqpScanComputeActor(const TActorId& executerId, ui64 txId, TMaybe lockTxId, ui32 lockNodeId, + NDqProto::TDqTask* task, IDqAsyncIoFactory::TPtr asyncIoFactory, const TComputeRuntimeSettings& settings, const TComputeMemoryLimits& memoryLimits, NWilson::TTraceId traceId, TIntrusivePtr arena) : TBase(executerId, txId, task, std::move(asyncIoFactory), AppData()->FunctionRegistry, settings, memoryLimits, /* ownMemoryQuota = */ true, /* passExceptions = */ true, /*taskCounters = */ nullptr, std::move(traceId), std::move(arena)) , ComputeCtx(settings.StatsMode) + , 
LockTxId(lockTxId) + , LockNodeId(lockNodeId) { InitializeTask(); YQL_ENSURE(GetTask().GetMeta().UnpackTo(&Meta), "Invalid task meta: " << GetTask().GetMeta().DebugString()); @@ -103,6 +105,19 @@ void TKqpScanComputeActor::FillExtraStats(NDqProto::TDqComputeActorStats* dst, b } } +TMaybe TKqpScanComputeActor::ExtraData() { + NKikimrTxDataShard::TEvKqpInputActorResultInfo resultInfo; + for (const auto& lock : Locks) { + resultInfo.AddLocks()->CopyFrom(lock); + } + for (const auto& lock : BrokenLocks) { + resultInfo.AddLocks()->CopyFrom(lock); + } + google::protobuf::Any result; + result.PackFrom(resultInfo); + return result; +} + void TKqpScanComputeActor::HandleEvWakeup(EEvWakeupTag tag) { AFL_DEBUG(NKikimrServices::KQP_COMPUTE)("event", "HandleEvWakeup")("self_id", SelfId()); switch (tag) { @@ -130,10 +145,18 @@ void TKqpScanComputeActor::Handle(TEvScanExchange::TEvTerminateFromFetcher::TPtr void TKqpScanComputeActor::Handle(TEvScanExchange::TEvSendData::TPtr& ev) { ALS_DEBUG(NKikimrServices::KQP_COMPUTE) << "TEvSendData: " << ev->Sender << "/" << SelfId(); auto& msg = *ev->Get(); + + for (const auto& lock : msg.GetLocksInfo().Locks) { + Locks.insert(lock); + } + for (const auto& lock : msg.GetLocksInfo().BrokenLocks) { + BrokenLocks.insert(lock); + } + auto guard = TaskRunner->BindAllocator(); if (!!msg.GetArrowBatch()) { ScanData->AddData(NMiniKQL::TBatchDataAccessor(msg.GetArrowBatch(), std::move(msg.MutableDataIndexes())), msg.GetTabletId(), TaskRunner->GetHolderFactory()); - } else { + } else if (!msg.GetRows().empty()) { ScanData->AddData(std::move(msg.MutableRows()), msg.GetTabletId(), TaskRunner->GetHolderFactory()); } if (IsQuotingEnabled()) { diff --git a/ydb/core/kqp/compute_actor/kqp_scan_compute_actor.h b/ydb/core/kqp/compute_actor/kqp_scan_compute_actor.h index 69b0bb613c1a..6d970f68d08d 100644 --- a/ydb/core/kqp/compute_actor/kqp_scan_compute_actor.h +++ b/ydb/core/kqp/compute_actor/kqp_scan_compute_actor.h @@ -11,6 +11,7 @@ namespace 
NKikimr::NKqp::NScanPrivate { class TKqpScanComputeActor: public NYql::NDq::TDqSyncComputeActorBase { private: using TBase = NYql::NDq::TDqSyncComputeActorBase; + NMiniKQL::TKqpScanComputeContext ComputeCtx; NKikimrTxDataShard::TKqpTransaction::TScanTaskMeta Meta; using TBase::TaskRunner; @@ -22,15 +23,49 @@ class TKqpScanComputeActor: public NYql::NDq::TDqSyncComputeActorBase Fetchers; NMiniKQL::TKqpScanComputeContext::TScanData* ScanData = nullptr; + const TMaybe LockTxId; + const ui32 LockNodeId; + + struct TLockHash { + bool operator()(const NKikimrDataEvents::TLock& lock) { + return MultiHash( + lock.GetLockId(), + lock.GetDataShard(), + lock.GetSchemeShard(), + lock.GetPathId(), + lock.GetGeneration(), + lock.GetCounter(), + lock.GetHasWrites()); + } + }; + + struct TLockEqual { + bool operator()(const NKikimrDataEvents::TLock& lhs, const NKikimrDataEvents::TLock& rhs) { + return lhs.GetLockId() == rhs.GetLockId() + && lhs.GetDataShard() == rhs.GetDataShard() + && lhs.GetSchemeShard() == rhs.GetSchemeShard() + && lhs.GetPathId() == rhs.GetPathId() + && lhs.GetGeneration() == rhs.GetGeneration() + && lhs.GetCounter() == rhs.GetCounter() + && lhs.GetHasWrites() == rhs.GetHasWrites(); + } + }; + + using TLocksHashSet = THashSet; + + TLocksHashSet Locks; + TLocksHashSet BrokenLocks; + ui64 CalcMkqlMemoryLimit() override { return TBase::CalcMkqlMemoryLimit() + ComputeCtx.GetTableScans().size() * MemoryLimits.ChannelBufferSize; } + public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::KQP_SCAN_COMPUTE_ACTOR; } - TKqpScanComputeActor(const TActorId& executerId, ui64 txId, + TKqpScanComputeActor(const TActorId& executerId, ui64 txId, TMaybe lockTxId, ui32 lockNodeId, NYql::NDqProto::TDqTask* task, NYql::NDq::IDqAsyncIoFactory::TPtr asyncIoFactory, const NYql::NDq::TComputeRuntimeSettings& settings, const NYql::NDq::TComputeMemoryLimits& memoryLimits, NWilson::TTraceId traceId, TIntrusivePtr arena); @@ 
-46,10 +81,7 @@ class TKqpScanComputeActor: public NYql::NDq::TDqSyncComputeActorBase ExtraData() override; + void HandleEvWakeup(EEvWakeupTag tag); void Handle(TEvScanExchange::TEvTerminateFromFetcher::TPtr& ev); diff --git a/ydb/core/kqp/compute_actor/kqp_scan_compute_manager.cpp b/ydb/core/kqp/compute_actor/kqp_scan_compute_manager.cpp index 0bc9a487e2ce..7eab5ef78196 100644 --- a/ydb/core/kqp/compute_actor/kqp_scan_compute_manager.cpp +++ b/ydb/core/kqp/compute_actor/kqp_scan_compute_manager.cpp @@ -20,23 +20,22 @@ std::vector> TShardScannerInfo::OnReceiveData( } else { Finished = true; } - if (data.IsEmpty()) { - AFL_ENSURE(data.Finished); - return {}; - } AFL_ENSURE(ActorId); AFL_ENSURE(!DataChunksInFlightCount)("data_chunks_in_flightCount", DataChunksInFlightCount); std::vector> result; - if (data.SplittedBatches.size() > 1) { + if (data.IsEmpty()) { + AFL_ENSURE(data.Finished); + result.emplace_back(std::make_unique(selfPtr, std::make_unique(TabletId, data.LocksInfo))); + } else if (data.SplittedBatches.size() > 1) { ui32 idx = 0; AFL_ENSURE(data.ArrowBatch); for (auto&& i : data.SplittedBatches) { - result.emplace_back(std::make_unique(selfPtr, std::make_unique(data.ArrowBatch, TabletId, std::move(i)), idx++)); + result.emplace_back(std::make_unique(selfPtr, std::make_unique(data.ArrowBatch, TabletId, std::move(i), data.LocksInfo), idx++)); } } else if (data.ArrowBatch) { - result.emplace_back(std::make_unique(selfPtr, std::make_unique(data.ArrowBatch, TabletId))); + result.emplace_back(std::make_unique(selfPtr, std::make_unique(data.ArrowBatch, TabletId, data.LocksInfo))); } else { - result.emplace_back(std::make_unique(selfPtr, std::make_unique(std::move(data.Rows), TabletId))); + result.emplace_back(std::make_unique(selfPtr, std::make_unique(std::move(data.Rows), TabletId, data.LocksInfo))); } AFL_DEBUG(NKikimrServices::KQP_COMPUTE)("event", "receive_data")("actor_id", ActorId)("count_chunks", result.size()); DataChunksInFlightCount = result.size(); 
diff --git a/ydb/core/kqp/compute_actor/kqp_scan_compute_manager.h b/ydb/core/kqp/compute_actor/kqp_scan_compute_manager.h index 1e4d9ac58937..2d684d2f6b09 100644 --- a/ydb/core/kqp/compute_actor/kqp_scan_compute_manager.h +++ b/ydb/core/kqp/compute_actor/kqp_scan_compute_manager.h @@ -23,6 +23,7 @@ class TComputeTaskData; class TShardScannerInfo { private: std::optional ActorId; + const ui64 ScanId; const ui64 TabletId; const ui64 Generation; i64 DataChunksInFlightCount = 0; @@ -51,15 +52,16 @@ class TShardScannerInfo { } } public: - TShardScannerInfo(TShardState& state, const IExternalObjectsProvider& externalObjectsProvider) - : TabletId(state.TabletId) + TShardScannerInfo(const ui64 scanId, TShardState& state, const IExternalObjectsProvider& externalObjectsProvider) + : ScanId(scanId) + , TabletId(state.TabletId) , Generation(++state.Generation) { const bool subscribed = std::exchange(state.SubscribedOnTablet, true); const auto& keyColumnTypes = externalObjectsProvider.GetKeyColumnTypes(); auto ranges = state.GetScanRanges(keyColumnTypes); - auto ev = externalObjectsProvider.BuildEvKqpScan(0, Generation, ranges); + auto ev = externalObjectsProvider.BuildEvKqpScan(ScanId, Generation, ranges); AFL_DEBUG(NKikimrServices::KQP_COMPUTE)("event", "start_scanner")("tablet_id", TabletId)("generation", Generation) ("info", state.ToString(keyColumnTypes))("range", DebugPrintRanges(keyColumnTypes, ranges, *AppData()->TypeRegistry)) @@ -250,6 +252,7 @@ class TInFlightShards: public NComputeActor::TScanShardsStatistics { THashMap ShardsByActorId; bool IsActiveFlag = true; THashMap> ShardScanners; + const ui64 ScanId; const IExternalObjectsProvider& ExternalObjectsProvider; public: @@ -313,7 +316,7 @@ class TInFlightShards: public NComputeActor::TScanShardsStatistics { AFL_ENSURE(state.TabletId); AFL_ENSURE(!state.ActorId)("actor_id", state.ActorId); state.State = NComputeActor::EShardState::Starting; - auto newScanner = std::make_shared(state, ExternalObjectsProvider); + 
auto newScanner = std::make_shared(ScanId, state, ExternalObjectsProvider); AFL_ENSURE(ShardScanners.emplace(state.TabletId, newScanner).second); } @@ -356,8 +359,9 @@ class TInFlightShards: public NComputeActor::TScanShardsStatistics { return nullptr; } - TInFlightShards(const IExternalObjectsProvider& externalObjectsProvider) - : ExternalObjectsProvider(externalObjectsProvider) + TInFlightShards(const ui64 scanId, const IExternalObjectsProvider& externalObjectsProvider) + : ScanId(scanId) + , ExternalObjectsProvider(externalObjectsProvider) { } bool IsActive() const { diff --git a/ydb/core/kqp/compute_actor/kqp_scan_events.h b/ydb/core/kqp/compute_actor/kqp_scan_events.h index af455be7b7e7..08ac0e535a88 100644 --- a/ydb/core/kqp/compute_actor/kqp_scan_events.h +++ b/ydb/core/kqp/compute_actor/kqp_scan_events.h @@ -43,33 +43,42 @@ struct TEvScanExchange { YDB_ACCESSOR_DEF(TVector, Rows); YDB_READONLY(ui64, TabletId, 0); YDB_ACCESSOR_DEF(std::vector, DataIndexes); + YDB_READONLY_DEF(TLocksInfo, LocksInfo); public: ui32 GetRowsCount() const { return ArrowBatch ? 
ArrowBatch->num_rows() : Rows.size(); } - TEvSendData(const std::shared_ptr& arrowBatch, const ui64 tabletId) + TEvSendData(const std::shared_ptr& arrowBatch, const ui64 tabletId, const TLocksInfo& locksInfo) : ArrowBatch(arrowBatch) , TabletId(tabletId) + , LocksInfo(locksInfo) { Y_ABORT_UNLESS(ArrowBatch); Y_ABORT_UNLESS(ArrowBatch->num_rows()); } - TEvSendData(const std::shared_ptr& arrowBatch, const ui64 tabletId, std::vector&& dataIndexes) + TEvSendData(const std::shared_ptr& arrowBatch, const ui64 tabletId, std::vector&& dataIndexes, const TLocksInfo& locksInfo) : ArrowBatch(arrowBatch) , TabletId(tabletId) , DataIndexes(std::move(dataIndexes)) + , LocksInfo(locksInfo) { Y_ABORT_UNLESS(ArrowBatch); Y_ABORT_UNLESS(ArrowBatch->num_rows()); } - TEvSendData(TVector&& rows, const ui64 tabletId) + TEvSendData(TVector&& rows, const ui64 tabletId, const TLocksInfo& locksInfo) : Rows(std::move(rows)) - , TabletId(tabletId) { + , TabletId(tabletId) + , LocksInfo(locksInfo) { Y_ABORT_UNLESS(Rows.size()); } + + TEvSendData(const ui64 tabletId, const TLocksInfo& locksInfo) + : TabletId(tabletId) + , LocksInfo(locksInfo) { + } }; class TEvAckData: public NActors::TEventLocal { diff --git a/ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.cpp b/ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.cpp index 4d4bfed29c89..73f4c86398b9 100644 --- a/ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.cpp +++ b/ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.cpp @@ -23,18 +23,20 @@ static constexpr ui64 MAX_SHARD_RESOLVES = 3; TKqpScanFetcherActor::TKqpScanFetcherActor(const NKikimrKqp::TKqpSnapshot& snapshot, - const TComputeRuntimeSettings& settings, std::vector&& computeActors, const ui64 txId, + const TComputeRuntimeSettings& settings, std::vector&& computeActors, const ui64 txId, const TMaybe lockTxId, const ui32 lockNodeId, const NKikimrTxDataShard::TKqpTransaction_TScanTaskMeta& meta, const TShardsScanningPolicy& shardsScanningPolicy, TIntrusivePtr counters, 
NWilson::TTraceId traceId) : Meta(meta) , ScanDataMeta(Meta) , RuntimeSettings(settings) , TxId(txId) + , LockTxId(lockTxId) + , LockNodeId(lockNodeId) , ComputeActorIds(std::move(computeActors)) , Snapshot(snapshot) , ShardsScanningPolicy(shardsScanningPolicy) , Counters(counters) - , InFlightShards(*this) + , InFlightShards(ScanId, *this) , InFlightComputes(ComputeActorIds) { Y_UNUSED(traceId); @@ -86,7 +88,11 @@ void TKqpScanFetcherActor::Bootstrap() { void TKqpScanFetcherActor::HandleExecute(TEvScanExchange::TEvAckData::TPtr& ev) { Y_ABORT_UNLESS(ev->Get()->GetFreeSpace()); - ALS_DEBUG(NKikimrServices::KQP_COMPUTE) << "EvAckData (" << SelfId() << "): " << ev->Sender; + AFL_DEBUG(NKikimrServices::KQP_COMPUTE)("event", "AckDataFromCompute")("self_id", SelfId())("scan_id", ScanId) + ("packs_to_send", InFlightComputes.GetPacksToSendCount()) + ("from", ev->Sender)("shards remain", PendingShards.size()) + ("in flight scans", InFlightShards.GetScansCount()) + ("in flight shards", InFlightShards.GetShardsCount()); InFlightComputes.OnComputeAck(ev->Sender, ev->Get()->GetFreeSpace()); CheckFinish(); } @@ -119,6 +125,25 @@ void TKqpScanFetcherActor::HandleExecute(TEvKqpCompute::TEvScanData::TPtr& ev) { } AFL_ENSURE(state->State == EShardState::Running)("state", state->State)("actor_id", state->ActorId)("ev_sender", ev->Sender); + AFL_DEBUG(NKikimrServices::KQP_COMPUTE) + ("Recv TEvScanData from ShardID=", ev->Sender) + ("ScanId", ev->Get()->ScanId) + ("Finished", ev->Get()->Finished) + ("Lock", [&]() { + TStringBuilder builder; + for (const auto& lock : ev->Get()->LocksInfo.Locks) { + builder << lock.ShortDebugString(); + } + return builder; + }()) + ("BrokenLocks", [&]() { + TStringBuilder builder; + for (const auto& lock : ev->Get()->LocksInfo.BrokenLocks) { + builder << lock.ShortDebugString(); + } + return builder; + }()); + TInstant startTime = TActivationContext::Now(); if (ev->Get()->Finished) { state->State = EShardState::PostRunning; @@ -411,6 +436,10 @@ 
std::unique_ptr TKqpScanFetcherActor::BuildEv ev->Record.SetStatsMode(RuntimeSettings.StatsMode); ev->Record.SetScanId(scanId); ev->Record.SetTxId(std::get(TxId)); + if (LockTxId) { + ev->Record.SetLockTxId(*LockTxId); + } + ev->Record.SetLockNodeId(LockNodeId); ev->Record.SetTablePath(ScanDataMeta.TablePath); ev->Record.SetSchemaVersion(ScanDataMeta.TableId.SchemaVersion); @@ -458,12 +487,17 @@ void TKqpScanFetcherActor::ProcessPendingScanDataItem(TEvKqpCompute::TEvScanData state->LastKey = std::move(msg.LastKey); const ui64 rowsCount = msg.GetRowsCount(); - CA_LOG_D("action=got EvScanData;rows=" << rowsCount << ";finished=" << msg.Finished << ";exceeded=" << msg.RequestedBytesLimitReached - << ";from=" << ev->Sender << ";shards remain=" << PendingShards.size() - << ";in flight scans=" << InFlightShards.GetScansCount() - << ";in flight shards=" << InFlightShards.GetShardsCount() - << ";delayed_for=" << latency.SecondsFloat() << " seconds by ratelimiter" - << ";tablet_id=" << state->TabletId); + AFL_ENSURE(!LockTxId || !msg.LocksInfo.Locks.empty() || !msg.LocksInfo.BrokenLocks.empty()); + AFL_ENSURE(LockTxId || (msg.LocksInfo.Locks.empty() && msg.LocksInfo.BrokenLocks.empty())); + AFL_DEBUG(NKikimrServices::KQP_COMPUTE)("action","got EvScanData")("rows", rowsCount)("finished", msg.Finished)("exceeded", msg.RequestedBytesLimitReached) + ("scan", ScanId)("packs_to_send", InFlightComputes.GetPacksToSendCount()) + ("from", ev->Sender)("shards remain", PendingShards.size()) + ("in flight scans", InFlightShards.GetScansCount()) + ("in flight shards", InFlightShards.GetShardsCount()) + ("delayed_for_seconds_by_ratelimiter", latency.SecondsFloat()) + ("tablet_id", state->TabletId) + ("locks", msg.LocksInfo.Locks.size()) + ("broken locks", msg.LocksInfo.BrokenLocks.size()); auto shardScanner = InFlightShards.GetShardScannerVerified(state->TabletId); auto tasksForCompute = shardScanner->OnReceiveData(msg, shardScanner); AFL_ENSURE(tasksForCompute.size() == 1 || 
tasksForCompute.size() == 0 || tasksForCompute.size() == ComputeActorIds.size())("size", tasksForCompute.size())("compute_size", ComputeActorIds.size()); diff --git a/ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.h b/ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.h index 962aee326470..0bd2bfc1d58b 100644 --- a/ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.h +++ b/ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.h @@ -50,13 +50,15 @@ class TKqpScanFetcherActor: public NActors::TActorBootstrapped LockTxId; + const ui32 LockNodeId; public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::KQP_SCAN_FETCH_ACTOR; } TKqpScanFetcherActor(const NKikimrKqp::TKqpSnapshot& snapshot, const NYql::NDq::TComputeRuntimeSettings& settings, - std::vector&& computeActors, const ui64 txId, + std::vector&& computeActors, const ui64 txId, const TMaybe lockTxId, const ui32 lockNodeId, const NKikimrTxDataShard::TKqpTransaction_TScanTaskMeta& meta, const TShardsScanningPolicy& shardsScanningPolicy, TIntrusivePtr counters, NWilson::TTraceId traceId); @@ -168,6 +170,9 @@ class TKqpScanFetcherActor: public NActors::TActorBootstrapped PendingShards; std::deque PendingResolveShards; + static inline TAtomicCounter ScanIdCounter = 0; + const ui64 ScanId = ScanIdCounter.Inc(); + TInFlightShards InFlightShards; TInFlightComputes InFlightComputes; ui32 TotalRetries = 0; diff --git a/ydb/core/kqp/compute_actor/ya.make b/ydb/core/kqp/compute_actor/ya.make index 50d029c6bacf..7b45cfa31530 100644 --- a/ydb/core/kqp/compute_actor/ya.make +++ b/ydb/core/kqp/compute_actor/ya.make @@ -22,7 +22,8 @@ PEERDIR( ydb/core/kqp/runtime ydb/core/tx/datashard ydb/core/tx/scheme_cache - ydb/core/formats/arrow/protos + ydb/library/formats/arrow/protos + ydb/library/formats/arrow/common ydb/library/yql/dq/actors/compute ydb/library/yql/providers/generic/actors ydb/library/yql/providers/s3/actors_factory diff --git 
a/ydb/core/kqp/counters/kqp_counters.cpp b/ydb/core/kqp/counters/kqp_counters.cpp index a8b0b8a1a2c5..70e5169c4546 100644 --- a/ydb/core/kqp/counters/kqp_counters.cpp +++ b/ydb/core/kqp/counters/kqp_counters.cpp @@ -776,7 +776,10 @@ TKqpCounters::TKqpCounters(const ::NMonitoring::TDynamicCounterPtr& counters, co RmExternalMemory = KqpGroup->GetCounter("RM/ExternalMemory", false); RmNotEnoughMemory = KqpGroup->GetCounter("RM/NotEnoughMemory", true); RmNotEnoughComputeActors = KqpGroup->GetCounter("RM/NotEnoughComputeActors", true); + RmOnStartAllocs = KqpGroup->GetCounter("Rm/OnStartAllocs", true); RmExtraMemAllocs = KqpGroup->GetCounter("RM/ExtraMemAllocs", true); + RmExtraMemFree = KqpGroup->GetCounter("RM/ExtraMemFree", true); + RmOnCompleteFree = KqpGroup->GetCounter("RM/OnCompleteFree", true); RmInternalError = KqpGroup->GetCounter("RM/InternalError", true); RmSnapshotLatency = KqpGroup->GetHistogram( "RM/SnapshotLatency", NMonitoring::ExponentialHistogram(20, 2, 1)); diff --git a/ydb/core/kqp/counters/kqp_counters.h b/ydb/core/kqp/counters/kqp_counters.h index 4a3328cbb0d1..f302897f1b8d 100644 --- a/ydb/core/kqp/counters/kqp_counters.h +++ b/ydb/core/kqp/counters/kqp_counters.h @@ -350,7 +350,7 @@ class TKqpCounters : public TKqpCountersBase, public NYql::NDq::TSpillingCounter ::NMonitoring::TDynamicCounterPtr WorkloadManagerGroup; ::NMonitoring::TDynamicCounters::TCounterPtr FullScansExecuted; - + // Lease updates counters ::NMonitoring::THistogramPtr LeaseUpdateLatency; ::NMonitoring::THistogramPtr RunActorLeaseUpdateBacklog; @@ -377,6 +377,9 @@ class TKqpCounters : public TKqpCountersBase, public NYql::NDq::TSpillingCounter ::NMonitoring::TDynamicCounters::TCounterPtr RmNotEnoughMemory; ::NMonitoring::TDynamicCounters::TCounterPtr RmNotEnoughComputeActors; ::NMonitoring::TDynamicCounters::TCounterPtr RmExtraMemAllocs; + ::NMonitoring::TDynamicCounters::TCounterPtr RmOnStartAllocs; + ::NMonitoring::TDynamicCounters::TCounterPtr RmExtraMemFree; + 
::NMonitoring::TDynamicCounters::TCounterPtr RmOnCompleteFree; ::NMonitoring::TDynamicCounters::TCounterPtr RmInternalError; NMonitoring::THistogramPtr RmSnapshotLatency; NMonitoring::THistogramPtr NodeServiceStartEventDelivery; diff --git a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp index 9bc57b456d02..b97e71161824 100644 --- a/ydb/core/kqp/executer_actor/kqp_data_executer.cpp +++ b/ydb/core/kqp/executer_actor/kqp_data_executer.cpp @@ -241,11 +241,11 @@ class TKqpDataExecuter : public TKqpExecuterBaseRecord.MutableResponse(); @@ -279,6 +279,9 @@ class TKqpDataExecuter : public TKqpExecuterBaseSnapshot = GetSnapshot(); @@ -495,10 +498,20 @@ class TKqpDataExecuter : public TKqpExecuterBaseRecord.GetIssues(), issues); - LOG_D("Got evWrite result, shard: " << shardId << ", status: " - << NKikimrDataEvents::TEvWriteResult::EStatus_Name(res->Record.GetStatus()) - << ", error: " << issues.ToString()); + LOG_D("Recv EvWriteResult (prepare) from ShardID=" << shardId + << ", Status=" << NKikimrDataEvents::TEvWriteResult::EStatus_Name(ev->Get()->GetStatus()) + << ", TxId=" << ev->Get()->Record.GetTxId() + << ", Locks= " << [&]() { + TStringBuilder builder; + for (const auto& lock : ev->Get()->Record.GetTxLocks()) { + builder << lock.ShortDebugString(); + } + return builder; + }() + << ", Cookie=" << ev->Cookie + << ", error=" << issues.ToString()); + if (Stats) { Stats->AddDatashardPrepareStats(std::move(*res->Record.MutableTxStats())); } @@ -516,6 +529,19 @@ class TKqpDataExecuter : public TKqpExecuterBaseRecord.DebugString()); + YQL_ENSURE(shardState->State == TShardState::EState::Preparing); + Counters->TxProxyMon->TxResultAborted->Inc(); + LocksBroken = true; + + if (!res->Record.GetTxLocks().empty()) { + ResponseEv->BrokenLockPathId = NYql::TKikimrPathId( + res->Record.GetTxLocks(0).GetSchemeShard(), + res->Record.GetTxLocks(0).GetPathId()); + } + ReplyErrorAndDie(Ydb::StatusIds::ABORTED, {}); + } default: { 
return ShardError(res->Record); @@ -891,6 +917,7 @@ class TKqpDataExecuter : public TKqpExecuterBase(); ev->Record.SetCoordinatorID(TxCoordinator); @@ -1158,9 +1186,19 @@ class TKqpDataExecuter : public TKqpExecuterBaseRecord.GetIssues(), issues); - LOG_D("Got evWrite result, shard: " << shardId << ", status: " - << NKikimrDataEvents::TEvWriteResult::EStatus_Name(res->Record.GetStatus()) - << ", error: " << issues.ToString()); + + LOG_D("Recv EvWriteResult (execute) from ShardID=" << shardId + << ", Status=" << NKikimrDataEvents::TEvWriteResult::EStatus_Name(ev->Get()->GetStatus()) + << ", TxId=" << ev->Get()->Record.GetTxId() + << ", Locks= " << [&]() { + TStringBuilder builder; + for (const auto& lock : ev->Get()->Record.GetTxLocks()) { + builder << lock.ShortDebugString(); + } + return builder; + }() + << ", Cookie=" << ev->Cookie + << ", error=" << issues.ToString()); if (Stats) { Stats->AddDatashardStats(std::move(*res->Record.MutableTxStats())); @@ -1183,6 +1221,21 @@ class TKqpDataExecuter : public TKqpExecuterBaseRecord.DebugString()); + YQL_ENSURE(shardState->State == TShardState::EState::Executing); + shardState->State = TShardState::EState::Finished; + Counters->TxProxyMon->TxResultAborted->Inc(); + LocksBroken = true; + if (!res->Record.GetTxLocks().empty()) { + ResponseEv->BrokenLockPathId = NYql::TKikimrPathId( + res->Record.GetTxLocks(0).GetSchemeShard(), + res->Record.GetTxLocks(0).GetPathId()); + ReplyErrorAndDie(Ydb::StatusIds::ABORTED, {}); + } + CheckExecutionComplete(); + return; + } default: { return ShardError(res->Record); @@ -1235,29 +1288,15 @@ class TKqpDataExecuter : public TKqpExecuterBaseState = TShardState::EState::Finished; Counters->TxProxyMon->TxResultAborted->Inc(); // TODO: dedicated counter? 
- LocksBroken = true; - TMaybe tableName; if (!res->Record.GetTxLocks().empty()) { - auto& lock = res->Record.GetTxLocks(0); - auto tableId = TTableId(lock.GetSchemeShard(), lock.GetPathId()); - auto it = FindIf(TasksGraph.GetStagesInfo(), [tableId](const auto& x){ return x.second.Meta.TableId.HasSamePath(tableId); }); - if (it != TasksGraph.GetStagesInfo().end()) { - tableName = it->second.Meta.TableConstInfo->Path; - } - } - - // Reply as soon as we know which table had locks invalidated - if (tableName) { - auto message = TStringBuilder() - << "Transaction locks invalidated. Table: " << *tableName; - - return ReplyErrorAndDie(Ydb::StatusIds::ABORTED, - YqlIssue({}, TIssuesIds::KIKIMR_LOCKS_INVALIDATED, message)); + ResponseEv->BrokenLockPathId = TKikimrPathId( + res->Record.GetTxLocks(0).GetSchemeShard(), + res->Record.GetTxLocks(0).GetPathId()); + return ReplyErrorAndDie(Ydb::StatusIds::ABORTED, {}); } - // Receive more replies from other shards CheckExecutionComplete(); return; } @@ -1702,18 +1741,15 @@ class TKqpDataExecuter : public TKqpExecuterBase(); evWriteTransaction->Record = evWrite; - evWriteTransaction->Record.SetTxMode(NKikimrDataEvents::TEvWrite::MODE_PREPARE); + evWriteTransaction->Record.SetTxMode(ImmediateTx ? 
NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE : NKikimrDataEvents::TEvWrite::MODE_PREPARE); evWriteTransaction->Record.SetTxId(TxId); - evWriteTransaction->Record.MutableLocks()->SetOp(NKikimrDataEvents::TKqpLocks::Commit); - auto locksCount = evWriteTransaction->Record.GetLocks().LocksSize(); shardState.DatashardState->ShardReadLocks = locksCount > 0; @@ -1724,6 +1760,29 @@ class TKqpDataExecuter : public TKqpExecuterBase; using TEvWriteTxs = THashMap; - using TTopicTabletTxs = THashMap; + using TTopicTabletTxs = NTopic::TTopicOperationTransactions; void ContinueExecute() { if (Stats) { @@ -2424,10 +2483,10 @@ class TKqpDataExecuter : public TKqpExecuterBaseOrbit, TxId, ComputeTasks.size(), DatashardTxs.size() + EvWriteTxs.size()); @@ -2469,6 +2526,8 @@ class TKqpDataExecuter : public TKqpExecuterBasePlanExecution(); @@ -2587,13 +2648,12 @@ class TKqpDataExecuter : public TKqpExecuterBase(); + auto ev = std::make_unique(); - if (writeId.Defined()) { + if (t.hasWrite && writeId.Defined()) { auto* w = transaction.MutableWriteId(); w->SetNodeId(SelfId().NodeId()); w->SetKeyId(*writeId); diff --git a/ydb/core/kqp/executer_actor/kqp_executer.h b/ydb/core/kqp/executer_actor/kqp_executer.h index a871450170d7..203f6666214a 100644 --- a/ydb/core/kqp/executer_actor/kqp_executer.h +++ b/ydb/core/kqp/executer_actor/kqp_executer.h @@ -26,6 +26,7 @@ struct TEvKqpExecuter { NLWTrace::TOrbit Orbit; IKqpGateway::TKqpSnapshot Snapshot; + std::optional BrokenLockPathId; ui64 ResultRowsCount = 0; ui64 ResultRowsBytes = 0; diff --git a/ydb/core/kqp/executer_actor/kqp_executer_impl.cpp b/ydb/core/kqp/executer_actor/kqp_executer_impl.cpp index 40adee90bd9c..1b735ef19fca 100644 --- a/ydb/core/kqp/executer_actor/kqp_executer_impl.cpp +++ b/ydb/core/kqp/executer_actor/kqp_executer_impl.cpp @@ -39,7 +39,7 @@ void TEvKqpExecuter::TEvTxResponse::TakeResult(ui32 idx, NDq::TDqSerializedBatch ResultRowsBytes += rows.Size(); auto guard = AllocState->TypeEnv.BindAllocator(); auto& result = 
TxResults[idx]; - if (rows.RowCount() || !result.IsStream) { + if (rows.RowCount()) { NDq::TDqDataSerializer dataSerializer( AllocState->TypeEnv, AllocState->HolderFactory, static_cast(rows.Proto.GetTransportVersion())); diff --git a/ydb/core/kqp/executer_actor/kqp_executer_impl.h b/ydb/core/kqp/executer_actor/kqp_executer_impl.h index 4bfb1ec0ad86..8de618408606 100644 --- a/ydb/core/kqp/executer_actor/kqp_executer_impl.h +++ b/ydb/core/kqp/executer_actor/kqp_executer_impl.h @@ -512,6 +512,7 @@ class TKqpExecuterBase : public TActorBootstrapped { } TasksGraph.GetMeta().SetLockTxId(lockTxId); + TasksGraph.GetMeta().SetLockNodeId(SelfId().NodeId()); LWTRACK(KqpBaseExecuterHandleReady, ResponseEv->Orbit, TxId); if (IsDebugLogEnabled()) { @@ -1300,13 +1301,9 @@ class TKqpExecuterBase : public TActorBootstrapped { const auto& input = stage.GetInputs(inputIndex); // Current assumptions: - // 1. `Broadcast` can not be the 1st stage input unless it's a single input - // 2. All stage's inputs, except 1st one, must be a `Broadcast` or `UnionAll` - if (inputIndex == 0) { - if (stage.InputsSize() > 1) { - YQL_ENSURE(input.GetTypeCase() != NKqpProto::TKqpPhyConnection::kBroadcast); - } - } else { + // 1. All stage's inputs, except 1st one, must be a `Broadcast` or `UnionAll` + // 2. Stages where 1st input is `Broadcast` are not partitioned. + if (inputIndex > 0) { switch (input.GetTypeCase()) { case NKqpProto::TKqpPhyConnection::kBroadcast: case NKqpProto::TKqpPhyConnection::kHashShuffle: @@ -1772,7 +1769,9 @@ class TKqpExecuterBase : public TActorBootstrapped { auto& response = *ResponseEv->Record.MutableResponse(); response.SetStatus(status); - response.MutableIssues()->Swap(issues); + if (issues) { + response.MutableIssues()->Swap(issues); + } LOG_T("ReplyErrorAndDie. 
Response: " << response.DebugString() << ", to ActorId: " << Target); diff --git a/ydb/core/kqp/executer_actor/kqp_planner.cpp b/ydb/core/kqp/executer_actor/kqp_planner.cpp index dcc6049176bf..e6699c72e2b3 100644 --- a/ydb/core/kqp/executer_actor/kqp_planner.cpp +++ b/ydb/core/kqp/executer_actor/kqp_planner.cpp @@ -37,6 +37,11 @@ std::unique_ptr CheckTaskSize(ui64 TxId, const TIntru return nullptr; } +std::unique_ptr MakeActorStartFailureError(const TActorId& executerId, const TString& reason) { + auto ev = std::make_unique(NYql::NDqProto::StatusIds::OVERLOADED, reason); + return std::make_unique(executerId, executerId, ev.release()); +} + void BuildInitialTaskResources(const TKqpTasksGraph& graph, ui64 taskId, TTaskResourceEstimation& ret) { const auto& task = graph.GetTask(taskId); const auto& stageInfo = graph.GetStageInfo(task.StageId); @@ -55,10 +60,11 @@ bool TKqpPlanner::UseMockEmptyPlanner = false; // Task can allocate extra memory during execution. // So, we estimate total memory amount required for task as apriori task size multiplied by this constant. 
constexpr ui32 MEMORY_ESTIMATION_OVERFLOW = 2; -constexpr ui32 MAX_NON_PARALLEL_TASKS_EXECUTION_LIMIT = 8; TKqpPlanner::TKqpPlanner(TKqpPlanner::TArgs&& args) : TxId(args.TxId) + , LockTxId(args.LockTxId) + , LockNodeId(args.LockNodeId) , ExecuterId(args.Executer) , Snapshot(args.Snapshot) , Database(args.Database) @@ -81,7 +87,13 @@ TKqpPlanner::TKqpPlanner(TKqpPlanner::TArgs&& args) , OutputChunkMaxSize(args.OutputChunkMaxSize) , GUCSettings(std::move(args.GUCSettings)) , MayRunTasksLocally(args.MayRunTasksLocally) + , ResourceManager_(args.ResourceManager_) + , CaFactory_(args.CaFactory_) { + if (GUCSettings) { + SerializedGUCSettings = GUCSettings->SerializeToString(); + } + if (!Database) { // a piece of magic for tests if (const auto& domain = AppData()->DomainsInfo->Domain) { @@ -165,6 +177,10 @@ std::unique_ptr TKqpPlanner::SerializeReque auto result = std::make_unique(TasksGraph.GetMeta().GetArenaIntrusivePtr()); auto& request = result->Record; request.SetTxId(TxId); + if (LockTxId) { + request.SetLockTxId(*LockTxId); + } + request.SetLockNodeId(LockNodeId); ActorIdToProto(ExecuterId, request.MutableExecuterActorId()); if (Deadline) { @@ -205,8 +221,8 @@ std::unique_ptr TKqpPlanner::SerializeReque request.SetOutputChunkMaxSize(OutputChunkMaxSize); } - if (GUCSettings) { - request.SetSerializedGUCSettings(GUCSettings->SerializeToString()); + if (SerializedGUCSettings) { + request.SetSerializedGUCSettings(SerializedGUCSettings); } return result; @@ -242,11 +258,26 @@ std::unique_ptr TKqpPlanner::AssignTasksToNodes() { PrepareToProcess(); - auto localResources = GetKqpResourceManager()->GetLocalResources(); + auto localResources = ResourceManager_->GetLocalResources(); Y_UNUSED(MEMORY_ESTIMATION_OVERFLOW); + + auto placingOptions = ResourceManager_->GetPlacingOptions(); + + ui64 nonParallelLimit = placingOptions.MaxNonParallelTasksExecutionLimit; + if (MayRunTasksLocally) { + // not applied to column shards and external sources + nonParallelLimit = 
placingOptions.MaxNonParallelDataQueryTasksLimit; + } + + bool singleNodeExecutionMakeSence = ( + ResourceEstimations.size() <= nonParallelLimit || + // all readers are located on the one node. + TasksPerNode.size() == 1 + ); + if (LocalRunMemoryEst * MEMORY_ESTIMATION_OVERFLOW <= localResources.Memory[NRm::EKqpMemoryPool::ScanQuery] && ResourceEstimations.size() <= localResources.ExecutionUnits && - ResourceEstimations.size() <= MAX_NON_PARALLEL_TASKS_EXECUTION_LIMIT) + singleNodeExecutionMakeSence) { ui64 selfNodeId = ExecuterId.NodeId(); for(ui64 taskId: ComputeTasks) { @@ -257,7 +288,7 @@ std::unique_ptr TKqpPlanner::AssignTasksToNodes() { } if (ResourcesSnapshot.empty()) { - ResourcesSnapshot = std::move(GetKqpResourceManager()->GetClusterResources()); + ResourcesSnapshot = std::move(ResourceManager_->GetClusterResources()); } if (ResourcesSnapshot.empty() || (ResourcesSnapshot.size() == 1 && ResourcesSnapshot[0].GetNodeId() == ExecuterId.NodeId())) { @@ -281,22 +312,24 @@ std::unique_ptr TKqpPlanner::AssignTasksToNodes() { return std::make_unique(ExecuterId, ExecuterId, ev.Release()); } - auto planner = (UseMockEmptyPlanner ? 
CreateKqpMockEmptyPlanner() : CreateKqpGreedyPlanner()); // KqpMockEmptyPlanner is a mock planner for tests + std::vector deepestTasks; + ui64 maxLevel = 0; + for(auto& task: TasksGraph.GetTasks()) { + // const auto& task = TasksGraph.GetTask(taskId); + const auto& stageInfo = TasksGraph.GetStageInfo(task.StageId); + const NKqpProto::TKqpPhyStage& stage = stageInfo.Meta.GetStage(stageInfo.Id); + const ui64 stageLevel = stage.GetProgram().GetSettings().GetStageLevel(); - auto ctx = TlsActivationContext->AsActorContext(); - if (ctx.LoggerSettings() && ctx.LoggerSettings()->Satisfies(NActors::NLog::PRI_DEBUG, NKikimrServices::KQP_EXECUTER)) { - planner->SetLogFunc([TxId = TxId, &UserRequestContext = UserRequestContext](TStringBuf msg) { LOG_D(msg); }); - } + if (stageLevel > maxLevel) { + maxLevel = stageLevel; + deepestTasks.clear(); + } - THashMap nodeIdtoIdx; - for (size_t idx = 0; idx < ResourcesSnapshot.size(); ++idx) { - nodeIdtoIdx[ResourcesSnapshot[idx].nodeid()] = idx; + if (stageLevel == maxLevel) { + deepestTasks.push_back(task.Id); + } } - LogMemoryStatistics([TxId = TxId, &UserRequestContext = UserRequestContext](TStringBuf msg) { LOG_D(msg); }); - - auto plan = planner->Plan(ResourcesSnapshot, ResourceEstimations); - THashMap alreadyAssigned; for(auto& [nodeId, tasks] : TasksPerNode) { for(ui64 taskId: tasks) { @@ -304,81 +337,126 @@ std::unique_ptr TKqpPlanner::AssignTasksToNodes() { } } - if (!plan.empty()) { - for (auto& group : plan) { - for(ui64 taskId: group.TaskIds) { - auto [it, success] = alreadyAssigned.emplace(taskId, group.NodeId); - if (success) { - TasksPerNode[group.NodeId].push_back(taskId); - } + if (deepestTasks.size() <= placingOptions.MaxNonParallelTopStageExecutionLimit) { + // looks like the merge / union all connection + for(ui64 taskId: deepestTasks) { + auto [it, success] = alreadyAssigned.emplace(taskId, ExecuterId.NodeId()); + if (success) { + TasksPerNode[ExecuterId.NodeId()].push_back(taskId); } } + } - return nullptr; - } 
else { - LogMemoryStatistics([TxId = TxId, &UserRequestContext = UserRequestContext](TStringBuf msg) { LOG_E(msg); }); + auto planner = (UseMockEmptyPlanner ? CreateKqpMockEmptyPlanner() : CreateKqpGreedyPlanner()); // KqpMockEmptyPlanner is a mock planner for tests - auto ev = MakeHolder(NYql::NDqProto::StatusIds::PRECONDITION_FAILED, - TStringBuilder() << "Not enough resources to execute query. " << "TraceId: " << UserRequestContext->TraceId); - return std::make_unique(ExecuterId, ExecuterId, ev.Release()); + auto ctx = TlsActivationContext->AsActorContext(); + if (ctx.LoggerSettings() && ctx.LoggerSettings()->Satisfies(NActors::NLog::PRI_DEBUG, NKikimrServices::KQP_EXECUTER)) { + planner->SetLogFunc([TxId = TxId, &UserRequestContext = UserRequestContext](TStringBuf msg) { LOG_D(msg); }); } -} -const IKqpGateway::TKqpSnapshot& TKqpPlanner::GetSnapshot() const { - return TasksGraph.GetMeta().Snapshot; -} + LogMemoryStatistics([TxId = TxId, &UserRequestContext = UserRequestContext](TStringBuf msg) { LOG_D(msg); }); -// optimizeProtoForLocalExecution - if we want to execute compute actor locally and don't want to serialize & then deserialize proto message -// instead we just give ptr to proto message and after that we swap/copy it -void TKqpPlanner::ExecuteDataComputeTask(ui64 taskId, bool shareMailbox, bool optimizeProtoForLocalExecution) { + ui64 selfNodeId = ExecuterId.NodeId(); + TString selfNodeDC; - auto& task = TasksGraph.GetTask(taskId); - NYql::NDqProto::TDqTask* taskDesc = ArenaSerializeTaskToProto(TasksGraph, task, /* serializeAsyncIoSettings = */ !optimizeProtoForLocalExecution); + TVector allNodes; + TVector executerDcNodes; + allNodes.reserve(ResourcesSnapshot.size()); - NYql::NDq::TComputeRuntimeSettings settings; - if (Deadline) { - settings.Timeout = Deadline - TAppData::TimeProvider->Now(); + for(auto& snapNode: ResourcesSnapshot) { + const TString& dc = snapNode.GetKqpProxyNodeResources().GetDataCenterId(); + if (snapNode.GetNodeId() == 
selfNodeId) { + selfNodeDC = dc; + break; + } } - settings.ExtraMemoryAllocationPool = NRm::EKqpMemoryPool::DataQuery; - settings.FailOnUndelivery = true; - settings.StatsMode = GetDqStatsMode(StatsMode); - settings.UseSpilling = WithSpilling; + for(auto& snapNode: ResourcesSnapshot) { + allNodes.push_back(&snapNode); + if (selfNodeDC == snapNode.GetKqpProxyNodeResources().GetDataCenterId()) { + executerDcNodes.push_back(&snapNode); + } + } - NYql::NDq::TComputeMemoryLimits limits; - limits.ChannelBufferSize = 32_MB; // Depends on NYql::NDq::TDqOutputChannelSettings::ChunkSizeLimit (now 48 MB) with a ratio of 1.5 - limits.OutputChunkMaxSize = OutputChunkMaxSize; - limits.MkqlLightProgramMemoryLimit = MkqlMemoryLimit > 0 ? std::min(500_MB, MkqlMemoryLimit) : 500_MB; - limits.MkqlHeavyProgramMemoryLimit = MkqlMemoryLimit > 0 ? std::min(2_GB, MkqlMemoryLimit) : 2_GB; + TVector plan; + + if (!executerDcNodes.empty() && placingOptions.PreferLocalDatacenterExecution) { + plan = planner->Plan(executerDcNodes, ResourceEstimations); + } - auto& taskOpts = taskDesc->GetProgram().GetSettings(); - auto limit = taskOpts.GetHasMapJoin() /* || opts.GetHasSort()*/ - ? limits.MkqlHeavyProgramMemoryLimit - : limits.MkqlLightProgramMemoryLimit; + if (plan.empty()) { + plan = planner->Plan(allNodes, ResourceEstimations); + } - limits.MemoryQuotaManager = std::make_shared(limit * 2, limit); + if (plan.empty()) { + LogMemoryStatistics([TxId = TxId, &UserRequestContext = UserRequestContext](TStringBuf msg) { LOG_E(msg); }); - auto computeActor = NKikimr::NKqp::CreateKqpComputeActor(ExecuterId, TxId, taskDesc, AsyncIoFactory, - settings, limits, ExecuterSpan.GetTraceId(), TasksGraph.GetMeta().GetArenaIntrusivePtr(), FederatedQuerySetup, GUCSettings); + auto ev = MakeHolder(NYql::NDqProto::StatusIds::PRECONDITION_FAILED, + TStringBuilder() << "Not enough resources to execute query. 
" << "TraceId: " << UserRequestContext->TraceId); + return std::make_unique(ExecuterId, ExecuterId, ev.Release()); + } - if (optimizeProtoForLocalExecution) { - TVector& taskSourceSettings = static_cast(computeActor)->MutableTaskSourceSettings(); - taskSourceSettings.assign(task.Inputs.size(), nullptr); - for (size_t i = 0; i < task.Inputs.size(); ++i) { - const auto input = task.Inputs[i]; - if (input.Type() == NYql::NDq::TTaskInputType::Source && Y_LIKELY(input.Meta.SourceSettings)) { - taskSourceSettings[i] = (&(*input.Meta.SourceSettings)); + for (auto& group : plan) { + for(ui64 taskId: group.TaskIds) { + auto [it, success] = alreadyAssigned.emplace(taskId, group.NodeId); + if (success) { + TasksPerNode[group.NodeId].push_back(taskId); } } } - auto computeActorId = shareMailbox ? TlsActivationContext->AsActorContext().RegisterWithSameMailbox(computeActor) : TlsActivationContext->AsActorContext().Register(computeActor); - task.ComputeActorId = computeActorId; + return nullptr; +} + +const IKqpGateway::TKqpSnapshot& TKqpPlanner::GetSnapshot() const { + return TasksGraph.GetMeta().Snapshot; +} + +// optimizeProtoForLocalExecution - if we want to execute compute actor locally and don't want to serialize & then deserialize proto message +// instead we just give ptr to proto message and after that we swap/copy it +TString TKqpPlanner::ExecuteDataComputeTask(ui64 taskId, ui32 computeTasksSize) { + auto& task = TasksGraph.GetTask(taskId); + NYql::NDqProto::TDqTask* taskDesc = ArenaSerializeTaskToProto(TasksGraph, task, true); + NYql::NDq::TComputeRuntimeSettings settings; + if (!TxInfo) { + TxInfo = MakeIntrusive( + TxId, TInstant::Now(), ResourceManager_->GetCounters()); + } + + auto startResult = CaFactory_->CreateKqpComputeActor({ + .ExecuterId = ExecuterId, + .TxId = TxId, + .LockTxId = LockTxId, + .LockNodeId = LockNodeId, + .Task = taskDesc, + .TxInfo = TxInfo, + .RuntimeSettings = settings, + .TraceId = NWilson::TTraceId(ExecuterSpan.GetTraceId()), + .Arena = 
TasksGraph.GetMeta().GetArenaIntrusivePtr(), + .SerializedGUCSettings = SerializedGUCSettings, + .NumberOfTasks = computeTasksSize, + .OutputChunkMaxSize = OutputChunkMaxSize, + .MemoryPool = NRm::EKqpMemoryPool::DataQuery, + .WithSpilling = WithSpilling, + .StatsMode = GetDqStatsMode(StatsMode), + .Deadline = Deadline, + .ShareMailbox = (computeTasksSize <= 1), + .RlPath = Nothing() + }); + + if (const auto* rmResult = std::get_if(&startResult)) { + return rmResult->GetFailReason(); + } + + TActorId* actorId = std::get_if(&startResult); + Y_ABORT_UNLESS(actorId); + task.ComputeActorId = *actorId; LOG_D("Executing task: " << taskId << " on compute actor: " << task.ComputeActorId); auto result = PendingComputeActors.emplace(task.ComputeActorId, TProgressStat()); YQL_ENSURE(result.second); + return TString(); } ui32 TKqpPlanner::GetnScanTasks() { @@ -415,9 +493,11 @@ std::unique_ptr TKqpPlanner::PlanExecution() { // explicit requirement to execute task on the same node because it has dependencies // on datashard tx. if (LocalComputeTasks) { - bool shareMailbox = (ComputeTasks.size() <= 1); for (ui64 taskId : ComputeTasks) { - ExecuteDataComputeTask(taskId, shareMailbox, /* optimizeProtoForLocalExecution = */ true); + auto result = ExecuteDataComputeTask(taskId, ComputeTasks.size()); + if (!result.empty()) { + return MakeActorStartFailureError(ExecuterId, result); + } } ComputeTasks.clear(); } @@ -427,7 +507,10 @@ std::unique_ptr TKqpPlanner::PlanExecution() { // to execute this task locally so we can avoid useless overhead for remote task launching. 
for (auto& [shardId, tasks]: TasksPerNode) { for (ui64 taskId: tasks) { - ExecuteDataComputeTask(taskId, true, /* optimizeProtoForLocalExecution = */ true); + auto result = ExecuteDataComputeTask(taskId, tasks.size()); + if (!result.empty()) { + return MakeActorStartFailureError(ExecuterId, result); + } } } @@ -452,9 +535,12 @@ std::unique_ptr TKqpPlanner::PlanExecution() { auto tasksOnNodeIt = TasksPerNode.find(ExecuterId.NodeId()); if (tasksOnNodeIt != TasksPerNode.end()) { auto& tasks = tasksOnNodeIt->second; - const bool shareMailbox = (tasks.size() <= 1); for (ui64 taskId: tasks) { - ExecuteDataComputeTask(taskId, shareMailbox, /* optimizeProtoForLocalExecution = */ true); + auto result = ExecuteDataComputeTask(taskId, tasks.size()); + if (!result.empty()) { + return MakeActorStartFailureError(ExecuterId, result); + } + PendingComputeTasks.erase(taskId); } } @@ -506,8 +592,6 @@ THashSet& TKqpPlanner::GetPendingComputeTasks() { } void TKqpPlanner::PrepareToProcess() { - auto rmConfig = GetKqpResourceManager()->GetConfig(); - ui32 tasksCount = ComputeTasks.size(); for (auto& [shardId, tasks] : TasksPerNode) { tasksCount += tasks.size(); @@ -518,7 +602,7 @@ void TKqpPlanner::PrepareToProcess() { for (size_t i = 0; i < ComputeTasks.size(); ++i) { BuildInitialTaskResources(TasksGraph, ComputeTasks[i], ResourceEstimations[i]); - EstimateTaskResources(rmConfig, ResourceEstimations[i], ComputeTasks.size()); + ResourceManager_->EstimateTaskResources(ResourceEstimations[i], ComputeTasks.size()); LocalRunMemoryEst += ResourceEstimations[i].TotalMemoryLimit; } @@ -526,7 +610,7 @@ void TKqpPlanner::PrepareToProcess() { for(auto& [nodeId, tasks] : TasksPerNode) { for (ui64 taskId: tasks) { BuildInitialTaskResources(TasksGraph, taskId, ResourceEstimations[currentEst]); - EstimateTaskResources(rmConfig, ResourceEstimations[currentEst], tasks.size()); + ResourceManager_->EstimateTaskResources(ResourceEstimations[currentEst], tasks.size()); LocalRunMemoryEst += 
ResourceEstimations[currentEst].TotalMemoryLimit; ++currentEst; } diff --git a/ydb/core/kqp/executer_actor/kqp_planner.h b/ydb/core/kqp/executer_actor/kqp_planner.h index 01efd3e79454..e868cc84f6a0 100644 --- a/ydb/core/kqp/executer_actor/kqp_planner.h +++ b/ydb/core/kqp/executer_actor/kqp_planner.h @@ -43,6 +43,8 @@ class TKqpPlanner { struct TArgs { TKqpTasksGraph& TasksGraph; const ui64 TxId; + const TMaybe LockTxId; + const ui32 LockNodeId; const TActorId& Executer; const IKqpGateway::TKqpSnapshot& Snapshot; const TString& Database; @@ -64,6 +66,8 @@ class TKqpPlanner { const ui64 OutputChunkMaxSize = 0; const TGUCSettings::TPtr GUCSettings; const bool MayRunTasksLocally = false; + const std::shared_ptr& ResourceManager_; + const std::shared_ptr& CaFactory_; }; TKqpPlanner(TKqpPlanner::TArgs&& args); @@ -83,7 +87,7 @@ class TKqpPlanner { private: const IKqpGateway::TKqpSnapshot& GetSnapshot() const; - void ExecuteDataComputeTask(ui64 taskId, bool shareMailbox, bool optimizeProtoForLocalExecution); + TString ExecuteDataComputeTask(ui64 taskId, ui32 computeTasksSize); void PrepareToProcess(); TString GetEstimationsInfo() const; @@ -94,6 +98,8 @@ class TKqpPlanner { private: const ui64 TxId; + const TMaybe LockTxId; + const ui32 LockNodeId; const TActorId ExecuterId; TVector ComputeTasks; THashMap> TasksPerNode; @@ -128,6 +134,10 @@ class TKqpPlanner { const ui64 OutputChunkMaxSize; const TGUCSettings::TPtr GUCSettings; const bool MayRunTasksLocally; + TString SerializedGUCSettings; + std::shared_ptr ResourceManager_; + std::shared_ptr CaFactory_; + TIntrusivePtr TxInfo; public: static bool UseMockEmptyPlanner; // for tests: if true then use TKqpMockEmptyPlanner that leads to the error diff --git a/ydb/core/kqp/executer_actor/kqp_planner_strategy.cpp b/ydb/core/kqp/executer_actor/kqp_planner_strategy.cpp index ce395eb98ec5..0e3755ec4e53 100644 --- a/ydb/core/kqp/executer_actor/kqp_planner_strategy.cpp +++ b/ydb/core/kqp/executer_actor/kqp_planner_strategy.cpp @@ 
-90,16 +90,16 @@ class TNodesManager { return result; } - TNodesManager(const TVector& nodeResources) { + TNodesManager(const TVector& nodeResources) { for (auto& node : nodeResources) { - if (!node.GetAvailableComputeActors()) { + if (!node->GetAvailableComputeActors()) { continue; } Nodes.emplace_back(TNodeDesc{ - node.GetNodeId(), - ActorIdFromProto(node.GetResourceManagerActorId()), - node.GetTotalMemory() - node.GetUsedMemory(), - node.GetAvailableComputeActors(), + node->GetNodeId(), + ActorIdFromProto(node->GetResourceManagerActorId()), + node->GetTotalMemory() - node->GetUsedMemory(), + node->GetAvailableComputeActors(), {} }); } @@ -111,7 +111,7 @@ class TKqpGreedyPlanner : public IKqpPlannerStrategy { public: ~TKqpGreedyPlanner() override {} - TVector Plan(const TVector& nodeResources, + TVector Plan(const TVector& nodeResources, const TVector& tasks) override { TVector result; @@ -161,7 +161,7 @@ class TKqpMockEmptyPlanner : public IKqpPlannerStrategy { public: ~TKqpMockEmptyPlanner() override {} - TVector Plan(const TVector&, + TVector Plan(const TVector&, const TVector&) override { return {}; diff --git a/ydb/core/kqp/executer_actor/kqp_planner_strategy.h b/ydb/core/kqp/executer_actor/kqp_planner_strategy.h index 548e18e1511b..841515f93c48 100644 --- a/ydb/core/kqp/executer_actor/kqp_planner_strategy.h +++ b/ydb/core/kqp/executer_actor/kqp_planner_strategy.h @@ -23,7 +23,7 @@ class IKqpPlannerStrategy { TVector TaskIds; }; - virtual TVector Plan(const TVector& nodeResources, + virtual TVector Plan(const TVector& nodeResources, const TVector& estimatedResources) = 0; protected: diff --git a/ydb/core/kqp/executer_actor/kqp_scan_executer.cpp b/ydb/core/kqp/executer_actor/kqp_scan_executer.cpp index 771b15510290..e98361797ca1 100644 --- a/ydb/core/kqp/executer_actor/kqp_scan_executer.cpp +++ b/ydb/core/kqp/executer_actor/kqp_scan_executer.cpp @@ -339,7 +339,10 @@ class TKqpScanExecuter : public TKqpExecuterBase { break; } + case 
NKqpProto::TKqpSchemeOperation::kCreateTopic: { + const auto& modifyScheme = schemeOp.GetCreateTopic(); + ev->Record.MutableTransaction()->MutableModifyScheme()->CopyFrom(modifyScheme); + break; + } + + case NKqpProto::TKqpSchemeOperation::kAlterTopic: { + const auto& modifyScheme = schemeOp.GetAlterTopic(); + ev->Record.MutableTransaction()->MutableModifyScheme()->CopyFrom(modifyScheme); + break; + } + + case NKqpProto::TKqpSchemeOperation::kDropTopic: { + const auto& modifyScheme = schemeOp.GetDropTopic(); + ev->Record.MutableTransaction()->MutableModifyScheme()->CopyFrom(modifyScheme); + break; + } + default: InternalError(TStringBuilder() << "Unexpected scheme operation: " << (ui32) schemeOp.GetOperationCase()); @@ -435,7 +453,7 @@ class TKqpSchemeExecuter : public TActorBootstrapped { } void Handle(TEvPrivate::TEvMakeTempDirResult::TPtr& result) { - if (!result->Get()->Result.Success()) { + if (!result->Get()->Result.Success()) { InternalError(TStringBuilder() << "Error creating temporary directory for session " << SessionId << ": " << result->Get()->Result.Issues().ToString(true)); diff --git a/ydb/core/kqp/executer_actor/kqp_tasks_graph.cpp b/ydb/core/kqp/executer_actor/kqp_tasks_graph.cpp index b89b83e45785..fb53a62369bb 100644 --- a/ydb/core/kqp/executer_actor/kqp_tasks_graph.cpp +++ b/ydb/core/kqp/executer_actor/kqp_tasks_graph.cpp @@ -536,7 +536,8 @@ bool IsCrossShardChannel(const TKqpTasksGraph& tasksGraph, const TChannel& chann return false; } - return targetShard != tasksGraph.GetTask(channel.SrcTask).Meta.ShardId; + ui64 srcShard = tasksGraph.GetTask(channel.SrcTask).Meta.ShardId; + return srcShard && targetShard != srcShard; } void TShardKeyRanges::AddPoint(TSerializedCellVec&& point) { @@ -1127,6 +1128,7 @@ void FillInputDesc(const TKqpTasksGraph& tasksGraph, NYql::NDqProto::TTaskInput& if (lockTxId) { input.Meta.StreamLookupSettings->SetLockTxId(*lockTxId); + input.Meta.StreamLookupSettings->SetLockNodeId(tasksGraph.GetMeta().LockNodeId); } 
transformProto->MutableSettings()->PackFrom(*input.Meta.StreamLookupSettings); } else if (input.Meta.SequencerSettings) { diff --git a/ydb/core/kqp/executer_actor/kqp_tasks_graph.h b/ydb/core/kqp/executer_actor/kqp_tasks_graph.h index e9141c7d5f84..fe0d1a42fbb3 100644 --- a/ydb/core/kqp/executer_actor/kqp_tasks_graph.h +++ b/ydb/core/kqp/executer_actor/kqp_tasks_graph.h @@ -91,6 +91,7 @@ struct TStageInfoMeta { struct TGraphMeta { IKqpGateway::TKqpSnapshot Snapshot; TMaybe LockTxId; + ui32 LockNodeId; std::unordered_map ResultChannelProxies; TActorId ExecuterId; bool UseFollowers = false; @@ -117,6 +118,10 @@ struct TGraphMeta { void SetLockTxId(TMaybe lockTxId) { LockTxId = lockTxId; } + + void SetLockNodeId(ui32 lockNodeId) { + LockNodeId = lockNodeId; + } }; struct TTaskInputMeta { diff --git a/ydb/core/kqp/executer_actor/ut/kqp_executer_ut.cpp b/ydb/core/kqp/executer_actor/ut/kqp_executer_ut.cpp index 4889ee332b27..b358f0efcf1c 100644 --- a/ydb/core/kqp/executer_actor/ut/kqp_executer_ut.cpp +++ b/ydb/core/kqp/executer_actor/ut/kqp_executer_ut.cpp @@ -8,6 +8,7 @@ #include #include +#include namespace NKikimr { namespace NKqp { @@ -28,7 +29,7 @@ NKqpProto::TKqpPhyTx BuildTxPlan(const TString& sql, TIntrusivePtr IModuleResolver::TPtr moduleResolver; UNIT_ASSERT(GetYqlDefaultModuleResolver(moduleCtx, moduleResolver)); - auto qp = CreateKqpHost(gateway, cluster, "/Root", config, moduleResolver, NYql::IHTTPGateway::Make(), nullptr, nullptr, Nothing(), nullptr, nullptr, false, false, nullptr, actorSystem); + auto qp = CreateKqpHost(gateway, cluster, "/Root", config, moduleResolver, NYql::IHTTPGateway::Make(), nullptr, nullptr, NKikimrConfig::TQueryServiceConfig(), Nothing(), nullptr, nullptr, false, false, nullptr, actorSystem, nullptr); auto result = qp->SyncPrepareDataQuery(sql, IKqpHost::TPrepareSettings()); result.Issues().PrintTo(Cerr); UNIT_ASSERT(result.Success()); diff --git a/ydb/core/kqp/expr_nodes/kqp_expr_nodes.json 
b/ydb/core/kqp/expr_nodes/kqp_expr_nodes.json index 205fecddc662..532ed349b4e3 100644 --- a/ydb/core/kqp/expr_nodes/kqp_expr_nodes.json +++ b/ydb/core/kqp/expr_nodes/kqp_expr_nodes.json @@ -209,7 +209,10 @@ { "Name": "TKqlStreamLookupIndex", "Base": "TKqlLookupIndexBase", - "Match": {"Type": "Callable", "Name": "KqlStreamLookupIndex"} + "Match": {"Type": "Callable", "Name": "KqlStreamLookupIndex"}, + "Children": [ + {"Index": 4, "Name": "LookupStrategy", "Type": "TCoAtom"} + ] }, { "Name": "TKqlEffectBase", diff --git a/ydb/core/kqp/gateway/behaviour/resource_pool/manager.cpp b/ydb/core/kqp/gateway/behaviour/resource_pool/manager.cpp index 42c4687a19c6..d3196d1f2f85 100644 --- a/ydb/core/kqp/gateway/behaviour/resource_pool/manager.cpp +++ b/ydb/core/kqp/gateway/behaviour/resource_pool/manager.cpp @@ -138,8 +138,14 @@ void FillResourcePoolDescription(NKikimrSchemeOp::TResourcePoolDescription& reso } if (settings.GetObjectId() == NResourcePool::DEFAULT_POOL_ID) { - if (properties.contains("concurrent_query_limit")) { - ythrow yexception() << "Can not change property concurrent_query_limit for default pool"; + std::vector forbiddenProperties = { + "concurrent_query_limit", + "database_load_cpu_threshold" + }; + for (const TString& property : forbiddenProperties) { + if (properties.contains(property)) { + ythrow yexception() << "Can not change property " << property << " for default pool"; + } } } } @@ -186,19 +192,19 @@ TResourcePoolManager::TAsyncStatus TResourcePoolManager::DoModify(const NYql::TO TResourcePoolManager::TAsyncStatus TResourcePoolManager::CreateResourcePool(const NYql::TCreateObjectSettings& settings, TInternalModificationContext& context, ui32 nodeId) const { NKqpProto::TKqpSchemeOperation schemeOperation; PrepareCreateResourcePool(schemeOperation, settings, context); - return ExecuteSchemeRequest(schemeOperation.GetCreateResourcePool(), context.GetExternalData(), nodeId); + return ExecuteSchemeRequest(schemeOperation.GetCreateResourcePool(), 
context.GetExternalData(), nodeId, NKqpProto::TKqpSchemeOperation::kCreateResourcePool); } TResourcePoolManager::TAsyncStatus TResourcePoolManager::AlterResourcePool(const NYql::TCreateObjectSettings& settings, TInternalModificationContext& context, ui32 nodeId) const { NKqpProto::TKqpSchemeOperation schemeOperation; PrepareAlterResourcePool(schemeOperation, settings, context); - return ExecuteSchemeRequest(schemeOperation.GetAlterResourcePool(), context.GetExternalData(), nodeId); + return ExecuteSchemeRequest(schemeOperation.GetAlterResourcePool(), context.GetExternalData(), nodeId, NKqpProto::TKqpSchemeOperation::kAlterResourcePool); } TResourcePoolManager::TAsyncStatus TResourcePoolManager::DropResourcePool(const NYql::TCreateObjectSettings& settings, TInternalModificationContext& context, ui32 nodeId) const { NKqpProto::TKqpSchemeOperation schemeOperation; PrepareDropResourcePool(schemeOperation, settings, context); - return ExecuteSchemeRequest(schemeOperation.GetDropResourcePool(), context.GetExternalData(), nodeId); + return ExecuteSchemeRequest(schemeOperation.GetDropResourcePool(), context.GetExternalData(), nodeId, NKqpProto::TKqpSchemeOperation::kDropResourcePool); } //// Deferred modification @@ -265,11 +271,11 @@ TResourcePoolManager::TAsyncStatus TResourcePoolManager::ExecutePrepared(const N try { switch (schemeOperation.GetOperationCase()) { case NKqpProto::TKqpSchemeOperation::kCreateResourcePool: - return ExecuteSchemeRequest(schemeOperation.GetCreateResourcePool(), context, nodeId); + return ExecuteSchemeRequest(schemeOperation.GetCreateResourcePool(), context, nodeId, schemeOperation.GetOperationCase()); case NKqpProto::TKqpSchemeOperation::kAlterResourcePool: - return ExecuteSchemeRequest(schemeOperation.GetAlterResourcePool(), context, nodeId); + return ExecuteSchemeRequest(schemeOperation.GetAlterResourcePool(), context, nodeId, schemeOperation.GetOperationCase()); case NKqpProto::TKqpSchemeOperation::kDropResourcePool: - return 
ExecuteSchemeRequest(schemeOperation.GetDropResourcePool(), context, nodeId); + return ExecuteSchemeRequest(schemeOperation.GetDropResourcePool(), context, nodeId, schemeOperation.GetOperationCase()); default: return NThreading::MakeFuture(TYqlConclusionStatus::Fail(TStringBuilder() << "Execution of prepare operation for RESOURCE_POOL object: unsupported operation: " << static_cast(schemeOperation.GetOperationCase()))); } @@ -288,8 +294,13 @@ TResourcePoolManager::TAsyncStatus TResourcePoolManager::ChainFeatures(TAsyncSta }); } -TResourcePoolManager::TAsyncStatus TResourcePoolManager::ExecuteSchemeRequest(const NKikimrSchemeOp::TModifyScheme& schemeTx, const TExternalModificationContext& context, ui32 nodeId) const { - auto validationFuture = CheckFeatureFlag(context, nodeId); +TResourcePoolManager::TAsyncStatus TResourcePoolManager::ExecuteSchemeRequest(const NKikimrSchemeOp::TModifyScheme& schemeTx, const TExternalModificationContext& context, ui32 nodeId, NKqpProto::TKqpSchemeOperation::OperationCase operationCase) const { + TAsyncStatus validationFuture = NThreading::MakeFuture(TYqlConclusionStatus::Success()); + if (operationCase != NKqpProto::TKqpSchemeOperation::kDropResourcePool) { + validationFuture = ChainFeatures(validationFuture, [context, nodeId] { + return CheckFeatureFlag(context, nodeId); + }); + } return ChainFeatures(validationFuture, [schemeTx, context] { return SendSchemeRequest(schemeTx, context); }); diff --git a/ydb/core/kqp/gateway/behaviour/resource_pool/manager.h b/ydb/core/kqp/gateway/behaviour/resource_pool/manager.h index edab893dbe6c..64406f78e328 100644 --- a/ydb/core/kqp/gateway/behaviour/resource_pool/manager.h +++ b/ydb/core/kqp/gateway/behaviour/resource_pool/manager.h @@ -31,7 +31,7 @@ class TResourcePoolManager : public NMetadata::NModifications::IOperationsManage void PrepareDropResourcePool(NKqpProto::TKqpSchemeOperation& schemeOperation, const NYql::TDropObjectSettings& settings, TInternalModificationContext& context) const; 
TAsyncStatus ChainFeatures(TAsyncStatus lastFeature, std::function callback) const; - TAsyncStatus ExecuteSchemeRequest(const NKikimrSchemeOp::TModifyScheme& schemeTx, const TExternalModificationContext& context, ui32 nodeId) const; + TAsyncStatus ExecuteSchemeRequest(const NKikimrSchemeOp::TModifyScheme& schemeTx, const TExternalModificationContext& context, ui32 nodeId, NKqpProto::TKqpSchemeOperation::OperationCase operationCase) const; }; } // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_column.cpp b/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_column.cpp index 1467a61bc38e..b000a2fd94a4 100644 --- a/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_column.cpp +++ b/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_column.cpp @@ -16,15 +16,21 @@ TConclusionStatus TAlterColumnOperation::DoDeserialize(NYql::TObjectSettingsImpl if (StorageId && !*StorageId) { return TConclusionStatus::Fail("STORAGE_ID cannot be empty string"); } + { + auto status = AccessorConstructor.DeserializeFromRequest(features); + if (status.IsFail()) { + return status; + } + } { auto result = DictionaryEncodingDiff.DeserializeFromRequestFeatures(features); - if (!result) { - return TConclusionStatus::Fail(result.GetErrorMessage()); + if (result.IsFail()) { + return result; } } { auto status = Serializer.DeserializeFromRequest(features); - if (!status) { + if (status.IsFail()) { return status; } } @@ -40,6 +46,9 @@ void TAlterColumnOperation::DoSerializeScheme(NKikimrSchemeOp::TAlterColumnTable if (!!Serializer) { Serializer.SerializeToProto(*column->MutableSerializer()); } + if (!!AccessorConstructor) { + *column->MutableDataAccessorConstructor() = AccessorConstructor.SerializeToProto(); + } *column->MutableDictionaryEncoding() = DictionaryEncodingDiff.SerializeToProto(); if (DefaultValue) { column->SetDefaultValue(*DefaultValue); diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_column.h 
b/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_column.h index ee51b47bb8df..23d1aef28abb 100644 --- a/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_column.h +++ b/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_column.h @@ -1,4 +1,5 @@ #include "abstract.h" +#include #include #include @@ -18,6 +19,7 @@ class TAlterColumnOperation : public ITableStoreOperation { NArrow::NSerialization::TSerializerContainer Serializer; NArrow::NDictionary::TEncodingDiff DictionaryEncodingDiff; std::optional DefaultValue; + NArrow::NAccessor::TRequestedConstructorContainer AccessorConstructor; public: TConclusionStatus DoDeserialize(NYql::TObjectSettingsImpl::TFeaturesExtractor& features) override; diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_sharding.cpp b/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_sharding.cpp index fdc831d424d3..fc0e3b0d262e 100644 --- a/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_sharding.cpp +++ b/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_sharding.cpp @@ -1,4 +1,5 @@ #include "alter_sharding.h" +#include #include namespace NKikimr::NKqp { @@ -26,4 +27,8 @@ void TAlterShardingOperation::DoSerializeScheme(NKikimrSchemeOp::TModifyScheme& scheme.MutableAlterColumnTable()->MutableReshardColumnTable()->SetIncrease(*Increase); } +void TAlterShardingOperation::DoSerializeScheme(NKikimrSchemeOp::TAlterColumnTableSchema& /*scheme*/) const { + AFL_VERIFY(false); +} + } diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_sharding.h b/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_sharding.h index 52f58e14d7b5..cb81ee36da68 100644 --- a/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_sharding.h +++ b/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_sharding.h @@ -1,5 +1,4 @@ #include "abstract.h" -#include namespace NKikimr::NKqp { @@ -12,9 +11,7 @@ class TAlterShardingOperation: public ITableStoreOperation { 
static inline const auto Registrator = TFactory::TRegistrator(GetTypeName()); private: std::optional Increase; - virtual void DoSerializeScheme(NKikimrSchemeOp::TAlterColumnTableSchema& /*scheme*/) const override { - AFL_VERIFY(false); - } + virtual void DoSerializeScheme(NKikimrSchemeOp::TAlterColumnTableSchema& /*scheme*/) const override; virtual void DoSerializeScheme(NKikimrSchemeOp::TModifyScheme& scheme, const bool isStandalone) const override; public: diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/drop_stat.cpp b/ydb/core/kqp/gateway/behaviour/tablestore/operations/drop_stat.cpp deleted file mode 100644 index 94a18e7e4140..000000000000 --- a/ydb/core/kqp/gateway/behaviour/tablestore/operations/drop_stat.cpp +++ /dev/null @@ -1,21 +0,0 @@ -#include "drop_stat.h" -#include - -namespace NKikimr::NKqp { - -TConclusionStatus TDropStatOperation::DoDeserialize(NYql::TObjectSettingsImpl::TFeaturesExtractor& features) { - { - auto fValue = features.Extract("NAME"); - if (!fValue) { - return TConclusionStatus::Fail("can't find parameter NAME"); - } - Name = *fValue; - } - return TConclusionStatus::Success(); -} - -void TDropStatOperation::DoSerializeScheme(NKikimrSchemeOp::TAlterColumnTableSchema& schemaData) const { - *schemaData.AddDropStatistics() = Name; -} - -} diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/drop_stat.h b/ydb/core/kqp/gateway/behaviour/tablestore/operations/drop_stat.h deleted file mode 100644 index 777aae036858..000000000000 --- a/ydb/core/kqp/gateway/behaviour/tablestore/operations/drop_stat.h +++ /dev/null @@ -1,19 +0,0 @@ -#include "abstract.h" - -namespace NKikimr::NKqp { - -class TDropStatOperation : public ITableStoreOperation { - static TString GetTypeName() { - return "DROP_STAT"; - } - - static inline auto Registrator = TFactory::TRegistrator(GetTypeName()); -private: - TString Name; -public: - TConclusionStatus DoDeserialize(NYql::TObjectSettingsImpl::TFeaturesExtractor& features) override; - void 
DoSerializeScheme(NKikimrSchemeOp::TAlterColumnTableSchema& schemaData) const override; -}; - -} - diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_index.cpp b/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_index.cpp index ae0f08e3333d..61914cb6e005 100644 --- a/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_index.cpp +++ b/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_index.cpp @@ -12,10 +12,6 @@ TConclusionStatus TUpsertIndexOperation::DoDeserialize(NYql::TObjectSettingsImpl } IndexName = *fValue; } - StorageId = features.Extract("STORAGE_ID"); - if (StorageId && !*StorageId) { - return TConclusionStatus::Fail("STORAGE_ID cannot be empty string"); - } TString indexType; { auto fValue = features.Extract("TYPE"); @@ -46,9 +42,6 @@ TConclusionStatus TUpsertIndexOperation::DoDeserialize(NYql::TObjectSettingsImpl void TUpsertIndexOperation::DoSerializeScheme(NKikimrSchemeOp::TAlterColumnTableSchema& schemaData) const { auto* indexProto = schemaData.AddUpsertIndexes(); - if (StorageId) { - indexProto->SetStorageId(*StorageId); - } indexProto->SetName(IndexName); IndexMetaConstructor.SerializeToProto(*indexProto); } diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_index.h b/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_index.h index 12305f85f0ae..267829a1a5f4 100644 --- a/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_index.h +++ b/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_index.h @@ -12,7 +12,6 @@ class TUpsertIndexOperation : public ITableStoreOperation { static inline auto Registrator = TFactory::TRegistrator(GetTypeName()); private: TString IndexName; - std::optional StorageId; NBackgroundTasks::TInterfaceProtoContainer IndexMetaConstructor; public: TConclusionStatus DoDeserialize(NYql::TObjectSettingsImpl::TFeaturesExtractor& features) override; diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_stat.cpp 
b/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_stat.cpp deleted file mode 100644 index 9e8360dd5e35..000000000000 --- a/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_stat.cpp +++ /dev/null @@ -1,49 +0,0 @@ -#include "upsert_stat.h" -#include -#include - -namespace NKikimr::NKqp { - -TConclusionStatus TUpsertStatOperation::DoDeserialize(NYql::TObjectSettingsImpl::TFeaturesExtractor& features) { - { - auto fValue = features.Extract("NAME"); - if (!fValue) { - return TConclusionStatus::Fail("can't find alter parameter NAME"); - } - Name = *fValue; - } - TString type; - { - auto fValue = features.Extract("TYPE"); - if (!fValue) { - return TConclusionStatus::Fail("can't find alter parameter TYPE"); - } - type = *fValue; - } - { - auto fValue = features.Extract("FEATURES"); - if (!fValue) { - return TConclusionStatus::Fail("can't find alter parameter FEATURES"); - } - if (!Constructor.Initialize(type)) { - return TConclusionStatus::Fail("can't initialize stat constructor object for type \"" + type + "\""); - } - NJson::TJsonValue jsonData; - if (!NJson::ReadJsonFastTree(*fValue, &jsonData)) { - return TConclusionStatus::Fail("incorrect json in request FEATURES parameter"); - } - auto result = Constructor->DeserializeFromJson(jsonData); - if (result.IsFail()) { - return result; - } - } - return TConclusionStatus::Success(); -} - -void TUpsertStatOperation::DoSerializeScheme(NKikimrSchemeOp::TAlterColumnTableSchema& schemaData) const { - auto* proto = schemaData.AddUpsertStatistics(); - proto->SetName(Name); - Constructor.SerializeToProto(*proto); -} - -} diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_stat.h b/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_stat.h deleted file mode 100644 index 5d8abdffae8d..000000000000 --- a/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_stat.h +++ /dev/null @@ -1,23 +0,0 @@ -#include "abstract.h" -#include - -namespace NKikimr::NKqp { - -class 
TUpsertStatOperation : public ITableStoreOperation { -private: - static TString GetTypeName() { - return "UPSERT_STAT"; - } - - static inline const auto Registrator = TFactory::TRegistrator(GetTypeName()); -private: - TString Name; - NOlap::NStatistics::TConstructorContainer Constructor; -public: - TConclusionStatus DoDeserialize(NYql::TObjectSettingsImpl::TFeaturesExtractor& features) override; - - void DoSerializeScheme(NKikimrSchemeOp::TAlterColumnTableSchema& schemaData) const override; -}; - -} - diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/ya.make b/ydb/core/kqp/gateway/behaviour/tablestore/operations/ya.make index 6094887573e1..e393435d9cc5 100644 --- a/ydb/core/kqp/gateway/behaviour/tablestore/operations/ya.make +++ b/ydb/core/kqp/gateway/behaviour/tablestore/operations/ya.make @@ -7,8 +7,6 @@ SRCS( GLOBAL drop_column.cpp GLOBAL upsert_index.cpp GLOBAL drop_index.cpp - GLOBAL upsert_stat.cpp - GLOBAL drop_stat.cpp GLOBAL upsert_opt.cpp GLOBAL alter_sharding.cpp ) @@ -16,7 +14,6 @@ SRCS( PEERDIR( ydb/services/metadata/manager ydb/core/formats/arrow/serializer - ydb/core/tx/columnshard/engines/scheme/statistics/abstract ydb/core/tx/columnshard/engines/storage/optimizer/abstract ydb/core/kqp/gateway/utils ydb/core/protos diff --git a/ydb/core/kqp/gateway/behaviour/view/manager.cpp b/ydb/core/kqp/gateway/behaviour/view/manager.cpp index cfe2a73ac7d5..069697d53baf 100644 --- a/ydb/core/kqp/gateway/behaviour/view/manager.cpp +++ b/ydb/core/kqp/gateway/behaviour/view/manager.cpp @@ -2,6 +2,8 @@ #include #include +#include +#include #include namespace NKikimr::NKqp { @@ -10,13 +12,9 @@ namespace { using TYqlConclusionStatus = TViewManager::TYqlConclusionStatus; using TInternalModificationContext = TViewManager::TInternalModificationContext; +using TExternalModificationContext = TViewManager::TExternalModificationContext; -TString GetByKeyOrDefault(const NYql::TCreateObjectSettings& container, const TString& key) { - const auto value = 
container.GetFeaturesExtractor().Extract(key); - return value ? *value : TString{}; -} - -TYqlConclusionStatus CheckFeatureFlag(TInternalModificationContext& context) { +TYqlConclusionStatus CheckFeatureFlag(const TInternalModificationContext& context) { auto* const actorSystem = context.GetExternalData().GetActorSystem(); if (!actorSystem) { ythrow yexception() << "This place needs an actor system. Please contact internal support"; @@ -36,28 +34,47 @@ std::pair SplitPathByDb(const TString& objectId, return pathPair; } +std::pair SplitPathByObjectId(const TString& objectId) { + std::pair pathPair; + TString error; + if (!NSchemeHelpers::TrySplitTablePath(objectId, pathPair, error)) { + ythrow TBadArgumentException() << error; + } + return pathPair; +} + +void ValidateOptions(NYql::TFeaturesExtractor& features) { + // Current implementation does not persist the security_invoker option value. + if (features.Extract("security_invoker") != "true") { + ythrow TBadArgumentException() << "security_invoker option must be explicitly enabled"; + } + if (!features.IsFinished()) { + ythrow TBadArgumentException() << "Unknown property: " << features.GetRemainedParamsString(); + } +} + void FillCreateViewProposal(NKikimrSchemeOp::TModifyScheme& modifyScheme, const NYql::TCreateObjectSettings& settings, - const TString& database) { + const TExternalModificationContext& context) { - const auto pathPair = SplitPathByDb(settings.GetObjectId(), database); + const auto pathPair = SplitPathByDb(settings.GetObjectId(), context.GetDatabase()); modifyScheme.SetWorkingDir(pathPair.first); modifyScheme.SetOperationType(NKikimrSchemeOp::ESchemeOpCreateView); auto& viewDesc = *modifyScheme.MutableCreateView(); viewDesc.SetName(pathPair.second); - viewDesc.SetQueryText(GetByKeyOrDefault(settings, "query_text")); - if (!settings.GetFeaturesExtractor().IsFinished()) { - ythrow TBadArgumentException() << "Unknown property: " << settings.GetFeaturesExtractor().GetRemainedParamsString(); - } + 
auto& features = settings.GetFeaturesExtractor(); + viewDesc.SetQueryText(features.Extract("query_text").value_or("")); + ValidateOptions(features); + + NSQLTranslation::Serialize(context.GetTranslationSettings(), *viewDesc.MutableCapturedContext()); } void FillDropViewProposal(NKikimrSchemeOp::TModifyScheme& modifyScheme, - const NYql::TDropObjectSettings& settings, - const TString& database) { + const NYql::TDropObjectSettings& settings) { - const auto pathPair = SplitPathByDb(settings.GetObjectId(), database); + const auto pathPair = SplitPathByObjectId(settings.GetObjectId()); modifyScheme.SetWorkingDir(pathPair.first); modifyScheme.SetOperationType(NKikimrSchemeOp::ESchemeOpDropView); @@ -83,41 +100,40 @@ NThreading::TFuture SendSchemeRequest(TEvTxUserProxy::TEvP } NThreading::TFuture CreateView(const NYql::TCreateObjectSettings& settings, - TInternalModificationContext& context) { + const TInternalModificationContext& context) { auto proposal = MakeHolder(); proposal->Record.SetDatabaseName(context.GetExternalData().GetDatabase()); if (context.GetExternalData().GetUserToken()) { proposal->Record.SetUserToken(context.GetExternalData().GetUserToken()->GetSerializedToken()); } auto& schemeTx = *proposal->Record.MutableTransaction()->MutableModifyScheme(); - FillCreateViewProposal(schemeTx, settings, context.GetExternalData().GetDatabase()); + FillCreateViewProposal(schemeTx, settings, context.GetExternalData()); return SendSchemeRequest(proposal.Release(), context.GetExternalData().GetActorSystem(), true); } NThreading::TFuture DropView(const NYql::TDropObjectSettings& settings, - TInternalModificationContext& context) { + const TInternalModificationContext& context) { auto proposal = MakeHolder(); proposal->Record.SetDatabaseName(context.GetExternalData().GetDatabase()); if (context.GetExternalData().GetUserToken()) { proposal->Record.SetUserToken(context.GetExternalData().GetUserToken()->GetSerializedToken()); } auto& schemeTx = 
*proposal->Record.MutableTransaction()->MutableModifyScheme(); - FillDropViewProposal(schemeTx, settings, context.GetExternalData().GetDatabase()); + FillDropViewProposal(schemeTx, settings); return SendSchemeRequest(proposal.Release(), context.GetExternalData().GetActorSystem(), false); } void PrepareCreateView(NKqpProto::TKqpSchemeOperation& schemeOperation, const NYql::TObjectSettingsImpl& settings, - TInternalModificationContext& context) { - FillCreateViewProposal(*schemeOperation.MutableCreateView(), settings, context.GetExternalData().GetDatabase()); + const TInternalModificationContext& context) { + FillCreateViewProposal(*schemeOperation.MutableCreateView(), settings, context.GetExternalData()); } void PrepareDropView(NKqpProto::TKqpSchemeOperation& schemeOperation, - const NYql::TObjectSettingsImpl& settings, - TInternalModificationContext& context) { - FillDropViewProposal(*schemeOperation.MutableDropView(), settings, context.GetExternalData().GetDatabase()); + const NYql::TObjectSettingsImpl& settings) { + FillDropViewProposal(*schemeOperation.MutableDropView(), settings); } } @@ -173,7 +189,7 @@ TViewManager::TYqlConclusionStatus TViewManager::DoPrepare(NKqpProto::TKqpScheme PrepareCreateView(schemeOperation, settings, context); break; case EActivityType::Drop: - PrepareDropView(schemeOperation, settings, context); + PrepareDropView(schemeOperation, settings); break; } } catch (...) 
{ diff --git a/ydb/core/kqp/gateway/behaviour/view/ya.make b/ydb/core/kqp/gateway/behaviour/view/ya.make index 6cb342036bda..7d57b8ceaab8 100644 --- a/ydb/core/kqp/gateway/behaviour/view/ya.make +++ b/ydb/core/kqp/gateway/behaviour/view/ya.make @@ -8,6 +8,7 @@ SRCS( PEERDIR( ydb/core/base ydb/core/kqp/gateway/actors + ydb/core/kqp/provider ydb/core/tx/tx_proxy ydb/services/metadata/abstract ydb/services/metadata/manager diff --git a/ydb/core/kqp/gateway/kqp_gateway.h b/ydb/core/kqp/gateway/kqp_gateway.h index 1c9bfdcac103..9a46611da4fa 100644 --- a/ydb/core/kqp/gateway/kqp_gateway.h +++ b/ydb/core/kqp/gateway/kqp_gateway.h @@ -27,6 +27,15 @@ class IRequestCtxMtSafe; } +namespace NKikimr::NKqp::NRm { + class IKqpResourceManager; +} + +namespace NKikimr::NKqp::NComputeActor { + struct IKqpNodeComputeActorFactory; +} + + namespace NKikimr::NKqp { const TStringBuf ParamNamePrefix = "%kqp%"; @@ -145,6 +154,8 @@ class IKqpGateway : public NYql::IKikimrGateway { Ydb::Table::QueryStatsCollection::Mode StatsMode = Ydb::Table::QueryStatsCollection::STATS_COLLECTION_NONE; TDuration ProgressStatsPeriod; TKqpSnapshot Snapshot = TKqpSnapshot(); + std::shared_ptr ResourceManager_; + std::shared_ptr CaFactory_; NKikimrKqp::EIsolationLevel IsolationLevel = NKikimrKqp::ISOLATION_LEVEL_UNDEFINED; TMaybe RlPath; bool NeedTxId = true; diff --git a/ydb/core/kqp/gateway/kqp_ic_gateway.cpp b/ydb/core/kqp/gateway/kqp_ic_gateway.cpp index e5d6bbfcf461..fa4359c4258a 100644 --- a/ydb/core/kqp/gateway/kqp_ic_gateway.cpp +++ b/ydb/core/kqp/gateway/kqp_ic_gateway.cpp @@ -226,7 +226,7 @@ class TKqpScanQueryRequestHandler : public TRequestHandlerBase< void HandleResponse(typename TResponse::TPtr &ev, const TActorContext &ctx) { auto& response = *ev->Get()->Record.GetRef().MutableResponse(); - NKikimr::ConvertYdbResultToKqpResult(ResultSet,*response.AddResults()); + response.AddYdbResults()->CopyFrom(ResultSet); for (auto& execStats : Executions) { 
response.MutableQueryStats()->AddExecutions()->Swap(&execStats); } @@ -285,20 +285,18 @@ class TKqpStreamRequestHandler : public TRequestHandlerBase< virtual void HandleResponse(typename TResponse::TPtr &ev, const TActorContext &ctx) { auto& record = ev->Get()->Record.GetRef(); if (record.GetYdbStatus() == Ydb::StatusIds::SUCCESS) { - if (record.MutableResponse()->GetResults().size()) { + if (record.MutableResponse()->GetYdbResults().size()) { // Send result sets to RPC actor TStreamExecuteYqlScriptRPC auto evStreamPart = MakeHolder(); ActorIdToProto(this->SelfId(), evStreamPart->Record.MutableGatewayActorId()); - for (int i = 0; i < record.MutableResponse()->MutableResults()->size(); ++i) { + for (int i = 0; i < record.MutableResponse()->MutableYdbResults()->size(); ++i) { // Workaround to avoid errors on Pull execution stage which would expect some results - Ydb::ResultSet resultSet; - NKikimr::ConvertYdbResultToKqpResult(resultSet, *evStreamPart->Record.AddResults()); + evStreamPart->Record.AddResults(); } - evStreamPart->Record.MutableResults()->Swap(record.MutableResponse()->MutableResults()); + evStreamPart->Record.MutableResults()->Swap(record.MutableResponse()->MutableYdbResults()); this->Send(TargetActorId, evStreamPart.Release()); - // Save response without data to send it later ResponseHandle = ev.Release(); } else { @@ -404,7 +402,7 @@ class TKqpForwardStreamRequestHandler : public TRequestHandlerBase< auto& response = *ev->Get()->Record.GetRef().MutableResponse(); Ydb::ResultSet resultSet; - NKikimr::ConvertYdbResultToKqpResult(resultSet, *response.AddResults()); + response.AddYdbResults()->CopyFrom(resultSet); for (auto& execStats : Executions) { response.MutableQueryStats()->AddExecutions()->Swap(&execStats); } @@ -510,7 +508,7 @@ class TKqpGenericQueryRequestHandler: public TRequestHandlerBase< auto& response = *ev->Get()->Record.GetRef().MutableResponse(); for (auto& resultSet : ResultSets) { - 
ConvertYdbResultToKqpResult(std::move(resultSet.ResultSet), *response.AddResults()); + response.AddYdbResults()->Swap(&resultSet.ResultSet); } TBase::HandleResponse(ev, ctx); @@ -671,8 +669,8 @@ void KqpResponseToQueryResult(const NKikimrKqp::TEvQueryResponse& response, IKqp queryResult.AddIssue(NYql::IssueFromMessage(issue)); } - for (auto& result : queryResponse.GetResults()) { - auto arenaResult = google::protobuf::Arena::CreateMessage( + for (auto& result : queryResponse.GetYdbResults()) { + auto arenaResult = google::protobuf::Arena::CreateMessage( queryResult.ProtobufArenaPtr.get()); arenaResult->CopyFrom(result); @@ -976,7 +974,11 @@ class TKikimrIcGateway : public IKqpGateway { return NotImplemented(); } - TFuture CreateTopic(const TString& cluster, Ydb::Topic::CreateTopicRequest&& request) override { + TFuture CreateTopic(const TString& cluster, Ydb::Topic::CreateTopicRequest&& request, bool existingOk) override { + if (existingOk) { + return MakeFuture(ResultFromError("IF NOT EXISTS statement is not supported for CREATE TOPIC in yql script")); + } + try { if (!CheckCluster(cluster)) { return InvalidCluster(cluster); @@ -988,9 +990,27 @@ class TKikimrIcGateway : public IKqpGateway { catch (yexception& e) { return MakeFuture(ResultFromException(e)); } + Y_UNUSED(existingOk); + } + + TFuture AlterTopicPrepared(NYql::TAlterTopicSettings&& settings) override { + auto schemaTxPromise = NewPromise(); + auto schemaTxFuture = schemaTxPromise.GetFuture(); + + NKikimr::NGRpcProxy::V1::TAlterTopicRequest request{ + std::move(settings.Request), settings.WorkDir, settings.Name, Database, GetTokenCompat(), + settings.MissingOk + }; + IActor* requestHandler = new NKikimr::NGRpcProxy::V1::TAlterTopicActorInternal(std::move(request), std::move(schemaTxPromise), settings.MissingOk); + RegisterActor(requestHandler); + return schemaTxFuture; } - TFuture AlterTopic(const TString& cluster, Ydb::Topic::AlterTopicRequest&& request) override { + TFuture AlterTopic(const TString& 
cluster, Ydb::Topic::AlterTopicRequest&& request, bool missingOk) override { + if (missingOk) { + return MakeFuture(ResultFromError("IF EXISTS statement is not supported for ALTER TOPIC in yql script")); + } + try { if (!CheckCluster(cluster)) { return InvalidCluster(cluster); @@ -1004,7 +1024,11 @@ class TKikimrIcGateway : public IKqpGateway { } } - TFuture DropTopic(const TString& cluster, const TString& topic) override { + TFuture DropTopic(const TString& cluster, const TString& topic, bool missingOk) override { + if (missingOk) { + return MakeFuture(ResultFromError("IF EXISTS statement is not supported for DROP TOPIC in yql script")); + } + try { if (!CheckCluster(cluster)) { return InvalidCluster(cluster); @@ -1019,6 +1043,7 @@ class TKikimrIcGateway : public IKqpGateway { catch (yexception& e) { return MakeFuture(ResultFromException(e)); } + } TFuture CreateReplication(const TString&, const NYql::TCreateReplicationSettings&) override { diff --git a/ydb/core/kqp/gateway/kqp_metadata_loader.cpp b/ydb/core/kqp/gateway/kqp_metadata_loader.cpp index 3959b377e3d1..db99faa189bd 100644 --- a/ydb/core/kqp/gateway/kqp_metadata_loader.cpp +++ b/ydb/core/kqp/gateway/kqp_metadata_loader.cpp @@ -303,7 +303,7 @@ TTableMetadataResult GetViewMetadataResult( metadata->SchemaVersion = description.GetVersion(); metadata->Kind = NYql::EKikimrTableKind::View; metadata->Attributes = schemeEntry.Attributes; - metadata->ViewPersistedData = {description.GetQueryText()}; + metadata->ViewPersistedData = {description.GetQueryText(), description.GetCapturedContext()}; return builtResult; } @@ -961,6 +961,7 @@ NThreading::TFuture TKqpTableMetadataLoader::LoadTableMeta auto s = resp.Simple; result.Metadata->RecordsCount = s.RowCount; result.Metadata->DataSize = s.BytesSize; + result.Metadata->StatsLoaded = true; promise.SetValue(result); }); diff --git a/ydb/core/kqp/host/kqp_explain_prepared.cpp b/ydb/core/kqp/host/kqp_explain_prepared.cpp index 91ee362da0f1..f76ce851140c 100644 --- 
a/ydb/core/kqp/host/kqp_explain_prepared.cpp +++ b/ydb/core/kqp/host/kqp_explain_prepared.cpp @@ -66,7 +66,7 @@ class TKqpExplainPreparedTransformer : public NYql::TGraphTransformerBase { PhyQuerySetTxPlans(query, TKqpPhysicalQuery(TransformCtx->ExplainTransformerInput), std::move(TxResults), ctx, Cluster, TransformCtx->Tables, TransformCtx->Config, TypeCtx, OptimizeCtx); - query.SetQueryAst(KqpExprToPrettyString(*TransformCtx->ExplainTransformerInput, ctx)); + query.SetQueryAst(KqpExprToPrettyString(*input, ctx)); TransformCtx->ExplainTransformerInput = nullptr; return TStatus::Ok; diff --git a/ydb/core/kqp/host/kqp_gateway_proxy.cpp b/ydb/core/kqp/host/kqp_gateway_proxy.cpp index 3c717ee29c7c..78daaa4004c1 100644 --- a/ydb/core/kqp/host/kqp_gateway_proxy.cpp +++ b/ydb/core/kqp/host/kqp_gateway_proxy.cpp @@ -8,6 +8,7 @@ #include #include #include +#include namespace NKikimr::NKqp { @@ -108,7 +109,7 @@ bool ConvertDataSlotToYdbTypedValue(NYql::EDataSlot fromType, const TString& fro case NYql::EDataSlot::Interval64: toType->set_type_id(Ydb::Type::INTERVAL64); toValue->set_int64_value(FromString(fromValue)); - break; + break; default: return false; } @@ -914,16 +915,120 @@ class TKqpGatewayProxy : public IKikimrGateway { return dropPromise.GetFuture(); } - TFuture CreateTopic(const TString& cluster, Ydb::Topic::CreateTopicRequest&& request) override { - FORWARD_ENSURE_NO_PREPARE(CreateTopic, cluster, std::move(request)); + TFuture CreateTopic(const TString& cluster, Ydb::Topic::CreateTopicRequest&& request, bool existingOk) override { + CHECK_PREPARED_DDL(CreateTopic); + Y_UNUSED(cluster); + + std::pair pathPair; + TString error; + auto createPromise = NewPromise(); + if (!NSchemeHelpers::SplitTablePath(request.path(), GetDatabase(), pathPair, error, false)) { + return MakeFuture(ResultFromError(error)); + } + NKikimrSchemeOp::TModifyScheme schemeTx; + schemeTx.SetOperationType(NKikimrSchemeOp::EOperationType::ESchemeOpCreatePersQueueGroup); + + 
schemeTx.SetWorkingDir(pathPair.first); + + auto pqDescr = schemeTx.MutableCreatePersQueueGroup(); + pqDescr->SetName(pathPair.second); + NKikimr::NGRpcProxy::V1::FillProposeRequestImpl(pathPair.second, request, schemeTx, AppData(ActorSystem), error, pathPair.first); + + if (IsPrepare()) { + auto& phyQuery = *SessionCtx->Query().PreparingQuery->MutablePhysicalQuery(); + auto& phyTx = *phyQuery.AddTransactions(); + phyTx.SetType(NKqpProto::TKqpPhyTx::TYPE_SCHEME); + + + phyTx.MutableSchemeOperation()->MutableCreateTopic()->Swap(&schemeTx); + phyTx.MutableSchemeOperation()->MutableCreateTopic()->SetFailedOnAlreadyExists(!existingOk); + TGenericResult result; + result.SetSuccess(); + createPromise.SetValue(result); + } else { + return Gateway->CreateTopic(cluster, std::move(request), existingOk); + } + return createPromise.GetFuture(); } - TFuture AlterTopic(const TString& cluster, Ydb::Topic::AlterTopicRequest&& request) override { - FORWARD_ENSURE_NO_PREPARE(AlterTopic, cluster, std::move(request)); + TFuture AlterTopic(const TString& cluster, Ydb::Topic::AlterTopicRequest&& request, bool missingOk) override { + CHECK_PREPARED_DDL(AlterTopic); + Y_UNUSED(cluster); + std::pair pathPair; + TString error; + if (!NSchemeHelpers::SplitTablePath(request.path(), GetDatabase(), pathPair, error, false)) { + return MakeFuture(ResultFromError(error)); + } + auto alterPromise = NewPromise(); + + if (IsPrepare()) { + TAlterTopicSettings settings{std::move(request), pathPair.second, pathPair.first, missingOk}; + auto getModifySchemeFuture = Gateway->AlterTopicPrepared(std::move(settings)); + + + auto* phyQuery = SessionCtx->Query().PreparingQuery->MutablePhysicalQuery(); + + getModifySchemeFuture.Subscribe([=] (const auto future) mutable { + TGenericResult result; + auto modifySchemeResult = future.GetValue(); + if (modifySchemeResult.Status == Ydb::StatusIds::SUCCESS) { + if (modifySchemeResult.ModifyScheme.HasAlterPersQueueGroup()) { + auto* phyTx = phyQuery->AddTransactions(); 
+ phyTx->SetType(NKqpProto::TKqpPhyTx::TYPE_SCHEME); + phyTx->MutableSchemeOperation()->MutableAlterTopic()->Swap(&modifySchemeResult.ModifyScheme); + phyTx->MutableSchemeOperation()->MutableAlterTopic()->SetSuccessOnNotExist(missingOk); + } + result.SetSuccess(); + + } else { + result.SetStatus(NYql::YqlStatusFromYdbStatus(modifySchemeResult.Status)); + result.AddIssues(modifySchemeResult.Issues); + } + alterPromise.SetValue(result); + }); + + } else { + return Gateway->AlterTopic(cluster, std::move(request), missingOk); + } + return alterPromise.GetFuture(); + } - TFuture DropTopic(const TString& cluster, const TString& topic) override { - FORWARD_ENSURE_NO_PREPARE(DropTopic, cluster, topic); + NThreading::TFuture AlterTopicPrepared(TAlterTopicSettings&& settings) override { + return Gateway->AlterTopicPrepared(std::move(settings)); + } + + TFuture DropTopic(const TString& cluster, const TString& topic, bool missingOk) override { + CHECK_PREPARED_DDL(DropTopic); + Y_UNUSED(cluster); + + std::pair pathPair; + TString error; + auto dropPromise = NewPromise(); + if (!NSchemeHelpers::SplitTablePath(topic, GetDatabase(), pathPair, error, false)) { + return MakeFuture(ResultFromError(error)); + } + + if (IsPrepare()) { + auto& phyQuery = *SessionCtx->Query().PreparingQuery->MutablePhysicalQuery(); + auto& phyTx = *phyQuery.AddTransactions(); + phyTx.SetType(NKqpProto::TKqpPhyTx::TYPE_SCHEME); + + NKikimrSchemeOp::TModifyScheme schemeTx; + schemeTx.SetOperationType(NKikimrSchemeOp::EOperationType::ESchemeOpDropPersQueueGroup); + + schemeTx.SetWorkingDir(pathPair.first); + schemeTx.MutableDrop()->SetName(pathPair.second); + + phyTx.MutableSchemeOperation()->MutableDropTopic()->Swap(&schemeTx); + phyTx.MutableSchemeOperation()->MutableDropTopic()->SetSuccessOnNotExist(missingOk); + TGenericResult result; + result.SetSuccess(); + dropPromise.SetValue(result); + } else { + return Gateway->DropTopic(cluster, topic, missingOk); + } + return dropPromise.GetFuture(); } TFuture 
ModifyPermissions(const TString& cluster, @@ -1161,6 +1266,7 @@ class TKqpGatewayProxy : public IKikimrGateway { if (SessionCtx->GetUserToken()) { context.SetUserToken(*SessionCtx->GetUserToken()); } + context.SetTranslationSettings(SessionCtx->Query().TranslationSettings); auto& phyTx = phyTxRemover.Capture(SessionCtx->Query().PreparingQuery->MutablePhysicalQuery()); phyTx.SetType(NKqpProto::TKqpPhyTx::TYPE_SCHEME); @@ -1901,6 +2007,7 @@ class TKqpGatewayProxy : public IKikimrGateway { const auto parseResult = NYdb::ParseConnectionString(*connectionString); params.SetEndpoint(parseResult.Endpoint); params.SetDatabase(parseResult.Database); + params.SetEnableSsl(parseResult.EnableSsl); } if (const auto& endpoint = settings.Settings.Endpoint) { params.SetEndpoint(*endpoint); diff --git a/ydb/core/kqp/host/kqp_host.cpp b/ydb/core/kqp/host/kqp_host.cpp index c54f1a26cc44..08623eb55528 100644 --- a/ydb/core/kqp/host/kqp_host.cpp +++ b/ydb/core/kqp/host/kqp_host.cpp @@ -288,8 +288,8 @@ class TAsyncExecuteYqlResult : public TKqpAsyncResultBase(queryResult.ProtobufArenaPtr.get())); - NKikimrMiniKQL::TResult* result = queryResult.Results.back(); + google::protobuf::Arena::CreateMessage(queryResult.ProtobufArenaPtr.get())); + Ydb::ResultSet* result = queryResult.Results.back(); if (!result->ParseFromArray(resultStr.data(), resultStr.size())) { queryResult = ResultFromError("Failed to parse run result."); @@ -1033,7 +1033,7 @@ class TKqpHost : public IKqpHost { std::optional federatedQuerySetup, const TIntrusiveConstPtr& userToken, const NKikimr::NMiniKQL::IFunctionRegistry* funcRegistry, bool keepConfigChanges, bool isInternalCall, TKqpTempTablesState::TConstPtr tempTablesState = nullptr, NActors::TActorSystem* actorSystem = nullptr, - NYql::TExprContext* ctx = nullptr) + NYql::TExprContext* ctx = nullptr, const NKikimrConfig::TQueryServiceConfig& queryServiceConfig = NKikimrConfig::TQueryServiceConfig()) : Gateway(gateway) , Cluster(cluster) , GUCSettings(gUCSettings) @@ 
-1051,6 +1051,7 @@ class TKqpHost : public IKqpHost { , FakeWorld(ctx ? nullptr : ExprCtx->NewWorld(TPosition())) , ExecuteCtx(MakeIntrusive()) , ActorSystem(actorSystem ? actorSystem : NActors::TActivationContext::ActorSystem()) + , QueryServiceConfig(queryServiceConfig) { if (funcRegistry) { FuncRegistry = funcRegistry; @@ -1225,8 +1226,21 @@ class TKqpHost : public IKqpHost { .SetIsEnableExternalDataSources(SessionCtx->Config().FeatureFlags.GetEnableExternalDataSources()) .SetIsEnablePgConstsToParams(SessionCtx->Config().EnablePgConstsToParams) .SetQueryParameters(query.ParameterTypes) - .SetApplicationName(ApplicationName); - auto astRes = ParseQuery(query.Text, isSql, sqlVersion, TypesCtx->DeprecatedSQL, ctx, settingsBuilder, result.KeepInCache, result.CommandTagName); + .SetApplicationName(ApplicationName) + .SetIsEnablePgSyntax(SessionCtx->Config().FeatureFlags.GetEnablePgSyntax()); + NSQLTranslation::TTranslationSettings effectiveSettings; + auto astRes = ParseQuery( + query.Text, + isSql, + sqlVersion, + TypesCtx->DeprecatedSQL, + ctx, + settingsBuilder, + result.KeepInCache, + result.CommandTagName, + &effectiveSettings + ); + SessionCtx->Query().TranslationSettings = std::move(effectiveSettings); if (astRes.ActualSyntaxType == NYql::ESyntaxType::Pg) { SessionCtx->Config().IndexAutoChooserMode = NKikimrConfig::TTableServiceConfig_EIndexAutoChooseMode::TTableServiceConfig_EIndexAutoChooseMode_MAX_USED_PREFIX; } @@ -1256,8 +1270,13 @@ class TKqpHost : public IKqpHost { YQL_CLOG(INFO, ProviderKqp) << "Compiled query:\n" << KqpExprToPrettyString(*queryExpr, ctx); if (Config->EnableCreateTableAs) { - result.QueryExprs = RewriteExpression(queryExpr, ctx, *TypesCtx, SessionCtx, Cluster); + auto [rewriteResults, rewriteIssues] = RewriteExpression(queryExpr, ctx, *TypesCtx, SessionCtx, Cluster); + ctx.IssueManager.AddIssues(rewriteIssues); + if (!rewriteIssues.Empty()) { + return result; + } + result.QueryExprs = rewriteResults; for (const auto& resultPart : 
result.QueryExprs) { YQL_CLOG(INFO, ProviderKqp) << "Splitted Compiled query part:\n" << KqpExprToPrettyString(*resultPart, ctx); } @@ -1277,7 +1296,7 @@ class TKqpHost : public IKqpHost { settingsBuilder .SetSqlAutoCommit(false) .SetUsePgParser(settings.UsePgParser); - auto compileResult = CompileYqlQuery(query, /* isSql */ true, *ExprCtx, sqlVersion, settingsBuilder, settings.PerStatementResult); + auto compileResult = CompileYqlQuery(query, /* isSql */ true, *ExprCtx, sqlVersion, settingsBuilder); return TSplitResult{ .Ctx = std::move(ExprCtxStorage), @@ -1287,7 +1306,7 @@ class TKqpHost : public IKqpHost { } TCompileExprResult CompileYqlQuery(const TKqpQueryRef& query, bool isSql, TExprContext& ctx, TMaybe& sqlVersion, - TKqpTranslationSettingsBuilder& settingsBuilder, bool perStatementResult) const + TKqpTranslationSettingsBuilder& settingsBuilder) const { auto compileResult = CompileQuery(query, isSql, ctx, sqlVersion, settingsBuilder); if (!compileResult.QueryExprs) { @@ -1299,12 +1318,7 @@ class TKqpHost : public IKqpHost { } // Currently used only for create table as - if (!perStatementResult && compileResult.QueryExprs.size() > 1) { - ctx.AddError(YqlIssue(TPosition(), TIssuesIds::KIKIMR_BAD_REQUEST, - "Query can be executed only in per-statement mode (NoTx)")); - compileResult.QueryExprs = {}; - return compileResult; - } else if (compileResult.QueryExprs.size() > 1) { + if (compileResult.QueryExprs.size() > 1) { return compileResult; } @@ -1376,7 +1390,7 @@ class TKqpHost : public IKqpHost { TKqpTranslationSettingsBuilder settingsBuilder(SessionCtx->Query().Type, SessionCtx->Config()._KqpYqlSyntaxVersion.Get().GetRef(), Cluster, query.Text, SessionCtx->Config().BindingsMode, GUCSettings); settingsBuilder.SetSqlAutoCommit(false) .SetUsePgParser(settings.UsePgParser); - auto compileResult = CompileYqlQuery(query, isSql, ctx, sqlVersion, settingsBuilder, false); + auto compileResult = CompileYqlQuery(query, isSql, ctx, sqlVersion, settingsBuilder); if 
(compileResult.QueryExprs.empty()) { return nullptr; } @@ -1436,7 +1450,7 @@ class TKqpHost : public IKqpHost { TMaybe sqlVersion; TKqpTranslationSettingsBuilder settingsBuilder(SessionCtx->Query().Type, SessionCtx->Config()._KqpYqlSyntaxVersion.Get().GetRef(), Cluster, query.Text, SessionCtx->Config().BindingsMode, GUCSettings); settingsBuilder.SetSqlAutoCommit(false); - auto compileResult = CompileYqlQuery(query, /* isSql */ true, ctx, sqlVersion, settingsBuilder, false); + auto compileResult = CompileYqlQuery(query, /* isSql */ true, ctx, sqlVersion, settingsBuilder); if (compileResult.QueryExprs.empty()) { return nullptr; } @@ -1464,7 +1478,7 @@ class TKqpHost : public IKqpHost { TMaybe sqlVersion; TKqpTranslationSettingsBuilder settingsBuilder(SessionCtx->Query().Type, SessionCtx->Config()._KqpYqlSyntaxVersion.Get().GetRef(), Cluster, queryAst.Text, SessionCtx->Config().BindingsMode, GUCSettings); settingsBuilder.SetSqlAutoCommit(false); - auto compileResult = CompileYqlQuery(queryAst, false, ctx, sqlVersion, settingsBuilder, false); + auto compileResult = CompileYqlQuery(queryAst, false, ctx, sqlVersion, settingsBuilder); if (compileResult.QueryExprs.empty()) { return nullptr; } @@ -1510,7 +1524,7 @@ class TKqpHost : public IKqpHost { TKqpTranslationSettingsBuilder settingsBuilder(SessionCtx->Query().Type, SessionCtx->Config()._KqpYqlSyntaxVersion.Get().GetRef(), Cluster, query.Text, SessionCtx->Config().BindingsMode, GUCSettings); settingsBuilder.SetSqlAutoCommit(false) .SetUsePgParser(settings.UsePgParser); - auto compileResult = CompileYqlQuery(query, /* isSql */ true, ctx, sqlVersion, settingsBuilder, settings.PerStatementResult); + auto compileResult = CompileYqlQuery(query, /* isSql */ true, ctx, sqlVersion, settingsBuilder); if (compileResult.QueryExprs.empty()) { return nullptr; } @@ -1547,7 +1561,7 @@ class TKqpHost : public IKqpHost { TMaybe sqlVersion = 1; TKqpTranslationSettingsBuilder settingsBuilder(SessionCtx->Query().Type, 
SessionCtx->Config()._KqpYqlSyntaxVersion.Get().GetRef(), Cluster, query.Text, SessionCtx->Config().BindingsMode, GUCSettings); settingsBuilder.SetSqlAutoCommit(false); - auto compileResult = CompileYqlQuery(query, true, ctx, sqlVersion, settingsBuilder, false); + auto compileResult = CompileYqlQuery(query, true, ctx, sqlVersion, settingsBuilder); if (compileResult.QueryExprs.empty()) { return nullptr; } @@ -1568,7 +1582,7 @@ class TKqpHost : public IKqpHost { TMaybe sqlVersion; TKqpTranslationSettingsBuilder settingsBuilder(SessionCtx->Query().Type, SessionCtx->Config()._KqpYqlSyntaxVersion.Get().GetRef(), Cluster, queryAst.Text, SessionCtx->Config().BindingsMode, GUCSettings); settingsBuilder.SetSqlAutoCommit(false); - auto compileResult = CompileYqlQuery(queryAst, false, ctx, sqlVersion, settingsBuilder, false); + auto compileResult = CompileYqlQuery(queryAst, false, ctx, sqlVersion, settingsBuilder); if (compileResult.QueryExprs.empty()) { return nullptr; } @@ -1595,7 +1609,7 @@ class TKqpHost : public IKqpHost { TKqpTranslationSettingsBuilder settingsBuilder(SessionCtx->Query().Type, SessionCtx->Config()._KqpYqlSyntaxVersion.Get().GetRef(), Cluster, script.Text, SessionCtx->Config().BindingsMode, GUCSettings); settingsBuilder.SetSqlAutoCommit(true) .SetUsePgParser(settings.UsePgParser); - auto compileResult = CompileYqlQuery(script, true, ctx, sqlVersion, settingsBuilder, false); + auto compileResult = CompileYqlQuery(script, true, ctx, sqlVersion, settingsBuilder); if (compileResult.QueryExprs.empty()) { return nullptr; } @@ -1624,7 +1638,7 @@ class TKqpHost : public IKqpHost { TKqpTranslationSettingsBuilder settingsBuilder(SessionCtx->Query().Type, SessionCtx->Config()._KqpYqlSyntaxVersion.Get().GetRef(), Cluster, script.Text, SessionCtx->Config().BindingsMode, GUCSettings); settingsBuilder.SetSqlAutoCommit(true) .SetUsePgParser(settings.UsePgParser); - auto compileResult = CompileYqlQuery(script, true, ctx, sqlVersion, settingsBuilder, false); + auto 
compileResult = CompileYqlQuery(script, true, ctx, sqlVersion, settingsBuilder); if (compileResult.QueryExprs.empty()) { return nullptr; } @@ -1648,7 +1662,7 @@ class TKqpHost : public IKqpHost { TMaybe sqlVersion; TKqpTranslationSettingsBuilder settingsBuilder(SessionCtx->Query().Type, SessionCtx->Config()._KqpYqlSyntaxVersion.Get().GetRef(), Cluster, script.Text, SessionCtx->Config().BindingsMode, GUCSettings); settingsBuilder.SetSqlAutoCommit(true); - auto compileResult = CompileYqlQuery(script, true, ctx, sqlVersion, settingsBuilder, false); + auto compileResult = CompileYqlQuery(script, true, ctx, sqlVersion, settingsBuilder); if (compileResult.QueryExprs.empty()) { return nullptr; } @@ -1676,7 +1690,7 @@ class TKqpHost : public IKqpHost { TMaybe sqlVersion; TKqpTranslationSettingsBuilder settingsBuilder(SessionCtx->Query().Type, SessionCtx->Config()._KqpYqlSyntaxVersion.Get().GetRef(), Cluster, script.Text, SessionCtx->Config().BindingsMode, GUCSettings); settingsBuilder.SetSqlAutoCommit(true); - auto compileResult = CompileYqlQuery(script, true, ctx, sqlVersion, settingsBuilder, false); + auto compileResult = CompileYqlQuery(script, true, ctx, sqlVersion, settingsBuilder); if (compileResult.QueryExprs.empty()) { return nullptr; } @@ -1774,7 +1788,7 @@ class TKqpHost : public IKqpHost { auto queryExecutor = MakeIntrusive(Gateway, Cluster, SessionCtx, KqpRunner); auto kikimrDataSource = CreateKikimrDataSource(*FuncRegistry, *TypesCtx, gatewayProxy, SessionCtx, - ExternalSourceFactory, IsInternalCall); + ExternalSourceFactory, IsInternalCall, GUCSettings); auto kikimrDataSink = CreateKikimrDataSink(*FuncRegistry, *TypesCtx, gatewayProxy, SessionCtx, ExternalSourceFactory, queryExecutor); FillSettings.AllResultsBytesLimit = Nothing(); @@ -1824,10 +1838,15 @@ class TKqpHost : public IKqpHost { || settingName == "FilterPushdownOverJoinOptionalSide" || settingName == "DisableFilterPushdownOverJoinOptionalSide" || settingName == "RotateJoinTree" + || settingName == 
"TimeOrderRecoverDelay" + || settingName == "TimeOrderRecoverAhead" + || settingName == "TimeOrderRecoverRowLimit" + || settingName == "MatchRecognizeStream" ; }; auto configProvider = CreateConfigProvider(*TypesCtx, gatewaysConfig, {}, allowSettings); TypesCtx->AddDataSource(ConfigProviderName, configProvider); + TypesCtx->MatchRecognize = QueryServiceConfig.GetEnableMatchRecognize(); YQL_ENSURE(TypesCtx->Initialize(*ExprCtx)); @@ -1929,6 +1948,7 @@ class TKqpHost : public IKqpHost { TKqpTempTablesState::TConstPtr TempTablesState; NActors::TActorSystem* ActorSystem = nullptr; + NKikimrConfig::TQueryServiceConfig QueryServiceConfig; }; } // namespace @@ -1949,11 +1969,11 @@ Ydb::Table::QueryStatsCollection::Mode GetStatsMode(NYql::EKikimrStatsMode stats TIntrusivePtr CreateKqpHost(TIntrusivePtr gateway, const TString& cluster, const TString& database, TKikimrConfiguration::TPtr config, IModuleResolver::TPtr moduleResolver, std::optional federatedQuerySetup, const TIntrusiveConstPtr& userToken, const TGUCSettings::TPtr& gUCSettings, - const TMaybe& applicationName, const NKikimr::NMiniKQL::IFunctionRegistry* funcRegistry, bool keepConfigChanges, + const NKikimrConfig::TQueryServiceConfig& queryServiceConfig, const TMaybe& applicationName, const NKikimr::NMiniKQL::IFunctionRegistry* funcRegistry, bool keepConfigChanges, bool isInternalCall, TKqpTempTablesState::TConstPtr tempTablesState, NActors::TActorSystem* actorSystem, NYql::TExprContext* ctx) { return MakeIntrusive(gateway, cluster, database, gUCSettings, applicationName, config, moduleResolver, federatedQuerySetup, userToken, funcRegistry, - keepConfigChanges, isInternalCall, std::move(tempTablesState), actorSystem, ctx); + keepConfigChanges, isInternalCall, std::move(tempTablesState), actorSystem, ctx, queryServiceConfig); } } // namespace NKqp diff --git a/ydb/core/kqp/host/kqp_host.h b/ydb/core/kqp/host/kqp_host.h index ff94738619f0..f0e2b236e9f1 100644 --- a/ydb/core/kqp/host/kqp_host.h +++ 
b/ydb/core/kqp/host/kqp_host.h @@ -45,7 +45,6 @@ class IKqpHost : public TThrRefBase { struct TPrepareSettings: public TExecSettings { TMaybe IsInternalCall; TMaybe ConcurrentResults; - bool PerStatementResult; TString ToString() const { return TStringBuilder() << "TPrepareSettings{" @@ -120,7 +119,7 @@ class IKqpHost : public TThrRefBase { TIntrusivePtr CreateKqpHost(TIntrusivePtr gateway, const TString& cluster, const TString& database, NYql::TKikimrConfiguration::TPtr config, NYql::IModuleResolver::TPtr moduleResolver, std::optional federatedQuerySetup, const TIntrusiveConstPtr& userToken, const TGUCSettings::TPtr& gUCSettings, - const TMaybe& applicationName = Nothing(), const NKikimr::NMiniKQL::IFunctionRegistry* funcRegistry = nullptr, + const NKikimrConfig::TQueryServiceConfig& queryServiceConfig, const TMaybe& applicationName = Nothing(), const NKikimr::NMiniKQL::IFunctionRegistry* funcRegistry = nullptr, bool keepConfigChanges = false, bool isInternalCall = false, TKqpTempTablesState::TConstPtr tempTablesState = nullptr, NActors::TActorSystem* actorSystem = nullptr /*take from TLS by default*/, NYql::TExprContext* ctx = nullptr); diff --git a/ydb/core/kqp/host/kqp_runner.cpp b/ydb/core/kqp/host/kqp_runner.cpp index 3f42256e7790..d6d76208ec0e 100644 --- a/ydb/core/kqp/host/kqp_runner.cpp +++ b/ydb/core/kqp/host/kqp_runner.cpp @@ -116,17 +116,7 @@ class TPrepareQueryAsyncResult : public TKqpAsyncResultBase results; - for (auto& phyResult : TransformCtx.PhysicalQueryResults) { - auto result = google::protobuf::Arena::CreateMessage( - queryResult.ProtobufArenaPtr.get()); - - result->CopyFrom(phyResult); - results.push_back(result); - } - queryResult.QueryStats.CopyFrom(TransformCtx.QueryStats); - queryResult.Results = std::move(results); } private: diff --git a/ydb/core/kqp/host/kqp_statement_rewrite.cpp b/ydb/core/kqp/host/kqp_statement_rewrite.cpp index bf15ab483261..fa06bf64bfb4 100644 --- a/ydb/core/kqp/host/kqp_statement_rewrite.cpp +++ 
b/ydb/core/kqp/host/kqp_statement_rewrite.cpp @@ -213,12 +213,10 @@ namespace { exprCtx.NewAtom(pos, "mode"), exprCtx.NewAtom(pos, "replace"), })); - if (!isOlap) { - insertSettings.push_back( - exprCtx.NewList(pos, { - exprCtx.NewAtom(pos, "AllowInconsistentWrites"), - })); - } + insertSettings.push_back( + exprCtx.NewList(pos, { + exprCtx.NewAtom(pos, "AllowInconsistentWrites"), + })); const auto insert = exprCtx.NewCallable(pos, "Write!", { topLevelRead == nullptr ? exprCtx.NewWorld(pos) : exprCtx.NewCallable(pos, "Left!", {topLevelRead.Get()}), @@ -292,20 +290,25 @@ namespace { } } -TVector RewriteExpression( +std::pair, NYql::TIssues> RewriteExpression( const NYql::TExprNode::TPtr& root, NYql::TExprContext& exprCtx, NYql::TTypeAnnotationContext& typeCtx, const TIntrusivePtr& sessionCtx, const TString& cluster) { + NYql::TIssues issues; // CREATE TABLE AS statement can be used only with perstatement execution. // Thus we assume that there is only one such statement. + ui64 actionsCount = 0; TVector result; VisitExpr(root, [&](const NYql::TExprNode::TPtr& node) { if (NYql::NNodes::TCoWrite::Match(node.Get())) { + ++actionsCount; const auto rewriteResult = RewriteCreateTableAs(node, exprCtx, typeCtx, sessionCtx, cluster); if (rewriteResult) { - YQL_ENSURE(result.empty()); + if (!result.empty()) { + issues.AddIssue("Several CTAS statement can't be used without per-statement mode."); + } result.push_back(rewriteResult->CreateTable); result.push_back(rewriteResult->ReplaceInto); if (rewriteResult->MoveTable) { @@ -316,10 +319,14 @@ TVector RewriteExpression( return true; }); + if (!result.empty() && actionsCount > 1) { + issues.AddIssue("CTAS statement can't be used with other statements without per-statement mode."); + } + if (result.empty()) { result.push_back(root); } - return result; + return {result, issues}; } } diff --git a/ydb/core/kqp/host/kqp_statement_rewrite.h b/ydb/core/kqp/host/kqp_statement_rewrite.h index 95c4b3a506d1..3b9b560c5684 100644 --- 
a/ydb/core/kqp/host/kqp_statement_rewrite.h +++ b/ydb/core/kqp/host/kqp_statement_rewrite.h @@ -7,7 +7,7 @@ namespace NKikimr { namespace NKqp { -TVector RewriteExpression( +std::pair, NYql::TIssues> RewriteExpression( const NYql::TExprNode::TPtr& root, NYql::TExprContext& ctx, NYql::TTypeAnnotationContext& typeCtx, diff --git a/ydb/core/kqp/host/kqp_transform.h b/ydb/core/kqp/host/kqp_transform.h index bdf95715ba9d..0004340de583 100644 --- a/ydb/core/kqp/host/kqp_transform.h +++ b/ydb/core/kqp/host/kqp_transform.h @@ -26,18 +26,13 @@ struct TKqlTransformContext : TThrRefBase { NKqpProto::TKqpStatsQuery QueryStats; std::shared_ptr PhysicalQuery; - TVector> MkqlResults; - TVector PhysicalQueryResults; - NYql::TExprNode::TPtr ExplainTransformerInput; // Explain transformer must work after other transformers, but use input before peephole TMaybe DataQueryBlocks; void Reset() { ReplyTarget = {}; - MkqlResults.clear(); QueryStats = {}; PhysicalQuery = nullptr; - PhysicalQueryResults.clear(); ExplainTransformerInput = nullptr; DataQueryBlocks = Nothing(); } diff --git a/ydb/core/kqp/host/kqp_translate.cpp b/ydb/core/kqp/host/kqp_translate.cpp index 35e632c273b4..8b7d842f88ed 100644 --- a/ydb/core/kqp/host/kqp_translate.cpp +++ b/ydb/core/kqp/host/kqp_translate.cpp @@ -1,6 +1,8 @@ #include "kqp_translate.h" +#include #include +#include namespace NKikimr { @@ -52,7 +54,7 @@ NYql::EKikimrQueryType ConvertType(NKikimrKqp::EQueryType type) { YQL_ENSURE(false, "Unexpected query type: " << type); } } - + NSQLTranslation::TTranslationSettings TKqpTranslationSettingsBuilder::Build(NYql::TExprContext& ctx) { NSQLTranslation::TTranslationSettings settings; settings.PgParser = UsePgParser && *UsePgParser; @@ -84,6 +86,7 @@ NSQLTranslation::TTranslationSettings TKqpTranslationSettingsBuilder::Build(NYql settings.SaveWorldDependencies = true; } + settings.PGDisable = !IsEnablePgSyntax; settings.InferSyntaxVersion = true; settings.V0ForceDisable = false; settings.WarnOnV0 = false; @@ 
-151,13 +154,14 @@ NSQLTranslation::TTranslationSettings TKqpTranslationSettingsBuilder::Build(NYql } NYql::TAstParseResult ParseQuery(const TString& queryText, bool isSql, TMaybe& sqlVersion, bool& deprecatedSQL, - NYql::TExprContext& ctx, TKqpTranslationSettingsBuilder& settingsBuilder, bool& keepInCache, TMaybe& commandTagName) { + NYql::TExprContext& ctx, TKqpTranslationSettingsBuilder& settingsBuilder, bool& keepInCache, TMaybe& commandTagName, + NSQLTranslation::TTranslationSettings* effectiveSettings) { NYql::TAstParseResult astRes; settingsBuilder.SetSqlVersion(sqlVersion); if (isSql) { auto settings = settingsBuilder.Build(ctx); NYql::TStmtParseInfo stmtParseInfo; - auto ast = NSQLTranslation::SqlToYql(queryText, settings, nullptr, &stmtParseInfo); + auto ast = NSQLTranslation::SqlToYql(queryText, settings, nullptr, &stmtParseInfo, effectiveSettings); deprecatedSQL = (ast.ActualSyntaxType == NYql::ESyntaxType::YQLv0); sqlVersion = ast.ActualSyntaxType == NYql::ESyntaxType::YQLv1 ? 
1 : 0; keepInCache = stmtParseInfo.KeepInCache; diff --git a/ydb/core/kqp/host/kqp_translate.h b/ydb/core/kqp/host/kqp_translate.h index e54879819d97..d71c01d4ce2b 100644 --- a/ydb/core/kqp/host/kqp_translate.h +++ b/ydb/core/kqp/host/kqp_translate.h @@ -1,7 +1,8 @@ #pragma once -#include -#include +#include +#include +#include #include namespace NKikimr { @@ -61,6 +62,11 @@ class TKqpTranslationSettingsBuilder { return *this; } + TKqpTranslationSettingsBuilder& SetIsEnablePgSyntax(bool value) { + IsEnablePgSyntax = value; + return *this; + } + private: const NYql::EKikimrQueryType QueryType; const ui16 KqpYqlSyntaxVersion; @@ -72,6 +78,7 @@ class TKqpTranslationSettingsBuilder { TString KqpTablePathPrefix = {}; bool IsEnableExternalDataSources = false; bool IsEnablePgConstsToParams = false; + bool IsEnablePgSyntax = false; TMaybe SqlAutoCommit = {}; TGUCSettings::TPtr GUCSettings; TMaybe ApplicationName = {}; @@ -84,7 +91,8 @@ NSQLTranslation::EBindingsMode RemapBindingsMode(NKikimrConfig::TTableServiceCon NYql::EKikimrQueryType ConvertType(NKikimrKqp::EQueryType type); NYql::TAstParseResult ParseQuery(const TString& queryText, bool isSql, TMaybe& sqlVersion, bool& deprecatedSQL, - NYql::TExprContext& ctx, TKqpTranslationSettingsBuilder& settingsBuilder, bool& keepInCache, TMaybe& commandTagName); + NYql::TExprContext& ctx, TKqpTranslationSettingsBuilder& settingsBuilder, bool& keepInCache, TMaybe& commandTagName, + NSQLTranslation::TTranslationSettings* effectiveSettings = nullptr); TVector ParseStatements(const TString& queryText, const TMaybe& syntax, bool isSql, TKqpTranslationSettingsBuilder& settingsBuilder, bool perStatementExecution); diff --git a/ydb/core/kqp/host/kqp_type_ann.cpp b/ydb/core/kqp/host/kqp_type_ann.cpp index ab1c98f876fc..36249024fea4 100644 --- a/ydb/core/kqp/host/kqp_type_ann.cpp +++ b/ydb/core/kqp/host/kqp_type_ann.cpp @@ -451,7 +451,12 @@ TStatus AnnotateReadTableRanges(const TExprNode::TPtr& node, TExprContext& ctx, TStatus 
AnnotateLookupTable(const TExprNode::TPtr& node, TExprContext& ctx, const TString& cluster, const TKikimrTablesData& tablesData, bool withSystemColumns) { - if (!EnsureArgsCount(*node, TKqlLookupIndexBase::Match(node.Get()) || TKqlStreamLookupTable::Match(node.Get()) ? 4 : 3, ctx)) { + const bool isStreamLookup = TKqlStreamLookupTable::Match(node.Get()) || TKqlStreamLookupIndex::Match(node.Get()); + if (isStreamLookup && !EnsureArgsCount(*node, TKqlStreamLookupIndex::Match(node.Get()) ? 5 : 4, ctx)) { + return TStatus::Error; + } + + if (!isStreamLookup && !EnsureArgsCount(*node, TKqlLookupIndexBase::Match(node.Get()) ? 4 : 3, ctx)) { return TStatus::Error; } @@ -495,14 +500,16 @@ TStatus AnnotateLookupTable(const TExprNode::TPtr& node, TExprContext& ctx, cons YQL_ENSURE(lookupType); const TStructExprType* structType = nullptr; - bool isStreamLookup = TKqlStreamLookupTable::Match(node.Get()); if (isStreamLookup) { - auto lookupStrategy = node->Child(TKqlStreamLookupTable::idx_LookupStrategy); + auto lookupStrategy = node->Child(TKqlStreamLookupTable::Match(node.Get()) ? 
+ TKqlStreamLookupTable::idx_LookupStrategy : TKqlStreamLookupIndex::idx_LookupStrategy); if (!EnsureAtom(*lookupStrategy, ctx)) { return TStatus::Error; } - if (lookupStrategy->Content() == TKqpStreamLookupJoinStrategyName) { + if (lookupStrategy->Content() == TKqpStreamLookupJoinStrategyName + || lookupStrategy->Content() == TKqpStreamLookupSemiJoinStrategyName) { + if (!EnsureTupleType(node->Pos(), *lookupType, ctx)) { return TStatus::Error; } @@ -1682,7 +1689,9 @@ TStatus AnnotateStreamLookupConnection(const TExprNode::TPtr& node, TExprContext node->SetTypeAnn(ctx.MakeType(rowType)); - } else if (lookupStrategy.Value() == TKqpStreamLookupJoinStrategyName) { + } else if (lookupStrategy.Value() == TKqpStreamLookupJoinStrategyName + || lookupStrategy.Value() == TKqpStreamLookupSemiJoinStrategyName) { + if (!EnsureTupleType(node->Pos(), *inputItemType, ctx)) { return TStatus::Error; } diff --git a/ydb/core/kqp/node_service/kqp_node_service.cpp b/ydb/core/kqp/node_service/kqp_node_service.cpp index 1416a56be23a..9e7e1593130a 100644 --- a/ydb/core/kqp/node_service/kqp_node_service.cpp +++ b/ydb/core/kqp/node_service/kqp_node_service.cpp @@ -60,11 +60,16 @@ class TKqpNodeService : public TActorBootstrapped { return NKikimrServices::TActivity::KQP_NODE_SERVICE; } - TKqpNodeService(const NKikimrConfig::TTableServiceConfig& config, const TIntrusivePtr& counters, + TKqpNodeService(const NKikimrConfig::TTableServiceConfig& config, + std::shared_ptr resourceManager, + std::shared_ptr caFactory, + const TIntrusivePtr& counters, NYql::NDq::IDqAsyncIoFactory::TPtr asyncIoFactory, const std::optional& federatedQuerySetup) : Config(config.GetResourceManager()) , Counters(counters) + , ResourceManager_(std::move(resourceManager)) + , CaFactory_(std::move(caFactory)) , AsyncIoFactory(std::move(asyncIoFactory)) , FederatedQuerySetup(federatedQuerySetup) , State_(std::make_shared()) @@ -128,6 +133,10 @@ class TKqpNodeService : public TActorBootstrapped { auto requester = ev->Sender; 
ui64 txId = msg.GetTxId(); + TMaybe lockTxId = msg.HasLockTxId() + ? TMaybe(msg.GetLockTxId()) + : Nothing(); + ui32 lockNodeId = msg.GetLockNodeId(); YQL_ENSURE(msg.GetStartAllOrFail()); // todo: support partial start @@ -159,65 +168,10 @@ class TKqpNodeService : public TActorBootstrapped { memoryPool = NRm::EKqpMemoryPool::Unspecified; } - ui32 requestChannels = 0; - ui64 totalMemory = 0; - for (auto& dqTask : *msg.MutableTasks()) { - auto estimation = EstimateTaskResources(dqTask, Config, msg.GetTasks().size()); - LOG_D("Resource estimation complete" - << ", TxId: " << txId << ", task id: " << dqTask.GetId() << ", node id: " << SelfId().NodeId() - << ", estimated resources: " << estimation.ToString()); - - NKqpNode::TTaskContext& taskCtx = request.InFlyTasks[dqTask.GetId()]; - YQL_ENSURE(taskCtx.TaskId == 0); - taskCtx.TaskId = dqTask.GetId(); - - LOG_D("TxId: " << txId << ", task: " << taskCtx.TaskId << ", requested memory: " << estimation.TotalMemoryLimit); - totalMemory += estimation.TotalMemoryLimit; - requestChannels += estimation.ChannelBuffersCount; - } - - LOG_D("TxId: " << txId << ", channels: " << requestChannels - << ", computeActors: " << msg.GetTasks().size() << ", memory: " << totalMemory); - - TVector allocatedTasks; - allocatedTasks.reserve(msg.GetTasks().size()); - for (auto& task : request.InFlyTasks) { - NRm::TKqpResourcesRequest resourcesRequest; - resourcesRequest.MemoryPool = memoryPool; - resourcesRequest.ExecutionUnits = 1; - - // !!!!!!!!!!!!!!!!!!!!! - // we have to allocate memory instead of reserve only. currently, this memory will not be used for request processing. 
- resourcesRequest.Memory = (1 << 19) /* 512kb limit for check that memory exists for processing with minimal requirements */; - - auto result = ResourceManager()->AllocateResources(txId, task.first, resourcesRequest); - - if (!result) { - for (ui64 taskId : allocatedTasks) { - ResourceManager()->FreeResources(txId, taskId); - } - - ReplyError(txId, request.Executer, msg, result.GetStatus(), result.GetFailReason()); - return; - } - - allocatedTasks.push_back(task.first); - } - auto reply = MakeHolder(); reply->Record.SetTxId(txId); NYql::NDq::TComputeRuntimeSettings runtimeSettingsBase; - runtimeSettingsBase.ExtraMemoryAllocationPool = memoryPool; - runtimeSettingsBase.FailOnUndelivery = msgRtSettings.GetExecType() != NYql::NDqProto::TComputeRuntimeSettings::SCAN; - - runtimeSettingsBase.StatsMode = msgRtSettings.GetStatsMode(); - runtimeSettingsBase.UseSpilling = msgRtSettings.GetUseSpilling(); - - if (msgRtSettings.HasRlPath()) { - runtimeSettingsBase.RlPath = msgRtSettings.GetRlPath(); - } - runtimeSettingsBase.ReportStatsSettings = NYql::NDq::TReportStatsSettings{MinStatInterval, MaxStatInterval}; TShardsScanningPolicy scanPolicy(Config.GetShardsScanningPolicy()); @@ -228,16 +182,54 @@ class TKqpNodeService : public TActorBootstrapped { ev->Get()->Record.GetSerializedGUCSettings() : ""; // start compute actors + TMaybe rlPath = Nothing(); + if (msgRtSettings.HasRlPath()) { + rlPath.ConstructInPlace(msgRtSettings.GetRlPath()); + } + + TIntrusivePtr txInfo = MakeIntrusive( + txId, TInstant::Now(), ResourceManager_->GetCounters()); + const ui32 tasksCount = msg.GetTasks().size(); - for (int i = 0; i < msg.GetTasks().size(); ++i) { - auto& dqTask = *msg.MutableTasks(i); + for (auto& dqTask: *msg.MutableTasks()) { + auto result = CaFactory_->CreateKqpComputeActor({ + .ExecuterId = request.Executer, + .TxId = txId, + .LockTxId = lockTxId, + .LockNodeId = lockNodeId, + .Task = &dqTask, + .TxInfo = txInfo, + .RuntimeSettings = runtimeSettingsBase, + .TraceId = 
NWilson::TTraceId(ev->TraceId), + .Arena = ev->Get()->Arena, + .SerializedGUCSettings = serializedGUCSettings, + .NumberOfTasks = tasksCount, + .OutputChunkMaxSize = msg.GetOutputChunkMaxSize(), + .MemoryPool = memoryPool, + .WithSpilling = msgRtSettings.GetUseSpilling(), + .StatsMode = msgRtSettings.GetStatsMode(), + .Deadline = TInstant(), + .ShareMailbox = false, + .RlPath = rlPath, + .ComputesByStages = &computesByStage, + .State = State_ + }); + + if (const auto* rmResult = std::get_if(&result)) { + ReplyError(txId, request.Executer, msg, rmResult->GetStatus(), rmResult->GetFailReason()); + bucket.NewRequest(std::move(request)); + TerminateTx(txId, rmResult->GetFailReason()); + return; + } + auto& taskCtx = request.InFlyTasks[dqTask.GetId()]; + YQL_ENSURE(taskCtx.TaskId == 0); + taskCtx.TaskId = dqTask.GetId(); YQL_ENSURE(taskCtx.TaskId != 0); - taskCtx.ComputeActorId = CaFactory()->CreateKqpComputeActor( - request.Executer, txId, &dqTask, runtimeSettingsBase, - NWilson::TTraceId(ev->TraceId), ev->Get()->Arena, serializedGUCSettings, computesByStage, - msg.GetOutputChunkMaxSize(), State_, memoryPool, tasksCount); + TActorId* actorId = std::get_if(&result); + Y_ABORT_UNLESS(actorId); + taskCtx.ComputeActorId = *actorId; LOG_D("TxId: " << txId << ", executing task: " << taskCtx.TaskId << " on compute actor: " << taskCtx.ComputeActorId); @@ -249,7 +241,7 @@ class TKqpNodeService : public TActorBootstrapped { for (auto&& i : computesByStage) { for (auto&& m : i.second.MutableMetaInfo()) { Register(CreateKqpScanFetcher(msg.GetSnapshot(), std::move(m.MutableActorIds()), - m.GetMeta(), runtimeSettingsBase, txId, scanPolicy, Counters, NWilson::TTraceId(ev->TraceId))); + m.GetMeta(), runtimeSettingsBase, txId, lockTxId, lockNodeId, scanPolicy, Counters, NWilson::TTraceId(ev->TraceId))); } } @@ -343,11 +335,15 @@ class TKqpNodeService : public TActorBootstrapped { FORCE_VALUE(EnableInstantMkqlMemoryAlloc); FORCE_VALUE(MaxTotalChannelBuffersSize); 
FORCE_VALUE(MinChannelBufferSize); + FORCE_VALUE(MinMemAllocSize); + FORCE_VALUE(MinMemFreeSize); #undef FORCE_VALUE LOG_I("Updated table service config: " << Config.DebugString()); } + CaFactory_->ApplyConfig(event.GetConfig().GetTableServiceConfig().GetResourceManager()); + if (event.GetConfig().GetTableServiceConfig().HasIteratorReadsRetrySettings()) { SetIteratorReadsRetrySettings(event.GetConfig().GetTableServiceConfig().GetIteratorReadsRetrySettings()); } @@ -358,6 +354,7 @@ class TKqpNodeService : public TActorBootstrapped { auto responseEv = MakeHolder(event); Send(ev->Sender, responseEv.Release(), IEventHandle::FlagTrackDelivery, ev->Cookie); + } void SetIteratorReadsQuotaSettings(const NKikimrConfig::TTableServiceConfig::TIteratorReadQuotaSettings& settings) { @@ -442,24 +439,6 @@ class TKqpNodeService : public TActorBootstrapped { Send(executer, ev.Release()); } - std::shared_ptr ResourceManager() { - if (Y_LIKELY(ResourceManager_)) { - return ResourceManager_; - } - ResourceManager_ = GetKqpResourceManager(); - return ResourceManager_; - } - - std::shared_ptr CaFactory() { - if (Y_LIKELY(CaFactory_)) { - return CaFactory_; - } - - CaFactory_ = NComputeActor::MakeKqpCaFactory( - Config, ResourceManager(), AsyncIoFactory, FederatedQuerySetup); - return CaFactory_; - } - private: NKikimrConfig::TTableServiceConfig::TResourceManager Config; TIntrusivePtr Counters; @@ -468,7 +447,6 @@ class TKqpNodeService : public TActorBootstrapped { NYql::NDq::IDqAsyncIoFactory::TPtr AsyncIoFactory; const std::optional FederatedQuerySetup; - //state sharded by TxId std::shared_ptr State_; }; @@ -476,10 +454,13 @@ class TKqpNodeService : public TActorBootstrapped { } // anonymous namespace IActor* CreateKqpNodeService(const NKikimrConfig::TTableServiceConfig& tableServiceConfig, + std::shared_ptr resourceManager, + std::shared_ptr caFactory, TIntrusivePtr counters, NYql::NDq::IDqAsyncIoFactory::TPtr asyncIoFactory, const std::optional& federatedQuerySetup) { - return new 
TKqpNodeService(tableServiceConfig, counters, std::move(asyncIoFactory), federatedQuerySetup); + return new TKqpNodeService(tableServiceConfig, std::move(resourceManager), std::move(caFactory), + counters, std::move(asyncIoFactory), federatedQuerySetup); } } // namespace NKqp diff --git a/ydb/core/kqp/node_service/kqp_node_service.h b/ydb/core/kqp/node_service/kqp_node_service.h index d17a56cf76b7..93f7a7a4e633 100644 --- a/ydb/core/kqp/node_service/kqp_node_service.h +++ b/ydb/core/kqp/node_service/kqp_node_service.h @@ -91,6 +91,8 @@ struct TNodeServiceState : public NKikimr::NKqp::NComputeActor::IKqpNodeState { }; NActors::IActor* CreateKqpNodeService(const NKikimrConfig::TTableServiceConfig& tableServiceConfig, + std::shared_ptr resourceManager, + std::shared_ptr caFactory, TIntrusivePtr counters, NYql::NDq::IDqAsyncIoFactory::TPtr asyncIoFactory = nullptr, const std::optional& federatedQuerySetup = std::nullopt); diff --git a/ydb/core/kqp/opt/kqp_opt.cpp b/ydb/core/kqp/opt/kqp_opt.cpp index b056c9c4b955..8af6ac69ad4b 100644 --- a/ydb/core/kqp/opt/kqp_opt.cpp +++ b/ydb/core/kqp/opt/kqp_opt.cpp @@ -83,7 +83,7 @@ bool IsKqpEffectsStage(const TDqStageBase& stage) { } bool NeedSinks(const TKikimrTableDescription& table, const TKqpOptimizeContext& kqpCtx) { - return kqpCtx.IsGenericQuery() + return (kqpCtx.IsGenericQuery() || (kqpCtx.IsDataQuery() && table.Metadata->Kind != EKikimrTableKind::Olap)) && (table.Metadata->Kind != EKikimrTableKind::Olap || kqpCtx.Config->EnableOlapSink) && (table.Metadata->Kind != EKikimrTableKind::Datashard || kqpCtx.Config->EnableOltpSink); } @@ -110,6 +110,49 @@ TKqpTable BuildTableMeta(const TKikimrTableDescription& tableDesc, const TPositi return BuildTableMeta(*tableDesc.Metadata, pos, ctx); } +bool IsSortKeyPrimary(const NYql::NNodes::TCoLambda& keySelector, const NYql::TKikimrTableDescription& tableDesc, + const TMaybe>& passthroughFields) +{ + auto checkKey = [keySelector, &tableDesc, &passthroughFields] (NYql::NNodes::TExprBase 
key, ui32 index) { + if (!key.Maybe()) { + return false; + } + + auto member = key.Cast(); + if (member.Struct().Raw() != keySelector.Args().Arg(0).Raw()) { + return false; + } + + auto column = TString(member.Name().Value()); + auto columnIndex = tableDesc.GetKeyColumnIndex(column); + if (!columnIndex || *columnIndex != index) { + return false; + } + + if (passthroughFields && !passthroughFields->contains(column)) { + return false; + } + + return true; + }; + + auto lambdaBody = keySelector.Body(); + if (auto maybeTuple = lambdaBody.Maybe()) { + auto tuple = maybeTuple.Cast(); + for (size_t i = 0; i < tuple.Size(); ++i) { + if (!checkKey(tuple.Item(i), i)) { + return false; + } + } + } else { + if (!checkKey(lambdaBody, 0)) { + return false; + } + } + + return true; +} + bool IsBuiltEffect(const TExprBase& effect) { // Stage with effect output if (effect.Maybe()) { diff --git a/ydb/core/kqp/opt/kqp_opt.h b/ydb/core/kqp/opt/kqp_opt.h index b74b4b31fbff..7e8181a85a8f 100644 --- a/ydb/core/kqp/opt/kqp_opt.h +++ b/ydb/core/kqp/opt/kqp_opt.h @@ -24,8 +24,8 @@ struct TKqpOptimizeContext : public TSimpleRefCount { const NYql::TKikimrConfiguration::TPtr Config; const TIntrusivePtr QueryCtx; const TIntrusivePtr Tables; - int JoinsCount; - int EquiJoinsCount; + int JoinsCount{}; + int EquiJoinsCount{}; bool IsDataQuery() const { return QueryCtx->Type == NYql::EKikimrQueryType::Dml; diff --git a/ydb/core/kqp/opt/kqp_opt_impl.h b/ydb/core/kqp/opt/kqp_opt_impl.h index 8370b0994b9b..e85efabe2927 100644 --- a/ydb/core/kqp/opt/kqp_opt_impl.h +++ b/ydb/core/kqp/opt/kqp_opt_impl.h @@ -62,4 +62,7 @@ TVector> BuildS bool IsBuiltEffect(const NYql::NNodes::TExprBase& effect); +bool IsSortKeyPrimary(const NYql::NNodes::TCoLambda& keySelector, const NYql::TKikimrTableDescription& tableDesc, + const TMaybe>& passthroughFields = {}); + } // namespace NKikimr::NKqp::NOpt diff --git a/ydb/core/kqp/opt/kqp_opt_phy_finalize.cpp b/ydb/core/kqp/opt/kqp_opt_phy_finalize.cpp index 
8255e13914e3..bfd9839cdf62 100644 --- a/ydb/core/kqp/opt/kqp_opt_phy_finalize.cpp +++ b/ydb/core/kqp/opt/kqp_opt_phy_finalize.cpp @@ -222,6 +222,115 @@ TStatus KqpDuplicateResults(const TExprNode::TPtr& input, TExprNode::TPtr& outpu return TStatus::Ok; } +template +TVector CollectNodes(const TExprNode::TPtr& input) { + TVector result; + + VisitExpr(input, [&result](const TExprNode::TPtr& node) { + if (TExpr::Match(node.Get())) { + result.emplace_back(TExpr(node)); + } + return true; + }); + + return result; +} + +bool FindPrecomputedOutputs(TDqStageBase stage, const TParentsMap& parentsMap) { + auto outIt = parentsMap.find(stage.Raw()); + if (outIt == parentsMap.end()) { + return false; + } + + for (auto& output : outIt->second) { + if (TDqOutput::Match(output)) { + auto connIt = parentsMap.find(output); + if (connIt != parentsMap.end()) { + for (auto maybeConn : connIt->second) { + auto parentIt = parentsMap.find(maybeConn); + if (parentIt != parentsMap.end()) { + for (auto& parent : parentIt->second) { + if (TDqPrecompute::Match(parent) || TDqPhyPrecompute::Match(parent)) { + return true; + } + } + } + } + } + } + } + + return false; +} + + +TExprBase ReplicatePrecompute(TDqStageBase stage, TExprContext& ctx, const TParentsMap& parentsMap) { + for (size_t i = 0; i < stage.Inputs().Size(); ++i) { + auto input = stage.Inputs().Item(i); + if (auto maybeConn = stage.Inputs().Item(i).Maybe()) { + auto conn = maybeConn.Cast(); + if (conn.Maybe() || conn.Maybe()) { + { + auto sourceStage = conn.Output().Stage(); + if (!sourceStage.Program().Body().Maybe()) { + continue; + } + + if (!FindPrecomputedOutputs(sourceStage, parentsMap)) { + continue; + } + } + + auto arg = stage.Program().Args().Arg(i); + auto newArg = Build(ctx, stage.Program().Args().Arg(i).Pos()) + .Name("_replaced_arg") + .Done(); + + TVector newArgs; + TNodeOnNodeOwnedMap programReplaces; + for (size_t j = 0; j < stage.Program().Args().Size(); ++j) { + auto oldArg = stage.Program().Args().Arg(j); + 
newArgs.push_back(Build(ctx, stage.Program().Args().Arg(i).Pos()) + .Name("_replaced_arg_" + ToString(j)) + .Done()); + if (i == j) { + programReplaces[oldArg.Raw()] = Build(ctx, oldArg.Pos()).Input(newArgs.back()).Done().Ptr(); + } else { + programReplaces[oldArg.Raw()] = newArgs.back().Ptr(); + } + } + + return + Build(ctx, stage.Pos()) + .Inputs(ctx.ReplaceNode(stage.Inputs().Ptr(), input.Ref(), Build(ctx, input.Pos()).Connection(conn).Done().Ptr())) + .Outputs(stage.Outputs()) + .Settings(stage.Settings()) + .Program() + .Args(newArgs) + .Body(TExprBase(ctx.ReplaceNodes(stage.Program().Body().Ptr(), programReplaces))) + .Build() + .Done(); + } + } + } + return stage; +} + +NYql::IGraphTransformer::TStatus ReplicatePrecomputeRule(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) { + TParentsMap parents; + GatherParents(*input, parents, true); + auto stages = CollectNodes(input); + for (auto& stage : stages) { + auto applied = ReplicatePrecompute(stage, ctx, parents); + if (applied.Raw() != stage.Raw()) { + output = ctx.ReplaceNode(input.Get(), stage.Ref(), applied.Ptr()); + return TStatus::Repeat; + } + } + output = input; + return TStatus::Ok; +} + template NYql::IGraphTransformer::TStatus PerformGlobalRule(const TString& ruleName, const NYql::TExprNode::TPtr& input, NYql::TExprNode::TPtr& output, NYql::TExprContext& ctx, TFunctor func) @@ -251,6 +360,8 @@ TAutoPtr CreateKqpFinalizingOptTransformer(const TIntrusivePt [kqpCtx](const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) -> TStatus { output = input; + PERFORM_GLOBAL_RULE("ReplicatePrecompute", input, output, ctx, ReplicatePrecomputeRule); + PERFORM_GLOBAL_RULE("ReplicateMultiUsedConnection", input, output, ctx, [](const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) { YQL_ENSURE(TKqlQuery::Match(input.Get())); diff --git a/ydb/core/kqp/opt/kqp_query_plan.cpp b/ydb/core/kqp/opt/kqp_query_plan.cpp index 587c410fb413..66b113f0b86b 100644 --- 
a/ydb/core/kqp/opt/kqp_query_plan.cpp +++ b/ydb/core/kqp/opt/kqp_query_plan.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include @@ -592,9 +592,7 @@ class TxPlanSerializer { } if (auto literal = key.Maybe()) { - TStringStream out; - NUuid::UuidBytesToString(literal.Cast().Literal().Value().Data(), out); - return out.Str(); + return NUuid::UuidBytesToString(literal.Cast().Literal().StringValue()); } if (auto literal = key.Maybe()) { @@ -1947,6 +1945,9 @@ TVector RemoveRedundantNodes(NJson::TJsonValue& plan, const T } } + if (!planMap.contains("Node Type")) { + return {}; + } const auto typeName = planMap.at("Node Type").GetStringSafe(); if (redundantNodes.contains(typeName) || typeName.find("Precompute") != TString::npos) { return children; @@ -1955,167 +1956,235 @@ TVector RemoveRedundantNodes(NJson::TJsonValue& plan, const T return {plan}; } -NJson::TJsonValue ReconstructQueryPlanRec(const NJson::TJsonValue& plan, - int operatorIndex, - const THashMap& planIndex, - const THashMap& precomputes, - int& nodeCounter) { - - int currentNodeId = nodeCounter++; - - NJson::TJsonValue result; - result["PlanNodeId"] = currentNodeId; - - if (plan.GetMapSafe().contains("PlanNodeType")) { - result["PlanNodeType"] = plan.GetMapSafe().at("PlanNodeType").GetStringSafe(); - } +struct TQueryPlanReconstructor { + TQueryPlanReconstructor( + const THashMap& planIndex, + const THashMap& precomputes + ) + : PlanIndex(planIndex) + , Precomputes(precomputes) + , NodeIDCounter(0) + , Budget(10'000) + {} - if (plan.GetMapSafe().contains("Stats") && operatorIndex==0) { - result["Stats"] = plan.GetMapSafe().at("Stats"); - } + NJson::TJsonValue Reconstruct( + const NJson::TJsonValue& plan, + int operatorIndex + ) { + int currentNodeId = NodeIDCounter++; - if (!plan.GetMapSafe().contains("Operators")) { - NJson::TJsonValue planInputs; + NJson::TJsonValue result; + result["PlanNodeId"] = currentNodeId; - result["Node Type"] = plan.GetMapSafe().at("Node 
Type").GetStringSafe(); + if (--Budget <= 0) { + YQL_CLOG(DEBUG, ProviderKqp) << "Can't build the plan - recursion depth has been exceeded!"; + return result; + } - if (plan.GetMapSafe().contains("CTE Name")) { - auto precompute = plan.GetMapSafe().at("CTE Name").GetStringSafe(); - if (precomputes.contains(precompute)) { - planInputs.AppendValue(ReconstructQueryPlanRec(precomputes.at(precompute), 0, planIndex, precomputes, nodeCounter)); - } + if (plan.GetMapSafe().contains("PlanNodeType")) { + result["PlanNodeType"] = plan.GetMapSafe().at("PlanNodeType").GetStringSafe(); } - if (!plan.GetMapSafe().contains("Plans")) { - result["Plans"] = planInputs; - return result; + if (plan.GetMapSafe().contains("Stats") && operatorIndex==0) { + result["Stats"] = plan.GetMapSafe().at("Stats"); } - if (plan.GetMapSafe().at("Node Type").GetStringSafe() == "TableLookup") { + if (plan.GetMapSafe().at("Node Type") == "TableLookupJoin" && plan.GetMapSafe().contains("Table")) { + result["Node Type"] = "LookupJoin"; NJson::TJsonValue newOps; NJson::TJsonValue op; - op["Name"] = "TableLookup"; - op["Columns"] = plan.GetMapSafe().at("Columns"); + op["Name"] = "LookupJoin"; op["LookupKeyColumns"] = plan.GetMapSafe().at("LookupKeyColumns"); - op["Table"] = plan.GetMapSafe().at("Table"); + + newOps.AppendValue(std::move(op)); + result["Operators"] = std::move(newOps); + + NJson::TJsonValue newPlans; + + NJson::TJsonValue lookupPlan; + lookupPlan["Node Type"] = "TableLookup"; + lookupPlan["PlanNodeType"] = "TableLookup"; + + NJson::TJsonValue lookupOps; + NJson::TJsonValue lookupOp; + + lookupOp["Name"] = "TableLookup"; + lookupOp["Columns"] = plan.GetMapSafe().at("Columns"); + lookupOp["LookupKeyColumns"] = plan.GetMapSafe().at("LookupKeyColumns"); + lookupOp["Table"] = plan.GetMapSafe().at("Table"); if (plan.GetMapSafe().contains("E-Cost")) { - op["E-Cost"] = plan.GetMapSafe().at("E-Cost"); - } + lookupOp["E-Cost"] = plan.GetMapSafe().at("E-Cost"); + } if 
(plan.GetMapSafe().contains("E-Rows")) { - op["E-Rows"] = plan.GetMapSafe().at("E-Rows"); + lookupOp["E-Rows"] = plan.GetMapSafe().at("E-Rows"); } if (plan.GetMapSafe().contains("E-Size")) { - op["E-Size"] = plan.GetMapSafe().at("E-Size"); + lookupOp["E-Size"] = plan.GetMapSafe().at("E-Size"); } - newOps.AppendValue(op); + lookupOps.AppendValue(std::move(lookupOp)); + lookupPlan["Operators"] = std::move(lookupOps); + + newPlans.AppendValue(Reconstruct(plan.GetMapSafe().at("Plans").GetArraySafe()[0], 0)); + + newPlans.AppendValue(std::move(lookupPlan)); + + result["Plans"] = std::move(newPlans); - result["Operators"] = newOps; return result; } - for (auto p : plan.GetMapSafe().at("Plans").GetArraySafe()) { - if (!p.GetMapSafe().contains("Operators") && p.GetMapSafe().contains("CTE Name")) { - auto precompute = p.GetMapSafe().at("CTE Name").GetStringSafe(); - if (precomputes.contains(precompute)) { - planInputs.AppendValue(ReconstructQueryPlanRec(precomputes.at(precompute), 0, planIndex, precomputes, nodeCounter)); + if (!plan.GetMapSafe().contains("Operators")) { + NJson::TJsonValue planInputs; + + result["Node Type"] = plan.GetMapSafe().at("Node Type").GetStringSafe(); + + if (plan.GetMapSafe().contains("CTE Name")) { + auto precompute = plan.GetMapSafe().at("CTE Name").GetStringSafe(); + if (Precomputes.contains(precompute)) { + planInputs.AppendValue(Reconstruct(Precomputes.at(precompute), 0)); } - } else if (p.GetMapSafe().at("Node Type").GetStringSafe().find("Precompute") == TString::npos) { - planInputs.AppendValue(ReconstructQueryPlanRec(p, 0, planIndex, precomputes, nodeCounter)); } - } - result["Plans"] = planInputs; - return result; - } - if (plan.GetMapSafe().contains("CTE Name") && plan.GetMapSafe().at("Node Type").GetStringSafe() == "ConstantExpr") { - auto precompute = plan.GetMapSafe().at("CTE Name").GetStringSafe(); - if (!precomputes.contains(precompute)) { - result["Node Type"] = plan.GetMapSafe().at("Node Type"); + if 
(!plan.GetMapSafe().contains("Plans")) { + result["Plans"] = std::move(planInputs); + return result; + } + + if (plan.GetMapSafe().at("Node Type").GetStringSafe() == "TableLookup") { + NJson::TJsonValue newOps; + NJson::TJsonValue op; + + op["Name"] = "TableLookup"; + op["Columns"] = plan.GetMapSafe().at("Columns"); + op["LookupKeyColumns"] = plan.GetMapSafe().at("LookupKeyColumns"); + op["Table"] = plan.GetMapSafe().at("Table"); + + if (plan.GetMapSafe().contains("E-Cost")) { + op["E-Cost"] = plan.GetMapSafe().at("E-Cost"); + } + if (plan.GetMapSafe().contains("E-Rows")) { + op["E-Rows"] = plan.GetMapSafe().at("E-Rows"); + } + if (plan.GetMapSafe().contains("E-Size")) { + op["E-Size"] = plan.GetMapSafe().at("E-Size"); + } + + newOps.AppendValue(std::move(op)); + + result["Operators"] = std::move(newOps); + return result; + } + + for (auto p : plan.GetMapSafe().at("Plans").GetArraySafe()) { + if (!p.GetMapSafe().contains("Operators") && p.GetMapSafe().contains("CTE Name")) { + auto precompute = p.GetMapSafe().at("CTE Name").GetStringSafe(); + if (Precomputes.contains(precompute)) { + planInputs.AppendValue(Reconstruct(Precomputes.at(precompute), 0)); + } + } else if (p.GetMapSafe().at("Node Type").GetStringSafe().find("Precompute") == TString::npos) { + planInputs.AppendValue(Reconstruct(p, 0)); + } + } + result["Plans"] = planInputs; return result; } - return ReconstructQueryPlanRec(precomputes.at(precompute), 0, planIndex, precomputes, nodeCounter); - } + if (plan.GetMapSafe().contains("CTE Name") && plan.GetMapSafe().at("Node Type").GetStringSafe() == "ConstantExpr") { + auto precompute = plan.GetMapSafe().at("CTE Name").GetStringSafe(); + if (!Precomputes.contains(precompute)) { + result["Node Type"] = plan.GetMapSafe().at("Node Type"); + return result; + } - auto ops = plan.GetMapSafe().at("Operators").GetArraySafe(); - auto op = ops[operatorIndex]; + return Reconstruct(Precomputes.at(precompute), 0); + } - TVector planInputs; + auto ops = 
plan.GetMapSafe().at("Operators").GetArraySafe(); + auto op = ops[operatorIndex]; - auto opName = op.GetMapSafe().at("Name").GetStringSafe(); + TVector planInputs; - THashSet processedExternalOperators; - THashSet processedInternalOperators; - for (auto opInput : op.GetMapSafe().at("Inputs").GetArraySafe()) { + auto opName = op.GetMapSafe().at("Name").GetStringSafe(); - if (opInput.GetMapSafe().contains("ExternalPlanNodeId")) { - auto inputPlanKey = opInput.GetMapSafe().at("ExternalPlanNodeId").GetIntegerSafe(); + THashSet processedExternalOperators; + THashSet processedInternalOperators; + for (auto opInput : op.GetMapSafe().at("Inputs").GetArraySafe()) { - if (processedExternalOperators.contains(inputPlanKey)) { - continue; - } - processedExternalOperators.insert(inputPlanKey); + if (opInput.GetMapSafe().contains("ExternalPlanNodeId")) { + auto inputPlanKey = opInput.GetMapSafe().at("ExternalPlanNodeId").GetIntegerSafe(); - auto inputPlan = planIndex.at(inputPlanKey); - planInputs.push_back( ReconstructQueryPlanRec(inputPlan, 0, planIndex, precomputes, nodeCounter)); - } else if (opInput.GetMapSafe().contains("InternalOperatorId")) { - auto inputPlanId = opInput.GetMapSafe().at("InternalOperatorId").GetIntegerSafe(); + if (processedExternalOperators.contains(inputPlanKey)) { + continue; + } + processedExternalOperators.insert(inputPlanKey); - if (processedInternalOperators.contains(inputPlanId)) { - continue; - } - processedInternalOperators.insert(inputPlanId); + auto inputPlan = PlanIndex.at(inputPlanKey); + planInputs.push_back( Reconstruct(inputPlan, 0) ); + } else if (opInput.GetMapSafe().contains("InternalOperatorId")) { + auto inputPlanId = opInput.GetMapSafe().at("InternalOperatorId").GetIntegerSafe(); - planInputs.push_back( ReconstructQueryPlanRec(plan, inputPlanId, planIndex, precomputes, nodeCounter)); + if (processedInternalOperators.contains(inputPlanId)) { + continue; + } + processedInternalOperators.insert(inputPlanId); + + planInputs.push_back( 
Reconstruct(plan, inputPlanId) ); + } } - } - if (op.GetMapSafe().contains("Inputs")) { - op.GetMapSafe().erase("Inputs"); - } + if (op.GetMapSafe().contains("Inputs")) { + op.GetMapSafe().erase("Inputs"); + } - if (op.GetMapSafe().contains("Input") - || op.GetMapSafe().contains("ToFlow") - || op.GetMapSafe().contains("Member") - || op.GetMapSafe().contains("AssumeSorted") - || op.GetMapSafe().contains("Iterator")) { + if (op.GetMapSafe().contains("Input") + || op.GetMapSafe().contains("ToFlow") + || op.GetMapSafe().contains("Member") + || op.GetMapSafe().contains("AssumeSorted") + || op.GetMapSafe().contains("Iterator")) { - TString maybePrecompute = ""; - if (op.GetMapSafe().contains("Input")) { - maybePrecompute = op.GetMapSafe().at("Input").GetStringSafe(); - } else if (op.GetMapSafe().contains("ToFlow")) { - maybePrecompute = op.GetMapSafe().at("ToFlow").GetStringSafe(); - } else if (op.GetMapSafe().contains("Member")) { - maybePrecompute = op.GetMapSafe().at("Member").GetStringSafe(); - } else if (op.GetMapSafe().contains("AssumeSorted")) { - maybePrecompute = op.GetMapSafe().at("AssumeSorted").GetStringSafe(); - } else if (op.GetMapSafe().contains("Iterator")) { - maybePrecompute = op.GetMapSafe().at("Iterator").GetStringSafe(); - } + TString maybePrecompute = ""; + if (op.GetMapSafe().contains("Input")) { + maybePrecompute = op.GetMapSafe().at("Input").GetStringSafe(); + } else if (op.GetMapSafe().contains("ToFlow")) { + maybePrecompute = op.GetMapSafe().at("ToFlow").GetStringSafe(); + } else if (op.GetMapSafe().contains("Member")) { + maybePrecompute = op.GetMapSafe().at("Member").GetStringSafe(); + } else if (op.GetMapSafe().contains("AssumeSorted")) { + maybePrecompute = op.GetMapSafe().at("AssumeSorted").GetStringSafe(); + } else if (op.GetMapSafe().contains("Iterator")) { + maybePrecompute = op.GetMapSafe().at("Iterator").GetStringSafe(); + } - if (precomputes.contains(maybePrecompute) && planInputs.empty()) { - 
planInputs.push_back(ReconstructQueryPlanRec(precomputes.at(maybePrecompute), 0, planIndex, precomputes, nodeCounter)); + if (Precomputes.contains(maybePrecompute) && planInputs.empty()) { + planInputs.push_back(Reconstruct(Precomputes.at(maybePrecompute), 0)); + } } - } - result["Node Type"] = opName; - NJson::TJsonValue newOps; - newOps.AppendValue(op); - result["Operators"] = newOps; + result["Node Type"] = std::move(opName); + NJson::TJsonValue newOps; + newOps.AppendValue(std::move(op)); + result["Operators"] = std::move(newOps); - if (planInputs.size()){ - NJson::TJsonValue plans; - for( auto i : planInputs) { - plans.AppendValue(i); + if (!planInputs.empty()){ + NJson::TJsonValue plans; + for(auto&& i : planInputs) { + plans.AppendValue(std::move(i)); + } + result["Plans"] = std::move(plans); } - result["Plans"] = plans; + + return result; } - return result; -} +private: + const THashMap& PlanIndex; + const THashMap& Precomputes; + ui32 NodeIDCounter; + i32 Budget; // Prevent bugs with inf recursion +}; double ComputeCpuTimes(NJson::TJsonValue& plan) { double currCpuTime = 0; @@ -2211,8 +2280,7 @@ NJson::TJsonValue SimplifyQueryPlan(NJson::TJsonValue& plan) { BuildPlanIndex(plan, planIndex, precomputes); - int nodeCounter = 0; - plan = ReconstructQueryPlanRec(plan, 0, planIndex, precomputes, nodeCounter); + plan = TQueryPlanReconstructor(planIndex, precomputes).Reconstruct(plan, 0); RemoveRedundantNodes(plan, redundantNodes); ComputeCpuTimes(plan); @@ -2245,7 +2313,7 @@ TString AddSimplifiedPlan(const TString& planText, TIntrusivePtr& txPlans, TIntrusivePtr optCtx, const TString commonPlanInfo = "") { +TString SerializeTxPlans(const TVector& txPlans, TIntrusivePtr optCtx, const TString commonPlanInfo = "", const TString& queryStats = "") { NJsonWriter::TBuf writer; writer.SetIndentSpaces(2); @@ -2268,6 +2336,15 @@ TString SerializeTxPlans(const TVector& txPlans, TIntrusivePtr()); + + NJsonWriter::TBuf writer; + writer.BeginObject(); + + if 
(queryStats.HasCompilation()) { + const auto& compilation = queryStats.GetCompilation(); + + writer.WriteKey("Compilation"); + writer.BeginObject(); + writer.WriteKey("FromCache").WriteBool(compilation.GetFromCache()); + writer.WriteKey("DurationUs").WriteLongLong(compilation.GetDurationUs()); + writer.WriteKey("CpuTimeUs").WriteLongLong(compilation.GetCpuTimeUs()); + writer.EndObject(); + } + + writer.WriteKey("ProcessCpuTimeUs").WriteLongLong(queryStats.GetWorkerCpuTimeUs()); + writer.WriteKey("TotalDurationUs").WriteLongLong(queryStats.GetDurationUs()); + writer.WriteKey("QueuedTimeUs").WriteLongLong(queryStats.GetQueuedTimeUs()); + writer.EndObject(); + + return SerializeTxPlans(txPlans, TIntrusivePtr(), "", writer.Str()); } TString SerializeScriptPlan(const TVector& queryPlans) { diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log.cpp index 0a63f6b6cf83..ea9561d43531 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_log.cpp +++ b/ydb/core/kqp/opt/logical/kqp_opt_log.cpp @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -61,6 +62,7 @@ class TKqpLogicalOptTransformer : public TOptimizeTransformerBase { AddHandler(0, &TCoNarrowFlatMap::Match, HNDL(DqReadWideWrapFieldSubset)); AddHandler(0, &TCoNarrowMultiMap::Match, HNDL(DqReadWideWrapFieldSubset)); AddHandler(0, &TCoWideMap::Match, HNDL(DqReadWideWrapFieldSubset)); + AddHandler(0, &TCoMatchRecognize::Match, HNDL(MatchRecognize)); AddHandler(1, &TCoFlatMap::Match, HNDL(LatePushExtractedPredicateToReadTable)); AddHandler(1, &TCoTop::Match, HNDL(RewriteTopSortOverIndexRead)); @@ -311,6 +313,14 @@ class TKqpLogicalOptTransformer : public TOptimizeTransformerBase { return output; } + TMaybeNode MatchRecognize(TExprBase node, TExprContext& ctx) { + auto output = ExpandMatchRecognize(node.Ptr(), ctx, TypesCtx); + if (output) { + DumpAppliedRule("MatchRecognize", node.Ptr(), output, ctx); + } + return output; + } + TMaybeNode DqReadWrapByProvider(TExprBase 
node, TExprContext& ctx) { auto output = NDq::DqReadWrapByProvider(node, ctx, TypesCtx); if (output) { diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log_indexes.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log_indexes.cpp index a872addcf58a..157c371c167e 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_log_indexes.cpp +++ b/ydb/core/kqp/opt/logical/kqp_opt_log_indexes.cpp @@ -412,44 +412,67 @@ TExprBase KqpRewriteLookupIndex(const TExprBase& node, TExprContext& ctx, const } TExprBase KqpRewriteStreamLookupIndex(const TExprBase& node, TExprContext& ctx, const TKqpOptimizeContext& kqpCtx) { - if (!kqpCtx.IsScanQuery()) { + if (!node.Maybe()) { return node; } - if (auto maybeStreamLookupIndex = node.Maybe()) { - auto streamLookupIndex = maybeStreamLookupIndex.Cast(); + auto streamLookupIndex = node.Maybe().Cast(); - const auto& tableDesc = GetTableData(*kqpCtx.Tables, kqpCtx.Cluster, streamLookupIndex.Table().Path()); - const auto& [indexMeta, _] = tableDesc.Metadata->GetIndexMetadata(streamLookupIndex.Index().StringValue()); + const auto& tableDesc = GetTableData(*kqpCtx.Tables, kqpCtx.Cluster, streamLookupIndex.Table().Path()); + const auto& [indexMeta, _] = tableDesc.Metadata->GetIndexMetadata(streamLookupIndex.Index().StringValue()); - const bool needDataRead = CheckIndexCovering(streamLookupIndex, indexMeta); - if (!needDataRead) { - return Build(ctx, node.Pos()) - .Table(BuildTableMeta(*indexMeta, node.Pos(), ctx)) - .LookupKeys(streamLookupIndex.LookupKeys()) - .Columns(streamLookupIndex.Columns()) - .LookupStrategy().Build(TKqpStreamLookupStrategyName) - .Done(); - } - - auto keyColumnsList = BuildKeyColumnsList(tableDesc, streamLookupIndex.Pos(), ctx); - - TExprBase lookupIndexTable = Build(ctx, node.Pos()) + const bool needDataRead = CheckIndexCovering(streamLookupIndex, indexMeta); + if (!needDataRead) { + return Build(ctx, node.Pos()) .Table(BuildTableMeta(*indexMeta, node.Pos(), ctx)) .LookupKeys(streamLookupIndex.LookupKeys()) - .Columns(keyColumnsList) - 
.LookupStrategy().Build(TKqpStreamLookupStrategyName) + .Columns(streamLookupIndex.Columns()) + .LookupStrategy().Build(streamLookupIndex.LookupStrategy()) .Done(); + } - return Build(ctx, node.Pos()) - .Table(streamLookupIndex.Table()) - .LookupKeys(lookupIndexTable.Ptr()) - .Columns(streamLookupIndex.Columns()) - .LookupStrategy().Build(TKqpStreamLookupStrategyName) + auto keyColumnsList = BuildKeyColumnsList(tableDesc, streamLookupIndex.Pos(), ctx); + + TExprBase lookupIndexTable = Build(ctx, node.Pos()) + .Table(BuildTableMeta(*indexMeta, node.Pos(), ctx)) + .LookupKeys(streamLookupIndex.LookupKeys()) + .Columns(keyColumnsList) + .LookupStrategy().Build(streamLookupIndex.LookupStrategy()) + .Done(); + + TMaybeNode lookupKeys; + YQL_ENSURE(streamLookupIndex.LookupStrategy().Maybe()); + TString lookupStrategy = streamLookupIndex.LookupStrategy().Maybe().Cast().StringValue(); + if (lookupStrategy == TKqpStreamLookupJoinStrategyName || lookupStrategy == TKqpStreamLookupSemiJoinStrategyName) { + // Result type of lookupIndexTable: list>>, + // expected input type for main table stream join: list, left_row>>, + // so we should transform list>> to list, left_row>> + lookupKeys = Build(ctx, node.Pos()) + .Input(lookupIndexTable) + .Lambda() + .Args({"tuple"}) + .Body() + .Add() + .Tuple("tuple") + .Index().Value("1").Build() + .Build() + .Add() + .Tuple("tuple") + .Index().Value("0").Build() + .Build() + .Build() + .Build() .Done(); + } else { + lookupKeys = lookupIndexTable; } - return node; + return Build(ctx, node.Pos()) + .Table(streamLookupIndex.Table()) + .LookupKeys(lookupKeys.Cast()) + .Columns(streamLookupIndex.Columns()) + .LookupStrategy().Build(streamLookupIndex.LookupStrategy()) + .Done(); } /// Can push flat map node to read from table using only columns available in table description diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log_join.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log_join.cpp index ac26425830f7..432e5d86df22 100644 --- 
a/ydb/core/kqp/opt/logical/kqp_opt_log_join.cpp +++ b/ydb/core/kqp/opt/logical/kqp_opt_log_join.cpp @@ -198,6 +198,7 @@ TExprBase BuildLookupIndex(TExprContext& ctx, const TPositionHandle pos, .Columns(columns) .Index() .Build(indexName) + .LookupStrategy().Build(TKqpStreamLookupStrategyName) .Done(); } @@ -336,6 +337,7 @@ bool IsParameterToListOfStructsRepack(const TExprBase& expr) { TMaybeNode BuildKqpStreamIndexLookupJoin( const TDqJoin& join, TExprBase leftInput, + const TString& indexName, const TPrefixLookup& rightLookup, const TKqpMatchReadResult& rightReadMatch, TExprContext& ctx) @@ -395,19 +397,34 @@ TMaybeNode BuildKqpStreamIndexLookupJoin( } } - TExprBase lookupJoin = Build(ctx, join.Pos()) - .Table(rightLookup.MainTable) - .LookupKeys(leftInput) - .Columns(lookupColumns.Cast()) - .LookupStrategy().Build(TKqpStreamLookupJoinStrategyName) - .Done(); + auto strategy = join.JoinType().Value() == "LeftSemi" + ? TKqpStreamLookupSemiJoinStrategyName + : TKqpStreamLookupJoinStrategyName; + + TMaybeNode lookupJoin; + if (indexName) { + lookupJoin = Build(ctx, join.Pos()) + .Table(rightLookup.MainTable) + .LookupKeys(leftInput) + .Columns(lookupColumns.Cast()) + .Index().Build(indexName) + .LookupStrategy().Build(strategy) + .Done(); + } else { + lookupJoin = Build(ctx, join.Pos()) + .Table(rightLookup.MainTable) + .LookupKeys(leftInput) + .Columns(lookupColumns.Cast()) + .LookupStrategy().Build(strategy) + .Done(); + } // Stream lookup join output: stream>> // so we should apply filters to second element of tuple for each row if (extraRightFilter.IsValid()) { lookupJoin = Build(ctx, join.Pos()) - .Input(lookupJoin) + .Input(lookupJoin.Cast()) .Lambda() .Args({"tuple"}) .Body() @@ -429,7 +446,7 @@ TMaybeNode BuildKqpStreamIndexLookupJoin( if (rightReadMatch.ExtractMembers) { lookupJoin = Build(ctx, join.Pos()) - .Input(lookupJoin) + .Input(lookupJoin.Cast()) .Lambda() .Args({"tuple"}) .Body() @@ -451,7 +468,7 @@ TMaybeNode BuildKqpStreamIndexLookupJoin( if 
(rightReadMatch.FilterNullMembers) { lookupJoin = Build(ctx, join.Pos()) - .Input(lookupJoin) + .Input(lookupJoin.Cast()) .Lambda() .Args({"tuple"}) .Body() @@ -473,7 +490,7 @@ TMaybeNode BuildKqpStreamIndexLookupJoin( if (rightReadMatch.SkipNullMembers) { lookupJoin = Build(ctx, join.Pos()) - .Input(lookupJoin) + .Input(lookupJoin.Cast()) .Lambda() .Args({"tuple"}) .Body() @@ -495,7 +512,7 @@ TMaybeNode BuildKqpStreamIndexLookupJoin( if (rightReadMatch.FlatMap) { lookupJoin = Build(ctx, join.Pos()) - .Input(lookupJoin) + .Input(lookupJoin.Cast()) .Lambda() .Args({"tuple"}) .Body() @@ -516,7 +533,7 @@ TMaybeNode BuildKqpStreamIndexLookupJoin( } return Build(ctx, join.Pos()) - .Input(lookupJoin) + .Input(lookupJoin.Cast()) .LeftLabel().Build(leftLabel) .RightLabel().Build(rightLabel) .JoinType(join.JoinType()) @@ -597,8 +614,7 @@ TMaybeNode KqpJoinToIndexLookupImpl(const TDqJoin& join, TExprContext } const bool useStreamIndexLookupJoin = (kqpCtx.IsDataQuery() || kqpCtx.IsGenericQuery()) - && kqpCtx.Config->EnableKqpDataQueryStreamIdxLookupJoin - && !indexName; + && kqpCtx.Config->EnableKqpDataQueryStreamIdxLookupJoin; auto leftRowArg = Build(ctx, join.Pos()) .Name("leftRowArg") @@ -833,7 +849,7 @@ TMaybeNode KqpJoinToIndexLookupImpl(const TDqJoin& join, TExprContext .Build() .Done(); - return BuildKqpStreamIndexLookupJoin(join, leftInput, *prefixLookup, *rightReadMatch, ctx); + return BuildKqpStreamIndexLookupJoin(join, leftInput, indexName, *prefixLookup, *rightReadMatch, ctx); } auto leftDataDeduplicated = DeduplicateByMembers(leftData, filter, deduplicateLeftColumns, ctx, join.Pos()); diff --git a/ydb/core/kqp/opt/logical/kqp_opt_log_ranges_predext.cpp b/ydb/core/kqp/opt/logical/kqp_opt_log_ranges_predext.cpp index bcc8a8cf7150..29722d23a386 100644 --- a/ydb/core/kqp/opt/logical/kqp_opt_log_ranges_predext.cpp +++ b/ydb/core/kqp/opt/logical/kqp_opt_log_ranges_predext.cpp @@ -177,6 +177,22 @@ TMaybeNode TryBuildTrivialReadTable(TCoFlatMap& flatmap, TKqlReadTabl 
.Done(); } +TMaybeNode ExtractTopSortKeySelector(TExprBase node, const NYql::TParentsMap& parentsMap) { + auto it = parentsMap.find(node.Raw()); + if (it != parentsMap.end()) { + if (it->second.size() != 1) { + return {}; + } + for (auto* node : it->second) { + if (TCoTopSort::Match(node)) { + TCoTopSort topSort(node); + return topSort.KeySelectorLambda(); + } + } + } + return {}; +} + } // namespace TExprBase KqpPushExtractedPredicateToReadTable(TExprBase node, TExprContext& ctx, const TKqpOptimizeContext& kqpCtx, @@ -269,7 +285,7 @@ TExprBase KqpPushExtractedPredicateToReadTable(TExprBase node, TExprContext& ctx YQL_ENSURE(prepareSuccess); if (!indexName.IsValid() && !readSettings.ForcePrimary && kqpCtx.Config->IndexAutoChooserMode != NKikimrConfig::TTableServiceConfig_EIndexAutoChooseMode_DISABLED) { - using TIndexComparisonKey = std::tuple; + using TIndexComparisonKey = std::tuple; auto calcNeedsJoin = [&] (const TKikimrTableMetadataPtr& keyTable) -> bool { bool needsJoin = false; for (auto&& column : read.Columns()) { @@ -280,8 +296,16 @@ TExprBase KqpPushExtractedPredicateToReadTable(TExprBase node, TExprContext& ctx return needsJoin; }; - auto calcKey = [&](NYql::IPredicateRangeExtractor::TBuildResult buildResult, size_t descriptionKeyColumns, bool needsJoin) -> TIndexComparisonKey { + auto keySelector = ExtractTopSortKeySelector(flatmap, parentsMap); + + auto calcKey = [&]( + NYql::IPredicateRangeExtractor::TBuildResult buildResult, + size_t descriptionKeyColumns, + bool needsJoin, + const NYql::TKikimrTableDescription & tableDesc) -> TIndexComparisonKey + { return std::make_tuple( + keySelector.IsValid() && IsSortKeyPrimary(keySelector.Cast(), tableDesc), buildResult.PointPrefixLen >= descriptionKeyColumns, buildResult.PointPrefixLen >= descriptionKeyColumns ? 
0 : buildResult.PointPrefixLen, buildResult.UsedPrefixLen >= descriptionKeyColumns, @@ -293,7 +317,7 @@ TExprBase KqpPushExtractedPredicateToReadTable(TExprBase node, TExprContext& ctx auto primaryBuildResult = extractor->BuildComputeNode(mainTableDesc.Metadata->KeyColumnNames, ctx, typesCtx); if (primaryBuildResult.PointPrefixLen < mainTableDesc.Metadata->KeyColumnNames.size()) { - auto maxKey = calcKey(primaryBuildResult, mainTableDesc.Metadata->KeyColumnNames.size(), false); + auto maxKey = calcKey(primaryBuildResult, mainTableDesc.Metadata->KeyColumnNames.size(), false, mainTableDesc); for (auto& index : mainTableDesc.Metadata->Indexes) { if (index.Type != TIndexDescription::EType::GlobalAsync) { auto& tableDesc = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, mainTableDesc.Metadata->GetIndexMetadata(TString(index.Name)).first->Name); @@ -307,7 +331,7 @@ TExprBase KqpPushExtractedPredicateToReadTable(TExprBase node, TExprContext& ctx continue; } - auto key = calcKey(buildResult, index.KeyColumns.size(), needsJoin); + auto key = calcKey(buildResult, index.KeyColumns.size(), needsJoin, tableDesc); if (key > maxKey) { maxKey = key; chosenIndex = index.Name; @@ -387,6 +411,7 @@ TExprBase KqpPushExtractedPredicateToReadTable(TExprBase node, TExprContext& ctx .LookupKeys(keys) .Index(indexName.Cast()) .LookupKeys(keys) + .LookupStrategy().Build(TKqpStreamLookupStrategyName) .Done(); } } else { diff --git a/ydb/core/kqp/opt/physical/effects/kqp_opt_phy_returning.cpp b/ydb/core/kqp/opt/physical/effects/kqp_opt_phy_returning.cpp index d4e97c698697..4ae0aaf7e3b4 100644 --- a/ydb/core/kqp/opt/physical/effects/kqp_opt_phy_returning.cpp +++ b/ydb/core/kqp/opt/physical/effects/kqp_opt_phy_returning.cpp @@ -204,7 +204,7 @@ TExprBase KqpRewriteReturningUpsert(TExprBase node, TExprContext& ctx, const TKq return node; } - if (!upsert.Input().Maybe() && !upsert.Input().Maybe()) { + if (upsert.Input().Maybe() || upsert.Input().Maybe()) { return node; } @@ -226,7 +226,7 @@ TExprBase 
KqpRewriteReturningDelete(TExprBase node, TExprContext& ctx, const TKq return node; } - if (!del.Input().Maybe() && !del.Input().Maybe()) { + if (del.Input().Maybe() || del.Input().Maybe()) { return node; } diff --git a/ydb/core/kqp/opt/physical/effects/kqp_opt_phy_update.cpp b/ydb/core/kqp/opt/physical/effects/kqp_opt_phy_update.cpp index 39550e770404..ad89fb36ff67 100644 --- a/ydb/core/kqp/opt/physical/effects/kqp_opt_phy_update.cpp +++ b/ydb/core/kqp/opt/physical/effects/kqp_opt_phy_update.cpp @@ -91,7 +91,7 @@ TExprBase KqpBuildUpdateStages(TExprBase node, TExprContext& ctx, const TKqpOpti const auto& table = kqpCtx.Tables->ExistingTable(kqpCtx.Cluster, update.Table().Path()); - const bool isSink = NeedSinks(table, kqpCtx) && table.Metadata->Kind == EKikimrTableKind::Olap; + const bool isSink = NeedSinks(table, kqpCtx); const bool needPrecompute = !isSink; if (needPrecompute) { diff --git a/ydb/core/kqp/opt/physical/kqp_opt_phy.cpp b/ydb/core/kqp/opt/physical/kqp_opt_phy.cpp index a28771a8f9da..7249eb21c141 100644 --- a/ydb/core/kqp/opt/physical/kqp_opt_phy.cpp +++ b/ydb/core/kqp/opt/physical/kqp_opt_phy.cpp @@ -428,7 +428,7 @@ class TKqpPhysicalOptTransformer : public TOptimizeTransformerBase { { // TODO: Allow push to left stage for data queries. // It is now possible as we don't use datashard transactions for reads in data queries. 
- bool pushLeftStage = !KqpCtx.IsDataQuery() && AllowFuseJoinInputs(node); + bool pushLeftStage = (KqpCtx.IsScanQuery() || KqpCtx.Config->EnableKqpDataQueryStreamLookup) && AllowFuseJoinInputs(node); TExprBase output = DqBuildJoin(node, ctx, optCtx, *getParents(), IsGlobal, pushLeftStage, KqpCtx.Config->GetHashJoinMode() ); diff --git a/ydb/core/kqp/opt/physical/kqp_opt_phy_helpers.cpp b/ydb/core/kqp/opt/physical/kqp_opt_phy_helpers.cpp index 18a439af0c21..298680c604bb 100644 --- a/ydb/core/kqp/opt/physical/kqp_opt_phy_helpers.cpp +++ b/ydb/core/kqp/opt/physical/kqp_opt_phy_helpers.cpp @@ -170,49 +170,6 @@ NYql::NNodes::TDqStage ReplaceTableSourceSettings(NYql::NNodes::TDqStage stage, .Done(); } -bool IsSortKeyPrimary(const NYql::NNodes::TCoLambda& keySelector, const NYql::TKikimrTableDescription& tableDesc, - const TMaybe>& passthroughFields) -{ - auto checkKey = [keySelector, &tableDesc, &passthroughFields] (NYql::NNodes::TExprBase key, ui32 index) { - if (!key.Maybe()) { - return false; - } - - auto member = key.Cast(); - if (member.Struct().Raw() != keySelector.Args().Arg(0).Raw()) { - return false; - } - - auto column = TString(member.Name().Value()); - auto columnIndex = tableDesc.GetKeyColumnIndex(column); - if (!columnIndex || *columnIndex != index) { - return false; - } - - if (passthroughFields && !passthroughFields->contains(column)) { - return false; - } - - return true; - }; - - auto lambdaBody = keySelector.Body(); - if (auto maybeTuple = lambdaBody.Maybe()) { - auto tuple = maybeTuple.Cast(); - for (size_t i = 0; i < tuple.Size(); ++i) { - if (!checkKey(tuple.Item(i), i)) { - return false; - } - } - } else { - if (!checkKey(lambdaBody, 0)) { - return false; - } - } - - return true; -} - ESortDirection GetSortDirection(const NYql::NNodes::TExprBase& sortDirections) { auto getDirection = [] (TExprBase expr) -> ESortDirection { if (!expr.Maybe()) { diff --git a/ydb/core/kqp/opt/physical/kqp_opt_phy_impl.h b/ydb/core/kqp/opt/physical/kqp_opt_phy_impl.h 
index 0e22dbac8e4f..846bf2d5cd69 100644 --- a/ydb/core/kqp/opt/physical/kqp_opt_phy_impl.h +++ b/ydb/core/kqp/opt/physical/kqp_opt_phy_impl.h @@ -28,9 +28,6 @@ NYql::NNodes::TDqStage ReplaceStageArg(NYql::NNodes::TDqStage stage, size_t inpu NYql::NNodes::TDqStage ReplaceTableSourceSettings(NYql::NNodes::TDqStage stage, size_t inputIndex, NYql::NNodes::TKqpReadRangesSourceSettings settings, NYql::TExprContext& ctx); -bool IsSortKeyPrimary(const NYql::NNodes::TCoLambda& keySelector, const NYql::TKikimrTableDescription& tableDesc, - const TMaybe>& passthroughFields = {}); - enum ESortDirection : ui32 { None = 0, Forward = 1, diff --git a/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_agg.cpp b/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_agg.cpp index 5092bbeec38d..ffe2ffae4c53 100644 --- a/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_agg.cpp +++ b/ydb/core/kqp/opt/physical/kqp_opt_phy_olap_agg.cpp @@ -299,11 +299,6 @@ TExprBase KqpPushOlapAggregate(TExprBase node, TExprContext& ctx, const TKqpOpti return node; } - // temporary for keys grouping push down not useful - if (!aggCombine.Keys().Empty()) { - return node; - } - auto read = maybeRead.Cast(); auto aggs = Build(ctx, node.Pos()); diff --git a/ydb/core/kqp/opt/ya.make b/ydb/core/kqp/opt/ya.make index 6f07040d1180..160eb3a8a5f0 100644 --- a/ydb/core/kqp/opt/ya.make +++ b/ydb/core/kqp/opt/ya.make @@ -27,7 +27,7 @@ PEERDIR( ydb/library/yql/providers/s3/expr_nodes ydb/library/yql/utils/plan ydb/core/kqp/provider - ydb/core/formats/arrow/protos + ydb/library/formats/arrow/protos ) YQL_LAST_ABI_VERSION() diff --git a/ydb/core/kqp/provider/rewrite_io_utils.cpp b/ydb/core/kqp/provider/rewrite_io_utils.cpp index 633149305a44..067ab4b583a9 100644 --- a/ydb/core/kqp/provider/rewrite_io_utils.cpp +++ b/ydb/core/kqp/provider/rewrite_io_utils.cpp @@ -1,6 +1,7 @@ #include "rewrite_io_utils.h" #include +#include #include #include #include @@ -15,23 +16,18 @@ using namespace NNodes; constexpr const char* QueryGraphNodeSignature = 
"SavedQueryGraph"; -NSQLTranslation::TTranslationSettings CreateViewTranslationSettings(const TString& cluster) { - NSQLTranslation::TTranslationSettings settings; - - settings.DefaultCluster = cluster; - settings.ClusterMapping[cluster] = TString(NYql::KikimrProviderName); - settings.Mode = NSQLTranslation::ESqlMode::LIMITED_VIEW; - - return settings; -} - TExprNode::TPtr CompileViewQuery( - const TString& query, TExprContext& ctx, - const TString& cluster + NKikimr::NKqp::TKqpTranslationSettingsBuilder& settingsBuilder, + IModuleResolver::TPtr moduleResolver, + const TViewPersistedData& viewData ) { + auto translationSettings = settingsBuilder.Build(ctx); + translationSettings.Mode = NSQLTranslation::ESqlMode::LIMITED_VIEW; + NSQLTranslation::Deserialize(viewData.CapturedContext, translationSettings); + TAstParseResult queryAst; - queryAst = NSQLTranslation::SqlToYql(query, CreateViewTranslationSettings(cluster)); + queryAst = NSQLTranslation::SqlToYql(viewData.QueryText, translationSettings); ctx.IssueManager.AddIssues(queryAst.Issues); if (!queryAst.IsOk()) { @@ -39,7 +35,7 @@ TExprNode::TPtr CompileViewQuery( } TExprNode::TPtr queryGraph; - if (!CompileExpr(*queryAst.Root, queryGraph, ctx, nullptr, nullptr)) { + if (!CompileExpr(*queryAst.Root, queryGraph, ctx, moduleResolver.get(), nullptr)) { return nullptr; } @@ -122,8 +118,9 @@ TExprNode::TPtr FindTopLevelRead(const TExprNode::TPtr& queryGraph) { TExprNode::TPtr RewriteReadFromView( const TExprNode::TPtr& node, TExprContext& ctx, - const TString& query, - const TString& cluster + NKikimr::NKqp::TKqpTranslationSettingsBuilder& settingsBuilder, + IModuleResolver::TPtr moduleResolver, + const TViewPersistedData& viewData ) { YQL_PROFILE_FUNC(DEBUG); @@ -132,7 +129,7 @@ TExprNode::TPtr RewriteReadFromView( TExprNode::TPtr queryGraph = FindSavedQueryGraph(readNode.Ptr()); if (!queryGraph) { - queryGraph = CompileViewQuery(query, ctx, cluster); + queryGraph = CompileViewQuery(ctx, settingsBuilder, 
moduleResolver, viewData); if (!queryGraph) { ctx.AddError(TIssue(ctx.GetPosition(readNode.Pos()), "The query stored in the view cannot be compiled.")); @@ -156,4 +153,4 @@ TExprNode::TPtr RewriteReadFromView( return Build(ctx, node->Pos()).Input(topLevelRead).Done().Ptr(); } -} \ No newline at end of file +} diff --git a/ydb/core/kqp/provider/rewrite_io_utils.h b/ydb/core/kqp/provider/rewrite_io_utils.h index d793cc0fcd1a..dd3dff66dd06 100644 --- a/ydb/core/kqp/provider/rewrite_io_utils.h +++ b/ydb/core/kqp/provider/rewrite_io_utils.h @@ -1,5 +1,7 @@ #pragma once +#include +#include #include namespace NYql { @@ -9,8 +11,9 @@ TExprNode::TPtr FindTopLevelRead(const TExprNode::TPtr& queryGraph); TExprNode::TPtr RewriteReadFromView( const TExprNode::TPtr& node, TExprContext& ctx, - const TString& query, - const TString& cluster + NKikimr::NKqp::TKqpTranslationSettingsBuilder& settingsBuilder, + IModuleResolver::TPtr moduleResolver, + const TViewPersistedData& viewData ); -} \ No newline at end of file +} diff --git a/ydb/core/kqp/provider/yql_kikimr_datasink.cpp b/ydb/core/kqp/provider/yql_kikimr_datasink.cpp index a4d88aff12d2..e418a96804d6 100644 --- a/ydb/core/kqp/provider/yql_kikimr_datasink.cpp +++ b/ydb/core/kqp/provider/yql_kikimr_datasink.cpp @@ -176,9 +176,9 @@ class TKiSinkIntentDeterminationTransformer: public TKiSinkVisitorTransformer { } TStatus HandleDropObject(TKiDropObject node, TExprContext& ctx) override { - ctx.AddError(TIssue(ctx.GetPosition(node.Pos()), TStringBuilder() - << "DropObject is not yet implemented for intent determination transformer")); - return TStatus::Error; + Y_UNUSED(node); + Y_UNUSED(ctx); + return TStatus::Ok; } TStatus HandleCreateGroup(TKiCreateGroup node, TExprContext& ctx) override { @@ -1132,7 +1132,8 @@ class TKikimrDataSink : public TDataProviderBase YQL_ENSURE(settings.Mode); auto mode = settings.Mode.Cast(); - if (mode == "create") { + if (mode == "create" || mode == "create_if_not_exists") { + bool existingOk = mode 
== "create_if_not_exists"; return Build(ctx, node->Pos()) .World(node->Child(0)) .DataSink(node->Child(1)) @@ -1140,9 +1141,11 @@ class TKikimrDataSink : public TDataProviderBase .TopicSettings(settings.TopicSettings.Cast()) .Consumers(settings.Consumers.Cast()) .Settings(settings.Other) + .ExistingOk().Value(existingOk).Build() .Done() .Ptr(); - } else if (mode == "alter") { + } else if (mode == "alter" || mode == "alter_if_exists") { + bool missingOk = mode == "alter_if_exists"; return Build(ctx, node->Pos()) .World(node->Child(0)) .DataSink(node->Child(1)) @@ -1152,14 +1155,17 @@ class TKikimrDataSink : public TDataProviderBase .AlterConsumers(settings.AlterConsumers.Cast()) .DropConsumers(settings.DropConsumers.Cast()) .Settings(settings.Other) + .MissingOk().Value(missingOk).Build() .Done() .Ptr(); - } else if (mode == "drop") { + } else if (mode == "drop" || mode == "drop_if_exists") { + bool missingOk = (mode == "drop_if_exists"); return Build(ctx, node->Pos()) .World(node->Child(0)) .DataSink(node->Child(1)) .Topic().Build(key.GetTopicPath()) .Settings(settings.Other) + .MissingOk().Value(missingOk).Build() .Done() .Ptr(); } else { diff --git a/ydb/core/kqp/provider/yql_kikimr_datasource.cpp b/ydb/core/kqp/provider/yql_kikimr_datasource.cpp index d289a9e7f2e1..1c3a02efc504 100644 --- a/ydb/core/kqp/provider/yql_kikimr_datasource.cpp +++ b/ydb/core/kqp/provider/yql_kikimr_datasource.cpp @@ -3,6 +3,7 @@ #include "yql_kikimr_provider_impl.h" #include +#include #include #include @@ -472,12 +473,14 @@ class TKikimrDataSource : public TDataProviderBase { TIntrusivePtr gateway, TIntrusivePtr sessionCtx, const NExternalSource::IExternalSourceFactory::TPtr& externalSourceFactory, - bool isInternalCall) + bool isInternalCall, + TGUCSettings::TPtr gucSettings) : FunctionRegistry(functionRegistry) , Types(types) , Gateway(gateway) , SessionCtx(sessionCtx) , ExternalSourceFactory(externalSourceFactory) + , GUCSettings(gucSettings) , ConfigurationTransformer(new 
TKikimrConfigurationTransformer(sessionCtx, types)) , IntentDeterminationTransformer(new TKiSourceIntentDeterminationTransformer(sessionCtx)) , LoadTableMetadataTransformer(CreateKiSourceLoadTableMetadataTransformer(gateway, sessionCtx, types, externalSourceFactory, isInternalCall)) @@ -760,6 +763,7 @@ class TKikimrDataSource : public TDataProviderBase { } ctx.Step + .Repeat(TExprStep::ExpandApplyForLambdas) .Repeat(TExprStep::ExprEval) .Repeat(TExprStep::DiscoveryIO) .Repeat(TExprStep::Epochs) @@ -767,8 +771,17 @@ class TKikimrDataSource : public TDataProviderBase { .Repeat(TExprStep::LoadTablesMetadata) .Repeat(TExprStep::RewriteIO); - const auto& query = tableDesc.Metadata->ViewPersistedData.QueryText; - return RewriteReadFromView(node, ctx, query, cluster); + const auto& viewData = tableDesc.Metadata->ViewPersistedData; + + NKqp::TKqpTranslationSettingsBuilder settingsBuilder( + SessionCtx->Query().Type, + SessionCtx->Config()._KqpYqlSyntaxVersion.Get().GetRef(), + cluster, + viewData.QueryText, + SessionCtx->Config().BindingsMode, + GUCSettings + ); + return RewriteReadFromView(node, ctx, settingsBuilder, Types.Modules, viewData); } } @@ -881,6 +894,7 @@ class TKikimrDataSource : public TDataProviderBase { TIntrusivePtr Gateway; TIntrusivePtr SessionCtx; NExternalSource::IExternalSourceFactory::TPtr ExternalSourceFactory; + TGUCSettings::TPtr GUCSettings; TAutoPtr ConfigurationTransformer; TAutoPtr IntentDeterminationTransformer; @@ -920,9 +934,10 @@ TIntrusivePtr CreateKikimrDataSource( TIntrusivePtr gateway, TIntrusivePtr sessionCtx, const NExternalSource::IExternalSourceFactory::TPtr& externalSourceFactory, - bool isInternalCall) + bool isInternalCall, + TGUCSettings::TPtr gucSettings) { - return new TKikimrDataSource(functionRegistry, types, gateway, sessionCtx, externalSourceFactory, isInternalCall); + return new TKikimrDataSource(functionRegistry, types, gateway, sessionCtx, externalSourceFactory, isInternalCall, gucSettings); } TAutoPtr 
CreateKiSourceLoadTableMetadataTransformer(TIntrusivePtr gateway, diff --git a/ydb/core/kqp/provider/yql_kikimr_exec.cpp b/ydb/core/kqp/provider/yql_kikimr_exec.cpp index 8c1d07ed998a..8d256586cfeb 100644 --- a/ydb/core/kqp/provider/yql_kikimr_exec.cpp +++ b/ydb/core/kqp/provider/yql_kikimr_exec.cpp @@ -425,8 +425,8 @@ namespace { request->mutable_partitioning_settings()->set_min_active_partitions( FromString(setting.Value().Cast().Literal().Cast().Value()) ); - } else if (name == "setPartitionsLimit") { - request->mutable_partitioning_settings()->set_partition_count_limit( + } else if (name == "setMaxPartitions") { + request->mutable_partitioning_settings()->set_max_active_partitions( FromString(setting.Value().Cast().Literal().Cast().Value()) ); } else if (name == "setRetentionPeriod") { @@ -456,8 +456,29 @@ namespace { ); auto* protoCodecs = request->mutable_supported_codecs(); for (auto codec : codecs) { - protoCodecs->add_codecs(codec); + protoCodecs->add_codecs(codec); } + } else if (name == "setAutoPartitioningStabilizationWindow") { + auto microValue = FromString(setting.Value().Cast().Literal().Value()); + request->mutable_partitioning_settings()->mutable_auto_partitioning_settings()->mutable_partition_write_speed()->mutable_stabilization_window()->set_seconds( + static_cast(microValue / 1'000'000) + ); + } else if (name == "setAutoPartitioningUpUtilizationPercent") { + request->mutable_partitioning_settings()->mutable_auto_partitioning_settings()->mutable_partition_write_speed()->set_up_utilization_percent( + FromString(setting.Value().Cast().Literal().Cast().Value()) + ); + } else if (name == "setAutoPartitioningDownUtilizationPercent") { + request->mutable_partitioning_settings()->mutable_auto_partitioning_settings()->mutable_partition_write_speed()->set_down_utilization_percent( + FromString(setting.Value().Cast().Literal().Cast().Value()) + ); + } else if (name == "setAutoPartitioningStrategy") { + Ydb::Topic::AutoPartitioningStrategy strategy; + auto 
result = GetTopicAutoPartitioningStrategyFromString( + TString(setting.Value().Cast().Literal().Cast().Value()), + strategy + ); + YQL_ENSURE(result); + request->mutable_partitioning_settings()->mutable_auto_partitioning_settings()->set_strategy(strategy); } } } @@ -470,7 +491,7 @@ namespace { request->mutable_alter_partitioning_settings()->set_set_min_active_partitions( FromString(setting.Value().Cast().Literal().Cast().Value()) ); - } else if (name == "setPartitionsLimit") { + } else if (name == "setMaxPartitions") { request->mutable_alter_partitioning_settings()->set_set_partition_count_limit( FromString(setting.Value().Cast().Literal().Cast().Value()) ); @@ -503,6 +524,27 @@ namespace { for (auto codec : codecs) { protoCodecs->add_codecs(codec); } + } else if (name == "setAutoPartitioningStabilizationWindow") { + auto microValue = FromString(setting.Value().Cast().Literal().Value()); + request->mutable_alter_partitioning_settings()->mutable_alter_auto_partitioning_settings()->mutable_set_partition_write_speed()->mutable_set_stabilization_window()->set_seconds( + static_cast(microValue / 1'000'000) + ); + } else if (name == "setAutoPartitioningUpUtilizationPercent") { + request->mutable_alter_partitioning_settings()->mutable_alter_auto_partitioning_settings()->mutable_set_partition_write_speed()->set_set_up_utilization_percent( + FromString(setting.Value().Cast().Literal().Cast().Value()) + ); + } else if (name == "setAutoPartitioningDownUtilizationPercent") { + request->mutable_alter_partitioning_settings()->mutable_alter_auto_partitioning_settings()->mutable_set_partition_write_speed()->set_set_down_utilization_percent( + FromString(setting.Value().Cast().Literal().Cast().Value()) + ); + } else if (name == "setAutoPartitioningStrategy") { + Ydb::Topic::AutoPartitioningStrategy strategy; + auto result = GetTopicAutoPartitioningStrategyFromString( + TString(setting.Value().Cast().Literal().Cast().Value()), + strategy + ); + YQL_ENSURE(result); + 
request->mutable_alter_partitioning_settings()->mutable_alter_auto_partitioning_settings()->set_set_strategy(strategy); } } } @@ -938,16 +980,12 @@ class TKiSourceCallableExecutionTransformer : public TAsyncCallbackTransformer columnHints(NCommon::GetResOrPullColumnHints(node)); auto protoValue = &resultValue; YQL_ENSURE(resultValue.GetArena()); - if (IsRawKikimrResult(resultValue)) { - protoValue = KikimrResultToProto(resultValue, columnHints, fillSettings, resultValue.GetArena()); - } - YQL_ENSURE(fillSettings.Format == IDataProvider::EResultFormat::Custom); YQL_ENSURE(fillSettings.FormatDetails == KikimrMkqlProtoFormat); @@ -1829,38 +1867,6 @@ class TKiSinkCallableExecutionTransformer : public TAsyncCallbackTransformer(input)) { - auto requireStatus = RequireChild(*input, 0); - if (requireStatus.Level != TStatus::Ok) { - return SyncStatus(requireStatus); - } - auto cluster = TString(maybeCreate.Cast().DataSink().Cluster()); - TString topicName = TString(maybeCreate.Cast().Topic()); - Ydb::Topic::CreateTopicRequest createReq; - createReq.set_path(topicName); - for (const auto& consumer : maybeCreate.Cast().Consumers()) { - auto error = AddConsumerToTopicRequest(createReq.add_consumers(), consumer); - if (!error.empty()) { - ctx.AddError(TIssue(ctx.GetPosition(input->Pos()), TStringBuilder() << error << input->Content())); - return SyncError(); - } - } - AddTopicSettingsToRequest(&createReq,maybeCreate.Cast().TopicSettings()); - bool prepareOnly = SessionCtx->Query().PrepareOnly; - // DEBUG - // Cerr << "Create topic request proto: " << createReq.DebugString() << Endl; - auto future = prepareOnly ? 
CreateDummySuccess() : ( - Gateway->CreateTopic(cluster, std::move(createReq)) - ); - - return WrapFuture(future, - [](const IKikimrGateway::TGenericResult& res, const TExprNode::TPtr& input, TExprContext& ctx) { - Y_UNUSED(res); - auto resultNode = ctx.NewWorld(input->Pos()); - return resultNode; - }, "Executing CREATE TOPIC"); - } - if (auto maybeCreateSequence = TMaybeNode(input)) { auto requireStatus = RequireChild(*input, 0); if (requireStatus.Level != TStatus::Ok) { @@ -1922,6 +1928,35 @@ class TKiSinkCallableExecutionTransformer : public TAsyncCallbackTransformer(input)) { + auto requireStatus = RequireChild(*input, 0); + if (requireStatus.Level != TStatus::Ok) { + return SyncStatus(requireStatus); + } + auto cluster = TString(maybeCreate.Cast().DataSink().Cluster()); + TString topicName = TString(maybeCreate.Cast().Topic()); + Ydb::Topic::CreateTopicRequest createReq; + createReq.set_path(topicName); + for (const auto& consumer : maybeCreate.Cast().Consumers()) { + auto error = AddConsumerToTopicRequest(createReq.add_consumers(), consumer); + if (!error.empty()) { + ctx.AddError(TIssue(ctx.GetPosition(input->Pos()), TStringBuilder() << error << input->Content())); + return SyncError(); + } + } + AddTopicSettingsToRequest(&createReq,maybeCreate.Cast().TopicSettings()); + bool existingOk = (maybeCreate.ExistingOk().Cast().Value() == "1"); + + auto future = Gateway->CreateTopic(cluster, std::move(createReq), existingOk); + + return WrapFuture(future, + [](const IKikimrGateway::TGenericResult& res, const TExprNode::TPtr& input, TExprContext& ctx) { + Y_UNUSED(res); + auto resultNode = ctx.NewWorld(input->Pos()); + return resultNode; + }, "Executing CREATE TOPIC"); + } + if (auto maybeAlter = TMaybeNode(input)) { auto requireStatus = RequireChild(*input, 0); if (requireStatus.Level != TStatus::Ok) { @@ -1949,13 +1984,9 @@ class TKiSinkCallableExecutionTransformer : public TAsyncCallbackTransformer().StringValue(); alterReq.add_drop_consumers(name); } + bool 
missingOk = (maybeAlter.MissingOk().Cast().Value() == "1"); AddAlterTopicSettingsToRequest(&alterReq, maybeAlter.Cast().TopicSettings()); - bool prepareOnly = SessionCtx->Query().PrepareOnly; - // DEBUG - // Cerr << "Alter topic request proto:\n" << alterReq.DebugString() << Endl; - auto future = prepareOnly ? CreateDummySuccess() : ( - Gateway->AlterTopic(cluster, std::move(alterReq)) - ); + auto future = Gateway->AlterTopic(cluster, std::move(alterReq), missingOk); return WrapFuture(future, [](const IKikimrGateway::TGenericResult& res, const TExprNode::TPtr& input, TExprContext& ctx) { @@ -1966,21 +1997,15 @@ class TKiSinkCallableExecutionTransformer : public TAsyncCallbackTransformer(input)) { - if (!EnsureNotPrepare("DROP TOPIC", input->Pos(), SessionCtx->Query(), ctx)) { - return SyncError(); - } - auto requireStatus = RequireChild(*input, 0); if (requireStatus.Level != TStatus::Ok) { return SyncStatus(requireStatus); } auto cluster = TString(maybeDrop.Cast().DataSink().Cluster()); TString topicName = TString(maybeDrop.Cast().Topic()); + bool missingOk = (maybeDrop.MissingOk().Cast().Value() == "1"); - bool prepareOnly = SessionCtx->Query().PrepareOnly; - auto future = prepareOnly ? 
CreateDummySuccess() : ( - Gateway->DropTopic(cluster, topicName) - ); + auto future = Gateway->DropTopic(cluster, topicName, missingOk); return WrapFuture(future, [](const IKikimrGateway::TGenericResult& res, const TExprNode::TPtr& input, TExprContext& ctx) { diff --git a/ydb/core/kqp/provider/yql_kikimr_expr_nodes.json b/ydb/core/kqp/provider/yql_kikimr_expr_nodes.json index 4d8f0946cacb..c18233c3b66f 100644 --- a/ydb/core/kqp/provider/yql_kikimr_expr_nodes.json +++ b/ydb/core/kqp/provider/yql_kikimr_expr_nodes.json @@ -171,7 +171,8 @@ {"Index": 2, "Name": "Topic", "Type": "TCoAtom"}, {"Index": 3, "Name": "Consumers", "Type": "TCoTopicConsumerList"}, {"Index": 4, "Name": "TopicSettings", "Type": "TCoNameValueTupleList"}, - {"Index": 5, "Name": "Settings", "Type": "TCoNameValueTupleList"} + {"Index": 5, "Name": "Settings", "Type": "TCoNameValueTupleList"}, + {"Index": 6, "Name": "ExistingOk", "Type": "TCoAtom"} ] }, { @@ -186,7 +187,9 @@ {"Index": 4, "Name": "AlterConsumers", "Type": "TCoTopicConsumerList"}, {"Index": 5, "Name": "DropConsumers", "Type": "TCoAtomList"}, {"Index": 6, "Name": "TopicSettings", "Type": "TCoNameValueTupleList"}, - {"Index": 7, "Name": "Settings", "Type": "TCoNameValueTupleList"} + {"Index": 7, "Name": "Settings", "Type": "TCoNameValueTupleList"}, + {"Index": 8, "Name": "MissingOk", "Type": "TCoAtom"} + ] }, { @@ -197,7 +200,9 @@ {"Index": 0, "Name": "World", "Type": "TExprBase"}, {"Index": 1, "Name": "DataSink", "Type": "TKiDataSink"}, {"Index": 2, "Name": "Topic", "Type": "TCoAtom"}, - {"Index": 3, "Name": "Settings", "Type": "TCoNameValueTupleList"} + {"Index": 3, "Name": "Settings", "Type": "TCoNameValueTupleList"}, + {"Index": 4, "Name": "MissingOk", "Type": "TCoAtom"} + ] }, { diff --git a/ydb/core/kqp/provider/yql_kikimr_gateway.cpp b/ydb/core/kqp/provider/yql_kikimr_gateway.cpp index c0bf3edf6d61..8b23ef1d6715 100644 --- a/ydb/core/kqp/provider/yql_kikimr_gateway.cpp +++ b/ydb/core/kqp/provider/yql_kikimr_gateway.cpp @@ -365,6 
+365,26 @@ static std::shared_ptr> GetCodecsMapping() return codecsMapping; } +static std::shared_ptr> GetAutoPartitioningStrategiesMapping() { + static std::shared_ptr> strategiesMapping; + if (!strategiesMapping) { + strategiesMapping = MakeEnumMapping( + Ydb::Topic::AutoPartitioningStrategy_descriptor(), "auto_partitioning_strategy_"); + + const TString prefix = "scale_"; + for (const auto& [key, value] : *strategiesMapping) { + if (key.StartsWith(prefix)) { + TString newKey = key; + newKey.erase(0, prefix.length()); + + Y_ABORT_UNLESS(strategiesMapping->find(newKey) == strategiesMapping->end()); + (*strategiesMapping)[newKey] = value; + } + } + } + return strategiesMapping; +} + static std::shared_ptr> GetMeteringModesMapping() { static std::shared_ptr> metModesMapping; if (metModesMapping == nullptr) { @@ -387,6 +407,18 @@ bool GetTopicMeteringModeFromString(const TString& meteringMode, Ydb::Topic::Met } } +bool GetTopicAutoPartitioningStrategyFromString(const TString& strategy, Ydb::Topic::AutoPartitioningStrategy& result) { + auto mapping = GetAutoPartitioningStrategiesMapping(); + auto normStrategy = to_lower(strategy); + auto iter = mapping->find(normStrategy); + if (iter.IsEnd()) { + return false; + } else { + result = iter->second; + return true; + } +} + TVector GetTopicCodecsFromString(const TStringBuf& codecsStr) { const TVector codecsList = StringSplitter(codecsStr).Split(',').SkipEmpty(); TVector result; diff --git a/ydb/core/kqp/provider/yql_kikimr_gateway.h b/ydb/core/kqp/provider/yql_kikimr_gateway.h index a5c666a23ab1..ec1c55887246 100644 --- a/ydb/core/kqp/provider/yql_kikimr_gateway.h +++ b/ydb/core/kqp/provider/yql_kikimr_gateway.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -21,6 +22,7 @@ #include #include #include +#include #include #include @@ -367,6 +369,7 @@ ETableType GetTableTypeFromString(const TStringBuf& tableType); bool GetTopicMeteringModeFromString(const TString& meteringMode, 
Ydb::Topic::MeteringMode& result); TVector GetTopicCodecsFromString(const TStringBuf& codecsStr); +bool GetTopicAutoPartitioningStrategyFromString(const TString& strategy, Ydb::Topic::AutoPartitioningStrategy& result); enum class EStoreType : ui32 { @@ -404,6 +407,7 @@ enum EMetaSerializationType : ui64 { struct TViewPersistedData { TString QueryText; + NYql::NProto::TTranslationSettings CapturedContext; }; struct TKikimrTableMetadata : public TThrRefBase { @@ -424,6 +428,7 @@ struct TKikimrTableMetadata : public TThrRefBase { ui64 DataSize = 0; ui64 MemorySize = 0; ui32 ShardsCount = 0; + bool StatsLoaded = false; TInstant LastAccessTime; TInstant LastUpdateTime; @@ -460,6 +465,7 @@ struct TKikimrTableMetadata : public TThrRefBase { , Kind(static_cast(message->GetKind())) , RecordsCount(message->GetRecordsCount()) , DataSize(message->GetDataSize()) + , StatsLoaded(message->GetStatsLoaded()) , KeyColumnNames(message->GetKeyColunmNames().begin(), message->GetKeyColunmNames().end()) { @@ -527,6 +533,7 @@ struct TKikimrTableMetadata : public TThrRefBase { message->SetKind(static_cast(Kind)); message->SetRecordsCount(RecordsCount); message->SetDataSize(DataSize); + message->SetStatsLoaded(StatsLoaded); for(auto& [key, value] : Attributes) { message->AddAttributes()->SetKey(key); message->AddAttributes()->SetValue(value); @@ -664,6 +671,13 @@ struct TCreateExternalTableSettings { TVector> SourceTypeParameters; }; +struct TAlterTopicSettings { + Ydb::Topic::AlterTopicRequest Request; + TString Name; + TString WorkDir; + bool MissingOk; +}; + struct TSequenceSettings { TMaybe MinValue; TMaybe MaxValue; @@ -842,7 +856,7 @@ class IKikimrGateway : public TThrRefBase { struct TQueryResult : public TGenericResult { TString SessionId; - TVector Results; + TVector Results; TMaybe Profile; // TODO: Deprecate. 
NKqpProto::TKqpStatsQuery QueryStats; std::unique_ptr PreparingQuery; @@ -937,11 +951,13 @@ class IKikimrGateway : public TThrRefBase { virtual NThreading::TFuture DropTable(const TString& cluster, const TDropTableSettings& settings) = 0; - virtual NThreading::TFuture CreateTopic(const TString& cluster, Ydb::Topic::CreateTopicRequest&& request) = 0; + virtual NThreading::TFuture CreateTopic(const TString& cluster, Ydb::Topic::CreateTopicRequest&& request, bool existingOk) = 0; + + virtual NThreading::TFuture AlterTopic(const TString& cluster, Ydb::Topic::AlterTopicRequest&& request, bool missingOk) = 0; - virtual NThreading::TFuture AlterTopic(const TString& cluster, Ydb::Topic::AlterTopicRequest&& request) = 0; + virtual NThreading::TFuture AlterTopicPrepared(TAlterTopicSettings&& settings) = 0; - virtual NThreading::TFuture DropTopic(const TString& cluster, const TString& topic) = 0; + virtual NThreading::TFuture DropTopic(const TString& cluster, const TString& topic, bool missingOk) = 0; virtual NThreading::TFuture CreateReplication(const TString& cluster, const TCreateReplicationSettings& settings) = 0; diff --git a/ydb/core/kqp/provider/yql_kikimr_opt_build.cpp b/ydb/core/kqp/provider/yql_kikimr_opt_build.cpp index 5220fba92a72..5c4d7633ba55 100644 --- a/ydb/core/kqp/provider/yql_kikimr_opt_build.cpp +++ b/ydb/core/kqp/provider/yql_kikimr_opt_build.cpp @@ -247,7 +247,7 @@ struct TKiExploreTxResults { } } - void AddResult(const TExprBase& result) { + void PrepareForResult() { if (QueryBlocks.empty()) { AddQueryBlock(); } @@ -255,6 +255,10 @@ struct TKiExploreTxResults { if (!ConcurrentResults && QueryBlocks.back().Results.size() > 0) { AddQueryBlock(); } + } + + void AddResult(const TExprBase& result) { + PrepareForResult(); auto& curBlock = QueryBlocks.back(); curBlock.Results.push_back(result); @@ -422,6 +426,10 @@ bool ExploreTx(TExprBase node, TExprContext& ctx, const TKiDataSink& dataSink, T const auto& tableData = tablesData->ExistingTable(cluster, 
table); YQL_ENSURE(tableData.Metadata); + if (!write.ReturningColumns().Empty()) { + txRes.PrepareForResult(); + } + if (tableOp == TYdbOperation::UpdateOn) { auto inputColumnsSetting = GetSetting(write.Settings().Ref(), "input_columns"); YQL_ENSURE(inputColumnsSetting); @@ -445,7 +453,9 @@ bool ExploreTx(TExprBase node, TExprContext& ctx, const TKiDataSink& dataSink, T .Update(node) .Columns(write.ReturningColumns()) .Build() - .Settings().Build() + .Settings() + .Add().Name().Value("columns").Build().Value(write.ReturningColumns()).Build() + .Build() .Done()); } @@ -480,6 +490,11 @@ bool ExploreTx(TExprBase node, TExprContext& ctx, const TKiDataSink& dataSink, T for (const auto& item : updateStructType->GetItems()) { updateColumns.emplace(item->GetName()); } + + if (!update.ReturningColumns().Empty()) { + txRes.PrepareForResult(); + } + txRes.AddUpdateOpToQueryBlock(node, tableData.Metadata, updateColumns); if (!update.ReturningColumns().Empty()) { txRes.AddResult( @@ -491,7 +506,9 @@ bool ExploreTx(TExprBase node, TExprContext& ctx, const TKiDataSink& dataSink, T .Update(node) .Columns(update.ReturningColumns()) .Build() - .Settings().Build() + .Settings() + .Add().Name().Value("columns").Build().Value(update.ReturningColumns()).Build() + .Build() .Done()); } @@ -513,6 +530,10 @@ bool ExploreTx(TExprBase node, TExprContext& ctx, const TKiDataSink& dataSink, T YQL_ENSURE(tablesData); const auto& tableData = tablesData->ExistingTable(cluster, table); YQL_ENSURE(tableData.Metadata); + if (!del.ReturningColumns().Empty()) { + txRes.PrepareForResult(); + } + txRes.AddWriteOpToQueryBlock(node, tableData.Metadata, tableOp & KikimrReadOps()); if (!del.ReturningColumns().Empty()) { txRes.AddResult( @@ -524,7 +545,9 @@ bool ExploreTx(TExprBase node, TExprContext& ctx, const TKiDataSink& dataSink, T .Update(node) .Columns(del.ReturningColumns()) .Build() - .Settings().Build() + .Settings() + .Add().Name().Value("columns").Build().Value(del.ReturningColumns()).Build() + 
.Build() .Done()); } diff --git a/ydb/core/kqp/provider/yql_kikimr_provider.cpp b/ydb/core/kqp/provider/yql_kikimr_provider.cpp index ac310064077c..b71e0082277e 100644 --- a/ydb/core/kqp/provider/yql_kikimr_provider.cpp +++ b/ydb/core/kqp/provider/yql_kikimr_provider.cpp @@ -964,3 +964,36 @@ TCoNameValueTupleList TKiExecDataQuerySettings::BuildNode(TExprContext& ctx, TPo } } // namespace NYql + +namespace NSQLTranslation { + +void Serialize(const TTranslationSettings& settings, NYql::NProto::TTranslationSettings& serializedSettings) { + serializedSettings.SetPathPrefix(settings.PathPrefix); + serializedSettings.SetSyntaxVersion(settings.SyntaxVersion); + serializedSettings.SetAnsiLexer(settings.AnsiLexer); + serializedSettings.SetPgParser(settings.PgParser); + + auto* pragmas = serializedSettings.MutablePragmas(); + pragmas->Clear(); + pragmas->Add(settings.Flags.begin(), settings.Flags.end()); +} + +void Deserialize(const NYql::NProto::TTranslationSettings& serializedSettings, TTranslationSettings& settings) { + #define DeserializeSetting(settingName) \ + if (serializedSettings.Has##settingName()) { \ + settings.settingName = serializedSettings.Get##settingName(); \ + } + + DeserializeSetting(PathPrefix); + DeserializeSetting(SyntaxVersion); + DeserializeSetting(AnsiLexer); + DeserializeSetting(PgParser); + + #undef DeserializeSetting + + // overwrite existing pragmas + settings.Flags.clear(); + settings.Flags.insert(serializedSettings.GetPragmas().begin(), serializedSettings.GetPragmas().end()); +} + +} diff --git a/ydb/core/kqp/provider/yql_kikimr_provider.h b/ydb/core/kqp/provider/yql_kikimr_provider.h index 48634bf9c7e8..3c1ad3edf62f 100644 --- a/ydb/core/kqp/provider/yql_kikimr_provider.h +++ b/ydb/core/kqp/provider/yql_kikimr_provider.h @@ -124,6 +124,8 @@ struct TKikimrQueryContext : TThrRefBase { // we do not want add extra life time for query context here std::shared_ptr RpcCtx; + NSQLTranslation::TTranslationSettings TranslationSettings; + void Reset() { 
PrepareOnly = false; SuppressDdlChecks = false; @@ -142,6 +144,7 @@ struct TKikimrQueryContext : TThrRefBase { RlPath.Clear(); RpcCtx.reset(); + TranslationSettings = NSQLTranslation::TTranslationSettings(); } }; @@ -323,7 +326,6 @@ class TKikimrTransactionContextBase : public TThrRefBase { for (const auto& info : tableInfos) { tableInfoMap.emplace(info.GetTableName(), &info); - TKikimrPathId pathId(info.GetTableId().GetOwnerId(), info.GetTableId().GetTableId()); TableByIdMap.emplace(pathId, info.GetTableName()); } @@ -567,7 +569,8 @@ TIntrusivePtr CreateKikimrDataSource( TIntrusivePtr gateway, TIntrusivePtr sessionCtx, const NKikimr::NExternalSource::IExternalSourceFactory::TPtr& sourceFactory, - bool isInternalCall); + bool isInternalCall, + TGUCSettings::TPtr gucSettings); TIntrusivePtr CreateKikimrDataSink( const NKikimr::NMiniKQL::IFunctionRegistry& functionRegistry, @@ -578,3 +581,10 @@ TIntrusivePtr CreateKikimrDataSink( TIntrusivePtr queryExecutor); } // namespace NYql + +namespace NSQLTranslation { + +void Serialize(const TTranslationSettings& settings, NYql::NProto::TTranslationSettings& serializedSettings); +void Deserialize(const NYql::NProto::TTranslationSettings& serializedSettings, TTranslationSettings& settings); + +} diff --git a/ydb/core/kqp/provider/yql_kikimr_type_ann.cpp b/ydb/core/kqp/provider/yql_kikimr_type_ann.cpp index a5746e0be2c0..90350b1820f2 100644 --- a/ydb/core/kqp/provider/yql_kikimr_type_ann.cpp +++ b/ydb/core/kqp/provider/yql_kikimr_type_ann.cpp @@ -1455,7 +1455,7 @@ virtual TStatus HandleCreateTable(TKiCreateTable create, TExprContext& ctx) over } static bool CheckTopicSettings(const TCoNameValueTupleList& settings, TExprContext& ctx) { - ui32 minParts = 0, partsLimit = 0; + ui32 minParts = 0, maxPartitions = 0; TPosition errorPos; for (const auto& setting : settings) { auto name = setting.Name().Value(); @@ -1477,11 +1477,11 @@ virtual TStatus HandleCreateTable(TKiCreateTable create, TExprContext& ctx) over ); minParts = value; 
errorPos = ctx.GetPosition(setting.Value().Ref().Pos()); - } else if (name == "setPartitionsLimit") { + } else if (name == "setMaxPartitions") { ui32 value = FromString( setting.Value().Cast().Literal().template Cast().Value() ); - partsLimit = value; + maxPartitions = value; errorPos = ctx.GetPosition(setting.Value().Ref().Pos()); } else if (name.StartsWith("reset")) { ctx.AddError(TIssue( @@ -1490,10 +1490,10 @@ virtual TStatus HandleCreateTable(TKiCreateTable create, TExprContext& ctx) over ); return false; } - if (minParts && partsLimit && partsLimit < minParts) { + if (minParts && maxPartitions && maxPartitions < minParts) { ctx.AddError(TIssue( errorPos, - TStringBuilder() << "partitions_limit cannot be less than min_partitions") + TStringBuilder() << "max_partitions cannot be less than min_partitions") ); return false; } diff --git a/ydb/core/kqp/proxy_service/kqp_proxy_service.cpp b/ydb/core/kqp/proxy_service/kqp_proxy_service.cpp index 23be28bcb264..deb34ffb92bc 100644 --- a/ydb/core/kqp/proxy_service/kqp_proxy_service.cpp +++ b/ydb/core/kqp/proxy_service/kqp_proxy_service.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -269,7 +270,11 @@ class TKqpProxyService : public TActorBootstrapped { MakeKqpCompileComputationPatternServiceID(SelfId().NodeId()), CompileComputationPatternService); } - KqpNodeService = TlsActivationContext->ExecutorThread.RegisterActor(CreateKqpNodeService(TableServiceConfig, Counters, AsyncIoFactory, FederatedQuerySetup)); + ResourceManager_ = GetKqpResourceManager(); + CaFactory_ = NComputeActor::MakeKqpCaFactory( + TableServiceConfig.GetResourceManager(), ResourceManager_, AsyncIoFactory, FederatedQuerySetup); + + KqpNodeService = TlsActivationContext->ExecutorThread.RegisterActor(CreateKqpNodeService(TableServiceConfig, ResourceManager_, CaFactory_, Counters, AsyncIoFactory, FederatedQuerySetup)); TlsActivationContext->ExecutorThread.ActorSystem->RegisterLocalService( 
MakeKqpNodeServiceID(SelfId().NodeId()), KqpNodeService); @@ -687,11 +692,8 @@ class TKqpProxyService : public TActorBootstrapped { LocalSessions->AttachQueryText(sessionInfo, ev->Get()->GetQuery()); } - if (!FeatureFlags.GetEnableResourcePools()) { - ev->Get()->SetPoolId(""); - } else if (!ev->Get()->GetPoolId()) { - // TODO: do not use default pool if there is no limits - ev->Get()->SetPoolId(NResourcePool::DEFAULT_POOL_ID); + if (!TryFillPoolInfoFromCache(ev, requestId)) { + return; } TActorId targetId; @@ -1344,6 +1346,7 @@ class TKqpProxyService : public TActorBootstrapped { hFunc(TEvInterconnect::TEvNodeDisconnected, Handle); hFunc(TEvKqp::TEvListSessionsRequest, Handle); hFunc(TEvKqp::TEvListProxyNodesRequest, Handle); + hFunc(NWorkload::TEvUpdatePoolInfo, Handle); default: Y_ABORT("TKqpProxyService: unexpected event type: %" PRIx32 " event: %s", ev->GetTypeRewrite(), ev->ToString().data()); @@ -1479,7 +1482,7 @@ class TKqpProxyService : public TActorBootstrapped { auto config = CreateConfig(KqpSettings, workerSettings); - IActor* sessionActor = CreateKqpSessionActor(SelfId(), sessionId, KqpSettings, workerSettings, + IActor* sessionActor = CreateKqpSessionActor(SelfId(), ResourceManager_, CaFactory_, sessionId, KqpSettings, workerSettings, FederatedQuerySetup, AsyncIoFactory, ModuleResolverState, Counters, QueryServiceConfig, KqpTempTablesAgentActor); auto workerId = TlsActivationContext->ExecutorThread.RegisterActor(sessionActor, TMailboxType::HTSwap, AppData()->UserPoolId); @@ -1566,6 +1569,43 @@ class TKqpProxyService : public TActorBootstrapped { } } + bool TryFillPoolInfoFromCache(TEvKqp::TEvQueryRequest::TPtr& ev, ui64 requestId) { + if (!FeatureFlags.GetEnableResourcePools()) { + ev->Get()->SetPoolId(""); + return true; + } + + if (!ev->Get()->GetPoolId()) { + ev->Get()->SetPoolId(NResourcePool::DEFAULT_POOL_ID); + } + + const auto& poolId = ev->Get()->GetPoolId(); + const auto& poolInfo = ResourcePoolsCache.GetPoolInfo(ev->Get()->GetDatabase(), 
poolId); + if (!poolInfo) { + return true; + } + + const auto& securityObject = poolInfo->SecurityObject; + const auto& userToken = ev->Get()->GetUserToken(); + if (securityObject && userToken && !userToken->GetSerializedToken().empty()) { + if (!securityObject->CheckAccess(NACLib::EAccessRights::DescribeSchema, *userToken)) { + ReplyProcessError(Ydb::StatusIds::NOT_FOUND, TStringBuilder() << "Resource pool " << poolId << " not found or you don't have access permissions", requestId); + return false; + } + if (!securityObject->CheckAccess(NACLib::EAccessRights::SelectRow, *userToken)) { + ReplyProcessError(Ydb::StatusIds::UNAUTHORIZED, TStringBuilder() << "You don't have access permissions for resource pool " << poolId, requestId); + return false; + } + } + + const auto& poolConfig = poolInfo->Config; + if (!NWorkload::IsWorkloadServiceRequired(poolConfig)) { + ev->Get()->SetPoolConfig(poolConfig); + } + + return true; + } + void UpdateYqlLogLevels() { const auto& kqpYqlName = NKikimrServices::EServiceKikimr_Name(NKikimrServices::KQP_YQL); for (auto &entry : LogConfig.GetEntry()) { @@ -1692,7 +1732,11 @@ class TKqpProxyService : public TActorBootstrapped { KQP_PROXY_LOG_D("incoming list sessions request " << ev->Get()->Record.ShortUtf8DebugString()); auto result = std::make_unique(); - auto startIt = LocalSessions->GetOrderedLowerBound(ev->Get()->Record.GetSessionIdStart()); + + const auto& tenant = ev->Get()->Record.GetTenantName(); + bool checkTenant = (AppData()->TenantName != tenant); + + auto startIt = LocalSessions->GetOrderedLowerBound(tenant, ev->Get()->Record.GetSessionIdStart()); auto endIt = LocalSessions->GetOrderedEnd(); i32 freeSpace = ev->Get()->Record.GetFreeSpace(); @@ -1703,6 +1747,10 @@ class TKqpProxyService : public TActorBootstrapped { while(startIt != endIt && freeSpace > 0) { auto* sessionInfo = startIt->second; + if (checkTenant && sessionInfo->Database != ev->Get()->Record.GetTenantName()) { + finished = true; + break; + } if 
(!until.empty()) { if (sessionInfo->SessionId > until) { @@ -1730,7 +1778,8 @@ class TKqpProxyService : public TActorBootstrapped { if (finished) { result->Record.SetFinished(true); } else { - result->Record.SetContinuationToken(startIt->first); + Y_ABORT_UNLESS(startIt != endIt); + result->Record.SetContinuationToken(startIt->first.second); result->Record.SetFinished(false); } @@ -1751,6 +1800,10 @@ class TKqpProxyService : public TActorBootstrapped { Send(ev->Sender, result.release(), 0, ev->Cookie); } + void Handle(NWorkload::TEvUpdatePoolInfo::TPtr& ev) { + ResourcePoolsCache.UpdatePoolInfo(ev->Get()->Database, ev->Get()->PoolId, ev->Get()->Config, ev->Get()->SecurityObject); + } + private: NKikimrConfig::TLogConfig LogConfig; NKikimrConfig::TTableServiceConfig TableServiceConfig; @@ -1768,6 +1821,8 @@ class TKqpProxyService : public TActorBootstrapped { THashMap ConfigSubscriptions; THashMap TimeoutTimers; + std::shared_ptr ResourceManager_; + std::shared_ptr CaFactory_; TIntrusivePtr ShutdownState; TIntrusivePtr ModuleResolverState; @@ -1810,6 +1865,8 @@ class TKqpProxyService : public TActorBootstrapped { std::deque DelayedEventsQueue; bool IsLookupByRmScheduled = false; TActorId KqpTempTablesAgentActor; + + TResourcePoolsCache ResourcePoolsCache; }; } // namespace diff --git a/ydb/core/kqp/proxy_service/kqp_proxy_service_impl.h b/ydb/core/kqp/proxy_service/kqp_proxy_service_impl.h index 1ca66b5f019a..61620e2f2768 100644 --- a/ydb/core/kqp/proxy_service/kqp_proxy_service_impl.h +++ b/ydb/core/kqp/proxy_service/kqp_proxy_service_impl.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -147,7 +148,7 @@ struct TKqpSessionInfo { class TLocalSessionsRegistry { THashMap LocalSessions; - std::map OrderedSessions; + std::map, TKqpSessionInfo*> OrderedSessions; THashMap TargetIdIndex; THashSet ShutdownInFlightSessions; THashMap SessionsCountPerDatabase; @@ -205,7 +206,7 @@ class TLocalSessionsRegistry { auto result = 
LocalSessions.emplace(sessionId, TKqpSessionInfo(sessionId, workerId, database, dbCounters, std::move(pos), sessionStartedAt + idleDuration, IdleSessions.end(), pgWire, startedAt)); - OrderedSessions.emplace(sessionId, &result.first->second); + OrderedSessions.emplace(std::make_pair(database, sessionId), &result.first->second); SessionsCountPerDatabase[database]++; Y_ABORT_UNLESS(result.second, "Duplicate session id!"); TargetIdIndex.emplace(workerId, sessionId); @@ -299,11 +300,11 @@ class TLocalSessionsRegistry { return ShutdownInFlightSessions.size(); } - std::map::const_iterator GetOrderedLowerBound(const TString& continuation) const { - return OrderedSessions.lower_bound(continuation); + std::map, TKqpSessionInfo*>::const_iterator GetOrderedLowerBound(const TString& tenant, const TString& continuation) const { + return OrderedSessions.lower_bound(std::make_pair(tenant, continuation)); } - std::map::const_iterator GetOrderedEnd() const { + std::map, TKqpSessionInfo*>::const_iterator GetOrderedEnd() const { return OrderedSessions.end(); } @@ -336,7 +337,7 @@ class TLocalSessionsRegistry { } } - OrderedSessions.erase(sessionId); + OrderedSessions.erase(std::make_pair(it->second.Database, sessionId)); LocalSessions.erase(it); } @@ -415,4 +416,40 @@ class TLocalSessionsRegistry { } }; +class TResourcePoolsCache { + struct TPoolInfo { + NResourcePool::TPoolSettings Config; + std::optional SecurityObject; + }; + +public: + std::optional GetPoolInfo(const TString& database, const TString& poolId) const { + auto it = PoolsCache.find(GetPoolKey(database, poolId)); + if (it == PoolsCache.end()) { + return std::nullopt; + } + return it->second; + } + + void UpdatePoolInfo(const TString& database, const TString& poolId, const std::optional& config, const std::optional& securityObject) { + const TString& poolKey = GetPoolKey(database, poolId); + if (!config) { + PoolsCache.erase(poolKey); + return; + } + + auto& poolInfo = PoolsCache[poolKey]; + poolInfo.Config = *config; + 
poolInfo.SecurityObject = securityObject; + } + +private: + static TString GetPoolKey(const TString& database, const TString& poolId) { + return CanonizePath(TStringBuilder() << database << "/" << poolId); + } + +private: + std::unordered_map PoolsCache; +}; + } // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/proxy_service/kqp_session_info.cpp b/ydb/core/kqp/proxy_service/kqp_session_info.cpp index 995e02676c54..383fd6690244 100644 --- a/ydb/core/kqp/proxy_service/kqp_session_info.cpp +++ b/ydb/core/kqp/proxy_service/kqp_session_info.cpp @@ -6,6 +6,8 @@ namespace NKikimr::NKqp { using VSessions = NKikimr::NSysView::Schema::QuerySessions; +constexpr size_t QUERY_TEXT_LIMIT = 10_KB; + void TKqpSessionInfo::SerializeTo(::NKikimrKqp::TSessionInfo* proto, const TFieldsMap& fieldsMap) const { if (fieldsMap.NeedField(VSessions::SessionId::ColumnId)) { // 1 proto->SetSessionId(SessionId); @@ -26,7 +28,12 @@ void TKqpSessionInfo::SerializeTo(::NKikimrKqp::TSessionInfo* proto, const TFiel // last executed query or currently running query. 
if (fieldsMap.NeedField(VSessions::Query::ColumnId)) { // 4 - proto->SetQuery(QueryText); + if (QueryText.size() > QUERY_TEXT_LIMIT) { + TString truncatedText = QueryText.substr(0, QUERY_TEXT_LIMIT); + proto->SetQuery(truncatedText); + } else { + proto->SetQuery(QueryText); + } } if (fieldsMap.NeedField(VSessions::QueryCount::ColumnId)) { // 5 diff --git a/ydb/core/kqp/query_compiler/kqp_olap_compiler.cpp b/ydb/core/kqp/query_compiler/kqp_olap_compiler.cpp index 5f6283824745..b2ed454f6e66 100644 --- a/ydb/core/kqp/query_compiler/kqp_olap_compiler.cpp +++ b/ydb/core/kqp/query_compiler/kqp_olap_compiler.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include @@ -856,6 +856,8 @@ TTypedColumn GetOrCreateColumnIdAndType(const TExprBase& node, TKqpOlapCompileCo return BuildLogicalNot(maybeNot.Cast().Value(), ctx); } else if (const auto& maybeJsonValue = node.Maybe()) { return ConvertJsonValueToColumn(maybeJsonValue.Cast(), ctx); + } else if (const auto& maybeJsonValue = node.Maybe()) { + return CompileJsonExists(maybeJsonValue.Cast(), ctx); } else if (const auto& maybeApply = node.Maybe()) { return CompileYqlKernelScalarApply(maybeApply.Cast(), ctx); } diff --git a/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp b/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp index 78be12f810e6..b6943897a805 100644 --- a/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp +++ b/ydb/core/kqp/query_compiler/kqp_query_compiler.cpp @@ -91,6 +91,8 @@ NKqpProto::EStreamLookupStrategy GetStreamLookupStrategy(const std::string_view lookupStrategy = NKqpProto::EStreamLookupStrategy::LOOKUP; } else if (strategy == "LookupJoinRows"sv) { lookupStrategy = NKqpProto::EStreamLookupStrategy::JOIN; + } else if (strategy == "LookupSemiJoinRows"sv) { + lookupStrategy = NKqpProto::EStreamLookupStrategy::SEMI_JOIN; } YQL_ENSURE(lookupStrategy != NKqpProto::EStreamLookupStrategy::UNSPECIFIED, @@ -1274,7 +1276,8 @@ class TKqpQueryCompiler : public IKqpQueryCompiler { break; } - case 
NKqpProto::EStreamLookupStrategy::JOIN: { + case NKqpProto::EStreamLookupStrategy::JOIN: + case NKqpProto::EStreamLookupStrategy::SEMI_JOIN: { YQL_ENSURE(inputItemType->GetKind() == ETypeAnnotationKind::Tuple); const auto inputTupleType = inputItemType->Cast(); YQL_ENSURE(inputTupleType->GetSize() == 2); diff --git a/ydb/core/kqp/query_data/kqp_predictor.cpp b/ydb/core/kqp/query_data/kqp_predictor.cpp index 9c0ce87d73e4..3cf0b3b97187 100644 --- a/ydb/core/kqp/query_data/kqp_predictor.cpp +++ b/ydb/core/kqp/query_data/kqp_predictor.cpp @@ -131,7 +131,7 @@ ui32 TStagePredictor::GetUsableThreads() { userPoolSize = TlsActivationContext->ActorSystem()->GetPoolThreadsCount(AppData()->UserPoolId); } if (!userPoolSize) { - ALS_ERROR(NKikimrServices::KQP_EXECUTER) << "user pool is undefined for executer tasks construction"; + ALS_INFO(NKikimrServices::KQP_EXECUTER) << "user pool is undefined for executer tasks construction"; userPoolSize = NSystemInfo::NumberOfCpus(); } return Max(1, *userPoolSize); diff --git a/ydb/core/kqp/query_data/kqp_prepared_query.cpp b/ydb/core/kqp/query_data/kqp_prepared_query.cpp index 9817c4b3ce37..a46ec53d9068 100644 --- a/ydb/core/kqp/query_data/kqp_prepared_query.cpp +++ b/ydb/core/kqp/query_data/kqp_prepared_query.cpp @@ -154,7 +154,7 @@ const NKikimr::NKqp::TStagePredictor& TKqpPhyTxHolder::GetCalculationPredictor(c } TPreparedQueryHolder::TPreparedQueryHolder(NKikimrKqp::TPreparedQuery* proto, - const NKikimr::NMiniKQL::IFunctionRegistry* functionRegistry) + const NKikimr::NMiniKQL::IFunctionRegistry* functionRegistry, bool noFillTables) : Proto(proto) , Alloc(nullptr) , TableConstInfoById(MakeIntrusive()) @@ -164,6 +164,11 @@ TPreparedQueryHolder::TPreparedQueryHolder(NKikimrKqp::TPreparedQuery* proto, Alloc = std::make_shared(functionRegistry); } + // In case of some compilation failures filling tables may produce new problems which may replace original error messages. 
+ if (noFillTables) { + return; + } + THashSet tablesSet; const auto& phyQuery = Proto->GetPhysicalQuery(); Transactions.reserve(phyQuery.TransactionsSize()); diff --git a/ydb/core/kqp/query_data/kqp_prepared_query.h b/ydb/core/kqp/query_data/kqp_prepared_query.h index a92a79063137..b7cd6ab1b167 100644 --- a/ydb/core/kqp/query_data/kqp_prepared_query.h +++ b/ydb/core/kqp/query_data/kqp_prepared_query.h @@ -143,7 +143,10 @@ class TPreparedQueryHolder { public: - TPreparedQueryHolder(NKikimrKqp::TPreparedQuery* proto, const NKikimr::NMiniKQL::IFunctionRegistry* functionRegistry); + TPreparedQueryHolder( + NKikimrKqp::TPreparedQuery* proto, + const NKikimr::NMiniKQL::IFunctionRegistry* functionRegistry, + bool noFillTables = false); ~TPreparedQueryHolder(); using TConstPtr = std::shared_ptr; diff --git a/ydb/core/kqp/rm_service/kqp_resource_estimation.cpp b/ydb/core/kqp/rm_service/kqp_resource_estimation.cpp index 5a228c190751..9c78862fcf5e 100644 --- a/ydb/core/kqp/rm_service/kqp_resource_estimation.cpp +++ b/ydb/core/kqp/rm_service/kqp_resource_estimation.cpp @@ -5,13 +5,7 @@ namespace NKikimr::NKqp { using namespace NYql::NDqProto; using namespace NKikimrConfig; -TTaskResourceEstimation EstimateTaskResources(const TDqTask& task, - const TTableServiceConfig::TResourceManager& config, const ui32 tasksCount) -{ - TTaskResourceEstimation ret = BuildInitialTaskResources(task); - EstimateTaskResources(config, ret, tasksCount); - return ret; -} + TTaskResourceEstimation BuildInitialTaskResources(const TDqTask& task) { TTaskResourceEstimation ret; @@ -23,24 +17,4 @@ TTaskResourceEstimation BuildInitialTaskResources(const TDqTask& task) { return ret; } -void EstimateTaskResources(const TTableServiceConfig::TResourceManager& config, - TTaskResourceEstimation& ret, const ui32 tasksCount) -{ - ui64 totalChannels = std::max(tasksCount, (ui32)1) * std::max(ret.ChannelBuffersCount, (ui32)1); - ui64 optimalChannelBufferSizeEstimation = totalChannels * 
config.GetChannelBufferSize(); - - optimalChannelBufferSizeEstimation = std::min(optimalChannelBufferSizeEstimation, config.GetMaxTotalChannelBuffersSize()); - - ret.ChannelBufferMemoryLimit = std::max(config.GetMinChannelBufferSize(), optimalChannelBufferSizeEstimation / totalChannels); - - if (ret.HeavyProgram) { - ret.MkqlProgramMemoryLimit = config.GetMkqlHeavyProgramMemoryLimit() / tasksCount; - } else { - ret.MkqlProgramMemoryLimit = config.GetMkqlLightProgramMemoryLimit() / tasksCount; - } - - ret.TotalMemoryLimit = ret.ChannelBuffersCount * ret.ChannelBufferMemoryLimit - + ret.MkqlProgramMemoryLimit; -} - } // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/rm_service/kqp_resource_estimation.h b/ydb/core/kqp/rm_service/kqp_resource_estimation.h index e88c7065c2ed..2a6af1a5339c 100644 --- a/ydb/core/kqp/rm_service/kqp_resource_estimation.h +++ b/ydb/core/kqp/rm_service/kqp_resource_estimation.h @@ -30,9 +30,5 @@ struct TTaskResourceEstimation { TTaskResourceEstimation BuildInitialTaskResources(const NYql::NDqProto::TDqTask& task); -TTaskResourceEstimation EstimateTaskResources(const NYql::NDqProto::TDqTask& task, - const NKikimrConfig::TTableServiceConfig::TResourceManager& config, const ui32 tasksCount); - -void EstimateTaskResources(const NKikimrConfig::TTableServiceConfig::TResourceManager& config, TTaskResourceEstimation& result, const ui32 tasksCount); } // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/rm_service/kqp_resource_estimation_ut.cpp b/ydb/core/kqp/rm_service/kqp_resource_estimation_ut.cpp deleted file mode 100644 index b741dc2220cf..000000000000 --- a/ydb/core/kqp/rm_service/kqp_resource_estimation_ut.cpp +++ /dev/null @@ -1,53 +0,0 @@ -#include -#include - -#include - -namespace NKikimr::NKqp { - -Y_UNIT_TEST_SUITE(KqpResourceEstimation) { - -Y_UNIT_TEST(TestChannelSize) { - NKikimrConfig::TTableServiceConfig::TResourceManager config; - config.SetChannelBufferSize(8_MB); - config.SetMinChannelBufferSize(2_MB); - 
config.SetMaxTotalChannelBuffersSize(2_GB); - config.SetMkqlLightProgramMemoryLimit(100); - - NYql::NDqProto::TDqTask task; - - // 100 input channels - auto* input = task.MutableInputs()->Add(); - for (int i = 0; i < 100; ++i) { - input->MutableChannels()->Add(); - } - - // 100 input channels - input = task.MutableInputs()->Add(); - for (int i = 0; i < 100; ++i) { - input->MutableChannels()->Add(); - } - - auto* output = task.MutableOutputs()->Add(); - output->MutableChannels()->Add(); - - auto est = EstimateTaskResources(task, config, 1); - UNIT_ASSERT_EQUAL(2, est.ChannelBuffersCount); - UNIT_ASSERT_EQUAL(est.ChannelBufferMemoryLimit, config.GetChannelBufferSize()); - - // add more channels, to be more then 256 - input = task.MutableInputs()->Add(); - for (int i = 0; i < 100; ++i) { - input->MutableChannels()->Add(); - } - - est = EstimateTaskResources(task, config, 1); - UNIT_ASSERT_EQUAL(2, est.ChannelBuffersCount); - - UNIT_ASSERT(est.ChannelBufferMemoryLimit == config.GetChannelBufferSize()); - UNIT_ASSERT(est.ChannelBufferMemoryLimit >= config.GetMinChannelBufferSize()); -} - -} // suite KqpResourceEstimation - -} // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/rm_service/kqp_rm_service.cpp b/ydb/core/kqp/rm_service/kqp_rm_service.cpp index b02e74d3b24b..ea16a237c12d 100644 --- a/ydb/core/kqp/rm_service/kqp_rm_service.cpp +++ b/ydb/core/kqp/rm_service/kqp_rm_service.cpp @@ -91,60 +91,6 @@ class TLimitedResource { T Used; }; -struct TTaskState { - bool AllocatedExecutionUnit = false; - ui64 ScanQueryMemory = 0; - ui64 ExternalDataQueryMemory = 0; - ui32 ExecutionUnits = 0; - ui64 ResourceBrokerTaskId = 0; - TInstant CreatedAt; -}; - -struct TTxState { - std::unordered_map Tasks; - ui64 TxScanQueryMemory = 0; - ui64 TxExternalDataQueryMemory = 0; - ui32 TxExecutionUnits = 0; - TInstant CreatedAt; - - TTaskState& Allocated(ui64 taskId, TInstant now, const TKqpResourcesRequest& resources, bool memoryAsExternal = false) { - ui64 externalMemory = 
resources.ExternalMemory; - ui64 resourceBrokerMemory = 0; - if (memoryAsExternal) { - externalMemory += resources.Memory; - } else { - resourceBrokerMemory = resources.Memory; - } - - TxExternalDataQueryMemory += externalMemory; - TxScanQueryMemory += resourceBrokerMemory; - if (!CreatedAt) { - CreatedAt = now; - } - - if (resources.ExecutionUnits) { - Y_ABORT_UNLESS(!Tasks.contains(taskId)); - } - - auto& taskState = Tasks[taskId]; - taskState.ExecutionUnits += resources.ExecutionUnits; - taskState.ScanQueryMemory += resourceBrokerMemory; - taskState.ExternalDataQueryMemory += externalMemory; - if (!taskState.CreatedAt) { - taskState.CreatedAt = now; - } - - return taskState; - } -}; - -struct TTxStatesBucket { - std::unordered_map Txs; // TxId -> TxState - TMutex Lock; -}; - -constexpr ui64 BucketsCount = 64; - struct TEvPrivate { enum EEv { EvPublishResources = EventSpaceBegin(TEvents::ES_PRIVATE), @@ -167,31 +113,44 @@ class TKqpResourceManager : public IKqpResourceManager { public: TKqpResourceManager(const NKikimrConfig::TTableServiceConfig::TResourceManager& config, TIntrusivePtr counters) - : Config(config) - , Counters(counters) - , ExecutionUnitsResource(Config.GetComputeActorsCount()) - , ExecutionUnitsLimit(Config.GetComputeActorsCount()) - , ScanQueryMemoryResource(Config.GetQueryMemoryLimit()) - , PublishResourcesByExchanger(Config.GetEnablePublishResourcesByExchanger()) { - + : Counters(counters) + , ExecutionUnitsResource(config.GetComputeActorsCount()) + , ExecutionUnitsLimit(config.GetComputeActorsCount()) + , ScanQueryMemoryResource(config.GetQueryMemoryLimit()) + , PublishResourcesByExchanger(config.GetEnablePublishResourcesByExchanger()) + { + SetConfigValues(config); } - void Bootstrap(TActorSystem* actorSystem, TActorId selfId) { + void Bootstrap(NKikimrConfig::TTableServiceConfig::TResourceManager& config, TActorSystem* actorSystem, TActorId selfId) { if (!Counters) { Counters = MakeIntrusive(AppData()->Counters); } ActorSystem = 
actorSystem; SelfId = selfId; - UpdatePatternCache(Config.GetKqpPatternCacheCapacityBytes(), - Config.GetKqpPatternCacheCompiledCapacityBytes(), - Config.GetKqpPatternCachePatternAccessTimesBeforeTryToCompile()); + UpdatePatternCache(config.GetKqpPatternCacheCapacityBytes(), + config.GetKqpPatternCacheCompiledCapacityBytes(), + config.GetKqpPatternCachePatternAccessTimesBeforeTryToCompile()); if (PublishResourcesByExchanger) { - CreateResourceInfoExchanger(Config.GetInfoExchangerSettings()); + CreateResourceInfoExchanger(config.GetInfoExchangerSettings()); return; } } + const TIntrusivePtr& GetCounters() const override { + return Counters; + } + + TPlannerPlacingOptions GetPlacingOptions() override { + return TPlannerPlacingOptions{ + .MaxNonParallelTasksExecutionLimit = MaxNonParallelTasksExecutionLimit.load(), + .MaxNonParallelDataQueryTasksLimit = MaxNonParallelDataQueryTasksLimit.load(), + .MaxNonParallelTopStageExecutionLimit = MaxNonParallelTopStageExecutionLimit.load(), + .PreferLocalDatacenterExecution = PreferLocalDatacenterExecution.load(), + }; + } + void CreateResourceInfoExchanger( const NKikimrConfig::TTableServiceConfig::TResourceManager::TInfoExchangerSettings& settings) { PublishResourcesByExchanger = true; @@ -210,7 +169,6 @@ class TKqpResourceManager : public IKqpResourceManager { ExecutionUnitsResource.fetch_add(cnt); return false; } else { - Counters->RmComputeActors->Add(cnt); return true; } } @@ -221,11 +179,13 @@ class TKqpResourceManager : public IKqpResourceManager { } ExecutionUnitsResource.fetch_add(cnt); - Counters->RmComputeActors->Sub(cnt); } - TKqpRMAllocateResult AllocateResources(ui64 txId, ui64 taskId, const TKqpResourcesRequest& resources) override + TKqpRMAllocateResult AllocateResources(TIntrusivePtr& tx, TIntrusivePtr& task, const TKqpResourcesRequest& resources) override { + const ui64 txId = tx->TxId; + const ui64 taskId = task->TaskId; + TKqpRMAllocateResult result; if (resources.ExecutionUnits) { if 
(!AllocateExecutionUnits(resources.ExecutionUnits)) { @@ -248,22 +208,18 @@ class TKqpResourceManager : public IKqpResourceManager { return result; } - auto now = ActorSystem->Timestamp(); bool hasScanQueryMemory = true; ui64 queryMemoryLimit = 0; - // NOTE(gvit): the first memory request from the data query pool always satisfied. + + // NOTE(gvit): the first memory request always satisfied. // all other requests are not guaranteed to be satisfied. // In the nearest future we need to implement several layers of memory requests. bool isFirstAllocationRequest = (resources.ExecutionUnits > 0 && resources.MemoryPool == EKqpMemoryPool::DataQuery); if (isFirstAllocationRequest) { - auto& txBucket = TxBucket(txId); - with_lock(txBucket.Lock) { - auto& tx = txBucket.Txs[txId]; - tx.Allocated(taskId, now, resources, /*memoryAsExternal=*/true); - ExternalDataQueryMemory.fetch_add(resources.Memory + resources.ExternalMemory); - Counters->RmExternalMemory->Add(resources.Memory + resources.ExternalMemory); - } - + TKqpResourcesRequest newRequest = resources; + newRequest.MoveToFreeTier(); + tx->Allocated(task, newRequest); + ExternalDataQueryMemory.fetch_add(newRequest.ExternalMemory); return result; } @@ -278,7 +234,7 @@ class TKqpResourceManager : public IKqpResourceManager { hasScanQueryMemory = ScanQueryMemoryResource.Has(resources.Memory); if (hasScanQueryMemory) { ScanQueryMemoryResource.Acquire(resources.Memory); - queryMemoryLimit = Config.GetQueryMemoryLimit(); + queryMemoryLimit = QueryMemoryLimit.load(); } } // with_lock (Lock) @@ -292,170 +248,93 @@ class TKqpResourceManager : public IKqpResourceManager { ui64 rbTaskId = LastResourceBrokerTaskId.fetch_add(1) + 1; TString rbTaskName = TStringBuilder() << "kqp-" << txId << '-' << taskId << '-' << rbTaskId; - bool extraAlloc = false; - - auto& txBucket = TxBucket(txId); - with_lock (txBucket.Lock) { - Y_DEFER { - if (!result) { - auto unguard = ::Unguard(txBucket.Lock); - Counters->RmNotEnoughMemory->Inc(); - with_lock 
(Lock) { - ScanQueryMemoryResource.Release(resources.Memory); - } // with_lock (Lock) - } - }; - auto& tx = txBucket.Txs[txId]; - ui64 txTotalRequestedMemory = tx.TxScanQueryMemory + resources.Memory; - result.TotalAllocatedQueryMemory = txTotalRequestedMemory; - if (txTotalRequestedMemory > queryMemoryLimit) { - TStringBuilder reason; - reason << "TxId: " << txId << ", taskId: " << taskId << ". Query memory limit exceeded: " - << "requested " << txTotalRequestedMemory; - result.SetError(NKikimrKqp::TEvStartKqpTasksResponse::QUERY_MEMORY_LIMIT_EXCEEDED, reason); - return result; + Y_DEFER { + if (!result) { + Counters->RmNotEnoughMemory->Inc(); + with_lock (Lock) { + ScanQueryMemoryResource.Release(resources.Memory); + } // with_lock (Lock) } + }; - bool allocated = ResourceBroker->SubmitTaskInstant( - TEvResourceBroker::TEvSubmitTask(rbTaskId, rbTaskName, {0, resources.Memory}, "kqp_query", 0, {}), - SelfId); - - if (!allocated) { - TStringBuilder reason; - reason << "TxId: " << txId << ", taskId: " << taskId << ". Not enough ScanQueryMemory: " - << "requested " << resources.Memory; - LOG_AS_N(reason); - result.SetError(NKikimrKqp::TEvStartKqpTasksResponse::NOT_ENOUGH_MEMORY, reason); - return result; - } + ui64 txTotalRequestedMemory = tx->GetExtraMemoryAllocatedSize() + resources.Memory; + if (txTotalRequestedMemory > queryMemoryLimit) { + TStringBuilder reason; + reason << "TxId: " << txId << ", taskId: " << taskId << ". 
Query memory limit exceeded: " + << "requested " << txTotalRequestedMemory; + result.SetError(NKikimrKqp::TEvStartKqpTasksResponse::QUERY_MEMORY_LIMIT_EXCEEDED, reason); + return result; + } - auto& taskState = tx.Allocated(taskId, now, resources); - if (!taskState.ResourceBrokerTaskId) { - taskState.ResourceBrokerTaskId = rbTaskId; - } else { - extraAlloc = true; - bool merged = ResourceBroker->MergeTasksInstant(taskState.ResourceBrokerTaskId, rbTaskId, SelfId); - Y_ABORT_UNLESS(merged); - } - } // with_lock (txBucket.Lock) + bool allocated = ResourceBroker->SubmitTaskInstant( + TEvResourceBroker::TEvSubmitTask(rbTaskId, rbTaskName, {0, resources.Memory}, "kqp_query", 0, {}), + SelfId); - LOG_AS_D("TxId: " << txId << ", taskId: " << taskId << ". Allocated " << resources.ToString()); + if (!allocated) { + TStringBuilder reason; + reason << "TxId: " << txId << ", taskId: " << taskId << ". Not enough ScanQueryMemory: " + << "requested " << resources.Memory; + LOG_AS_N(reason); + result.SetError(NKikimrKqp::TEvStartKqpTasksResponse::NOT_ENOUGH_MEMORY, reason); + return result; + } - Counters->RmMemory->Add(resources.Memory); - if (extraAlloc) { - Counters->RmExtraMemAllocs->Inc(); + tx->Allocated(task, resources); + if (!task->ResourceBrokerTaskId) { + task->ResourceBrokerTaskId = rbTaskId; + } else { + bool merged = ResourceBroker->MergeTasksInstant(task->ResourceBrokerTaskId, rbTaskId, SelfId); + Y_ABORT_UNLESS(merged); } + LOG_AS_D("TxId: " << txId << ", taskId: " << taskId << ". 
Allocated " << resources.ToString()); FireResourcesPublishing(); return result; } - void FreeResources(ui64 txId, ui64 taskId) override { - FreeResources(txId, taskId, TKqpResourcesRequest{.ReleaseAllResources=true}); + void FreeResources(TIntrusivePtr& tx, TIntrusivePtr& task) override { + FreeResources(tx, task, task->FreeResourcesRequest()); } - void FreeResources(ui64 txId, ui64 taskId, const TKqpResourcesRequest& resources) override { - ui64 releaseScanQueryMemory = 0; - ui64 releaseExternalDataQueryMemory = 0; - - auto& txBucket = TxBucket(txId); - - { - TMaybe> guard; - guard.ConstructInPlace(txBucket.Lock); - - auto txIt = txBucket.Txs.find(txId); - if (txIt == txBucket.Txs.end()) { - return; - } - - auto& tx = txIt->second; - auto taskIt = tx.Tasks.find(taskId); - if (taskIt == tx.Tasks.end()) { - return; - } - - auto& task = taskIt->second; - if (resources.ReleaseAllResources && task.ExecutionUnits) { - FreeExecutionUnits(task.ExecutionUnits); - } - - if (resources.ReleaseAllResources) { - releaseExternalDataQueryMemory = task.ExternalDataQueryMemory; - releaseScanQueryMemory = task.ScanQueryMemory; - } else { - releaseScanQueryMemory = std::min(task.ScanQueryMemory, resources.Memory); - ui64 leftToRelease = resources.Memory - releaseScanQueryMemory; - releaseExternalDataQueryMemory = std::min(task.ExternalDataQueryMemory, resources.ExternalMemory + leftToRelease); - } - - task.ScanQueryMemory -= releaseScanQueryMemory; - tx.TxScanQueryMemory -= releaseScanQueryMemory; - - task.ExternalDataQueryMemory -= releaseExternalDataQueryMemory; - tx.TxExternalDataQueryMemory -= releaseExternalDataQueryMemory; + void FreeResources(TIntrusivePtr& tx, TIntrusivePtr& task, const TKqpResourcesRequest& resources) override { + if (resources.ExecutionUnits) { + FreeExecutionUnits(resources.ExecutionUnits); + } - if (task.ScanQueryMemory == 0) { - if (task.ResourceBrokerTaskId) { - bool finished = ResourceBroker->FinishTaskInstant( - 
TEvResourceBroker::TEvFinishTask(task.ResourceBrokerTaskId), SelfId); - Y_DEBUG_ABORT_UNLESS(finished); - task.ResourceBrokerTaskId = 0; - } + Y_ABORT_UNLESS(resources.Memory <= task->ScanQueryMemory); + if (resources.Memory > 0 && task->ResourceBrokerTaskId) { + if (resources.Memory == task->ScanQueryMemory) { + bool finished = ResourceBroker->FinishTaskInstant( + TEvResourceBroker::TEvFinishTask(task->ResourceBrokerTaskId), SelfId); + Y_DEBUG_ABORT_UNLESS(finished); + task->ResourceBrokerTaskId = 0; } else { bool reduced = ResourceBroker->ReduceTaskResourcesInstant( - taskIt->second.ResourceBrokerTaskId, {0, releaseScanQueryMemory}, SelfId); + task->ResourceBrokerTaskId, {0, resources.Memory}, SelfId); Y_DEBUG_ABORT_UNLESS(reduced); } + } - if (resources.ExecutionUnits) { - ui64 remainsTasks = tx.Tasks.size() - 1; - if (remainsTasks == 0) { - txBucket.Txs.erase(txIt); - } else { - tx.Tasks.erase(taskIt); - } - } - - i64 prev = ExternalDataQueryMemory.fetch_sub(releaseExternalDataQueryMemory); - Counters->RmExternalMemory->Sub(releaseExternalDataQueryMemory); - Y_DEBUG_ABORT_UNLESS(prev >= 0); - Counters->RmMemory->Sub(releaseScanQueryMemory); - Y_DEBUG_ABORT_UNLESS(Counters->RmMemory->Val() >= 0); - } // with_lock (txBucket.Lock) + tx->Released(task, resources); + i64 prev = ExternalDataQueryMemory.fetch_sub(resources.ExternalMemory); + Y_DEBUG_ABORT_UNLESS(prev >= 0); - with_lock (Lock) { - ScanQueryMemoryResource.Release(releaseScanQueryMemory); - } // with_lock (Lock) + if (resources.Memory > 0) { + with_lock (Lock) { + ScanQueryMemoryResource.Release(resources.Memory); + } // with_lock (Lock) + } - LOG_AS_D("TxId: " << txId << ", taskId: " << taskId << ". Released resources, " - << "ScanQueryMemory: " << releaseScanQueryMemory << ", " - << "ExternalDataQueryMemory " << releaseExternalDataQueryMemory << ", " + LOG_AS_D("TxId: " << tx->TxId << ", taskId: " << task->TaskId << ". 
Released resources, " + << "ScanQueryMemory: " << resources.Memory << ", " + << "ExternalDataQueryMemory " << resources.ExternalMemory << ", " << "ExecutionUnits " << resources.ExecutionUnits << "."); FireResourcesPublishing(); } - void NotifyExternalResourcesAllocated(ui64 txId, ui64 taskId, const TKqpResourcesRequest& resources) override { - LOG_AS_D("TxId: " << txId << ", taskId: " << taskId << ". External allocation: " << resources.ToString()); - - // we don't register data execution units for now - //YQL_ENSURE(resources.ExecutionUnits == 0); - YQL_ENSURE(resources.MemoryPool == EKqpMemoryPool::DataQuery); - - auto& txBucket = TxBucket(txId); - with_lock (txBucket.Lock) { - txBucket.Txs[txId].Allocated(taskId, TInstant(), resources); - ExternalDataQueryMemory.fetch_add(resources.ExternalMemory); - Counters->RmExternalMemory->Add(resources.ExternalMemory); - } // with_lock (txBucket.Lock) - - - FireResourcesPublishing(); - } - TVector GetClusterResources() const override { TVector resources; Y_ABORT_UNLESS(PublishResourcesByExchanger); @@ -505,34 +384,60 @@ class TKqpResourceManager : public IKqpResourceManager { return result; } - NKikimrConfig::TTableServiceConfig::TResourceManager GetConfig() override { + std::shared_ptr GetPatternCache() override { with_lock (Lock) { - return Config; + return PatternCache; } } - std::shared_ptr GetPatternCache() override { - with_lock (Lock) { - return PatternCache; + TTaskResourceEstimation EstimateTaskResources(const NYql::NDqProto::TDqTask& task, const ui32 tasksCount) override + { + TTaskResourceEstimation ret = BuildInitialTaskResources(task); + EstimateTaskResources(ret, tasksCount); + return ret; + } + + void EstimateTaskResources(TTaskResourceEstimation& ret, const ui32 tasksCount) override + { + ui64 totalChannels = std::max(tasksCount, (ui32)1) * std::max(ret.ChannelBuffersCount, (ui32)1); + ui64 optimalChannelBufferSizeEstimation = totalChannels * ChannelBufferSize.load(); + + optimalChannelBufferSizeEstimation = 
std::min(optimalChannelBufferSizeEstimation, MaxTotalChannelBuffersSize.load()); + + ret.ChannelBufferMemoryLimit = std::max(MinChannelBufferSize.load(), optimalChannelBufferSizeEstimation / totalChannels); + + if (ret.HeavyProgram) { + ret.MkqlProgramMemoryLimit = MkqlHeavyProgramMemoryLimit.load() / std::max(tasksCount, (ui32)1); + } else { + ret.MkqlProgramMemoryLimit = MkqlLightProgramMemoryLimit.load() / std::max(tasksCount, (ui32)1); } + + ret.TotalMemoryLimit = ret.ChannelBuffersCount * ret.ChannelBufferMemoryLimit + + ret.MkqlProgramMemoryLimit; } - ui32 GetNodeId() override { - return SelfId.NodeId(); + void SetConfigValues(const NKikimrConfig::TTableServiceConfig::TResourceManager& config) { + MkqlHeavyProgramMemoryLimit.store(config.GetMkqlHeavyProgramMemoryLimit()); + MkqlLightProgramMemoryLimit.store(config.GetMkqlLightProgramMemoryLimit()); + ChannelBufferSize.store(config.GetChannelBufferSize()); + MinChannelBufferSize.store(config.GetMinChannelBufferSize()); + MaxTotalChannelBuffersSize.store(config.GetMaxTotalChannelBuffersSize()); + QueryMemoryLimit.store(config.GetQueryMemoryLimit()); + MaxNonParallelTopStageExecutionLimit.store(config.GetMaxNonParallelTopStageExecutionLimit()); + MaxNonParallelTasksExecutionLimit.store(config.GetMaxNonParallelTasksExecutionLimit()); + PreferLocalDatacenterExecution.store(config.GetPreferLocalDatacenterExecution()); + MaxNonParallelDataQueryTasksLimit.store(config.GetMaxNonParallelDataQueryTasksLimit()); } - TTxStatesBucket& TxBucket(ui64 txId) { - return Buckets[txId % Buckets.size()]; + ui32 GetNodeId() override { + return SelfId.NodeId(); } void FireResourcesPublishing() { - with_lock (Lock) { - if (PublishScheduledAt) { - return; - } + bool prev = PublishScheduled.test_and_set(); + if (!prev) { + ActorSystem->Send(SelfId, new TEvPrivate::TEvSchedulePublishResources); } - - ActorSystem->Send(SelfId, new TEvPrivate::TEvSchedulePublishResources); } void UpdatePatternCache(ui64 maxSizeBytes, ui64 
maxCompiledSizeBytes, ui64 patternAccessTimesBeforeTryToCompile) { @@ -549,7 +454,13 @@ class TKqpResourceManager : public IKqpResourceManager { TActorId SelfId; - NKikimrConfig::TTableServiceConfig::TResourceManager Config; // guarded by Lock + std::atomic QueryMemoryLimit; + std::atomic MkqlHeavyProgramMemoryLimit; + std::atomic MkqlLightProgramMemoryLimit; + std::atomic ChannelBufferSize; + std::atomic MinChannelBufferSize; + std::atomic MaxTotalChannelBuffersSize; + TIntrusivePtr Counters; TIntrusivePtr ResourceBroker; TActorSystem* ActorSystem = nullptr; @@ -562,14 +473,15 @@ class TKqpResourceManager : public IKqpResourceManager { std::atomic ExecutionUnitsLimit; TLimitedResource ScanQueryMemoryResource; std::atomic ExternalDataQueryMemory = 0; + std::atomic MaxNonParallelTopStageExecutionLimit = 1; + std::atomic MaxNonParallelTasksExecutionLimit = 8; + std::atomic PreferLocalDatacenterExecution = true; + std::atomic MaxNonParallelDataQueryTasksLimit = 1000; // current state - std::array Buckets; std::atomic LastResourceBrokerTaskId = 0; - // schedule info (guarded by Lock) - std::optional PublishScheduledAt; - + std::atomic_flag PublishScheduled; // pattern cache for different actors std::shared_ptr PatternCache; @@ -601,16 +513,21 @@ class TKqpResourceManagerActor : public TActorBootstrapped counters, const TActorId& resourceBrokerId, - std::shared_ptr&& kqpProxySharedResources) - : ResourceBrokerId(resourceBrokerId ? resourceBrokerId : MakeResourceBrokerID()) + std::shared_ptr&& kqpProxySharedResources, ui32 nodeId) + : Config(config) + , ResourceBrokerId(resourceBrokerId ? 
resourceBrokerId : MakeResourceBrokerID()) , KqpProxySharedResources(std::move(kqpProxySharedResources)) , PublishResourcesByExchanger(config.GetEnablePublishResourcesByExchanger()) { ResourceManager = std::make_shared(config, counters); + with_lock (ResourceManagers.Lock) { + ResourceManagers.ByNodeId[nodeId] = ResourceManager; + ResourceManagers.Default = ResourceManager; + } } void Bootstrap() { - ResourceManager->Bootstrap(TlsActivationContext->ActorSystem(), SelfId()); + ResourceManager->Bootstrap(Config, TlsActivationContext->ActorSystem(), SelfId()); LOG_D("Start KqpResourceManagerActor at " << SelfId() << " with ResourceBroker at " << ResourceBrokerId); @@ -639,11 +556,6 @@ class TKqpResourceManagerActor : public TActorBootstrappedLock) { - ResourceManager->PublishScheduledAt.reset(); - } + PublishResourcesScheduledAt.reset(); PublishResourceUsage("batching"); } @@ -841,9 +751,9 @@ class TKqpResourceManagerActor : public TActorBootstrappedExecutionUnitsLimit.load(); ResourceManager->ExecutionUnitsLimit.store(config.GetComputeActorsCount()); ResourceManager->ExecutionUnitsResource.fetch_add((i32)config.GetComputeActorsCount() - prev); - ResourceManager->Config.Swap(&config); + ResourceManager->SetConfigValues(config); + Config.Swap(&config); } - } static void HandleWork(TEvents::TEvUndelivered::TPtr& ev) { @@ -881,11 +791,6 @@ class TKqpResourceManagerActor : public TActorBootstrappedLock) { - str << ResourceManager->Config.DebugString() << Endl; - } - str << "State storage key: " << WbState.Tenant << Endl; with_lock (ResourceManager->Lock) { str << "ScanQuery memory resource: " << ResourceManager->ScanQueryMemoryResource.ToString() << Endl; @@ -897,35 +802,8 @@ class TKqpResourceManagerActor : public TActorBootstrapped publishScheduledAt; - with_lock (ResourceManager->Lock) { - publishScheduledAt = ResourceManager->PublishScheduledAt; - } - - if (publishScheduledAt) { - str << "Next publish time: " << *publishScheduledAt << Endl; - } - - str << Endl << 
"Transactions:" << Endl; - for (auto& bucket : ResourceManager->Buckets) { - with_lock (bucket.Lock) { - for (auto& [txId, txState] : bucket.Txs) { - str << " TxId: " << txId << Endl; - str << " ScanQuery memory: " << txState.TxScanQueryMemory << Endl; - str << " External DataQuery memory: " << txState.TxExternalDataQueryMemory << Endl; - str << " Execution units: " << txState.TxExecutionUnits << Endl; - str << " Create at: " << txState.CreatedAt << Endl; - str << " Tasks:" << Endl; - for (auto& [taskId, taskState] : txState.Tasks) { - str << " TaskId: " << taskId << Endl; - str << " ScanQuery memory: " << taskState.ScanQueryMemory << Endl; - str << " External DataQuery memory: " << taskState.ExternalDataQueryMemory << Endl; - str << " Execution units: " << taskState.ExecutionUnits << Endl; - str << " ResourceBroker TaskId: " << taskState.ResourceBrokerTaskId << Endl; - str << " Created at: " << taskState.CreatedAt << Endl; - } - } - } // with_lock (bucket.Lock) + if (PublishResourcesScheduledAt) { + str << "Next publish time: " << *PublishResourcesScheduledAt << Endl; } if (snapshot.empty()) { @@ -940,13 +818,6 @@ class TKqpResourceManagerActor : public TActorBootstrapped publishScheduledAt; - - with_lock (ResourceManager->Lock) { - publishInterval = TDuration::Seconds(ResourceManager->Config.GetPublishStatisticsIntervalSec()); - publishScheduledAt = ResourceManager->PublishScheduledAt; - } - - if (publishScheduledAt) { + const TDuration publishInterval = TDuration::Seconds(Config.GetPublishStatisticsIntervalSec()); + if (PublishResourcesScheduledAt) { return; } auto now = ResourceManager->ActorSystem->Timestamp(); if (publishInterval && WbState.LastPublishTime && now - *WbState.LastPublishTime < publishInterval) { - publishScheduledAt = *WbState.LastPublishTime + publishInterval; + PublishResourcesScheduledAt = *WbState.LastPublishTime + publishInterval; - with_lock (ResourceManager->Lock) { - ResourceManager->PublishScheduledAt = publishScheduledAt; - } - - 
Schedule(*publishScheduledAt - now, new TEvPrivate::TEvPublishResources); - LOG_D("Schedule publish at " << *publishScheduledAt << ", after " << (*publishScheduledAt - now)); + Schedule(*PublishResourcesScheduledAt - now, new TEvPrivate::TEvPublishResources); + LOG_D("Schedule publish at " << *PublishResourcesScheduledAt << ", after " << (*PublishResourcesScheduledAt - now)); return; } + // starting resources publishing. + // saying resource manager that we are ready for the next publishing. + ResourceManager->PublishScheduled.clear(); + NKikimrKqp::TKqpNodeResources payload; payload.SetNodeId(SelfId().NodeId()); payload.SetTimestamp(now.Seconds()); @@ -1072,6 +936,8 @@ class TKqpResourceManagerActor : public TActorBootstrapped ResourceManager; + std::optional PublishResourcesScheduledAt; bool PublishResourcesByExchanger; std::optional SelfDataCenterId; }; @@ -1100,9 +967,9 @@ class TKqpResourceManagerActor : public TActorBootstrapped counters, NActors::TActorId resourceBroker, - std::shared_ptr kqpProxySharedResources) + std::shared_ptr kqpProxySharedResources, ui32 nodeId) { - return new NRm::TKqpResourceManagerActor(config, counters, resourceBroker, std::move(kqpProxySharedResources)); + return new NRm::TKqpResourceManagerActor(config, counters, resourceBroker, std::move(kqpProxySharedResources), nodeId); } std::shared_ptr GetKqpResourceManager(TMaybe _nodeId) { @@ -1111,6 +978,10 @@ std::shared_ptr GetKqpResourceManager(TMaybe _no } ui32 nodeId = _nodeId ? 
*_nodeId : TActivationContext::ActorSystem()->NodeId; + if (auto rm = TryGetKqpResourceManager(nodeId)) { + return rm; + } + Y_ABORT("KqpResourceManager not ready yet, node #%" PRIu32, nodeId); } diff --git a/ydb/core/kqp/rm_service/kqp_rm_service.h b/ydb/core/kqp/rm_service/kqp_rm_service.h index fe5f3e7986c5..c4cf4ba60f91 100644 --- a/ydb/core/kqp/rm_service/kqp_rm_service.h +++ b/ydb/core/kqp/rm_service/kqp_rm_service.h @@ -11,6 +11,8 @@ #include #include +#include "kqp_resource_estimation.h" + #include #include #include @@ -40,18 +42,140 @@ struct TKqpResourcesRequest { ui64 ExternalMemory = 0; bool ReleaseAllResources = false; + void MoveToFreeTier() { + ExternalMemory += Memory; + Memory = 0; + } + TString ToString() const { return TStringBuilder() << "TKqpResourcesRequest{ MemoryPool: " << (ui32) MemoryPool << ", Memory: " << Memory << "ExternalMemory: " << ExternalMemory << " }"; } }; +class TTxState; + +class TTaskState : public TAtomicRefCount { + friend TTxState; + +public: + const ui64 TaskId = 0; + const TInstant CreatedAt; + ui64 ScanQueryMemory = 0; + ui64 ExternalDataQueryMemory = 0; + ui64 ResourceBrokerTaskId = 0; + ui32 ExecutionUnits = 0; + +public: + + // compute actor wants to release some memory. 
+ // we distribute that memory across granted resources + TKqpResourcesRequest FitRequest(TKqpResourcesRequest& resources) { + ui64 releaseScanQueryMemory = std::min(ScanQueryMemory, resources.Memory); + ui64 leftToRelease = resources.Memory - releaseScanQueryMemory; + ui64 releaseExternalDataQueryMemory = std::min(ExternalDataQueryMemory, resources.ExternalMemory + leftToRelease); + + resources.Memory = releaseScanQueryMemory; + resources.ExternalMemory = releaseExternalDataQueryMemory; + return resources; + } + + TKqpResourcesRequest FreeResourcesRequest() const { + return TKqpResourcesRequest{ + .ExecutionUnits=ExecutionUnits, + .MemoryPool=EKqpMemoryPool::Unspecified, + .Memory=ScanQueryMemory, + .ExternalMemory=ExternalDataQueryMemory}; + } + + explicit TTaskState(ui64 taskId, TInstant createdAt) + : TaskId(taskId) + , CreatedAt(createdAt) + { + } +}; + +class TTxState : public TAtomicRefCount { + +public: + const ui64 TxId; + const TInstant CreatedAt; + TIntrusivePtr Counters; +private: + std::atomic TxScanQueryMemory = 0; + std::atomic TxExternalDataQueryMemory = 0; + std::atomic TxExecutionUnits = 0; + +public: + explicit TTxState(ui64 txId, TInstant now, TIntrusivePtr counters) + : TxId(txId) + , CreatedAt(now) + , Counters(std::move(counters)) + {} + + TString ToString() const { + return TStringBuilder() << "TxResourcesInfo{ " + << "TxId: " << TxId + << ", memory initially granted resources: " << TxExternalDataQueryMemory.load() + << ", extra allocations " << TxScanQueryMemory.load() + << ", execution units: " << TxExecutionUnits.load() + << ", started at: " << CreatedAt + << " }"; + } + + ui64 GetExtraMemoryAllocatedSize() { + return TxScanQueryMemory.load(); + } + + void Released(TIntrusivePtr& taskState, const TKqpResourcesRequest& resources) { + if (resources.ExecutionUnits) { + Counters->RmOnCompleteFree->Inc(); + } else { + Counters->RmExtraMemFree->Inc(); + } + + Counters->RmExternalMemory->Sub(resources.ExternalMemory); + 
TxExternalDataQueryMemory.fetch_sub(resources.ExternalMemory); + taskState->ExternalDataQueryMemory -= resources.ExternalMemory; + + TxScanQueryMemory.fetch_sub(resources.Memory); + taskState->ScanQueryMemory -= resources.Memory; + Counters->RmMemory->Sub(resources.Memory); + + TxExecutionUnits.fetch_sub(resources.ExecutionUnits); + taskState->ExecutionUnits -= resources.ExecutionUnits; + Counters->RmComputeActors->Sub(resources.ExecutionUnits); + } + + void Allocated(TIntrusivePtr& taskState, const TKqpResourcesRequest& resources) { + if (resources.ExecutionUnits > 0) { + Counters->RmOnStartAllocs->Inc(); + } + + Counters->RmExternalMemory->Add(resources.ExternalMemory); + TxExternalDataQueryMemory.fetch_add(resources.ExternalMemory); + taskState->ExternalDataQueryMemory += resources.ExternalMemory; + + TxScanQueryMemory.fetch_add(resources.Memory); + taskState->ScanQueryMemory += resources.Memory; + Counters->RmMemory->Add(resources.Memory); + if (resources.Memory) { + Counters->RmExtraMemAllocs->Inc(); + } + + TxExecutionUnits.fetch_add(resources.ExecutionUnits); + taskState->ExecutionUnits += resources.ExecutionUnits; + Counters->RmComputeActors->Add(resources.ExecutionUnits); + } +}; + /// detailed information on allocation failure struct TKqpRMAllocateResult { bool Success = true; NKikimrKqp::TEvStartKqpTasksResponse::ENotStartedTaskReason Status = NKikimrKqp::TEvStartKqpTasksResponse::INTERNAL_ERROR; TString FailReason; - ui64 TotalAllocatedQueryMemory = 0; + TIntrusivePtr TaskInfo; + TIntrusivePtr TxInfo; NKikimrKqp::TEvStartKqpTasksResponse::ENotStartedTaskReason GetStatus() const { return Status; @@ -78,25 +202,32 @@ struct TKqpLocalNodeResources { std::array Memory; }; +struct TPlannerPlacingOptions { + ui64 MaxNonParallelTasksExecutionLimit = 8; + ui64 MaxNonParallelDataQueryTasksLimit = 1000; + ui64 MaxNonParallelTopStageExecutionLimit = 1; + bool PreferLocalDatacenterExecution = true; +}; + /// per node singleton with instant API class 
IKqpResourceManager : private TNonCopyable { public: virtual ~IKqpResourceManager() = default; - virtual TKqpRMAllocateResult AllocateResources(ui64 txId, ui64 taskId, const TKqpResourcesRequest& resources) = 0; - - using TResourcesAllocatedCallback = std::function; + virtual const TIntrusivePtr& GetCounters() const = 0; - virtual void FreeResources(ui64 txId, ui64 taskId, const TKqpResourcesRequest& resources) = 0; - virtual void FreeResources(ui64 txId, ui64 taskId) = 0; + virtual TKqpRMAllocateResult AllocateResources(TIntrusivePtr& tx, TIntrusivePtr& task, const TKqpResourcesRequest& resources) = 0; - virtual void NotifyExternalResourcesAllocated(ui64 txId, ui64 taskId, const TKqpResourcesRequest& resources) = 0; + virtual TPlannerPlacingOptions GetPlacingOptions() = 0; + virtual TTaskResourceEstimation EstimateTaskResources(const NYql::NDqProto::TDqTask& task, const ui32 tasksCount) = 0; + virtual void EstimateTaskResources(TTaskResourceEstimation& result, const ui32 tasksCount) = 0; + virtual void FreeResources(TIntrusivePtr& tx, TIntrusivePtr& task, const TKqpResourcesRequest& resources) = 0; + virtual void FreeResources(TIntrusivePtr& tx, TIntrusivePtr& task) = 0; virtual void RequestClusterResourcesInfo(TOnResourcesSnapshotCallback&& callback) = 0; virtual TVector GetClusterResources() const = 0; virtual TKqpLocalNodeResources GetLocalResources() const = 0; - virtual NKikimrConfig::TTableServiceConfig::TResourceManager GetConfig() = 0; virtual std::shared_ptr GetPatternCache() = 0; @@ -142,7 +273,8 @@ struct TKqpProxySharedResources { NActors::IActor* CreateKqpResourceManagerActor(const NKikimrConfig::TTableServiceConfig::TResourceManager& config, TIntrusivePtr counters, NActors::TActorId resourceBroker = {}, - std::shared_ptr kqpProxySharedResources = nullptr); + std::shared_ptr kqpProxySharedResources = nullptr, + ui32 nodeId = 0); std::shared_ptr GetKqpResourceManager(TMaybe nodeId = Nothing()); std::shared_ptr TryGetKqpResourceManager(TMaybe nodeId = 
Nothing()); diff --git a/ydb/core/kqp/rm_service/kqp_rm_ut.cpp b/ydb/core/kqp/rm_service/kqp_rm_ut.cpp index 764e9cf8b6ce..09d7c0536254 100644 --- a/ydb/core/kqp/rm_service/kqp_rm_ut.cpp +++ b/ydb/core/kqp/rm_service/kqp_rm_ut.cpp @@ -151,7 +151,7 @@ class KqpRm : public TTestBase { void CreateKqpResourceManager( const NKikimrConfig::TTableServiceConfig::TResourceManager& config, ui32 nodeInd = 0) { auto kqpCounters = MakeIntrusive(Counters); - auto resman = CreateKqpResourceManagerActor(config, kqpCounters, ResourceBrokers[nodeInd]); + auto resman = CreateKqpResourceManagerActor(config, kqpCounters, ResourceBrokers[nodeInd], nullptr, Runtime->GetNodeId(nodeInd)); ResourceManagers.push_back(Runtime->Register(resman, nodeInd)); Runtime->RegisterService(MakeKqpResourceManagerServiceID( Runtime->GetNodeId(nodeInd)), ResourceManagers.back(), nodeInd); @@ -185,6 +185,14 @@ class KqpRm : public TTestBase { UNIT_ASSERT_VALUES_EQUAL(t->GetCounter("InFlyTasks")->Val(), infly); } + TIntrusivePtr MakeTx(ui64 txId, std::shared_ptr rm) { + return MakeIntrusive(txId, TInstant::Now(), rm->GetCounters()); + } + + TIntrusivePtr MakeTask(ui64 taskId, TIntrusivePtr tx) { + return MakeIntrusive(taskId, tx->CreatedAt); + } + void AssertResourceManagerStats( std::shared_ptr rm, ui64 scanQueryMemory, ui32 executionUnits) { Y_UNUSED(executionUnits); @@ -316,14 +324,16 @@ void KqpRm::SingleTask() { NRm::TKqpResourcesRequest request; request.MemoryPool = NRm::EKqpMemoryPool::ScanQuery; request.Memory = 100; + auto tx1 = MakeTx(1, rm); + auto task2 = MakeTask(2, tx1); - bool allocated = rm->AllocateResources(1, 2, request); + bool allocated = rm->AllocateResources(tx1, task2, request); UNIT_ASSERT(allocated); AssertResourceManagerStats(rm, 900, 90); AssertResourceBrokerSensors(0, 100, 0, 0, 1); - rm->FreeResources(1, 2); + rm->FreeResources(tx1, task2); AssertResourceManagerStats(rm, 1000, 100); AssertResourceBrokerSensors(0, 0, 0, 1, 0); } @@ -338,14 +348,23 @@ void KqpRm::ManyTasks() { 
request.MemoryPool = NRm::EKqpMemoryPool::ScanQuery; request.Memory = 100; + auto tx1 = MakeTx(1, rm); + TIntrusivePtr task1; + for (ui32 i = 1; i < 10; ++i) { - bool allocated = rm->AllocateResources(1, i, request); + auto task = MakeTask(i, tx1); + if (!task1) { + task1 = task; + } + + bool allocated = rm->AllocateResources(tx1, task, request); UNIT_ASSERT(allocated); AssertResourceManagerStats(rm, 1000 - 100 * i, 100 - 10 * i); AssertResourceBrokerSensors(0, 100 * i, 0, 0, i); } +/* // invalid taskId rm->FreeResources(1, 0); AssertResourceManagerStats(rm, 100, 10); @@ -355,8 +374,9 @@ void KqpRm::ManyTasks() { rm->FreeResources(10, 1); AssertResourceManagerStats(rm, 100, 10); AssertResourceBrokerSensors(0, 900, 0, 0, 9); +*/ - rm->FreeResources(1, 1); + rm->FreeResources(tx1, task1); AssertResourceManagerStats(rm, 200, 20); AssertResourceBrokerSensors(0, 800, 0, 1, 8); } @@ -371,7 +391,10 @@ void KqpRm::NotEnoughMemory() { request.MemoryPool = NRm::EKqpMemoryPool::ScanQuery; request.Memory = 10'000; - bool allocated = rm->AllocateResources(1, 2, request); + auto tx = MakeTx(1, rm); + auto task = MakeTask(2, tx); + + bool allocated = rm->AllocateResources(tx, task, request); UNIT_ASSERT(!allocated); AssertResourceManagerStats(rm, 1000, 100); @@ -389,8 +412,10 @@ void KqpRm::NotEnoughExecutionUnits() { request.Memory = 100; request.ExecutionUnits = 1000; - bool allocated = true; - allocated &= rm->AllocateResources(1, 2, request); + auto tx = MakeTx(1, rm); + auto task = MakeTask(2, tx); + + bool allocated = rm->AllocateResources(tx, task, request); UNIT_ASSERT(!allocated); AssertResourceManagerStats(rm, 1000, 100); @@ -410,12 +435,15 @@ void KqpRm::ResourceBrokerNotEnoughResources() { request.MemoryPool = NRm::EKqpMemoryPool::ScanQuery; request.Memory = 1'000; - bool allocated = rm->AllocateResources(1, 2, request); + auto tx = MakeTx(1, rm); + auto task = MakeTask(2, tx); + + bool allocated = rm->AllocateResources(tx, task, request); UNIT_ASSERT(allocated); 
request.MemoryPool = NRm::EKqpMemoryPool::ScanQuery; request.Memory = 100'000; - allocated = rm->AllocateResources(1, 2, request); + allocated = rm->AllocateResources(tx, task, request); UNIT_ASSERT(!allocated); AssertResourceManagerStats(rm, config.GetQueryMemoryLimit() - 1000, 90); @@ -432,11 +460,16 @@ void KqpRm::Snapshot(bool byExchanger) { request.MemoryPool = NRm::EKqpMemoryPool::ScanQuery; request.Memory = 100; request.ExecutionUnits = 10; + auto tx1 = MakeTx(1, rm); + auto tx2 = MakeTx(2, rm); - bool allocated = rm->AllocateResources(1, 2, request); + auto task2 = MakeTask(2, tx1); + auto task1 = MakeTask(1, tx2); + + bool allocated = rm->AllocateResources(tx1, task2, request); UNIT_ASSERT(allocated); - allocated &= rm->AllocateResources(2, 1, request); + allocated &= rm->AllocateResources(tx2, task1, request); UNIT_ASSERT(allocated); AssertResourceManagerStats(rm, 800, 80); @@ -446,8 +479,8 @@ void KqpRm::Snapshot(bool byExchanger) { CheckSnapshot(0, {{800, 80}, {1000, 100}}, rm); - rm->FreeResources(1, 2); - rm->FreeResources(2, 1); + rm->FreeResources(tx1, task2); + rm->FreeResources(tx2, task1); AssertResourceManagerStats(rm, 1000, 100); AssertResourceBrokerSensors(0, 0, 0, 2, 0); @@ -474,8 +507,10 @@ void KqpRm::Reduce() { NRm::TKqpResourcesRequest request; request.MemoryPool = NRm::EKqpMemoryPool::ScanQuery; request.Memory = 100; + auto tx = MakeTx(1, rm); + auto task = MakeTask(1, tx); - bool allocated = rm->AllocateResources(1, 1, request); + bool allocated = rm->AllocateResources(tx, task, request); UNIT_ASSERT(allocated); AssertResourceManagerStats(rm, 1000 - 100, 100 - 10); @@ -485,6 +520,7 @@ void KqpRm::Reduce() { reduceRequest.MemoryPool = NRm::EKqpMemoryPool::ScanQuery; reduceRequest.Memory = 70; +/* // invalid taskId rm->FreeResources(1, 0); AssertResourceManagerStats(rm, 1000 - 100, 100 - 10); @@ -494,8 +530,9 @@ void KqpRm::Reduce() { rm->FreeResources(10, 1); AssertResourceManagerStats(rm, 1000 - 100, 100 - 10); 
AssertResourceBrokerSensors(0, 100, 0, 0, 1); +*/ - rm->FreeResources(1, 1, reduceRequest); + rm->FreeResources(tx, task, reduceRequest); AssertResourceManagerStats(rm, 1000 - 30, 100 - 7); AssertResourceBrokerSensors(0, 30, 0, 0, 1); } @@ -517,11 +554,21 @@ void KqpRm::SnapshotSharing(bool byExchanger) { request.Memory = 100; request.ExecutionUnits = 10; + auto tx1Rm1 = MakeTx(1, rm_first); + auto tx2Rm1 = MakeTx(2, rm_first); + auto task1Rm1 = MakeTask(1, tx1Rm1); + auto task2Rm1 = MakeTask(1, tx2Rm1); + + auto tx1Rm2 = MakeTx(1, rm_second); + auto tx2Rm2 = MakeTx(2, rm_second); + auto task1Rm2 = MakeTask(1, tx1Rm2); + auto task2Rm2 = MakeTask(2, tx2Rm2); + { - bool allocated = rm_first->AllocateResources(1, 2, request); + bool allocated = rm_first->AllocateResources(tx1Rm1, task1Rm1, request); UNIT_ASSERT(allocated); - allocated &= rm_first->AllocateResources(2, 1, request); + allocated &= rm_first->AllocateResources(tx2Rm1, task2Rm1, request); UNIT_ASSERT(allocated); Runtime->DispatchEvents(TDispatchOptions(), TDuration::Seconds(1)); @@ -530,10 +577,10 @@ void KqpRm::SnapshotSharing(bool byExchanger) { } { - bool allocated = rm_second->AllocateResources(1, 2, request); + bool allocated = rm_second->AllocateResources(tx1Rm2, task1Rm2, request); UNIT_ASSERT(allocated); - allocated &= rm_second->AllocateResources(2, 1, request); + allocated &= rm_second->AllocateResources(tx2Rm2, task2Rm2, request); UNIT_ASSERT(allocated); Runtime->DispatchEvents(TDispatchOptions(), TDuration::Seconds(1)); @@ -542,8 +589,8 @@ void KqpRm::SnapshotSharing(bool byExchanger) { } { - rm_first->FreeResources(1, 2); - rm_first->FreeResources(2, 1); + rm_first->FreeResources(tx1Rm1, task1Rm1); + rm_first->FreeResources(tx2Rm1, task2Rm1); Runtime->DispatchEvents(TDispatchOptions(), TDuration::Seconds(1)); @@ -551,8 +598,8 @@ void KqpRm::SnapshotSharing(bool byExchanger) { } { - rm_second->FreeResources(1, 2); - rm_second->FreeResources(2, 1); + rm_second->FreeResources(tx1Rm2, task1Rm2); + 
rm_second->FreeResources(tx2Rm2, task2Rm2); Runtime->DispatchEvents(TDispatchOptions(), TDuration::Seconds(1)); diff --git a/ydb/core/kqp/rm_service/ut/ya.make b/ydb/core/kqp/rm_service/ut/ya.make index 9c259fa0c3d4..fb595ee233f6 100644 --- a/ydb/core/kqp/rm_service/ut/ya.make +++ b/ydb/core/kqp/rm_service/ut/ya.make @@ -7,7 +7,6 @@ IF (SANITIZER_TYPE OR WITH_VALGRIND) ENDIF() SRCS( - kqp_resource_estimation_ut.cpp kqp_rm_ut.cpp ) diff --git a/ydb/core/kqp/runtime/kqp_read_actor.cpp b/ydb/core/kqp/runtime/kqp_read_actor.cpp index 7e02b7e58b76..b9bb9f748dd4 100644 --- a/ydb/core/kqp/runtime/kqp_read_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_read_actor.cpp @@ -837,8 +837,9 @@ class TKqpReadActor : public TActorBootstrapped, public NYql::NDq << ", limit: " << limit << ", readId = " << id << ", reverse = " << record.GetReverse() - << " snapshot = (txid=" << Settings->GetSnapshot().GetTxId() << ",step=" << Settings->GetSnapshot().GetStep() << ")" - << " lockTxId = " << Settings->GetLockTxId()); + << ", snapshot = (txid=" << Settings->GetSnapshot().GetTxId() << ",step=" << Settings->GetSnapshot().GetStep() << ")" + << ", lockTxId = " << Settings->GetLockTxId() + << ", lockNodeId = " << Settings->GetLockNodeId()); Counters->CreatedIterators->Inc(); ReadIdByTabletId[state->TabletId].push_back(id); @@ -894,6 +895,26 @@ class TKqpReadActor : public TActorBootstrapped, public NYql::NDq return; } + CA_LOG_D("Recv TEvReadResult from ShardID=" << Reads[id].Shard->TabletId + << ", ReadId=" << id + << ", Status=" << Ydb::StatusIds::StatusCode_Name(record.GetStatus().GetCode()) + << ", Finished=" << record.GetFinished() + << ", RowCount=" << record.GetRowCount() + << ", TxLocks= " << [&]() { + TStringBuilder builder; + for (const auto& lock : record.GetTxLocks()) { + builder << lock.ShortDebugString(); + } + return builder; + }() + << ", BrokenTxLocks= " << [&]() { + TStringBuilder builder; + for (const auto& lock : record.GetBrokenTxLocks()) { + builder << lock.ShortDebugString(); 
+ } + return builder; + }()); + if (!record.HasNodeId()) { Counters->ReadActorAbsentNodeId->Inc(); } else if (record.GetNodeId() != SelfId().NodeId()) { diff --git a/ydb/core/kqp/runtime/kqp_scan_data.cpp b/ydb/core/kqp/runtime/kqp_scan_data.cpp index 2e8b430681e9..96c8f6dbbca3 100644 --- a/ydb/core/kqp/runtime/kqp_scan_data.cpp +++ b/ydb/core/kqp/runtime/kqp_scan_data.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include @@ -296,26 +296,27 @@ TBytesStatistics WriteColumnValuesFromArrowSpecImpl(TAccessor editAccessor, auto trivialChunkedArray = std::make_shared(chunkedArrayExt); NArrow::NAccessor::IChunkedArray::TReader reader(trivialChunkedArray); - std::optional chunkIdx; std::optional currentIdxFrom; std::optional address; const typename TElementAccessor::TArrayType* currentArray = nullptr; const auto applyToIndex = [&](const ui32 rowIndexFrom, const ui32 rowIndexTo) { + bool changed = false; if (!currentIdxFrom) { address = reader.GetReadChunk(rowIndexFrom); AFL_ENSURE(rowIndexFrom == 0)("real", rowIndexFrom); + changed = true; } else { AFL_ENSURE(rowIndexFrom == *currentIdxFrom + 1)("next", rowIndexFrom)("current", *currentIdxFrom); if (!address->NextPosition()) { address = reader.GetReadChunk(rowIndexFrom); + changed = true; } } currentIdxFrom = rowIndexFrom; - if (!chunkIdx || *chunkIdx != address->GetChunkIdx()) { + if (changed) { currentArray = static_cast(address->GetArray().get()); TElementAccessor::Validate(*currentArray); - chunkIdx = address->GetChunkIdx(); } auto& rowItem = editAccessor(rowIndexTo, columnIndex); diff --git a/ydb/core/kqp/runtime/kqp_stream_lookup_actor.cpp b/ydb/core/kqp/runtime/kqp_stream_lookup_actor.cpp index e986a9cd3e96..7f94a524d3cc 100644 --- a/ydb/core/kqp/runtime/kqp_stream_lookup_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_stream_lookup_actor.cpp @@ -38,6 +38,7 @@ class TKqpStreamLookupActor : public NActors::TActorBootstrapped()) + , NodeLockId(settings.HasLockNodeId() ? 
settings.GetLockNodeId() : TMaybe()) , SchemeCacheRequestTimeout(SCHEME_CACHE_REQUEST_TIMEOUT) , StreamLookupWorker(CreateStreamLookupWorker(std::move(settings), args.TypeEnv, args.HolderFactory, args.InputDesc)) , Counters(counters) @@ -277,8 +278,6 @@ class TKqpStreamLookupActor : public NActors::TActorBootstrappedGet()->Record; - CA_LOG_D("TEvReadResult was received for table: " << StreamLookupWorker->GetTablePath() << - ", readId: " << record.GetReadId() << ", finished: " << record.GetFinished()); auto readIt = Reads.find(record.GetReadId()); if (readIt == Reads.end() || readIt->second.State != EReadState::Running) { @@ -288,6 +287,27 @@ class TKqpStreamLookupActor : public NActors::TActorBootstrappedsecond; + CA_LOG_D("Recv TEvReadResult (stream lookup) from ShardID=" << read.ShardId + << ", Table = " << StreamLookupWorker->GetTablePath() + << ", ReadId=" << record.GetReadId() + << ", Status=" << Ydb::StatusIds::StatusCode_Name(record.GetStatus().GetCode()) + << ", Finished=" << record.GetFinished() + << ", RowCount=" << record.GetRowCount() + << ", TxLocks= " << [&]() { + TStringBuilder builder; + for (const auto& lock : record.GetTxLocks()) { + builder << lock.ShortDebugString(); + } + return builder; + }() + << ", BrokenTxLocks= " << [&]() { + TStringBuilder builder; + for (const auto& lock : record.GetBrokenTxLocks()) { + builder << lock.ShortDebugString(); + } + return builder; + }()); + for (auto& lock : record.GetBrokenTxLocks()) { BrokenLocks.push_back(lock); } @@ -456,11 +476,22 @@ class TKqpStreamLookupActor : public NActors::TActorBootstrappedRecord; record.SetMaxRows(defaultSettings.GetMaxRows()); record.SetMaxBytes(defaultSettings.GetMaxBytes()); record.SetResultFormat(NKikimrDataEvents::FORMAT_CELLVEC); + CA_LOG_D(TStringBuilder() << "Send EvRead (stream lookup) to shardId=" << shardId + << ", readId = " << record.GetReadId() + << ", tablePath: " << StreamLookupWorker->GetTablePath() + << ", snapshot=(txid=" << record.GetSnapshot().GetTxId() << 
", step=" << record.GetSnapshot().GetStep() << ")" + << ", lockTxId=" << record.GetLockTxId() + << ", lockNodeId=" << record.GetLockNodeId()); + Send(MainPipeCacheId, new TEvPipeCache::TEvForward(request.Release(), shardId, true), IEventHandle::FlagTrackDelivery, 0, LookupActorSpan.GetTraceId()); @@ -586,6 +617,7 @@ class TKqpStreamLookupActor : public NActors::TActorBootstrapped LockTxId; + const TMaybe NodeLockId; std::unordered_map Reads; std::unordered_map ReadsPerShard; std::shared_ptr> Partitioning; diff --git a/ydb/core/kqp/runtime/kqp_stream_lookup_worker.cpp b/ydb/core/kqp/runtime/kqp_stream_lookup_worker.cpp index 4cfd3ea28c24..af995daf05bd 100644 --- a/ydb/core/kqp/runtime/kqp_stream_lookup_worker.cpp +++ b/ydb/core/kqp/runtime/kqp_stream_lookup_worker.cpp @@ -142,7 +142,8 @@ TKqpStreamLookupWorker::TKqpStreamLookupWorker(NKikimrKqp::TKqpStreamLookupSetti , HolderFactory(holderFactory) , InputDesc(inputDesc) , TablePath(settings.GetTable().GetPath()) - , TableId(MakeTableId(settings.GetTable())) { + , TableId(MakeTableId(settings.GetTable())) + , Strategy(settings.GetLookupStrategy()) { KeyColumns.reserve(settings.GetKeyColumns().size()); i32 keyOrder = 0; @@ -748,6 +749,11 @@ class TKqpJoinRows : public TKqpStreamLookupWorker { auto leftRowIt = PendingLeftRowsByKey.find(joinKeyCells); YQL_ENSURE(leftRowIt != PendingLeftRowsByKey.end()); + if (Strategy == NKqpProto::EStreamLookupStrategy::SEMI_JOIN && leftRowIt->second.RightRowExist) { + // Semi join should return one result row per key + continue; + } + TReadResultStats rowStats; i64 availableSpace = freeSpace - (i64)resultStats.ResultBytesCount; auto resultRow = TryBuildResultRow(leftRowIt->second, row, rowStats, availableSpace, result.ShardId); @@ -962,6 +968,7 @@ std::unique_ptr CreateStreamLookupWorker(NKikimrKqp::TKq case NKqpProto::EStreamLookupStrategy::LOOKUP: return std::make_unique(std::move(settings), typeEnv, holderFactory, inputDesc); case NKqpProto::EStreamLookupStrategy::JOIN: + case 
NKqpProto::EStreamLookupStrategy::SEMI_JOIN: return std::make_unique(std::move(settings), typeEnv, holderFactory, inputDesc); default: return {}; diff --git a/ydb/core/kqp/runtime/kqp_stream_lookup_worker.h b/ydb/core/kqp/runtime/kqp_stream_lookup_worker.h index 46b15745b3fc..6b9e35a1074f 100644 --- a/ydb/core/kqp/runtime/kqp_stream_lookup_worker.h +++ b/ydb/core/kqp/runtime/kqp_stream_lookup_worker.h @@ -71,6 +71,7 @@ class TKqpStreamLookupWorker { std::unordered_map KeyColumns; std::vector LookupKeyColumns; std::vector Columns; + const NKqpProto::EStreamLookupStrategy Strategy; }; std::unique_ptr CreateStreamLookupWorker(NKikimrKqp::TKqpStreamLookupSettings&& settings, diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index 8f63dcf2c030..085b02a1f68f 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -24,8 +24,8 @@ namespace { constexpr i64 kInFlightMemoryLimitPerActor = 64_MB; - constexpr i64 kMemoryLimitPerMessage = 48_MB; - constexpr i64 kMaxBatchesPerMessage = 1; + constexpr i64 kMemoryLimitPerMessage = 64_MB; + constexpr i64 kMaxBatchesPerMessage = 8; struct TWriteActorBackoffSettings { TDuration StartRetryDelay = TDuration::MilliSeconds(250); @@ -81,12 +81,12 @@ namespace { namespace NKikimr { namespace NKqp { -class TKqpWriteActor : public TActorBootstrapped, public NYql::NDq::IDqComputeActorAsyncOutput { - using TBase = TActorBootstrapped; +class TKqpDirectWriteActor : public TActorBootstrapped, public NYql::NDq::IDqComputeActorAsyncOutput { + using TBase = TActorBootstrapped; class TResumeNotificationManager { public: - TResumeNotificationManager(TKqpWriteActor& writer) + TResumeNotificationManager(TKqpDirectWriteActor& writer) : Writer(writer) { CheckMemory(); } @@ -102,7 +102,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N } private: - TKqpWriteActor& Writer; + TKqpDirectWriteActor& Writer; i64 LastFreeMemory = 
std::numeric_limits::max(); }; @@ -127,7 +127,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N }; public: - TKqpWriteActor( + TKqpDirectWriteActor( NKikimrKqp::TKqpTableSinkSettings&& settings, NYql::NDq::TDqAsyncIoFactory::TSinkArguments&& args, TIntrusivePtr counters) @@ -137,6 +137,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N , Callbacks(args.Callback) , Counters(counters) , TypeEnv(args.TypeEnv) + , Alloc(args.Alloc) , TxId(args.TxId) , TableId( Settings.GetTable().GetOwnerId(), @@ -157,13 +158,13 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N void Bootstrap() { LogPrefix = TStringBuilder() << "SelfId: " << this->SelfId() << ", " << LogPrefix; ResolveTable(); - Become(&TKqpWriteActor::StateFunc); + Become(&TKqpDirectWriteActor::StateFunc); } static constexpr char ActorName[] = "KQP_WRITE_ACTOR"; private: - virtual ~TKqpWriteActor() { + virtual ~TKqpDirectWriteActor() { } void CommitState(const NYql::NDqProto::TCheckpoint&) final {}; @@ -227,6 +228,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N hFunc(TEvPipeCache::TEvDeliveryProblem, Handle); IgnoreFunc(TEvTxUserProxy::TEvAllocateTxIdResult); hFunc(TEvPrivate::TEvShardRequestTimeout, Handle); + hFunc(TEvPrivate::TEvResolveRequestPlanned, Handle); IgnoreFunc(TEvInterconnect::TEvNodeConnected); IgnoreFunc(TEvTxProxySchemeCache::TEvInvalidateTableResult); } @@ -246,21 +248,26 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N } void PlanResolveTable() { + CA_LOG_D("Plan resolve with delay " << CalculateNextAttemptDelay(ResolveAttempts)); TlsActivationContext->Schedule( CalculateNextAttemptDelay(ResolveAttempts), new IEventHandle(SelfId(), SelfId(), new TEvPrivate::TEvResolveRequestPlanned{}, 0, 0)); } + void Handle(TEvPrivate::TEvResolveRequestPlanned::TPtr&) { + ResolveTable(); + } + void ResolveTable() { SchemeEntry.reset(); SchemeRequest.reset(); if (ResolveAttempts++ >= 
BackoffSettings()->MaxResolveAttempts) { - const auto error = TStringBuilder() - << "Too many table resolve attempts for Sink=" << this->SelfId() << "."; - CA_LOG_E(error); + CA_LOG_E(TStringBuilder() + << "Too many table resolve attempts for table " << TableId << "."); RuntimeError( - error, + TStringBuilder() + << "Too many table resolve attempts for table `" << Settings.GetTable().GetPath() << "`.", NYql::NDqProto::StatusIds::SCHEME_ERROR); return; } @@ -272,6 +279,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N entry.RequestType = NSchemeCache::TSchemeCacheNavigate::TEntry::ERequestType::ByTableId; entry.Operation = NSchemeCache::TSchemeCacheNavigate::OpTable; entry.SyncVersion = false; + entry.ShowPrivatePath = true; request->ResultSet.emplace_back(entry); Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvInvalidateTable(TableId, {})); @@ -279,14 +287,16 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N } void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { + auto& resultSet = ev->Get()->Request->ResultSet; + YQL_ENSURE(resultSet.size() == 1); + if (ev->Get()->Request->ErrorCount > 0) { CA_LOG_E(TStringBuilder() << "Failed to get table: " - << TableId << "'"); + << TableId << "'. 
Entry: " << resultSet[0].ToString()); PlanResolveTable(); return; } - auto& resultSet = ev->Get()->Request->ResultSet; - YQL_ENSURE(resultSet.size() == 1); + SchemeEntry = resultSet[0]; CA_LOG_D("Resolved TableId=" << TableId << " (" @@ -361,6 +371,18 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N return issues; }; + CA_LOG_D("Recv EvWriteResult from ShardID=" << ev->Get()->Record.GetOrigin() + << ", Status=" << NKikimrDataEvents::TEvWriteResult::EStatus_Name(ev->Get()->GetStatus()) + << ", TxId=" << ev->Get()->Record.GetTxId() + << ", Locks= " << [&]() { + TStringBuilder builder; + for (const auto& lock : ev->Get()->Record.GetTxLocks()) { + builder << lock.ShortDebugString(); + } + return builder; + }() + << ", Cookie=" << ev->Cookie); + switch (ev->Get()->GetStatus()) { case NKikimrDataEvents::TEvWriteResult::STATUS_UNSPECIFIED: { CA_LOG_E("Got UNSPECIFIED for table `" @@ -423,6 +445,13 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N << " Ignored this error." 
<< getIssues().ToOneLineString()); // TODO: support waiting + if (!InconsistentTx) { + RuntimeError( + TStringBuilder() << "Got OVERLOADED for table `" + << SchemeEntry->TableId.PathId.ToString() << "`.", + NYql::NDqProto::StatusIds::OVERLOADED, + getIssues()); + } return; } case NKikimrDataEvents::TEvWriteResult::STATUS_CANCELLED: { @@ -489,18 +518,30 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N CA_LOG_D("Got completed result TxId=" << ev->Get()->Record.GetTxId() << ", TabletId=" << ev->Get()->Record.GetOrigin() << ", Cookie=" << ev->Cookie - << ", LocksCount=" << ev->Get()->Record.GetTxLocks().size()); + << ", Locks=" << [&]() { + TStringBuilder builder; + for (const auto& lock : ev->Get()->Record.GetTxLocks()) { + builder << lock.ShortDebugString(); + } + return builder; + }()); - PopShardBatch(ev->Get()->Record.GetOrigin(), ev->Cookie); + OnMessageAcknowledged(ev->Get()->Record.GetOrigin(), ev->Cookie); for (const auto& lock : ev->Get()->Record.GetTxLocks()) { - LocksInfo[ev->Get()->Record.GetOrigin()].AddAndCheckLock(lock); + if (!LocksInfo[ev->Get()->Record.GetOrigin()].AddAndCheckLock(lock)) { + RuntimeError( + TStringBuilder() << "Got LOCKS BROKEN for table `" + << SchemeEntry->TableId.PathId.ToString() << "`.", + NYql::NDqProto::StatusIds::ABORTED, + NYql::TIssues{}); + } } ProcessBatches(); } - void PopShardBatch(ui64 shardId, ui64 cookie) { + void OnMessageAcknowledged(ui64 shardId, ui64 cookie) { TResumeNotificationManager resumeNotificator(*this); const auto removedDataSize = ShardedWriteController->OnMessageAcknowledged(shardId, cookie); if (removedDataSize) { @@ -579,10 +620,18 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N ShardedWriteController->GetDataFormat()); } - CA_LOG_D("Send EvWrite to ShardID=" << shardId << ", TxId=" << std::get(TxId) + CA_LOG_D("Send EvWrite to ShardID=" << shardId << ", TxId=" << evWrite->Record.GetTxId() + << ", TxMode=" << evWrite->Record.GetTxMode() << ", LockTxId=" << 
evWrite->Record.GetLockTxId() << ", LockNodeId=" << evWrite->Record.GetLockNodeId() + << ", Locks= " << [&]() { + TStringBuilder builder; + for (const auto& lock : evWrite->Record.GetLocks().GetLocks()) { + builder << lock.ShortDebugString(); + } + return builder; + }() << ", Size=" << serializationResult.TotalDataSize << ", Cookie=" << metadata->Cookie - << ", Operations=" << metadata->OperationsCount << ", IsFinal=" << metadata->IsFinal + << ", OperationsCount=" << metadata->OperationsCount << ", IsFinal=" << metadata->IsFinal << ", Attempts=" << metadata->SendAttempts); Send( PipeCacheId, @@ -669,7 +718,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N void PassAway() override { Send(PipeCacheId, new TEvPipeCache::TEvUnlink(0)); - TActorBootstrapped::PassAway(); + TActorBootstrapped::PassAway(); } void Prepare() { @@ -693,7 +742,8 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N : kMaxBatchesPerMessage), }, std::move(columnsMetadata), - TypeEnv); + TypeEnv, + Alloc); } catch (...) 
{ RuntimeError( CurrentExceptionMessage(), @@ -721,7 +771,6 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N Callbacks->ResumeExecution(); } - NActors::TActorId TxProxyId = MakeTxProxyID(); NActors::TActorId PipeCacheId = NKikimr::MakePipePerNodeCacheID(false); TString LogPrefix; @@ -731,6 +780,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N NYql::NDq::IDqComputeActorAsyncOutput::ICallbacks * Callbacks = nullptr; TIntrusivePtr Counters; const NMiniKQL::TTypeEnvironment& TypeEnv; + std::shared_ptr Alloc; const NYql::NDq::TTxId TxId; const TTableId TableId; @@ -754,7 +804,7 @@ void RegisterKqpWriteActor(NYql::NDq::TDqAsyncIoFactory& factory, TIntrusivePtr< factory.RegisterSink( TString(NYql::KqpTableSinkName), [counters] (NKikimrKqp::TKqpTableSinkSettings&& settings, NYql::NDq::TDqAsyncIoFactory::TSinkArguments&& args) { - auto* actor = new TKqpWriteActor(std::move(settings), std::move(args), counters); + auto* actor = new TKqpDirectWriteActor(std::move(settings), std::move(args), counters); return std::make_pair(actor, actor); }); } diff --git a/ydb/core/kqp/runtime/kqp_write_table.cpp b/ydb/core/kqp/runtime/kqp_write_table.cpp index 0fc9add54970..21dc4b1f2734 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.cpp +++ b/ydb/core/kqp/runtime/kqp_write_table.cpp @@ -1,7 +1,5 @@ #include "kqp_write_table.h" -#include -#include #include #include #include @@ -11,6 +9,7 @@ #include #include #include +#include #include namespace NKikimr { @@ -18,8 +17,44 @@ namespace NKqp { namespace { -constexpr ui64 MaxBatchBytes = 8_MB; -constexpr ui64 MaxUnshardedBatchBytes = 4_MB; +constexpr ui64 DataShardMaxOperationBytes = 8_MB; +constexpr ui64 ColumnShardMaxOperationBytes = 8_MB; +constexpr ui64 MaxUnshardedBatchBytes = 0_MB; + +class IPayloadSerializer : public TThrRefBase { +public: + class IBatch : public TThrRefBase { + public: + virtual TString SerializeToString() const = 0; + virtual i64 GetMemory() const = 0; + bool IsEmpty() const; + 
}; + + using IBatchPtr = TIntrusivePtr; + + virtual void AddData(NMiniKQL::TUnboxedValueBatch&& data) = 0; + virtual void AddBatch(const IBatchPtr& batch) = 0; + + virtual void Close() = 0; + + virtual bool IsClosed() = 0; + virtual bool IsEmpty() = 0; + virtual bool IsFinished() = 0; + + virtual NKikimrDataEvents::EDataFormat GetDataFormat() = 0; + virtual std::vector GetWriteColumnIds() = 0; + + using TBatches = THashMap>; + + virtual TBatches FlushBatchesForce() = 0; + + virtual IBatchPtr FlushBatch(ui64 shardId) = 0; + virtual const THashSet& GetShardIds() const = 0; + + virtual i64 GetMemory() = 0; +}; + +using IPayloadSerializerPtr = TIntrusivePtr; TVector BuildColumns(const TConstArrayRef inputColumns) { TVector result; @@ -169,6 +204,136 @@ TVector BuildKeyColumnTypes( return keyColumnTypes; } +struct TRowWithData { + TVector Cells; + NUdf::TStringValue Data; +}; + +class TRowBuilder { +private: + struct TCellInfo { + NScheme::TTypeInfo Type; + NUdf::TUnboxedValuePod Value; + TString PgBinaryValue; + }; + +public: + explicit TRowBuilder(size_t size) + : CellsInfo(size) { + } + + TRowBuilder& AddCell( + const size_t index, + const NScheme::TTypeInfo type, + const NUdf::TUnboxedValuePod& value, + const i32 typmod = -1) { + CellsInfo[index].Type = type; + CellsInfo[index].Value = value; + + if (type.GetTypeId() == NScheme::NTypeIds::Pg) { + const auto typeDesc = type.GetTypeDesc(); + if (typmod != -1 && NPg::TypeDescNeedsCoercion(typeDesc)) { + TMaybe err; + CellsInfo[index].PgBinaryValue = NYql::NCommon::PgValueCoerce(value, NPg::PgTypeIdFromTypeDesc(typeDesc), typmod, &err); + if (err) { + ythrow yexception() << "PgValueCoerce error: " << *err; + } + } else { + CellsInfo[index].PgBinaryValue = NYql::NCommon::PgValueToNativeBinary(value, NPg::PgTypeIdFromTypeDesc(typeDesc)); + } + } else { + CellsInfo[index].PgBinaryValue.clear(); + } + return *this; + } + + size_t DataSize() const { + size_t result = 0; + for (const auto& cellInfo : CellsInfo) { + result += 
GetCellSize(cellInfo); + } + return result; + } + + TRowWithData Build() { + TVector cells; + cells.reserve(CellsInfo.size()); + const auto size = DataSize(); + auto data = Allocate(size); + char* ptr = data.Data(); + + for (const auto& cellInfo : CellsInfo) { + cells.push_back(BuildCell(cellInfo, ptr)); + } + + AFL_ENSURE(ptr == data.Data() + size); + + return TRowWithData { + .Cells = std::move(cells), + .Data = std::move(data), + }; + } + +private: + TCell BuildCell(const TCellInfo& cellInfo, char*& dataPtr) { + if (!cellInfo.Value) { + return TCell(); + } + + switch(cellInfo.Type.GetTypeId()) { + #define MAKE_PRIMITIVE_TYPE_CELL_CASE(type, layout) \ + case NUdf::TDataType::Id: return NMiniKQL::MakeCell(cellInfo.Value); + KNOWN_FIXED_VALUE_TYPES(MAKE_PRIMITIVE_TYPE_CELL_CASE) + case NUdf::TDataType::Id: + { + auto intValue = cellInfo.Value.GetInt128(); + constexpr auto valueSize = sizeof(intValue); + + char* initialPtr = dataPtr; + std::memcpy(initialPtr, reinterpret_cast(&intValue), valueSize); + dataPtr += valueSize; + return TCell(initialPtr, valueSize); + } + } + + const auto ref = cellInfo.Type.GetTypeId() == NScheme::NTypeIds::Pg + ? 
NYql::NUdf::TStringRef(cellInfo.PgBinaryValue) + : cellInfo.Value.AsStringRef(); + + char* initialPtr = dataPtr; + std::memcpy(initialPtr, ref.Data(), ref.Size()); + dataPtr += ref.Size(); + return TCell(initialPtr, ref.Size()); + } + + size_t GetCellSize(const TCellInfo& cellInfo) const { + if (!cellInfo.Value) { + return 0; + } + + switch(cellInfo.Type.GetTypeId()) { + #define MAKE_PRIMITIVE_TYPE_CELL_CASE_SIZE(type, layout) \ + case NUdf::TDataType::Id: + KNOWN_FIXED_VALUE_TYPES(MAKE_PRIMITIVE_TYPE_CELL_CASE_SIZE) + return 0; + case NUdf::TDataType::Id: + return sizeof(cellInfo.Value.GetInt128()); + } + + if (cellInfo.Type.GetTypeId() == NScheme::NTypeIds::Pg) { + return cellInfo.PgBinaryValue.size(); + } + return cellInfo.Value.AsStringRef().Size(); + } + + NUdf::TStringValue Allocate(size_t size) { + Y_DEBUG_ABORT_UNLESS(NMiniKQL::TlsAllocState); + return NUdf::TStringValue(size); + } + + TVector CellsInfo; +}; + class TColumnShardPayloadSerializer : public IPayloadSerializer { using TRecordBatchPtr = std::shared_ptr; @@ -206,10 +371,8 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { public: TColumnShardPayloadSerializer( const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry, - const TConstArrayRef inputColumns, // key columns then value columns - const NMiniKQL::TTypeEnvironment& typeEnv) - : TypeEnv(typeEnv) - , Columns(BuildColumns(inputColumns)) + const TConstArrayRef inputColumns) // key columns then value columns + : Columns(BuildColumns(inputColumns)) , WriteIndex(BuildWriteIndex(schemeEntry, inputColumns)) , WriteColumnIds(BuildWriteColumnIds(inputColumns, WriteIndex)) , BatchBuilder(arrow::Compression::UNCOMPRESSED, BuildNotNullColumns(inputColumns)) { @@ -241,16 +404,13 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { return; } - TVector cells(Columns.size()); + TRowBuilder rowBuilder(Columns.size()); data.ForEachRow([&](const auto& row) { for (size_t index = 0; index < Columns.size(); ++index) { - 
cells[WriteIndex[index]] = MakeCell( - Columns[index].PType, - row.GetElement(index), - TypeEnv, - /* copy */ false); + rowBuilder.AddCell(WriteIndex[index], Columns[index].PType, row.GetElement(index)); } - BatchBuilder.AddRow(TConstArrayRef{cells.begin(), cells.end()}); + auto rowWithData = rowBuilder.Build(); + BatchBuilder.AddRow(TConstArrayRef{rowWithData.Cells.begin(), rowWithData.Cells.end()}); }); FlushUnsharded(false); @@ -265,7 +425,7 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { } void FlushUnsharded(bool force) { - if ((BatchBuilder.Bytes() > 0 && force) || BatchBuilder.Bytes() >= MaxUnshardedBatchBytes) { + if ((BatchBuilder.Bytes() > 0 && force) || BatchBuilder.Bytes() > MaxUnshardedBatchBytes) { const auto unshardedBatch = BatchBuilder.FlushBatch(true); YQL_ENSURE(unshardedBatch); ShardAndFlushBatch(unshardedBatch, force); @@ -289,7 +449,7 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { } void FlushUnpreparedBatch(const ui64 shardId, TUnpreparedBatch& unpreparedBatch, bool force) { - while (!unpreparedBatch.Batches.empty() && (unpreparedBatch.TotalDataSize >= MaxBatchBytes || force)) { + while (!unpreparedBatch.Batches.empty() && (unpreparedBatch.TotalDataSize >= ColumnShardMaxOperationBytes || force)) { std::vector toPrepare; i64 toPrepareSize = 0; while (!unpreparedBatch.Batches.empty()) { @@ -309,7 +469,7 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { for (i64 index = 0; index < batch->num_rows(); ++index) { i64 nextRowSize = rowCalculator.GetRowBytesSize(index); - if (toPrepareSize + nextRowSize >= (i64)MaxBatchBytes) { + if (toPrepareSize + nextRowSize >= (i64)ColumnShardMaxOperationBytes) { YQL_ENSURE(index > 0); toPrepare.push_back(batch->Slice(0, index)); @@ -413,7 +573,6 @@ class TColumnShardPayloadSerializer : public IPayloadSerializer { } private: - const NMiniKQL::TTypeEnvironment& TypeEnv; std::shared_ptr Sharding; const TVector Columns; @@ -434,7 +593,7 @@ class 
TDataShardPayloadSerializer : public IPayloadSerializer { class TBatch : public IPayloadSerializer::IBatch { public: TString SerializeToString() const override { - return TSerializedCellMatrix::Serialize(Data, Rows, Columns); + return TSerializedCellMatrix::Serialize(Cells, Rows, Columns); } i64 GetMemory() const override { @@ -442,37 +601,94 @@ class TDataShardPayloadSerializer : public IPayloadSerializer { } bool IsEmpty() const { - return Data.empty(); + return Cells.empty(); } - std::vector Extract() { + std::pair, std::vector> Extract() { Size = 0; Rows = 0; - return std::move(Data); + return {std::move(Cells), std::move(Data)}; } - TBatch(std::vector&& data, i64 size, ui32 rows, ui16 columns) - : Data(std::move(data)) + TBatch(std::vector&& cells, std::vector&& data, i64 size, ui32 rows, ui16 columns) + : Cells(std::move(cells)) + , Data(std::move(data)) , Size(size) , Rows(rows) , Columns(columns) { } private: - std::vector Data; + std::vector Cells; + std::vector Data; ui64 Size = 0; ui32 Rows = 0; ui16 Columns = 0; }; + class TRowsBatcher { + public: + explicit TRowsBatcher(ui16 columnCount, ui64 maxBytesPerBatch) + : ColumnCount(columnCount) + , MaxBytesPerBatch(maxBytesPerBatch) { + } + + bool IsEmpty() const { + return Batches.empty(); + } + + struct TBatch { + ui64 Memory = 0; + ui64 MemorySerialized = 0; + TVector Cells; + TVector Data; + }; + + TBatch Flush(bool force) { + TBatch res; + if ((!Batches.empty() && force) || Batches.size() > 1) { + res = std::move(Batches.front()); + Batches.pop_front(); + } + return res; + } + + ui64 AddRow(TRowWithData&& rowWithData) { + Y_ABORT_UNLESS(rowWithData.Cells.size() == ColumnCount); + ui64 newMemory = 0; + for (const auto& cell : rowWithData.Cells) { + newMemory += cell.Size(); + } + if (Batches.empty() || newMemory + GetCellHeaderSize() * ColumnCount + Batches.back().MemorySerialized > MaxBytesPerBatch) { + Batches.emplace_back(); + Batches.back().Memory = 0; + Batches.back().MemorySerialized = 
GetCellMatrixHeaderSize(); + } + + for (auto& cell : rowWithData.Cells) { + Batches.back().Cells.emplace_back(std::move(cell)); + } + Batches.back().Data.emplace_back(std::move(rowWithData.Data)); + + Batches.back().Memory += newMemory; + Batches.back().MemorySerialized += newMemory + GetCellHeaderSize() * ColumnCount; + + return newMemory; + } + + private: + std::deque Batches; + + ui16 ColumnCount; + ui64 MaxBytesPerBatch; + }; + public: TDataShardPayloadSerializer( const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry, NSchemeCache::TSchemeCacheRequest::TEntry&& partitionsEntry, - const TConstArrayRef inputColumns, - const NMiniKQL::TTypeEnvironment& typeEnv) - : TypeEnv(typeEnv) - , SchemeEntry(schemeEntry) + const TConstArrayRef inputColumns) + : SchemeEntry(schemeEntry) , KeyDescription(std::move(partitionsEntry.KeyDescription)) , Columns(BuildColumns(inputColumns)) , WriteIndex(BuildWriteIndexKeyFirst(SchemeEntry, inputColumns)) @@ -480,11 +696,11 @@ class TDataShardPayloadSerializer : public IPayloadSerializer { , KeyColumnTypes(BuildKeyColumnTypes(SchemeEntry)) { } - void AddRow(TArrayRef row, const TKeyDesc& keyRange) { + void AddRow(TRowWithData&& row, const TKeyDesc& keyRange) { auto shardIter = std::lower_bound( std::begin(keyRange.GetPartitions()), std::end(keyRange.GetPartitions()), - TArrayRef(row.data(), KeyColumnTypes.size()), + TArrayRef(row.Cells.data(), KeyColumnTypes.size()), [this](const auto &partition, const auto& key) { const auto& range = *partition.Range; return 0 > CompareBorders(range.EndKeyPrefix.GetCells(), key, @@ -497,42 +713,39 @@ class TDataShardPayloadSerializer : public IPayloadSerializer { if (batcherIter == std::end(Batchers)) { Batchers.emplace( shardIter->ShardId, - TCellsBatcher(Columns.size(), MaxBatchBytes)); + TRowsBatcher(Columns.size(), DataShardMaxOperationBytes)); } - Memory += Batchers.at(shardIter->ShardId).AddRow(row); + Memory += Batchers.at(shardIter->ShardId).AddRow(std::move(row)); 
ShardIds.insert(shardIter->ShardId); } void AddData(NMiniKQL::TUnboxedValueBatch&& data) override { YQL_ENSURE(!Closed); - TVector cells(Columns.size()); + TRowBuilder rowBuilder(Columns.size()); data.ForEachRow([&](const auto& row) { for (size_t index = 0; index < Columns.size(); ++index) { - // TODO: move to SerializedVector - cells[WriteIndex[index]] = MakeCell( - Columns[index].PType, - row.GetElement(index), - TypeEnv, - /* copy */ true); + rowBuilder.AddCell(WriteIndex[index], Columns[index].PType, row.GetElement(index)); } - AddRow(cells, GetKeyRange()); - - cells.resize(Columns.size()); + auto rowWithData = rowBuilder.Build(); + AddRow(std::move(rowWithData), GetKeyRange()); }); } void AddBatch(const IPayloadSerializer::IBatchPtr& batch) override { auto datashardBatch = dynamic_cast(batch.Get()); YQL_ENSURE(datashardBatch); - auto data = datashardBatch->Extract(); - const auto rows = data.size() / Columns.size(); - YQL_ENSURE(data.size() == rows * Columns.size()); + auto [cells, data] = datashardBatch->Extract(); + const auto rows = cells.size() / Columns.size(); + YQL_ENSURE(cells.size() == rows * Columns.size()); for (size_t rowIndex = 0; rowIndex < rows; ++rowIndex) { AddRow( - TArrayRef{&data[rowIndex * Columns.size()], Columns.size()}, + TRowWithData{ + TVector(cells.begin() + (rowIndex * Columns.size()), cells.begin() + (rowIndex * Columns.size()) + Columns.size()), + data[rowIndex], + }, GetKeyRange()); } } @@ -566,12 +779,13 @@ class TDataShardPayloadSerializer : public IPayloadSerializer { return IsClosed() && IsEmpty(); } - IBatchPtr ExtractNextBatch(TCellsBatcher& batcher, bool force) { + IBatchPtr ExtractNextBatch(TRowsBatcher& batcher, bool force) { auto batchResult = batcher.Flush(force); Memory -= batchResult.Memory; - const ui32 rows = batchResult.Data.size() / Columns.size(); + const ui32 rows = batchResult.Cells.size() / Columns.size(); YQL_ENSURE(Columns.size() <= std::numeric_limits::max()); return MakeIntrusive( + 
std::move(batchResult.Cells), std::move(batchResult.Data), static_cast(batchResult.MemorySerialized), rows, @@ -610,7 +824,6 @@ class TDataShardPayloadSerializer : public IPayloadSerializer { return *KeyDescription; } - const NMiniKQL::TTypeEnvironment& TypeEnv; const NSchemeCache::TSchemeCacheNavigate::TEntry SchemeEntry; THolder KeyDescription; @@ -619,7 +832,7 @@ class TDataShardPayloadSerializer : public IPayloadSerializer { const std::vector WriteColumnIds; const TVector KeyColumnTypes; - THashMap Batchers; + THashMap Batchers; THashSet ShardIds; i64 Memory = 0; @@ -635,19 +848,17 @@ bool IPayloadSerializer::IBatch::IsEmpty() const { IPayloadSerializerPtr CreateColumnShardPayloadSerializer( const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry, - const TConstArrayRef inputColumns, - const NMiniKQL::TTypeEnvironment& typeEnv) { + const TConstArrayRef inputColumns) { return MakeIntrusive( - schemeEntry, inputColumns, typeEnv); + schemeEntry, inputColumns); } IPayloadSerializerPtr CreateDataShardPayloadSerializer( const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry, NSchemeCache::TSchemeCacheRequest::TEntry&& partitionsEntry, - const TConstArrayRef inputColumns, - const NMiniKQL::TTypeEnvironment& typeEnv) { + const TConstArrayRef inputColumns) { return MakeIntrusive( - schemeEntry, std::move(partitionsEntry), inputColumns, typeEnv); + schemeEntry, std::move(partitionsEntry), inputColumns); } namespace { @@ -826,8 +1037,7 @@ class TShardedWriteController : public IShardedWriteController { BeforePartitioningChanged(); Serializer = CreateColumnShardPayloadSerializer( schemeEntry, - InputColumnsMetadata, - TypeEnv); + InputColumnsMetadata); AfterPartitioningChanged(); } @@ -838,8 +1048,7 @@ class TShardedWriteController : public IShardedWriteController { Serializer = CreateDataShardPayloadSerializer( schemeEntry, std::move(partitionsEntry), - InputColumnsMetadata, - TypeEnv); + InputColumnsMetadata); AfterPartitioningChanged(); } @@ -867,6 +1076,7 
@@ class TShardedWriteController : public IShardedWriteController { YQL_ENSURE(!data.IsWide(), "Wide stream is not supported yet"); YQL_ENSURE(!Closed); + auto allocGuard = TypeEnv.BindAllocator(); YQL_ENSURE(Serializer); Serializer->AddData(std::move(data)); @@ -874,6 +1084,7 @@ class TShardedWriteController : public IShardedWriteController { } void Close() override { + auto allocGuard = TypeEnv.BindAllocator(); YQL_ENSURE(Serializer); Closed = true; Serializer->Close(); @@ -931,6 +1142,7 @@ class TShardedWriteController : public IShardedWriteController { } std::optional OnMessageAcknowledged(ui64 shardId, ui64 cookie) override { + auto allocGuard = TypeEnv.BindAllocator(); auto& shardInfo = ShardsInfo.GetShard(shardId); const auto removedDataSize = shardInfo.PopBatches(cookie); return removedDataSize; @@ -972,10 +1184,19 @@ class TShardedWriteController : public IShardedWriteController { TShardedWriteController( const TShardedWriteControllerSettings settings, TVector&& inputColumnsMetadata, - const NMiniKQL::TTypeEnvironment& typeEnv) + const NMiniKQL::TTypeEnvironment& typeEnv, + std::shared_ptr alloc) : Settings(settings) , InputColumnsMetadata(std::move(inputColumnsMetadata)) - , TypeEnv(typeEnv) { + , TypeEnv(typeEnv) + , Alloc(alloc) { + } + + ~TShardedWriteController() { + Y_ABORT_UNLESS(Alloc); + TGuard allocGuard(*Alloc); + ShardsInfo.Clear(); + Serializer = nullptr; } private: @@ -1019,6 +1240,7 @@ class TShardedWriteController : public IShardedWriteController { TShardedWriteControllerSettings Settings; TVector InputColumnsMetadata; const NMiniKQL::TTypeEnvironment& TypeEnv; + std::shared_ptr Alloc; TShardsInfo ShardsInfo; bool Closed = false; @@ -1032,8 +1254,10 @@ class TShardedWriteController : public IShardedWriteController { IShardedWriteControllerPtr CreateShardedWriteController( const TShardedWriteControllerSettings& settings, TVector&& inputColumns, - const NMiniKQL::TTypeEnvironment& typeEnv) { - return MakeIntrusive(settings, 
std::move(inputColumns), typeEnv); + const NMiniKQL::TTypeEnvironment& typeEnv, + std::shared_ptr alloc) { + return MakeIntrusive( + settings, std::move(inputColumns), typeEnv, alloc); } } diff --git a/ydb/core/kqp/runtime/kqp_write_table.h b/ydb/core/kqp/runtime/kqp_write_table.h index 7846cb954cc6..46e5ac4f7308 100644 --- a/ydb/core/kqp/runtime/kqp_write_table.h +++ b/ydb/core/kqp/runtime/kqp_write_table.h @@ -10,54 +10,6 @@ namespace NKikimr { namespace NKqp { -class IPayloadSerializer : public TThrRefBase { -public: - class IBatch : public TThrRefBase { - public: - virtual TString SerializeToString() const = 0; - virtual i64 GetMemory() const = 0; - bool IsEmpty() const; - }; - - using IBatchPtr = TIntrusivePtr; - - virtual void AddData(NMiniKQL::TUnboxedValueBatch&& data) = 0; - virtual void AddBatch(const IBatchPtr& batch) = 0; - - virtual void Close() = 0; - - virtual bool IsClosed() = 0; - virtual bool IsEmpty() = 0; - virtual bool IsFinished() = 0; - - virtual NKikimrDataEvents::EDataFormat GetDataFormat() = 0; - virtual std::vector GetWriteColumnIds() = 0; - - using TBatches = THashMap>; - - virtual TBatches FlushBatchesForce() = 0; - - virtual IBatchPtr FlushBatch(ui64 shardId) = 0; - virtual const THashSet& GetShardIds() const = 0; - - virtual i64 GetMemory() = 0; -}; - -using IPayloadSerializerPtr = TIntrusivePtr; - - -IPayloadSerializerPtr CreateColumnShardPayloadSerializer( - const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry, - const TConstArrayRef inputColumns, - const NMiniKQL::TTypeEnvironment& typeEnv); - -IPayloadSerializerPtr CreateDataShardPayloadSerializer( - const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry, - NSchemeCache::TSchemeCacheRequest::TEntry&& partitionsEntry, - const TConstArrayRef inputColumns, - const NMiniKQL::TTypeEnvironment& typeEnv); - - class IShardedWriteController : public TThrRefBase { public: virtual void OnPartitioningChanged(const NSchemeCache::TSchemeCacheNavigate::TEntry& schemeEntry) = 0; 
@@ -112,7 +64,8 @@ struct TShardedWriteControllerSettings { IShardedWriteControllerPtr CreateShardedWriteController( const TShardedWriteControllerSettings& settings, TVector&& inputColumns, - const NMiniKQL::TTypeEnvironment& typeEnv); + const NMiniKQL::TTypeEnvironment& typeEnv, + std::shared_ptr alloc); } } diff --git a/ydb/core/kqp/session_actor/kqp_query_state.cpp b/ydb/core/kqp/session_actor/kqp_query_state.cpp index e35e25292f00..98225965d603 100644 --- a/ydb/core/kqp/session_actor/kqp_query_state.cpp +++ b/ydb/core/kqp/session_actor/kqp_query_state.cpp @@ -117,7 +117,7 @@ std::unique_ptr TKqpQueryState::BuildN auto navigate = MakeHolder(); navigate->DatabaseName = Database; - if (UserToken && !UserToken->GetSerializedToken().empty()) { + if (HasUserToken()) { navigate->UserToken = UserToken; } @@ -160,7 +160,7 @@ bool TKqpQueryState::SaveAndCheckCompileResult(TEvKqp::TEvCompileResponse* ev) { CommandTagName = CompileResult->CommandTagName; } for (const auto& param : PreparedQuery->GetParameters()) { - const auto& ast = CompileResult->Ast; + const auto& ast = CompileResult->GetAst(); if (!ast || !ast->PgAutoParamValues || !ast->PgAutoParamValues->contains(param.GetName())) { ResultParams.push_back(param); } @@ -275,15 +275,9 @@ std::unique_ptr TKqpQueryState::BuildReCompileReque compileDeadline = Min(compileDeadline, QueryDeadlines.CancelAt); } - TMaybe statementAst; - if (!Statements.empty()) { - YQL_ENSURE(CurrentStatementId < Statements.size()); - statementAst = Statements[CurrentStatementId]; - } - return std::make_unique(UserToken, CompileResult->Uid, query, isQueryActionPrepare, compileDeadline, DbCounters, gUCSettingsPtr, ApplicationName, std::move(cookie), UserRequestContext, std::move(Orbit), TempTablesState, - statementAst); + CompileResult->QueryAst); } std::unique_ptr TKqpQueryState::BuildSplitRequest(std::shared_ptr> cookie, const TGUCSettings::TPtr& gUCSettingsPtr) { @@ -412,12 +406,23 @@ std::unique_ptr TKqpQueryState::BuildSchemeC consumer = 
operations.GetConsumer(); TopicOperations.FillSchemeCacheNavigate(*navigate, std::move(consumer)); - navigate->UserToken = UserToken; + if (HasUserToken()) { + navigate->UserToken = UserToken; + } navigate->Cookie = QueryId; return navigate; } +bool TKqpQueryState::HasUserToken() const +{ + return UserToken && !UserToken->GetSerializedToken().empty(); +} + bool TKqpQueryState::IsAccessDenied(const NSchemeCache::TSchemeCacheNavigate& response, TString& message) { + if (!HasUserToken()) { + return false; + } + auto checkAccessDenied = [&] (const NSchemeCache::TSchemeCacheNavigate::TEntry& result) { static const auto selectRowRights = NACLib::EAccessRights::SelectRow; static const auto accessAttributesRights = NACLib::EAccessRights::ReadAttributes | NACLib::EAccessRights::WriteAttributes; diff --git a/ydb/core/kqp/session_actor/kqp_query_state.h b/ydb/core/kqp/session_actor/kqp_query_state.h index 38447c484c8c..e88ff2e79a34 100644 --- a/ydb/core/kqp/session_actor/kqp_query_state.h +++ b/ydb/core/kqp/session_actor/kqp_query_state.h @@ -67,8 +67,9 @@ class TKqpQueryState : public TNonCopyable { , StartedAt(startedAt) { RequestEv.reset(ev->Release().Release()); - - if (AppData()->FeatureFlags.GetEnableImplicitQueryParameterTypes() && !RequestEv->GetYdbParameters().empty()) { + bool enableImplicitQueryParameterTypes = tableServiceConfig.GetEnableImplicitQueryParameterTypes() || + AppData()->FeatureFlags.GetEnableImplicitQueryParameterTypes(); + if (enableImplicitQueryParameterTypes && !RequestEv->GetYdbParameters().empty()) { QueryParameterTypes = std::make_shared>(); for (const auto& [name, typedValue] : RequestEv->GetYdbParameters()) { QueryParameterTypes->insert({name, typedValue.Gettype()}); @@ -86,6 +87,7 @@ class TKqpQueryState : public TNonCopyable { UserRequestContext = MakeIntrusive(RequestEv->GetTraceId(), Database, sessionId); } UserRequestContext->PoolId = RequestEv->GetPoolId(); + UserRequestContext->PoolConfig = RequestEv->GetPoolConfig(); } // the 
monotonously growing counter, the ordinal number of the query, @@ -115,6 +117,7 @@ class TKqpQueryState : public TNonCopyable { bool IsDocumentApiRestricted_ = false; TInstant StartTime; + TInstant ContinueTime; NYql::TKikimrQueryDeadlines QueryDeadlines; TKqpQueryStats QueryStats; bool KeepSession = false; @@ -313,10 +316,6 @@ class TKqpQueryState : public TNonCopyable { bool NeedPersistentSnapshot() const { auto type = GetType(); - if (type == NKikimrKqp::QUERY_TYPE_SQL_GENERIC_CONCURRENT_QUERY || - type == NKikimrKqp::QUERY_TYPE_SQL_GENERIC_QUERY) { - return ::NKikimr::NKqp::HasOlapTableReadInTx(PreparedQuery->GetPhysicalQuery()); - } return ( type == NKikimrKqp::QUERY_TYPE_SQL_SCAN || type == NKikimrKqp::QUERY_TYPE_AST_SCAN @@ -494,18 +493,6 @@ class TKqpQueryState : public TNonCopyable { PrepareCurrentStatement(); } - void PrepareStatementTransaction(NKqpProto::TKqpPhyTx_EType txType) { - if (!HasTxControl()) { - switch (txType) { - case NKqpProto::TKqpPhyTx::TYPE_SCHEME: - TxCtx->EffectiveIsolationLevel = NKikimrKqp::ISOLATION_LEVEL_UNDEFINED; - break; - default: - TxCtx->EffectiveIsolationLevel = NKikimrKqp::ISOLATION_LEVEL_SERIALIZABLE; - } - } - } - // validate the compiled query response and ensure that all table versions are not // changed since the last compilation. 
bool EnsureTableVersions(const TEvTxProxySchemeCache::TEvNavigateKeySetResult& response); @@ -603,6 +590,8 @@ class TKqpQueryState : public TNonCopyable { std::unique_ptr BuildSchemeCacheNavigate(); bool IsAccessDenied(const NSchemeCache::TSchemeCacheNavigate& response, TString& message); bool HasErrors(const NSchemeCache::TSchemeCacheNavigate& response, TString& message); + + bool HasUserToken() const; }; diff --git a/ydb/core/kqp/session_actor/kqp_query_stats.cpp b/ydb/core/kqp/session_actor/kqp_query_stats.cpp index e26d6b5e7b8f..922b788419ea 100644 --- a/ydb/core/kqp/session_actor/kqp_query_stats.cpp +++ b/ydb/core/kqp/session_actor/kqp_query_stats.cpp @@ -210,6 +210,7 @@ ui64 CalcRequestUnit(const TKqpQueryStats& stats) { NKqpProto::TKqpStatsQuery TKqpQueryStats::ToProto() const { NKqpProto::TKqpStatsQuery result; result.SetDurationUs(DurationUs); + result.SetQueuedTimeUs(QueuedTimeUs); if (Compilation) { result.MutableCompilation()->SetFromCache(Compilation->FromCache); diff --git a/ydb/core/kqp/session_actor/kqp_query_stats.h b/ydb/core/kqp/session_actor/kqp_query_stats.h index f73ce6316f07..9cda3417beb9 100644 --- a/ydb/core/kqp/session_actor/kqp_query_stats.h +++ b/ydb/core/kqp/session_actor/kqp_query_stats.h @@ -8,6 +8,7 @@ namespace NKikimr::NKqp { struct TKqpQueryStats { ui64 DurationUs = 0; + ui64 QueuedTimeUs = 0; std::optional Compilation; ui64 WorkerCpuTimeUs = 0; diff --git a/ydb/core/kqp/session_actor/kqp_response.cpp b/ydb/core/kqp/session_actor/kqp_response.cpp index e1864d104c19..cc90a3031bf2 100644 --- a/ydb/core/kqp/session_actor/kqp_response.cpp +++ b/ydb/core/kqp/session_actor/kqp_response.cpp @@ -53,36 +53,6 @@ bool HasSchemeOrFatalIssues(const TIssue& issue) { } // namespace -void ConvertKqpQueryResultToDbResult(const NKikimrMiniKQL::TResult& from, Ydb::ResultSet* to) { - const auto& type = from.GetType(); - TStackVec columnTypes; - Y_ENSURE(type.GetKind() == NKikimrMiniKQL::ETypeKind::Struct); - for (const auto& member : 
type.GetStruct().GetMember()) { - if (member.GetType().GetKind() == NKikimrMiniKQL::ETypeKind::List) { - for (const auto& column : member.GetType().GetList().GetItem().GetStruct().GetMember()) { - auto columnMeta = to->add_columns(); - columnMeta->set_name(column.GetName()); - columnTypes.push_back(column.GetType()); - ConvertMiniKQLTypeToYdbType(column.GetType(), *columnMeta->mutable_type()); - } - } - } - for (const auto& responseStruct : from.GetValue().GetStruct()) { - for (const auto& row : responseStruct.GetList()) { - auto newRow = to->add_rows(); - ui32 columnCount = static_cast(row.StructSize()); - Y_ENSURE(columnCount == columnTypes.size()); - for (ui32 i = 0; i < columnCount; i++) { - const auto& column = row.GetStruct(i); - ConvertMiniKQLValueToYdbValue(columnTypes[i], column, *newRow->add_items()); - } - } - if (responseStruct.Getvalue_valueCase() == NKikimrMiniKQL::TValue::kBool) { - to->set_truncated(responseStruct.GetBool()); - } - } -} - TMaybe GetYdbStatus(const TIssue& issue) { if (issue.GetSeverity() == TSeverityIds::S_FATAL) { return Ydb::StatusIds::INTERNAL_ERROR; diff --git a/ydb/core/kqp/session_actor/kqp_session_actor.cpp b/ydb/core/kqp/session_actor/kqp_session_actor.cpp index b66c28e20be1..9f8adedc4509 100644 --- a/ydb/core/kqp/session_actor/kqp_session_actor.cpp +++ b/ydb/core/kqp/session_actor/kqp_session_actor.cpp @@ -159,7 +159,10 @@ class TKqpSessionActor : public TActorBootstrapped { return NKikimrServices::TActivity::KQP_SESSION_ACTOR; } - TKqpSessionActor(const TActorId& owner, const TString& sessionId, const TKqpSettings::TConstPtr& kqpSettings, + TKqpSessionActor(const TActorId& owner, + std::shared_ptr resourceManager, + std::shared_ptr caFactory, + const TString& sessionId, const TKqpSettings::TConstPtr& kqpSettings, const TKqpWorkerSettings& workerSettings, std::optional federatedQuerySetup, NYql::NDq::IDqAsyncIoFactory::TPtr asyncIoFactory, @@ -168,6 +171,8 @@ class TKqpSessionActor : public TActorBootstrapped { const 
TActorId& kqpTempTablesAgentActor) : Owner(owner) , SessionId(sessionId) + , ResourceManager_(std::move(resourceManager)) + , CaFactory_(std::move(caFactory)) , Counters(counters) , Settings(workerSettings) , AsyncIoFactory(std::move(asyncIoFactory)) @@ -237,6 +242,12 @@ class TKqpSessionActor : public TActorBootstrapped { } void PassRequestToResourcePool() { + if (QueryState->UserRequestContext->PoolConfig) { + LOG_D("request placed into pool from cache: " << QueryState->UserRequestContext->PoolId); + CompileQuery(); + return; + } + Send(MakeKqpWorkloadServiceId(SelfId().NodeId()), new NWorkload::TEvPlaceRequestIntoPool( QueryState->Database, SessionId, @@ -470,6 +481,7 @@ class TKqpSessionActor : public TActorBootstrapped { void Handle(NWorkload::TEvContinueRequest::TPtr& ev) { YQL_ENSURE(QueryState); + QueryState->ContinueTime = TInstant::Now(); if (ev->Get()->Status == Ydb::StatusIds::UNSUPPORTED) { LOG_T("Failed to place request in resource pool, feature flag is disabled"); @@ -568,10 +580,18 @@ class TKqpSessionActor : public TActorBootstrapped { LWTRACK(KqpSessionQueryCompiled, QueryState->Orbit, TStringBuilder() << QueryState->CompileResult->Status); if (QueryState->CompileResult->NeedToSplit) { - YQL_ENSURE(!QueryState->HasTxControl() && QueryState->GetAction() == NKikimrKqp::QUERY_ACTION_EXECUTE); - auto ev = QueryState->BuildSplitRequest(CompilationCookie, GUCSettings); - Send(MakeKqpCompileServiceID(SelfId().NodeId()), ev.release(), 0, QueryState->QueryId, - QueryState->KqpSessionSpan.GetTraceId()); + if (!QueryState->HasTxControl()) { + YQL_ENSURE(QueryState->GetAction() == NKikimrKqp::QUERY_ACTION_EXECUTE); + auto ev = QueryState->BuildSplitRequest(CompilationCookie, GUCSettings); + Send(MakeKqpCompileServiceID(SelfId().NodeId()), ev.release(), 0, QueryState->QueryId, + QueryState->KqpSessionSpan.GetTraceId()); + } else { + NYql::TIssues issues; + ReplyQueryError( + ::Ydb::StatusIds::StatusCode::StatusIds_StatusCode_BAD_REQUEST, + "CTAS statement can 
be executed only in NoTx mode.", + MessageFromIssues(issues)); + } } else { ReplyQueryCompileError(); } @@ -832,9 +852,10 @@ class TKqpSessionActor : public TActorBootstrapped { const NKqpProto::TKqpPhyQuery& phyQuery = QueryState->PreparedQuery->GetPhysicalQuery(); HasOlapTable |= ::NKikimr::NKqp::HasOlapTableReadInTx(phyQuery) || ::NKikimr::NKqp::HasOlapTableWriteInTx(phyQuery); HasOltpTable |= ::NKikimr::NKqp::HasOltpTableReadInTx(phyQuery) || ::NKikimr::NKqp::HasOltpTableWriteInTx(phyQuery); - if (HasOlapTable && HasOltpTable) { + HasTableWrite |= ::NKikimr::NKqp::HasOlapTableWriteInTx(phyQuery) || ::NKikimr::NKqp::HasOltpTableWriteInTx(phyQuery); + if (HasOlapTable && HasOltpTable && HasTableWrite) { ReplyQueryError(Ydb::StatusIds::PRECONDITION_FAILED, - "Transactions between column and row tables are disabled at current time."); + "Write transactions between column and row tables are disabled at current time."); return false; } QueryState->TxCtx->SetTempTables(QueryState->TempTablesState); @@ -878,8 +899,8 @@ class TKqpSessionActor : public TActorBootstrapped { try { const auto& parameters = QueryState->GetYdbParameters(); QueryState->QueryData->ParseParameters(parameters); - if (QueryState->CompileResult && QueryState->CompileResult->Ast && QueryState->CompileResult->Ast->PgAutoParamValues) { - for(const auto& [name, param] : *QueryState->CompileResult->Ast->PgAutoParamValues) { + if (QueryState->CompileResult && QueryState->CompileResult->GetAst() && QueryState->CompileResult->GetAst()->PgAutoParamValues) { + for(const auto& [name, param] : *QueryState->CompileResult->GetAst()->PgAutoParamValues) { if (!parameters.contains(name)) { QueryState->QueryData->AddTypedValueParam(name, param); } @@ -1091,11 +1112,10 @@ class TKqpSessionActor : public TActorBootstrapped { bool ExecutePhyTx(const TKqpPhyTxHolder::TConstPtr& tx, bool commit) { if (tx) { - QueryState->PrepareStatementTransaction(tx->GetType()); switch (tx->GetType()) { case 
NKqpProto::TKqpPhyTx::TYPE_SCHEME: YQL_ENSURE(tx->StagesSize() == 0); - if (QueryState->HasTxControl() && QueryState->TxCtx->EffectiveIsolationLevel != NKikimrKqp::ISOLATION_LEVEL_UNDEFINED) { + if (QueryState->HasTxControl() && !QueryState->HasImplicitTx() && QueryState->TxCtx->EffectiveIsolationLevel != NKikimrKqp::ISOLATION_LEVEL_UNDEFINED) { ReplyQueryError(Ydb::StatusIds::PRECONDITION_FAILED, "Scheme operations cannot be executed inside transaction"); return true; @@ -1264,11 +1284,16 @@ class TKqpSessionActor : public TActorBootstrapped { request.PerRequestDataSizeLimit = RequestControls.PerRequestDataSizeLimit; request.MaxShardCount = RequestControls.MaxShardCount; request.TraceId = QueryState ? QueryState->KqpSessionSpan.GetTraceId() : NWilson::TTraceId(); + request.CaFactory_ = CaFactory_; + request.ResourceManager_ = ResourceManager_; LOG_D("Sending to Executer TraceId: " << request.TraceId.GetTraceId() << " " << request.TraceId.GetSpanIdSize()); const bool useEvWrite = ((HasOlapTable && Settings.TableService.GetEnableOlapSink()) || (!HasOlapTable && Settings.TableService.GetEnableOltpSink())) - && (request.QueryType == NKikimrKqp::EQueryType::QUERY_TYPE_SQL_GENERIC_QUERY - || request.QueryType == NKikimrKqp::EQueryType::QUERY_TYPE_SQL_GENERIC_CONCURRENT_QUERY); + && (request.QueryType == NKikimrKqp::EQueryType::QUERY_TYPE_UNDEFINED + || request.QueryType == NKikimrKqp::EQueryType::QUERY_TYPE_SQL_GENERIC_QUERY + || request.QueryType == NKikimrKqp::EQueryType::QUERY_TYPE_SQL_GENERIC_CONCURRENT_QUERY + || (!HasOlapTable && request.QueryType == NKikimrKqp::EQueryType::QUERY_TYPE_SQL_DML) + || (!HasOlapTable && request.QueryType == NKikimrKqp::EQueryType::QUERY_TYPE_PREPARED_DML)); auto executerActor = CreateKqpExecuter(std::move(request), Settings.Database, QueryState ? 
QueryState->UserToken : TIntrusiveConstPtr(), RequestCounters, Settings.TableService.GetAggregationConfig(), Settings.TableService.GetExecuterRetriesConfig(), @@ -1424,6 +1449,12 @@ class TKqpSessionActor : public TActorBootstrapped { // Invalidate query cache on scheme/internal errors switch (status) { + case Ydb::StatusIds::ABORTED: { + if (ev->BrokenLockPathId) { + issues.AddIssue(GetLocksInvalidatedIssue(*QueryState->TxCtx, *ev->BrokenLockPathId)); + } + break; + } case Ydb::StatusIds::SCHEME_ERROR: case Ydb::StatusIds::INTERNAL_ERROR: InvalidateQuery(); @@ -1545,6 +1576,9 @@ class TKqpSessionActor : public TActorBootstrapped { stats->DurationUs = ((TInstant::Now() - QueryState->StartTime).MicroSeconds()); stats->WorkerCpuTimeUs = (QueryState->GetCpuTime().MicroSeconds()); + if (const auto continueTime = QueryState->ContinueTime) { + stats->QueuedTimeUs = (continueTime - QueryState->StartTime).MicroSeconds(); + } if (QueryState->CompileResult) { stats->Compilation.emplace(); stats->Compilation->FromCache = (QueryState->CompileStats.FromCache); @@ -1737,7 +1771,6 @@ class TKqpSessionActor : public TActorBootstrapped { // Result for scan query is sent directly to target actor. 
Y_ABORT_UNLESS(response->GetArena()); if (QueryState->PreparedQuery) { - bool useYdbResponseFormat = QueryState->GetUsePublicResponseDataFormat(); auto& phyQuery = QueryState->PreparedQuery->GetPhysicalQuery(); size_t trailingResultsCount = 0; for (size_t i = 0; i < phyQuery.ResultBindingsSize(); ++i) { @@ -1754,28 +1787,12 @@ class TKqpSessionActor : public TActorBootstrapped { continue; } - if (useYdbResponseFormat) { - TMaybe effectiveRowsLimit = FillSettings.RowsLimitPerWrite; - if (QueryState->PreparedQuery->GetResults(i).GetRowsLimit()) { - effectiveRowsLimit = QueryState->PreparedQuery->GetResults(i).GetRowsLimit(); - } - auto* ydbResult = QueryState->QueryData->GetYdbTxResult(phyQuery.GetResultBindings(i), response->GetArena(), effectiveRowsLimit); - response->AddYdbResults()->Swap(ydbResult); - } else { - auto* protoRes = QueryState->QueryData->GetMkqlTxResult(phyQuery.GetResultBindings(i), response->GetArena()); - std::optional fillSettings; - if (QueryState->PreparedQuery->ResultsSize()) { - YQL_ENSURE(phyQuery.ResultBindingsSize() == QueryState->PreparedQuery->ResultsSize(), "" - << phyQuery.ResultBindingsSize() << " != " << QueryState->PreparedQuery->ResultsSize()); - const auto& result = QueryState->PreparedQuery->GetResults(i); - if (result.GetRowsLimit()) { - fillSettings = FillSettings; - fillSettings->RowsLimitPerWrite = result.GetRowsLimit(); - } - } - auto* finalResult = KikimrResultToProto(*protoRes, {}, fillSettings.value_or(FillSettings), response->GetArena()); - response->AddResults()->Swap(finalResult); + TMaybe effectiveRowsLimit = FillSettings.RowsLimitPerWrite; + if (QueryState->PreparedQuery->GetResults(i).GetRowsLimit()) { + effectiveRowsLimit = QueryState->PreparedQuery->GetResults(i).GetRowsLimit(); } + auto* ydbResult = QueryState->QueryData->GetYdbTxResult(phyQuery.GetResultBindings(i), response->GetArena(), effectiveRowsLimit); + response->AddYdbResults()->Swap(ydbResult); } } @@ -2069,8 +2086,15 @@ class TKqpSessionActor : public 
TActorBootstrapped { } CleanupCtx->Final = isFinal; CleanupCtx->IsWaitingForWorkloadServiceCleanup = true; + + const auto& stats = QueryState->QueryStats; + auto event = std::make_unique( + QueryState->Database, SessionId, QueryState->UserRequestContext->PoolId, + TDuration::MicroSeconds(stats.DurationUs), TDuration::MicroSeconds(stats.WorkerCpuTimeUs) + ); + auto forwardId = MakeKqpWorkloadServiceId(SelfId().NodeId()); - Send(new IEventHandle(*QueryState->PoolHandlerActor, SelfId(), new NWorkload::TEvCleanupRequest(QueryState->Database, SessionId, QueryState->UserRequestContext->PoolId), IEventHandle::FlagForwardOnNondelivery, 0, &forwardId)); + Send(new IEventHandle(*QueryState->PoolHandlerActor, SelfId(), event.release(), IEventHandle::FlagForwardOnNondelivery, 0, &forwardId)); QueryState->PoolHandlerActor = Nothing(); } @@ -2492,6 +2516,8 @@ class TKqpSessionActor : public TActorBootstrapped { TActorId Owner; TString SessionId; + std::shared_ptr ResourceManager_; + std::shared_ptr CaFactory_; // cached lookups to issue counters THashMap CachedIssueCounters; TInstant CreationTime; @@ -2525,13 +2551,15 @@ class TKqpSessionActor : public TActorBootstrapped { bool HasOlapTable = false; bool HasOltpTable = false; + bool HasTableWrite = false; TGUCSettings::TPtr GUCSettings; }; } // namespace -IActor* CreateKqpSessionActor(const TActorId& owner, const TString& sessionId, +IActor* CreateKqpSessionActor(const TActorId& owner, std::shared_ptr resourceManager, + std::shared_ptr caFactory, const TString& sessionId, const TKqpSettings::TConstPtr& kqpSettings, const TKqpWorkerSettings& workerSettings, std::optional federatedQuerySetup, NYql::NDq::IDqAsyncIoFactory::TPtr asyncIoFactory, @@ -2539,7 +2567,7 @@ IActor* CreateKqpSessionActor(const TActorId& owner, const TString& sessionId, const NKikimrConfig::TQueryServiceConfig& queryServiceConfig, const TActorId& kqpTempTablesAgentActor) { - return new TKqpSessionActor(owner, sessionId, kqpSettings, workerSettings, 
federatedQuerySetup, + return new TKqpSessionActor(owner, std::move(resourceManager), std::move(caFactory), sessionId, kqpSettings, workerSettings, federatedQuerySetup, std::move(asyncIoFactory), std::move(moduleResolverState), counters, queryServiceConfig, kqpTempTablesAgentActor); } diff --git a/ydb/core/kqp/session_actor/kqp_session_actor.h b/ydb/core/kqp/session_actor/kqp_session_actor.h index bbcaa76dab6f..f26fff2b00ca 100644 --- a/ydb/core/kqp/session_actor/kqp_session_actor.h +++ b/ydb/core/kqp/session_actor/kqp_session_actor.h @@ -11,6 +11,14 @@ #include #include +namespace NKikimr::NKqp::NComputeActor { + struct IKqpNodeComputeActorFactory; +} + +namespace NKikimr::NKqp::NRm { + class IKqpResourceManager; +} + namespace NKikimr::NKqp { struct TKqpWorkerSettings { @@ -48,7 +56,10 @@ struct TKqpWorkerSettings { } }; -IActor* CreateKqpSessionActor(const TActorId& owner, const TString& sessionId, +IActor* CreateKqpSessionActor(const TActorId& owner, + std::shared_ptr resourceManager_, + std::shared_ptr caFactory_, + const TString& sessionId, const TKqpSettings::TConstPtr& kqpSettings, const TKqpWorkerSettings& workerSettings, std::optional federatedQuerySetup, NYql::NDq::IDqAsyncIoFactory::TPtr asyncIoFactory, diff --git a/ydb/core/kqp/session_actor/kqp_worker_actor.cpp b/ydb/core/kqp/session_actor/kqp_worker_actor.cpp index 870ffa599ff7..176179a04b2c 100644 --- a/ydb/core/kqp/session_actor/kqp_worker_actor.cpp +++ b/ydb/core/kqp/session_actor/kqp_worker_actor.cpp @@ -188,7 +188,7 @@ class TKqpWorkerActor : public TActorBootstrapped { Config->FeatureFlags = AppData(ctx)->FeatureFlags; KqpHost = CreateKqpHost(Gateway, Settings.Cluster, Settings.Database, Config, ModuleResolverState->ModuleResolver, FederatedQuerySetup, - QueryState->RequestEv->GetUserToken(), GUCSettings, Settings.ApplicationName, AppData(ctx)->FunctionRegistry, !Settings.LongSession, false); + QueryState->RequestEv->GetUserToken(), GUCSettings, QueryServiceConfig, Settings.ApplicationName, 
AppData(ctx)->FunctionRegistry, !Settings.LongSession, false, nullptr, nullptr, nullptr); auto& queryRequest = QueryState->RequestEv; QueryState->ProxyRequestId = proxyRequestId; @@ -960,7 +960,7 @@ class TKqpWorkerActor : public TActorBootstrapped { // If we have result it must be allocated on protobuf arena Y_ASSERT(result->GetArena()); Y_ASSERT(resp->GetArena() == result->GetArena()); - resp->AddResults()->Swap(result); + resp->AddYdbResults()->Swap(result); } } else { auto resp = ev.MutableResponse(); diff --git a/ydb/core/kqp/session_actor/kqp_worker_common.cpp b/ydb/core/kqp/session_actor/kqp_worker_common.cpp index 63db80b02d88..e7cf7113b049 100644 --- a/ydb/core/kqp/session_actor/kqp_worker_common.cpp +++ b/ydb/core/kqp/session_actor/kqp_worker_common.cpp @@ -119,7 +119,7 @@ void SlowLogQuery(const TActorContext &ctx, const TKikimrConfiguration* config, << 'b'; ui64 resultsSize = 0; - for (auto& result : record->GetResponse().GetResults()) { + for (auto& result : record->GetResponse().GetYdbResults()) { resultsSize += result.ByteSize(); } diff --git a/ydb/core/kqp/topics/kqp_topics.cpp b/ydb/core/kqp/topics/kqp_topics.cpp index bb190899dfad..6b90767ff7b7 100644 --- a/ydb/core/kqp/topics/kqp_topics.cpp +++ b/ydb/core/kqp/topics/kqp_topics.cpp @@ -105,15 +105,15 @@ void TTopicPartitionOperations::AddOperation(const TString& topic, ui32 partitio HasWriteOperations_ = true; } -void TTopicPartitionOperations::BuildTopicTxs(THashMap &txs) +void TTopicPartitionOperations::BuildTopicTxs(TTopicOperationTransactions& txs) { Y_ABORT_UNLESS(TabletId_.Defined()); Y_ABORT_UNLESS(Partition_.Defined()); - auto& tx = txs[*TabletId_]; + auto& t = txs[*TabletId_]; for (auto& [consumer, operations] : Operations_) { - NKikimrPQ::TPartitionOperation* o = tx.MutableOperations()->Add(); + NKikimrPQ::TPartitionOperation* o = t.tx.MutableOperations()->Add(); o->SetPartitionId(*Partition_); auto [begin, end] = operations.GetRange(); o->SetBegin(begin); @@ -123,12 +123,13 @@ void 
TTopicPartitionOperations::BuildTopicTxs(THashMapAdd(); + NKikimrPQ::TPartitionOperation* o = t.tx.MutableOperations()->Add(); o->SetPartitionId(*Partition_); o->SetPath(*Topic_); if (SupportivePartition_.Defined()) { o->SetSupportivePartition(*SupportivePartition_); } + t.hasWrite = true; } } @@ -355,7 +356,7 @@ bool TTopicOperations::ProcessSchemeCacheNavigate(const NSchemeCache::TSchemeCac return true; } -void TTopicOperations::BuildTopicTxs(THashMap &txs) +void TTopicOperations::BuildTopicTxs(TTopicOperationTransactions& txs) { for (auto& [_, operations] : Operations_) { operations.BuildTopicTxs(txs); diff --git a/ydb/core/kqp/topics/kqp_topics.h b/ydb/core/kqp/topics/kqp_topics.h index e0e425c3c1f7..f4ff0bb180e2 100644 --- a/ydb/core/kqp/topics/kqp_topics.h +++ b/ydb/core/kqp/topics/kqp_topics.h @@ -42,6 +42,13 @@ class TConsumerOperations { TDisjointIntervalTree Offsets_; }; +struct TTopicOperationTransaction { + NKikimrPQ::TDataTransaction tx; + bool hasWrite = false; +}; + +using TTopicOperationTransactions = THashMap; + class TTopicPartitionOperations { public: bool IsValid() const; @@ -52,7 +59,7 @@ class TTopicPartitionOperations { void AddOperation(const TString& topic, ui32 partition, TMaybe supportivePartition); - void BuildTopicTxs(THashMap &txs); + void BuildTopicTxs(TTopicOperationTransactions &txs); void Merge(const TTopicPartitionOperations& rhs); @@ -109,7 +116,7 @@ class TTopicOperations { Ydb::StatusIds_StatusCode& status, TString& message); - void BuildTopicTxs(THashMap &txs); + void BuildTopicTxs(TTopicOperationTransactions &txs); void Merge(const TTopicOperations& rhs); diff --git a/ydb/core/kqp/ut/common/columnshard.cpp b/ydb/core/kqp/ut/common/columnshard.cpp index 90e3992b7176..6318a8f1e7ea 100644 --- a/ydb/core/kqp/ut/common/columnshard.cpp +++ b/ydb/core/kqp/ut/common/columnshard.cpp @@ -22,7 +22,7 @@ namespace NKqp { } SecretableSecretKey: { Value: { - Data: "secretSecretKey" + Data: "fakeSecret" } } } @@ -31,32 +31,37 @@ namespace 
NKqp { using namespace NYdb; - TTestHelper::TTestHelper(const TKikimrSettings& settings) - : Kikimr(settings) - , TableClient(Kikimr.GetTableClient()) - , Session(TableClient.CreateSession().GetValueSync().GetSession()) - {} + TTestHelper::TTestHelper(const TKikimrSettings& settings) { + TKikimrSettings kikimrSettings(settings); + if (!kikimrSettings.FeatureFlags.HasEnableTieringInColumnShard()) { + kikimrSettings.SetEnableTieringInColumnShard(true); + } + + Kikimr = std::make_unique(kikimrSettings); + TableClient = std::make_unique(Kikimr->GetTableClient()); + Session = std::make_unique(TableClient->CreateSession().GetValueSync().GetSession()); + } NKikimr::NKqp::TKikimrRunner& TTestHelper::GetKikimr() { - return Kikimr; + return *Kikimr; } TTestActorRuntime& TTestHelper::GetRuntime() { - return *Kikimr.GetTestServer().GetRuntime(); + return *Kikimr->GetTestServer().GetRuntime(); } NYdb::NTable::TSession& TTestHelper::GetSession() { - return Session; + return *Session; } void TTestHelper::CreateTable(const TColumnTableBase& table, const EStatus expectedStatus) { std::cerr << (table.BuildQuery()) << std::endl; - auto result = Session.ExecuteSchemeQuery(table.BuildQuery()).GetValueSync(); + auto result = GetSession().ExecuteSchemeQuery(table.BuildQuery()).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), expectedStatus, result.GetIssues().ToString()); } void TTestHelper::CreateTier(const TString& tierName) { - auto result = Session.ExecuteSchemeQuery("CREATE OBJECT " + tierName + " (TYPE TIER) WITH tierConfig = `" + GetConfigProtoWithName(tierName) + "`").GetValueSync(); + auto result = GetSession().ExecuteSchemeQuery("CREATE OBJECT " + tierName + " (TYPE TIER) WITH tierConfig = `" + GetConfigProtoWithName(tierName) + "`").GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); } @@ -70,43 +75,43 @@ namespace NKqp { } ] })"; - auto result = Session.ExecuteSchemeQuery("CREATE OBJECT IF NOT EXISTS " 
+ ruleName + " (TYPE TIERING_RULE) WITH (defaultColumn = " + columnName + ", description = `" + configTieringStr + "`)").GetValueSync(); + auto result = GetSession().ExecuteSchemeQuery("CREATE OBJECT IF NOT EXISTS " + ruleName + " (TYPE TIERING_RULE) WITH (defaultColumn = " + columnName + ", description = `" + configTieringStr + "`)").GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); return ruleName; } void TTestHelper::SetTiering(const TString& tableName, const TString& ruleName) { auto alterQuery = TStringBuilder() << "ALTER TABLE `" << tableName << "` SET (TIERING = '" << ruleName << "')"; - auto result = Session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + auto result = GetSession().ExecuteSchemeQuery(alterQuery).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); } void TTestHelper::ResetTiering(const TString& tableName) { auto alterQuery = TStringBuilder() << "ALTER TABLE `" << tableName << "` RESET (TIERING)"; - auto result = Session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + auto result = GetSession().ExecuteSchemeQuery(alterQuery).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); } void TTestHelper::DropTable(const TString& tableName) { - auto result = Session.DropTable(tableName).GetValueSync(); + auto result = GetSession().DropTable(tableName).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); } void TTestHelper::BulkUpsert(const TColumnTable& table, TTestHelper::TUpdatesBuilder& updates, const Ydb::StatusIds_StatusCode& opStatus /*= Ydb::StatusIds::SUCCESS*/) { Y_UNUSED(opStatus); - NKikimr::Tests::NCS::THelper helper(Kikimr.GetTestServer()); + NKikimr::Tests::NCS::THelper helper(GetKikimr().GetTestServer()); auto batch = updates.BuildArrow(); helper.SendDataViaActorSystem(table.GetName(), batch, 
opStatus); } void TTestHelper::BulkUpsert(const TColumnTable& table, std::shared_ptr batch, const Ydb::StatusIds_StatusCode& opStatus /*= Ydb::StatusIds::SUCCESS*/) { Y_UNUSED(opStatus); - NKikimr::Tests::NCS::THelper helper(Kikimr.GetTestServer()); + NKikimr::Tests::NCS::THelper helper(GetKikimr().GetTestServer()); helper.SendDataViaActorSystem(table.GetName(), batch, opStatus); } void TTestHelper::ReadData(const TString& query, const TString& expected, const EStatus opStatus /*= EStatus::SUCCESS*/) { - auto it = TableClient.StreamExecuteScanQuery(query).GetValueSync(); + auto it = TableClient->StreamExecuteScanQuery(query).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(it.GetStatus(), EStatus::SUCCESS, it.GetIssues().ToString()); // Means stream successfully get TString result = StreamResultToYson(it, false, opStatus); if (opStatus == EStatus::SUCCESS) { @@ -115,21 +120,29 @@ namespace NKqp { } void TTestHelper::RebootTablets(const TString& tableName) { - auto runtime = Kikimr.GetTestServer().GetRuntime(); + auto runtime = GetKikimr().GetTestServer().GetRuntime(); TActorId sender = runtime->AllocateEdgeActor(); TVector shards; { - auto describeResult = DescribeTable(&Kikimr.GetTestServer(), sender, tableName); + auto describeResult = DescribeTable(&GetKikimr().GetTestServer(), sender, tableName); for (auto shard : describeResult.GetPathDescription().GetColumnTableDescription().GetSharding().GetColumnShards()) { shards.push_back(shard); } } for (auto shard : shards) { - Kikimr.GetTestServer().GetRuntime()->Send(MakePipePerNodeCacheID(false), NActors::TActorId(), new TEvPipeCache::TEvForward( + GetKikimr().GetTestServer().GetRuntime()->Send(MakePipePerNodeCacheID(false), NActors::TActorId(), new TEvPipeCache::TEvForward( new TEvents::TEvPoisonPill(), shard, false)); } } + void TTestHelper::WaitTabletDeletionInHive(ui64 tabletId, TDuration duration) { + auto deadline = TInstant::Now() + duration; + while (GetKikimr().GetTestClient().TabletExistsInHive(&GetRuntime(), 
tabletId) && TInstant::Now() <= deadline) { + Cerr << "WaitTabletDeletionInHive: wait until " << tabletId << " is deleted" << Endl; + Sleep(TDuration::Seconds(1)); + } + } + TString TTestHelper::TColumnSchema::BuildQuery() const { TStringBuilder str; str << Name << ' '; @@ -233,6 +246,8 @@ namespace NKqp { return arrow::field(name, arrow::int64(), nullable); case NScheme::NTypeIds::JsonDocument: return arrow::field(name, arrow::binary(), nullable); + case NScheme::NTypeIds::Decimal: + return arrow::field(name, arrow::decimal(22, 9)); case NScheme::NTypeIds::Pg: switch (NPg::PgTypeIdFromTypeDesc(typeDesc)) { case INT2OID: diff --git a/ydb/core/kqp/ut/common/columnshard.h b/ydb/core/kqp/ut/common/columnshard.h index a938e91b4d4a..d1be363fd5ef 100644 --- a/ydb/core/kqp/ut/common/columnshard.h +++ b/ydb/core/kqp/ut/common/columnshard.h @@ -2,15 +2,14 @@ #include "kqp_ut_common.h" #include +#include +#include +#include #include #include #include #include -#include -#include -#include - #include namespace NKikimr { @@ -63,9 +62,9 @@ namespace NKqp { }; private: - TKikimrRunner Kikimr; - NYdb::NTable::TTableClient TableClient; - NYdb::NTable::TSession Session; + std::unique_ptr Kikimr; + std::unique_ptr TableClient; + std::unique_ptr Session; public: TTestHelper(const TKikimrSettings& settings); @@ -82,6 +81,7 @@ namespace NKqp { void BulkUpsert(const TColumnTable& table, std::shared_ptr batch, const Ydb::StatusIds_StatusCode& opStatus = Ydb::StatusIds::SUCCESS); void ReadData(const TString& query, const TString& expected, const NYdb::EStatus opStatus = NYdb::EStatus::SUCCESS); void RebootTablets(const TString& tableName); + void WaitTabletDeletionInHive(ui64 tabletId, TDuration duration); }; } diff --git a/ydb/core/kqp/ut/common/kqp_ut_common.cpp b/ydb/core/kqp/ut/common/kqp_ut_common.cpp index d99f2d4fc082..4ac6314d0278 100644 --- a/ydb/core/kqp/ut/common/kqp_ut_common.cpp +++ b/ydb/core/kqp/ut/common/kqp_ut_common.cpp @@ -129,9 +129,10 @@ 
TKikimrRunner::TKikimrRunner(const TKikimrSettings& settings) { ServerSettings->SetFrFactory(&UdfFrFactory); ServerSettings->SetEnableNotNullColumns(true); ServerSettings->SetEnableMoveIndex(true); - ServerSettings->SetEnableUniqConstraint(true); ServerSettings->SetUseRealThreads(settings.UseRealThreads); ServerSettings->SetEnableTablePgTypes(true); + ServerSettings->SetEnablePgSyntax(true); + ServerSettings->SetEnableOlapCompression(true); ServerSettings->S3ActorsFactory = settings.S3ActorsFactory; if (settings.Storage) { diff --git a/ydb/core/kqp/ut/common/kqp_ut_common.h b/ydb/core/kqp/ut/common/kqp_ut_common.h index a43d6d14da0d..43c7fabda800 100644 --- a/ydb/core/kqp/ut/common/kqp_ut_common.h +++ b/ydb/core/kqp/ut/common/kqp_ut_common.h @@ -97,6 +97,7 @@ struct TKikimrSettings: public TTestFeatureFlagsHolder { exchangerSettings->SetStartDelayMs(10); exchangerSettings->SetMaxDelayMs(10); AppConfig.MutableColumnShardConfig()->SetDisabledOnSchemeShard(false); + FeatureFlags.SetEnableSparsedColumns(true); } TKikimrSettings& SetAppConfig(const NKikimrConfig::TAppConfig& value) { AppConfig = value; return *this; } diff --git a/ydb/core/kqp/ut/indexes/kqp_indexes_ut.cpp b/ydb/core/kqp/ut/indexes/kqp_indexes_ut.cpp index 0903b929a4b5..41650274cb0d 100644 --- a/ydb/core/kqp/ut/indexes/kqp_indexes_ut.cpp +++ b/ydb/core/kqp/ut/indexes/kqp_indexes_ut.cpp @@ -55,7 +55,7 @@ TIntrusivePtr CreateKikimrQueryProcessor(TIntrusivePtr ga auto federatedQuerySetup = std::make_optional({NYql::IHTTPGateway::Make(), nullptr, nullptr, nullptr, {}, {}, {}, nullptr, nullptr}); return NKqp::CreateKqpHost(gateway, cluster, "/Root", kikimrConfig, moduleResolver, - federatedQuerySetup, nullptr, nullptr, {}, funcRegistry, funcRegistry, keepConfigChanges, nullptr, actorSystem); + federatedQuerySetup, nullptr, nullptr, NKikimrConfig::TQueryServiceConfig(), {}, funcRegistry, funcRegistry, keepConfigChanges, nullptr, actorSystem, nullptr); } NYql::NNodes::TExprBase GetExpr(const TString& ast, 
NYql::TExprContext& ctx, NYql::IModuleResolver* moduleResolver) { @@ -3500,14 +3500,17 @@ R"([[#;#;["Primary1"];[41u]];[["Secondary2"];[2u];["Primary2"];[42u]];[["Seconda } } - Y_UNIT_TEST(SecondaryIndexUsingInJoin) { + Y_UNIT_TEST_TWIN(SecondaryIndexUsingInJoin, UseStreamJoin) { + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableKqpDataQueryStreamLookup(true); + appConfig.MutableTableServiceConfig()->SetEnableKqpDataQueryStreamIdxLookupJoin(UseStreamJoin); auto setting = NKikimrKqp::TKqpSetting(); auto serverSettings = TKikimrSettings() - .SetKqpSettings({setting}); + .SetKqpSettings({setting}) + .SetAppConfig(appConfig); TKikimrRunner kikimr(serverSettings); auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); - bool streamLookupEnabled = serverSettings.AppConfig.GetTableServiceConfig().GetEnableKqpDataQueryStreamLookup(); { auto tableBuilder = db.GetTableBuilder(); @@ -3575,16 +3578,30 @@ R"([[#;#;["Primary1"];[41u]];[["Secondary2"];[2u];["Primary2"];[42u]];[["Seconda auto& stats = NYdb::TProtoAccessor::GetProto(*result.GetStats()); - int indexPhaseId = streamLookupEnabled ? 1 : 2; - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), streamLookupEnabled ? 
2 : 3); + if (UseStreamJoin) { + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access().size(), 2); + + for (const auto& ta : stats.query_phases(0).table_access()) { + if (ta.name() == "/Root/TestTable1") { + UNIT_ASSERT_VALUES_EQUAL(ta.reads().rows(), 3); + } else { + UNIT_ASSERT_VALUES_EQUAL(ta.name(), "/Root/TestTable2/Index1/indexImplTable"); + UNIT_ASSERT_VALUES_EQUAL(ta.reads().rows(), 2); + } + } + } else { + int indexPhaseId = 1; + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), 2); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access().size(), 1); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).name(), "/Root/TestTable1"); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).reads().rows(), 3); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).name(), "/Root/TestTable1"); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).reads().rows(), 3); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access().size(), 1); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).name(), "/Root/TestTable2/Index1/indexImplTable"); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).reads().rows(), 2); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).name(), "/Root/TestTable2/Index1/indexImplTable"); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).reads().rows(), 2); + } } { @@ -3605,16 +3622,30 @@ R"([[#;#;["Primary1"];[41u]];[["Secondary2"];[2u];["Primary2"];[42u]];[["Seconda auto& stats = NYdb::TProtoAccessor::GetProto(*result.GetStats()); - int indexPhaseId = streamLookupEnabled ? 
1 : 2; - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), streamLookupEnabled ? 2 : 3); + if (UseStreamJoin) { + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access().size(), 2); + + for (const auto& ta : stats.query_phases(0).table_access()) { + if (ta.name() == "/Root/TestTable1") { + UNIT_ASSERT_VALUES_EQUAL(ta.reads().rows(), 3); + } else { + UNIT_ASSERT_VALUES_EQUAL(ta.name(), "/Root/TestTable2/Index1/indexImplTable"); + UNIT_ASSERT_VALUES_EQUAL(ta.reads().rows(), 2); + } + } + } else { + int indexPhaseId = 1; + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), 2); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access().size(), 1); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).name(), "/Root/TestTable1"); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).reads().rows(), 3); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).name(), "/Root/TestTable1"); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).reads().rows(), 3); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access().size(), 1); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).name(), "/Root/TestTable2/Index1/indexImplTable"); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).reads().rows(), 2); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).name(), "/Root/TestTable2/Index1/indexImplTable"); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).reads().rows(), 2); + } } { @@ -3636,17 +3667,30 @@ R"([[#;#;["Primary1"];[41u]];[["Secondary2"];[2u];["Primary2"];[42u]];[["Seconda auto& stats = NYdb::TProtoAccessor::GetProto(*result.GetStats()); - int indexPhaseId = 1; - 
UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), streamLookupEnabled ? 2 : 3); - indexPhaseId = streamLookupEnabled ? 1 : 2; + if (UseStreamJoin) { + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access().size(), 2); + + for (const auto& ta : stats.query_phases(0).table_access()) { + if (ta.name() == "/Root/TestTable1") { + UNIT_ASSERT_VALUES_EQUAL(ta.reads().rows(), 3); + } else { + UNIT_ASSERT_VALUES_EQUAL(ta.name(), "/Root/TestTable2/Index1/indexImplTable"); + UNIT_ASSERT_VALUES_EQUAL(ta.reads().rows(), 2); + } + } + } else { + int indexPhaseId = 1; + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), 2); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access().size(), 1); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).name(), "/Root/TestTable1"); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).reads().rows(), 3); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).name(), "/Root/TestTable1"); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).reads().rows(), 3); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access().size(), 1); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).name(), "/Root/TestTable2/Index1/indexImplTable"); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).reads().rows(), 2); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).name(), "/Root/TestTable2/Index1/indexImplTable"); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).reads().rows(), 2); + } } { @@ -3667,27 +3711,44 @@ R"([[#;#;["Primary1"];[41u]];[["Secondary2"];[2u];["Primary2"];[42u]];[["Seconda auto& stats = NYdb::TProtoAccessor::GetProto(*result.GetStats()); 
- int indexPhaseId = streamLookupEnabled ? 1 : 2; - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), streamLookupEnabled ? 2 : 3); + if (UseStreamJoin) { + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access().size(), 2); + + for (const auto& ta : stats.query_phases(0).table_access()) { + if (ta.name() == "/Root/TestTable1") { + UNIT_ASSERT_VALUES_EQUAL(ta.reads().rows(), 3); + } else { + UNIT_ASSERT_VALUES_EQUAL(ta.name(), "/Root/TestTable2/Index1/indexImplTable"); + UNIT_ASSERT_VALUES_EQUAL(ta.reads().rows(), 2); + } + } + } else { + int indexPhaseId = 1; + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), 2); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access().size(), 1); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).name(), "/Root/TestTable1"); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).reads().rows(), 3); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).name(), "/Root/TestTable1"); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).reads().rows(), 3); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access().size(), 1); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).name(), "/Root/TestTable2/Index1/indexImplTable"); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).reads().rows(), 2); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).name(), "/Root/TestTable2/Index1/indexImplTable"); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).reads().rows(), 2); + } } } - Y_UNIT_TEST(SecondaryIndexUsingInJoin2) { + Y_UNIT_TEST_TWIN(SecondaryIndexUsingInJoin2, UseStreamJoin) { + NKikimrConfig::TAppConfig appConfig; + 
appConfig.MutableTableServiceConfig()->SetEnableKqpDataQueryStreamLookup(true); + appConfig.MutableTableServiceConfig()->SetEnableKqpDataQueryStreamIdxLookupJoin(UseStreamJoin); auto setting = NKikimrKqp::TKqpSetting(); auto serverSettings = TKikimrSettings() - .SetKqpSettings({setting}); + .SetKqpSettings({setting}) + .SetAppConfig(appConfig); TKikimrRunner kikimr(serverSettings); auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); - bool streamLookupEnabled = serverSettings.AppConfig.GetTableServiceConfig().GetEnableKqpDataQueryStreamLookup(); NYdb::NTable::TExecDataQuerySettings execSettings; execSettings.CollectQueryStats(ECollectQueryStatsMode::Basic); @@ -3756,29 +3817,36 @@ R"([[#;#;["Primary1"];[41u]];[["Secondary2"];[2u];["Primary2"];[42u]];[["Seconda auto& stats = NYdb::TProtoAccessor::GetProto(*result.GetStats()); - int indexPhaseId = streamLookupEnabled ? 1 : 2; - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), streamLookupEnabled ? 
2 : 4); - - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access().size(), 1); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).name(), "/Root/TestTable1"); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).reads().rows(), 3); - - if (streamLookupEnabled) { - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access().size(), 2); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).name(), "/Root/TestTable2"); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).reads().rows(), 2); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(1).name(), "/Root/TestTable2/Index1/indexImplTable"); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(1).reads().rows(), 2); + if (UseStreamJoin) { + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access().size(), 3); + + for (const auto& ta : stats.query_phases(0).table_access()) { + if (ta.name() == "/Root/TestTable1") { + UNIT_ASSERT_VALUES_EQUAL(ta.reads().rows(), 3); + } else if (ta.name() == "/Root/TestTable2") { + UNIT_ASSERT_VALUES_EQUAL(ta.reads().rows(), 2); + } else { + UNIT_ASSERT_VALUES_EQUAL(ta.name(), "/Root/TestTable2/Index1/indexImplTable"); + UNIT_ASSERT_VALUES_EQUAL(ta.reads().rows(), 2); + } + } } else { - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access().size(), 1); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).name(), "/Root/TestTable2/Index1/indexImplTable"); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).reads().rows(), 2); - - indexPhaseId++; - - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access().size(), 1); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).name(), "/Root/TestTable2"); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).reads().rows(), 2); 
+ UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), 2); + + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).name(), "/Root/TestTable1"); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).reads().rows(), 3); + + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(1).table_access().size(), 2); + for (const auto& ta : stats.query_phases(1).table_access()) { + if (ta.name() == "/Root/TestTable2") { + UNIT_ASSERT_VALUES_EQUAL(ta.reads().rows(), 2); + } else { + UNIT_ASSERT_VALUES_EQUAL(ta.name(), "/Root/TestTable2/Index1/indexImplTable"); + UNIT_ASSERT_VALUES_EQUAL(ta.reads().rows(), 2); + } + } } } @@ -3801,29 +3869,36 @@ R"([[#;#;["Primary1"];[41u]];[["Secondary2"];[2u];["Primary2"];[42u]];[["Seconda auto& stats = NYdb::TProtoAccessor::GetProto(*result.GetStats()); - int indexPhaseId = streamLookupEnabled ? 1 : 2; - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), streamLookupEnabled ? 
2 : 4); - - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access().size(), 1); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).name(), "/Root/TestTable1"); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).reads().rows(), 3); - - if (streamLookupEnabled) { - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access().size(), 2); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).name(), "/Root/TestTable2"); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).reads().rows(), 2); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(1).name(), "/Root/TestTable2/Index1/indexImplTable"); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(1).reads().rows(), 2); + if (UseStreamJoin) { + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access().size(), 3); + + for (const auto& ta : stats.query_phases(0).table_access()) { + if (ta.name() == "/Root/TestTable1") { + UNIT_ASSERT_VALUES_EQUAL(ta.reads().rows(), 3); + } else if (ta.name() == "/Root/TestTable2") { + UNIT_ASSERT_VALUES_EQUAL(ta.reads().rows(), 2); + } else { + UNIT_ASSERT_VALUES_EQUAL(ta.name(), "/Root/TestTable2/Index1/indexImplTable"); + UNIT_ASSERT_VALUES_EQUAL(ta.reads().rows(), 2); + } + } } else { - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access().size(), 1); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).name(), "/Root/TestTable2/Index1/indexImplTable"); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).reads().rows(), 2); - - indexPhaseId++; - - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access().size(), 1); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).name(), "/Root/TestTable2"); - UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(indexPhaseId).table_access(0).reads().rows(), 2); 
+ UNIT_ASSERT_VALUES_EQUAL(stats.query_phases().size(), 2); + + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access().size(), 1); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).name(), "/Root/TestTable1"); + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(0).table_access(0).reads().rows(), 3); + + UNIT_ASSERT_VALUES_EQUAL(stats.query_phases(1).table_access().size(), 2); + for (const auto& ta : stats.query_phases(1).table_access()) { + if (ta.name() == "/Root/TestTable2") { + UNIT_ASSERT_VALUES_EQUAL(ta.reads().rows(), 2); + } else { + UNIT_ASSERT_VALUES_EQUAL(ta.name(), "/Root/TestTable2/Index1/indexImplTable"); + UNIT_ASSERT_VALUES_EQUAL(ta.reads().rows(), 2); + } + } } } } diff --git a/ydb/core/kqp/ut/join/kqp_index_lookup_join_ut.cpp b/ydb/core/kqp/ut/join/kqp_index_lookup_join_ut.cpp index 1ff3126cb00b..0b8e4131cca5 100644 --- a/ydb/core/kqp/ut/join/kqp_index_lookup_join_ut.cpp +++ b/ydb/core/kqp/ut/join/kqp_index_lookup_join_ut.cpp @@ -1168,6 +1168,69 @@ Y_UNIT_TEST_TWIN(JoinWithComplexCondition, StreamLookupJoin) { } } +Y_UNIT_TEST_TWIN(LeftSemiJoinWithDuplicatesInRightTable, StreamLookupJoin) { + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableKqpDataQueryStreamLookup(true); + appConfig.MutableTableServiceConfig()->SetEnableKqpDataQueryStreamIdxLookupJoin(StreamLookupJoin); + auto settings = TKikimrSettings().SetAppConfig(appConfig); + TKikimrRunner kikimr(settings); + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + + { // create tables + const TString query = R"( + CREATE TABLE `/Root/Left` ( + Key1 Int64, + Key2 Int64, + Value String, + PRIMARY KEY (Key1, Key2) + ); + + CREATE TABLE `/Root/Right` ( + Key1 Int64, + Key2 Int64, + Value String, + PRIMARY KEY (Key1, Key2) + ); + )"; + UNIT_ASSERT(session.ExecuteSchemeQuery(query).GetValueSync().IsSuccess()); + } + + { // fill tables + const TString query = R"( + REPLACE INTO `/Root/Left` (Key1, 
Key2, Value) VALUES + (1, 10, "value1"), + (2, 20, "value2"), + (3, 30, "value3"); + + REPLACE INTO `/Root/Right` (Key1, Key2, Value) VALUES + (10, 100, "value1"), + (10, 101, "value1"), + (10, 102, "value1"), + (20, 200, "value2"), + (20, 201, "value2"), + (30, 300, "value3"); + )"; + UNIT_ASSERT(session.ExecuteDataQuery(query, TTxControl::BeginTx().CommitTx()).GetValueSync().IsSuccess()); + } + + { + const TString query = R"( + SELECT l.Key1, l.Key2, l.Value + FROM `/Root/Left` AS l + LEFT SEMI JOIN `/Root/Right` AS r + ON l.Key2 = r.Key1 ORDER BY l.Key1, l.Key2, l.Value + )"; + + auto result = session.ExecuteDataQuery(query, TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + CompareYson(R"([ + [[1];[10];["value1"]]; + [[2];[20];["value2"]]; + [[3];[30];["value3"]] + ])", FormatResultSetYson(result.GetResultSet(0))); + } +} + } // suite } // namespace NKqp diff --git a/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp b/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp index c0e8bd96aebd..867d00124224 100644 --- a/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp +++ b/ydb/core/kqp/ut/join/kqp_join_order_ut.cpp @@ -75,32 +75,53 @@ static TKikimrRunner GetKikimrWithJoinSettings(bool useStreamLookupJoin = false, return TKikimrRunner(serverSettings); } -class TChainConstructor { +void PrintPlan(const TString& plan) { + Cout << plan << Endl; +} + +class TChainTester { public: - TChainConstructor(size_t chainSize) - : Kikimr_(GetKikimrWithJoinSettings()) - , TableClient_(Kikimr_.GetTableClient()) - , Session_(TableClient_.CreateSession().GetValueSync().GetSession()) - , ChainSize_(chainSize) + TChainTester(size_t chainSize) + : Kikimr(GetKikimrWithJoinSettings(false, GetStats(chainSize))) + , TableClient(Kikimr.GetTableClient()) + , Session(TableClient.CreateSession().GetValueSync().GetSession()) + , ChainSize(chainSize) {} +public: + void Test() { + CreateTables(); + JoinTables(); + } + + static TString GetStats(size_t chainSize) { + srand(228); + NJson::TJsonValue stats; + for 
(size_t i = 0; i < chainSize; ++i) { + ui64 nRows = rand(); + NJson::TJsonValue tableStat; + tableStat["n_rows"] = nRows; + tableStat["byte_size"] = nRows * 10; + + TString table = Sprintf("/Root/table_%ld", i); + stats[table] = std::move(tableStat); + } + return stats.GetStringRobust(); + } + +private: void CreateTables() { - for (size_t i = 0; i < ChainSize_; ++i) { - TString tableName; - - tableName - .append("/Root/table_").append(ToString(i));; - - TString createTable; - createTable - += "CREATE TABLE `" + tableName + "` (id" - + ToString(i) + " Int32, " - + "PRIMARY KEY (id" + ToString(i) + "));"; - - std::cout << createTable << std::endl; - auto res = Session_.ExecuteSchemeQuery(createTable).GetValueSync(); - std::cout << res.GetIssues().ToString() << std::endl; - UNIT_ASSERT(res.IsSuccess()); + for (size_t i = 0; i < ChainSize; ++i) { + TString tableName = Sprintf("/Root/table_%ld", i); + + TString createTable = Sprintf( + "CREATE TABLE `%s` (id%ld Int32, PRIMARY KEY (id%ld));", + tableName.c_str(), i, i + ); + + auto result = Session.ExecuteSchemeQuery(createTable).GetValueSync(); + result.GetIssues().PrintTo(Cerr); + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS); } } @@ -109,30 +130,29 @@ class TChainConstructor { joinRequest.append("SELECT * FROM `/Root/table_0` as t0 "); - for (size_t i = 1; i < ChainSize_; ++i) { - TString table = "/Root/table_" + ToString(i); + for (size_t i = 1; i < ChainSize; ++i) { + TString table = Sprintf("/Root/table_%ld", i); - TString prevAliasTable = "t" + ToString(i - 1); - TString aliasTable = "t" + ToString(i); + TString prevAliasTable = Sprintf("t%ld", i - 1); + TString aliasTable = Sprintf("t%ld", i); - joinRequest - += "INNER JOIN `" + table + "`" + " AS " + aliasTable + " ON " - + aliasTable + ".id" + ToString(i) + "=" + prevAliasTable + ".id" - + ToString(i-1) + " "; + joinRequest += + Sprintf( + "INNER JOIN `%s` AS %s ON %s.id%ld = %s.id%ld ", + table.c_str(), aliasTable.c_str(), aliasTable.c_str(), 
i, prevAliasTable.c_str(), i - 1 + ); } - auto result = Session_.ExecuteDataQuery(joinRequest, TTxControl::BeginTx().CommitTx()).ExtractValueSync(); - - std::cout << result.GetIssues().ToString() << std::endl; - std::cout << joinRequest << std::endl; + auto result = Session.ExplainDataQuery(joinRequest).ExtractValueSync(); + result.GetIssues().PrintTo(Cerr); UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::SUCCESS); + PrintPlan(result.GetPlan()); } -private: - TKikimrRunner Kikimr_; - NYdb::NTable::TTableClient TableClient_; - TSession Session_; - size_t ChainSize_; + TKikimrRunner Kikimr; + NYdb::NTable::TTableClient TableClient; + TSession Session; + size_t ChainSize; }; void ExplainJoinOrderTestDataQuery(const TString& queryPath, bool useStreamLookupJoin) { @@ -175,9 +195,7 @@ void ExecuteJoinOrderTestDataQuery(const TString& queryPath, bool useStreamLooku Y_UNIT_TEST_SUITE(KqpJoinOrder) { Y_UNIT_TEST(Chain65Nodes) { - TChainConstructor chain(65); - chain.CreateTables(); - chain.JoinTables(); + TChainTester(65).Test(); } Y_UNIT_TEST_TWIN(FiveWayJoin, StreamLookupJoin) { diff --git a/ydb/core/kqp/ut/join/kqp_join_ut.cpp b/ydb/core/kqp/ut/join/kqp_join_ut.cpp index fae91c918e04..391af9c3ecb9 100644 --- a/ydb/core/kqp/ut/join/kqp_join_ut.cpp +++ b/ydb/core/kqp/ut/join/kqp_join_ut.cpp @@ -816,6 +816,71 @@ Y_UNIT_TEST_SUITE(KqpJoin) { } } + Y_UNIT_TEST(TwoJoinsWithQueryService) { + NKikimrConfig::TAppConfig appConfig; + auto serverSettings = TKikimrSettings() + .SetAppConfig(appConfig) + .SetWithSampleTables(false); + + TKikimrRunner kikimr(serverSettings); + auto client = kikimr.GetTableClient(); + auto db = kikimr.GetQueryClient(); + auto settings = NYdb::NQuery::TExecuteQuerySettings(); + + { + auto session = client.CreateSession().GetValueSync().GetSession(); + const auto query = Q_(R"( + CREATE TABLE ta( + a Int64, + b Int64, + c Int64, + PRIMARY KEY(a) + ); + )"); + auto result = session.ExecuteSchemeQuery(query).ExtractValueSync(); + 
UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + } + { + auto session = client.CreateSession().GetValueSync().GetSession(); + const auto query = Q_(R"( + CREATE TABLE tb( + b Int64, + bval Int64, + PRIMARY KEY(b) + ); + )"); + auto result = session.ExecuteSchemeQuery(query).ExtractValueSync(); + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + } + { + auto session = client.CreateSession().GetValueSync().GetSession(); + const auto query = Q_(R"( + CREATE TABLE tc( + c Int64, + cval Int64, + PRIMARY KEY(c) + ); + )"); + auto result = session.ExecuteSchemeQuery(query).ExtractValueSync(); + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + } + { + auto result = db.ExecuteQuery(R"( + UPSERT INTO ta(a, b, c) VALUES (1, 1001, 2001), (2, 1002, 2002), (3, 1003, 2003); + UPSERT INTO tb(b, bval) VALUES (1001, 1001), (1002, 1002), (1003, 1003); + UPSERT INTO tc(c, cval) VALUES (2001, 2001), (2002, 2002), (2003, 2003); + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx(), settings).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + } + { + auto result = db.ExecuteQuery(R"( + SELECT ta.a, tb.bval, tc.cval FROM ta INNER JOIN tb ON ta.b = tb.b LEFT JOIN tc ON ta.c = tc.cval; + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx(), settings).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([[[3];[1003];[2003]];[[2];[1002];[2002]];[[1];[1001];[2001]]])", FormatResultSetYson(result.GetResultSet(0))); + } + } + // join on key prefix => index-lookup Y_UNIT_TEST(RightSemiJoin_KeyPrefix) { TKikimrRunner kikimr(SyntaxV1Settings()); diff --git a/ydb/core/kqp/ut/olap/aggregations_ut.cpp b/ydb/core/kqp/ut/olap/aggregations_ut.cpp index 9a70ee376fed..4f23f3c67c0d 100644 --- a/ydb/core/kqp/ut/olap/aggregations_ut.cpp +++ b/ydb/core/kqp/ut/olap/aggregations_ut.cpp @@ -163,7 +163,7 @@ 
Y_UNIT_TEST_SUITE(KqpOlapAggregations) { // Check plan #if SSA_RUNTIME_VERSION >= 2U - CheckPlanForAggregatePushdown(query, tableClient, { "WideCombiner" }, "Aggregate-TableFullScan"); + CheckPlanForAggregatePushdown(query, tableClient, { "WideCombiner" }, "TableFullScan"); // CheckPlanForAggregatePushdown(query, tableClient, { "TKqpOlapAgg" }, "TableFullScan"); #else CheckPlanForAggregatePushdown(query, tableClient, { "CombineCore" }, ""); @@ -352,7 +352,8 @@ Y_UNIT_TEST_SUITE(KqpOlapAggregations) { .AddExpectedPlanOptions("KqpOlapFilter") #if SSA_RUNTIME_VERSION >= 2U .AddExpectedPlanOptions("TKqpOlapAgg") - .MutableLimitChecker().SetExpectedResultCount(1) + // See https://github.com/ydb-platform/ydb/issues/7299 for explanation, why resultCount = 3 + .MutableLimitChecker().SetExpectedResultCount(3) #else .AddExpectedPlanOptions("CombineCore") #endif @@ -980,7 +981,7 @@ Y_UNIT_TEST_SUITE(KqpOlapAggregations) { ORDER BY c, resource_id DESC LIMIT 3 )") .SetExpectedReply("[[[\"40999\"];[4];1u];[[\"40998\"];[3];1u];[[\"40997\"];[2];1u]]") - .SetExpectedReadNodeType("Aggregate-TableFullScan"); + .SetExpectedReadNodeType("TableFullScan"); testCase.FillExpectedAggregationGroupByPlanOptions(); TestAggregations({ testCase }); } diff --git a/ydb/core/kqp/ut/olap/blobs_sharing_ut.cpp b/ydb/core/kqp/ut/olap/blobs_sharing_ut.cpp index 560482ca4c44..ea97c44484f3 100644 --- a/ydb/core/kqp/ut/olap/blobs_sharing_ut.cpp +++ b/ydb/core/kqp/ut/olap/blobs_sharing_ut.cpp @@ -15,7 +15,6 @@ namespace NKikimr::NKqp { Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) { - namespace { class TTransferStatus { private: @@ -60,7 +59,6 @@ Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) { return TConclusionStatus::Success(); } virtual void DoSerializeToProto(NKikimrColumnShardDataSharingProto::TInitiator::TController& /*proto*/) const override { - } virtual TString GetClassName() const override { @@ -68,10 +66,16 @@ Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) { } }; + TKikimrSettings GetKikimrSettings() { + 
NKikimrConfig::TFeatureFlags featureFlags; + featureFlags.SetEnableAlterShardingInColumnShard(true); + return TKikimrSettings().SetWithSampleTables(false).SetFeatureFlags(featureFlags); + } + class TSharingDataTestCase { private: const ui32 ShardsCount; - TKikimrRunner& Kikimr; + TKikimrRunner Kikimr; TTypedLocalHelper Helper; NYDBTest::TControllers::TGuard Controller; std::vector ShardIds; @@ -86,15 +90,15 @@ Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) { Helper.FillPKOnly(kff, recordsCount); } - TSharingDataTestCase(const ui32 shardsCount, TKikimrRunner& kikimr) + TSharingDataTestCase(const ui32 shardsCount) : ShardsCount(shardsCount) - , Kikimr(kikimr) + , Kikimr(GetKikimrSettings()) , Helper("", Kikimr, "olapTable", "olapStore12") , Controller(NYDBTest::TControllers::RegisterCSControllerGuard()) { Controller->SetCompactionControl(NYDBTest::EOptimizerCompactionWeightControl::Disable); Controller->SetExpectedShardsCount(ShardsCount); - Controller->SetPeriodicWakeupActivationPeriod(TDuration::Seconds(1)); - Controller->SetReadTimeoutClean(TDuration::Seconds(1)); + Controller->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + Controller->SetOverrideReadTimeoutClean(TDuration::Seconds(1)); Tests::NCommon::TLoggerInit(Kikimr).SetComponents({ NKikimrServices::TX_COLUMNSHARD }, "CS").Initialize(); @@ -111,6 +115,7 @@ Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) { } void WaitNormalization() { + Controller->SetOverrideReadTimeoutClean(TDuration::Seconds(1)); Controller->SetCompactionControl(NYDBTest::EOptimizerCompactionWeightControl::Force); const auto start = TInstant::Now(); while (!Controller->IsTrivialLinks() && TInstant::Now() - start < TDuration::Seconds(30)) { @@ -119,9 +124,11 @@ Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) { } AFL_VERIFY(Controller->IsTrivialLinks()); Controller->CheckInvariants(); + Controller->SetOverrideReadTimeoutClean(TDuration::Minutes(5)); } void Execute(const ui64 destinationIdx, const std::vector& sourceIdxs, const bool move, const 
NOlap::TSnapshot& snapshot, const std::set& pathIdxs) { + Controller->SetOverrideReadTimeoutClean(TDuration::Seconds(1)); AFL_VERIFY(destinationIdx < ShardIds.size()); const ui64 destination = ShardIds[destinationIdx]; std::vector sources; @@ -188,21 +195,19 @@ Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) { } CSTransferStatus->Reset(); AFL_VERIFY(!Controller->IsTrivialLinks()); + Controller->CheckInvariants(); + Controller->SetOverrideReadTimeoutClean(TDuration::Minutes(5)); } }; Y_UNIT_TEST(BlobsSharingSplit1_1) { - auto settings = TKikimrSettings().SetWithSampleTables(false); - TKikimrRunner kikimr(settings); - TSharingDataTestCase tester(4, kikimr); + TSharingDataTestCase tester(4); tester.AddRecords(800000); Sleep(TDuration::Seconds(1)); tester.Execute(0, { 1 }, false, NOlap::TSnapshot(TInstant::Now().MilliSeconds(), 1232123), { 0 }); } Y_UNIT_TEST(BlobsSharingSplit1_1_clean) { - auto settings = TKikimrSettings().SetWithSampleTables(false); - TKikimrRunner kikimr(settings); - TSharingDataTestCase tester(2, kikimr); + TSharingDataTestCase tester(2); tester.AddRecords(80000); CompareYson(tester.GetHelper().GetQueryResult("SELECT COUNT(*) FROM `/Root/olapStore12/olapTable`"), R"([[80000u;]])"); Sleep(TDuration::Seconds(1)); @@ -214,9 +219,7 @@ Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) { } Y_UNIT_TEST(BlobsSharingSplit1_1_clean_with_restarts) { - auto settings = TKikimrSettings().SetWithSampleTables(false); - TKikimrRunner kikimr(settings); - TSharingDataTestCase tester(2, kikimr); + TSharingDataTestCase tester(2); tester.SetRebootTablet(true); tester.AddRecords(80000); CompareYson(tester.GetHelper().GetQueryResult("SELECT COUNT(*) FROM `/Root/olapStore12/olapTable`"), R"([[80000u;]])"); @@ -229,18 +232,14 @@ Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) { } Y_UNIT_TEST(BlobsSharingSplit3_1) { - auto settings = TKikimrSettings().SetWithSampleTables(false); - TKikimrRunner kikimr(settings); - TSharingDataTestCase tester(4, kikimr); + TSharingDataTestCase tester(4); 
tester.AddRecords(800000); Sleep(TDuration::Seconds(1)); tester.Execute(0, { 1, 2, 3 }, false, NOlap::TSnapshot(TInstant::Now().MilliSeconds(), 1232123), { 0 }); } Y_UNIT_TEST(BlobsSharingSplit1_3_1) { - auto settings = TKikimrSettings().SetWithSampleTables(false); - TKikimrRunner kikimr(settings); - TSharingDataTestCase tester(4, kikimr); + TSharingDataTestCase tester(4); tester.AddRecords(800000); Sleep(TDuration::Seconds(1)); tester.Execute(1, { 0 }, false, NOlap::TSnapshot(TInstant::Now().MilliSeconds(), 1232123), { 0 }); @@ -250,9 +249,7 @@ Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) { } Y_UNIT_TEST(BlobsSharingSplit1_3_2_1_clean) { - auto settings = TKikimrSettings().SetWithSampleTables(false); - TKikimrRunner kikimr(settings); - TSharingDataTestCase tester(4, kikimr); + TSharingDataTestCase tester(4); tester.AddRecords(800000); Sleep(TDuration::Seconds(1)); tester.Execute(1, { 0 }, false, NOlap::TSnapshot(TInstant::Now().MilliSeconds(), 1232123), { 0 }); @@ -266,8 +263,15 @@ Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) { } class TReshardingTest { - private: - YDB_ACCESSOR(TString, ShardingType, "HASH_FUNCTION_CONSISTENCY_64"); + public: + TReshardingTest() + : Kikimr(GetKikimrSettings()) + , CSController(NYDBTest::TControllers::RegisterCSControllerGuard()) + , TableClient(Kikimr.GetTableClient()) { + CSController->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + CSController->SetOverrideLagForCompactionBeforeTierings(TDuration::Seconds(1)); + CSController->SetOverrideReduceMemoryIntervalLimit(1LLU << 30); + } void WaitResharding(const TString& hint = "") { const TInstant start = TInstant::Now(); @@ -303,28 +307,22 @@ Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) { CompareYson(result, "[[" + ::ToString(expectation) + "u;]]"); } + protected: TKikimrRunner Kikimr; - public: + NKikimr::NYDBTest::TControllers::TGuard CSController; + NYdb::NTable::TTableClient TableClient; + }; - TReshardingTest() - : Kikimr(TKikimrSettings().SetWithSampleTables(false)) { + class 
TShardingTypeTest: public TReshardingTest { + YDB_ACCESSOR(TString, ShardingType, "HASH_FUNCTION_CONSISTENCY_64"); - } + public: + using TReshardingTest::TReshardingTest; void Execute() { - auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); - csController->SetPeriodicWakeupActivationPeriod(TDuration::Seconds(1)); - csController->SetLagForCompactionBeforeTierings(TDuration::Seconds(1)); - csController->SetOverrideReduceMemoryIntervalLimit(1LLU << 30); - TLocalHelper(Kikimr).SetShardingMethod(ShardingType).CreateTestOlapTable("olapTable", "olapStore", 24, 4); - auto tableClient = Kikimr.GetTableClient(); - Tests::NCommon::TLoggerInit(Kikimr).SetComponents({ NKikimrServices::TX_COLUMNSHARD }, "CS").SetPriority(NActors::NLog::PRI_DEBUG).Initialize(); - - std::vector uids; - std::vector resourceIds; - std::vector levels; + Tests::NCommon::TLoggerInit(Kikimr).SetComponents({ NKikimrServices::TX_COLUMNSHARD, NKikimrServices::TX_COLUMNSHARD_SCAN }, "CS").SetPriority(NActors::NLog::PRI_DEBUG).Initialize(); { WriteTestData(Kikimr, "/Root/olapStore/olapTable", 1000000, 300000000, 10000); @@ -334,82 +332,161 @@ Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) { WriteTestData(Kikimr, "/Root/olapStore/olapTable", 1400000, 300400000, 10000); WriteTestData(Kikimr, "/Root/olapStore/olapTable", 2000000, 200000000, 70000); WriteTestData(Kikimr, "/Root/olapStore/olapTable", 3000000, 100000000, 110000); - - const auto filler = [&](const ui32 startRes, const ui32 startUid, const ui32 count) { - for (ui32 i = 0; i < count; ++i) { - uids.emplace_back("uid_" + ::ToString(startUid + i)); - resourceIds.emplace_back(::ToString(startRes + i)); - levels.emplace_back(i % 5); - } - }; - - filler(1000000, 300000000, 10000); - filler(1100000, 300100000, 10000); - filler(1200000, 300200000, 10000); - filler(1300000, 300300000, 10000); - filler(1400000, 300400000, 10000); - filler(2000000, 200000000, 70000); - filler(3000000, 100000000, 110000); - } CheckCount(230000); for (ui32 i = 0; i 
< 2; ++i) { auto alterQuery = TStringBuilder() << R"(ALTER OBJECT `/Root/olapStore/olapTable` (TYPE TABLESTORE) SET (ACTION=ALTER_SHARDING, MODIFICATION=SPLIT);)"; - auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto session = TableClient.CreateSession().GetValueSync().GetSession(); auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); WaitResharding("SPLIT:" + ::ToString(i)); } { auto alterQuery = TStringBuilder() << R"(ALTER OBJECT `/Root/olapStore/olapTable` (TYPE TABLESTORE) SET (ACTION=ALTER_SHARDING, MODIFICATION=SPLIT);)"; - auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto session = TableClient.CreateSession().GetValueSync().GetSession(); auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); UNIT_ASSERT_VALUES_UNEQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); } - AFL_VERIFY(csController->GetShardingFiltersCount().Val() == 0); + AFL_VERIFY(CSController->GetShardingFiltersCount().Val() == 0); CheckCount(230000); - i64 count = csController->GetShardingFiltersCount().Val(); + i64 count = CSController->GetShardingFiltersCount().Val(); AFL_VERIFY(count >= 16)("count", count); + CSController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); + CSController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); + CSController->WaitIndexation(TDuration::Seconds(3)); + CSController->WaitCompactions(TDuration::Seconds(3)); WriteTestData(Kikimr, "/Root/olapStore/olapTable", 1000000, 300000000, 10000); - csController->WaitIndexation(TDuration::Seconds(5)); - csController->WaitCompactions(TDuration::Seconds(5)); + CheckCount(230000); + CSController->EnableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); + 
CSController->WaitIndexation(TDuration::Seconds(5)); + CheckCount(230000); + CSController->EnableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); + CSController->WaitCompactions(TDuration::Seconds(5)); + count = CSController->GetShardingFiltersCount().Val(); + CheckCount(230000); - csController->SetCompactionControl(NYDBTest::EOptimizerCompactionWeightControl::Disable); + CSController->SetCompactionControl(NYDBTest::EOptimizerCompactionWeightControl::Disable); CheckCount(230000); - AFL_VERIFY(count == csController->GetShardingFiltersCount().Val())("count", count)("val", csController->GetShardingFiltersCount().Val()); + AFL_VERIFY(count == CSController->GetShardingFiltersCount().Val())("count", count)( + "val", CSController->GetShardingFiltersCount().Val()); const ui32 portionsCount = 16; for (ui32 i = 0; i < 4; ++i) { { auto alterQuery = TStringBuilder() << R"(ALTER OBJECT `/Root/olapStore/olapTable` (TYPE TABLESTORE) SET (ACTION=ALTER_SHARDING, MODIFICATION=MERGE);)"; - auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto session = TableClient.CreateSession().GetValueSync().GetSession(); auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); } WaitResharding("MERGE:" + ::ToString(i)); - // csController->WaitCleaning(TDuration::Seconds(5)); + // CSController->WaitCleaning(TDuration::Seconds(5)); CheckCount(230000); - AFL_VERIFY(count + portionsCount == csController->GetShardingFiltersCount().Val())("count", count)("val", csController->GetShardingFiltersCount().Val()); + AFL_VERIFY(count + portionsCount == CSController->GetShardingFiltersCount().Val())("count", count)( + "val", CSController->GetShardingFiltersCount().Val()); count += portionsCount; } { auto alterQuery = TStringBuilder() << R"(ALTER OBJECT `/Root/olapStore/olapTable` (TYPE TABLESTORE) SET (ACTION=ALTER_SHARDING, 
MODIFICATION=MERGE);)"; - auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto session = TableClient.CreateSession().GetValueSync().GetSession(); auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); UNIT_ASSERT_VALUES_UNEQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); } + CSController->CheckInvariants(); } }; Y_UNIT_TEST(TableReshardingConsistency64) { - TReshardingTest().SetShardingType("HASH_FUNCTION_CONSISTENCY_64").Execute(); + TShardingTypeTest().SetShardingType("HASH_FUNCTION_CONSISTENCY_64").Execute(); } Y_UNIT_TEST(TableReshardingModuloN) { - TReshardingTest().SetShardingType("HASH_FUNCTION_MODULO_N").Execute(); + /* Modulo-N variant: keeps the sharding type of the line it replaces instead of repeating the CONSISTENCY_64 case above. */ TShardingTypeTest().SetShardingType("HASH_FUNCTION_MODULO_N").Execute(); + } + + class TAsyncReshardingTest: public TReshardingTest { + YDB_ACCESSOR(TString, ShardingType, "HASH_FUNCTION_CONSISTENCY_64"); + + public: + TAsyncReshardingTest() { + TLocalHelper(Kikimr).CreateTestOlapTable("olapTable", "olapStore", 24, 4); + } + + void AddBatch(int numRows) { + WriteTestData(Kikimr, "/Root/olapStore/olapTable", LastPathId, LastTs, numRows); + LastPathId += numRows * 10; + LastTs += numRows * 10; + NumRows += numRows; + } + + void StartResharding(TString modification) { + auto alterQuery = + TStringBuilder() << R"(ALTER OBJECT `/Root/olapStore/olapTable` (TYPE TABLESTORE) SET (ACTION=ALTER_SHARDING, MODIFICATION=)" + << modification << ");"; + auto session = TableClient.CreateSession().GetValueSync().GetSession(); + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + + void CheckCount() { + TReshardingTest::CheckCount(NumRows); + } + + void ChangeSchema() { + auto alterQuery = + "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=ALTER_COLUMN, NAME=level, " + "`SERIALIZER.CLASS_NAME`=`ARROW_SERIALIZER`,
" + "`COMPRESSION.TYPE`=`zstd`);"; + auto session = TableClient.CreateSession().GetValueSync().GetSession(); + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + + void DisableCompaction() { + CSController->SetCompactionControl(NYDBTest::EOptimizerCompactionWeightControl::Disable); + } + + private: + ui64 LastPathId = 1000000; + ui64 LastTs = 300000000; + ui64 NumRows = 0; + }; + + Y_UNIT_TEST(UpsertWhileSplitTest) { + TAsyncReshardingTest tester; + + tester.AddBatch(10000); + + tester.CheckCount(); + + for (int i = 0; i < 4; i++) { + tester.StartResharding("SPLIT"); + + tester.CheckCount(); + tester.AddBatch(10000); + tester.CheckCount(); + tester.WaitResharding(); + } + tester.AddBatch(10000); + tester.CheckCount(); + } + + Y_UNIT_TEST(ChangeSchemaAndSplit) { + TAsyncReshardingTest tester; + tester.DisableCompaction(); + + tester.AddBatch(10000); + tester.ChangeSchema(); + tester.AddBatch(10000); + + tester.StartResharding("SPLIT"); + tester.WaitResharding(); + + tester.CheckCount(); } } } diff --git a/ydb/core/kqp/ut/olap/clickbench_ut.cpp b/ydb/core/kqp/ut/olap/clickbench_ut.cpp index 1cbcd7be436d..dfdb63033187 100644 --- a/ydb/core/kqp/ut/olap/clickbench_ut.cpp +++ b/ydb/core/kqp/ut/olap/clickbench_ut.cpp @@ -155,7 +155,7 @@ Y_UNIT_TEST_SUITE(KqpOlapClickbench) { //.SetExpectedReply("[[[\"40999\"];[4];1u];[[\"40998\"];[3];1u];[[\"40997\"];[2];1u]]") // Should be fixed in https://st.yandex-team.ru/KIKIMR-17009 // .SetExpectedReadNodeType("TableFullScan"); - .SetExpectedReadNodeType("Aggregate-TableFullScan"); + .SetExpectedReadNodeType("TableFullScan"); q7.FillExpectedAggregationGroupByPlanOptions(); TAggregationTestCase q9; @@ -186,7 +186,7 @@ Y_UNIT_TEST_SUITE(KqpOlapClickbench) { //.SetExpectedReply("[[[\"40999\"];[4];1u];[[\"40998\"];[3];1u];[[\"40997\"];[2];1u]]") // Should be fixed in 
https://st.yandex-team.ru/KIKIMR-17009 // .SetExpectedReadNodeType("TableFullScan"); - .SetExpectedReadNodeType("Aggregate-TableFullScan"); + .SetExpectedReadNodeType("TableFullScan"); q12.FillExpectedAggregationGroupByPlanOptions(); TAggregationTestCase q14; @@ -202,7 +202,7 @@ Y_UNIT_TEST_SUITE(KqpOlapClickbench) { //.SetExpectedReply("[[[\"40999\"];[4];1u];[[\"40998\"];[3];1u];[[\"40997\"];[2];1u]]") // Should be fixed in https://st.yandex-team.ru/KIKIMR-17009 // .SetExpectedReadNodeType("TableFullScan"); - .SetExpectedReadNodeType("Aggregate-TableFullScan"); + .SetExpectedReadNodeType("TableFullScan"); q14.FillExpectedAggregationGroupByPlanOptions(); TAggregationTestCase q22; diff --git a/ydb/core/kqp/ut/olap/datatime64_ut.cpp b/ydb/core/kqp/ut/olap/datatime64_ut.cpp index b4ea097eafe8..97e2a2f2fd28 100644 --- a/ydb/core/kqp/ut/olap/datatime64_ut.cpp +++ b/ydb/core/kqp/ut/olap/datatime64_ut.cpp @@ -170,6 +170,7 @@ Y_UNIT_TEST_SUITE(KqpDatetime64ColumnShard) { runnerSettings.WithSampleTables = false; TTestHelper testHelper(runnerSettings); + Tests::NCommon::TLoggerInit(testHelper.GetKikimr()).SetComponents({ NKikimrServices::GROUPED_MEMORY_LIMITER }, "CS").Initialize(); TVector schema = { TTestHelper::TColumnSchema().SetName("id").SetType(NScheme::NTypeIds::Int64).SetNullable(false), diff --git a/ydb/core/kqp/ut/olap/decimal_ut.cpp b/ydb/core/kqp/ut/olap/decimal_ut.cpp new file mode 100644 index 000000000000..151226bd01d6 --- /dev/null +++ b/ydb/core/kqp/ut/olap/decimal_ut.cpp @@ -0,0 +1,198 @@ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace NKikimr { +namespace NKqp { + +using namespace NYdb; +using namespace NYdb::NTable; + +Y_UNIT_TEST_SUITE(KqpDecimalColumnShard) { + class TDecimalTestCase { + public: + TDecimalTestCase() + : TestHelper(TKikimrSettings().SetWithSampleTables(false)) { + } + + TTestHelper::TUpdatesBuilder Inserter() 
{ + return TTestHelper::TUpdatesBuilder(TestTable.GetArrowSchema(Schema)); + } + + void Upsert(TTestHelper::TUpdatesBuilder& inserter) { + TestHelper.BulkUpsert(TestTable, inserter); + } + + void CheckQuery(const TString& query, const TString& expected) { + TestHelper.ReadData(query, expected); + } + + void PrepareTable1() { + Schema = { + TTestHelper::TColumnSchema().SetName("id").SetType(NScheme::NTypeIds::Int32).SetNullable(false), + TTestHelper::TColumnSchema().SetName("int").SetType(NScheme::NTypeIds::Int64), + TTestHelper::TColumnSchema().SetName("dec").SetType(NScheme::NTypeIds::Decimal), + }; + TestTable.SetName("/Root/Table1").SetPrimaryKey({ "id" }).SetSharding({ "id" }).SetSchema(Schema); + TestHelper.CreateTable(TestTable); + + { + TTestHelper::TUpdatesBuilder inserter = Inserter(); + inserter.AddRow().Add(1).Add(4).Add(TDecimalValue("3.14")); + inserter.AddRow().Add(2).Add(3).Add(TDecimalValue("8.16")); + Upsert(inserter); + } + { + TTestHelper::TUpdatesBuilder inserter = Inserter(); + inserter.AddRow().Add(4).Add(1).Add(TDecimalValue("12.46")); + inserter.AddRow().Add(3).Add(2).Add(TDecimalValue("8.492")); + + Upsert(inserter); + } + } + + void PrepareTable2() { + Schema = { + TTestHelper::TColumnSchema().SetName("id").SetType(NScheme::NTypeIds::Int32).SetNullable(false), + TTestHelper::TColumnSchema().SetName("table1_id").SetType(NScheme::NTypeIds::Int64), + TTestHelper::TColumnSchema().SetName("dec").SetType(NScheme::NTypeIds::Decimal), + }; + TestTable.SetName("/Root/Table2").SetPrimaryKey({ "id" }).SetSharding({ "id" }).SetSchema(Schema); + TestHelper.CreateTable(TestTable); + + { + TTestHelper::TUpdatesBuilder inserter = Inserter(); + inserter.AddRow().Add(1).Add(1).Add(TDecimalValue("12.46")); + inserter.AddRow().Add(2).Add(1).Add(TDecimalValue("8.16")); + inserter.AddRow().Add(3).Add(2).Add(TDecimalValue("12.46")); + inserter.AddRow().Add(4).Add(2).Add(TDecimalValue("8.16")); + Upsert(inserter); + } + } + + private: + TTestHelper TestHelper; + 
+ TVector Schema; + TTestHelper::TColumnTable TestTable; + }; + + Y_UNIT_TEST(TestSimpleQueries) { + TDecimalTestCase tester; + tester.PrepareTable1(); + + tester.CheckQuery("SELECT * FROM `/Root/Table1` WHERE id=1", "[[[\"3.14\"];1;[4]]]"); + tester.CheckQuery( + "SELECT * FROM `/Root/Table1` order by id", "[[[\"3.14\"];1;[4]];[[\"8.16\"];2;[3]];[[\"8.492\"];3;[2]];[[\"12.46\"];4;[1]]]"); + } + + Y_UNIT_TEST(TestFilterEqual) { + TDecimalTestCase tester; + tester.PrepareTable1(); + + tester.CheckQuery("SELECT * FROM `/Root/Table1` WHERE dec == cast(\"3.14\" as decimal(22,9))", "[[[\"3.14\"];1;[4]]]"); + + tester.CheckQuery("SELECT * FROM `/Root/Table1` WHERE dec != cast(\"3.14\" as decimal(22,9)) order by id", + "[[[\"8.16\"];2;[3]];[[\"8.492\"];3;[2]];[[\"12.46\"];4;[1]]]"); + } + + Y_UNIT_TEST(TestFilterNulls) { + TDecimalTestCase tester; + tester.PrepareTable1(); + + TTestHelper::TUpdatesBuilder inserter = tester.Inserter(); + inserter.AddRow().Add(5).Add(5).AddNull(); + inserter.AddRow().Add(6).Add(6).AddNull(); + tester.Upsert(inserter); + + tester.CheckQuery("SELECT * FROM `/Root/Table1` WHERE dec is NULL order by id", "[[#;5;[5]];[#;6;[6]]]"); + + tester.CheckQuery("SELECT * FROM `/Root/Table1` WHERE dec is not NULL order by id", + "[[[\"3.14\"];1;[4]];[[\"8.16\"];2;[3]];[[\"8.492\"];3;[2]];[[\"12.46\"];4;[1]]]"); + } + + Y_UNIT_TEST(TestFilterCompare) { + TDecimalTestCase tester; + tester.PrepareTable1(); + + tester.CheckQuery("SELECT * FROM `/Root/Table1` WHERE dec < cast(\"12.46\" as decimal(22,9)) order by id", + "[[[\"3.14\"];1;[4]];[[\"8.16\"];2;[3]];[[\"8.492\"];3;[2]]]"); + + tester.CheckQuery( + "SELECT * FROM `/Root/Table1` WHERE dec > cast(\"8.16\" as decimal(22,9)) order by id", "[[[\"8.492\"];3;[2]];[[\"12.46\"];4;[1]]]"); + + tester.CheckQuery("SELECT * FROM `/Root/Table1` WHERE dec <= cast(\"12.46\" as decimal(22,9)) order by id", + "[[[\"3.14\"];1;[4]];[[\"8.16\"];2;[3]];[[\"8.492\"];3;[2]];[[\"12.46\"];4;[1]]]"); + + 
tester.CheckQuery("SELECT * FROM `/Root/Table1` WHERE dec >= cast(\"8.492\" as decimal(22,9)) order by id", + "[[[\"8.16\"];2;[3]];[[\"8.492\"];3;[2]];[[\"12.46\"];4;[1]]]"); + } + + Y_UNIT_TEST(TestOrderByDecimal) { + TDecimalTestCase tester; + tester.PrepareTable1(); + + tester.CheckQuery( + "SELECT * FROM `/Root/Table1` order by dec", "[[[\"3.14\"];1;[4]];[[\"8.16\"];2;[3]];[[\"8.492\"];3;[2]];[[\"12.46\"];4;[1]]]"); + } + + Y_UNIT_TEST(TestGroupByDecimal) { + TDecimalTestCase tester; + tester.PrepareTable1(); + + TTestHelper::TUpdatesBuilder inserter = tester.Inserter(); + inserter.AddRow().Add(5).Add(12).Add(TDecimalValue("8.492")); + inserter.AddRow().Add(6).Add(30).Add(TDecimalValue("12.46")); + tester.Upsert(inserter); + + tester.CheckQuery("SELECT dec, count(*) FROM `/Root/Table1` group by dec order by dec", + "[[[\"3.14\"];1u];[[\"8.16\"];1u];[[\"8.492\"];2u];[[\"12.46\"];2u]]"); + } + + Y_UNIT_TEST(TestAggregation) { + TDecimalTestCase tester; + tester.PrepareTable1(); + tester.CheckQuery("SELECT min(dec) FROM `/Root/Table1`", "[[[\"3.14\"]]]"); + tester.CheckQuery("SELECT max(dec) FROM `/Root/Table1`", "[[[\"12.46\"]]]"); + tester.CheckQuery("SELECT sum(dec) FROM `/Root/Table1`", "[[[\"32.252\"]]]"); + } + + Y_UNIT_TEST(TestJoinById) { + TDecimalTestCase tester; + tester.PrepareTable1(); + tester.PrepareTable2(); + + tester.CheckQuery( + "SELECT t1.id, t1.dec, t2.dec FROM `/Root/Table1` as t1 join `/Root/Table2` as t2 on t1.id = t2.table1_id order by t1.id, t1.dec, " + "t2.dec", + R"([[1;["3.14"];["8.16"]];[1;["3.14"];["12.46"]];[2;["8.16"];["8.16"]];[2;["8.16"];["12.46"]]])"); + } + + Y_UNIT_TEST(TestJoinByDecimal) { + TDecimalTestCase tester; + tester.PrepareTable1(); + tester.PrepareTable2(); + + tester.CheckQuery( + "SELECT t1.id, t2.id, t1.dec FROM `/Root/Table1` as t1 join `/Root/Table2` as t2 on t1.dec = t2.dec order by t1.id, t2.id, t1.dec", + R"([[2;2;["8.16"]];[2;4;["8.16"]];[4;1;["12.46"]];[4;3;["12.46"]]])"); + } +} + +} // namespace NKqp +} 
// namespace NKikimr diff --git a/ydb/core/kqp/ut/olap/helpers/aggregation.cpp b/ydb/core/kqp/ut/olap/helpers/aggregation.cpp index 7bed6f4a4aa4..1c03f6be9ffa 100644 --- a/ydb/core/kqp/ut/olap/helpers/aggregation.cpp +++ b/ydb/core/kqp/ut/olap/helpers/aggregation.cpp @@ -13,6 +13,7 @@ void TestAggregationsBase(const std::vector& cases) { TLocalHelper(kikimr).CreateTestOlapTable(); auto tableClient = kikimr.GetTableClient(); + Tests::NCommon::TLoggerInit(kikimr).SetComponents({ NKikimrServices::GROUPED_MEMORY_LIMITER, NKikimrServices::TX_COLUMNSHARD_SCAN }, "CS").Initialize(); { WriteTestData(kikimr, "/Root/olapStore/olapTable", 10000, 3000000, 1000); @@ -49,10 +50,11 @@ void TestAggregationsInternal(const std::vector& cases) { Tests::TServer::TPtr server = new Tests::TServer(settings); auto runtime = server->GetRuntime(); + Tests::NCommon::TLoggerInit(runtime).Initialize(); + Tests::NCommon::TLoggerInit(runtime).SetComponents({ NKikimrServices::GROUPED_MEMORY_LIMITER }, "CS").Initialize(); auto sender = runtime->AllocateEdgeActor(); InitRoot(server, sender); - Tests::NCommon::TLoggerInit(runtime).Initialize(); ui32 numShards = 1; ui32 numIterations = 10; diff --git a/ydb/core/kqp/ut/olap/helpers/aggregation.h b/ydb/core/kqp/ut/olap/helpers/aggregation.h index e83dcb32fd87..de9ab275fa79 100644 --- a/ydb/core/kqp/ut/olap/helpers/aggregation.h +++ b/ydb/core/kqp/ut/olap/helpers/aggregation.h @@ -81,12 +81,7 @@ class TAggregationTestCase { bool UseLlvm = true; public: void FillExpectedAggregationGroupByPlanOptions() { -#if SSA_RUNTIME_VERSION >= 2U - // AddExpectedPlanOptions("TKqpOlapAgg"); AddExpectedPlanOptions("WideCombiner"); -#else - AddExpectedPlanOptions("CombineCore"); -#endif } TString GetFixedQuery() const { TStringBuilder queryFixed; @@ -177,7 +172,8 @@ void CheckPlanForAggregatePushdown( const TString& query, TClient& client, const std::vector& expectedPlanNodes, - const std::string& readNodeType) { + const std::string& readNodeType) +{ auto res = 
StreamExplainQuery(query, client); UNIT_ASSERT_C(res.IsSuccess(), res.GetIssues().ToString()); @@ -242,4 +238,4 @@ void WriteTestDataForTableWithNulls(TKikimrRunner& kikimr, TString testTable); void TestTableWithNulls(const std::vector& cases, const bool genericQuery = false); -} \ No newline at end of file +} diff --git a/ydb/core/kqp/ut/olap/helpers/get_value.cpp b/ydb/core/kqp/ut/olap/helpers/get_value.cpp index 208e5e40e02e..67fe905d8e44 100644 --- a/ydb/core/kqp/ut/olap/helpers/get_value.cpp +++ b/ydb/core/kqp/ut/olap/helpers/get_value.cpp @@ -36,6 +36,11 @@ void PrintValue(IOutputStream& out, const NYdb::TValue& v) { out << value.GetInt64(); break; } + case NYdb::EPrimitiveType::Uint8: + { + out << value.GetUint8(); + break; + } case NYdb::EPrimitiveType::Utf8: { out << value.GetUtf8(); diff --git a/ydb/core/kqp/ut/olap/helpers/local.h b/ydb/core/kqp/ut/olap/helpers/local.h index dc957f98220e..9511ad1828ef 100644 --- a/ydb/core/kqp/ut/olap/helpers/local.h +++ b/ydb/core/kqp/ut/olap/helpers/local.h @@ -28,8 +28,14 @@ class TLocalHelper: public Tests::NCS::THelper { void CreateTestOlapTable(TString tableName = "olapTable", TString storeName = "olapStore", ui32 storeShardsCount = 4, ui32 tableShardsCount = 3) { - CreateOlapTableWithStore(tableName, storeName, storeShardsCount, tableShardsCount); + CreateOlapTablesWithStore({tableName}, storeName, storeShardsCount, tableShardsCount); } + + void CreateTestOlapTables(TVector tableNames = {"olapTable0", "olapTable1"}, TString storeName = "olapStore", + ui32 storeShardsCount = 4, ui32 tableShardsCount = 3) { + CreateOlapTablesWithStore(tableNames, storeName, storeShardsCount, tableShardsCount); + } + using TBase::TBase; TLocalHelper(TKikimrRunner& runner) diff --git a/ydb/core/kqp/ut/olap/helpers/typed_local.cpp b/ydb/core/kqp/ut/olap/helpers/typed_local.cpp index e592ed398d2b..32c08c2c8925 100644 --- a/ydb/core/kqp/ut/olap/helpers/typed_local.cpp +++ b/ydb/core/kqp/ut/olap/helpers/typed_local.cpp @@ -18,6 +18,31 @@ 
TString TTypedLocalHelper::GetTestTableSchema() const { return result; } +TString TTypedLocalHelper::GetMultiColumnTestTableSchema(ui32 reps) const { + TString result; + result += R"( + Columns { Name: "pk_int" Type: "Int64" NotNull: true } + Columns { Name: "ts" Type: "Timestamp" } + )"; + for (ui32 i = 0; i < reps; i++) { + TString strNum = ToString(i); + result += "Columns {Name: \"field_utf" + strNum + "\" Type: \"Utf8\"}\n"; + result += "Columns {Name: \"field_int" + strNum + "\" Type: \"Int64\"}\n"; + result += "Columns {Name: \"field_uint" + strNum + "\" Type: \"Uint8\"}\n"; + result += "Columns {Name: \"field_float" + strNum + "\" Type: \"Float\"}\n"; + result += "Columns {Name: \"field_double" + strNum + "\" Type: \"Double\"}\n"; + } + result += R"( + KeyColumnNames: "pk_int" + Engine: COLUMN_ENGINE_REPLACING_TIMESERIES + )"; + return result; +} + +void TTypedLocalHelper::CreateMultiColumnOlapTableWithStore(ui32 reps, ui32 storeShardsCount, ui32 tableShardsCount) { + CreateSchemaOlapTablesWithStore(GetMultiColumnTestTableSchema(reps), {TableName}, "olapStore", storeShardsCount, tableShardsCount); +} + void TTypedLocalHelper::ExecuteSchemeQuery(const TString& alterQuery, const NYdb::EStatus expectedStatus /*= EStatus::SUCCESS*/) const { auto session = KikimrRunner.GetTableClient().CreateSession().GetValueSync().GetSession(); auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); @@ -79,7 +104,7 @@ NKikimr::NKqp::TTypedLocalHelper::TDistribution TTypedLocalHelper::GetDistributi } void TTypedLocalHelper::GetVolumes(ui64& rawBytes, ui64& bytes, const bool verbose /*= false*/, const std::vector columnNames /*= {}*/) { - TString selectQuery = "SELECT * FROM `" + TablePath + "/.sys/primary_index_stats` WHERE Activity = true"; + TString selectQuery = "SELECT * FROM `" + TablePath + "/.sys/primary_index_stats` WHERE Activity == 1"; if (columnNames.size()) { selectQuery += " AND EntityName IN ('" + JoinSeq("','", columnNames) + "')"; } @@ -143,16 
+168,16 @@ void TTypedLocalHelper::FillPKOnly(const double pkKff /*= 0*/, const ui32 numRow TBase::SendDataViaActorSystem(TablePath, batch); } -void TTypedLocalHelper::GetStats(std::vector& stats, const bool verbose /*= false*/) { - TString selectQuery = "SELECT * FROM `" + TablePath + "/.sys/primary_index_portion_stats` WHERE Activity = true"; +void TTypedLocalHelper::GetStats(std::vector& stats, const bool verbose /*= false*/) { + TString selectQuery = "SELECT * FROM `" + TablePath + "/.sys/primary_index_portion_stats` WHERE Activity == 1"; auto tableClient = KikimrRunner.GetTableClient(); auto rows = ExecuteScanQuery(tableClient, selectQuery, verbose); for (auto&& r : rows) { for (auto&& c : r) { if (c.first == "Stats") { - NKikimrColumnShardStatisticsProto::TPortionStorage store; - AFL_VERIFY(google::protobuf::TextFormat::ParseFromString(GetUtf8(c.second), &store)); - stats.emplace_back(store); + NJson::TJsonValue jsonStore; + AFL_VERIFY(NJson::ReadJsonFastTree(GetUtf8(c.second), &jsonStore)); + stats.emplace_back(jsonStore); } } } diff --git a/ydb/core/kqp/ut/olap/helpers/typed_local.h b/ydb/core/kqp/ut/olap/helpers/typed_local.h index 1afef6b7a19b..df81c5cd6a86 100644 --- a/ydb/core/kqp/ut/olap/helpers/typed_local.h +++ b/ydb/core/kqp/ut/olap/helpers/typed_local.h @@ -1,10 +1,15 @@ #pragma once #include #include +#include +#include +#include + #include -#include -#include -#include + +#include + +#include namespace NKikimr::NKqp { @@ -19,14 +24,14 @@ class TTypedLocalHelper: public Tests::NCS::THelper { protected: virtual TString GetTestTableSchema() const override; virtual std::vector GetShardingColumns() const override { - return {"pk_int"}; + return { "pk_int" }; } public: TTypedLocalHelper(const TString& typeName, TKikimrRunner& kikimrRunner, const TString& tableName = "olapTable", const TString& storeName = "olapStore") : TBase(kikimrRunner.GetTestServer()) , TypeName(typeName) , KikimrRunner(kikimrRunner) - , TablePath("/Root/" + storeName + "/" + 
tableName) + , TablePath(storeName.empty() ? "/Root/" + tableName : "/Root/" + storeName + "/" + tableName) , TableName(tableName) , StoreName(storeName) { SetShardingMethod("HASH_FUNCTION_CONSISTENCY_64"); @@ -66,12 +71,12 @@ class TTypedLocalHelper: public Tests::NCS::THelper { void GetVolumes(ui64& rawBytes, ui64& bytes, const bool verbose = false, const std::vector columnNames = {}); - void GetStats(std::vector& stats, const bool verbose = false); + void GetStats(std::vector& stats, const bool verbose = false); void GetCount(ui64& count); template - void FillTable(const TFiller& fillPolicy, const ui32 pkKff = 0, const ui32 numRows = 800000) const { + void FillTable(const TFiller& fillPolicy, const double pkKff = 0, const ui32 numRows = 800000) const { std::vector builders; builders.emplace_back(NArrow::NConstruction::TSimpleArrayConstructor>::BuildNotNullable("pk_int", numRows * pkKff)); builders.emplace_back(std::make_shared>("field", fillPolicy)); @@ -80,11 +85,38 @@ class TTypedLocalHelper: public Tests::NCS::THelper { TBase::SendDataViaActorSystem(TablePath, batch); } + void FillMultiColumnTable(ui32 repCount, const double pkKff = 0, const ui32 numRows = 800000) const { + const double frq = 0.9; + NArrow::NConstruction::TPoolFiller int64Pool(1000, 0, frq); + NArrow::NConstruction::TPoolFiller uint8Pool(1000, 0, frq); + NArrow::NConstruction::TPoolFiller floatPool(1000, 0, frq); + NArrow::NConstruction::TPoolFiller doublePool(1000, 0, frq); + NArrow::NConstruction::TPoolFiller utfPool(1000, 52, "abcde", frq); + + std::vector builders; + builders.emplace_back(NArrow::NConstruction::TSimpleArrayConstructor>::BuildNotNullable("pk_int", numRows * pkKff)); + for (ui32 i = 0; i < repCount; i++) { + TString repStr = ToString(i); + builders.emplace_back(std::make_shared>>("field_utf" + repStr, utfPool, i)); + builders.emplace_back(std::make_shared>>("field_int" + repStr, int64Pool, i)); + builders.emplace_back(std::make_shared>>("field_uint" + repStr, uint8Pool, 
i)); + builders.emplace_back(std::make_shared>>("field_float" + repStr, floatPool, i)); + builders.emplace_back(std::make_shared>>("field_double" + repStr, doublePool, i)); + } + NArrow::NConstruction::TRecordBatchConstructor batchBuilder(builders); + std::shared_ptr batch = batchBuilder.BuildBatch(numRows); + TBase::SendDataViaActorSystem(TablePath, batch); + } + + void FillPKOnly(const double pkKff = 0, const ui32 numRows = 800000) const; void CreateTestOlapTable(ui32 storeShardsCount = 4, ui32 tableShardsCount = 3) { - CreateOlapTableWithStore(TableName, StoreName, storeShardsCount, tableShardsCount); + CreateOlapTablesWithStore({TableName}, StoreName, storeShardsCount, tableShardsCount); } + + TString GetMultiColumnTestTableSchema(ui32 reps) const; + void CreateMultiColumnOlapTableWithStore(ui32 reps, ui32 storeShardsCount = 4, ui32 tableShardsCount = 3); }; -} \ No newline at end of file +} diff --git a/ydb/core/kqp/ut/olap/indexes_ut.cpp b/ydb/core/kqp/ut/olap/indexes_ut.cpp index 6a80bc2bf133..7699cd8d8b6e 100644 --- a/ydb/core/kqp/ut/olap/indexes_ut.cpp +++ b/ydb/core/kqp/ut/olap/indexes_ut.cpp @@ -17,8 +17,8 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { TKikimrRunner kikimr(settings); auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); - csController->SetPeriodicWakeupActivationPeriod(TDuration::Seconds(1)); - csController->SetLagForCompactionBeforeTierings(TDuration::Seconds(1)); + csController->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + csController->SetOverrideLagForCompactionBeforeTierings(TDuration::Seconds(1)); csController->SetOverrideReduceMemoryIntervalLimit(1LLU << 30); TLocalHelper(kikimr).CreateTestOlapTable(); @@ -111,7 +111,7 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { TKikimrRunner kikimr(settings); auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); - csController->SetPeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + 
csController->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); TLocalHelper(kikimr).CreateTestOlapTable(); auto tableClient = kikimr.GetTableClient(); @@ -171,78 +171,87 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { (ui64)csController->GetActualizationRefreshSchemeCount().Val())("updates", updatesCount)("count", csController->GetActualizationRefreshSchemeCount().Val()); } - Y_UNIT_TEST(Indexes) { - auto settings = TKikimrSettings() - .SetWithSampleTables(false); - TKikimrRunner kikimr(settings); - - TLocalHelper(kikimr).CreateTestOlapTable(); - auto tableClient = kikimr.GetTableClient(); + class TTestIndexesScenario { + private: + TKikimrSettings Settings; + std::unique_ptr Kikimr; + YDB_ACCESSOR(TString, StorageId, "__DEFAULT"); + public: + TTestIndexesScenario& Initialize() { + Settings = TKikimrSettings().SetWithSampleTables(false); + Kikimr = std::make_unique(Settings); + return *this; + } -// Tests::NCommon::TLoggerInit(kikimr).Initialize(); + void Execute() const { + TLocalHelper(*Kikimr).CreateTestOlapTable(); + auto tableClient = Kikimr->GetTableClient(); - auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); - csController->SetOverrideReduceMemoryIntervalLimit(1LLU << 30); - - { - auto alterQuery = TStringBuilder() << - R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_uid, TYPE=BLOOM_FILTER, - FEATURES=`{"column_names" : ["uid"], "false_positive_probability" : 0.05}`); - )"; - auto session = tableClient.CreateSession().GetValueSync().GetSession(); - auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); - } - { - auto alterQuery = TStringBuilder() << - R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_resource_id, TYPE=BLOOM_FILTER, - FEATURES=`{"column_names" : ["resource_id", "level"], "false_positive_probability" : 
0.05}`); - )"; - auto session = tableClient.CreateSession().GetValueSync().GetSession(); - auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); - } - { - auto alterQuery = TStringBuilder() << - "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_OPTIONS, EXTERNAL_GUARANTEE_EXCLUSIVE_PK=`true`);"; - auto session = tableClient.CreateSession().GetValueSync().GetSession(); - auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); - } + // Tests::NCommon::TLoggerInit(kikimr).Initialize(); - std::vector uids; - std::vector resourceIds; - std::vector levels; + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + csController->SetOverrideReduceMemoryIntervalLimit(1LLU << 30); - { - WriteTestData(kikimr, "/Root/olapStore/olapTable", 1000000, 300000000, 10000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 1100000, 300100000, 10000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 1200000, 300200000, 10000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 1300000, 300300000, 10000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 1400000, 300400000, 10000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 2000000, 200000000, 70000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 3000000, 100000000, 110000); + { + auto alterQuery = TStringBuilder() << Sprintf( + R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_uid, TYPE=BLOOM_FILTER, + FEATURES=`{"column_names" : ["uid"], "false_positive_probability" : 0.05, "storage_id" : "%s"}`); + )", StorageId.data()); + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); 
+ UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + { + auto alterQuery = TStringBuilder() << Sprintf( + R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_resource_id, TYPE=BLOOM_FILTER, + FEATURES=`{"column_names" : ["resource_id", "level"], "false_positive_probability" : 0.05, "storage_id" : "%s"}`); + )", StorageId.data() + ); + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + { + auto alterQuery = TStringBuilder() << + "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_OPTIONS, EXTERNAL_GUARANTEE_EXCLUSIVE_PK=`true`);"; + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } - const auto filler = [&](const ui32 startRes, const ui32 startUid, const ui32 count) { - for (ui32 i = 0; i < count; ++i) { - uids.emplace_back("uid_" + ::ToString(startUid + i)); - resourceIds.emplace_back(::ToString(startRes + i)); - levels.emplace_back(i % 5); - } - }; + std::vector uids; + std::vector resourceIds; + std::vector levels; + + { + WriteTestData(*Kikimr, "/Root/olapStore/olapTable", 1000000, 300000000, 10000); + WriteTestData(*Kikimr, "/Root/olapStore/olapTable", 1100000, 300100000, 10000); + WriteTestData(*Kikimr, "/Root/olapStore/olapTable", 1200000, 300200000, 10000); + WriteTestData(*Kikimr, "/Root/olapStore/olapTable", 1300000, 300300000, 10000); + WriteTestData(*Kikimr, "/Root/olapStore/olapTable", 1400000, 300400000, 10000); + WriteTestData(*Kikimr, "/Root/olapStore/olapTable", 2000000, 200000000, 70000); + 
WriteTestData(*Kikimr, "/Root/olapStore/olapTable", 3000000, 100000000, 110000); + + const auto filler = [&](const ui32 startRes, const ui32 startUid, const ui32 count) { + for (ui32 i = 0; i < count; ++i) { + uids.emplace_back("uid_" + ::ToString(startUid + i)); + resourceIds.emplace_back(::ToString(startRes + i)); + levels.emplace_back(i % 5); + } + }; + + filler(1000000, 300000000, 10000); + filler(1100000, 300100000, 10000); + filler(1200000, 300200000, 10000); + filler(1300000, 300300000, 10000); + filler(1400000, 300400000, 10000); + filler(2000000, 200000000, 70000); + filler(3000000, 100000000, 110000); - filler(1000000, 300000000, 10000); - filler(1100000, 300100000, 10000); - filler(1200000, 300200000, 10000); - filler(1300000, 300300000, 10000); - filler(1400000, 300400000, 10000); - filler(2000000, 200000000, 70000); - filler(3000000, 100000000, 110000); - - } + } - { - auto it = tableClient.StreamExecuteScanQuery(R"( + { + auto it = tableClient.StreamExecuteScanQuery(R"( --!syntax_v1 SELECT @@ -250,27 +259,27 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { FROM `/Root/olapStore/olapTable` )").GetValueSync(); - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - TString result = StreamResultToYson(it); - Cout << result << Endl; - CompareYson(result, R"([[230000u;]])"); - } + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + TString result = StreamResultToYson(it); + Cout << result << Endl; + CompareYson(result, R"([[230000u;]])"); + } - AFL_VERIFY(csController->GetIndexesSkippingOnSelect().Val() == 0); - AFL_VERIFY(csController->GetIndexesApprovedOnSelect().Val() == 0); - TInstant start = Now(); - ui32 compactionsStart = csController->GetCompactionStartedCounter().Val(); - while (Now() - start < TDuration::Seconds(10)) { - if (compactionsStart != csController->GetCompactionStartedCounter().Val()) { - compactionsStart = csController->GetCompactionStartedCounter().Val(); - start = Now(); + 
AFL_VERIFY(csController->GetIndexesSkippingOnSelect().Val() == 0); + AFL_VERIFY(csController->GetIndexesApprovedOnSelect().Val() == 0); + TInstant start = Now(); + ui32 compactionsStart = csController->GetCompactionStartedCounter().Val(); + while (Now() - start < TDuration::Seconds(10)) { + if (compactionsStart != csController->GetCompactionStartedCounter().Val()) { + compactionsStart = csController->GetCompactionStartedCounter().Val(); + start = Now(); + } + Cerr << "WAIT_COMPACTION: " << csController->GetCompactionStartedCounter().Val() << Endl; + Sleep(TDuration::Seconds(1)); } - Cerr << "WAIT_COMPACTION: " << csController->GetCompactionStartedCounter().Val() << Endl; - Sleep(TDuration::Seconds(1)); - } - { - auto it = tableClient.StreamExecuteScanQuery(R"( + { + auto it = tableClient.StreamExecuteScanQuery(R"( --!syntax_v1 SELECT @@ -279,40 +288,50 @@ Y_UNIT_TEST_SUITE(KqpOlapIndexes) { WHERE ((resource_id = '2' AND level = 222222) OR (resource_id = '1' AND level = 111111) OR (resource_id LIKE '%11dd%')) AND uid = '222' )").GetValueSync(); - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - TString result = StreamResultToYson(it); - Cout << result << Endl; - Cout << csController->GetIndexesSkippingOnSelect().Val() << " / " << csController->GetIndexesApprovedOnSelect().Val() << Endl; - CompareYson(result, R"([[0u;]])"); - AFL_VERIFY(csController->GetIndexesSkippedNoData().Val() == 0); - AFL_VERIFY(csController->GetIndexesApprovedOnSelect().Val() < csController->GetIndexesSkippingOnSelect().Val() * 0.3); - } - ui32 requestsCount = 100; - for (ui32 i = 0; i < requestsCount; ++i) { - const ui32 idx = RandomNumber(uids.size()); - const auto query = [](const TString& res, const TString& uid, const ui32 level) { - TStringBuilder sb; - sb << "SELECT" << Endl; - sb << "COUNT(*)" << Endl; - sb << "FROM `/Root/olapStore/olapTable`" << Endl; - sb << "WHERE(" << Endl; - sb << "resource_id = '" << res << "' AND" << Endl; - sb << "uid= '" << uid << "' AND" << Endl; 
- sb << "level= " << level << Endl; - sb << ")"; - return sb; - }; - auto it = tableClient.StreamExecuteScanQuery(query(resourceIds[idx], uids[idx], levels[idx])).GetValueSync(); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + TString result = StreamResultToYson(it); + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("result", result); + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("skip", csController->GetIndexesSkippingOnSelect().Val())("check", csController->GetIndexesApprovedOnSelect().Val()); + CompareYson(result, R"([[0u;]])"); + if (StorageId == "__LOCAL_METADATA") { + AFL_VERIFY(csController->GetIndexesSkippedNoData().Val()); + } else { + AFL_VERIFY(csController->GetIndexesSkippedNoData().Val() == 0)("val", csController->GetIndexesSkippedNoData().Val()); + } + AFL_VERIFY(csController->GetIndexesApprovedOnSelect().Val() < csController->GetIndexesSkippingOnSelect().Val()); + } + ui32 requestsCount = 100; + for (ui32 i = 0; i < requestsCount; ++i) { + const ui32 idx = RandomNumber(uids.size()); + const auto query = [](const TString& res, const TString& uid, const ui32 level) { + TStringBuilder sb; + sb << "SELECT COUNT(*) FROM `/Root/olapStore/olapTable`" << Endl; + sb << "WHERE(" << Endl; + sb << "resource_id = '" << res << "' AND" << Endl; + sb << "uid= '" << uid << "' AND" << Endl; + sb << "level= " << level << Endl; + sb << ")"; + return sb; + }; + auto it = tableClient.StreamExecuteScanQuery(query(resourceIds[idx], uids[idx], levels[idx])).GetValueSync(); + + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + TString result = StreamResultToYson(it); + Cout << csController->GetIndexesSkippingOnSelect().Val() << " / " << csController->GetIndexesApprovedOnSelect().Val() << " / " << csController->GetIndexesSkippedNoData().Val() << Endl; + CompareYson(result, R"([[1u;]])"); + } - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - TString result = StreamResultToYson(it); - Cout << csController->GetIndexesSkippingOnSelect().Val() << " / " 
<< csController->GetIndexesApprovedOnSelect().Val() << " / " << csController->GetIndexesSkippedNoData().Val() << Endl; - CompareYson(result, R"([[1u;]])"); + AFL_VERIFY(csController->GetIndexesApprovedOnSelect().Val() < csController->GetIndexesSkippingOnSelect().Val()) + ("approved", csController->GetIndexesApprovedOnSelect().Val())("skipped", csController->GetIndexesSkippingOnSelect().Val()); } + }; - AFL_VERIFY(csController->GetIndexesApprovedOnSelect().Val() < 0.20 * csController->GetIndexesSkippingOnSelect().Val()) - ("approved", csController->GetIndexesApprovedOnSelect().Val())("skipped", csController->GetIndexesSkippingOnSelect().Val()); + Y_UNIT_TEST(IndexesInBS) { + TTestIndexesScenario().SetStorageId("__DEFAULT").Initialize().Execute(); + } + Y_UNIT_TEST(IndexesInLocalMetadata) { + TTestIndexesScenario().SetStorageId("__LOCAL_METADATA").Initialize().Execute(); } Y_UNIT_TEST(IndexesModificationError) { diff --git a/ydb/core/kqp/ut/olap/kqp_olap_stats_ut.cpp b/ydb/core/kqp/ut/olap/kqp_olap_stats_ut.cpp index f9959ede78e3..c1fcab4be0fd 100644 --- a/ydb/core/kqp/ut/olap/kqp_olap_stats_ut.cpp +++ b/ydb/core/kqp/ut/olap/kqp_olap_stats_ut.cpp @@ -12,7 +12,7 @@ using namespace NYdb::NTable; Y_UNIT_TEST_SUITE(KqpOlapStats) { constexpr size_t inserted_rows = 1000; constexpr size_t tables_in_store = 1000; - constexpr size_t size_single_table = 13352; + constexpr size_t size_single_table = 13152; const TVector schema = { TTestHelper::TColumnSchema().SetName("id").SetType(NScheme::NTypeIds::Int32).SetNullable(false), @@ -21,10 +21,10 @@ Y_UNIT_TEST_SUITE(KqpOlapStats) { class TOlapStatsController : public NYDBTest::NColumnShard::TController { public: - TDuration GetPeriodicWakeupActivationPeriod(const TDuration /*defaultValue*/) const override { + TDuration DoGetPeriodicWakeupActivationPeriod(const TDuration /*defaultValue*/) const override { return TDuration::MilliSeconds(10); } - TDuration GetStatsReportInterval(const TDuration /*defaultValue*/) const override { + 
TDuration DoGetStatsReportInterval(const TDuration /*defaultValue*/) const override { return TDuration::MilliSeconds(10); } }; diff --git a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp index 5e5156178415..b68a7fcea98e 100644 --- a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp +++ b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp @@ -51,9 +51,10 @@ Y_UNIT_TEST_SUITE(KqpOlap) { PARTITION BY HASH(timestamp) WITH ( STORE = COLUMN, - AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = %d + PARTITION_COUNT = %d ) - )", storeName.data(), tableName.data(), shardsCount); + )", + storeName.data(), tableName.data(), shardsCount); auto result = session.ExecuteSchemeQuery(query).GetValueSync(); if (result.GetStatus() != EStatus::SUCCESS) { Cerr << result.GetIssues().ToOneLineString() << Endl; @@ -939,7 +940,6 @@ Y_UNIT_TEST_SUITE(KqpOlap) { R"(`resource_id` = "10001")", R"(`resource_id` != "10001")", R"("XXX" == "YYY" OR `resource_id` != "10001")", - R"(`resource_id` != "10001" XOR "XXX" == "YYY")", R"(`level` = 1)", R"(`level` = Int8("1"))", R"(`level` = Int16("1"))", @@ -1021,6 +1021,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { R"((`timestamp`, `level`) >= (Timestamp("1970-01-01T00:00:03.000001Z"), 3))", #endif #if SSA_RUNTIME_VERSION >= 5U + R"(`resource_id` != "10001" XOR "XXX" == "YYY")", R"(IF(`level` > 3, -`level`, +`level`) < 2)", R"(StartsWith(`message` ?? `resource_id`, "10000"))", R"(NOT EndsWith(`message` ?? 
`resource_id`, "xxx"))", @@ -1844,8 +1845,8 @@ Y_UNIT_TEST_SUITE(KqpOlap) { PARTITION BY HASH(WatchID) WITH ( STORE = COLUMN, - AUTO_PARTITIONING_MIN_PARTITIONS_COUNT =)" << numShards - << ")"; + PARTITION_COUNT =)" << numShards + << ")"; auto result = session.ExecuteSchemeQuery(query).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); @@ -1932,10 +1933,9 @@ Y_UNIT_TEST_SUITE(KqpOlap) { WITH ( STORE = COLUMN, AUTO_PARTITIONING_BY_SIZE = ENABLED, - AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 1 + PARTITION_COUNT = 1 ); - )" - ); + )"); lHelper.StartDataRequest( R"( @@ -1987,10 +1987,9 @@ Y_UNIT_TEST_SUITE(KqpOlap) { WITH ( STORE = COLUMN, AUTO_PARTITIONING_BY_SIZE = ENABLED, - AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 1 + PARTITION_COUNT = 1 ); - )" - ); + )"); lHelper.StartDataRequest( R"( @@ -1999,7 +1998,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { ); } -/* + /* Y_UNIT_TEST(OlapDeletePlanned) { TPortManager pm; @@ -2039,7 +2038,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { WITH ( STORE = COLUMN, AUTO_PARTITIONING_BY_SIZE = ENABLED, - AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 8 + PARTITION_COUNT = 8 ); )" ); @@ -2479,14 +2478,14 @@ Y_UNIT_TEST_SUITE(KqpOlap) { PRIMARY KEY (a) ) PARTITION BY HASH(a) - WITH (STORE = COLUMN, AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 4); + WITH (STORE = COLUMN, PARTITION_COUNT = 4); )"; auto result = session.ExecuteSchemeQuery(query).GetValueSync(); UNIT_ASSERT_C(result.GetStatus() == NYdb::EStatus::SUCCESS, result.GetIssues().ToString()); auto client = kikimr.GetQueryClient(); - { + { auto prepareResult = client.ExecuteQuery(R"( REPLACE INTO `/Root/ColumnShard` (a, b, c) VALUES (1u, 1, 5), @@ -2516,15 +2515,16 @@ Y_UNIT_TEST_SUITE(KqpOlap) { UNIT_ASSERT_C(plan.QueryStats->Getquery_ast().Contains("return (FromFlow (NarrowMap (WideFromBlocks"), plan.QueryStats->Getquery_ast()); break; case NKikimrConfig::TTableServiceConfig_EBlockChannelsMode_BLOCK_CHANNELS_AUTO: - 
UNIT_ASSERT_C(plan.QueryStats->Getquery_ast().Contains("return (FromFlow (ExpandMap (NarrowMap (WideFromBlocks"), plan.QueryStats->Getquery_ast()); + UNIT_ASSERT_C(plan.QueryStats->Getquery_ast().Contains("return (FromFlow (WideFromBlocks"), plan.QueryStats->Getquery_ast()); break; case NKikimrConfig::TTableServiceConfig_EBlockChannelsMode_BLOCK_CHANNELS_FORCE: - UNIT_ASSERT_C(plan.QueryStats->Getquery_ast().Contains("return (FromFlow (WideMap"), plan.QueryStats->Getquery_ast()); + UNIT_ASSERT_C(plan.QueryStats->Getquery_ast().Contains("(FromFlow (WideSortBlocks"), plan.QueryStats->Getquery_ast()); + UNIT_ASSERT_C(plan.QueryStats->Getquery_ast().Contains("(FromFlow (NarrowMap (WideFromBlocks"), plan.QueryStats->Getquery_ast()); break; } } - { + { auto it = client.StreamExecuteQuery(R"( SELECT b, COUNT(*), SUM(a) @@ -2624,6 +2624,193 @@ Y_UNIT_TEST_SUITE(KqpOlap) { } + Y_UNIT_TEST(NormalizeAbsentColumn) { + auto settings = TKikimrSettings().SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + TLocalHelper testHelper(kikimr); + + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + csController->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + csController->SetOverrideLagForCompactionBeforeTierings(TDuration::Seconds(1)); + csController->SetOverrideReduceMemoryIntervalLimit(1LLU << 30); + csController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); + + testHelper.CreateTestOlapTable(); + auto tableClient = kikimr.GetTableClient(); + + Tests::NCommon::TLoggerInit(kikimr).SetComponents({ NKikimrServices::TX_COLUMNSHARD, NKikimrServices::TX_COLUMNSHARD_SCAN }, "CS").SetPriority(NActors::NLog::PRI_DEBUG).Initialize(); + + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + + { + auto alterQuery = TStringBuilder() << "ALTER TABLESTORE `/Root/olapStore` ADD COLUMN new_column1 Uint64;"; + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + 
UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + WriteTestData(kikimr, "/Root/olapStore/olapTable", 1000000, 300000000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 1100000, 300100000, 1000); + + { + auto alterQuery = TStringBuilder() << "ALTER TABLESTORE `/Root/olapStore` ADD COLUMN new_column2 Uint64;"; + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + + csController->EnableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); + csController->WaitIndexation(TDuration::Seconds(5)); + + } + + Y_UNIT_TEST(MultiInsertWithSinks) { + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableOlapSink(true); + auto settings = TKikimrSettings() + .SetAppConfig(appConfig) + .SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + + TLocalHelper(kikimr).CreateTestOlapTable(); + + auto db = kikimr.GetQueryClient(); + + auto result = db.ExecuteQuery(R"( + INSERT INTO `/Root/olapStore/olapTable` (timestamp, uid, resource_id) VALUES (Timestamp('1970-01-01T00:00:00Z'), 'a', '0'); + INSERT INTO `/Root/olapStore/olapTable` (timestamp, uid, resource_id) VALUES (Timestamp('1970-01-01T00:00:01Z'), 'b', 't'); + INSERT INTO `/Root/olapStore/olapTable` (timestamp, uid, resource_id) VALUES (Timestamp('1970-01-01T00:00:02Z'), 'c', 'test'); + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + + { + auto it = db.StreamExecuteQuery(R"( + --!syntax_v1 + + SELECT + * + FROM `/Root/olapStore/olapTable` ORDER BY uid + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).GetValueSync(); + + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + TString result = StreamResultToYson(it); + CompareYson(result, 
R"([[#;#;["0"];0u;"a"];[#;#;["t"];1000000u;"b"];[#;#;["test"];2000000u;"c"]])"); + } + } + + Y_UNIT_TEST(CountWhereColumnIsNull) { + auto settings = TKikimrSettings() + .SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + kikimr.GetTestServer().GetRuntime()->SetLogPriority(NKikimrServices::TX_COLUMNSHARD_SCAN, NActors::NLog::PRI_DEBUG); + + TLocalHelper(kikimr).CreateTestOlapTable(); + + WriteTestData(kikimr, "/Root/olapStore/olapTable", 0, 1000000, 300, true); + + auto client = kikimr.GetTableClient(); + + Tests::NCommon::TLoggerInit(kikimr).Initialize(); + + { + auto it = client.StreamExecuteScanQuery(R"( + --!syntax_v1 + + SELECT COUNT(*), COUNT(level) + FROM `/Root/olapStore/olapTable` + WHERE level IS NULL + )").GetValueSync(); + + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + TString result = StreamResultToYson(it); + Cout << result << Endl; + CompareYson("[[100u;0u]]", result); + } + + { + auto it = client.StreamExecuteScanQuery(R"( + --!syntax_v1 + + SELECT COUNT(*), COUNT(level) + FROM `/Root/olapStore/olapTable` + WHERE level IS NULL AND uid IS NOT NULL + )").GetValueSync(); + + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + TString result = StreamResultToYson(it); + Cout << result << Endl; + CompareYson("[[100u;0u]]", result); + } + + { + auto it = client.StreamExecuteScanQuery(R"( + --!syntax_v1 + + SELECT COUNT(*), COUNT(level) + FROM `/Root/olapStore/olapTable` + WHERE level IS NULL + GROUP BY level + )").GetValueSync(); + + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + TString result = StreamResultToYson(it); + Cout << result << Endl; + CompareYson("[[100u;0u]]", result); + } + } + + Y_UNIT_TEST(SimpleCount) { + auto settings = TKikimrSettings() + .SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + kikimr.GetTestServer().GetRuntime()->SetLogPriority(NKikimrServices::TX_COLUMNSHARD_SCAN, NActors::NLog::PRI_DEBUG); + + TLocalHelper(kikimr).CreateTestOlapTable(); + + WriteTestData(kikimr, 
"/Root/olapStore/olapTable", 0, 1000000, 300, true); + + auto client = kikimr.GetTableClient(); + + Tests::NCommon::TLoggerInit(kikimr).Initialize(); + + { + auto it = client.StreamExecuteScanQuery(R"( + --!syntax_v1 + + SELECT COUNT(level) + FROM `/Root/olapStore/olapTable` + WHERE StartsWith(uid, "uid_") + )").GetValueSync(); + + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + TString result = StreamResultToYson(it); + Cout << result << Endl; + CompareYson("[[200u]]", result); + } + } + + Y_UNIT_TEST(TableSinkWithOlapStore) { + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableOlapSink(true); + auto settings = TKikimrSettings() + .SetAppConfig(appConfig) + .SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + Tests::NCommon::TLoggerInit(kikimr).Initialize(); + + TLocalHelper(kikimr).CreateTestOlapTables(); + + WriteTestData(kikimr, "/Root/olapStore/olapTable0", 0, 1000000, 3, true); + + auto client = kikimr.GetQueryClient(); + { + auto result = client.ExecuteQuery(R"( + SELECT * FROM `/Root/olapStore/olapTable0` ORDER BY timestamp; + INSERT INTO `/Root/olapStore/olapTable1` SELECT * FROM `/Root/olapStore/olapTable0`; + REPLACE INTO `/Root/olapStore/olapTable0` SELECT * FROM `/Root/olapStore/olapTable1`; + SELECT * FROM `/Root/olapStore/olapTable1` ORDER BY timestamp; + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + } + } } } diff --git a/ydb/core/kqp/ut/olap/sparsed_ut.cpp b/ydb/core/kqp/ut/olap/sparsed_ut.cpp new file mode 100644 index 000000000000..73b75f2cc53f --- /dev/null +++ b/ydb/core/kqp/ut/olap/sparsed_ut.cpp @@ -0,0 +1,307 @@ +#include "helpers/local.h" +#include "helpers/writer.h" +#include "helpers/typed_local.h" +#include "helpers/query_executor.h" +#include "helpers/get_value.h" + +#include +#include +#include +#include + +namespace NKikimr::NKqp { + +Y_UNIT_TEST_SUITE(KqpOlapSparsed) { + + class 
TSparsedDataTest { + private: + const TKikimrSettings Settings = TKikimrSettings().SetWithSampleTables(false); + TKikimrRunner Kikimr; + NKikimr::NYDBTest::TControllers::TGuard CSController; + const TString StoreName; + ui32 MultiColumnRepCount = 100; + static const ui32 SKIP_GROUPS = 7; + const TVector FIELD_NAMES{"utf", "int", "uint", "float", "double"}; + public: + TSparsedDataTest(const TString& storeName) + : Kikimr(Settings) + , CSController(NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard()) + , StoreName(storeName) + { + + } + + ui32 GetCount() const { + auto selectQuery = TString(R"( + SELECT + count(*) as count, + FROM `/Root/)") + (StoreName.empty() ? "" : StoreName + "/") + "olapTable`"; + + auto tableClient = Kikimr.GetTableClient(); + auto rows = ExecuteScanQuery(tableClient, selectQuery); + return GetUint64(rows[0].at("count")); + } + + ui32 GetDefaultsCount(const TString& fieldName, const TString& defValueStr) const { + auto selectQueryTmpl = TString(R"( + SELECT + count(*) as count, + FROM `/Root/)") + (StoreName.empty() ? "" : StoreName + "/") + R"(olapTable` + WHERE %s == %s + )"; + + auto tableClient = Kikimr.GetTableClient(); + auto rows = ExecuteScanQuery(tableClient, Sprintf(selectQueryTmpl.c_str(), fieldName.c_str(), defValueStr.c_str())); + return GetUint64(rows[0].at("count")); + } + + void GetAllDefaultsCount(ui64* counts, ui32 skipCount) { + TString query = "SELECT"; + ui32 groupsCount = 0; + for (ui32 i = 0; i < MultiColumnRepCount; i += skipCount) { + query += Sprintf("%s field_utf%u == 'abcde' AS def_utf%u, field_uint%u == 0 AS def_uint%u, field_int%u == 0 AS def_int%u, field_float%u == 0 AS def_float%u, field_double%u == 0 AS def_double%u", i == 0 ? 
"" : ",", i, i, i, i, i, i, i, i, i, i); + groupsCount++; + } + query += " FROM `/Root/olapStore/olapTable`"; + auto tableClient = Kikimr.GetTableClient(); + + auto start = TInstant::Now().Seconds(); + + auto printTime = [&](const char* prefix) { + auto finish = TInstant::Now().Seconds(); + fprintf(stderr, "Timing: %s took %lu seconds\n", prefix, finish - start); + start = finish; + }; + + auto rows = ExecuteScanQuery(tableClient, query, false); + + printTime("Executing query"); + + Fill(&counts[0], &counts[FIELD_NAMES.size() * groupsCount], 0); + + for (auto& row: rows) { + auto incCounts = [&](ui32 i, const TString& column) { + if (*NYdb::TValueParser(row.at(column)).GetOptionalBool()) { + counts[i]++; + } + }; + ui32 ind = 0; + for (ui32 i = 0; i < MultiColumnRepCount; i += skipCount) { + TString grStr = ToString(i); + incCounts(ind++, "def_utf" + grStr); + incCounts(ind++, "def_uint" + grStr); + incCounts(ind++, "def_int" + grStr); + incCounts(ind++, "def_float" + grStr); + incCounts(ind++, "def_double" + grStr); + } + } + } + + void CheckAllFieldsTable(bool firstCall, ui32 countExpectation, ui32* defCountStart) { + ui32 grCount = (MultiColumnRepCount + SKIP_GROUPS - 1) / SKIP_GROUPS; + ui64 defCounts[FIELD_NAMES.size() * grCount]; + const ui32 count = GetCount(); + GetAllDefaultsCount(defCounts, SKIP_GROUPS); + for (ui32 i = 0; i < FIELD_NAMES.size() * grCount; i++) { + if (firstCall) { + defCountStart[i] = defCounts[i]; + } else { + AFL_VERIFY(defCountStart[i] == defCounts[i]); + } + AFL_VERIFY(count == countExpectation)("expect", countExpectation)("count", count); + AFL_VERIFY(1.0 * defCounts[i] / count < 0.95)("def", defCounts[i])("count", count); + AFL_VERIFY(1.0 * defCounts[i] / count > 0.85)("def", defCounts[i])("count", count); + } + } + + void CheckTable(const TString& fieldName, const TString& defValueStr, bool firstCall, ui32 countExpectation, ui32& defCountStart) { + const ui32 defCount = GetDefaultsCount(fieldName, defValueStr); + if (firstCall) { 
+ defCountStart = defCount; + } else { + AFL_VERIFY(defCountStart == defCount); + } + const ui32 count = GetCount(); + AFL_VERIFY(count == countExpectation)("expect", countExpectation)("count", count); + AFL_VERIFY(1.0 * defCount / count < 0.95)("def", defCount)("count", count); + AFL_VERIFY(1.0 * defCount / count > 0.85)("def", defCount)("count", count); + } + + template + void FillCircleImpl(TFillTable&& fillTable, TCheckTable&& checkTable) { + auto start = TInstant::Now().Seconds(); + + auto printTime = [&](const char* prefix) { + auto finish = TInstant::Now().Seconds(); + fprintf(stderr, "Timing: %s took %lu seconds\n", prefix, finish - start); + start = finish; + }; + + fillTable(); + printTime("fillTable"); + checkTable(true); + printTime("checkTable"); + + CSController->EnableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); + CSController->WaitIndexation(TDuration::Seconds(5)); + printTime("wait"); + + checkTable(false); + printTime("checkTable"); + + CSController->EnableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); + CSController->WaitCompactions(TDuration::Seconds(5)); + printTime("wait"); + + checkTable(false); + printTime("checkTable"); + + CSController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); + CSController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); + printTime("wait"); + } + + void FillCircle(const double shiftKff, const ui32 countExpectation) { + ui32 defCountStart = (ui32)-1; + FillCircleImpl([&]() { + TTypedLocalHelper helper("Utf8", Kikimr, "olapTable", StoreName); + const double frq = 0.9; + NArrow::NConstruction::TStringPoolFiller sPool(1000, 52, "abcde", frq); + helper.FillTable(sPool, shiftKff, 10000); + }, + [&](bool firstCall) { + CheckTable("field", "'abcde'", firstCall, countExpectation, defCountStart); + }); + } + + void FillMultiColumnCircle(const double shiftKff, const ui32 countExpectation) { + ui32 grCount = 
(MultiColumnRepCount + SKIP_GROUPS - 1) / SKIP_GROUPS; + ui32 defCountStart[FIELD_NAMES.size() * grCount]; + FillCircleImpl([&]() { + TTypedLocalHelper helper("Utf8", Kikimr); + helper.FillMultiColumnTable(MultiColumnRepCount, shiftKff, 10000); + }, + [&](bool firstCall) { + CheckAllFieldsTable(firstCall, countExpectation, defCountStart); + }); + } + + void Execute() { + CSController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); + CSController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); + CSController->SetOverridePeriodicWakeupActivationPeriod(TDuration::MilliSeconds(100)); + + Tests::NCommon::TLoggerInit(Kikimr).Initialize(); + TTypedLocalHelper helper("Utf8", Kikimr, "olapTable", StoreName); + if (!StoreName.empty()) { + helper.CreateTestOlapTable(); + } else { + auto tableClient = Kikimr.GetTableClient(); + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + + auto query = TStringBuilder() << R"( + --!syntax_v1 + CREATE TABLE `/Root/olapTable` + ( + pk_int int64 NOT NULL, + field )" << "Utf8" << R"(, + ts TimeStamp, + PRIMARY KEY (pk_int) + ) + PARTITION BY HASH(pk_int) + WITH ( + STORE = COLUMN + ))"; + auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), NYdb::EStatus::SUCCESS, result.GetIssues().ToString()); + } + + TString type = StoreName.empty() ? "TABLE" : "TABLESTORE"; + TString name = StoreName.empty() ? 
"olapTable" : "olapStore"; + + FillCircle(0, 10000); + helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/" + name + "`(TYPE " + type + ") SET (ACTION=ALTER_COLUMN, NAME=field, `DATA_ACCESSOR_CONSTRUCTOR.CLASS_NAME`=`SPARSED`, `DEFAULT_VALUE`=`abcde`);"); + FillCircle(0.1, 11000); + helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/" + name + "`(TYPE " + type + ") SET (ACTION=ALTER_COLUMN, NAME=field, `DATA_ACCESSOR_CONSTRUCTOR.CLASS_NAME`=`PLAIN`);"); + FillCircle(0.2, 12000); + helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/" + name + "`(TYPE " + type + ") SET (ACTION=ALTER_COLUMN, NAME=field, `DATA_ACCESSOR_CONSTRUCTOR.CLASS_NAME`=`SPARSED`);"); + FillCircle(0.3, 13000); + helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/" + name + "`(TYPE " + type + ") SET (ACTION=ALTER_COLUMN, NAME=field, `DATA_ACCESSOR_CONSTRUCTOR.CLASS_NAME`=`PLAIN`);"); + FillCircle(0.4, 14000); + } + + void ExecuteMultiColumn() { + CSController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); + CSController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); + CSController->SetOverridePeriodicWakeupActivationPeriod(TDuration::MilliSeconds(100)); + + Tests::NCommon::TLoggerInit(Kikimr).Initialize(); + TTypedLocalHelper helper("Utf8", Kikimr); + helper.CreateMultiColumnOlapTableWithStore(MultiColumnRepCount); + + auto start = TInstant::Now().Seconds(); + + auto printTime = [&](const char* prefix) { + auto finish = TInstant::Now().Seconds(); + fprintf(stderr, "Timing: %s took %lu seconds\n", prefix, finish - start); + start = finish; + }; + + FillMultiColumnCircle(0, 10000); + printTime("Fill"); + for (ui32 i = 0; i < MultiColumnRepCount; i += SKIP_GROUPS) { + TString grStr = ToString(i); + for (ui32 f = 0; f < FIELD_NAMES.size(); f++) { + helper.ExecuteSchemeQuery(TString("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=ALTER_COLUMN, NAME=field_") + FIELD_NAMES[f] + grStr + ", `DATA_ACCESSOR_CONSTRUCTOR.CLASS_NAME`=`SPARSED`, 
`DEFAULT_VALUE`=" + (f == 0 ? "`abcde`" : "`0`") + ");"); + } + } + printTime("Alter"); + FillMultiColumnCircle(0.1, 11000); + printTime("Fill"); + for (ui32 i = 0; i < MultiColumnRepCount; i += SKIP_GROUPS) { + TString grStr = ToString(i); + for (ui32 f = 0; f < FIELD_NAMES.size(); f++) { + helper.ExecuteSchemeQuery(TString("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=ALTER_COLUMN, NAME=field_") + FIELD_NAMES[f] + grStr + ", `DATA_ACCESSOR_CONSTRUCTOR.CLASS_NAME`=`PLAIN`);"); + } + } + printTime("Alter"); + FillMultiColumnCircle(0.2, 12000); + printTime("Fill"); + for (ui32 i = 0; i < MultiColumnRepCount; i += SKIP_GROUPS) { + TString grStr = ToString(i); + for (ui32 f = 0; f < FIELD_NAMES.size(); f++) { + helper.ExecuteSchemeQuery(TString("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=ALTER_COLUMN, NAME=field_") + FIELD_NAMES[f] + grStr + ", `DATA_ACCESSOR_CONSTRUCTOR.CLASS_NAME`=`SPARSED`);"); + } + } + printTime("Alter"); + FillMultiColumnCircle(0.3, 13000); + printTime("Fill"); + for (ui32 i = 0; i < MultiColumnRepCount; i += SKIP_GROUPS) { + TString grStr = ToString(i); + for (ui32 f = 0; f < FIELD_NAMES.size(); f++) { + helper.ExecuteSchemeQuery(TString("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=ALTER_COLUMN, NAME=field_") + FIELD_NAMES[f] + grStr + ", `DATA_ACCESSOR_CONSTRUCTOR.CLASS_NAME`=`PLAIN`);"); + } + } + printTime("Alter"); + FillMultiColumnCircle(0.4, 14000); + printTime("Fill"); + } + }; + + Y_UNIT_TEST(Switching) { + TSparsedDataTest test("olapStore"); + test.Execute(); + } + + Y_UNIT_TEST(SwitchingMultiColumn) { + TSparsedDataTest test("olapStore"); + test.ExecuteMultiColumn(); + } + + Y_UNIT_TEST(SwitchingStandalone) { + TSparsedDataTest test(""); + test.Execute(); + } +} + +} // namespace diff --git a/ydb/core/kqp/ut/olap/statistics_ut.cpp b/ydb/core/kqp/ut/olap/statistics_ut.cpp index d79a07f9bc3b..ece5e454bacb 100644 --- a/ydb/core/kqp/ut/olap/statistics_ut.cpp +++ 
b/ydb/core/kqp/ut/olap/statistics_ut.cpp @@ -14,25 +14,25 @@ Y_UNIT_TEST_SUITE(KqpOlapStatistics) { helper.CreateTestOlapTable(); auto tableClient = kikimr.GetTableClient(); { - auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_STAT, TYPE=max, NAME=max_pk_int, FEATURES=`{\"column_name\": \"pk_int\"}`);"; + auto alterQuery = TStringBuilder() << R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=max_pk_int, TYPE=MAX, FEATURES=`{\"column_name\": \"pk_int\"}`))"; auto session = tableClient.CreateSession().GetValueSync().GetSession(); auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); } { - auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_STAT, TYPE=max, NAME=max_field, FEATURES=`{\"column_name\": \"field\"}`);"; + auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=max_pk_int, TYPE=MAX, FEATURES=`{\"column_name\": \"field\"}`);"; auto session = tableClient.CreateSession().GetValueSync().GetSession(); auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); UNIT_ASSERT_VALUES_UNEQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); } { - auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_STAT, TYPE=max, NAME=max_pk_int, FEATURES=`{\"column_name\": \"pk_int\"}`);"; + auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=max_pk_int, TYPE=MAX, FEATURES=`{\"column_name\": \"pk_int\"}`);"; auto session = tableClient.CreateSession().GetValueSync().GetSession(); auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); 
UNIT_ASSERT_VALUES_UNEQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); } { - auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=DROP_STAT, NAME=max_pk_int);"; + auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=DROP_INDEX, NAME=max_pk_int);"; auto session = tableClient.CreateSession().GetValueSync().GetSession(); auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); @@ -50,7 +50,7 @@ Y_UNIT_TEST_SUITE(KqpOlapStatistics) { helper.CreateTestOlapTable(); auto tableClient = kikimr.GetTableClient(); { - auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_STAT, TYPE=max, NAME=max_ts, FEATURES=`{\"column_name\": \"ts\"}`);"; + auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, TYPE=MAX, NAME=max_ts, FEATURES=`{\"column_name\": \"ts\"}`);"; auto session = tableClient.CreateSession().GetValueSync().GetSession(); auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); @@ -62,7 +62,7 @@ Y_UNIT_TEST_SUITE(KqpOlapStatistics) { UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); } { - auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=DROP_STAT, NAME=max_ts);"; + auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=DROP_INDEX, NAME=max_ts);"; auto session = tableClient.CreateSession().GetValueSync().GetSession(); auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); 
UNIT_ASSERT_VALUES_UNEQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); diff --git a/ydb/core/kqp/ut/olap/sys_view_ut.cpp b/ydb/core/kqp/ut/olap/sys_view_ut.cpp index 4242f7676298..27820452542e 100644 --- a/ydb/core/kqp/ut/olap/sys_view_ut.cpp +++ b/ydb/core/kqp/ut/olap/sys_view_ut.cpp @@ -5,6 +5,7 @@ #include "helpers/get_value.h" #include +#include #include #include @@ -229,7 +230,9 @@ Y_UNIT_TEST_SUITE(KqpOlapSysView) { helper.CreateTestOlapTable(); NArrow::NConstruction::TStringPoolFiller sPool(3, 52); helper.FillTable(sPool, 0, 800000); - csController->WaitCompactions(TDuration::Seconds(10)); + csController->WaitCompactions(TDuration::Seconds(5)); + helper.FillTable(sPool, 0.5, 800000); + csController->WaitCompactions(TDuration::Seconds(5)); helper.GetVolumes(rawBytes1, bytes1, false, {"new_column_ui64"}); AFL_VERIFY(rawBytes1 == 0); @@ -241,9 +244,9 @@ Y_UNIT_TEST_SUITE(KqpOlapSysView) { csController->WaitActualization(TDuration::Seconds(10)); ui64 rawBytes2; ui64 bytes2; - helper.GetVolumes(rawBytes2, bytes2, false, {"new_column_ui64"}); - AFL_VERIFY(rawBytes2 == 6500041)("real", rawBytes2); - AFL_VERIFY(bytes2 == 45360)("b", bytes2); + helper.GetVolumes(rawBytes2, bytes2, false, { "new_column_ui64", NOlap::IIndexInfo::SPEC_COL_DELETE_FLAG }); + AFL_VERIFY(rawBytes2 == 0)("real", rawBytes2); + AFL_VERIFY(bytes2 == 0)("b", bytes2); } } @@ -302,63 +305,61 @@ Y_UNIT_TEST_SUITE(KqpOlapSysView) { auto tableClient = kikimr.GetTableClient(); { helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=ALTER_COLUMN, NAME=field, `ENCODING.DICTIONARY.ENABLED`=`true`);"); - helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_STAT, NAME=field_var, TYPE=variability, FEATURES=`{\"column_name\" : \"field\"}`);"); - helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_STAT, NAME=pk_int_max, TYPE=max, 
FEATURES=`{\"column_name\" : \"pk_int\"}`);"); + helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=pk_int_max, TYPE=MAX, FEATURES=`{\"column_name\" : \"pk_int\"}`);"); helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_OPTIONS, SCHEME_NEED_ACTUALIZATION=`true`);"); - csController->WaitCondition(TDuration::Seconds(10), [&]() { + csController->WaitActualization(TDuration::Seconds(40)); + { ui64 rawBytes2; ui64 bytes2; - helper.GetVolumes(rawBytes2, bytes2, false, {"field"}); + helper.GetVolumes(rawBytes2, bytes2, false, { "field" }); AFL_VERIFY(rawBytes2 == rawBytes1)("f1", rawBytes1)("f2", rawBytes2); AFL_VERIFY(bytes2 < bytes1 * 0.5)("f1", bytes1)("f2", bytes2); - std::vector stats; + std::vector stats; helper.GetStats(stats, true); + AFL_VERIFY(stats.size() == 3); for (auto&& i : stats) { - if (i.ScalarsSize() != 2) { - return false; - } - if (i.GetScalars()[0].GetUint32() != 3) { - return false; - } - } - return true; + AFL_VERIFY(i.IsArray()); + AFL_VERIFY(i.GetArraySafe().size() == 1); + AFL_VERIFY(i.GetArraySafe()[0]["chunk_idx"].GetInteger() == 0); + AFL_VERIFY(i.GetArraySafe()[0]["entity_id"].GetInteger() == 4); + AFL_VERIFY(i.GetArraySafe()[0]["data"].GetIntegerRobust() >= 799992); + AFL_VERIFY(i.GetArraySafe()[0]["data"].GetIntegerRobust() <= 799999); + AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("json", i); } - ); + } } { - helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=DROP_STAT, NAME=pk_int_max);"); + helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=DROP_INDEX, NAME=pk_int_max);"); helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_OPTIONS, SCHEME_NEED_ACTUALIZATION=`true`);"); - csController->WaitCondition(TDuration::Seconds(10), [&]() { - std::vector stats; + csController->WaitActualization(TDuration::Seconds(30)); + { + std::vector 
stats; helper.GetStats(stats, true); + AFL_VERIFY(stats.size() == 3); for (auto&& i : stats) { - if (i.ScalarsSize() != 1) { - return false; - } - if (i.GetScalars()[0].GetUint32() != 3) { - return false; - } + AFL_VERIFY(i.IsArray()); + AFL_VERIFY(i.GetArraySafe().size() == 0)("json", i); } - return true; - }); + } } { - helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_STAT, NAME=pk_int_max, TYPE=max, FEATURES=`{\"column_name\" : \"pk_int\"}`);"); + helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=pk_int_max, TYPE=MAX, FEATURES=`{\"column_name\" : \"pk_int\"}`);"); helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_OPTIONS, SCHEME_NEED_ACTUALIZATION=`true`);"); - csController->WaitCondition(TDuration::Seconds(10), [&]() { - std::vector stats; + csController->WaitActualization(TDuration::Seconds(40)); + { + std::vector stats; helper.GetStats(stats, true); + AFL_VERIFY(stats.size() == 3); for (auto&& i : stats) { - if (i.ScalarsSize() != 2) { - return false; - } - if (i.GetScalars()[0].GetUint32() != 3) { - return false; - } + AFL_VERIFY(i.IsArray()); + AFL_VERIFY(i.GetArraySafe().size() == 1); + AFL_VERIFY(i.GetArraySafe()[0]["chunk_idx"].GetInteger() == 0); + AFL_VERIFY(i.GetArraySafe()[0]["entity_id"].GetInteger() == 5)("json", i); + AFL_VERIFY(i.GetArraySafe()[0]["data"].GetIntegerRobust() >= 799992); + AFL_VERIFY(i.GetArraySafe()[0]["data"].GetIntegerRobust() <= 799999); + AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("json", i); } - return true; - } - ); + } } } @@ -452,7 +453,7 @@ Y_UNIT_TEST_SUITE(KqpOlapSysView) { SELECT PathId, Kind, TabletId FROM `/Root/olapStore/.sys/store_primary_index_stats` WHERE - PathId == UInt64("3") AND Activity = true + PathId == UInt64("3") AND Activity == 1 GROUP BY TabletId, PathId, Kind ORDER BY TabletId, Kind )"); diff --git a/ydb/core/kqp/ut/olap/tiering_ut.cpp 
b/ydb/core/kqp/ut/olap/tiering_ut.cpp new file mode 100644 index 000000000000..b9cceba93738 --- /dev/null +++ b/ydb/core/kqp/ut/olap/tiering_ut.cpp @@ -0,0 +1,161 @@ +#include "helpers/get_value.h" +#include "helpers/local.h" +#include "helpers/query_executor.h" +#include "helpers/typed_local.h" +#include "helpers/writer.h" + +#include +#include +#include +#include +#include + +namespace NKikimr::NKqp { + +Y_UNIT_TEST_SUITE(KqpOlapTiering) { + Y_UNIT_TEST(Eviction) { + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + + TKikimrSettings runnerSettings; + runnerSettings.WithSampleTables = false; + TTestHelper testHelper(runnerSettings); + TLocalHelper localHelper(testHelper.GetKikimr()); + NYdb::NTable::TTableClient tableClient = testHelper.GetKikimr().GetTableClient(); + Tests::NCommon::TLoggerInit(testHelper.GetKikimr()).Initialize(); + Singleton()->SetSecretKey("fakeSecret"); + + localHelper.CreateTestOlapTable(); + testHelper.CreateTier("tier1"); + const TString tieringRule = testHelper.CreateTieringRule("tier1", "timestamp"); + + for (ui64 i = 0; i < 100; ++i) { + WriteTestData(testHelper.GetKikimr(), "/Root/olapStore/olapTable", 0, i * 10000, 1000); + } + + csController->WaitActualization(TDuration::Seconds(5)); + + ui64 columnRawBytes = 0; + { + auto selectQuery = TString(R"( + SELECT + TierName, SUM(ColumnRawBytes) As RawBytes + FROM `/Root/olapStore/olapTable/.sys/primary_index_portion_stats` + WHERE Activity == 1 + GROUP BY TierName + )"); + + auto rows = ExecuteScanQuery(tableClient, selectQuery); + UNIT_ASSERT_VALUES_EQUAL(rows.size(), 1); + UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[0].at("TierName")), "__DEFAULT"); + + columnRawBytes = GetUint64(rows[0].at("RawBytes")); + UNIT_ASSERT_GT(columnRawBytes, 0); + } + + testHelper.SetTiering("/Root/olapStore/olapTable", tieringRule); + csController->WaitActualization(TDuration::Seconds(5)); + + { + auto selectQuery = TString(R"( + SELECT + TierName, SUM(ColumnRawBytes) As RawBytes + FROM 
`/Root/olapStore/olapTable/.sys/primary_index_portion_stats` + WHERE Activity == 1 + GROUP BY TierName + )"); + + auto rows = ExecuteScanQuery(tableClient, selectQuery); + UNIT_ASSERT_VALUES_EQUAL(rows.size(), 1); + UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[0].at("TierName")), "tier1"); + UNIT_ASSERT_VALUES_EQUAL_C(GetUint64(rows[0].at("RawBytes")), columnRawBytes, + TStringBuilder() << "RawBytes changed after eviction: before=" << columnRawBytes + << " after=" << GetUint64(rows[0].at("RawBytes"))); + } + + testHelper.ResetTiering("/Root/olapStore/olapTable"); + csController->WaitCompactions(TDuration::Seconds(5)); + + { + auto selectQuery = TString(R"( + SELECT + TierName, SUM(ColumnRawBytes) As RawBytes + FROM `/Root/olapStore/olapTable/.sys/primary_index_portion_stats` + WHERE Activity == 1 + GROUP BY TierName + )"); + + auto rows = ExecuteScanQuery(tableClient, selectQuery); + UNIT_ASSERT_VALUES_EQUAL(rows.size(), 1); + UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[0].at("TierName")), "__DEFAULT"); + UNIT_ASSERT_VALUES_EQUAL_C(GetUint64(rows[0].at("RawBytes")), columnRawBytes, + TStringBuilder() << "RawBytes changed after resetting tiering: before=" << columnRawBytes + << " after=" << GetUint64(rows[0].at("RawBytes"))); + } + } + + Y_UNIT_TEST(TieringRuleValidation) { + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + + TKikimrSettings runnerSettings; + runnerSettings.WithSampleTables = false; + TTestHelper testHelper(runnerSettings); + TLocalHelper localHelper(testHelper.GetKikimr()); + NYdb::NTable::TTableClient tableClient = testHelper.GetKikimr().GetTableClient(); + Tests::NCommon::TLoggerInit(testHelper.GetKikimr()).Initialize(); + Singleton()->SetSecretKey("fakeSecret"); + + localHelper.CreateTestOlapTable(); + testHelper.CreateTier("tier1"); + + { + const TString query = R"( + CREATE OBJECT IF NOT EXISTS empty_tiering_rule (TYPE TIERING_RULE) + WITH (defaultColumn = timestamp, description = `{"rules": []}`))"; + auto result = 
testHelper.GetSession().ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_UNEQUAL(result.GetStatus(), NYdb::EStatus::SUCCESS); + } + + { + const TString query = R"( + CREATE OBJECT IF NOT EXISTS empty_default_column (TYPE TIERING_RULE) + WITH (defaultColumn = ``, description = `{"rules": [{ "tierName" : "tier1", "durationForEvict" : "10d" }]}`))"; + auto result = testHelper.GetSession().ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_UNEQUAL(result.GetStatus(), NYdb::EStatus::SUCCESS); + } + + { + const TString query = R"( + CREATE OBJECT IF NOT EXISTS no_default_column (TYPE TIERING_RULE) + WITH (description = `{"rules": [{ "tierName" : "tier1", "durationForEvict" : "10d" }]}`))"; + auto result = testHelper.GetSession().ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_UNEQUAL(result.GetStatus(), NYdb::EStatus::SUCCESS); + } + + const TString correctTieringRule = testHelper.CreateTieringRule("tier1", "timestamp"); + { + const TString query = "ALTER OBJECT " + correctTieringRule + R"( (TYPE TIERING_RULE) SET description `{"rules": []}`)"; + auto result = testHelper.GetSession().ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_UNEQUAL(result.GetStatus(), NYdb::EStatus::SUCCESS); + } + + { + const TString query = "ALTER OBJECT " + correctTieringRule + R"( (TYPE TIERING_RULE) SET description `{"rules": []}`)"; + auto result = testHelper.GetSession().ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_UNEQUAL(result.GetStatus(), NYdb::EStatus::SUCCESS); + } + + { + const TString query = "ALTER OBJECT " + correctTieringRule + R"( (TYPE TIERING_RULE) SET defaultColumn ``)"; + auto result = testHelper.GetSession().ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_UNEQUAL(result.GetStatus(), NYdb::EStatus::SUCCESS); + } + + { + const TString query = "ALTER OBJECT " + correctTieringRule + R"( (TYPE TIERING_RULE) RESET defaultColumn)"; + auto result = 
testHelper.GetSession().ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_UNEQUAL(result.GetStatus(), NYdb::EStatus::SUCCESS); + } + } +} + +} // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/ut/olap/write_ut.cpp b/ydb/core/kqp/ut/olap/write_ut.cpp index b631b245d7c9..8d9751f28193 100644 --- a/ydb/core/kqp/ut/olap/write_ut.cpp +++ b/ydb/core/kqp/ut/olap/write_ut.cpp @@ -15,7 +15,7 @@ Y_UNIT_TEST_SUITE(KqpOlapWrite) { Y_UNIT_TEST(TierDraftsGC) { auto csController = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); csController->SetIndexWriteControllerEnabled(false); - csController->SetPeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + csController->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); Singleton()->ResetWriteCounters(); auto settings = TKikimrSettings() @@ -47,10 +47,31 @@ Y_UNIT_TEST_SUITE(KqpOlapWrite) { AFL_VERIFY(!Singleton()->GetSize()); } + Y_UNIT_TEST(TestRemoveTableBeforeIndexation) { + auto csController = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); + csController->SetIndexWriteControllerEnabled(false); + csController->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + csController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); + csController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); + + auto settings = TKikimrSettings().SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + TLocalHelper(kikimr).CreateTestOlapTable(); + Tests::NCommon::TLoggerInit(kikimr).SetComponents({ NKikimrServices::TX_COLUMNSHARD }, "CS").SetPriority(NActors::NLog::PRI_DEBUG).Initialize(); + auto tableClient = kikimr.GetTableClient(); + + WriteTestData(kikimr, "/Root/olapStore/olapTable", 30000, 1000000, 11000); + TTypedLocalHelper("Utf8", kikimr).ExecuteSchemeQuery("DROP TABLE `/Root/olapStore/olapTable`;"); + csController->EnableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); + 
csController->EnableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); + csController->WaitIndexation(TDuration::Seconds(5)); + csController->WaitCompactions(TDuration::Seconds(5)); + } + Y_UNIT_TEST(TierDraftsGCWithRestart) { auto csController = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); csController->SetIndexWriteControllerEnabled(false); - csController->SetPeriodicWakeupActivationPeriod(TDuration::Seconds(1000)); + csController->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1000)); csController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::GC); Singleton()->ResetWriteCounters(); @@ -131,6 +152,63 @@ Y_UNIT_TEST_SUITE(KqpOlapWrite) { UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("count")), 800000); } + Y_UNIT_TEST(WriteDeleteCleanGC) { + auto csController = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); + csController->SetOverridePeriodicWakeupActivationPeriod(TDuration::MilliSeconds(100)); + csController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::GC); + Singleton()->ResetWriteCounters(); + + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableOlapSink(true); + + auto settings = TKikimrSettings().SetAppConfig(appConfig).SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + TLocalHelper(kikimr).CreateTestOlapTable(); + Tests::NCommon::TLoggerInit(kikimr).SetComponents({ NKikimrServices::TX_COLUMNSHARD, NKikimrServices::TX_COLUMNSHARD_BLOBS }, "CS").SetPriority(NActors::NLog::PRI_DEBUG).Initialize(); + auto tableClient = kikimr.GetTableClient(); + + auto client = kikimr.GetQueryClient(); + + { + auto it = client.ExecuteQuery(R"( + INSERT INTO `/Root/olapStore/olapTable` (timestamp, uid, resource_id) VALUES (Timestamp('1970-01-01T00:00:00Z'), 'a', '0'); + INSERT INTO `/Root/olapStore/olapTable` (timestamp, uid, resource_id) VALUES (Timestamp('1970-01-01T00:00:01Z'), 'a', 'test'); + INSERT INTO 
`/Root/olapStore/olapTable` (timestamp, uid, resource_id) VALUES (Timestamp('1970-01-01T00:00:02Z'), 'a', 't'); + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + } + + while (csController->GetInsertStartedCounter().Val() == 0) { + Cerr << "Wait indexation..." << Endl; + Sleep(TDuration::Seconds(2)); + } + { + const TInstant start = TInstant::Now(); + while (!Singleton()->GetSize() && TInstant::Now() - start < TDuration::Seconds(10)) { + Cerr << "Wait size in memory... " << Singleton()->GetSize() << Endl; + Sleep(TDuration::Seconds(2)); + } + AFL_VERIFY(Singleton()->GetSize()); + } + { + auto it = client.ExecuteQuery(R"( + DELETE FROM `/Root/olapStore/olapTable` ON SELECT CAST(0u AS Timestamp) AS timestamp, Unwrap(CAST('a' AS Utf8)) AS uid; + DELETE FROM `/Root/olapStore/olapTable`; + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + } + csController->SetOverrideReadTimeoutClean(TDuration::Zero()); + csController->EnableBackground(NKikimr::NYDBTest::ICSController::EBackground::GC); + { + const TInstant start = TInstant::Now(); + while (Singleton()->GetSize() && TInstant::Now() - start < TDuration::Seconds(10)) { + Cerr << "Wait empty... 
" << Singleton()->GetSize() << Endl; + Sleep(TDuration::Seconds(2)); + } + AFL_VERIFY(!Singleton()->GetSize()); + } + } + } } // namespace diff --git a/ydb/core/kqp/ut/olap/ya.make b/ydb/core/kqp/ut/olap/ya.make index d9b7a06ef77b..e324116597eb 100644 --- a/ydb/core/kqp/ut/olap/ya.make +++ b/ydb/core/kqp/ut/olap/ya.make @@ -1,7 +1,7 @@ UNITTEST_FOR(ydb/core/kqp) FORK_SUBTESTS() -SPLIT_FACTOR(100) +SPLIT_FACTOR(200) IF (WITH_VALGRIND) TIMEOUT(3600) @@ -23,6 +23,9 @@ SRCS( clickbench_ut.cpp aggregations_ut.cpp write_ut.cpp + sparsed_ut.cpp + tiering_ut.cpp + decimal_ut.cpp ) PEERDIR( diff --git a/ydb/core/kqp/ut/opt/kqp_ne_ut.cpp b/ydb/core/kqp/ut/opt/kqp_ne_ut.cpp index 2dcce5cd1e2a..cd478e6cdac0 100644 --- a/ydb/core/kqp/ut/opt/kqp_ne_ut.cpp +++ b/ydb/core/kqp/ut/opt/kqp_ne_ut.cpp @@ -11,6 +11,110 @@ using namespace NYdb; using namespace NYdb::NTable; Y_UNIT_TEST_SUITE(KqpNewEngine) { + Y_UNIT_TEST(StreamLookupWithView) { + TKikimrSettings settings = TKikimrSettings().SetWithSampleTables(false); + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetIndexAutoChooseMode(NKikimrConfig::TTableServiceConfig_EIndexAutoChooseMode_MAX_USED_PREFIX); + appConfig.MutableFeatureFlags()->SetEnableViews(true); + settings.SetDomainRoot(KikimrDefaultUtDomainRoot); + settings.SetAppConfig(appConfig); + + auto kikimr = TKikimrRunner{settings}; + kikimr.GetTestServer().GetRuntime()->GetAppData(0).FeatureFlags.SetEnableViews(true); + + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + + AssertSuccessResult(session.ExecuteSchemeQuery(R"( + --!syntax_v1 + + CREATE TABLE `object_table` + ( + object_id utf8, + role utf8, + id utf8 not NULL, + primary key (id) + ); + + ALTER TABLE `object_table` ADD INDEX `object_id_index` GLOBAL ON (object_id); + ALTER TABLE `object_table` ADD INDEX `role_index` GLOBAL ON (role); + + CREATE TABLE `role_table` + ( + granted_by_role utf8, + granted_role utf8, + role_type utf8, 
+ role utf8, + id utf8 not NULL, + primary key (id) + ); + + ALTER TABLE `role_table` ADD INDEX `granted_by_role_index` GLOBAL ON (granted_by_role); + ALTER TABLE `role_table` ADD INDEX `granted_role_index` GLOBAL ON (granted_role); + ALTER TABLE `role_table` ADD INDEX `role_index` GLOBAL ON (role); + + CREATE TABLE `access_table` + ( + endpoints utf8, + name utf8, + class utf8, + type utf8, + id utf8 not NULL, + primary key (id) + ); + + ALTER TABLE `access_table` ADD INDEX `endpoints_index` GLOBAL ON (endpoints); + ALTER TABLE `access_table` ADD INDEX `class_index` GLOBAL ON (class); + )").GetValueSync()); + + AssertSuccessResult(session.ExecuteSchemeQuery(R"( + --!syntax_v1 + CREATE VIEW granted_privilege WITH (security_invoker = TRUE) AS + SELECT DISTINCT + object_table.object_id AS object_id, + role_table.granted_role AS granted_role, + access_table.id AS id, + role_table.role AS role, + access_table.`type` AS object_type, + FROM `/Root/access_table` AS access_table + INNER JOIN `/Root/object_table` AS object_table ON access_table.id = object_table.object_id + INNER JOIN `/Root/role_table` AS role_table ON object_table.role = role_table.granted_role + )").GetValueSync()); + + auto result = session.ExecuteDataQuery(R"( + UPSERT INTO `access_table` (id, type) VALUES + ("10", "OPERATION_PRIVILEGE"); + UPSERT INTO `role_table` (id, granted_role, role_type) VALUES + ("10", "admin", "USER_ROLE"); + UPSERT INTO `object_table` (id, object_id, role) VALUES + ("10", "10", "admin"); + )", TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); + AssertSuccessResult(result); + + auto testQueryParams = [&] (TString query, TParams params) { + auto result = session.ExecuteDataQuery(query, TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx(), params).GetValueSync(); + AssertSuccessResult(result); + + Cerr << FormatResultSetYson(result.GetResultSet(0)) << Endl; + }; + + auto params = kikimr.GetTableClient().GetParamsBuilder() + 
.AddParam("$jp1").Utf8("admin").Build() + .AddParam("$jp2").Utf8("10").Build() + .AddParam("$jp3").Uint64(2).Build() + .Build(); + + testQueryParams(R"( + --!syntax_v1 + DECLARE $jp1 AS Text; + DECLARE $jp2 AS Text; + DECLARE $jp3 AS Uint64; + select g1_0.id from granted_privilege g1_0 where ( + g1_0.role = 'admin' + ) and g1_0.role=$jp1 and g1_0.object_type=$jp2 limit $jp3 + )", params); + } + Y_UNIT_TEST(Select1) { auto settings = TKikimrSettings() .SetWithSampleTables(false); @@ -2991,7 +3095,7 @@ Y_UNIT_TEST_SUITE(KqpNewEngine) { UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); AssertTableStats(result, "/Root/Test", { - .ExpectedReads = 2, + .ExpectedReads = 1, .ExpectedDeletes = 2, }); @@ -3955,6 +4059,78 @@ Y_UNIT_TEST_SUITE(KqpNewEngine) { AssertTableReads(result, "/Root/SecondaryKeys/Index/indexImplTable", 1); } + Y_UNIT_TEST(AutoChooseIndexOrderByLimit) { + TKikimrSettings settings; + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetIndexAutoChooseMode(NKikimrConfig::TTableServiceConfig_EIndexAutoChooseMode_ONLY_POINTS); + settings.SetAppConfig(appConfig); + + TKikimrRunner kikimr(settings); + + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + CreateSampleTablesWithIndex(session); + + NYdb::NTable::TExecDataQuerySettings querySettings; + querySettings.CollectQueryStats(ECollectQueryStatsMode::Profile); + + auto result = session.ExecuteDataQuery(R"( + --!syntax_v1 + SELECT Fk, Key FROM `/Root/SecondaryKeys` WHERE Fk = 1 ORDER BY Key DESC LIMIT 1; + )", TTxControl::BeginTx(TTxSettings::SerializableRW()), querySettings).GetValueSync(); + AssertSuccessResult(result); + AssertTableReads(result, "/Root/SecondaryKeys/Index/indexImplTable", 0); + } + + Y_UNIT_TEST(MultipleBroadcastJoin) { + TKikimrSettings kisettings; + NKikimrConfig::TAppConfig appConfig; + 
appConfig.MutableTableServiceConfig()->SetIndexAutoChooseMode(NKikimrConfig::TTableServiceConfig_EIndexAutoChooseMode_MAX_USED_PREFIX); + kisettings.SetAppConfig(appConfig); + + TKikimrRunner kikimr(kisettings); + + auto db = kikimr.GetTableClient(); + auto client = kikimr.GetQueryClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + + { + auto session = db.CreateSession().GetValueSync().GetSession(); + AssertSuccessResult(session.ExecuteSchemeQuery(R"( + --!syntax_v1 + + create table demo_ba(id text, some text, ref1 text, ref2 text, primary key(id)); + create table demo_ref1(id text, code text, some text, primary key(id), index ix_code global on (code)); + create table demo_ref2(id text, code text, some text, primary key(id), index ix_code global on (code)); + )").GetValueSync()); + } + + auto query = R"( + select ba_0.id, ba_0.some, + r_1.id, r_1.some, r_1.code, + r_2.id, r_2.some, r_2.code + from demo_ba ba_0 + left join demo_ref1 r_1 on r_1.id=ba_0.ref1 + left join demo_ref2 r_2 on r_2.code=ba_0.ref2 + where ba_0.id in ("ba#10"u,"ba#20"u,"ba#30"u,"ba#40"u,"ba#50"u,"ba#60"u,"ba#70"u,"ba#80"u,"ba#90"u,"ba#100"u); + )"; + + auto settings = NYdb::NQuery::TExecuteQuerySettings() + .Syntax(NYdb::NQuery::ESyntax::YqlV1) + .ConcurrentResultSets(false); + { + auto result = client.ExecuteQuery(query, NYdb::NQuery::TTxControl::BeginTx().CommitTx(), settings).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + //CompareYson(R"([[[1];["321"]]])", FormatResultSetYson(result.GetResultSet(0))); + //CompareYson(R"([[["111"];[1]]])", FormatResultSetYson(result.GetResultSet(1))); + } + { + auto it = client.StreamExecuteQuery(query, NYdb::NQuery::TTxControl::BeginTx().CommitTx(), settings).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(it.GetStatus(), EStatus::SUCCESS, it.GetIssues().ToString()); + Cerr << StreamResultToYson(it); + } + + } Y_UNIT_TEST_TWIN(ComplexLookupLimit, 
NewPredicateExtract) { TKikimrSettings settings; diff --git a/ydb/core/kqp/ut/opt/kqp_returning_ut.cpp b/ydb/core/kqp/ut/opt/kqp_returning_ut.cpp index 7035b4d6927b..b23d9d38ace9 100644 --- a/ydb/core/kqp/ut/opt/kqp_returning_ut.cpp +++ b/ydb/core/kqp/ut/opt/kqp_returning_ut.cpp @@ -11,6 +11,113 @@ using namespace NYdb::NTable; Y_UNIT_TEST_SUITE(KqpReturning) { +Y_UNIT_TEST(ReturningTwice) { + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableSequences(true); + appConfig.MutableTableServiceConfig()->SetEnableColumnsWithDefault(true); + auto serverSettings = TKikimrSettings().SetAppConfig(appConfig); + TKikimrRunner kikimr(serverSettings); + + auto client = kikimr.GetTableClient(); + auto session = client.CreateSession().GetValueSync().GetSession(); + + const auto queryCreate = Q_(R"( + CREATE TABLE IF NOT EXISTS tasks ( + hashed_key Uint32, + queue_name String, + task_id String, + worker_id Int32, + running Bool, + eta Timestamp, + lock_timeout Timestamp, + num_fails Int32, + num_reschedules Int32, + body String, + first_fail Timestamp, + idempotency_run_id String, + PRIMARY KEY (hashed_key, queue_name, task_id) + ); + + CREATE TABLE IF NOT EXISTS tasks_eta_002 ( + eta Timestamp, + hashed_key Uint32, + queue_name String, + task_id String, + PRIMARY KEY (eta, hashed_key, queue_name, task_id) + ) WITH ( + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 1, + AUTO_PARTITIONING_MAX_PARTITIONS_COUNT = 1 + ); + + CREATE TABLE IF NOT EXISTS tasks_processing_002 ( + expiration_ts Timestamp, + hashed_key Uint32, + queue_name String, + task_id String, + PRIMARY KEY (expiration_ts, hashed_key, queue_name, task_id) + ) WITH ( + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 1, + AUTO_PARTITIONING_MAX_PARTITIONS_COUNT = 1 + ); + )"); + + auto resultCreate = session.ExecuteSchemeQuery(queryCreate).GetValueSync(); + UNIT_ASSERT_C(resultCreate.IsSuccess(), resultCreate.GetIssues().ToString()); + + { + const auto query = Q_(R"( + --!syntax_v1 + DECLARE $eta 
AS Timestamp; + DECLARE $expiration_ts AS Timestamp; + DECLARE $limit AS Int32; + + $to_move = ( + SELECT $expiration_ts AS expiration_ts, eta, hashed_key, queue_name, task_id + FROM tasks_eta_002 + WHERE eta <= $eta + ORDER BY eta, hashed_key, queue_name, task_id + LIMIT $limit + ); + + UPSERT INTO tasks_processing_002 (expiration_ts, hashed_key, queue_name, task_id) + SELECT expiration_ts, hashed_key, queue_name, task_id FROM $to_move + RETURNING expiration_ts, hashed_key, queue_name, task_id; + + UPSERT INTO tasks (hashed_key, queue_name, task_id, running, lock_timeout) + SELECT hashed_key, queue_name, task_id, True as running, $expiration_ts AS lock_timeout FROM $to_move; + + DELETE FROM tasks_eta_002 ON + SELECT eta, hashed_key, queue_name, task_id FROM $to_move; + )"); + + auto params = TParamsBuilder() + .AddParam("$eta").Timestamp(TInstant::Zero()).Build() + .AddParam("$expiration_ts").Timestamp(TInstant::Zero()).Build() + .AddParam("$limit").Int32(1).Build() + .Build(); + + NYdb::NTable::TExecDataQuerySettings execSettings; + execSettings.CollectQueryStats(ECollectQueryStatsMode::Full); + + auto result = session.ExecuteDataQuery(query, TTxControl::BeginTx().CommitTx(), params, execSettings).GetValueSync(); + UNIT_ASSERT(result.IsSuccess()); + + size_t eta_table_access = 0; + auto stats = NYdb::TProtoAccessor::GetProto(*result.GetStats()); + + for (auto phase : stats.query_phases()) { + for (auto table : phase.table_access()) { + if (table.name() == "/Root/tasks_eta_002") { + eta_table_access++; + } + } + } + Cerr << "access count " << eta_table_access << Endl; + UNIT_ASSERT_EQUAL(eta_table_access, 1); + //Cerr << stats.Utf8DebugString() << Endl; + } +} + Y_UNIT_TEST(ReturningSerial) { NKikimrConfig::TAppConfig appConfig; appConfig.MutableTableServiceConfig()->SetEnableSequences(true); @@ -134,6 +241,55 @@ Y_UNIT_TEST(ReturningSerial) { } } +Y_UNIT_TEST(ReturningColumnsOrder) { + auto kikimr = DefaultKikimrRunner(); + + auto client = 
kikimr.GetTableClient(); + auto session = client.CreateSession().GetValueSync().GetSession(); + auto db = kikimr.GetQueryClient(); + + const auto queryCreate = Q_(R"( + CREATE TABLE test1 (id Int32, v Text, PRIMARY KEY(id)); + )"); + + auto resultCreate = session.ExecuteSchemeQuery(queryCreate).GetValueSync(); + UNIT_ASSERT_C(resultCreate.IsSuccess(), resultCreate.GetIssues().ToString()); + + { + const auto query = Q_(R"( + UPSERT INTO test1 (id, v) VALUES (1, '321') RETURNING id, v; + REPLACE INTO test1 (id, v) VALUES (1, '111') RETURNING v, id; + )"); + + auto result = session.ExecuteDataQuery(query, TTxControl::BeginTx().CommitTx()).GetValueSync(); + UNIT_ASSERT(result.IsSuccess()); + CompareYson(R"([[[1];["321"]]])", FormatResultSetYson(result.GetResultSet(0))); + CompareYson(R"([[["111"];[1]]])", FormatResultSetYson(result.GetResultSet(1))); + } + + auto settings = NYdb::NQuery::TExecuteQuerySettings() + .Syntax(NYdb::NQuery::ESyntax::YqlV1) + .ConcurrentResultSets(false); + { + auto result = db.ExecuteQuery(R"( + UPSERT INTO test1 (id, v) VALUES (1, '321') RETURNING id, v; + REPLACE INTO test1 (id, v) VALUES (1, '111') RETURNING v, id; + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx(), settings).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([[[1];["321"]]])", FormatResultSetYson(result.GetResultSet(0))); + CompareYson(R"([[["111"];[1]]])", FormatResultSetYson(result.GetResultSet(1))); + } + { + auto it = db.StreamExecuteQuery(R"( + UPSERT INTO test1 (id, v) VALUES (2, '321') RETURNING id, v; + REPLACE INTO test1 (id, v) VALUES (2, '111') RETURNING v, id; + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx(), settings).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(it.GetStatus(), EStatus::SUCCESS, it.GetIssues().ToString()); + Cerr << StreamResultToYson(it); + } + +} + Y_UNIT_TEST(ReturningTypes) { auto kikimr = DefaultKikimrRunner(); diff --git 
a/ydb/core/kqp/ut/pg/kqp_pg_ut.cpp b/ydb/core/kqp/ut/pg/kqp_pg_ut.cpp index 2854ea9d30fe..bf610134fc81 100644 --- a/ydb/core/kqp/ut/pg/kqp_pg_ut.cpp +++ b/ydb/core/kqp/ut/pg/kqp_pg_ut.cpp @@ -4685,7 +4685,8 @@ Y_UNIT_TEST_SUITE(KqpPg) { ui16 mbusport = tp.GetPort(2134); auto settings = Tests::TServerSettings(mbusport) .SetDomainName("Root") - .SetUseRealThreads(false); + .SetUseRealThreads(false) + .SetEnablePgSyntax(true); Tests::TServer::TPtr server = new Tests::TServer(settings); @@ -4732,6 +4733,33 @@ Y_UNIT_TEST_SUITE(KqpPg) { UNIT_ASSERT_VALUES_EQUAL(ydbResults.begin()->Getcolumns().at(i).Getname(), colNames[i]); } } + + Y_UNIT_TEST(LongDomainName) { + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnablePreparedDdl(true); + auto setting = NKikimrKqp::TKqpSetting(); + auto serverSettings = TKikimrSettings() + .SetAppConfig(appConfig) + .SetKqpSettings({setting}) + .SetDomainRoot(std::string(65, 'a')); + TKikimrRunner kikimr(serverSettings.SetWithSampleTables(false)); + auto db = kikimr.GetQueryClient(); + auto settings = NYdb::NQuery::TExecuteQuerySettings().Syntax(NYdb::NQuery::ESyntax::Pg); + { + auto result = db.ExecuteQuery(R"( + CREATE TABLE t (id INT PRIMARY KEY, data1 UUID[]); + )", NYdb::NQuery::TTxControl::NoTx(), settings).ExtractValueSync(); + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + } + + { + const auto query = Q_(R"( + SELECT * FROM t; + )"); + auto result = db.ExecuteQuery(query, NYdb::NQuery::TTxControl::BeginTx().CommitTx(), settings).ExtractValueSync(); + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + } + } } } // namespace NKqp diff --git a/ydb/core/kqp/ut/query/kqp_limits_ut.cpp b/ydb/core/kqp/ut/query/kqp_limits_ut.cpp index a5c993eff08d..4a65ed26cf46 100644 --- a/ydb/core/kqp/ut/query/kqp_limits_ut.cpp +++ b/ydb/core/kqp/ut/query/kqp_limits_ut.cpp @@ -128,6 +128,27 @@ Y_UNIT_TEST_SUITE(KqpLimits) { UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), 
EStatus::BAD_REQUEST); } + Y_UNIT_TEST(ComputeActorMemoryAllocationFailure) { + auto app = NKikimrConfig::TAppConfig(); + app.MutableTableServiceConfig()->MutableResourceManager()->SetMkqlLightProgramMemoryLimit(10); + app.MutableTableServiceConfig()->MutableResourceManager()->SetQueryMemoryLimit(2000); + + TKikimrRunner kikimr(app); + CreateLargeTable(kikimr, 0, 0, 0); + + kikimr.GetTestServer().GetRuntime()->SetLogPriority(NKikimrServices::KQP_SLOW_LOG, NActors::NLog::PRI_ERROR); + + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + + auto result = session.ExecuteDataQuery(Q1_(R"( + SELECT * FROM `/Root/LargeTable`; + )"), TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + result.GetIssues().PrintTo(Cerr); + + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::OVERLOADED); + } + Y_UNIT_TEST(DatashardProgramSize) { auto app = NKikimrConfig::TAppConfig(); app.MutableTableServiceConfig()->MutableResourceManager()->SetMkqlLightProgramMemoryLimit(1'000'000'000); diff --git a/ydb/core/kqp/ut/query/kqp_params_ut.cpp b/ydb/core/kqp/ut/query/kqp_params_ut.cpp index 86094d3a61e6..d265a84b62fd 100644 --- a/ydb/core/kqp/ut/query/kqp_params_ut.cpp +++ b/ydb/core/kqp/ut/query/kqp_params_ut.cpp @@ -103,8 +103,12 @@ Y_UNIT_TEST_SUITE(KqpParams) { } Y_UNIT_TEST(ImplicitParameterTypes) { - TKikimrRunner kikimr; - kikimr.GetTestServer().GetRuntime()->GetAppData(0).FeatureFlags.SetEnableImplicitQueryParameterTypes(true); + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableImplicitQueryParameterTypes(true); + auto serverSettings = TKikimrSettings() + .SetAppConfig(appConfig) + .SetKqpSettings({NKikimrKqp::TKqpSetting()}); + TKikimrRunner kikimr(serverSettings); auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); @@ -127,8 +131,12 @@ Y_UNIT_TEST_SUITE(KqpParams) { Y_UNIT_TEST(CheckQueryCacheForPreparedQuery) { // All params are declared in the 
text - TKikimrRunner kikimr; - kikimr.GetTestServer().GetRuntime()->GetAppData(0).FeatureFlags.SetEnableImplicitQueryParameterTypes(true); + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableImplicitQueryParameterTypes(true); + auto serverSettings = TKikimrSettings() + .SetAppConfig(appConfig) + .SetKqpSettings({NKikimrKqp::TKqpSetting()}); + TKikimrRunner kikimr(serverSettings); auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); @@ -164,8 +172,12 @@ Y_UNIT_TEST_SUITE(KqpParams) { Y_UNIT_TEST(CheckQueryCacheForUnpreparedQuery) { // Some params are declared in text, some by user - TKikimrRunner kikimr; - kikimr.GetTestServer().GetRuntime()->GetAppData(0).FeatureFlags.SetEnableImplicitQueryParameterTypes(true); + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableImplicitQueryParameterTypes(true); + auto serverSettings = TKikimrSettings() + .SetAppConfig(appConfig) + .SetKqpSettings({NKikimrKqp::TKqpSetting()}); + TKikimrRunner kikimr(serverSettings); auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); @@ -284,8 +296,12 @@ Y_UNIT_TEST_SUITE(KqpParams) { Y_UNIT_TEST(CheckQueryCacheForExecuteAndPreparedQueries) { // All params are declared in the text - TKikimrRunner kikimr; - kikimr.GetTestServer().GetRuntime()->GetAppData(0).FeatureFlags.SetEnableImplicitQueryParameterTypes(true); + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableImplicitQueryParameterTypes(true); + auto serverSettings = TKikimrSettings() + .SetAppConfig(appConfig) + .SetKqpSettings({NKikimrKqp::TKqpSetting()}); + TKikimrRunner kikimr(serverSettings); auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); @@ -493,8 +509,12 @@ Y_UNIT_TEST_SUITE(KqpParams) { } Y_UNIT_TEST(ImplicitSameParameterTypesQueryCacheCheck) { - TKikimrRunner kikimr; - 
kikimr.GetTestServer().GetRuntime()->GetAppData(0).FeatureFlags.SetEnableImplicitQueryParameterTypes(true); + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableImplicitQueryParameterTypes(true); + auto serverSettings = TKikimrSettings() + .SetAppConfig(appConfig) + .SetKqpSettings({NKikimrKqp::TKqpSetting()}); + TKikimrRunner kikimr(serverSettings); auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); @@ -519,8 +539,12 @@ Y_UNIT_TEST_SUITE(KqpParams) { } Y_UNIT_TEST(ImplicitDifferentParameterTypesQueryCacheCheck) { - TKikimrRunner kikimr; - kikimr.GetTestServer().GetRuntime()->GetAppData(0).FeatureFlags.SetEnableImplicitQueryParameterTypes(true); + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableImplicitQueryParameterTypes(true); + auto serverSettings = TKikimrSettings() + .SetAppConfig(appConfig) + .SetKqpSettings({NKikimrKqp::TKqpSetting()}); + TKikimrRunner kikimr(serverSettings); auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); diff --git a/ydb/core/kqp/ut/query/kqp_query_ut.cpp b/ydb/core/kqp/ut/query/kqp_query_ut.cpp index 6c11b9e2e7bb..15e018470b43 100644 --- a/ydb/core/kqp/ut/query/kqp_query_ut.cpp +++ b/ydb/core/kqp/ut/query/kqp_query_ut.cpp @@ -1576,7 +1576,7 @@ Y_UNIT_TEST_SUITE(KqpQuery) { )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); UNIT_ASSERT(!prepareResult.IsSuccess()); UNIT_ASSERT_C( - prepareResult.GetIssues().ToString().Contains("Query can be executed only in per-statement mode (NoTx)"), + prepareResult.GetIssues().ToString().Contains("CTAS statement can be executed only in NoTx mode."), prepareResult.GetIssues().ToString()); } @@ -1782,6 +1782,96 @@ Y_UNIT_TEST_SUITE(KqpQuery) { CompareYson(output, R"([[1u;[1];["test1"]];[100u;[100];["test2"]]])"); } } + + Y_UNIT_TEST_TWIN(TableSink_ReplaceDataShardDataQuery, UseSink) { + NKikimrConfig::TAppConfig 
appConfig; + appConfig.MutableTableServiceConfig()->SetEnableOlapSink(UseSink); + appConfig.MutableTableServiceConfig()->SetEnableOltpSink(UseSink); + auto settings = TKikimrSettings() + .SetAppConfig(appConfig) + .SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + Tests::NCommon::TLoggerInit(kikimr).Initialize(); + + auto session = kikimr.GetTableClient().CreateSession().GetValueSync().GetSession(); + + const TString query = R"( + CREATE TABLE `/Root/DataShard` ( + Col1 Uint32 NOT NULL, + Col2 String, + Col3 Int32 NOT NULL, + PRIMARY KEY (Col1) + ) + WITH ( + AUTO_PARTITIONING_BY_SIZE = DISABLED, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 16, + AUTO_PARTITIONING_MAX_PARTITIONS_COUNT = 16, + UNIFORM_PARTITIONS = 16); + + CREATE TABLE `/Root/DataShard2` ( + Col1 Uint32 NOT NULL, + Col2 String, + Col3 Int32 NOT NULL, + PRIMARY KEY (Col1) + ) + WITH ( + AUTO_PARTITIONING_BY_SIZE = DISABLED, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 17, + AUTO_PARTITIONING_MAX_PARTITIONS_COUNT = 17, + UNIFORM_PARTITIONS = 17); + )"; + + auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_C(result.GetStatus() == NYdb::EStatus::SUCCESS, result.GetIssues().ToString()); + + { + auto prepareResult = session.ExecuteDataQuery(R"( + REPLACE INTO `/Root/DataShard` (Col1, Col2, Col3) VALUES + (10u, "test1", 10), (20u, "test2", 11), (2147483647u, "test3", 12), (2147483640u, NULL, 13); + )", NYdb::NTable::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_C(prepareResult.IsSuccess(), prepareResult.GetIssues().ToString()); + } + + { + auto it = session.ExecuteDataQuery(R"( + SELECT COUNT(*) FROM `/Root/DataShard`; + )", NYdb::NTable::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(it.GetStatus(), EStatus::SUCCESS, it.GetIssues().ToString()); + CompareYson(R"([[4u]])", FormatResultSetYson(it.GetResultSet(0))); + } + + { + auto prepareResult = session.ExecuteDataQuery(R"( + REPLACE INTO `/Root/DataShard2` 
SELECT * FROM `/Root/DataShard`; + )", NYdb::NTable::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_C(prepareResult.IsSuccess(), prepareResult.GetIssues().ToString()); + } + + { + auto it = session.ExecuteDataQuery(R"( + SELECT COUNT(*) FROM `/Root/DataShard2`; + )", NYdb::NTable::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(it.GetStatus(), EStatus::SUCCESS, it.GetIssues().ToString()); + CompareYson(R"([[4u]])", FormatResultSetYson(it.GetResultSet(0))); + } + + { + auto prepareResult = session.ExecuteDataQuery(R"( + REPLACE INTO `/Root/DataShard2` (Col1, Col2, Col3) VALUES + (11u, "test1", 10), (21u, "test2", 11), (2147483646u, "test3", 12), (2147483641u, NULL, 13); + SELECT COUNT(*) FROM `/Root/DataShard`; + )", NYdb::NTable::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_C(prepareResult.IsSuccess(), prepareResult.GetIssues().ToString()); + } + + { + auto it = session.ExecuteDataQuery(R"( + SELECT COUNT(*) FROM `/Root/DataShard2`; + )", NYdb::NTable::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(it.GetStatus(), EStatus::SUCCESS, it.GetIssues().ToString()); + CompareYson(R"([[8u]])", FormatResultSetYson(it.GetResultSet(0))); + } + } } } // namespace NKqp diff --git a/ydb/core/kqp/ut/scan/kqp_scan_ut.cpp b/ydb/core/kqp/ut/scan/kqp_scan_ut.cpp index e8bb6f3034b5..8fa62dbdb704 100644 --- a/ydb/core/kqp/ut/scan/kqp_scan_ut.cpp +++ b/ydb/core/kqp/ut/scan/kqp_scan_ut.cpp @@ -237,15 +237,34 @@ Y_UNIT_TEST_SUITE(KqpScan) { .BeginTuple().AddElement().BeginOptional().Decimal(TDecimalValue("1.5", 22, 9)).EndOptional().EndTuple() .Build()); - auto ret = session.CreateTable("/Root/DecimalTest", + auto ret = session.CreateTable("/Root/DecimalTest", TTableBuilder() .AddNullableColumn("Key", TDecimalType(22, 9)) .AddNullableColumn("Value", TDecimalType(22, 9)) .SetPrimaryKeyColumn("Key") - // .SetPartitionAtKeys(partitions) // Error at split boundary 0: Unsupported 
typeId 4865 at index 0 + .SetPartitionAtKeys(partitions) .Build()).GetValueSync(); UNIT_ASSERT_C(ret.IsSuccess(), ret.GetIssues().ToString()); + { + auto describeResult = session.DescribeTable("/Root/DecimalTest" , TDescribeTableSettings().WithKeyShardBoundary(true)).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL(describeResult.GetStatus(), NYdb::EStatus::SUCCESS); + const NYdb::NTable::TTableDescription& tableDescription = describeResult.GetTableDescription(); + const TVector& keyRanges = tableDescription.GetKeyRanges(); + const TVector& columns = tableDescription.GetTableColumns(); + UNIT_ASSERT_VALUES_EQUAL(columns.size(), 2); + UNIT_ASSERT_STRINGS_EQUAL(columns[0].Type.ToString(), "Decimal(22,9)?"); + UNIT_ASSERT_STRINGS_EQUAL(columns[1].Type.ToString(), "Decimal(22,9)?"); + auto extractValue = [](const TValue& val) { + auto parser = TValueParser(val); + parser.OpenTuple(); + UNIT_ASSERT(parser.TryNextElement()); + return parser.GetOptionalDecimal()->ToString(); + }; + UNIT_ASSERT_VALUES_EQUAL(keyRanges.size(), 2); + UNIT_ASSERT_STRINGS_EQUAL(extractValue(keyRanges[0].To()->GetValue()), "1.5"); + } + auto params = TParamsBuilder().AddParam("$in").BeginList() .AddListItem().BeginStruct() .AddMember("Key").Decimal(TDecimalValue("1.0")) diff --git a/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp b/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp index 4db4ac5dd9fb..a9e7125dc601 100644 --- a/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp +++ b/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp @@ -1,6 +1,8 @@ #include #include +#include #include +#include #include #include #include @@ -1947,8 +1949,7 @@ Y_UNIT_TEST_SUITE(KqpScheme) { } Y_UNIT_TEST(CreateTableWithPartitionAtKeysUuid) { - TKikimrSettings kikimrSettings = TKikimrSettings() - .SetEnableUuidAsPrimaryKey(true); + TKikimrSettings kikimrSettings; TKikimrRunner kikimr(kikimrSettings); auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); @@ -1992,8 +1993,7 @@ Y_UNIT_TEST_SUITE(KqpScheme) { } 
Y_UNIT_TEST(CreateTableWithUniformPartitionsUuid) { - TKikimrSettings kikimrSettings = TKikimrSettings() - .SetEnableUuidAsPrimaryKey(true); + TKikimrSettings kikimrSettings; TKikimrRunner kikimr(kikimrSettings); auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); @@ -3905,6 +3905,22 @@ Y_UNIT_TEST_SUITE(KqpScheme) { UNIT_ASSERT_VALUES_EQUAL(desc.GetTopicDescription().GetRetentionPeriod(), TDuration::Hours(1)); } + { // alter + auto query = R"( + --!syntax_v1 + ALTER TOPIC `/Root/table/feed_2` SET ( + RETENTION_PERIOD = Interval("PT2H") + ); + )"; + + const auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + auto desc = pq.DescribeTopic("/Root/table/feed_2").ExtractValueSync(); + UNIT_ASSERT_C(desc.IsSuccess(), desc.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(desc.GetTopicDescription().GetRetentionPeriod(), TDuration::Hours(2)); + } + { // non-positive (invalid) auto query = R"( --!syntax_v1 @@ -4075,6 +4091,70 @@ Y_UNIT_TEST_SUITE(KqpScheme) { } } + Y_UNIT_TEST(ChangefeedOnIndexTable) { + TKikimrRunner kikimr(TKikimrSettings() + .SetPQConfig(DefaultPQConfig()) + .SetEnableChangefeedsOnIndexTables(true)); + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + + { + auto query = R"( + --!syntax_v1 + CREATE TABLE `/Root/table` ( + Key Uint64, + Value String, + PRIMARY KEY (Key), + INDEX SyncIndex GLOBAL SYNC ON (`Value`), + INDEX AsyncIndex GLOBAL ASYNC ON (`Value`) + ); + )"; + + auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + } + + const auto changefeed = TChangefeedDescription("feed", EChangefeedMode::KeysOnly, EChangefeedFormat::Json); + { + auto result = session.AlterTable("/Root/table/AsyncIndex", TAlterTableSettings() + 
.AppendAddChangefeeds(changefeed) + ).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::PRECONDITION_FAILED, result.GetIssues().ToString()); + } + { + auto result = session.AlterTable("/Root/table/SyncIndex", TAlterTableSettings() + .AppendAddChangefeeds(changefeed) + ).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + } + } + + Y_UNIT_TEST(DescribeIndexTable) { + TKikimrRunner kikimr; + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + + { + auto query = R"( + --!syntax_v1 + CREATE TABLE `/Root/table` ( + Key Uint64, + Value String, + PRIMARY KEY (Key), + INDEX SyncIndex GLOBAL SYNC ON (`Value`) + ); + )"; + + auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + } + { + auto desc = session.DescribeTable("/Root/table/SyncIndex").ExtractValueSync(); + UNIT_ASSERT_C(desc.IsSuccess(), desc.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(desc.GetEntry().Name, "SyncIndex"); + } + } + Y_UNIT_TEST(CreatedAt) { TKikimrRunner kikimr(TKikimrSettings().SetPQConfig(DefaultPQConfig())); auto scheme = NYdb::NScheme::TSchemeClient(kikimr.GetDriver(), TCommonClientSettings().Database("/Root")); @@ -4785,6 +4865,66 @@ Y_UNIT_TEST_SUITE(KqpScheme) { session.Close().GetValueSync(); } + Y_UNIT_TEST(DisableExternalDataSourcesOnServerless) { + auto ydb = NWorkload::TYdbSetupSettings() + .CreateSampleTenants(true) + .EnableExternalDataSourcesOnServerless(false) + .Create(); + + auto checkDisabled = [](const auto& result, NYdb::EStatus status) { + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), status, result.GetIssues().ToString()); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), "External data sources are disabled for serverless domains. 
Please contact your system administrator to enable it"); + }; + + auto checkNotFound = [](const auto& result, NYdb::EStatus status) { + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), status, result.GetIssues().ToString()); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), "Path does not exist"); + }; + + const auto& createSourceSql = R"( + CREATE EXTERNAL DATA SOURCE MyExternalDataSource WITH ( + SOURCE_TYPE="ObjectStorage", + LOCATION="my-bucket", + AUTH_METHOD="NONE" + );)"; + + const auto& createTableSql = R"( + CREATE EXTERNAL TABLE MyExternalTable ( + Key Uint64, + Value String + ) WITH ( + DATA_SOURCE="MyExternalDataSource", + LOCATION="/" + );)"; + + const auto& dropSourceSql = "DROP EXTERNAL DATA SOURCE MyExternalDataSource;"; + + const auto& dropTableSql = "DROP EXTERNAL TABLE MyExternalTable;"; + + auto settings = NWorkload::TQueryRunnerSettings().PoolId(NResourcePool::DEFAULT_POOL_ID); + + // Dedicated, enabled + settings.Database(ydb->GetSettings().GetDedicatedTenantName()).NodeIndex(1); + NWorkload::TSampleQueries::CheckSuccess(ydb->ExecuteQuery(createSourceSql, settings)); + NWorkload::TSampleQueries::CheckSuccess(ydb->ExecuteQuery(createTableSql, settings)); + NWorkload::TSampleQueries::CheckSuccess(ydb->ExecuteQuery(dropTableSql, settings)); + NWorkload::TSampleQueries::CheckSuccess(ydb->ExecuteQuery(dropSourceSql, settings)); + + // Shared, enabled + settings.Database(ydb->GetSettings().GetSharedTenantName()).NodeIndex(2); + NWorkload::TSampleQueries::CheckSuccess(ydb->ExecuteQuery(createSourceSql, settings)); + NWorkload::TSampleQueries::CheckSuccess(ydb->ExecuteQuery(createTableSql, settings)); + NWorkload::TSampleQueries::CheckSuccess(ydb->ExecuteQuery(dropTableSql, settings)); + NWorkload::TSampleQueries::CheckSuccess(ydb->ExecuteQuery(dropSourceSql, settings)); + + // Serverless, disabled + settings.Database(ydb->GetSettings().GetServerlessTenantName()).NodeIndex(2); + checkDisabled(ydb->ExecuteQuery(createSourceSql, settings), 
NYdb::EStatus::GENERIC_ERROR); + checkDisabled(ydb->ExecuteQuery(createTableSql, settings), NYdb::EStatus::PRECONDITION_FAILED); + checkNotFound(ydb->ExecuteQuery(dropTableSql, settings), NYdb::EStatus::SCHEME_ERROR); + checkNotFound(ydb->ExecuteQuery(dropSourceSql, settings), NYdb::EStatus::GENERIC_ERROR); + } + Y_UNIT_TEST(CreateExternalDataSource) { NKikimrConfig::TAppConfig appCfg; appCfg.MutableQueryServiceConfig()->AddHostnamePatterns("my-bucket"); @@ -6055,16 +6195,75 @@ Y_UNIT_TEST_SUITE(KqpScheme) { } } + void AsyncReplicationConnectionParams(TKikimrRunner& kikimr, const TString& connectionParam, bool ssl = false) { + using namespace NReplication; + + auto repl = TReplicationClient(kikimr.GetDriver(), TCommonClientSettings().Database("/Root")); + auto db = kikimr.GetTableClient(); + auto session = db.CreateSession().GetValueSync().GetSession(); + + { + auto query = R"( + --!syntax_v1 + CREATE TABLE `/Root/table` (Key Uint64, Value String, PRIMARY KEY (Key)); + )"; + + auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + } + { + auto query = Sprintf(R"( + --!syntax_v1 + CREATE ASYNC REPLICATION `/Root/replication` FOR + `/Root/table` AS `/Root/replica` + WITH ( + %s, TOKEN = "root@builtin" + ); + )", connectionParam.c_str()); + + const auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + } + { + const auto result = repl.DescribeReplication("/Root/replication").ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + const auto& params = result.GetReplicationDescription().GetConnectionParams(); + UNIT_ASSERT_VALUES_EQUAL(params.GetDiscoveryEndpoint(), kikimr.GetEndpoint()); + UNIT_ASSERT_VALUES_EQUAL(params.GetDatabase(), "/Root"); + 
UNIT_ASSERT_VALUES_EQUAL(params.GetEnableSsl(), ssl); + } + } + + Y_UNIT_TEST(AsyncReplicationConnectionString) { + TKikimrRunner kikimr; + AsyncReplicationConnectionParams(kikimr, Sprintf(R"(CONNECTION_STRING = "grpc://%s/?database=/Root")", kikimr.GetEndpoint().c_str())); + } + + Y_UNIT_TEST(AsyncReplicationConnectionStringWithSsl) { + TKikimrRunner kikimr; + AsyncReplicationConnectionParams(kikimr, Sprintf(R"(CONNECTION_STRING = "grpcs://%s/?database=/Root")", kikimr.GetEndpoint().c_str()), true); + } + + Y_UNIT_TEST(AsyncReplicationEndpointAndDatabase) { + TKikimrRunner kikimr; + AsyncReplicationConnectionParams(kikimr, Sprintf(R"(ENDPOINT = "%s", DATABASE = "/Root")", kikimr.GetEndpoint().c_str())); + } + Y_UNIT_TEST(DisableResourcePools) { TKikimrRunner kikimr(TKikimrSettings().SetEnableResourcePools(false)); auto db = kikimr.GetTableClient(); auto session = db.CreateSession().GetValueSync().GetSession(); - auto checkDisabled = [&session](const TString& query) { + auto checkQuery = [&session](const TString& query, EStatus status, const TString& error) { Cerr << "Check query:\n" << query << "\n"; auto result = session.ExecuteSchemeQuery(query).GetValueSync(); - UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::UNSUPPORTED); - UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), "Resource pools are disabled. Please contact your system administrator to enable it"); + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), status); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), error); + }; + + auto checkDisabled = [checkQuery](const TString& query) { + checkQuery(query, EStatus::UNSUPPORTED, "Resource pools are disabled. 
Please contact your system administrator to enable it"); }; // CREATE RESOURCE POOL @@ -6083,7 +6282,9 @@ Y_UNIT_TEST_SUITE(KqpScheme) { )"); // DROP RESOURCE POOL - checkDisabled("DROP RESOURCE POOL MyResourcePool;"); + checkQuery("DROP RESOURCE POOL MyResourcePool;", + EStatus::SCHEME_ERROR, + "Path does not exist"); } Y_UNIT_TEST(ResourcePoolsValidation) { @@ -6343,6 +6544,7 @@ Y_UNIT_TEST_SUITE(KqpOlapScheme) { } testHelper.DropTable("/Root/ColumnTableTest"); for (auto tablet: tabletIds) { + testHelper.WaitTabletDeletionInHive(tablet, TDuration::Seconds(5)); UNIT_ASSERT_C(!testHelper.GetKikimr().GetTestClient().TabletExistsInHive(&testHelper.GetRuntime(), tablet), ToString(tablet) + " is alive"); } } @@ -7051,6 +7253,87 @@ Y_UNIT_TEST_SUITE(KqpOlapScheme) { testHelper.ReadData("SELECT * FROM `/Root/ColumnTableTest` WHERE id=1", "[[1;#;[\"test_res_1\"]]]"); } + void TestDropThenAddColumn(bool enableIndexation, bool enableCompaction) { + if (enableCompaction) { + Y_ABORT_UNLESS(enableIndexation); + } + + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + csController->DisableBackground(NYDBTest::ICSController::EBackground::Indexation); + csController->DisableBackground(NYDBTest::ICSController::EBackground::Compaction); + + TKikimrSettings runnerSettings; + runnerSettings.WithSampleTables = false; + TTestHelper testHelper(runnerSettings); + + TVector schema = { + TTestHelper::TColumnSchema().SetName("id").SetType(NScheme::NTypeIds::Int32).SetNullable(false), + TTestHelper::TColumnSchema().SetName("value").SetType(NScheme::NTypeIds::Utf8), + }; + + TTestHelper::TColumnTable testTable; + testTable.SetName("/Root/ColumnTableTest").SetPrimaryKey({ "id" }).SetSharding({ "id" }).SetSchema(schema); + testHelper.CreateTable(testTable); + + { + TTestHelper::TUpdatesBuilder tableInserter(testTable.GetArrowSchema(schema)); + tableInserter.AddRow().Add(1).Add("test_res_1"); + tableInserter.AddRow().Add(2).Add("test_res_2"); + 
testHelper.BulkUpsert(testTable, tableInserter); + } + + if (enableCompaction) { + csController->EnableBackground(NYDBTest::ICSController::EBackground::Indexation); + csController->EnableBackground(NYDBTest::ICSController::EBackground::Compaction); + csController->WaitIndexation(TDuration::Seconds(5)); + csController->WaitCompactions(TDuration::Seconds(5)); + csController->DisableBackground(NYDBTest::ICSController::EBackground::Indexation); + csController->DisableBackground(NYDBTest::ICSController::EBackground::Compaction); + } + + { + auto alterQuery = TStringBuilder() << "ALTER TABLE `" << testTable.GetName() << "` DROP COLUMN value;"; + auto alterResult = testHelper.GetSession().ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + { + auto alterQuery = TStringBuilder() << "ALTER TABLE `" << testTable.GetName() << "` ADD COLUMN value Uint64;"; + auto alterResult = testHelper.GetSession().ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + schema.back().SetType(NScheme::NTypeIds::Uint64); + + { + TTestHelper::TUpdatesBuilder tableInserter(testTable.GetArrowSchema(schema)); + tableInserter.AddRow().Add(3).Add(42); + tableInserter.AddRow().Add(4).Add(43); + testHelper.BulkUpsert(testTable, tableInserter); + } + + if (enableIndexation) { + csController->EnableBackground(NYDBTest::ICSController::EBackground::Indexation); + csController->WaitIndexation(TDuration::Seconds(5)); + } + if (enableCompaction) { + csController->EnableBackground(NYDBTest::ICSController::EBackground::Compaction); + csController->WaitCompactions(TDuration::Seconds(5)); + } + + testHelper.ReadData("SELECT value FROM `/Root/ColumnTableTest`", "[[#];[#];[[42u]];[[43u]]]"); + } + + Y_UNIT_TEST(DropThenAddColumn) { + TestDropThenAddColumn(false, false); + } + + 
Y_UNIT_TEST(DropThenAddColumnIndexation) { + TestDropThenAddColumn(true, true); + } + + Y_UNIT_TEST(DropThenAddColumnCompaction) { + TestDropThenAddColumn(true, true); + } + Y_UNIT_TEST(DropTtlColumn) { TKikimrSettings runnerSettings; runnerSettings.WithSampleTables = false; diff --git a/ydb/core/kqp/ut/scheme/ya.make b/ydb/core/kqp/ut/scheme/ya.make index 4cd03e32b9d9..f1ab58f089eb 100644 --- a/ydb/core/kqp/ut/scheme/ya.make +++ b/ydb/core/kqp/ut/scheme/ya.make @@ -22,6 +22,7 @@ PEERDIR( library/cpp/threading/local_executor ydb/core/kqp ydb/core/kqp/ut/common + ydb/core/kqp/workload_service/ut/common ydb/core/tx/columnshard/hooks/testing ydb/library/yql/sql/pg ydb/library/yql/parser/pg_wrapper diff --git a/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp b/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp index 1469403863ae..dd6dc576086a 100644 --- a/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp +++ b/ydb/core/kqp/ut/service/kqp_qs_queries_ut.cpp @@ -2,6 +2,8 @@ #include #include #include +#include +#include #include #include #include @@ -2313,6 +2315,10 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { ALTER TABLE TestDdlDml2 DROP COLUMN Value2; UPSERT INTO TestDdlDml2 (Key, Value1) VALUES (2, "2"); SELECT * FROM TestDdlDml2; + CREATE TABLE TestDdlDml33 ( + Key Uint64, + PRIMARY KEY (Key) + ); )", TTxControl::NoTx()).ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); UNIT_ASSERT_VALUES_EQUAL(result.GetResultSets().size(), 2); @@ -2327,6 +2333,13 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { UNIT_ASSERT_VALUES_EQUAL(result.GetResultSets().size(), 1); CompareYson(R"([[[1u];["1"]];[[2u];["2"]]])", FormatResultSetYson(result.GetResultSet(0))); + result = db.ExecuteQuery(R"( + SELECT * FROM TestDdlDml33; + )", TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(result.GetResultSets().size(), 1); + 
CompareYson(R"([])", FormatResultSetYson(result.GetResultSet(0))); + result = db.ExecuteQuery(R"( CREATE TABLE TestDdlDml4 ( Key Uint64, @@ -2566,6 +2579,313 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { } } + Y_UNIT_TEST(CTASWithoutPerStatement) { + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableOlapSink(true); + appConfig.MutableTableServiceConfig()->SetEnableCreateTableAs(true); + appConfig.MutableTableServiceConfig()->SetEnablePreparedDdl(true); + appConfig.MutableTableServiceConfig()->SetEnableAstCache(false); + appConfig.MutableTableServiceConfig()->SetEnablePerStatementQueryExecution(false); + auto setting = NKikimrKqp::TKqpSetting(); + auto serverSettings = TKikimrSettings() + .SetAppConfig(appConfig) + .SetKqpSettings({setting}) + .SetWithSampleTables(false) + .SetEnableTempTables(true); + + TKikimrRunner kikimr(serverSettings); + auto db = kikimr.GetQueryClient(); + + { + auto result = db.ExecuteQuery(R"( + CREATE TABLE Table1 ( + PRIMARY KEY (Key) + ) AS SELECT 1u AS Key, "1" AS Value1, "1" AS Value2; + CREATE TABLE Table2 ( + PRIMARY KEY (Key) + ) AS SELECT 2u AS Key, "2" AS Value1, "2" AS Value2; + )", TTxControl::NoTx(), TExecuteQuerySettings()).ExtractValueSync(); + + UNIT_ASSERT(!result.IsSuccess()); + UNIT_ASSERT_C( + result.GetIssues().ToString().Contains("Several CTAS statement can't be used without per-statement mode."), + result.GetIssues().ToString()); + } + + { + auto result = db.ExecuteQuery(R"( + CREATE TABLE Table2 ( + PRIMARY KEY (Key) + ) AS SELECT 2u AS Key, "2" AS Value1, "2" AS Value2; + SELECT * FROM Table1 ORDER BY Key; + )", TTxControl::NoTx(), TExecuteQuerySettings()).ExtractValueSync(); + + UNIT_ASSERT(!result.IsSuccess()); + UNIT_ASSERT_C( + result.GetIssues().ToString().Contains("CTAS statement can't be used with other statements without per-statement mode."), + result.GetIssues().ToString()); + } + + { + auto result = db.ExecuteQuery(R"( + SELECT * FROM Table1 ORDER BY Key; + CREATE TABLE 
Table2 ( + PRIMARY KEY (Key) + ) AS SELECT 2u AS Key, "2" AS Value1, "2" AS Value2; + )", TTxControl::NoTx(), TExecuteQuerySettings()).ExtractValueSync(); + + UNIT_ASSERT(!result.IsSuccess()); + UNIT_ASSERT_C( + result.GetIssues().ToString().Contains("CTAS statement can't be used with other statements without per-statement mode."), + result.GetIssues().ToString()); + } + + { + auto result = db.ExecuteQuery(R"( + CREATE TABLE Table1 ( + PRIMARY KEY (Key) + ) AS SELECT 1u AS Key, "1" AS Value1, "1" AS Value2; + )", TTxControl::NoTx(), TExecuteQuerySettings()).ExtractValueSync(); + + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + } + + { + auto result = db.ExecuteQuery(R"( + SELECT * FROM Table1 ORDER BY Key; + )", TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(result.GetResultSets().size(), 1); + CompareYson(R"([[[1u];["1"];["1"]]])", FormatResultSetYson(result.GetResultSet(0))); + } + } + + Y_UNIT_TEST(SeveralCTAS) { + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnablePreparedDdl(true); + appConfig.MutableTableServiceConfig()->SetEnableAstCache(true); + appConfig.MutableTableServiceConfig()->SetEnableOlapSink(true); + appConfig.MutableTableServiceConfig()->SetEnablePreparedDdl(true); + appConfig.MutableTableServiceConfig()->SetEnableCreateTableAs(true); + appConfig.MutableTableServiceConfig()->SetEnablePerStatementQueryExecution(true); + auto setting = NKikimrKqp::TKqpSetting(); + auto serverSettings = TKikimrSettings() + .SetAppConfig(appConfig) + .SetKqpSettings({setting}) + .SetWithSampleTables(false) + .SetEnableTempTables(true); + + TKikimrRunner kikimr(serverSettings); + auto db = kikimr.GetQueryClient(); + + { + auto result = db.ExecuteQuery(R"( + CREATE TABLE Table1 ( + PRIMARY KEY (Key) + ) AS SELECT 1u AS Key, "1" AS Value1, "1" AS 
Value2; + CREATE TABLE Table2 ( + PRIMARY KEY (Key) + ) AS SELECT 2u AS Key, "2" AS Value1, "2" AS Value2; + CREATE TABLE Table3 ( + PRIMARY KEY (Key) + ) AS SELECT * FROM Table2 UNION ALL SELECT * FROM Table1; + SELECT * FROM Table1 ORDER BY Key; + SELECT * FROM Table2 ORDER BY Key; + SELECT * FROM Table3 ORDER BY Key; + )", TTxControl::NoTx(), TExecuteQuerySettings()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(result.GetResultSets().size(), 3); + // Results are empty. Snapshot was taken before tables were created, so we don't see changes after snapshot. + // This will be fixed in future, for example, by implicit commit before/after each ddl statement. + CompareYson(R"([])", FormatResultSetYson(result.GetResultSet(0))); + CompareYson(R"([])", FormatResultSetYson(result.GetResultSet(1))); + CompareYson(R"([])", FormatResultSetYson(result.GetResultSet(2))); + + result = db.ExecuteQuery(R"( + SELECT * FROM Table1 ORDER BY Key; + SELECT * FROM Table2 ORDER BY Key; + SELECT * FROM Table3 ORDER BY Key; + )", TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(result.GetResultSets().size(), 3); + CompareYson(R"([[[1u];["1"];["1"]]])", FormatResultSetYson(result.GetResultSet(0))); + CompareYson(R"([[[2u];["2"];["2"]]])", FormatResultSetYson(result.GetResultSet(1))); + // Also empty now( + CompareYson(R"([])", FormatResultSetYson(result.GetResultSet(2))); + } + } + + Y_UNIT_TEST(CheckIsolationLevelFroPerStatementMode) { + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnablePreparedDdl(true); + appConfig.MutableTableServiceConfig()->SetEnableAstCache(true); + appConfig.MutableTableServiceConfig()->SetEnablePerStatementQueryExecution(true); + auto setting = NKikimrKqp::TKqpSetting(); + auto serverSettings = 
TKikimrSettings() + .SetAppConfig(appConfig) + .SetKqpSettings({setting}); + + TKikimrRunner kikimr(serverSettings); + auto db = kikimr.GetQueryClient(); + auto tableClient = kikimr.GetTableClient(); + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + + { + // 1 ddl statement + auto result = db.ExecuteQuery(R"( + CREATE TABLE Test1 ( + Key Uint64, + Value1 String, + Value2 String, + PRIMARY KEY (Key) + ); + )", TTxControl::NoTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(result.GetResultSets().size(), 0); + UNIT_ASSERT_EQUAL_C(result.GetIssues().Size(), 0, result.GetIssues().ToString()); + + NYdb::NTable::TDescribeTableResult describe = session.DescribeTable("/Root/Test1").GetValueSync(); + UNIT_ASSERT_EQUAL(describe.GetStatus(), EStatus::SUCCESS); + } + + { + // 2 ddl statements + auto result = db.ExecuteQuery(R"( + CREATE TABLE Test2 ( + Key Uint64, + Value1 String, + Value2 String, + PRIMARY KEY (Key) + ); + CREATE TABLE Test3 ( + Key Uint64, + Value1 String, + Value2 String, + PRIMARY KEY (Key) + ); + )", TTxControl::NoTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(result.GetResultSets().size(), 0); + UNIT_ASSERT_EQUAL_C(result.GetIssues().Size(), 0, result.GetIssues().ToString()); + + NYdb::NTable::TDescribeTableResult describe1 = session.DescribeTable("/Root/Test2").GetValueSync(); + UNIT_ASSERT_EQUAL(describe1.GetStatus(), EStatus::SUCCESS); + NYdb::NTable::TDescribeTableResult describe2 = session.DescribeTable("/Root/Test3").GetValueSync(); + UNIT_ASSERT_EQUAL(describe2.GetStatus(), EStatus::SUCCESS); + } + + { + // 1 dml statement + auto result = db.ExecuteQuery(R"( + SELECT * FROM Test1; + )", TTxControl::NoTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, 
result.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(result.GetResultSets().size(), 1); + UNIT_ASSERT_EQUAL_C(result.GetIssues().Size(), 0, result.GetIssues().ToString()); + } + + { + // 2 dml statements + auto result = db.ExecuteQuery(R"( + SELECT * FROM Test2; + SELECT * FROM Test3; + )", TTxControl::NoTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(result.GetResultSets().size(), 2); + UNIT_ASSERT_EQUAL_C(result.GetIssues().Size(), 0, result.GetIssues().ToString()); + } + + { + // 1 ddl 1 dml statements + auto result = db.ExecuteQuery(R"( + CREATE TABLE Test4 ( + Key Uint64, + Value1 String, + Value2 String, + PRIMARY KEY (Key) + ); + SELECT * FROM Test4; + )", TTxControl::NoTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(result.GetResultSets().size(), 1); + UNIT_ASSERT_EQUAL_C(result.GetIssues().Size(), 0, result.GetIssues().ToString()); + NYdb::NTable::TDescribeTableResult describe = session.DescribeTable("/Root/Test4").GetValueSync(); + UNIT_ASSERT_EQUAL(describe.GetStatus(), EStatus::SUCCESS); + } + + { + // 1 dml 1 ddl statements + auto result = db.ExecuteQuery(R"( + SELECT * FROM Test4; + CREATE TABLE Test5 ( + Key Uint64, + Value1 String, + Value2 String, + PRIMARY KEY (Key) + ); + )", TTxControl::NoTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(result.GetResultSets().size(), 1); + UNIT_ASSERT_EQUAL_C(result.GetIssues().Size(), 0, result.GetIssues().ToString()); + NYdb::NTable::TDescribeTableResult describe = session.DescribeTable("/Root/Test5").GetValueSync(); + UNIT_ASSERT_EQUAL(describe.GetStatus(), EStatus::SUCCESS); + } + + { + // 1 ddl 1 dml 1 ddl 1 dml statements + auto result = db.ExecuteQuery(R"( + CREATE TABLE Test6 ( + Key Uint64, + 
Value1 String, + Value2 String, + PRIMARY KEY (Key) + ); + SELECT * FROM Test6; + CREATE TABLE Test7 ( + Key Uint64, + Value1 String, + Value2 String, + PRIMARY KEY (Key) + ); + SELECT * FROM Test7; + )", TTxControl::NoTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(result.GetResultSets().size(), 2); + UNIT_ASSERT_EQUAL_C(result.GetIssues().Size(), 0, result.GetIssues().ToString()); + NYdb::NTable::TDescribeTableResult describe1 = session.DescribeTable("/Root/Test6").GetValueSync(); + UNIT_ASSERT_EQUAL(describe1.GetStatus(), EStatus::SUCCESS); + NYdb::NTable::TDescribeTableResult describe2 = session.DescribeTable("/Root/Test7").GetValueSync(); + UNIT_ASSERT_EQUAL(describe2.GetStatus(), EStatus::SUCCESS); + } + + { + // 1 dml 1 ddl 1 dml 1 ddl statements + auto result = db.ExecuteQuery(R"( + SELECT * FROM Test7; + CREATE TABLE Test8 ( + Key Uint64, + Value1 String, + Value2 String, + PRIMARY KEY (Key) + ); + SELECT * FROM Test8; + CREATE TABLE Test9 ( + Key Uint64, + Value1 String, + Value2 String, + PRIMARY KEY (Key) + ); + )", TTxControl::NoTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + UNIT_ASSERT_VALUES_EQUAL(result.GetResultSets().size(), 2); + UNIT_ASSERT_EQUAL_C(result.GetIssues().Size(), 0, result.GetIssues().ToString()); + NYdb::NTable::TDescribeTableResult describe1 = session.DescribeTable("/Root/Test8").GetValueSync(); + UNIT_ASSERT_EQUAL(describe1.GetStatus(), EStatus::SUCCESS); + NYdb::NTable::TDescribeTableResult describe2 = session.DescribeTable("/Root/Test9").GetValueSync(); + UNIT_ASSERT_EQUAL(describe2.GetStatus(), EStatus::SUCCESS); + } + } + Y_UNIT_TEST(TableSink_ReplaceFromSelectOlap) { NKikimrConfig::TAppConfig appConfig; appConfig.MutableTableServiceConfig()->SetEnableOlapSink(true); @@ -2735,7 +3055,7 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { auto insertResult = 
client.ExecuteQuery(sql, NYdb::NQuery::TTxControl::BeginTx().CommitTx()).GetValueSync(); UNIT_ASSERT(!insertResult.IsSuccess()); UNIT_ASSERT_C( - insertResult.GetIssues().ToString().Contains("Transactions between column and row tables are disabled at current time"), + insertResult.GetIssues().ToString().Contains("Write transactions between column and row tables are disabled at current time"), insertResult.GetIssues().ToString()); } @@ -2748,20 +3068,7 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { auto insertResult = client.ExecuteQuery(sql, NYdb::NQuery::TTxControl::BeginTx().CommitTx()).GetValueSync(); UNIT_ASSERT(!insertResult.IsSuccess()); UNIT_ASSERT_C( - insertResult.GetIssues().ToString().Contains("Transactions between column and row tables are disabled at current time"), - insertResult.GetIssues().ToString()); - } - - { - // column & row read - const TString sql = R"( - SELECT * FROM `/Root/DataShard`; - SELECT * FROM `/Root/ColumnShard`; - )"; - auto insertResult = client.ExecuteQuery(sql, NYdb::NQuery::TTxControl::BeginTx().CommitTx()).GetValueSync(); - UNIT_ASSERT(!insertResult.IsSuccess()); - UNIT_ASSERT_C( - insertResult.GetIssues().ToString().Contains("Transactions between column and row tables are disabled at current time"), + insertResult.GetIssues().ToString().Contains("Write transactions between column and row tables are disabled at current time"), insertResult.GetIssues().ToString()); } @@ -2776,7 +3083,7 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { auto insertResult = client.ExecuteQuery(sql, NYdb::NQuery::TTxControl::BeginTx().CommitTx()).GetValueSync(); UNIT_ASSERT(!insertResult.IsSuccess()); UNIT_ASSERT_C( - insertResult.GetIssues().ToString().Contains("Transactions between column and row tables are disabled at current time"), + insertResult.GetIssues().ToString().Contains("Write transactions between column and row tables are disabled at current time"), insertResult.GetIssues().ToString()); } @@ -2790,7 +3097,7 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { 
auto insertResult = client.ExecuteQuery(sql, NYdb::NQuery::TTxControl::BeginTx().CommitTx()).GetValueSync(); UNIT_ASSERT(!insertResult.IsSuccess()); UNIT_ASSERT_C( - insertResult.GetIssues().ToString().Contains("Transactions between column and row tables are disabled at current time"), + insertResult.GetIssues().ToString().Contains("Write transactions between column and row tables are disabled at current time"), insertResult.GetIssues().ToString()); } @@ -2804,7 +3111,7 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { auto insertResult = client.ExecuteQuery(sql, NYdb::NQuery::TTxControl::BeginTx().CommitTx()).GetValueSync(); UNIT_ASSERT(!insertResult.IsSuccess()); UNIT_ASSERT_C( - insertResult.GetIssues().ToString().Contains("Transactions between column and row tables are disabled at current time"), + insertResult.GetIssues().ToString().Contains("Write transactions between column and row tables are disabled at current time"), insertResult.GetIssues().ToString()); } } @@ -2892,7 +3199,7 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { // Shuffled auto client = kikimr.GetQueryClient(); - { + { auto prepareResult = client.ExecuteQuery(R"( REPLACE INTO `/Root/ColumnShard` (Col3, Col4, Col2, Col1) VALUES ("test100", "100", 1000, 100u); @@ -2916,7 +3223,7 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { CompareYson(output, R"([[1u;"test1";[10];["1"]];[2u;"test2";#;["2"]];[3u;"test3";[12];#];[4u;"test4";#;#];[100u;"test100";[1000];["100"]]])"); } - Y_UNIT_TEST(TableSink_OltpReplace) { + Y_UNIT_TEST_TWIN(TableSink_OltpReplace, HasSecondaryIndex) { NKikimrConfig::TAppConfig appConfig; appConfig.MutableTableServiceConfig()->SetEnableOltpSink(true); auto settings = TKikimrSettings() @@ -2928,20 +3235,21 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { auto session = kikimr.GetTableClient().CreateSession().GetValueSync().GetSession(); - const TString query = R"( + const TString query = Sprintf(R"( CREATE TABLE `/Root/DataShard` ( Col1 Uint64 NOT NULL, Col2 Int32, Col3 String, + %s PRIMARY KEY (Col1) ); - )"; + 
)", (HasSecondaryIndex ? "INDEX idx_2 GLOBAL ON (Col2)," : "")); auto result = session.ExecuteSchemeQuery(query).GetValueSync(); UNIT_ASSERT_C(result.GetStatus() == NYdb::EStatus::SUCCESS, result.GetIssues().ToString()); auto client = kikimr.GetQueryClient(); - + { auto it = client.ExecuteQuery(R"( REPLACE INTO `/Root/DataShard` (Col1, Col2) VALUES (0u, 0); @@ -2959,7 +3267,7 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { CompareYson(output, R"([[0u;[0];#];[1u;#;["test"]]])"); } - { + { auto it = client.ExecuteQuery(R"( REPLACE INTO `/Root/DataShard` (Col1, Col3) VALUES (0u, 'null'); REPLACE INTO `/Root/DataShard` (Col1) VALUES (1u); @@ -2997,6 +3305,11 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { auto session = Kikimr->GetTableClient().CreateSession().GetValueSync().GetSession(); + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + csController->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + csController->SetOverrideLagForCompactionBeforeTierings(TDuration::Seconds(1)); + csController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); + const TString query = Sprintf(R"( CREATE TABLE `/Root/DataShard` ( Col1 Uint64 NOT NULL, @@ -3012,6 +3325,8 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { auto result = session.ExecuteSchemeQuery(query).GetValueSync(); UNIT_ASSERT_C(result.GetStatus() == NYdb::EStatus::SUCCESS, result.GetIssues().ToString()); DoExecute(); + csController->EnableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); + csController->WaitIndexation(TDuration::Seconds(5)); } }; @@ -3046,6 +3361,13 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); } + { + auto it = client.ExecuteQuery(R"( + UPSERT INTO `/Root/DataShard` (Col3) VALUES ('null'); + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT(!it.IsSuccess()); + } + { auto it = client.StreamExecuteQuery(R"( SELECT * FROM `/Root/DataShard` ORDER BY 
Col1; @@ -3308,7 +3630,7 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { UNIT_ASSERT_C(result.GetStatus() == NYdb::EStatus::SUCCESS, result.GetIssues().ToString()); auto client = kikimr.GetQueryClient(); - { + { auto prepareResult = client.ExecuteQuery(R"( REPLACE INTO `/Root/ColumnShard` (Col1, Col2) VALUES (100u, 1000), (100u, 1000); @@ -3316,7 +3638,7 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { UNIT_ASSERT_C(prepareResult.IsSuccess(), prepareResult.GetIssues().ToString()); } - { + { auto prepareResult = client.ExecuteQuery(R"( REPLACE INTO `/Root/ColumnShard` (Col1, Col2) VALUES (100u, 1000), (100u, 1000); @@ -3359,7 +3681,7 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { UNIT_ASSERT_C(result.GetStatus() == NYdb::EStatus::SUCCESS, result.GetIssues().ToString()); auto client = kikimr.GetQueryClient(); - { + { auto prepareResult = client.ExecuteQuery(R"( REPLACE INTO `/Root/ColumnShard` (Col1, Col2) VALUES (1u, 1) )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); @@ -3479,6 +3801,98 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { } } + Y_UNIT_TEST(ReadDatashardAndColumnshard) { + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableOlapSink(true); + appConfig.MutableTableServiceConfig()->SetEnableOltpSink(true); + auto settings = TKikimrSettings() + .SetAppConfig(appConfig) + .SetWithSampleTables(false); + + TKikimrRunner kikimr(settings); + Tests::NCommon::TLoggerInit(kikimr).Initialize(); + + auto client = kikimr.GetQueryClient(); + + { + auto createTable = client.ExecuteQuery(R"sql( + CREATE TABLE `/Root/DataShard` ( + Col1 Uint64 NOT NULL, + Col2 Int32, + Col3 String, + PRIMARY KEY (Col1) + ) WITH ( + STORE = ROW, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 10 + ); + CREATE TABLE `/Root/ColumnShard` ( + Col1 Uint64 NOT NULL, + Col2 Int32, + Col3 String, + PRIMARY KEY (Col1) + ) WITH ( + STORE = COLUMN, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 10 + ); + )sql", NYdb::NQuery::TTxControl::NoTx()).ExtractValueSync(); + 
UNIT_ASSERT_C(createTable.IsSuccess(), createTable.GetIssues().ToString()); + } + + { + auto replaceValues = client.ExecuteQuery(R"sql( + REPLACE INTO `/Root/DataShard` (Col1, Col2, Col3) VALUES + (1u, 1, "row"); + )sql", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_C(replaceValues.IsSuccess(), replaceValues.GetIssues().ToString()); + } + + { + auto replaceValues = client.ExecuteQuery(R"sql( + REPLACE INTO `/Root/ColumnShard` (Col1, Col2, Col3) VALUES + (2u, 2, "column"); + )sql", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_C(replaceValues.IsSuccess(), replaceValues.GetIssues().ToString()); + } + + { + auto it = client.StreamExecuteQuery(R"sql( + SELECT * FROM `/Root/ColumnShard` ORDER BY Col1; + )sql", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(it.GetStatus(), EStatus::SUCCESS, it.GetIssues().ToString()); + TString output = StreamResultToYson(it); + CompareYson( + output, + R"([[2u;[2];["column"]]])"); + } + + { + auto it = client.StreamExecuteQuery(R"sql( + SELECT * FROM `/Root/DataShard` + UNION ALL + SELECT * FROM `/Root/ColumnShard` + ORDER BY Col1; + )sql", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(it.GetStatus(), EStatus::SUCCESS, it.GetIssues().ToString()); + TString output = StreamResultToYson(it); + CompareYson( + output, + R"([[1u;[1];["row"]];[2u;[2];["column"]]])"); + } + + { + auto it = client.StreamExecuteQuery(R"sql( + SELECT r.Col3 AS a, c.Col3 AS b FROM `/Root/DataShard` AS r + JOIN `/Root/ColumnShard` AS c ON r.Col1 + 1 = c.Col1 + ORDER BY a; + )sql", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(it.GetStatus(), EStatus::SUCCESS, it.GetIssues().ToString()); + TString output = StreamResultToYson(it); + CompareYson( + output, + R"([[["row"];["column"]]])"); + } + } + Y_UNIT_TEST(ReplaceIntoWithDefaultValue) { 
NKikimrConfig::TAppConfig appConfig; appConfig.MutableTableServiceConfig()->SetEnableOlapSink(false); @@ -3513,6 +3927,295 @@ Y_UNIT_TEST_SUITE(KqpQueryService) { UNIT_ASSERT_C(replaceValues.IsSuccess(), replaceValues.GetIssues().ToString()); } } + + void RunQuery (const TString& query, auto& session, bool expectOk = true) { + auto qResult = session.ExecuteQuery(query, NYdb::NQuery::TTxControl::NoTx()).ExtractValueSync(); + if (!qResult.IsSuccess()) { + Cerr << "Query failed, status: " << qResult.GetStatus() << ": " << qResult.GetIssues().ToString() << Endl; + } + UNIT_ASSERT(qResult.IsSuccess() == expectOk); + }; + + struct TEntryCheck { + NYdb::NScheme::ESchemeEntryType Type; + TString Name; + bool IsExpected; + bool WasFound = false; + }; + + TEntryCheck ExpectedTopic(const TString& name) { + return TEntryCheck{NYdb::NScheme::ESchemeEntryType::Topic, name, true}; + } + TEntryCheck UnexpectedTopic(const TString& name) { + return TEntryCheck{NYdb::NScheme::ESchemeEntryType::Topic, name, false}; + } + + void CheckDirEntry(TKikimrRunner& kikimr, TVector& entriesToCheck) { + auto res = kikimr.GetSchemeClient().ListDirectory("/Root").GetValueSync(); + for (const auto& entry : res.GetChildren()) { + Cerr << "Scheme entry: " << entry << Endl; + for (auto& checkEntry : entriesToCheck) { + if (checkEntry.Name != entry.Name) + continue; + if (checkEntry.IsExpected) { + UNIT_ASSERT_C(entry.Type == checkEntry.Type, checkEntry.Name); + checkEntry.WasFound = true; + } else { + UNIT_ASSERT_C(entry.Type != checkEntry.Type, checkEntry.Name); + } + } + } + for (auto& checkEntry : entriesToCheck) { + if (checkEntry.IsExpected) { + UNIT_ASSERT_C(checkEntry.WasFound, checkEntry.Name); + } + } + } + + Y_UNIT_TEST(CreateAndDropTopic) { + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnablePreparedDdl(true); + auto setting = NKikimrKqp::TKqpSetting(); + auto serverSettings = TKikimrSettings() + .SetAppConfig(appConfig) + .SetKqpSettings({setting}); + 
serverSettings.PQConfig.SetRequireCredentialsInNewProtocol(false); + TKikimrRunner kikimr( + serverSettings.SetWithSampleTables(false).SetEnableTempTables(true)); + auto client = kikimr.GetQueryClient(); + auto session = client.GetSession().GetValueSync().GetSession(); + auto pq = NYdb::NTopic::TTopicClient(kikimr.GetDriver(), + NYdb::NTopic::TTopicClientSettings().Database("/Root").AuthToken("root@builtin")); + + { + const auto queryCreateTopic = Q_(R"( + --!syntax_v1 + CREATE TOPIC `/Root/TempTopic` (CONSUMER cons1); + )"); + RunQuery(queryCreateTopic, session); + Cerr << "Topic created\n"; + auto desc = pq.DescribeTopic("/Root/TempTopic").ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL(desc.GetTopicDescription().GetConsumers().size(), 1); + } + { + const auto queryCreateTopic = Q_(R"( + --!syntax_v1 + CREATE TOPIC IF NOT EXISTS `/Root/TempTopic` (CONSUMER cons1, CONSUMER cons2); + )"); + RunQuery(queryCreateTopic, session); + auto desc = pq.DescribeTopic("/Root/TempTopic").ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL(desc.GetTopicDescription().GetConsumers().size(), 1); + } + { + const auto queryCreateTopic = Q_(R"( + --!syntax_v1 + CREATE TOPIC `/Root/TempTopic` (CONSUMER cons1, CONSUMER cons2, CONSUMER cons3); + )"); + RunQuery(queryCreateTopic, session, false); + auto desc = pq.DescribeTopic("/Root/TempTopic").ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL(desc.GetTopicDescription().GetConsumers().size(), 1); + } + + TVector entriesToCheck = {ExpectedTopic("TempTopic")}; + CheckDirEntry(kikimr, entriesToCheck); + { + const auto query = Q_(R"( + --!syntax_v1 + Drop TOPIC `/Root/TempTopic`; + )"); + RunQuery(query, session); + Cerr << "Topic dropped\n"; + TVector entriesToCheck = {UnexpectedTopic("TempTopic")}; + CheckDirEntry(kikimr, entriesToCheck); + } + { + const auto query = Q_(R"( + --!syntax_v1 + Drop TOPIC IF EXISTS `/Root/TempTopic`; + )"); + RunQuery(query, session); + } + { + const auto query = Q_(R"( + --!syntax_v1 + Drop TOPIC `/Root/TempTopic`; + 
)"); + RunQuery(query, session, false); + } + } + + Y_UNIT_TEST(CreateAndAlterTopic) { + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnablePreparedDdl(true); + auto setting = NKikimrKqp::TKqpSetting(); + auto serverSettings = TKikimrSettings() + .SetAppConfig(appConfig) + .SetKqpSettings({setting}); + TKikimrRunner kikimr{serverSettings}; + auto client = kikimr.GetQueryClient(NYdb::NQuery::TClientSettings{}.AuthToken("root@builtin")); + auto session = client.GetSession().GetValueSync().GetSession(); + auto pq = NYdb::NTopic::TTopicClient(kikimr.GetDriver(), + NYdb::NTopic::TTopicClientSettings().Database("/Root").AuthToken("root@builtin")); + + { + const auto queryCreateTopic = Q_(R"( + --!syntax_v1 + CREATE TOPIC `/Root/TempTopic` (CONSUMER cons1); + )"); + RunQuery(queryCreateTopic, session); + + auto desc = pq.DescribeTopic("/Root/TempTopic").ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL(desc.GetTopicDescription().GetPartitioningSettings().GetMinActivePartitions(), 1); + } + { + const auto query = Q_(R"( + --!syntax_v1 + ALTER TOPIC `/Root/TempTopic` SET (min_active_partitions = 10); + )"); + RunQuery(query, session); + auto desc = pq.DescribeTopic("/Root/TempTopic").ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL(desc.GetTopicDescription().GetPartitioningSettings().GetMinActivePartitions(), 10); + } + { + const auto query = Q_(R"( + --!syntax_v1 + ALTER TOPIC IF EXISTS `/Root/TempTopic` SET (min_active_partitions = 15); + )"); + RunQuery(query, session); + auto desc = pq.DescribeTopic("/Root/TempTopic").ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL(desc.GetTopicDescription().GetPartitioningSettings().GetMinActivePartitions(), 15); + } + + { + const auto query = Q_(R"( + --!syntax_v1 + ALTER TOPIC `/Root/NoSuchTopic` SET (min_active_partitions = 10); + )"); + RunQuery(query, session, false); + + TVector entriesToCheck = {UnexpectedTopic("NoSuchTopic")}; + CheckDirEntry(kikimr, entriesToCheck); + } + { + const auto query = 
Q_(R"( + --!syntax_v1 + ALTER TOPIC IF EXISTS `/Root/NoSuchTopic` SET (min_active_partitions = 10); + )"); + RunQuery(query, session); + TVector entriesToCheck = {UnexpectedTopic("NoSuchTopic")}; + CheckDirEntry(kikimr, entriesToCheck); + } + } + Y_UNIT_TEST(CreateOrDropTopicOverTable) { + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnablePreparedDdl(true); + auto setting = NKikimrKqp::TKqpSetting(); + auto serverSettings = TKikimrSettings() + .SetAppConfig(appConfig) + .SetKqpSettings({setting}); + TKikimrRunner kikimr{serverSettings}; + auto tableClient = kikimr.GetTableClient(); + + { + auto tcSession = tableClient.CreateSession().GetValueSync().GetSession(); + UNIT_ASSERT(tcSession.ExecuteSchemeQuery(R"( + CREATE TABLE `/Root/TmpTable` ( + Key Uint64, + Value String, + PRIMARY KEY (Key) + ); + )").GetValueSync().IsSuccess()); + tcSession.Close(); + } + + auto client = kikimr.GetQueryClient(NYdb::NQuery::TClientSettings{}.AuthToken("root@builtin")); + auto session = client.GetSession().GetValueSync().GetSession(); + + TVector entriesToCheck = {TEntryCheck{.Type = NYdb::NScheme::ESchemeEntryType::Table, + .Name = "TmpTable", .IsExpected = true}}; + { + const auto queryCreateTopic = Q_(R"( + --!syntax_v1 + CREATE TOPIC `/Root/TmpTable` (CONSUMER cons1); + )"); + RunQuery(queryCreateTopic, session, false); + CheckDirEntry(kikimr, entriesToCheck); + + } + { + const auto queryCreateTopic = Q_(R"( + --!syntax_v1 + CREATE TOPIC IF NOT EXISTS `/Root/TmpTable` (CONSUMER cons1); + )"); + RunQuery(queryCreateTopic, session, false); + CheckDirEntry(kikimr, entriesToCheck); + } + { + const auto queryDropTopic = Q_(R"( + --!syntax_v1 + DROP TOPIC `/Root/TmpTable`; + )"); + RunQuery(queryDropTopic, session, false); + } + { + const auto queryDropTopic = Q_(R"( + --!syntax_v1 + DROP TOPIC IF EXISTS `/Root/TmpTable`; + )"); + RunQuery(queryDropTopic, session, false); + CheckDirEntry(kikimr, entriesToCheck); + } + { + auto tcSession = 
tableClient.CreateSession().GetValueSync().GetSession(); + auto type = TTypeBuilder().BeginOptional().Primitive(EPrimitiveType::Uint64).EndOptional().Build(); + auto alter = NYdb::NTable::TAlterTableSettings().AppendAddColumns(TColumn("NewColumn", type)); + + auto alterResult = tcSession.AlterTable("/Root/TmpTable", alter + ).GetValueSync(); + + UNIT_ASSERT_VALUES_EQUAL(alterResult.GetStatus(), EStatus::SUCCESS); + } + } + + Y_UNIT_TEST(TableSink_OlapRWQueries) { + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableOlapSink(true); + auto settings = TKikimrSettings() + .SetAppConfig(appConfig) + .SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + Tests::NCommon::TLoggerInit(kikimr).Initialize(); + + auto session = kikimr.GetTableClient().CreateSession().GetValueSync().GetSession(); + + const TString query = R"( + CREATE TABLE `/Root/ColumnShard` ( + Col1 Uint64 NOT NULL, + Col2 String, + Col3 Int32 NOT NULL, + PRIMARY KEY (Col1) + ) + PARTITION BY HASH(Col1) + WITH (STORE = COLUMN, AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 3); + )"; + + auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_C(result.GetStatus() == NYdb::EStatus::SUCCESS, result.GetIssues().ToString()); + + auto client = kikimr.GetQueryClient(); + { + auto result = client.ExecuteQuery(R"( + REPLACE INTO `/Root/ColumnShard` (Col1, Col2, Col3) VALUES + (1u, "test1", 10), (2u, "test2", 11), (3u, "test3", 12), (4u, NULL, 13); + SELECT * FROM `/Root/ColumnShard` ORDER BY Col1; + INSERT INTO `/Root/ColumnShard` SELECT Col1 + 100 AS Col1, Col2, Col3 FROM `/Root/ColumnShard`; + SELECT * FROM `/Root/ColumnShard` ORDER BY Col1; + )", NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + CompareYson(R"([[1u;["test1"];10];[2u;["test2"];11];[3u;["test3"];12];[4u;#;13]])", FormatResultSetYson(result.GetResultSet(0))); + 
CompareYson(R"([[1u;["test1"];10];[2u;["test2"];11];[3u;["test3"];12];[4u;#;13];[101u;["test1"];10];[102u;["test2"];11];[103u;["test3"];12];[104u;#;13]])", FormatResultSetYson(result.GetResultSet(1))); + } + } } } // namespace NKqp diff --git a/ydb/core/kqp/ut/service/ya.make b/ydb/core/kqp/ut/service/ya.make index 646e1b16edb4..f8920e41b757 100644 --- a/ydb/core/kqp/ut/service/ya.make +++ b/ydb/core/kqp/ut/service/ya.make @@ -24,6 +24,7 @@ PEERDIR( library/cpp/threading/local_executor ydb/core/kqp ydb/core/kqp/ut/common + ydb/core/tx/columnshard/hooks/testing ydb/library/yql/sql/pg ydb/library/yql/parser/pg_wrapper ydb/public/lib/ut_helpers diff --git a/ydb/core/kqp/ut/tx/kqp_locks_tricky_ut.cpp b/ydb/core/kqp/ut/tx/kqp_locks_tricky_ut.cpp index 780fdf977c3c..9ac7d3fabee7 100644 --- a/ydb/core/kqp/ut/tx/kqp_locks_tricky_ut.cpp +++ b/ydb/core/kqp/ut/tx/kqp_locks_tricky_ut.cpp @@ -29,9 +29,13 @@ using NYql::TExprNode; Y_UNIT_TEST_SUITE(KqpLocksTricky) { - Y_UNIT_TEST(TestNoLocksIssue) { + Y_UNIT_TEST_TWIN(TestNoLocksIssue, withSink) { + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableOltpSink(withSink); + auto setting = NKikimrKqp::TKqpSetting(); TKikimrSettings settings; + settings.SetAppConfig(appConfig); settings.SetUseRealThreads(false); TKikimrRunner kikimr(settings); auto db = kikimr.GetTableClient(); @@ -123,9 +127,13 @@ Y_UNIT_TEST_SUITE(KqpLocksTricky) { } } - Y_UNIT_TEST(TestNoLocksIssueInteractiveTx) { + Y_UNIT_TEST_TWIN(TestNoLocksIssueInteractiveTx, withSink) { + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableOltpSink(withSink); + auto setting = NKikimrKqp::TKqpSetting(); TKikimrSettings settings; + settings.SetAppConfig(appConfig); settings.SetUseRealThreads(false); TKikimrRunner kikimr(settings); auto db = kikimr.GetTableClient(); diff --git a/ydb/core/kqp/ut/tx/kqp_locks_ut.cpp b/ydb/core/kqp/ut/tx/kqp_locks_ut.cpp index b07beb62a0a2..139e135522c1 100644 --- 
a/ydb/core/kqp/ut/tx/kqp_locks_ut.cpp +++ b/ydb/core/kqp/ut/tx/kqp_locks_ut.cpp @@ -204,6 +204,42 @@ Y_UNIT_TEST_SUITE(KqpLocks) { UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); CompareYson(R"([[[2u];#;[11u];["Session2"]]])", FormatResultSetYson(result.GetResultSet(0))); } + + Y_UNIT_TEST(TwoPhaseTx) { // tx1 (session1) writes+reads /Root/Test and stays open; session2 commits a write to the same row; tx1's commit must then abort with KIKIMR_LOCKS_INVALIDATED + TKikimrRunner kikimr; + auto db = kikimr.GetTableClient(); + + auto session1 = db.CreateSession().GetValueSync().GetSession(); + auto session2 = db.CreateSession().GetValueSync().GetSession(); + + auto result = session1.ExecuteDataQuery(Q_(R"( + REPLACE INTO `/Root/Test` (Group, Name, Comment) VALUES (1U, "Paul", "Changed"); + SELECT * FROM `/Root/Test` WHERE Name == "Paul" ORDER BY Group, Name; + )"), TTxControl::BeginTx(TTxSettings::SerializableRW())).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + auto tx1 = result.GetTransaction(); + UNIT_ASSERT(tx1); + + result = session2.ExecuteDataQuery(Q_(R"( + REPLACE INTO `/Root/Test` (Group, Name, Comment) + VALUES (1U, "Paul", "Changed"); + )"), TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + result = session1.ExecuteDataQuery(Q_(R"( + SELECT * FROM `KeyValue`; + )"), TTxControl::Tx(*tx1)).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + auto commitResult = tx1->Commit().GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(commitResult.GetStatus(), EStatus::ABORTED, commitResult.GetIssues().ToString()); // on mismatch, report the commit's own issues, not those of the earlier SELECT + commitResult.GetIssues().PrintTo(Cerr); + UNIT_ASSERT_C(HasIssue(commitResult.GetIssues(), NYql::TIssuesIds::KIKIMR_LOCKS_INVALIDATED, + [] (const NYql::TIssue& issue) { + return issue.GetMessage().Contains("/Root/Test"); + }), commitResult.GetIssues().ToString()); + } } } // namespace NKqp diff --git
a/ydb/core/kqp/ut/tx/kqp_mvcc_ut.cpp b/ydb/core/kqp/ut/tx/kqp_mvcc_ut.cpp index a39c839e6ee2..f4eb9ee7a9ce 100644 --- a/ydb/core/kqp/ut/tx/kqp_mvcc_ut.cpp +++ b/ydb/core/kqp/ut/tx/kqp_mvcc_ut.cpp @@ -9,9 +9,12 @@ using namespace NYdb; using namespace NYdb::NTable; Y_UNIT_TEST_SUITE(KqpSnapshotRead) { - Y_UNIT_TEST(TestSnapshotExpiration) { + Y_UNIT_TEST_TWIN(TestSnapshotExpiration, withSink) { + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableOltpSink(withSink); auto settings = TKikimrSettings() - .SetKeepSnapshotTimeout(TDuration::Seconds(1)); + .SetKeepSnapshotTimeout(TDuration::Seconds(1)) + .SetAppConfig(appConfig); TKikimrRunner kikimr(settings); @@ -63,8 +66,9 @@ Y_UNIT_TEST_SUITE(KqpSnapshotRead) { UNIT_ASSERT_C(caught, "Failed to wait for snapshot expiration."); } - Y_UNIT_TEST(ReadOnlyTxCommitsOnConcurrentWrite) { + Y_UNIT_TEST_TWIN(ReadOnlyTxCommitsOnConcurrentWrite, withSink) { NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableOltpSink(withSink); appConfig.MutableTableServiceConfig()->SetEnableKqpDataQueryStreamLookup(true); TKikimrRunner kikimr(TKikimrSettings() .SetAppConfig(appConfig) @@ -125,8 +129,13 @@ Y_UNIT_TEST_SUITE(KqpSnapshotRead) { ])", FormatResultSetYson(result.GetResultSet(0))); } - Y_UNIT_TEST(ReadOnlyTxWithIndexCommitsOnConcurrentWrite) { - TKikimrRunner kikimr; + Y_UNIT_TEST_TWIN(ReadOnlyTxWithIndexCommitsOnConcurrentWrite, withSink) { + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableOltpSink(withSink); + TKikimrRunner kikimr( + TKikimrSettings() + .SetAppConfig(appConfig) + ); // kikimr.GetTestServer().GetRuntime()->SetLogPriority(NKikimrServices::KQP_COMPUTE, NActors::NLog::PRI_DEBUG); // kikimr.GetTestServer().GetRuntime()->SetLogPriority(NKikimrServices::KQP_BLOBS_STORAGE, NActors::NLog::PRI_DEBUG); @@ -186,8 +195,13 @@ Y_UNIT_TEST_SUITE(KqpSnapshotRead) { ])", FormatResultSetYson(result.GetResultSet(0))); } - 
Y_UNIT_TEST(ReadWriteTxFailsOnConcurrentWrite1) { - TKikimrRunner kikimr; + Y_UNIT_TEST_TWIN(ReadWriteTxFailsOnConcurrentWrite1, withSink) { + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableOltpSink(withSink); + TKikimrRunner kikimr( + TKikimrSettings() + .SetAppConfig(appConfig) + ); // kikimr.GetTestServer().GetRuntime()->SetLogPriority(NKikimrServices::KQP_COMPUTE, NActors::NLog::PRI_DEBUG); // kikimr.GetTestServer().GetRuntime()->SetLogPriority(NKikimrServices::KQP_BLOBS_STORAGE, NActors::NLog::PRI_DEBUG); @@ -223,8 +237,13 @@ Y_UNIT_TEST_SUITE(KqpSnapshotRead) { UNIT_ASSERT_C(HasIssue(result.GetIssues(), NYql::TIssuesIds::KIKIMR_LOCKS_INVALIDATED), result.GetIssues().ToString()); } - Y_UNIT_TEST(ReadWriteTxFailsOnConcurrentWrite2) { - TKikimrRunner kikimr; + Y_UNIT_TEST_TWIN(ReadWriteTxFailsOnConcurrentWrite2, withSink) { + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableOltpSink(withSink); + TKikimrRunner kikimr( + TKikimrSettings() + .SetAppConfig(appConfig) + ); // kikimr.GetTestServer().GetRuntime()->SetLogPriority(NKikimrServices::KQP_COMPUTE, NActors::NLog::PRI_DEBUG); // kikimr.GetTestServer().GetRuntime()->SetLogPriority(NKikimrServices::KQP_BLOBS_STORAGE, NActors::NLog::PRI_DEBUG); @@ -266,8 +285,9 @@ Y_UNIT_TEST_SUITE(KqpSnapshotRead) { UNIT_ASSERT_C(HasIssue(result.GetIssues(), NYql::TIssuesIds::KIKIMR_LOCKS_INVALIDATED), result.GetIssues().ToString()); } - Y_UNIT_TEST(ReadWriteTxFailsOnConcurrentWrite3) { + Y_UNIT_TEST_TWIN(ReadWriteTxFailsOnConcurrentWrite3, withSink) { NKikimrConfig::TAppConfig appConfig; + appConfig.MutableTableServiceConfig()->SetEnableOltpSink(withSink); appConfig.MutableTableServiceConfig()->SetEnableKqpDataQueryStreamLookup(true); TKikimrRunner kikimr( TKikimrSettings() diff --git a/ydb/core/kqp/ut/tx/kqp_sink_common.h b/ydb/core/kqp/ut/tx/kqp_sink_common.h new file mode 100644 index 000000000000..9dfcad60ed43 --- /dev/null +++ 
b/ydb/core/kqp/ut/tx/kqp_sink_common.h @@ -0,0 +1,109 @@ +#pragma once + +#include +#include +#include +#include + + +namespace NKikimr { +namespace NKqp { + +using namespace NYdb; +using namespace NYdb::NQuery; + +class TTableDataModificationTester { /* Base fixture for the sink tests: Execute() starts a Kikimr runner with the OLAP sink, OLTP sink and stream lookup enabled, creates /Root/Test, /Root/KV and /Root/KV2, fills Test and KV, then runs the subclass scenario through DoExecute(). */ +protected: + NKikimrConfig::TAppConfig AppConfig; + std::unique_ptr Kikimr; + YDB_ACCESSOR(bool, IsOlap, false); /* picks STORE = COLUMN (true) or STORE = ROW (false) for all three tables */ + YDB_ACCESSOR(bool, FastSnapshotExpiration, false); /* when set, the keep-snapshot timeout is lowered to 1 second */ + + virtual void DoExecute() = 0; /* scenario body; called after the tables are created and filled */ +public: + void Execute() { + AppConfig.MutableTableServiceConfig()->SetEnableOlapSink(true); + AppConfig.MutableTableServiceConfig()->SetEnableOltpSink(true); + AppConfig.MutableTableServiceConfig()->SetEnableKqpDataQueryStreamLookup(true); + auto settings = TKikimrSettings().SetAppConfig(AppConfig).SetWithSampleTables(false); + if (FastSnapshotExpiration) { + settings.SetKeepSnapshotTimeout(TDuration::Seconds(1)); + } + + Kikimr = std::make_unique(settings); + Tests::NCommon::TLoggerInit(*Kikimr).Initialize(); + + auto client = Kikimr->GetQueryClient(); + + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + csController->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + csController->SetOverrideLagForCompactionBeforeTierings(TDuration::Seconds(1)); + csController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); /* background indexation stays disabled while the scenario runs */ + + { + auto type = IsOlap ? 
"COLUMN" : "ROW"; + auto result = client.ExecuteQuery(Sprintf(R"( + CREATE TABLE `/Root/Test` ( + Group Uint32 not null, + Name String not null, + Amount Uint64, + Comment String, + PRIMARY KEY (Group, Name) + ) WITH ( + STORE = %s, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 10 + ); + + CREATE TABLE `/Root/KV` ( + Key Uint32 not null, + Value String, + PRIMARY KEY (Key) + ) WITH ( + STORE = %s, + AUTO_PARTITIONING_BY_SIZE = DISABLED, + AUTO_PARTITIONING_BY_LOAD = DISABLED, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 100, + UNIFORM_PARTITIONS = 100 + ); + + CREATE TABLE `/Root/KV2` ( + Key Uint32 not null, + Value String, + PRIMARY KEY (Key) + ) WITH ( + STORE = %s, + AUTO_PARTITIONING_BY_SIZE = DISABLED, + AUTO_PARTITIONING_BY_LOAD = DISABLED, + AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 100, + UNIFORM_PARTITIONS = 100 + ); + )", type, type, type), TTxControl::NoTx()).GetValueSync(); + UNIT_ASSERT_C(result.GetStatus() == NYdb::EStatus::SUCCESS, result.GetIssues().ToString()); + } + + { + auto result = client.ExecuteQuery(R"( + REPLACE INTO `Test` (Group, Name, Amount, Comment) VALUES + (1u, "Anna", 3500ul, "None"), + (1u, "Paul", 300ul, "None"), + (2u, "Tony", 7200ul, "None"); + REPLACE INTO `KV` (Key, Value) VALUES + (1u, "One"), + (2u, "Two"), + (3u, "Three"), + (4000000001u, "BigOne"), + (4000000002u, "BigTwo"), + (4000000003u, "BigThree"); + )", TTxControl::NoTx()).GetValueSync(); + UNIT_ASSERT_C(result.GetStatus() == NYdb::EStatus::SUCCESS, result.GetIssues().ToString()); + } + + DoExecute(); /* run the subclass scenario, then switch background indexation back on */ + csController->EnableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); + csController->WaitIndexation(TDuration::Seconds(5)); + } + +}; + +} +} diff --git a/ydb/core/kqp/ut/tx/kqp_sink_locks_ut.cpp b/ydb/core/kqp/ut/tx/kqp_sink_locks_ut.cpp new file mode 100644 index 000000000000..45428d86c8a7 --- /dev/null +++ b/ydb/core/kqp/ut/tx/kqp_sink_locks_ut.cpp @@ -0,0 +1,337 @@ +#include "kqp_sink_common.h" + +#include +#include +#include +#include + 
+namespace NKikimr { +namespace NKqp { + +using namespace NYdb; +using namespace NYdb::NQuery; + +Y_UNIT_TEST_SUITE(KqpSinkLocks) { + class TInvalidate : public TTableDataModificationTester { /* tx1 reads Paul's row and stages a derived upsert; session2 then commits a write to that row, so tx1's final write+commit must abort and session2's value must remain */ + protected: + void DoExecute() override { + auto client = Kikimr->GetQueryClient(); + + auto session1 = client.GetSession().GetValueSync().GetSession(); + auto session2 = client.GetSession().GetValueSync().GetSession(); + + auto result = session1.ExecuteQuery(Q_(R"( + UPSERT INTO `/Root/Test` + SELECT Group + 10U AS Group, Name, Amount, Comment ?? "" || "Updated" AS Comment + FROM `/Root/Test` + WHERE Group == 1U AND Name == "Paul"; + )"), TTxControl::BeginTx(TTxSettings::SerializableRW())).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + auto tx1 = result.GetTransaction(); + UNIT_ASSERT(tx1); + + result = session2.ExecuteQuery(Q_(R"( + UPSERT INTO `/Root/Test` (Group, Name, Comment) + VALUES (1U, "Paul", "Changed"); + )"), TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + result = session1.ExecuteQuery(Q_(R"( + UPSERT INTO `/Root/Test` (Group, Name, Comment) + VALUES (11U, "Sergey", "BadRow"); + )"), TTxControl::Tx(tx1->GetId()).CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::ABORTED, result.GetIssues().ToString()); + result.GetIssues().PrintTo(Cerr); + if (!GetIsOlap()) { /* the issue code and table name are only checked for row tables */ + UNIT_ASSERT_C(HasIssue(result.GetIssues(), NYql::TIssuesIds::KIKIMR_LOCKS_INVALIDATED, + [] (const NYql::TIssue& issue) { + return issue.GetMessage().Contains("/Root/Test"); + }), result.GetIssues().ToString()); + } + + result = session2.ExecuteQuery(Q_(R"( + SELECT * FROM `/Root/Test` WHERE Name == "Paul" ORDER BY Group, Name; + )"), TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); + 
UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([[[300u];["Changed"];1u;"Paul"]])", FormatResultSetYson(result.GetResultSet(0))); + } + }; + + Y_UNIT_TEST(TInvalidate) { + TInvalidate tester; + tester.SetIsOlap(false); + tester.Execute(); + } + + Y_UNIT_TEST(TInvalidateOlap) { + TInvalidate tester; + tester.SetIsOlap(true); + tester.Execute(); + } + + class TInvalidateOnCommit : public TTableDataModificationTester { /* same conflict on Paul's row, but tx1 is finished with an explicit Commit() call, which must return ABORTED */ + protected: + void DoExecute() override { + auto client = Kikimr->GetQueryClient(); + + auto session1 = client.GetSession().GetValueSync().GetSession(); + auto session2 = client.GetSession().GetValueSync().GetSession(); + + auto result = session1.ExecuteQuery(Q_(R"( + UPSERT INTO `/Root/Test` + SELECT Group + 10U AS Group, Name, Amount, Comment ?? "" || "Updated" AS Comment + FROM `/Root/Test` + WHERE Group == 1U AND Name == "Paul"; + )"), TTxControl::BeginTx(TTxSettings::SerializableRW())).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + auto tx1 = result.GetTransaction(); + UNIT_ASSERT(tx1); + + result = session2.ExecuteQuery(Q_(R"( + UPSERT INTO `/Root/Test` (Group, Name, Comment) + VALUES (1U, "Paul", "Changed"); + )"), TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + auto commitResult = tx1->Commit().GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(commitResult.GetStatus(), EStatus::ABORTED, commitResult.GetIssues().ToString()); + commitResult.GetIssues().PrintTo(Cerr); + if (!GetIsOlap()) { + UNIT_ASSERT_C(HasIssue(commitResult.GetIssues(), NYql::TIssuesIds::KIKIMR_LOCKS_INVALIDATED, + [] (const NYql::TIssue& issue) { + return issue.GetMessage().Contains("/Root/Test"); + }), commitResult.GetIssues().ToString()); + } + + result = session2.ExecuteQuery(Q_(R"( + SELECT * FROM 
`/Root/Test` WHERE Name == "Paul" ORDER BY Group, Name; + )"), TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([[[300u];["Changed"];1u;"Paul"]])", FormatResultSetYson(result.GetResultSet(0))); + } + }; + + Y_UNIT_TEST(InvalidateOnCommit) { + TInvalidateOnCommit tester; + tester.SetIsOlap(false); + tester.Execute(); + } + + Y_UNIT_TEST(InvalidateOlapOnCommit) { + TInvalidateOnCommit tester; + tester.SetIsOlap(true); + tester.Execute(); + } + + class TDifferentKeyUpdate : public TTableDataModificationTester { /* tx1 reads Group = 1 while session2 commits a row with Group 2; tx1 must still commit successfully */ + protected: + void DoExecute() override { + auto client = Kikimr->GetQueryClient(); + + auto session1 = client.GetSession().GetValueSync().GetSession(); + auto session2 = client.GetSession().GetValueSync().GetSession(); + + auto result = session1.ExecuteQuery(Q_(R"( + SELECT * FROM `/Root/Test` WHERE Group = 1; + )"), TTxControl::BeginTx(TTxSettings::SerializableRW())).ExtractValueSync(); + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + + auto tx1 = result.GetTransaction(); + UNIT_ASSERT(tx1); + + result = session2.ExecuteQuery(Q_(R"( + UPSERT INTO `/Root/Test` (Group, Name, Comment) + VALUES (2U, "Paul", "Changed"); + )"), TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + + result = session1.ExecuteQuery(Q_(R"( + SELECT "Nothing"; + )"), TTxControl::Tx(tx1->GetId()).CommitTx()).ExtractValueSync(); + UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); + } + }; + + Y_UNIT_TEST(DifferentKeyUpdate) { + TDifferentKeyUpdate tester; + tester.SetIsOlap(false); + tester.Execute(); + } + + Y_UNIT_TEST(DifferentKeyUpdateOlap) { + TDifferentKeyUpdate tester; + tester.SetIsOlap(true); + tester.Execute(); + } + + class TEmptyRange : public TTableDataModificationTester { /* tx1 reads the empty Group = 11 range; session2 then commits a row into that range, so tx1's later write+commit must abort */ + protected: + void DoExecute() override { + auto client = Kikimr->GetQueryClient(); + 
+ auto session1 = client.GetSession().GetValueSync().GetSession(); + auto session2 = client.GetSession().GetValueSync().GetSession(); + + auto result = session1.ExecuteQuery(Q1_(R"( + SELECT * FROM Test WHERE Group = 11; + )"), TTxControl::BeginTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([])", FormatResultSetYson(result.GetResultSet(0))); + + auto tx1 = result.GetTransaction(); + UNIT_ASSERT(tx1); + + result = session2.ExecuteQuery(Q1_(R"( + SELECT * FROM Test WHERE Group = 11; + UPSERT INTO Test (Group, Name, Amount) VALUES + (11, "Session2", 2); + )"), TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([])", FormatResultSetYson(result.GetResultSet(0))); + + result = session1.ExecuteQuery(Q1_(R"( + UPSERT INTO Test (Group, Name, Amount) VALUES + (11, "Session1", 1); + )"), TTxControl::Tx(tx1->GetId()).CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::ABORTED, result.GetIssues().ToString()); + result.GetIssues().PrintTo(Cerr); + if (!GetIsOlap()) { + UNIT_ASSERT_C(HasIssue(result.GetIssues(), NYql::TIssuesIds::KIKIMR_LOCKS_INVALIDATED, + [] (const NYql::TIssue& issue) { + return issue.GetMessage().Contains("/Root/Test"); + }), result.GetIssues().ToString()); + } + + result = session1.ExecuteQuery(Q1_(R"( + SELECT * FROM Test WHERE Group = 11; + )"), TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([[[2u];#;11u;"Session2"]])", FormatResultSetYson(result.GetResultSet(0))); + } + }; + + Y_UNIT_TEST(EmptyRange) { + TEmptyRange tester; + tester.SetIsOlap(false); + tester.Execute(); + } + + Y_UNIT_TEST(EmptyRangeOlap) { + TEmptyRange tester; + tester.SetIsOlap(true); + tester.Execute(); + } + + class 
TEmptyRangeAlreadyBroken : public TTableDataModificationTester { /* tx1 first reads Group = 10, session2 commits a row into Group = 11, then tx1 reads Group = 11 and writes in one committing query, which must abort */ + protected: + void DoExecute() override { + auto client = Kikimr->GetQueryClient(); + + auto session1 = client.GetSession().GetValueSync().GetSession(); + auto session2 = client.GetSession().GetValueSync().GetSession(); + + auto result = session1.ExecuteQuery(Q1_(R"( + SELECT * FROM Test WHERE Group = 10; + )"), TTxControl::BeginTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([])", FormatResultSetYson(result.GetResultSet(0))); + + auto tx1 = result.GetTransaction(); + UNIT_ASSERT(tx1); + + result = session2.ExecuteQuery(Q1_(R"( + SELECT * FROM Test WHERE Group = 11; + + UPSERT INTO Test (Group, Name, Amount) VALUES + (11, "Session2", 2); + )"), TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([])", FormatResultSetYson(result.GetResultSet(0))); + + result = session1.ExecuteQuery(Q1_(R"( + SELECT * FROM Test WHERE Group = 11; + + UPSERT INTO Test (Group, Name, Amount) VALUES + (11, "Session1", 1); + )"), TTxControl::Tx(tx1->GetId()).CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::ABORTED, result.GetIssues().ToString()); + result.GetIssues().PrintTo(Cerr); + if (!GetIsOlap()) { + UNIT_ASSERT_C(HasIssue(result.GetIssues(), NYql::TIssuesIds::KIKIMR_LOCKS_INVALIDATED, + [] (const NYql::TIssue& issue) { + return issue.GetMessage().Contains("/Root/Test"); + }), result.GetIssues().ToString()); + } + + result = session1.ExecuteQuery(Q1_(R"( + SELECT * FROM Test WHERE Group = 11; + )"), TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([[[2u];#;11u;"Session2"]])", FormatResultSetYson(result.GetResultSet(0))); + } + }; + + 
Y_UNIT_TEST(EmptyRangeAlreadyBroken) { + TEmptyRangeAlreadyBroken tester; + tester.SetIsOlap(false); + tester.Execute(); + } + + Y_UNIT_TEST(EmptyRangeAlreadyBrokenOlap) { + TEmptyRangeAlreadyBroken tester; + tester.SetIsOlap(true); + tester.Execute(); + } + + class TUncommittedRead : public TTableDataModificationTester { /* a row upserted inside the still-open tx1 is not returned to session2 but is returned to a read made within tx1 */ + protected: + void DoExecute() override { + auto client = Kikimr->GetQueryClient(); + + auto session1 = client.GetSession().GetValueSync().GetSession(); + auto session2 = client.GetSession().GetValueSync().GetSession(); + + auto result = session1.ExecuteQuery(Q1_(R"( + UPSERT INTO Test (Group, Name, Amount) VALUES + (11, "TEST", 2); + )"), TTxControl::BeginTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + auto tx1 = result.GetTransaction(); + UNIT_ASSERT(tx1); + + { + result = session2.ExecuteQuery(Q1_(R"( + SELECT * FROM Test WHERE Group = 11; + )"), TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([])", FormatResultSetYson(result.GetResultSet(0))); + } + + { + result = session1.ExecuteQuery(Q1_(R"( + SELECT * FROM Test WHERE Group = 11; + )"), TTxControl::Tx(tx1->GetId())).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([[[2u];#;11u;"TEST"]])", FormatResultSetYson(result.GetResultSet(0))); + } + } + }; + + Y_UNIT_TEST(UncommittedRead) { + TUncommittedRead tester; + tester.SetIsOlap(false); + tester.Execute(); + } + + Y_UNIT_TEST(OlapUncommittedRead) { + TUncommittedRead tester; + tester.SetIsOlap(true); + tester.Execute(); + } +} + +} // namespace NKqp +} // namespace NKikimr diff --git a/ydb/core/kqp/ut/tx/kqp_sink_mvcc_ut.cpp b/ydb/core/kqp/ut/tx/kqp_sink_mvcc_ut.cpp new file mode 100644 index 000000000000..a0ac82ad151d --- /dev/null +++ 
b/ydb/core/kqp/ut/tx/kqp_sink_mvcc_ut.cpp @@ -0,0 +1,307 @@ +#include "kqp_sink_common.h" + +#include +#include +#include +#include + +namespace NKikimr { +namespace NKqp { + +using namespace NYdb; +using namespace NYdb::NQuery; + +Y_UNIT_TEST_SUITE(KqpSinkMvcc) { + class TSnapshotExpiration : public TTableDataModificationTester { /* tx reads two keys, session2 overwrites one of them, then tx re-reads once per second (for up to 30 seconds) until the read fails with ABORTED and a "has no snapshot at" issue */ + protected: + void DoExecute() override { + auto client = Kikimr->GetQueryClient(); + + auto session1 = client.GetSession().GetValueSync().GetSession(); + auto session2 = client.GetSession().GetValueSync().GetSession(); + + auto result = session1.ExecuteQuery(Q_(R"( + SELECT * FROM `/Root/KV` WHERE Key = 1u OR Key = 4000000001u ORDER BY Key; + )"), TTxControl::BeginTx(TTxSettings::SerializableRW())).ExtractValueSync(); + + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([ + [1u;["One"]]; + [4000000001u;["BigOne"]] + ])", FormatResultSetYson(result.GetResultSet(0))); + + auto tx = result.GetTransaction(); + + result = session2.ExecuteQuery(Q_(R"( + UPSERT INTO `/Root/KV` (Key, Value) VALUES (1u, "ChangedOne"); + )"), TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); + + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + auto deadline = TInstant::Now() + TDuration::Seconds(30); + auto caught = false; + do { + Sleep(TDuration::Seconds(1)); + auto result = session1.ExecuteQuery(Q_(R"( + SELECT * FROM `/Root/KV` WHERE Key = 1u OR Key = 4000000001u; + )"), TTxControl::Tx(tx->GetId())).ExtractValueSync(); + if (result.GetStatus() == EStatus::SUCCESS) + continue; + + UNIT_ASSERT_C(HasIssue(result.GetIssues(), NYql::TIssuesIds::DEFAULT_ERROR, + [](const NYql::TIssue& issue){ + return issue.GetMessage().Contains("has no snapshot at"); + }), result.GetIssues().ToString()); + + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::ABORTED); + + caught = true; + break; + } while (TInstant::Now() < 
deadline); + UNIT_ASSERT_C(caught, "Failed to wait for snapshot expiration."); + } + }; + + Y_UNIT_TEST(SnapshotExpiration) { + TSnapshotExpiration tester; + tester.SetFastSnapshotExpiration(true); + tester.SetIsOlap(false); + tester.Execute(); + } + +// Y_UNIT_TEST(OlapSnapshotExpiration) { +// TSnapshotExpiration tester; +// tester.SetFastSnapshotExpiration(true); +// tester.SetIsOlap(true); +// tester.Execute(); +// } + + class TReadOnlyTxCommitsOnConcurrentWrite : public TTableDataModificationTester { /* a transaction that only reads still gets the pre-write value "One" after session2 commits "ChangedOne", and its final committing read succeeds */ + protected: + void DoExecute() override { + auto client = Kikimr->GetQueryClient(); + + auto session1 = client.GetSession().GetValueSync().GetSession(); + auto session2 = client.GetSession().GetValueSync().GetSession(); + + auto result = session1.ExecuteQuery(Q_(R"( + SELECT * FROM `/Root/KV` WHERE Key = 1u OR Key = 4000000001u ORDER BY Key; + )"), TTxControl::BeginTx(TTxSettings::SerializableRW())).ExtractValueSync(); + + auto tx = result.GetTransaction(); + + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([ + [1u;["One"]]; + [4000000001u;["BigOne"]] + ])", FormatResultSetYson(result.GetResultSet(0))); + + result = session2.ExecuteQuery(Q_(R"( + UPSERT INTO `/Root/KV` (Key, Value) VALUES (1u, "ChangedOne"); + )"), TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); + + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + result = session2.ExecuteQuery(Q_(R"( + SELECT * FROM `/Root/KV` WHERE Key = 1u; + )"), TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); + + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([ + [1u;["ChangedOne"]]; + ])", FormatResultSetYson(result.GetResultSet(0))); + + result = session1.ExecuteQuery(Q_(R"( + SELECT * FROM `/Root/KV` WHERE Key = 1u; + )"), TTxControl::Tx(tx->GetId())).ExtractValueSync(); 
+ + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([ + [1u;["One"]]; + ])", FormatResultSetYson(result.GetResultSet(0))); + + result = session1.ExecuteQuery(Q_(R"( + SELECT * FROM `/Root/KV` WHERE Key = 2u OR Key = 4000000002u ORDER BY Key; + )"), TTxControl::Tx(tx->GetId()).CommitTx()).ExtractValueSync(); + + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([ + [2u;["Two"]]; + [4000000002u;["BigTwo"]] + ])", FormatResultSetYson(result.GetResultSet(0))); + } + }; + + Y_UNIT_TEST(ReadOnlyTxCommitsOnConcurrentWrite) { + TReadOnlyTxCommitsOnConcurrentWrite tester; + tester.SetIsOlap(false); + tester.Execute(); + } + + Y_UNIT_TEST(OlapReadOnlyTxCommitsOnConcurrentWrite) { + TReadOnlyTxCommitsOnConcurrentWrite tester; + tester.SetIsOlap(true); + tester.Execute(); + } + + class TReadWriteTxFailsOnConcurrentWrite1 : public TTableDataModificationTester { /* tx reads key 1, session2 commits a write to key 1, then tx's own write+commit to key 1 must abort with KIKIMR_LOCKS_INVALIDATED */ + protected: + void DoExecute() override { + auto client = Kikimr->GetQueryClient(); + + auto session1 = client.GetSession().GetValueSync().GetSession(); + auto session2 = client.GetSession().GetValueSync().GetSession(); + + auto result = session1.ExecuteQuery(Q_(R"( + SELECT * FROM `/Root/KV` WHERE Key = 1u OR Key = 4000000001u ORDER BY Key; + )"), TTxControl::BeginTx(TTxSettings::SerializableRW())).ExtractValueSync(); + + auto tx = result.GetTransaction(); + + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([ + [1u;["One"]]; + [4000000001u;["BigOne"]] + ])", FormatResultSetYson(result.GetResultSet(0))); + + result = session2.ExecuteQuery(Q_(R"( + UPSERT INTO `/Root/KV` (Key, Value) VALUES (1u, "ChangedOne"); + )"), TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); + + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + result = 
session1.ExecuteQuery(Q_(R"( + UPSERT INTO `/Root/KV` (Key, Value) VALUES (1u, "TwiceChangedOne"); + )"), TTxControl::Tx(tx->GetId()).CommitTx()).ExtractValueSync(); + + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::ABORTED, result.GetIssues().ToString()); + UNIT_ASSERT_C(HasIssue(result.GetIssues(), NYql::TIssuesIds::KIKIMR_LOCKS_INVALIDATED), result.GetIssues().ToString()); + } + }; + + Y_UNIT_TEST(ReadWriteTxFailsOnConcurrentWrite1) { + TReadWriteTxFailsOnConcurrentWrite1 tester; + tester.SetIsOlap(false); + tester.Execute(); + } + + Y_UNIT_TEST(OlapReadWriteTxFailsOnConcurrentWrite1) { + TReadWriteTxFailsOnConcurrentWrite1 tester; + tester.SetIsOlap(true); + tester.Execute(); + } + + class TReadWriteTxFailsOnConcurrentWrite2 : public TTableDataModificationTester { /* tx reads /Root/KV, session2 commits a write to /Root/KV2 key 101, then tx updates both tables in one committing query, which must abort */ + protected: + void DoExecute() override { + auto client = Kikimr->GetQueryClient(); + + auto session1 = client.GetSession().GetValueSync().GetSession(); + auto session2 = client.GetSession().GetValueSync().GetSession(); + + auto result = session1.ExecuteQuery(Q_(R"( + SELECT * FROM `/Root/KV` WHERE Key = 1u OR Key = 4000000001u ORDER BY Key; + )"), TTxControl::BeginTx(TTxSettings::SerializableRW())).ExtractValueSync(); + + auto tx = result.GetTransaction(); + + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([ + [1u;["One"]]; + [4000000001u;["BigOne"]] + ])", FormatResultSetYson(result.GetResultSet(0))); + + // We need to sleep before the upsert below, otherwise writes + // might happen in the same step as the snapshot, which would be + // treated as happening before snapshot and will not break any locks. 
+ Sleep(TDuration::Seconds(2)); + + result = session2.ExecuteQuery(Q_(R"( + UPSERT INTO `/Root/KV2` (Key, Value) VALUES (101u, "SomeText"); + )"), TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + result = session1.ExecuteQuery(Q_(R"( + UPDATE `/Root/KV` SET Value = "Something" WHERE Key = 1u; + UPDATE `/Root/KV2` SET Value = "AnotherString" WHERE Key = 101u; + )"), TTxControl::Tx(tx->GetId()).CommitTx()).ExtractValueSync(); + + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::ABORTED, result.GetIssues().ToString()); + if (!GetIsOlap()) { /* the issue code is only checked for row tables */ + UNIT_ASSERT_C(HasIssue(result.GetIssues(), NYql::TIssuesIds::KIKIMR_LOCKS_INVALIDATED), result.GetIssues().ToString()); + } + } + }; + + Y_UNIT_TEST(ReadWriteTxFailsOnConcurrentWrite2) { + TReadWriteTxFailsOnConcurrentWrite2 tester; + tester.SetIsOlap(false); + tester.Execute(); + } + + Y_UNIT_TEST(OlapReadWriteTxFailsOnConcurrentWrite2) { + TReadWriteTxFailsOnConcurrentWrite2 tester; + tester.SetIsOlap(true); + tester.Execute(); + } + + class TReadWriteTxFailsOnConcurrentWrite3 : public TTableDataModificationTester { /* session2 commits a write to key 2 before tx reads it; tx still reads the old value "Two", and its later write+commit to key 2 must abort */ + protected: + void DoExecute() override { + auto client = Kikimr->GetQueryClient(); + + auto session1 = client.GetSession().GetValueSync().GetSession(); + auto session2 = client.GetSession().GetValueSync().GetSession(); + + auto result = session1.ExecuteQuery(Q_(R"( + SELECT * FROM `/Root/KV` WHERE Key = 1u OR Key = 4000000001u ORDER BY Key; + )"), TTxControl::BeginTx(TTxSettings::SerializableRW())).ExtractValueSync(); + + auto tx = result.GetTransaction(); + + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([ + [1u;["One"]]; + [4000000001u;["BigOne"]] + ])", FormatResultSetYson(result.GetResultSet(0))); + + result = session2.ExecuteQuery(Q_(R"( + UPSERT INTO `/Root/KV` (Key, Value) VALUES (2u, "ChangedTwo"); 
+ )"), TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); + + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + result = session1.ExecuteQuery(Q_(R"( + SELECT * FROM `/Root/KV` WHERE Key = 2u OR Key = 4000000002u ORDER BY Key; + )"), TTxControl::Tx(tx->GetId())).ExtractValueSync(); + + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([ + [2u;["Two"]]; + [4000000002u;["BigTwo"]] + ])", FormatResultSetYson(result.GetResultSet(0))); + + result = session1.ExecuteQuery(Q_(R"( + UPSERT INTO `/Root/KV` (Key, Value) VALUES (2u, "TwiceChangedTwo"); + )"), TTxControl::Tx(tx->GetId()).CommitTx()).ExtractValueSync(); + + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::ABORTED, result.GetIssues().ToString()); + UNIT_ASSERT_C(HasIssue(result.GetIssues(), NYql::TIssuesIds::KIKIMR_LOCKS_INVALIDATED), result.GetIssues().ToString()); + } + }; + + Y_UNIT_TEST(ReadWriteTxFailsOnConcurrentWrite3) { + TReadWriteTxFailsOnConcurrentWrite3 tester; + tester.SetIsOlap(false); + tester.Execute(); + } + + Y_UNIT_TEST(OlapReadWriteTxFailsOnConcurrentWrite3) { + TReadWriteTxFailsOnConcurrentWrite3 tester; + tester.SetIsOlap(true); + tester.Execute(); + } +} + +} // namespace NKqp +} // namespace NKikimr diff --git a/ydb/core/kqp/ut/tx/kqp_sink_tx_ut.cpp b/ydb/core/kqp/ut/tx/kqp_sink_tx_ut.cpp new file mode 100644 index 000000000000..4959cf240e05 --- /dev/null +++ b/ydb/core/kqp/ut/tx/kqp_sink_tx_ut.cpp @@ -0,0 +1,404 @@ +#include "kqp_sink_common.h" + +#include +#include +#include +#include + +namespace NKikimr { +namespace NKqp { + +using namespace NYdb; +using namespace NYdb::NQuery; + +Y_UNIT_TEST_SUITE(KqpSinkTx) { + class TDeferredEffects : public TTableDataModificationTester { + protected: + void DoExecute() override { + auto client = Kikimr->GetQueryClient(); + + auto session = client.GetSession().GetValueSync().GetSession(); + auto 
result = session.ExecuteQuery(Q_(R"( + UPSERT INTO `/Root/Test` + SELECT Group, "Sergey" AS Name + FROM `/Root/Test`; + )"), TTxControl::BeginTx(TTxSettings::SerializableRW())).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + auto tx = result.GetTransaction(); + + result = session.ExecuteQuery(Q_(R"( + SELECT * FROM `/Root/Test` WHERE Group = 1 ORDER BY Name; + )"), TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([ + [[3500u];["None"];1u;"Anna"]; + [[300u];["None"];1u;"Paul"] + ])", FormatResultSetYson(result.GetResultSet(0))); + + auto commitResult = tx->Commit().ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(commitResult.GetStatus(), EStatus::SUCCESS, commitResult.GetIssues().ToString()); + + result = session.ExecuteQuery(Q_(R"( + SELECT * FROM `/Root/Test` WHERE Group = 1 ORDER BY Name; + )"), TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([ + [[3500u];["None"];1u;"Anna"]; + [[300u];["None"];1u;"Paul"]; + [#;#;1u;"Sergey"] + ])", FormatResultSetYson(result.GetResultSet(0))); + } + }; + + Y_UNIT_TEST(DeferredEffects) { + TDeferredEffects tester; + tester.SetIsOlap(false); + tester.Execute(); + } + + Y_UNIT_TEST(OlapDeferredEffects) { + TDeferredEffects tester; + tester.SetIsOlap(true); + tester.Execute(); + } + + class TExplicitTcl : public TTableDataModificationTester { + protected: + void DoExecute() override { + auto client = Kikimr->GetQueryClient(); + + auto session = client.GetSession().GetValueSync().GetSession(); + auto tx = session.BeginTransaction(TTxSettings::SerializableRW()) + .ExtractValueSync() + .GetTransaction(); + UNIT_ASSERT(tx.IsActive()); + + auto result = 
session.ExecuteQuery(Q_(R"( + UPSERT INTO `/Root/KV` (Key, Value) VALUES (10u, "New"); + )"), TTxControl::Tx(tx.GetId())).ExtractValueSync(); + UNIT_ASSERT(result.IsSuccess()); + + result = session.ExecuteQuery(Q_(R"( + SELECT * FROM `/Root/KV` WHERE Value = "New"; + )"), TTxControl::BeginTx(TTxSettings::OnlineRO()).CommitTx()).ExtractValueSync(); + UNIT_ASSERT(result.IsSuccess()); + CompareYson(R"([])", FormatResultSetYson(result.GetResultSet(0))); + + auto commitResult = tx.Commit().ExtractValueSync(); + UNIT_ASSERT_C(commitResult.IsSuccess(), commitResult.GetIssues().ToString()); + + result = session.ExecuteQuery(Q_(R"( + SELECT * FROM `/Root/KV` WHERE Value = "New"; + )"), TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); + UNIT_ASSERT(result.IsSuccess()); + CompareYson(R"([[10u;["New"]]])", FormatResultSetYson(result.GetResultSet(0))); + + commitResult = tx.Commit().ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(commitResult.GetStatus(), EStatus::NOT_FOUND, commitResult.GetIssues().ToString()); + UNIT_ASSERT_C(HasIssue(commitResult.GetIssues(), NYql::TIssuesIds::KIKIMR_TRANSACTION_NOT_FOUND), commitResult.GetIssues().ToString()); + } + }; + + Y_UNIT_TEST(ExplicitTcl) { + TExplicitTcl tester; + tester.SetIsOlap(false); + tester.Execute(); + } + + Y_UNIT_TEST(OlapExplicitTcl) { + TExplicitTcl tester; + tester.SetIsOlap(true); + tester.Execute(); + } + + class TLocksAbortOnCommit : public TTableDataModificationTester { + protected: + void DoExecute() override { + auto client = Kikimr->GetQueryClient(); + + auto session = client.GetSession().GetValueSync().GetSession(); + { + auto result = session.ExecuteQuery(Q_(R"( + UPSERT INTO `/Root/KV` (Key, Value) VALUES (1, "One"); + UPSERT INTO `/Root/KV` (Key, Value) VALUES (2, "Two"); + UPSERT INTO `/Root/KV` (Key, Value) VALUES (3, "Three"); + UPSERT INTO `/Root/KV` (Key, Value) VALUES (4, "Four"); + )"), 
TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + } + + + auto result = session.ExecuteQuery(Q_(R"( + SELECT * FROM `/Root/KV`; + )"), TTxControl::BeginTx(TTxSettings::SerializableRW())).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + auto tx = result.GetTransaction(); + + result = session.ExecuteQuery(Q_(R"( + UPDATE `/Root/KV` SET Value = "second" WHERE Key = 3; + )"), TTxControl::BeginTx(TTxSettings::SerializableRW()).CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + result = session.ExecuteQuery(Q_(R"( + UPDATE `/Root/KV` SET Value = "third" WHERE Key = 4; + )"), TTxControl::Tx(tx->GetId())).ExtractValueSync(); + if (GetIsOlap()) { + // Olap has Reads in this query, so it breaks now. + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::ABORTED, result.GetIssues().ToString()); + } else { + // Oltp doesn't have Reads in this query, so it breaks later. 
+ UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + } + + auto commitResult = tx->Commit().ExtractValueSync(); + + if (GetIsOlap()) { + UNIT_ASSERT_VALUES_EQUAL_C(commitResult.GetStatus(), EStatus::NOT_FOUND, commitResult.GetIssues().ToString()); + } else { + UNIT_ASSERT_VALUES_EQUAL_C(commitResult.GetStatus(), EStatus::ABORTED, commitResult.GetIssues().ToString()); + } + } + }; + + Y_UNIT_TEST(LocksAbortOnCommit) { + TLocksAbortOnCommit tester; + tester.SetIsOlap(false); + tester.Execute(); + } + + Y_UNIT_TEST(OlapLocksAbortOnCommit) { + TLocksAbortOnCommit tester; + tester.SetIsOlap(true); + tester.Execute(); + } + + class TInvalidateOnError : public TTableDataModificationTester { + protected: + void DoExecute() override { + auto client = Kikimr->GetQueryClient(); + + auto session = client.GetSession().GetValueSync().GetSession(); + auto tx = session.BeginTransaction(TTxSettings::SerializableRW()) + .ExtractValueSync() + .GetTransaction(); + UNIT_ASSERT(tx.IsActive()); + + auto result = session.ExecuteQuery(Q_(R"( + INSERT INTO `/Root/KV` (Key, Value) VALUES (1u, "New"); + )"), TTxControl::Tx(tx.GetId())).ExtractValueSync(); + result.GetIssues().PrintTo(Cerr); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::BAD_REQUEST, result.GetIssues().ToString()); + + result = session.ExecuteQuery(Q_(R"( + UPSERT INTO `/Root/KV` (Key, Value) VALUES (1u, "New"); + )"), TTxControl::Tx(tx.GetId())).ExtractValueSync(); + result.GetIssues().PrintTo(Cerr); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::NOT_FOUND, result.GetIssues().ToString()); + } + }; + + Y_UNIT_TEST(InvalidateOnError) { + TInvalidateOnError tester; + tester.SetIsOlap(false); + tester.Execute(); + } + + Y_UNIT_TEST(OlapInvalidateOnError) { + TInvalidateOnError tester; + tester.SetIsOlap(true); + tester.Execute(); + } + + class TInteractive : public TTableDataModificationTester { + protected: + void DoExecute() override { + auto client = 
Kikimr->GetQueryClient(); + + auto session = client.GetSession().GetValueSync().GetSession(); + auto tx = session.BeginTransaction(TTxSettings::SerializableRW()) + .ExtractValueSync() + .GetTransaction(); + UNIT_ASSERT(tx.IsActive()); + + auto result = session.ExecuteQuery(R"( + SELECT * FROM `/Root/KV` + )", TTxControl::Tx(tx.GetId())).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + result = session.ExecuteQuery(R"( + UPSERT INTO `/Root/KV` (Key, Value) VALUES (1u, "New"); + )", TTxControl::Tx(tx.GetId()).CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + result = session.ExecuteQuery(R"( + SELECT * FROM `/Root/KV` WHERE Key < 3 ORDER BY Key + )", TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([ + [1u;["New"]]; + [2u;["Two"]] + ])", FormatResultSetYson(result.GetResultSet(0))); + } + }; + + Y_UNIT_TEST(Interactive) { + TInteractive tester; + tester.SetIsOlap(false); + tester.Execute(); + } + + Y_UNIT_TEST(OlapInteractive) { + TInteractive tester; + tester.SetIsOlap(true); + tester.Execute(); + } + + class TSnapshotRO : public TTableDataModificationTester { + protected: + void DoExecute() override { + auto client = Kikimr->GetQueryClient(); + + auto session = client.GetSession().GetValueSync().GetSession(); + // Read Immediate + auto result = session.ExecuteQuery(Q1_(R"( + SELECT * FROM KV WHERE Key = 2; + )"), TTxControl::BeginTx(TTxSettings::SnapshotRO()).CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([[2u;["Two"]]])", FormatResultSetYson(result.GetResultSet(0))); + + // Read Distributed + result = session.ExecuteQuery(Q1_(R"( + SELECT COUNT(*) FROM KV WHERE Value = "One"; + )"), 
TTxControl::BeginTx(TTxSettings::SnapshotRO()).CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([[1u]])", FormatResultSetYson(result.GetResultSet(0))); + + // Write + result = session.ExecuteQuery(Q1_(R"( + UPSERT INTO `/Root/KV` (Key, Value) VALUES + (100, "100500"), + (100500, "100"); + )"), TTxControl::BeginTx(TTxSettings::SnapshotRO()).CommitTx()).ExtractValueSync(); + result.GetIssues().PrintTo(Cerr); + UNIT_ASSERT_VALUES_EQUAL(result.GetStatus(), EStatus::GENERIC_ERROR); + UNIT_ASSERT(HasIssue(result.GetIssues(), NYql::TIssuesIds::KIKIMR_BAD_OPERATION)); + } + }; + + Y_UNIT_TEST(SnapshotRO) { + TSnapshotRO tester; + tester.SetIsOlap(false); + tester.Execute(); + } + + Y_UNIT_TEST(OlapSnapshotRO) { + TSnapshotRO tester; + tester.SetIsOlap(true); + tester.Execute(); + } + + class TSnapshotROInteractive1 : public TTableDataModificationTester { + protected: + void DoExecute() override { + auto client = Kikimr->GetQueryClient(); + + auto session = client.GetSession().GetValueSync().GetSession(); + auto readQuery = Q1_(R"( + SELECT * FROM KV WHERE Key = 1u; + )"); + + auto readResult = R"([ + [1u;["One"]] + ])"; + + auto result = session.ExecuteQuery(readQuery, + TTxControl::BeginTx(TTxSettings::SnapshotRO())).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(readResult, FormatResultSetYson(result.GetResultSet(0))); + + auto tx = result.GetTransaction(); + UNIT_ASSERT(tx); + UNIT_ASSERT(tx->IsActive()); + + result = session.ExecuteQuery(Q1_(R"( + UPSERT INTO `/Root/KV` (Key, Value) VALUES + (1u, "value"); + )"), TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + result = session.ExecuteQuery(readQuery, + TTxControl::Tx(tx->GetId()).CommitTx()).ExtractValueSync(); + 
UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(readResult, FormatResultSetYson(result.GetResultSet(0))); + } + }; + + Y_UNIT_TEST(SnapshotROInteractive1) { + TSnapshotROInteractive1 tester; + tester.SetIsOlap(false); + tester.Execute(); + } + + Y_UNIT_TEST(OlapSnapshotROInteractive1) { + TSnapshotROInteractive1 tester; + tester.SetIsOlap(true); + tester.Execute(); + } + + class TSnapshotROInteractive2 : public TTableDataModificationTester { + protected: + void DoExecute() override { + auto client = Kikimr->GetQueryClient(); + + auto session = client.GetSession().GetValueSync().GetSession(); + auto readQuery = Q1_(R"( + SELECT COUNT(*) FROM KV WHERE Value = "One"; + )"); + + auto readResult = R"([ + [1u] + ])"; + + auto tx = session.BeginTransaction(TTxSettings::SnapshotRO()) + .ExtractValueSync() + .GetTransaction(); + UNIT_ASSERT(tx.IsActive()); + + auto result = session.ExecuteQuery(readQuery, + TTxControl::Tx(tx.GetId())).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(readResult, FormatResultSetYson(result.GetResultSet(0))); + + result = session.ExecuteQuery(Q1_(R"( + UPSERT INTO `/Root/KV` (Key, Value) VALUES + (100500u, "One"); + )"), TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + + result = session.ExecuteQuery(readQuery, + TTxControl::Tx(tx.GetId())).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(readResult, FormatResultSetYson(result.GetResultSet(0))); + + auto commitResult = tx.Commit().ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(commitResult.GetStatus(), EStatus::SUCCESS, commitResult.GetIssues().ToString()); + } + }; + + Y_UNIT_TEST(SnapshotROInteractive2) { + TSnapshotROInteractive2 tester; + 
tester.SetIsOlap(false); + tester.Execute(); + } + + Y_UNIT_TEST(OlapSnapshotROInteractive2) { + TSnapshotROInteractive2 tester; + tester.SetIsOlap(true); + tester.Execute(); + } +} + +} // namespace NKqp +} // namespace NKikimr diff --git a/ydb/core/kqp/ut/tx/ya.make b/ydb/core/kqp/ut/tx/ya.make index 7f4ca5fae0d6..e7369a4a256e 100644 --- a/ydb/core/kqp/ut/tx/ya.make +++ b/ydb/core/kqp/ut/tx/ya.make @@ -16,12 +16,16 @@ SRCS( kqp_locks_tricky_ut.cpp kqp_locks_ut.cpp kqp_mvcc_ut.cpp + kqp_sink_locks_ut.cpp + kqp_sink_mvcc_ut.cpp + kqp_sink_tx_ut.cpp kqp_tx_ut.cpp ) PEERDIR( ydb/core/kqp ydb/core/kqp/ut/common + ydb/core/tx/columnshard/hooks/testing ydb/library/yql/sql/pg_dummy ) diff --git a/ydb/core/kqp/ut/view/input/cases/aggregates_and_window/create_view.sql b/ydb/core/kqp/ut/view/input/cases/aggregates_and_window/create_view.sql new file mode 100644 index 000000000000..f9f813948191 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/aggregates_and_window/create_view.sql @@ -0,0 +1,43 @@ +CREATE VIEW `/Root/aggregates_and_window` WITH (security_invoker = TRUE) AS + SELECT + series.title AS series, + series_stats.seasons_with_episode_count_greater_than_average AS seasons_with_episode_count_greater_than_average + FROM ( + SELECT + series_id, + SUM( + CASE + WHEN episode_count > average_episodes_in_season + THEN 1 + ELSE 0 + END + ) AS seasons_with_episode_count_greater_than_average + FROM ( + SELECT + series_id, + season_id, + episode_count, + AVG(episode_count) OVER average_episodes_in_season_window AS average_episodes_in_season + FROM ( + SELECT + series_id, + season_id, + COUNT(*) AS episode_count + FROM `/Root/episodes` + GROUP BY + series_id, + season_id + ) + WINDOW + average_episodes_in_season_window AS ( + PARTITION BY + series_id + ) + ) + GROUP BY + series_id + ) + AS series_stats + JOIN `/Root/series` + AS series + USING (series_id); diff --git a/ydb/core/kqp/ut/view/input/cases/aggregates_and_window/drop_view.sql 
b/ydb/core/kqp/ut/view/input/cases/aggregates_and_window/drop_view.sql new file mode 100644 index 000000000000..c4c0dd0a1c8f --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/aggregates_and_window/drop_view.sql @@ -0,0 +1 @@ +DROP VIEW `/Root/aggregates_and_window`; diff --git a/ydb/core/kqp/ut/view/input/cases/aggregates_and_window/etalon_query.sql b/ydb/core/kqp/ut/view/input/cases/aggregates_and_window/etalon_query.sql new file mode 100644 index 000000000000..c701bd2d2648 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/aggregates_and_window/etalon_query.sql @@ -0,0 +1,46 @@ +SELECT + * +FROM ( + SELECT + series.title AS series, + series_stats.seasons_with_episode_count_greater_than_average AS seasons_with_episode_count_greater_than_average + FROM ( + SELECT + series_id, + SUM( + CASE + WHEN episode_count > average_episodes_in_season + THEN 1 + ELSE 0 + END + ) AS seasons_with_episode_count_greater_than_average + FROM ( + SELECT + series_id, + season_id, + episode_count, + AVG(episode_count) OVER average_episodes_in_season_window AS average_episodes_in_season + FROM ( + SELECT + series_id, + season_id, + COUNT(*) AS episode_count + FROM `/Root/episodes` + GROUP BY + series_id, + season_id + ) + WINDOW + average_episodes_in_season_window AS ( + PARTITION BY + series_id + ) + ) + GROUP BY + series_id + ) + AS series_stats + JOIN `/Root/series` + AS series + USING (series_id) +); diff --git a/ydb/core/kqp/ut/view/input/cases/aggregates_and_window/select_from_view.sql b/ydb/core/kqp/ut/view/input/cases/aggregates_and_window/select_from_view.sql new file mode 100644 index 000000000000..0cc947dd8661 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/aggregates_and_window/select_from_view.sql @@ -0,0 +1,3 @@ +SELECT + * +FROM `/Root/aggregates_and_window`; diff --git a/ydb/core/kqp/ut/view/input/cases/count_episodes/create_view.sql b/ydb/core/kqp/ut/view/input/cases/count_episodes/create_view.sql new file mode 100644 index 000000000000..dcbfee8f92db --- 
/dev/null +++ b/ydb/core/kqp/ut/view/input/cases/count_episodes/create_view.sql @@ -0,0 +1,9 @@ +CREATE VIEW `/Root/count_episodes` WITH (security_invoker = TRUE) AS + SELECT + series_id, + season_id, + COUNT(*) + FROM `/Root/episodes` + GROUP BY + series_id, + season_id; diff --git a/ydb/core/kqp/ut/view/input/cases/count_episodes/drop_view.sql b/ydb/core/kqp/ut/view/input/cases/count_episodes/drop_view.sql new file mode 100644 index 000000000000..897adef6e6d6 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/count_episodes/drop_view.sql @@ -0,0 +1 @@ +DROP VIEW `/Root/count_episodes`; diff --git a/ydb/core/kqp/ut/view/input/cases/count_episodes/etalon_query.sql b/ydb/core/kqp/ut/view/input/cases/count_episodes/etalon_query.sql new file mode 100644 index 000000000000..8964c9ab76a1 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/count_episodes/etalon_query.sql @@ -0,0 +1,12 @@ +SELECT + * +FROM ( + SELECT + series_id, + season_id, + COUNT(*) + FROM `/Root/episodes` + GROUP BY + series_id, + season_id +); diff --git a/ydb/core/kqp/ut/view/input/cases/count_episodes/select_from_view.sql b/ydb/core/kqp/ut/view/input/cases/count_episodes/select_from_view.sql new file mode 100644 index 000000000000..153ad494f165 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/count_episodes/select_from_view.sql @@ -0,0 +1,3 @@ +SELECT + * +FROM `/Root/count_episodes`; diff --git a/ydb/core/kqp/ut/view/input/cases/count_episodes_with_titles/create_view.sql b/ydb/core/kqp/ut/view/input/cases/count_episodes_with_titles/create_view.sql new file mode 100644 index 000000000000..545ec4056034 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/count_episodes_with_titles/create_view.sql @@ -0,0 +1,22 @@ +CREATE VIEW `/Root/count_episodes_with_titles` WITH (security_invoker = TRUE) AS + SELECT + series.title AS series, + seasons.title AS season, + episodes.episode_count AS episode_count + FROM ( + SELECT + series_id, + season_id, + COUNT(*) AS episode_count + FROM `/Root/episodes` + 
GROUP BY + series_id, + season_id + ) + AS episodes + JOIN `/Root/series` + AS series + ON episodes.series_id == series.series_id + JOIN `/Root/seasons` + AS seasons + ON episodes.series_id == seasons.series_id AND episodes.season_id == seasons.season_id; diff --git a/ydb/core/kqp/ut/view/input/cases/count_episodes_with_titles/drop_view.sql b/ydb/core/kqp/ut/view/input/cases/count_episodes_with_titles/drop_view.sql new file mode 100644 index 000000000000..99e8b54a675d --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/count_episodes_with_titles/drop_view.sql @@ -0,0 +1 @@ +DROP VIEW `/Root/count_episodes_with_titles`; diff --git a/ydb/core/kqp/ut/view/input/cases/count_episodes_with_titles/etalon_query.sql b/ydb/core/kqp/ut/view/input/cases/count_episodes_with_titles/etalon_query.sql new file mode 100644 index 000000000000..faa472a66beb --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/count_episodes_with_titles/etalon_query.sql @@ -0,0 +1,25 @@ +SELECT + * +FROM ( + SELECT + series.title AS series, + seasons.title AS season, + episodes.episode_count AS episode_count + FROM ( + SELECT + series_id, + season_id, + COUNT(*) AS episode_count + FROM `/Root/episodes` + GROUP BY + series_id, + season_id + ) + AS episodes + JOIN `/Root/series` + AS series + ON episodes.series_id == series.series_id + JOIN `/Root/seasons` + AS seasons + ON episodes.series_id == seasons.series_id AND episodes.season_id == seasons.season_id +); diff --git a/ydb/core/kqp/ut/view/input/cases/count_episodes_with_titles/select_from_view.sql b/ydb/core/kqp/ut/view/input/cases/count_episodes_with_titles/select_from_view.sql new file mode 100644 index 000000000000..b84299a26d02 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/count_episodes_with_titles/select_from_view.sql @@ -0,0 +1,3 @@ +SELECT + * +FROM `/Root/count_episodes_with_titles`; diff --git a/ydb/core/kqp/ut/view/input/cases/count_rows/create_view.sql b/ydb/core/kqp/ut/view/input/cases/count_rows/create_view.sql new file mode 
100644 index 000000000000..0189412ace53 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/count_rows/create_view.sql @@ -0,0 +1,4 @@ +CREATE VIEW `/Root/count_rows` WITH (security_invoker = TRUE) AS + SELECT + COUNT(*) + FROM `/Root/episodes`; diff --git a/ydb/core/kqp/ut/view/input/cases/count_rows/drop_view.sql b/ydb/core/kqp/ut/view/input/cases/count_rows/drop_view.sql new file mode 100644 index 000000000000..b540f8f401e8 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/count_rows/drop_view.sql @@ -0,0 +1 @@ +DROP VIEW `/Root/count_rows`; diff --git a/ydb/core/kqp/ut/view/input/cases/count_rows/etalon_query.sql b/ydb/core/kqp/ut/view/input/cases/count_rows/etalon_query.sql new file mode 100644 index 000000000000..b9d2cdb0f4a7 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/count_rows/etalon_query.sql @@ -0,0 +1,7 @@ +SELECT + * +FROM ( + SELECT + COUNT(*) + FROM `/Root/episodes` +); diff --git a/ydb/core/kqp/ut/view/input/cases/count_rows/select_from_view.sql b/ydb/core/kqp/ut/view/input/cases/count_rows/select_from_view.sql new file mode 100644 index 000000000000..49ffdf0dd923 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/count_rows/select_from_view.sql @@ -0,0 +1,3 @@ +SELECT + * +FROM `/Root/count_rows`; diff --git a/ydb/core/kqp/ut/view/input/cases/in_subquery/create_view.sql b/ydb/core/kqp/ut/view/input/cases/in_subquery/create_view.sql new file mode 100644 index 000000000000..b8793469c350 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/in_subquery/create_view.sql @@ -0,0 +1,9 @@ +CREATE VIEW in_subquery WITH (security_invoker = TRUE) AS + SELECT + * + FROM series + WHERE series_id IN ( + SELECT + series_id + FROM series + ); diff --git a/ydb/core/kqp/ut/view/input/cases/in_subquery/drop_view.sql b/ydb/core/kqp/ut/view/input/cases/in_subquery/drop_view.sql new file mode 100644 index 000000000000..da81f5486bc6 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/in_subquery/drop_view.sql @@ -0,0 +1 @@ +DROP VIEW in_subquery; diff 
--git a/ydb/core/kqp/ut/view/input/cases/in_subquery/etalon_query.sql b/ydb/core/kqp/ut/view/input/cases/in_subquery/etalon_query.sql new file mode 100644 index 000000000000..596f8521ea79 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/in_subquery/etalon_query.sql @@ -0,0 +1,12 @@ +SELECT + * +FROM ( + SELECT + * + FROM series + WHERE series_id IN ( + SELECT + series_id + FROM series + ) +); diff --git a/ydb/core/kqp/ut/view/input/cases/in_subquery/select_from_view.sql b/ydb/core/kqp/ut/view/input/cases/in_subquery/select_from_view.sql new file mode 100644 index 000000000000..c0c1b52763e0 --- /dev/null +++ b/ydb/core/kqp/ut/view/input/cases/in_subquery/select_from_view.sql @@ -0,0 +1,3 @@ +SELECT + * +FROM in_subquery; diff --git a/ydb/core/kqp/ut/view/view_ut.cpp b/ydb/core/kqp/ut/view/view_ut.cpp index 381dea85c6d9..ba7aca97e589 100644 --- a/ydb/core/kqp/ut/view/view_ut.cpp +++ b/ydb/core/kqp/ut/view/view_ut.cpp @@ -56,6 +56,20 @@ TString ReadWholeFile(const TString& path) { return file.ReadAll(); } +NQuery::TExecuteQueryResult ExecuteQuery(NQuery::TSession& session, const TString& query) { + const auto result = session.ExecuteQuery( + query, + NQuery::TTxControl::NoTx() + ).ExtractValueSync(); + + UNIT_ASSERT_C(result.IsSuccess(), + "Failed to execute the following query:\n" << query << '\n' + << "The issues:\n" << result.GetIssues().ToString() + ); + + return result; +} + void ExecuteDataDefinitionQuery(TSession& session, const TString& script) { const auto result = session.ExecuteSchemeQuery(script).ExtractValueSync(); UNIT_ASSERT_C(result.IsSuccess(), "Failed to execute the following DDL script:\n" @@ -110,20 +124,25 @@ void AssertFromCache(const TMaybe& stats, bool expectedValue) { UNIT_ASSERT_VALUES_EQUAL_C(*isFromCache, expectedValue, stats->ToString()); } -void CompareResults(const TDataQueryResult& first, const TDataQueryResult& second) { - const auto& firstResults = first.GetResultSets(); - const auto& secondResults = second.GetResultSets(); - 
+void CompareResults(const TVector& firstResults, const TVector& secondResults) { UNIT_ASSERT_VALUES_EQUAL(firstResults.size(), secondResults.size()); for (size_t i = 0; i < firstResults.size(); ++i) { CompareYson(FormatResultSetYson(firstResults[i]), FormatResultSetYson(secondResults[i])); } } -void InitializeTablesAndSecondaryViews(TSession& session) { +void CompareResults(const TDataQueryResult& first, const TDataQueryResult& second) { + CompareResults(first.GetResultSets(), second.GetResultSets()); +} + +void CompareResults(const NQuery::TExecuteQueryResult& first, const NQuery::TExecuteQueryResult& second) { + CompareResults(first.GetResultSets(), second.GetResultSets()); +} + +void InitializeTablesAndSecondaryViews(NQuery::TSession& session) { const auto inputFolder = ArcadiaFromCurrentLocation(__SOURCE_FILE__, "input"); - ExecuteDataDefinitionQuery(session, ReadWholeFile(inputFolder + "/create_tables_and_secondary_views.sql")); - ExecuteDataModificationQuery(session, ReadWholeFile(inputFolder + "/fill_tables.sql")); + ExecuteQuery(session, ReadWholeFile(inputFolder + "/create_tables_and_secondary_views.sql")); + ExecuteQuery(session, ReadWholeFile(inputFolder + "/fill_tables.sql")); } } @@ -162,7 +181,7 @@ Y_UNIT_TEST_SUITE(TCreateAndDropViewTest) { )", path ); - + DisableViewsFeatureFlag(kikimr); const auto creationResult = session.ExecuteSchemeQuery(creationQuery).ExtractValueSync(); UNIT_ASSERT(!creationResult.IsSuccess()); @@ -194,6 +213,90 @@ Y_UNIT_TEST_SUITE(TCreateAndDropViewTest) { UNIT_ASSERT_STRING_CONTAINS(creationResult.GetIssues().ToString(), "Error: Cannot divide type String and String"); } + Y_UNIT_TEST(ParsingSecurityInvoker) { + TKikimrRunner kikimr(TKikimrSettings().SetWithSampleTables(false)); + EnableViewsFeatureFlag(kikimr); + auto session = kikimr.GetQueryClient().GetSession().ExtractValueSync().GetSession(); + + constexpr const char* path = "TheView"; + constexpr const char* query = "SELECT 1"; + + auto fail = [&](const char* options) 
{ + const TString creationQuery = std::format(R"( + CREATE VIEW {} {} AS {}; + )", + path, + options, + query + ); + + const auto creationResult = session.ExecuteQuery( + creationQuery, + NQuery::TTxControl::NoTx() + ).ExtractValueSync(); + + UNIT_ASSERT_C(!creationResult.IsSuccess(), creationQuery); + UNIT_ASSERT_STRING_CONTAINS( + creationResult.GetIssues().ToString(), "security_invoker option must be explicitly enabled" + ); + }; + fail(""); + fail("WITH security_invoker"); + fail("WITH security_invoker = false"); + fail("WITH SECURITY_INVOKER = true"); // option name is case-sensitive + fail("WITH (security_invoker)"); + fail("WITH (security_invoker = false)"); + fail("WITH (security_invoker = true, security_invoker = false)"); + + auto succeed = [&](const char* options) { + const TString creationQuery = std::format(R"( + CREATE VIEW {} {} AS {}; + DROP VIEW {}; + )", + path, + options, + query, + path + ); + ExecuteQuery(session, creationQuery); + }; + succeed("WITH security_invoker = true"); + succeed("WITH (security_invoker = true)"); + succeed("WITH (security_invoker = tRuE)"); // bool parsing is flexible enough + succeed("WITH (security_invoker = false, security_invoker = true)"); + + { + // literal named expression + const TString creationQuery = std::format(R"( + $value = "true"; + CREATE VIEW {} WITH security_invoker = $value AS {}; + DROP VIEW {}; + )", + path, + query, + path + ); + ExecuteQuery(session, creationQuery); + } + { + // evaluated expression + const TString creationQuery = std::format(R"( + $lambda = ($x) -> {{ + RETURN CAST($x as String) + }}; + $value = $lambda(true); + + CREATE VIEW {} WITH security_invoker = $value AS {}; + DROP VIEW {}; + )", + path, + query, + path + ); + ExecuteQuery(session, creationQuery); + } + } + Y_UNIT_TEST(ListCreatedView) { TKikimrRunner kikimr(TKikimrSettings().SetWithSampleTables(false)); EnableViewsFeatureFlag(kikimr); @@ -325,6 +428,32 @@ Y_UNIT_TEST_SUITE(TCreateAndDropViewTest) { } } + 
Y_UNIT_TEST(DropViewInFolder) { + TKikimrRunner kikimr(TKikimrSettings().SetWithSampleTables(false)); + EnableViewsFeatureFlag(kikimr); + auto& runtime = *kikimr.GetTestServer().GetRuntime(); + auto session = kikimr.GetTableClient().CreateSession().GetValueSync().GetSession(); + + constexpr const char* path = "/Root/some/path/to/TheView"; + constexpr const char* queryInView = "SELECT 1"; + + const TString creationQuery = std::format(R"( + CREATE VIEW `{}` WITH (security_invoker = true) AS {}; + )", + path, + queryInView + ); + ExecuteDataDefinitionQuery(session, creationQuery); + + const TString dropQuery = std::format(R"( + DROP VIEW `{}`; + )", + path + ); + ExecuteDataDefinitionQuery(session, dropQuery); + ExpectUnknownEntry(runtime, path); + } + Y_UNIT_TEST(ContextPollution) { TKikimrRunner kikimr(TKikimrSettings().SetWithSampleTables(false)); EnableViewsFeatureFlag(kikimr); @@ -336,7 +465,7 @@ Y_UNIT_TEST_SUITE(TCreateAndDropViewTest) { ExecuteDataDefinitionQuery(session, R"( CREATE VIEW OuterView WITH (security_invoker = TRUE) AS SELECT * FROM InnerView; )"); - + ExecuteDataDefinitionQuery(session, R"( DROP VIEW OuterView; CREATE VIEW OuterView WITH (security_invoker = TRUE) AS SELECT * FROM InnerView; @@ -382,6 +511,46 @@ Y_UNIT_TEST_SUITE(TSelectFromViewTest) { CompareResults(etalonResults, selectFromViewResults); } + Y_UNIT_TEST(OneTableUsingRelativeName) { + TKikimrRunner kikimr; + + auto& runtime = *kikimr.GetTestServer().GetRuntime(); + runtime.SetLogPriority(NKikimrServices::FLAT_TX_SCHEMESHARD, NLog::PRI_DEBUG); + + EnableViewsFeatureFlag(kikimr); + auto session = kikimr.GetQueryClient().GetSession().ExtractValueSync().GetSession(); + + constexpr const char* viewName = "TheView"; + constexpr const char* testTable = "Test"; + const auto innerQuery = std::format(R"( + SELECT * FROM {} + )", + testTable + ); + + const TString creationQuery = std::format(R"( + CREATE VIEW {} WITH (security_invoker = true) AS {}; + )", + viewName, + innerQuery + ); + 
ExecuteQuery(session, creationQuery); + + const auto etalonResults = ExecuteQuery(session, std::format(R"( + SELECT * FROM ({}); + )", + innerQuery + ) + ); + const auto selectFromViewResults = ExecuteQuery(session, std::format(R"( + SELECT * FROM {}; + )", + viewName + ) + ); + CompareResults(etalonResults, selectFromViewResults); + } + Y_UNIT_TEST(DisabledFeatureFlag) { TKikimrRunner kikimr(TKikimrSettings().SetWithSampleTables(false)); auto session = kikimr.GetTableClient().CreateSession().GetValueSync().GetSession(); @@ -413,7 +582,7 @@ Y_UNIT_TEST_SUITE(TSelectFromViewTest) { Y_UNIT_TEST(ReadTestCasesFromFiles) { TKikimrRunner kikimr; EnableViewsFeatureFlag(kikimr); - auto session = kikimr.GetTableClient().CreateSession().GetValueSync().GetSession(); + auto session = kikimr.GetQueryClient().GetSession().ExtractValueSync().GetSession(); InitializeTablesAndSecondaryViews(session); EnableLogging(); @@ -424,13 +593,13 @@ Y_UNIT_TEST_SUITE(TSelectFromViewTest) { TString testcase; while (testcase = testcases.Next()) { const auto pathPrefix = TStringBuilder() << testcasesFolder << '/' << testcase << '/'; - ExecuteDataDefinitionQuery(session, ReadWholeFile(pathPrefix + "create_view.sql")); + ExecuteQuery(session, ReadWholeFile(pathPrefix + "create_view.sql")); - const auto etalonResults = ExecuteDataModificationQuery(session, ReadWholeFile(pathPrefix + "etalon_query.sql")); - const auto selectFromViewResults = ExecuteDataModificationQuery(session, ReadWholeFile(pathPrefix + "select_from_view.sql")); + const auto etalonResults = ExecuteQuery(session, ReadWholeFile(pathPrefix + "etalon_query.sql")); + const auto selectFromViewResults = ExecuteQuery(session, ReadWholeFile(pathPrefix + "select_from_view.sql")); CompareResults(etalonResults, selectFromViewResults); - ExecuteDataDefinitionQuery(session, ReadWholeFile(pathPrefix + "drop_view.sql")); + ExecuteQuery(session, ReadWholeFile(pathPrefix + "drop_view.sql")); } } diff --git a/ydb/core/kqp/ut/yql/kqp_pragma_ut.cpp 
b/ydb/core/kqp/ut/yql/kqp_pragma_ut.cpp index 8cd9b54a6857..edf5740b7ef2 100644 --- a/ydb/core/kqp/ut/yql/kqp_pragma_ut.cpp +++ b/ydb/core/kqp/ut/yql/kqp_pragma_ut.cpp @@ -84,6 +84,93 @@ Y_UNIT_TEST_SUITE(KqpPragma) { UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); UNIT_ASSERT_C(result.GetIssues().Empty(), result.GetIssues().ToString()); } + + Y_UNIT_TEST(MatchRecognizeWithTimeOrderRecoverer) { + TKikimrSettings settings; + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableQueryServiceConfig()->SetEnableMatchRecognize(true); + settings.SetAppConfig(appConfig); + + TKikimrRunner kikimr(settings); + NYdb::NScripting::TScriptingClient client(kikimr.GetDriver()); + + auto result = client.ExecuteYqlScript(R"( + PRAGMA FeatureR010="prototype"; + + CREATE TABLE `/Root/NewTable` ( + dt Uint64, + value String, + PRIMARY KEY (dt) + ); + COMMIT; + + INSERT INTO `/Root/NewTable` (dt, value) VALUES + (1, 'value1'), (2, 'value2'), (3, 'value3'), (4, 'value4'); + COMMIT; + + SELECT * FROM (SELECT dt, value FROM `/Root/NewTable`) + MATCH_RECOGNIZE( + ORDER BY CAST(dt as Timestamp) + MEASURES + LAST(V1.dt) as v1, + LAST(V4.dt) as v4 + ONE ROW PER MATCH + PATTERN (V1 V* V4) + DEFINE + V1 as V1.value = "value1", + V as True, + V4 as V4.value = "value4" + ); + )").GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([ + [[1u];[4u]]; + ])", FormatResultSetYson(result.GetResultSet(0))); + } + + Y_UNIT_TEST(MatchRecognizeWithoutTimeOrderRecoverer) { + TKikimrSettings settings; + NKikimrConfig::TAppConfig appConfig; + appConfig.MutableQueryServiceConfig()->SetEnableMatchRecognize(true); + settings.SetAppConfig(appConfig); + + TKikimrRunner kikimr(settings); + NYdb::NScripting::TScriptingClient client(kikimr.GetDriver()); + + auto result = client.ExecuteYqlScript(R"( + PRAGMA FeatureR010="prototype"; + PRAGMA config.flags("MatchRecognizeStream", "disable"); 
+ + CREATE TABLE `/Root/NewTable` ( + dt Uint64, + value String, + PRIMARY KEY (dt) + ); + COMMIT; + + INSERT INTO `/Root/NewTable` (dt, value) VALUES + (1, 'value1'), (2, 'value2'), (3, 'value3'), (4, 'value4'); + COMMIT; + + SELECT * FROM (SELECT dt, value FROM `/Root/NewTable`) + MATCH_RECOGNIZE( + ORDER BY CAST(dt as Timestamp) + MEASURES + LAST(V1.dt) as v1, + LAST(V4.dt) as v4 + ONE ROW PER MATCH + PATTERN (V1 V* V4) + DEFINE + V1 as V1.value = "value1", + V as True, + V4 as V4.value = "value4" + ); + )").GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([ + [[1u];[4u]]; + ])", FormatResultSetYson(result.GetResultSet(0))); + } } } // namspace NKqp diff --git a/ydb/core/kqp/ut/yql/kqp_scripting_ut.cpp b/ydb/core/kqp/ut/yql/kqp_scripting_ut.cpp index 6525749d7ce2..b09e37b794dd 100644 --- a/ydb/core/kqp/ut/yql/kqp_scripting_ut.cpp +++ b/ydb/core/kqp/ut/yql/kqp_scripting_ut.cpp @@ -79,7 +79,7 @@ Y_UNIT_TEST_SUITE(KqpScripting) { ALTER TABLE `/Root/ScriptingCreateAndAlterTableTest` SET (AUTO_PARTITIONING_BY_SIZE = ENABLED); )").GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); - + result = client.ExecuteYqlScript(R"( ALTER TABLE `/Root/ScriptingCreateAndAlterTableTest` SET (AUTO_PARTITIONING_BY_SIZE = ENABLED); COMMIT; @@ -88,7 +88,7 @@ Y_UNIT_TEST_SUITE(KqpScripting) { ALTER TABLE `/Root/ScriptingCreateAndAlterTableTest` SET (AUTO_PARTITIONING_MIN_PARTITIONS_COUNT = 4); )").GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); - + result = client.ExecuteYqlScript(R"( ALTER TABLE `/Root/ScriptingCreateAndAlterTableTest` SET (AUTO_PARTITIONING_BY_SIZE = ENABLED); COMMIT; @@ -989,7 +989,36 @@ Y_UNIT_TEST_SUITE(KqpScripting) { [[[101u]];[[201u]];[[301u]];[[401u]];[[501u]];[[601u]];[[701u]];[[801u]]]; [[8u]]; [[8u]]; - [[8u]]])", StreamResultToYson(it)); + [[8u]] + 
])", StreamResultToYson(it)); + } + + Y_UNIT_TEST(SelectNullType) { + TKikimrRunner kikimr; + TScriptingClient client(kikimr.GetDriver()); + { + auto result = client.ExecuteYqlScript(R"( + CREATE TABLE demo1(id Text, PRIMARY KEY(id)); + )").GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + } + + { + auto result = client.ExecuteYqlScript(R"( + UPSERT INTO demo1(id) VALUES("a"),("b"),("c"); + )").GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + } + + { + auto result = client.ExecuteYqlScript(R"( + SELECT NULL auto_proc_ FROM demo1 LIMIT 10; + )").GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + CompareYson(R"([ + [#];[#];[#] + ])", FormatResultSetYson(result.GetResultSet(0))); + } } Y_UNIT_TEST(StreamExecuteYqlScriptLeadingEmptyScan) { @@ -1206,7 +1235,7 @@ Y_UNIT_TEST_SUITE(KqpScripting) { Y_UNIT_TEST(StreamExecuteYqlScriptPg) { TKikimrRunner kikimr; - + auto settings = TExecuteYqlRequestSettings() .Syntax(Ydb::Query::SYNTAX_PG); diff --git a/ydb/core/kqp/ut/yql/kqp_yql_ut.cpp b/ydb/core/kqp/ut/yql/kqp_yql_ut.cpp index 247704928cfb..17c7fa65c0fc 100644 --- a/ydb/core/kqp/ut/yql/kqp_yql_ut.cpp +++ b/ydb/core/kqp/ut/yql/kqp_yql_ut.cpp @@ -613,6 +613,7 @@ Y_UNIT_TEST_SUITE(KqpYql) { appConfig.MutableTableServiceConfig()->SetEnablePreparedDdl(true); auto setting = NKikimrKqp::TKqpSetting(); auto serverSettings = TKikimrSettings() + .SetEnableUuidAsPrimaryKey(false) .SetAppConfig(appConfig) .SetKqpSettings({setting}); TKikimrRunner kikimr(serverSettings.SetWithSampleTables(false)); @@ -684,7 +685,6 @@ Y_UNIT_TEST_SUITE(KqpYql) { auto setting = NKikimrKqp::TKqpSetting(); auto serverSettings = TKikimrSettings() .SetAppConfig(appConfig) - .SetEnableUuidAsPrimaryKey(true) .SetKqpSettings({setting}); TKikimrRunner kikimr(serverSettings.SetWithSampleTables(false)); @@ -833,7 
+833,6 @@ Y_UNIT_TEST_SUITE(KqpYql) { auto setting = NKikimrKqp::TKqpSetting(); auto serverSettings = TKikimrSettings() .SetAppConfig(appConfig) - .SetEnableUuidAsPrimaryKey(true) .SetKqpSettings({setting}); TKikimrRunner kikimr(serverSettings.SetWithSampleTables(false)); @@ -890,7 +889,6 @@ Y_UNIT_TEST_SUITE(KqpYql) { auto setting = NKikimrKqp::TKqpSetting(); auto serverSettings = TKikimrSettings() .SetAppConfig(appConfig) - .SetEnableUuidAsPrimaryKey(true) .SetKqpSettings({setting}); TKikimrRunner kikimr(serverSettings.SetWithSampleTables(false)); @@ -917,7 +915,6 @@ Y_UNIT_TEST_SUITE(KqpYql) { Y_UNIT_TEST(UuidPrimaryKeyBulkUpsert) { auto settings = TKikimrSettings() - .SetEnableUuidAsPrimaryKey(true) .SetWithSampleTables(false); auto kikimr = TKikimrRunner{settings}; auto db = kikimr.GetTableClient(); diff --git a/ydb/core/kqp/workload_service/actors/actors.h b/ydb/core/kqp/workload_service/actors/actors.h index 7bae3b142656..c575842faf78 100644 --- a/ydb/core/kqp/workload_service/actors/actors.h +++ b/ydb/core/kqp/workload_service/actors/actors.h @@ -15,4 +15,10 @@ NActors::IActor* CreatePoolResolverActor(TEvPlaceRequestIntoPool::TPtr event, bo NActors::IActor* CreatePoolFetcherActor(const NActors::TActorId& replyActorId, const TString& database, const TString& poolId, TIntrusiveConstPtr userToken); NActors::IActor* CreatePoolCreatorActor(const NActors::TActorId& replyActorId, const TString& database, const TString& poolId, const NResourcePool::TPoolSettings& poolConfig, TIntrusiveConstPtr userToken, NACLibProto::TDiffACL diffAcl); +// Checks that database is serverless +NActors::IActor* CreateDatabaseFetcherActor(const NActors::TActorId& replyActorId, const TString& database); + +// Cpu load fetcher actor +NActors::IActor* CreateCpuLoadFetcherActor(const NActors::TActorId& replyActorId); + } // NKikimr::NKqp::NWorkload diff --git a/ydb/core/kqp/workload_service/actors/cpu_load_actors.cpp b/ydb/core/kqp/workload_service/actors/cpu_load_actors.cpp new file mode 
100644 index 000000000000..425c8cfbd30e --- /dev/null +++ b/ydb/core/kqp/workload_service/actors/cpu_load_actors.cpp @@ -0,0 +1,77 @@ +#include "actors.h" + +#include + +#include + + +namespace NKikimr::NKqp::NWorkload { + +namespace { + +class TCpuLoadFetcherActor : public NKikimr::TQueryBase { + using TBase = NKikimr::TQueryBase; + +public: + TCpuLoadFetcherActor() + : TBase(NKikimrServices::KQP_WORKLOAD_SERVICE) + { + SetOperationInfo(__func__, ""); + } + + void OnRunQuery() override { + TString sql = TStringBuilder() << R"( + -- TCpuLoadFetcherActor::OnRunQuery + + SELECT + SUM(CpuThreads) AS ThreadsCount, + SUM(CpuThreads * (1.0 - CpuIdle)) AS TotalLoad + FROM `.sys/nodes`; + )"; + + RunDataQuery(sql); + } + + void OnQueryResult() override { + if (ResultSets.size() != 1) { + Finish(Ydb::StatusIds::INTERNAL_ERROR, "Unexpected database response"); + return; + } + + NYdb::TResultSetParser result(ResultSets[0]); + if (!result.TryNextRow()) { + Finish(Ydb::StatusIds::INTERNAL_ERROR, "Unexpected database response"); + return; + } + + ThreadsCount = result.ColumnParser("ThreadsCount").GetOptionalUint64().GetOrElse(0); + TotalLoad = result.ColumnParser("TotalLoad").GetOptionalDouble().GetOrElse(0.0); + + if (!ThreadsCount) { + Finish(Ydb::StatusIds::NOT_FOUND, "Cpu info not found"); + return; + } + + Finish(); + } + + void OnFinish(Ydb::StatusIds::StatusCode status, NYql::TIssues&& issues) override { + if (status == Ydb::StatusIds::SUCCESS) { + Send(Owner, new TEvPrivate::TEvCpuLoadResponse(Ydb::StatusIds::SUCCESS, TotalLoad / ThreadsCount, ThreadsCount, std::move(issues))); + } else { + Send(Owner, new TEvPrivate::TEvCpuLoadResponse(status, 0.0, 0, std::move(issues))); + } + } + +private: + double TotalLoad = 0.0; + ui64 ThreadsCount = 0; +}; + +} // anonymous namespace + +IActor* CreateCpuLoadFetcherActor(const TActorId& replyActorId) { + return new TQueryRetryActor(replyActorId); +} + +} // NKikimr::NKqp::NWorkload diff --git 
a/ydb/core/kqp/workload_service/actors/pool_handlers_acors.cpp b/ydb/core/kqp/workload_service/actors/pool_handlers_acors.cpp index 76b72ceee20b..921944a0db41 100644 --- a/ydb/core/kqp/workload_service/actors/pool_handlers_acors.cpp +++ b/ydb/core/kqp/workload_service/actors/pool_handlers_acors.cpp @@ -22,6 +22,80 @@ template class TPoolHandlerActorBase : public TActor { using TBase = TActor; + struct TCommonCounters { + const NMonitoring::TDynamicCounterPtr CountersRoot; + const NMonitoring::TDynamicCounterPtr CountersSubgroup; + + // Workload service counters + NMonitoring::TDynamicCounters::TCounterPtr ActivePoolHandlers; + + // Pool counters + NMonitoring::TDynamicCounters::TCounterPtr LocalInFly; + NMonitoring::TDynamicCounters::TCounterPtr LocalDelayedRequests; + NMonitoring::TDynamicCounters::TCounterPtr ContinueOk; + NMonitoring::TDynamicCounters::TCounterPtr ContinueOverloaded; + NMonitoring::TDynamicCounters::TCounterPtr ContinueError; + NMonitoring::TDynamicCounters::TCounterPtr CleanupOk; + NMonitoring::TDynamicCounters::TCounterPtr CleanupError; + NMonitoring::TDynamicCounters::TCounterPtr Cancelled; + NMonitoring::THistogramPtr DelayedTimeMs; + NMonitoring::THistogramPtr RequestsLatencyMs; + + // Config counters + NMonitoring::TDynamicCounters::TCounterPtr InFlightLimit; + NMonitoring::TDynamicCounters::TCounterPtr QueueSizeLimit; + NMonitoring::TDynamicCounters::TCounterPtr LoadCpuThreshold; + + TCommonCounters(NMonitoring::TDynamicCounterPtr counters, const TString& database, const TString& poolId, const NResourcePool::TPoolSettings& poolConfig) + : CountersRoot(counters) + , CountersSubgroup(counters->GetSubgroup("pool", CanonizePath(TStringBuilder() << database << "/" << poolId))) + { + Register(); + UpdateConfigCounters(poolConfig); + } + + void CollectRequestLatency(TInstant continueTime) { + if (continueTime) { + RequestsLatencyMs->Collect((TInstant::Now() - continueTime).MilliSeconds()); + } + } + + void UpdateConfigCounters(const 
NResourcePool::TPoolSettings& poolConfig) { + InFlightLimit->Set(std::max(poolConfig.ConcurrentQueryLimit, 0)); + QueueSizeLimit->Set(std::max(poolConfig.QueueSize, 0)); + LoadCpuThreshold->Set(std::max(poolConfig.DatabaseLoadCpuThreshold, 0.0)); + } + + void OnCleanup() { + ActivePoolHandlers->Dec(); + + InFlightLimit->Set(0); + QueueSizeLimit->Set(0); + LoadCpuThreshold->Set(0); + } + + private: + void Register() { + ActivePoolHandlers = CountersRoot->GetCounter("ActivePoolHandlers", false); + ActivePoolHandlers->Inc(); + + LocalInFly = CountersSubgroup->GetCounter("LocalInFly", false); + LocalDelayedRequests = CountersSubgroup->GetCounter("LocalDelayedRequests", false); + ContinueOk = CountersSubgroup->GetCounter("ContinueOk", true); + ContinueOverloaded = CountersSubgroup->GetCounter("ContinueOverloaded", true); + ContinueError = CountersSubgroup->GetCounter("ContinueError", true); + CleanupOk = CountersSubgroup->GetCounter("CleanupOk", true); + CleanupError = CountersSubgroup->GetCounter("CleanupError", true); + Cancelled = CountersSubgroup->GetCounter("Cancelled", true); + DelayedTimeMs = CountersSubgroup->GetHistogram("DelayedTimeMs", NMonitoring::ExponentialHistogram(20, 2, 4)); + RequestsLatencyMs = CountersSubgroup->GetHistogram("RequestsLatencyMs", NMonitoring::ExponentialHistogram(20, 2, 4)); + + InFlightLimit = CountersSubgroup->GetCounter("InFlightLimit", false); + QueueSizeLimit = CountersSubgroup->GetCounter("QueueSizeLimit", false); + LoadCpuThreshold = CountersSubgroup->GetCounter("LoadCpuThreshold", false); + } + }; + protected: struct TRequest { enum class EState { @@ -38,26 +112,26 @@ class TPoolHandlerActorBase : public TActor { const TActorId WorkerActorId; const TString SessionId; const TInstant StartTime = TInstant::Now(); + TInstant ContinueTime; EState State = EState::Pending; bool Started = false; // after TEvContinueRequest success bool CleanupRequired = false; + bool UsedCpuQuota = false; + TDuration Duration; + TDuration CpuConsumed; 
}; public: TPoolHandlerActorBase(void (TDerived::* requestFunc)(TAutoPtr& ev), const TString& database, const TString& poolId, const NResourcePool::TPoolSettings& poolConfig, NMonitoring::TDynamicCounterPtr counters) : TBase(requestFunc) - , CountersRoot(counters) - , CountersSubgroup(counters->GetSubgroup("pool", CanonizePath(TStringBuilder() << database << "/" << poolId))) + , Counters(counters, database, poolId, poolConfig) , Database(database) , PoolId(poolId) , QueueSizeLimit(GetMaxQueueSize(poolConfig)) , InFlightLimit(GetMaxInFlight(poolConfig)) , PoolConfig(poolConfig) - , CancelAfter(poolConfig.QueryCancelAfter) - { - RegisterCounters(); - } + {} STRICT_STFUNC(StateFuncBase, // Workload service events @@ -74,7 +148,7 @@ class TPoolHandlerActorBase : public TActor { // Schemeboard events hFunc(TEvTxProxySchemeCache::TEvWatchNotifyUpdated, Handle); - IgnoreFunc(TEvTxProxySchemeCache::TEvWatchNotifyDeleted); + hFunc(TEvTxProxySchemeCache::TEvWatchNotifyDeleted, Handle); IgnoreFunc(TEvTxProxySchemeCache::TEvWatchNotifyUnavailable); ) @@ -83,7 +157,9 @@ class TPoolHandlerActorBase : public TActor { this->Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvWatchRemove(0)); } - ActivePoolHandlers->Dec(); + SendPoolInfoUpdate(std::nullopt, std::nullopt); + + Counters.OnCleanup(); TBase::PassAway(); } @@ -118,12 +194,12 @@ class TPoolHandlerActorBase : public TActor { } LOG_D("Received new request, worker id: " << workerActorId << ", session id: " << sessionId); - if (CancelAfter) { - this->Schedule(CancelAfter, new TEvPrivate::TEvCancelRequest(sessionId)); + if (auto cancelAfter = PoolConfig.QueryCancelAfter) { + this->Schedule(cancelAfter, new TEvPrivate::TEvCancelRequest(sessionId)); } TRequest* request = &LocalSessions.insert({sessionId, TRequest(workerActorId, sessionId)}).first->second; - LocalDelayedRequests->Inc(); + Counters.LocalDelayedRequests->Inc(); UpdatePoolConfig(ev->Get()->PoolConfig); UpdateSchemeboardSubscription(ev->Get()->PathId); @@ -146,8 
+222,10 @@ class TPoolHandlerActorBase : public TActor { return; } request->State = TRequest::EState::Finishing; + request->Duration = ev->Get()->Duration; + request->CpuConsumed = ev->Get()->CpuConsumed; - LOG_D("Received cleanup request, worker id: " << workerActorId << ", session id: " << sessionId); + LOG_D("Received cleanup request, worker id: " << workerActorId << ", session id: " << sessionId << ", duration: " << request->Duration << ", cpu consumed: " << request->CpuConsumed); OnCleanupRequest(request); } @@ -185,6 +263,24 @@ class TPoolHandlerActorBase : public TActor { NResourcePool::TPoolSettings poolConfig; ParsePoolSettings(result->GetPathDescription().GetResourcePoolDescription(), poolConfig); UpdatePoolConfig(poolConfig); + + const auto& pathDescription = result->GetPathDescription().GetSelf(); + NACLib::TSecurityObject object(pathDescription.GetOwner(), false); + if (object.MutableACL()->ParseFromString(pathDescription.GetEffectiveACL())) { + SendPoolInfoUpdate(poolConfig, object); + } else { + SendPoolInfoUpdate(poolConfig, std::nullopt); + } + } + + void Handle(TEvTxProxySchemeCache::TEvWatchNotifyDeleted::TPtr& ev) { + if (ev->Get()->Key != WatchKey) { + // Skip old paths watch notifications + return; + } + + LOG_D("Got delete notification"); + SendPoolInfoUpdate(std::nullopt, std::nullopt); } public: @@ -198,25 +294,26 @@ class TPoolHandlerActorBase : public TActor { if (status == Ydb::StatusIds::SUCCESS) { LocalInFlight++; request->Started = true; - LocalInFly->Inc(); - ContinueOk->Inc(); - DelayedTimeMs->Collect((TInstant::Now() - request->StartTime).MilliSeconds()); + request->ContinueTime = TInstant::Now(); + Counters.LocalInFly->Inc(); + Counters.ContinueOk->Inc(); + Counters.DelayedTimeMs->Collect((TInstant::Now() - request->StartTime).MilliSeconds()); LOG_D("Reply continue success to " << request->WorkerActorId << ", session id: " << request->SessionId << ", local in flight: " << LocalInFlight); } else { if (status == 
Ydb::StatusIds::OVERLOADED) { - ContinueOverloaded->Inc(); + Counters.ContinueOverloaded->Inc(); LOG_I("Reply overloaded to " << request->WorkerActorId << ", session id: " << request->SessionId << ", issues: " << issues.ToOneLineString()); } else if (status == Ydb::StatusIds::CANCELLED) { - Cancelled->Inc(); + Counters.Cancelled->Inc(); LOG_I("Reply cancelled to " << request->WorkerActorId << ", session id: " << request->SessionId << ", issues: " << issues.ToOneLineString()); } else { - ContinueError->Inc(); + Counters.ContinueError->Inc(); LOG_W("Reply continue error " << status << " to " << request->WorkerActorId << ", session id: " << request->SessionId << ", issues: " << issues.ToOneLineString()); } - RemoveRequest(request->SessionId); + RemoveRequest(request); } - LocalDelayedRequests->Dec(); + Counters.LocalDelayedRequests->Dec(); } void FinalReply(TRequest* request, Ydb::StatusIds::StatusCode status, const TString& message) { @@ -235,9 +332,9 @@ class TPoolHandlerActorBase : public TActor { if (request->Started) { LocalInFlight--; - LocalInFly->Dec(); + Counters.LocalInFly->Dec(); } else { - LocalDelayedRequests->Dec(); + Counters.LocalDelayedRequests->Dec(); } if (request->State == TRequest::EState::Canceling) { @@ -246,7 +343,11 @@ class TPoolHandlerActorBase : public TActor { ReplyCleanup(request, status, issues); } - RemoveRequest(request->SessionId); + RemoveRequest(request); + } + + void SendPoolInfoUpdate(const std::optional& config, const std::optional& securityObject) const { + this->Send(MakeKqpProxyID(this->SelfId().NodeId()), new TEvUpdatePoolInfo(Database, PoolId, config, securityObject)); } protected: @@ -273,9 +374,13 @@ class TPoolHandlerActorBase : public TActor { return nullptr; } - void RemoveRequest(const TString& sessionId) { - LocalSessions.erase(sessionId); - this->Send(MakeKqpWorkloadServiceId(this->SelfId().NodeId()), new TEvPrivate::TEvFinishRequestInPool(Database, PoolId)); + void RemoveRequest(TRequest* request) { + auto event = 
std::make_unique( + Database, PoolId, request->Duration, request->CpuConsumed, request->UsedCpuQuota + ); + this->Send(MakeKqpWorkloadServiceId(this->SelfId().NodeId()), event.release()); + + LocalSessions.erase(request->SessionId); if (StopHandler && LocalSessions.empty()) { LOG_I("All requests finished, stop handler"); PassAway(); @@ -291,10 +396,17 @@ class TPoolHandlerActorBase : public TActor { } TMaybe GetWaitDeadline(TInstant startTime) const { - if (!CancelAfter) { + if (auto cancelAfter = PoolConfig.QueryCancelAfter) { + return startTime + cancelAfter; + } + return Nothing(); + } + + TMaybe GetLoadCpuThreshold() const { + if (PoolConfig.DatabaseLoadCpuThreshold < 0.0) { return Nothing(); } - return startTime + CancelAfter; + return PoolConfig.DatabaseLoadCpuThreshold; } TString LogPrefix() const { @@ -306,11 +418,11 @@ class TPoolHandlerActorBase : public TActor { this->Send(request->WorkerActorId, new TEvCleanupResponse(status, issues)); if (status == Ydb::StatusIds::SUCCESS) { - CleanupOk->Inc(); - RequestsLatencyMs->Collect((TInstant::Now() - request->StartTime).MilliSeconds()); + Counters.CleanupOk->Inc(); + Counters.CollectRequestLatency(request->ContinueTime); LOG_D("Reply cleanup success to " << request->WorkerActorId << ", session id: " << request->SessionId << ", local in flight: " << LocalInFlight); } else { - CleanupError->Inc(); + Counters.CleanupError->Inc(); LOG_W("Reply cleanup error " << status << " to " << request->WorkerActorId << ", session id: " << request->SessionId << ", issues: " << issues.ToOneLineString()); } } @@ -320,8 +432,8 @@ class TPoolHandlerActorBase : public TActor { ev->Record.MutableRequest()->SetSessionId(request->SessionId); this->Send(MakeKqpProxyID(this->SelfId().NodeId()), ev.release()); - Cancelled->Inc(); - RequestsLatencyMs->Collect((TInstant::Now() - request->StartTime).MilliSeconds()); + Counters.Cancelled->Inc(); + Counters.CollectRequestLatency(request->ContinueTime); LOG_I("Cancel request for worker " << 
request->WorkerActorId << ", session id: " << request->SessionId << ", local in flight: " << LocalInFlight); } @@ -349,13 +461,13 @@ class TPoolHandlerActorBase : public TActor { LOG_D("Pool config has changed, queue size: " << poolConfig.QueueSize << ", in flight limit: " << poolConfig.ConcurrentQueryLimit); PoolConfig = poolConfig; - CancelAfter = poolConfig.QueryCancelAfter; QueueSizeLimit = GetMaxQueueSize(poolConfig); InFlightLimit = GetMaxInFlight(poolConfig); + Counters.UpdateConfigCounters(poolConfig); RefreshState(true); if (ShouldResign()) { - const TActorId& newHandler = this->RegisterWithSameMailbox(CreatePoolHandlerActor(Database, PoolId, poolConfig, CountersRoot)); + const TActorId& newHandler = this->RegisterWithSameMailbox(CreatePoolHandlerActor(Database, PoolId, poolConfig, Counters.CountersRoot)); this->Send(MakeKqpWorkloadServiceId(this->SelfId().NodeId()), new TEvPrivate::TEvResignPoolHandler(Database, PoolId, newHandler)); } } @@ -370,25 +482,8 @@ class TPoolHandlerActorBase : public TActor { return concurrentQueryLimit == -1 ? 
std::numeric_limits::max() : static_cast(concurrentQueryLimit); } - void RegisterCounters() { - ActivePoolHandlers = CountersRoot->GetCounter("ActivePoolHandlers", false); - ActivePoolHandlers->Inc(); - - LocalInFly = CountersSubgroup->GetCounter("LocalInFly", false); - LocalDelayedRequests = CountersSubgroup->GetCounter("LocalDelayedRequests", false); - ContinueOk = CountersSubgroup->GetCounter("ContinueOk", true); - ContinueOverloaded = CountersSubgroup->GetCounter("ContinueOverloaded", true); - ContinueError = CountersSubgroup->GetCounter("ContinueError", true); - CleanupOk = CountersSubgroup->GetCounter("CleanupOk", true); - CleanupError = CountersSubgroup->GetCounter("CleanupError", true); - Cancelled = CountersSubgroup->GetCounter("Cancelled", true); - DelayedTimeMs = CountersSubgroup->GetHistogram("DelayedTimeMs", NMonitoring::ExponentialHistogram(20, 2, 4)); - RequestsLatencyMs = CountersSubgroup->GetHistogram("RequestsLatencyMs", NMonitoring::ExponentialHistogram(20, 2, 4)); - } - protected: - NMonitoring::TDynamicCounterPtr CountersRoot; - NMonitoring::TDynamicCounterPtr CountersSubgroup; + TCommonCounters Counters; // Configuration const TString Database; @@ -398,7 +493,6 @@ class TPoolHandlerActorBase : public TActor { private: NResourcePool::TPoolSettings PoolConfig; - TDuration CancelAfter; // Scheme board settings std::unique_ptr WatchPathId; @@ -408,19 +502,6 @@ class TPoolHandlerActorBase : public TActor { ui64 LocalInFlight = 0; std::unordered_map LocalSessions; bool StopHandler = false; // Stop than all requests finished - - // Counters - NMonitoring::TDynamicCounters::TCounterPtr ActivePoolHandlers; - NMonitoring::TDynamicCounters::TCounterPtr LocalInFly; - NMonitoring::TDynamicCounters::TCounterPtr LocalDelayedRequests; - NMonitoring::TDynamicCounters::TCounterPtr ContinueOk; - NMonitoring::TDynamicCounters::TCounterPtr ContinueOverloaded; - NMonitoring::TDynamicCounters::TCounterPtr ContinueError; - NMonitoring::TDynamicCounters::TCounterPtr 
CleanupOk; - NMonitoring::TDynamicCounters::TCounterPtr CleanupError; - NMonitoring::TDynamicCounters::TCounterPtr Cancelled; - NMonitoring::THistogramPtr DelayedTimeMs; - NMonitoring::THistogramPtr RequestsLatencyMs; }; @@ -436,7 +517,7 @@ class TUnlimitedPoolHandlerActor : public TPoolHandlerActorBase::max(); + return 0 < InFlightLimit && (InFlightLimit < std::numeric_limits::max() || GetLoadCpuThreshold()); } void OnScheduleRequest(TRequest* request) override { @@ -452,20 +533,59 @@ class TUnlimitedPoolHandlerActor : public TPoolHandlerActorBase { using TBase = TPoolHandlerActorBase; + struct TCounters { + // Fifo pool counters + NMonitoring::TDynamicCounters::TCounterPtr PendingRequestsCount; + NMonitoring::TDynamicCounters::TCounterPtr FinishingRequestsCount; + NMonitoring::TDynamicCounters::TCounterPtr GlobalInFly; + NMonitoring::TDynamicCounters::TCounterPtr GlobalDelayedRequests; + NMonitoring::THistogramPtr PoolStateUpdatesBacklogMs; + + TCounters(NMonitoring::TDynamicCounterPtr countersSubgroup) { + Register(countersSubgroup); + } + + void UpdateGlobalState(const TPoolStateDescription& description) { + GlobalInFly->Set(description.RunningRequests); + GlobalDelayedRequests->Set(description.DelayedRequests); + } + + void OnCleanup() { + GlobalInFly->Set(0); + GlobalDelayedRequests->Set(0); + } + + private: + void Register(NMonitoring::TDynamicCounterPtr countersSubgroup) { + PendingRequestsCount = countersSubgroup->GetCounter("PendingRequestsCount", false); + FinishingRequestsCount = countersSubgroup->GetCounter("FinishingRequestsCount", false); + GlobalInFly = countersSubgroup->GetCounter("GlobalInFly", false); + GlobalDelayedRequests = countersSubgroup->GetCounter("GlobalDelayedRequests", false); + PoolStateUpdatesBacklogMs = countersSubgroup->GetHistogram("PoolStateUpdatesBacklogMs", NMonitoring::LinearHistogram(20, 0, 3 * LEASE_DURATION.MillisecondsFloat() / 40)); + } + }; + + enum class EStartRequestCase { + Pending, + Delayed + }; + static constexpr 
ui64 MAX_PENDING_REQUESTS = 1000; public: TFifoPoolHandlerActor(const TString& database, const TString& poolId, const NResourcePool::TPoolSettings& poolConfig, NMonitoring::TDynamicCounterPtr counters) : TBase(&TFifoPoolHandlerActor::StateFunc, database, poolId, poolConfig, counters) + , FifoCounters(Counters.CountersSubgroup) { Y_ENSURE(!ShouldResign()); - RegisterCounters(); } STFUNC(StateFunc) { switch (ev->GetTypeRewrite()) { sFunc(TEvents::TEvWakeup, HandleRefreshState); sFunc(TEvPrivate::TEvRefreshPoolState, HandleExternalRefreshState); + hFunc(TEvPrivate::TEvCpuQuotaResponse, Handle); + hFunc(TEvPrivate::TEvNodesInfoResponse, Handle); hFunc(TEvPrivate::TEvTablesCreationFinished, Handle); hFunc(TEvPrivate::TEvRefreshPoolStateResponse, Handle); @@ -478,25 +598,24 @@ class TFifoPoolHandlerActor : public TPoolHandlerActorBaseSet(0); - GlobalDelayedRequests->Set(0); + FifoCounters.OnCleanup(); TBase::PassAway(); } protected: bool ShouldResign() const override { - return InFlightLimit == 0 || InFlightLimit == std::numeric_limits::max(); + return InFlightLimit == 0 || (InFlightLimit == std::numeric_limits::max() && !GetLoadCpuThreshold()); } void OnScheduleRequest(TRequest* request) override { - if (PendingRequests.size() >= MAX_PENDING_REQUESTS || GetLocalSessionsCount() - GetLocalInFlight() > QueueSizeLimit + 1) { + if (PendingRequests.size() >= MAX_PENDING_REQUESTS || SaturationSub(GetLocalSessionsCount() - GetLocalInFlight(), InFlightLimit) > QueueSizeLimit) { ReplyContinue(request, Ydb::StatusIds::OVERLOADED, TStringBuilder() << "Too many pending requests for pool " << PoolId); return; } PendingRequests.emplace_back(request->SessionId); - PendingRequestsCount->Inc(); + FifoCounters.PendingRequestsCount->Inc(); if (!PreparingFinished) { this->Send(MakeKqpWorkloadServiceId(this->SelfId().NodeId()), new TEvPrivate::TEvPrepareTablesRequest(Database, PoolId)); @@ -515,6 +634,11 @@ class TFifoPoolHandlerActor : public TPoolHandlerActorBase LEASE_DURATION) { + 
WaitingNodesInfo = true; + this->Send(MakeKqpWorkloadServiceId(this->SelfId().NodeId()), new TEvPrivate::TEvNodesInfoRequest()); + } + RefreshRequired |= refreshRequired; if (!PreparingFinished) { return; @@ -532,7 +656,7 @@ class TFifoPoolHandlerActor : public TPoolHandlerActorBaseRegister(CreateRefreshPoolStateActor(this->SelfId(), Database, PoolId, LEASE_DURATION, CountersSubgroup)); + this->Register(CreateRefreshPoolStateActor(this->SelfId(), Database, PoolId, LEASE_DURATION, Counters.CountersSubgroup)); } } @@ -552,6 +676,14 @@ class TFifoPoolHandlerActor : public TPoolHandlerActorBaseGet()->NodeCount; + + LOG_T("Updated node info, noode count: " << NodeCount); + } + void Handle(TEvPrivate::TEvTablesCreationFinished::TPtr& ev) { if (ev->Get()->Success) { PreparingFinished = true; @@ -565,7 +697,7 @@ class TFifoPoolHandlerActor : public TPoolHandlerActorBaseSet(0); + FifoCounters.PendingRequestsCount->Set(0); } void Handle(TEvPrivate::TEvRefreshPoolStateResponse::TPtr& ev) { @@ -579,7 +711,7 @@ class TFifoPoolHandlerActor : public TPoolHandlerActorBaseCollect((TInstant::Now() - LastRefreshTime).MilliSeconds()); + FifoCounters.PoolStateUpdatesBacklogMs->Collect((TInstant::Now() - LastRefreshTime).MilliSeconds()); } LastRefreshTime = TInstant::Now(); @@ -587,20 +719,19 @@ class TFifoPoolHandlerActor : public TPoolHandlerActorBaseSet(GlobalState.RunningRequests); - GlobalDelayedRequests->Set(GlobalState.DelayedRequests); + FifoCounters.UpdateGlobalState(GlobalState); LOG_T("succefully refreshed pool state, in flight: " << GlobalState.RunningRequests << ", delayed: " << GlobalState.DelayedRequests); RemoveFinishedRequests(); size_t delayedRequestsCount = DelayedRequests.size(); - DoStartPendingRequest(); + DoStartPendingRequest(GetLoadCpuThreshold()); - if (GlobalState.DelayedRequests + PendingRequests.size() > QueueSizeLimit) { - RemoveBackRequests(PendingRequests, std::min(GlobalState.DelayedRequests + PendingRequests.size() - QueueSizeLimit, 
PendingRequests.size()), [this](TRequest* request) { + if (const ui64 delayedRequests = SaturationSub(GlobalState.AmountRequests() + PendingRequests.size(), InFlightLimit); delayedRequests > QueueSizeLimit) { + RemoveBackRequests(PendingRequests, std::min(delayedRequests - QueueSizeLimit, PendingRequests.size()), [this](TRequest* request) { ReplyContinue(request, Ydb::StatusIds::OVERLOADED, TStringBuilder() << "Too many pending requests for pool " << PoolId); }); - PendingRequestsCount->Set(PendingRequests.size()); + FifoCounters.PendingRequestsCount->Set(PendingRequests.size()); } if (PendingRequests.empty() && delayedRequestsCount > QueueSizeLimit) { @@ -611,7 +742,7 @@ class TFifoPoolHandlerActor : public TPoolHandlerActorBaseInc(); + FifoCounters.GlobalDelayedRequests->Inc(); LOG_D("succefully delayed request, session id: " << ev->Get()->SessionId); - DoStartDelayedRequest(); + DoStartDelayedRequest(GetLoadCpuThreshold()); RefreshState(); }; + void Handle(TEvPrivate::TEvCpuQuotaResponse::TPtr& ev) { + RunningOperation = false; + + if (!ev->Get()->QuotaAccepted) { + LOG_D("Skipped request start due to load cpu threshold"); + if (static_cast(ev->Cookie) == EStartRequestCase::Pending) { + ForEachUnfinished(DelayedRequests.begin(), DelayedRequests.end(), [this](TRequest* request) { + AddFinishedRequest(request->SessionId); + ReplyContinue(request, Ydb::StatusIds::OVERLOADED, TStringBuilder() << "Too many pending requests for pool " << PoolId); + }); + } + RefreshState(); + return; + } + + RemoveFinishedRequests(); + switch (static_cast(ev->Cookie)) { + case EStartRequestCase::Pending: + if (!RunningOperation && !DelayedRequests.empty()) { + RunningOperation = true; + const TString& sessionId = DelayedRequests.front(); + this->Register(CreateStartRequestActor(this->SelfId(), Database, PoolId, sessionId, LEASE_DURATION, Counters.CountersSubgroup)); + GetRequest(sessionId)->CleanupRequired = true; + } + break; + + case EStartRequestCase::Delayed: + 
DoStartDelayedRequest(Nothing()); + break; + } + + RefreshState(); + } + void Handle(TEvPrivate::TEvStartRequestResponse::TPtr& ev) { RunningOperation = false; @@ -668,14 +833,15 @@ class TFifoPoolHandlerActor : public TPoolHandlerActorBaseSessionId == sessionId) { + request->UsedCpuQuota = !!GetLoadCpuThreshold(); requestFound = true; GlobalState.RunningRequests++; - GlobalInFly->Inc(); + FifoCounters.GlobalInFly->Inc(); ReplyContinue(request); } else { // Request was dropped due to lease expiration PendingRequests.emplace_front(request->SessionId); - PendingRequestsCount->Inc(); + FifoCounters.PendingRequestsCount->Inc(); } }); DelayedRequests.pop_front(); @@ -706,22 +872,28 @@ class TFifoPoolHandlerActor : public TPoolHandlerActorBase loadCpuThreshold) { RemoveFinishedRequests(); if (RunningOperation) { return; } - if (!PendingRequests.empty() && QueueSizeLimit == 0 && GlobalState.RunningRequests < InFlightLimit) { + bool canStartRequest = QueueSizeLimit == 0 && GlobalState.RunningRequests < InFlightLimit; + canStartRequest |= !GetLoadCpuThreshold() && DelayedRequests.size() + GlobalState.DelayedRequests == 0 && NodeCount && GlobalState.RunningRequests + NodeCount < InFlightLimit; + if (!PendingRequests.empty() && canStartRequest) { RunningOperation = true; const TString& sessionId = PopPendingRequest(); - this->Register(CreateStartRequestActor(this->SelfId(), Database, PoolId, sessionId, LEASE_DURATION, CountersSubgroup)); DelayedRequests.emplace_front(sessionId); - GetRequest(sessionId)->CleanupRequired = true; + if (loadCpuThreshold) { + RequestCpuQuota(*loadCpuThreshold, EStartRequestCase::Pending); + } else { + this->Register(CreateStartRequestActor(this->SelfId(), Database, PoolId, sessionId, LEASE_DURATION, Counters.CountersSubgroup)); + GetRequest(sessionId)->CleanupRequired = true; + } } } - void DoStartDelayedRequest() { + void DoStartDelayedRequest(TMaybe loadCpuThreshold) { RemoveFinishedRequests(); if (RunningOperation) { return; @@ -729,7 +901,11 
@@ class TFifoPoolHandlerActor : public TPoolHandlerActorBaseRegister(CreateStartRequestActor(this->SelfId(), Database, PoolId, std::nullopt, LEASE_DURATION, CountersSubgroup)); + if (loadCpuThreshold) { + RequestCpuQuota(*loadCpuThreshold, EStartRequestCase::Delayed); + } else { + this->Register(CreateStartRequestActor(this->SelfId(), Database, PoolId, std::nullopt, LEASE_DURATION, Counters.CountersSubgroup)); + } } } @@ -743,7 +919,7 @@ class TFifoPoolHandlerActor : public TPoolHandlerActorBaseRegister(CreateDelayRequestActor(this->SelfId(), Database, PoolId, sessionId, request->StartTime, GetWaitDeadline(request->StartTime), LEASE_DURATION, CountersSubgroup)); + this->Register(CreateDelayRequestActor(this->SelfId(), Database, PoolId, sessionId, request->StartTime, GetWaitDeadline(request->StartTime), LEASE_DURATION, Counters.CountersSubgroup)); DelayedRequests.emplace_back(sessionId); request->CleanupRequired = true; } @@ -756,9 +932,9 @@ class TFifoPoolHandlerActor : public TPoolHandlerActorBaseRegister(CreateCleanupRequestsActor(this->SelfId(), Database, PoolId, FinishedRequests, CountersSubgroup)); + this->Register(CreateCleanupRequestsActor(this->SelfId(), Database, PoolId, FinishedRequests, Counters.CountersSubgroup)); FinishedRequests.clear(); - FinishingRequestsCount->Set(0); + FifoCounters.FinishingRequestsCount->Set(0); } } @@ -770,6 +946,10 @@ class TFifoPoolHandlerActor : public TPoolHandlerActorBaseSchedule(LEASE_DURATION / 2, new TEvents::TEvWakeup()); } + void RequestCpuQuota(double loadCpuThreshold, EStartRequestCase requestCase) const { + this->Send(MakeKqpWorkloadServiceId(this->SelfId().NodeId()), new TEvPrivate::TEvCpuQuotaRequest(loadCpuThreshold / 100.0), 0, static_cast(requestCase)); + } + private: void RemoveFinishedRequests() { if (RunningOperation) { @@ -778,7 +958,7 @@ class TFifoPoolHandlerActor : public TPoolHandlerActorBaseSet(PendingRequests.size()); + FifoCounters.PendingRequestsCount->Set(PendingRequests.size()); } void 
RemoveFinishedRequests(std::deque& requests) { @@ -819,24 +999,18 @@ class TFifoPoolHandlerActor : public TPoolHandlerActorBaseDec(); + FifoCounters.PendingRequestsCount->Dec(); return sessionId; } void AddFinishedRequest(const TString& sessionId) { FinishedRequests.emplace_back(sessionId); - FinishingRequestsCount->Inc(); - } - - void RegisterCounters() { - PendingRequestsCount = CountersSubgroup->GetCounter("PendingRequestsCount", false); - FinishingRequestsCount = CountersSubgroup->GetCounter("FinishingRequestsCount", false); - GlobalInFly = CountersSubgroup->GetCounter("GlobalInFly", false); - GlobalDelayedRequests = CountersSubgroup->GetCounter("GlobalDelayedRequests", false); - PoolStateUpdatesBacklogMs = CountersSubgroup->GetHistogram("PoolStateUpdatesBacklogMs", NMonitoring::LinearHistogram(20, 0, 3 * LEASE_DURATION.MillisecondsFloat() / 40)); + FifoCounters.FinishingRequestsCount->Inc(); } private: + TCounters FifoCounters; + bool PreparingFinished = false; bool RefreshRequired = false; bool RunningOperation = false; @@ -849,17 +1023,15 @@ class TFifoPoolHandlerActor : public TPoolHandlerActorBase +#include #include #include #include +#include #include #include @@ -63,7 +65,13 @@ class TPoolResolverActor : public TActorBootstrapped { for (const TString& usedSid : AppData()->AdministrationAllowedSIDs) { diffAcl.AddAccess(NACLib::EAccessType::Allow, NACLib::EAccessRights::GenericFull, usedSid); } - diffAcl.AddAccess(NACLib::EAccessType::Allow, NACLib::EAccessRights::SelectRow | NACLib::EAccessRights::DescribeSchema, AppData()->AllAuthenticatedUsers); + + auto useAccess = NACLib::EAccessRights::SelectRow | NACLib::EAccessRights::DescribeSchema; + for (const auto& userSID : AppData()->DefaultUserSIDs) { + diffAcl.AddAccess(NACLib::EAccessType::Allow, useAccess, userSID); + } + diffAcl.AddAccess(NACLib::EAccessType::Allow, useAccess, AppData()->AllAuthenticatedUsers); + diffAcl.AddAccess(NACLib::EAccessType::Allow, useAccess, BUILTIN_ACL_ROOT); auto token = 
MakeIntrusive(BUILTIN_ACL_METADATA, TVector{}); Register(CreatePoolCreatorActor(SelfId(), Event->Get()->Database, Event->Get()->PoolId, NResourcePool::TPoolSettings(), token, diffAcl)); @@ -114,7 +122,7 @@ class TPoolResolverActor : public TActorBootstrapped { class TPoolFetcherActor : public TSchemeActorBase { public: - TPoolFetcherActor(const NActors::TActorId& replyActorId, const TString& database, const TString& poolId, TIntrusiveConstPtr userToken) + TPoolFetcherActor(const TActorId& replyActorId, const TString& database, const TString& poolId, TIntrusiveConstPtr userToken) : ReplyActorId(replyActorId) , Database(database) , PoolId(poolId) @@ -246,38 +254,67 @@ class TPoolCreatorActor : public TSchemeActorBase { } void Handle(TEvTxUserProxy::TEvProposeTransactionStatus::TPtr& ev) { - const auto ssStatus = ev->Get()->Record.GetSchemeShardStatus(); - switch (ev->Get()->Status()) { + const auto& response = ev->Get()->Record; + const auto ssStatus = response.GetSchemeShardStatus(); + const auto status = ev->Get()->Status(); + switch (status) { case NTxProxy::TResultStatus::ExecComplete: case NTxProxy::TResultStatus::ExecAlready: if (ssStatus == NKikimrScheme::EStatus::StatusSuccess || ssStatus == NKikimrScheme::EStatus::StatusAlreadyExists) { Reply(Ydb::StatusIds::SUCCESS); } else { - Reply(Ydb::StatusIds::SCHEME_ERROR, TStringBuilder() << "Invalid creation status: " << static_cast(ssStatus)); + Reply(Ydb::StatusIds::SCHEME_ERROR, ExtractIssues(response, TStringBuilder() << "Invalid creation status: " << static_cast(ssStatus))); } return; case NTxProxy::TResultStatus::ExecError: - if (ssStatus == NKikimrScheme::EStatus::StatusMultipleModifications || ssStatus == NKikimrScheme::EStatus::StatusInvalidParameter) { - ScheduleRetry(ssStatus, "Retry execution error", true); + if (ssStatus == NKikimrScheme::EStatus::StatusMultipleModifications) { + SubscribeOnTransactionOrRetry(status, response); } else { - Reply(Ydb::StatusIds::SCHEME_ERROR, TStringBuilder() << 
"Execution error: " << static_cast(ssStatus)); + Reply(Ydb::StatusIds::SCHEME_ERROR, ExtractIssues(response, TStringBuilder() << "Execution error: " << static_cast(ssStatus))); } return; case NTxProxy::TResultStatus::ExecInProgress: - ScheduleRetry(ssStatus, "Retry execution in progress error", true); + SubscribeOnTransactionOrRetry(status, response); return; case NTxProxy::TResultStatus::ProxyShardNotAvailable: - ScheduleRetry(ssStatus, "Retry shard unavailable error"); + ScheduleRetry(response, "Retry shard unavailable error"); return; default: - Reply(Ydb::StatusIds::SCHEME_ERROR, TStringBuilder() << "Failed to create resource pool: " << static_cast(ssStatus)); + Reply(Ydb::StatusIds::SCHEME_ERROR, ExtractIssues(response, TStringBuilder() << "Failed to create resource pool: " << static_cast(ssStatus))); return; } } + void Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev) { + if (ev->Get()->Status == NKikimrProto::OK) { + LOG_T("Tablet to pipe successfully connected"); + return; + } + + ClosePipeClient(); + ScheduleRetry(TStringBuilder() << "Tablet to pipe not connected: " << NKikimrProto::EReplyStatus_Name(ev->Get()->Status)); + } + + void Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev) { + const TActorId clientId = ev->Get()->ClientId; + if (!ClosedSchemePipeActors.contains(clientId)) { + ClosePipeClient(); + ScheduleRetry("Tablet to pipe destroyed"); + } + } + + void Handle(NSchemeShard::TEvSchemeShard::TEvNotifyTxCompletionResult::TPtr& ev) { + ScheduleRetry(TStringBuilder() << "Transaction " << ev->Get()->Record.GetTxId() << " completed, doublechecking"); + } + STFUNC(StateFunc) { switch (ev->GetTypeRewrite()) { hFunc(TEvTxUserProxy::TEvProposeTransactionStatus, Handle) + hFunc(TEvTabletPipe::TEvClientConnected, Handle) + hFunc(TEvTabletPipe::TEvClientDestroyed, Handle) + hFunc(NSchemeShard::TEvSchemeShard::TEvNotifyTxCompletionResult, Handle) + IgnoreFunc(NSchemeShard::TEvSchemeShard::TEvNotifyTxCompletionRegistered) + default: StateFuncBase(ev); } @@ 
-292,13 +329,12 @@ class TPoolCreatorActor : public TSchemeActorBase { schemeTx.SetWorkingDir(JoinPath({Database, ".resource_pools"})); schemeTx.SetOperationType(NKikimrSchemeOp::ESchemeOpCreateResourcePool); schemeTx.SetInternal(true); - schemeTx.SetAllowAccessToPrivatePaths(true); BuildCreatePoolRequest(*schemeTx.MutableCreateResourcePool()); BuildModifyAclRequest(*schemeTx.MutableModifyACL()); if (UserToken) { - event->Record.SetUserToken(UserToken->GetSerializedToken()); + event->Record.SetUserToken(UserToken->SerializeAsString()); } Send(MakeTxProxyID(), std::move(event)); @@ -313,10 +349,42 @@ class TPoolCreatorActor : public TSchemeActorBase { } private: - void ScheduleRetry(ui32 status, const TString& message, bool longDelay = false) { - auto ssStatus = static_cast(status); - if (!TBase::ScheduleRetry(TStringBuilder() << message << ", status: " << ssStatus, longDelay)) { - Reply(Ydb::StatusIds::UNAVAILABLE, TStringBuilder() << "Retry limit exceeded on status: " << ssStatus); + void SubscribeOnTransactionOrRetry(NTxProxy::TResultStatus::EStatus status, const NKikimrTxUserProxy::TEvProposeTransactionStatus& response) { + const ui64 txId = status == NTxProxy::TResultStatus::ExecInProgress ? 
response.GetTxId() : response.GetPathCreateTxId(); + if (txId == 0) { + ScheduleRetry(response, "Unable to subscribe to concurrent transaction", true); + return; + } + + SchemePipeActorId = Register(NTabletPipe::CreateClient(SelfId(), response.GetSchemeShardTabletId())); + + auto request = MakeHolder(); + request->Record.SetTxId(txId); + NTabletPipe::SendData(SelfId(), SchemePipeActorId, std::move(request)); + LOG_D("Subscribe on create pool tx: " << txId); + } + + void ClosePipeClient() { + if (SchemePipeActorId) { + ClosedSchemePipeActors.insert(SchemePipeActorId); + NTabletPipe::CloseClient(SelfId(), SchemePipeActorId); + SchemePipeActorId = {}; + } + } + + void ScheduleRetry(const NKikimrTxUserProxy::TEvProposeTransactionStatus& response, const TString& message, bool longDelay = false) { + ClosePipeClient(); + + auto ssStatus = static_cast(response.GetSchemeShardStatus()); + if (!TBase::ScheduleRetry(ExtractIssues(response, TStringBuilder() << message << ", status: " << ssStatus), longDelay)) { + Reply(Ydb::StatusIds::UNAVAILABLE, ExtractIssues(response, TStringBuilder() << "Retry limit exceeded on status: " << ssStatus)); + } + } + + void ScheduleRetry(const TString& message, bool longDelay = false) { + ClosePipeClient(); + if (!TBase::ScheduleRetry(message, longDelay)) { + Reply(Ydb::StatusIds::UNAVAILABLE, TStringBuilder() << "Retry limit exceeded on error: " << message); } } @@ -349,11 +417,19 @@ class TPoolCreatorActor : public TSchemeActorBase { LOG_W("Failed to create pool, " << status << ", issues: " << issues.ToOneLineString()); } + ClosePipeClient(); + Issues.AddIssues(std::move(issues)); Send(ReplyActorId, new TEvPrivate::TEvCreatePoolResponse(status, std::move(Issues))); PassAway(); } + static NYql::TIssues ExtractIssues(const NKikimrTxUserProxy::TEvProposeTransactionStatus& response, const TString& message) { + NYql::TIssues issues; + NYql::IssuesFromMessage(response.GetIssues(), issues); + return GroupIssues(issues, message); + } + private: const 
TActorId ReplyActorId; const TString Database; @@ -361,6 +437,100 @@ class TPoolCreatorActor : public TSchemeActorBase { const TIntrusiveConstPtr UserToken; const NACLibProto::TDiffACL DiffAcl; NResourcePool::TPoolSettings PoolConfig; + + std::unordered_set ClosedSchemePipeActors; + TActorId SchemePipeActorId; +}; + + +class TDatabaseFetcherActor : public TSchemeActorBase { +public: + TDatabaseFetcherActor(const TActorId& replyActorId, const TString& database) + : ReplyActorId(replyActorId) + , Database(database) + {} + + void DoBootstrap() { + Become(&TDatabaseFetcherActor::StateFunc); + } + + void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { + const auto& results = ev->Get()->Request->ResultSet; + if (results.size() != 1) { + Reply(Ydb::StatusIds::INTERNAL_ERROR, "Unexpected scheme cache response"); + return; + } + + const auto& result = results[0]; + switch (result.Status) { + case EStatus::Unknown: + case EStatus::PathNotTable: + case EStatus::PathNotPath: + case EStatus::RedirectLookupError: + case EStatus::AccessDenied: + case EStatus::RootUnknown: + case EStatus::PathErrorUnknown: + Reply(Ydb::StatusIds::NOT_FOUND, TStringBuilder() << "Database " << Database << " not found or you don't have access permissions"); + return; + case EStatus::LookupError: + case EStatus::TableCreationNotComplete: + if (!ScheduleRetry(TStringBuilder() << "Retry error " << result.Status)) { + Reply(Ydb::StatusIds::UNAVAILABLE, TStringBuilder() << "Retry limit exceeded on scheme error: " << result.Status); + } + return; + case EStatus::Ok: + Serverless = result.DomainInfo && result.DomainInfo->IsServerless(); + Reply(Ydb::StatusIds::SUCCESS); + return; + } + } + + STFUNC(StateFunc) { + switch (ev->GetTypeRewrite()) { + hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, Handle); + default: + StateFuncBase(ev); + } + } + +protected: + void StartRequest() override { + LOG_D("Start database fetching"); + auto event = 
NTableCreator::BuildSchemeCacheNavigateRequest({{}}, Database, nullptr); + event->ResultSet[0].Operation = NSchemeCache::TSchemeCacheNavigate::OpPath; + Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(event.Release()), IEventHandle::FlagTrackDelivery); + } + + void OnFatalError(Ydb::StatusIds::StatusCode status, NYql::TIssue issue) override { + Reply(status, {std::move(issue)}); + } + + TString LogPrefix() const override { + return TStringBuilder() << "[TDatabaseFetcherActor] ActorId: " << SelfId() << ", Database: " << Database << ", "; + } + +private: + void Reply(Ydb::StatusIds::StatusCode status, const TString& message) { + Reply(status, {NYql::TIssue(message)}); + } + + void Reply(Ydb::StatusIds::StatusCode status, NYql::TIssues issues = {}) { + if (status == Ydb::StatusIds::SUCCESS) { + LOG_D("Database info successfully fetched"); + } else { + LOG_W("Failed to fetch database info, " << status << ", issues: " << issues.ToOneLineString()); + } + + Issues.AddIssues(std::move(issues)); + Send(ReplyActorId, new TEvPrivate::TEvFetchDatabaseResponse(status, Database, Serverless, std::move(Issues))); + PassAway(); + } + +private: + const TActorId ReplyActorId; + const TString Database; + + bool Serverless = false; }; } // anonymous namespace @@ -377,4 +547,8 @@ IActor* CreatePoolCreatorActor(const TActorId& replyActorId, const TString& data return new TPoolCreatorActor(replyActorId, database, poolId, poolConfig, userToken, diffAcl); } +IActor* CreateDatabaseFetcherActor(const TActorId& replyActorId, const TString& database) { + return new TDatabaseFetcherActor(replyActorId, database); +} + } // NKikimr::NKqp::NWorkload diff --git a/ydb/core/kqp/workload_service/actors/ya.make b/ydb/core/kqp/workload_service/actors/ya.make index 01d4e7c9d663..774488a83d6a 100644 --- a/ydb/core/kqp/workload_service/actors/ya.make +++ b/ydb/core/kqp/workload_service/actors/ya.make @@ -1,6 +1,7 @@ LIBRARY() SRCS( + cpu_load_actors.cpp pool_handlers_acors.cpp 
scheme_actors.cpp ) diff --git a/ydb/core/kqp/workload_service/common/cpu_quota_manager.cpp b/ydb/core/kqp/workload_service/common/cpu_quota_manager.cpp new file mode 100644 index 000000000000..dd3a66183423 --- /dev/null +++ b/ydb/core/kqp/workload_service/common/cpu_quota_manager.cpp @@ -0,0 +1,156 @@ +#include "cpu_quota_manager.h" + +#include + + +namespace NKikimr::NKqp::NWorkload { + +//// TCpuQuotaManager::TCounters + +TCpuQuotaManager::TCounters::TCounters(const ::NMonitoring::TDynamicCounterPtr& subComponent) + : SubComponent(subComponent) +{ + Register(); +} + +void TCpuQuotaManager::TCounters::Register() { + RegisterCommonMetrics(CpuLoadRequest); + InstantLoadPercentage = SubComponent->GetCounter("InstantLoadPercentage", false); + AverageLoadPercentage = SubComponent->GetCounter("AverageLoadPercentage", false); + QuotedLoadPercentage = SubComponent->GetCounter("QuotedLoadPercentage", false); +} + +void TCpuQuotaManager::TCounters::RegisterCommonMetrics(TCommonMetrics& metrics) const { + metrics.Ok = SubComponent->GetCounter("Ok", true); + metrics.Error = SubComponent->GetCounter("Error", true); +} + +//// TCpuQuotaManager::TCpuQuotaResponse + +TCpuQuotaManager::TCpuQuotaResponse::TCpuQuotaResponse(int32_t currentLoad, NYdb::EStatus status, NYql::TIssues issues) + : CurrentLoad(currentLoad) + , Status(status) + , Issues(std::move(issues)) +{} + +//// TCpuQuotaManager + +TCpuQuotaManager::TCpuQuotaManager(TDuration monitoringRequestDelay, TDuration averageLoadInterval, TDuration idleTimeout, double defaultQueryLoad, bool strict, ui64 cpuNumber, const ::NMonitoring::TDynamicCounterPtr& subComponent) + : Counters(subComponent) + , MonitoringRequestDelay(monitoringRequestDelay) + , AverageLoadInterval(averageLoadInterval) + , IdleTimeout(idleTimeout) + , DefaultQueryLoad(defaultQueryLoad) + , Strict(strict) + , CpuNumber(cpuNumber) +{} + +double TCpuQuotaManager::GetInstantLoad() const { + return InstantLoad; +} + +double TCpuQuotaManager::GetAverageLoad() 
const { + return AverageLoad; +} + +TDuration TCpuQuotaManager::GetMonitoringRequestDelay() const { + return GetMonitoringRequestTime() - TInstant::Now(); +} + +TInstant TCpuQuotaManager::GetMonitoringRequestTime() const { + TDuration delay = MonitoringRequestDelay; + if (IdleTimeout && TInstant::Now() - LastRequestCpuQuota > IdleTimeout) { + delay = AverageLoadInterval / 2; + } + + return LastUpdateCpuLoad ? LastUpdateCpuLoad + delay : TInstant::Now(); +} + +void TCpuQuotaManager::UpdateCpuLoad(double instantLoad, ui64 cpuNumber, bool success) { + auto now = TInstant::Now(); + LastUpdateCpuLoad = now; + + if (!success) { + Counters.CpuLoadRequest.Error->Inc(); + CheckLoadIsOutdated(); + return; + } + + auto delta = now - LastCpuLoad; + LastCpuLoad = now; + + if (cpuNumber) { + CpuNumber = cpuNumber; + } + + InstantLoad = instantLoad; + // exponential moving average + if (!Ready || delta >= AverageLoadInterval) { + AverageLoad = InstantLoad; + QuotedLoad = InstantLoad; + } else { + auto ratio = static_cast(delta.GetValue()) / AverageLoadInterval.GetValue(); + AverageLoad = (1 - ratio) * AverageLoad + ratio * InstantLoad; + QuotedLoad = (1 - ratio) * QuotedLoad + ratio * InstantLoad; + } + Ready = true; + Counters.CpuLoadRequest.Ok->Inc(); + Counters.InstantLoadPercentage->Set(static_cast(InstantLoad * 100)); + Counters.AverageLoadPercentage->Set(static_cast(AverageLoad * 100)); + Counters.QuotedLoadPercentage->Set(static_cast(QuotedLoad * 100)); +} + +bool TCpuQuotaManager::CheckLoadIsOutdated() { + if (TInstant::Now() - LastCpuLoad > AverageLoadInterval) { + Ready = false; + QuotedLoad = 0.0; + Counters.QuotedLoadPercentage->Set(0); + } + return Ready; +} + +bool TCpuQuotaManager::HasCpuQuota(double maxClusterLoad) { + LastRequestCpuQuota = TInstant::Now(); + return maxClusterLoad == 0.0 || ((Ready || !Strict) && QuotedLoad < maxClusterLoad); +} + +TCpuQuotaManager::TCpuQuotaResponse TCpuQuotaManager::RequestCpuQuota(double quota, double maxClusterLoad) { + if 
(quota < 0.0 || quota > 1.0) { + return TCpuQuotaResponse(-1, NYdb::EStatus::OVERLOADED, {NYql::TIssue(TStringBuilder() << "Incorrect quota value (exceeds 1.0 or less than 0.0) " << quota)}); + } + quota = quota ? quota : DefaultQueryLoad; + + CheckLoadIsOutdated(); + if (!HasCpuQuota(maxClusterLoad)) { + return TCpuQuotaResponse(-1, NYdb::EStatus::OVERLOADED, {NYql::TIssue(TStringBuilder() + << "Cluster is overloaded, current quoted load " << static_cast(QuotedLoad * 100) + << "%, average load " << static_cast(AverageLoad * 100) << "%" + )}); + } + + QuotedLoad += quota; + Counters.QuotedLoadPercentage->Set(static_cast(QuotedLoad * 100)); + return TCpuQuotaResponse(QuotedLoad * 100); +} + +void TCpuQuotaManager::AdjustCpuQuota(double quota, TDuration duration, double cpuSecondsConsumed) { + if (!CpuNumber) { + return; + } + + if (duration && duration < AverageLoadInterval / 2 && quota <= 1.0) { + quota = quota ? quota : DefaultQueryLoad; + auto load = (cpuSecondsConsumed * 1000.0 / duration.MilliSeconds()) / CpuNumber; + if (quota > load) { + auto adjustment = (quota - load) / 2; + if (QuotedLoad > adjustment) { + QuotedLoad -= adjustment; + } else { + QuotedLoad = 0.0; + } + Counters.QuotedLoadPercentage->Set(static_cast(QuotedLoad * 100)); + } + } +} + +} // namespace NKikimr::NKqp::NWorkload diff --git a/ydb/core/kqp/workload_service/common/cpu_quota_manager.h b/ydb/core/kqp/workload_service/common/cpu_quota_manager.h new file mode 100644 index 000000000000..f0587e1d4418 --- /dev/null +++ b/ydb/core/kqp/workload_service/common/cpu_quota_manager.h @@ -0,0 +1,76 @@ +#pragma once + +#include + +#include + +#include + + +namespace NKikimr::NKqp::NWorkload { + +class TCpuQuotaManager { + struct TCounters { + const ::NMonitoring::TDynamicCounterPtr SubComponent; + struct TCommonMetrics { + ::NMonitoring::TDynamicCounters::TCounterPtr Ok; + ::NMonitoring::TDynamicCounters::TCounterPtr Error; + }; + + TCommonMetrics CpuLoadRequest; + 
::NMonitoring::TDynamicCounters::TCounterPtr InstantLoadPercentage; + ::NMonitoring::TDynamicCounters::TCounterPtr AverageLoadPercentage; + ::NMonitoring::TDynamicCounters::TCounterPtr QuotedLoadPercentage; + + explicit TCounters(const ::NMonitoring::TDynamicCounterPtr& subComponent); + + private: + void Register(); + void RegisterCommonMetrics(TCommonMetrics& metrics) const; + }; + +public: + struct TCpuQuotaResponse { + explicit TCpuQuotaResponse(int32_t currentLoad, NYdb::EStatus status = NYdb::EStatus::SUCCESS, NYql::TIssues issues = {}); + + const int32_t CurrentLoad; + const NYdb::EStatus Status; + const NYql::TIssues Issues; + }; + +public: + TCpuQuotaManager(TDuration monitoringRequestDelay, TDuration averageLoadInterval, TDuration idleTimeout, double defaultQueryLoad, bool strict, ui64 cpuNumber, const ::NMonitoring::TDynamicCounterPtr& subComponent); + + double GetInstantLoad() const; + double GetAverageLoad() const; + TDuration GetMonitoringRequestDelay() const; + TInstant GetMonitoringRequestTime() const; + + void UpdateCpuLoad(double instantLoad, ui64 cpuNumber, bool success); + bool CheckLoadIsOutdated(); + + bool HasCpuQuota(double maxClusterLoad); + TCpuQuotaResponse RequestCpuQuota(double quota, double maxClusterLoad); + void AdjustCpuQuota(double quota, TDuration duration, double cpuSecondsConsumed); + +private: + TCounters Counters; + + const TDuration MonitoringRequestDelay; + const TDuration AverageLoadInterval; + const TDuration IdleTimeout; + const double DefaultQueryLoad; + const bool Strict; + ui64 CpuNumber = 0; + + TInstant LastCpuLoad; + TInstant LastUpdateCpuLoad; + TInstant LastRequestCpuQuota; + + double InstantLoad = 0.0; + double AverageLoad = 0.0; + double QuotedLoad = 0.0; + bool Ready = false; +}; + +} // namespace NKikimr::NKqp::NWorkload diff --git a/ydb/core/kqp/workload_service/common/events.h b/ydb/core/kqp/workload_service/common/events.h index 25018bbe6728..a0db39a644b5 100644 --- 
a/ydb/core/kqp/workload_service/common/events.h +++ b/ydb/core/kqp/workload_service/common/events.h @@ -22,6 +22,7 @@ struct TEvPrivate { EvRefreshPoolState = EventSpaceBegin(NActors::TEvents::ES_PRIVATE), EvResolvePoolResponse, EvFetchPoolResponse, + EvFetchDatabaseResponse, EvCreatePoolResponse, EvPrepareTablesRequest, EvPlaceRequestIntoPoolResponse, @@ -30,6 +31,12 @@ struct TEvPrivate { EvStopPoolHandler, EvCancelRequest, + EvCpuQuotaRequest, + EvCpuQuotaResponse, + EvCpuLoadResponse, + EvNodesInfoRequest, + EvNodesInfoResponse, + EvTablesCreationFinished, EvCleanupTableResponse, EvCleanupTablesFinished, @@ -79,6 +86,20 @@ struct TEvPrivate { const NYql::TIssues Issues; }; + struct TEvFetchDatabaseResponse : public NActors::TEventLocal { + TEvFetchDatabaseResponse(Ydb::StatusIds::StatusCode status, const TString& database, bool serverless, NYql::TIssues issues) + : Status(status) + , Database(database) + , Serverless(serverless) + , Issues(std::move(issues)) + {} + + const Ydb::StatusIds::StatusCode Status; + const TString Database; + const bool Serverless; + const NYql::TIssues Issues; + }; + struct TEvCreatePoolResponse : public NActors::TEventLocal { TEvCreatePoolResponse(Ydb::StatusIds::StatusCode status, NYql::TIssues issues) : Status(status) @@ -110,13 +131,19 @@ struct TEvPrivate { }; struct TEvFinishRequestInPool : public NActors::TEventLocal { - TEvFinishRequestInPool(const TString& database, const TString& poolId) + TEvFinishRequestInPool(const TString& database, const TString& poolId, TDuration duration, TDuration cpuConsumed, bool adjustCpuQuota) : Database(database) , PoolId(poolId) + , Duration(duration) + , CpuConsumed(cpuConsumed) + , AdjustCpuQuota(adjustCpuQuota) {} const TString Database; const TString PoolId; + const TDuration Duration; + const TDuration CpuConsumed; + const bool AdjustCpuQuota; }; struct TEvResignPoolHandler : public NActors::TEventLocal { @@ -142,6 +169,48 @@ struct TEvPrivate { const TString SessionId; }; + // Cpu load 
requests + struct TEvCpuQuotaRequest : public NActors::TEventLocal { + explicit TEvCpuQuotaRequest(double maxClusterLoad) + : MaxClusterLoad(maxClusterLoad) + {} + + const double MaxClusterLoad; + }; + + struct TEvCpuQuotaResponse : public NActors::TEventLocal { + explicit TEvCpuQuotaResponse(bool quotaAccepted) + : QuotaAccepted(quotaAccepted) + {} + + const bool QuotaAccepted; + }; + + struct TEvCpuLoadResponse : public NActors::TEventLocal { + TEvCpuLoadResponse(Ydb::StatusIds::StatusCode status, double instantLoad, ui64 cpuNumber, NYql::TIssues issues) + : Status(status) + , InstantLoad(instantLoad) + , CpuNumber(cpuNumber) + , Issues(std::move(issues)) + {} + + const Ydb::StatusIds::StatusCode Status; + const double InstantLoad; + const ui64 CpuNumber; + const NYql::TIssues Issues; + }; + + struct TEvNodesInfoRequest : public NActors::TEventLocal { + }; + + struct TEvNodesInfoResponse : public NActors::TEventLocal { + explicit TEvNodesInfoResponse(ui32 nodeCount) + : NodeCount(nodeCount) + {} + + const ui32 NodeCount; + }; + // Tables queries events struct TEvTablesCreationFinished : public NActors::TEventLocal { TEvTablesCreationFinished(bool success, NYql::TIssues issues) diff --git a/ydb/core/kqp/workload_service/common/helpers.cpp b/ydb/core/kqp/workload_service/common/helpers.cpp index 3a5427a9de4a..f1893bf8b92f 100644 --- a/ydb/core/kqp/workload_service/common/helpers.cpp +++ b/ydb/core/kqp/workload_service/common/helpers.cpp @@ -20,4 +20,8 @@ void ParsePoolSettings(const NKikimrSchemeOp::TResourcePoolDescription& descript } } +ui64 SaturationSub(ui64 x, ui64 y) { + return (x > y) ? 
x - y : 0; +} + } // NKikimr::NKqp::NWorkload diff --git a/ydb/core/kqp/workload_service/common/helpers.h b/ydb/core/kqp/workload_service/common/helpers.h index 85aff302d68f..163b2d765ed1 100644 --- a/ydb/core/kqp/workload_service/common/helpers.h +++ b/ydb/core/kqp/workload_service/common/helpers.h @@ -62,21 +62,25 @@ class TSchemeActorBase : public NActors::TActorBootstrapped { virtual TString LogPrefix() const = 0; protected: - bool ScheduleRetry(const TString& message, bool longDelay = false) { + bool ScheduleRetry(NYql::TIssues issues, bool longDelay = false) { if (!RetryState) { RetryState = CreateRetryState(); } if (const auto delay = RetryState->GetNextRetryDelay(longDelay)) { - Issues.AddIssue(message); + Issues.AddIssues(issues); this->Schedule(*delay, new TEvents::TEvWakeup()); - LOG_W("Scheduled retry for error: " << message); + LOG_W("Scheduled retry for error: " << issues.ToOneLineString()); return true; } return false; } + bool ScheduleRetry(const TString& message, bool longDelay = false) { + return ScheduleRetry({NYql::TIssue(message)}, longDelay); + } + private: static TRetryPolicy::IRetryState::TPtr CreateRetryState() { return TRetryPolicy::GetFixedIntervalPolicy( @@ -99,4 +103,6 @@ NYql::TIssues GroupIssues(const NYql::TIssues& issues, const TString& message); void ParsePoolSettings(const NKikimrSchemeOp::TResourcePoolDescription& description, NResourcePool::TPoolSettings& poolConfig); +ui64 SaturationSub(ui64 x, ui64 y); + } // NKikimr::NKqp::NWorkload diff --git a/ydb/core/kqp/workload_service/common/ya.make b/ydb/core/kqp/workload_service/common/ya.make index 44cbd65ca22e..4026b389648b 100644 --- a/ydb/core/kqp/workload_service/common/ya.make +++ b/ydb/core/kqp/workload_service/common/ya.make @@ -1,6 +1,7 @@ LIBRARY() SRCS( + cpu_quota_manager.cpp events.cpp helpers.cpp ) @@ -14,6 +15,8 @@ PEERDIR( ydb/library/actors/core + ydb/public/sdk/cpp/client/ydb_types + library/cpp/retry ) diff --git 
a/ydb/core/kqp/workload_service/kqp_workload_service.cpp b/ydb/core/kqp/workload_service/kqp_workload_service.cpp index 94ad44dbe460..295e536d2499 100644 --- a/ydb/core/kqp/workload_service/kqp_workload_service.cpp +++ b/ydb/core/kqp/workload_service/kqp_workload_service.cpp @@ -1,4 +1,5 @@ #include "kqp_workload_service.h" +#include "kqp_workload_service_impl.h" #include #include @@ -8,12 +9,13 @@ #include #include -#include #include #include #include +#include + namespace NKikimr::NKqp { @@ -23,10 +25,25 @@ namespace { using namespace NActors; -constexpr TDuration IDLE_DURATION = TDuration::Seconds(15); - class TKqpWorkloadService : public TActorBootstrapped { + struct TCounters { + const NMonitoring::TDynamicCounterPtr Counters; + + NMonitoring::TDynamicCounters::TCounterPtr ActivePools; + + TCounters(NMonitoring::TDynamicCounterPtr counters) + : Counters(counters) + { + Register(); + } + + private: + void Register() { + ActivePools = Counters->GetCounter("ActivePools", false); + } + }; + enum class ETablesCreationStatus { Cleanup, NotStarted, @@ -34,53 +51,16 @@ class TKqpWorkloadService : public TActorBootstrapped { Finished, }; - struct TPoolState { - TActorId PoolHandler; - TActorContext ActorContext; - - std::queue PendingRequests = {}; - bool WaitingInitialization = false; - bool PlaceRequestRunning = false; - std::optional NewPoolHandler = std::nullopt; - - ui64 InFlightRequests = 0; - TInstant LastUpdateTime = TInstant::Now(); - - void UpdateHandler() { - if (PlaceRequestRunning || WaitingInitialization || !NewPoolHandler) { - return; - } - - ActorContext.Send(PoolHandler, new TEvPrivate::TEvStopPoolHandler()); - PoolHandler = *NewPoolHandler; - NewPoolHandler = std::nullopt; - InFlightRequests = 0; - } - - void StartPlaceRequest() { - if (PlaceRequestRunning || PendingRequests.empty()) { - return; - } - - PlaceRequestRunning = true; - InFlightRequests++; - ActorContext.Send(PendingRequests.front()->Forward(PoolHandler)); - PendingRequests.pop(); - } - - 
void OnRequestFinished() { - Y_ENSURE(InFlightRequests); - InFlightRequests--; - LastUpdateTime = TInstant::Now(); - } + enum class EWakeUp { + IdleCheck, + StartCpuLoadRequest, + StartNodeInfoRequest }; public: explicit TKqpWorkloadService(NMonitoring::TDynamicCounterPtr counters) : Counters(counters) - { - RegisterCounters(); - } + {} void Bootstrap() { Become(&TKqpWorkloadService::MainState); @@ -90,7 +70,10 @@ class TKqpWorkloadService : public TActorBootstrapped { (ui32)NKikimrConsole::TConfigItem::FeatureFlagsItem }), IEventHandle::FlagTrackDelivery); + CpuQuotaManager = std::make_unique(ActorContext(), Counters.Counters->GetSubgroup("subcomponent", "CpuQuotaManager")); + EnabledResourcePools = AppData()->FeatureFlags.GetEnableResourcePools(); + EnabledResourcePoolsOnServerless = AppData()->FeatureFlags.GetEnableResourcePoolsOnServerless(); if (EnabledResourcePools) { InitializeWorkloadService(); } @@ -117,6 +100,7 @@ class TKqpWorkloadService : public TActorBootstrapped { const auto& event = ev->Get()->Record; EnabledResourcePools = event.GetConfig().GetFeatureFlags().GetEnableResourcePools(); + EnabledResourcePoolsOnServerless = event.GetConfig().GetFeatureFlags().GetEnableResourcePoolsOnServerless(); if (EnabledResourcePools) { LOG_I("Resource pools was enanbled"); InitializeWorkloadService(); @@ -128,6 +112,13 @@ class TKqpWorkloadService : public TActorBootstrapped { Send(ev->Sender, responseEvent.release(), IEventHandle::FlagTrackDelivery, ev->Cookie); } + void Handle(TEvInterconnect::TEvNodesInfo::TPtr& ev) { + NodeCount = ev->Get()->Nodes.size(); + ScheduleNodeInfoRequest(); + + LOG_T("Updated node info, noode count: " << NodeCount); + } + void Handle(TEvents::TEvUndelivered::TPtr& ev) const { switch (ev->Get()->SourceType) { case NConsole::TEvConfigsDispatcher::EvSetConfigSubscriptionRequest: @@ -138,6 +129,11 @@ class TKqpWorkloadService : public TActorBootstrapped { LOG_E("Failed to deliver config notification response"); break; + case 
TEvInterconnect::EvListNodes: + LOG_W("Failed to deliver list nodes request"); + ScheduleNodeInfoRequest(); + break; + default: LOG_E("Undelivered event with unexpected source type: " << ev->Get()->SourceType); break; @@ -151,12 +147,9 @@ class TKqpWorkloadService : public TActorBootstrapped { return; } - // Add AllAuthenticatedUsers group SID into user token - ev->Get()->UserToken = GetUserToken(ev->Get()->UserToken); - - LOG_D("Recieved new request from " << workerActorId << ", Database: " << ev->Get()->Database << ", PoolId: " << ev->Get()->PoolId << ", SessionId: " << ev->Get()->SessionId); - bool hasDefaultPool = DatabasesWithDefaultPool.contains(CanonizePath(ev->Get()->Database)); - Register(CreatePoolResolverActor(std::move(ev), hasDefaultPool)); + const TString& database = ev->Get()->Database; + LOG_D("Recieved new request from " << workerActorId << ", Database: " << database << ", PoolId: " << ev->Get()->PoolId << ", SessionId: " << ev->Get()->SessionId); + GetOrCreateDatabaseState(database)->DoPlaceRequest(std::move(ev)); } void Handle(TEvCleanupRequest::TPtr& ev) { @@ -172,24 +165,19 @@ class TKqpWorkloadService : public TActorBootstrapped { Send(ev->Forward(poolState->PoolHandler)); } - void HandleWakeup() { - IdleChecksStarted = false; + void Handle(TEvents::TEvWakeup::TPtr& ev) { + switch (static_cast(ev->Get()->Tag)) { + case EWakeUp::IdleCheck: + RunIdleCheck(); + break; - std::vector poolsToDelete; - poolsToDelete.reserve(PoolIdToState.size()); - for (const auto& [poolKey, poolState] : PoolIdToState) { - if (!poolState.InFlightRequests && TInstant::Now() - poolState.LastUpdateTime > IDLE_DURATION) { - Send(poolState.PoolHandler, new TEvPrivate::TEvStopPoolHandler()); - poolsToDelete.emplace_back(poolKey); - } - } - for (const auto& poolKey : poolsToDelete) { - PoolIdToState.erase(poolKey); - ActivePools->Dec(); - } + case EWakeUp::StartCpuLoadRequest: + RunCpuLoadRequest(); + break; - if (!PoolIdToState.empty()) { - StartIdleChecks(); + case 
EWakeUp::StartNodeInfoRequest: + RunNodeInfoRequest(); + break; } } @@ -197,28 +185,37 @@ class TKqpWorkloadService : public TActorBootstrapped { sFunc(TEvents::TEvPoison, HandlePoison); sFunc(NConsole::TEvConfigsDispatcher::TEvSetConfigSubscriptionResponse, HandleSetConfigSubscriptionResponse); hFunc(NConsole::TEvConsole::TEvConfigNotificationRequest, Handle); + hFunc(TEvInterconnect::TEvNodesInfo, Handle); hFunc(TEvents::TEvUndelivered, Handle); hFunc(TEvPlaceRequestIntoPool, Handle); hFunc(TEvCleanupRequest, Handle); - sFunc(TEvents::TEvWakeup, HandleWakeup); + hFunc(TEvents::TEvWakeup, Handle); + hFunc(TEvPrivate::TEvFetchDatabaseResponse, Handle); hFunc(TEvPrivate::TEvResolvePoolResponse, Handle); hFunc(TEvPrivate::TEvPlaceRequestIntoPoolResponse, Handle); + hFunc(TEvPrivate::TEvNodesInfoRequest, Handle); hFunc(TEvPrivate::TEvRefreshPoolState, Handle); + hFunc(TEvPrivate::TEvCpuQuotaRequest, Handle); hFunc(TEvPrivate::TEvFinishRequestInPool, Handle); hFunc(TEvPrivate::TEvPrepareTablesRequest, Handle); hFunc(TEvPrivate::TEvCleanupTablesFinished, Handle); hFunc(TEvPrivate::TEvTablesCreationFinished, Handle); + hFunc(TEvPrivate::TEvCpuLoadResponse, Handle); hFunc(TEvPrivate::TEvResignPoolHandler, Handle); ) private: + void Handle(TEvPrivate::TEvFetchDatabaseResponse::TPtr& ev) { + GetOrCreateDatabaseState(ev->Get()->Database)->UpdateDatabaseInfo(ev); + } + void Handle(TEvPrivate::TEvResolvePoolResponse::TPtr& ev) { const auto& event = ev->Get()->Event; const TString& database = event->Get()->Database; if (ev->Get()->DefaultPoolCreated) { - DatabasesWithDefaultPool.insert(CanonizePath(database)); + GetOrCreateDatabaseState(database)->HasDefaultPool = true; } const TString& poolId = event->Get()->PoolId; @@ -234,11 +231,11 @@ class TKqpWorkloadService : public TActorBootstrapped { TString poolKey = GetPoolKey(database, poolId); LOG_I("Creating new handler for pool " << poolKey); - auto poolHandler = Register(CreatePoolHandlerActor(database, poolId, 
ev->Get()->PoolConfig, Counters)); + auto poolHandler = Register(CreatePoolHandlerActor(database, poolId, ev->Get()->PoolConfig, Counters.Counters)); poolState = &PoolIdToState.insert({poolKey, TPoolState{.PoolHandler = poolHandler, .ActorContext = ActorContext()}}).first->second; - ActivePools->Inc(); - StartIdleChecks(); + Counters.ActivePools->Inc(); + ScheduleIdleCheck(); } poolState->PendingRequests.emplace(std::move(ev)); @@ -257,6 +254,10 @@ class TKqpWorkloadService : public TActorBootstrapped { } } + void Handle(TEvPrivate::TEvNodesInfoRequest::TPtr& ev) const { + Send(ev->Sender, new TEvPrivate::TEvNodesInfoResponse(NodeCount)); + } + void Handle(TEvPrivate::TEvRefreshPoolState::TPtr& ev) { const auto& event = ev->Get()->Record; const TString& database = event.GetDatabase(); @@ -268,14 +269,27 @@ class TKqpWorkloadService : public TActorBootstrapped { } } + void Handle(TEvPrivate::TEvCpuQuotaRequest::TPtr& ev) { + const TActorId& poolHandler = ev->Sender; + const double maxClusterLoad = ev->Get()->MaxClusterLoad; + LOG_T("Requested cpu quota from handler " << poolHandler << ", MaxClusterLoad: " << maxClusterLoad); + + CpuQuotaManager->RequestCpuQuota(poolHandler, maxClusterLoad, ev->Cookie); + ScheduleCpuLoadRequest(); + } + void Handle(TEvPrivate::TEvFinishRequestInPool::TPtr& ev) { const TString& database = ev->Get()->Database; const TString& poolId = ev->Get()->PoolId; - LOG_T("Request finished in pool, Database: " << database << ", PoolId: " << poolId); + LOG_T("Request finished in pool, Database: " << database << ", PoolId: " << poolId << ", Duration: " << ev->Get()->Duration << ", CpuConsumed: " << ev->Get()->CpuConsumed << ", AdjustCpuQuota: " << ev->Get()->AdjustCpuQuota); if (auto poolState = GetPoolState(database, poolId)) { poolState->OnRequestFinished(); } + if (ev->Get()->AdjustCpuQuota) { + CpuQuotaManager->AdjustCpuQuota(ev->Get()->Duration, ev->Get()->CpuConsumed.SecondsFloat()); + ScheduleCpuLoadRequest(); + } } void 
Handle(TEvPrivate::TEvPrepareTablesRequest::TPtr& ev) { @@ -327,6 +341,19 @@ class TKqpWorkloadService : public TActorBootstrapped { OnTabelsCreated(false, issues); } + void Handle(TEvPrivate::TEvCpuLoadResponse::TPtr& ev) { + const bool success = ev->Get()->Status == Ydb::StatusIds::SUCCESS; + if (!success) { + LOG_E("Failed to fetch cpu load " << ev->Get()->Status << ", issues: " << ev->Get()->Issues.ToOneLineString()); + } else { + LOG_T("Succesfully fetched cpu load: " << 100.0 * ev->Get()->InstantLoad << "%, cpu number: " << ev->Get()->CpuNumber); + } + + CpuQuotaManager->CpuLoadRequestRunning = false; + CpuQuotaManager->UpdateCpuLoad(ev->Get()->InstantLoad, ev->Get()->CpuNumber, success); + ScheduleCpuLoadRequest(); + } + void Handle(TEvPrivate::TEvResignPoolHandler::TPtr& ev) { const TString& database = ev->Get()->Database; const TString& poolId = ev->Get()->PoolId; @@ -350,6 +377,7 @@ class TKqpWorkloadService : public TActorBootstrapped { LOG_I("Started workload service initialization"); Register(CreateCleanupTablesActor()); + RunNodeInfoRequest(); } void PrepareWorkloadServiceTables() { @@ -378,13 +406,71 @@ class TKqpWorkloadService : public TActorBootstrapped { PendingHandlers.clear(); } - void StartIdleChecks() { + void ScheduleIdleCheck() { if (IdleChecksStarted) { return; } IdleChecksStarted = true; - Schedule(IDLE_DURATION, new TEvents::TEvWakeup()); + Schedule(IDLE_DURATION / 2, new TEvents::TEvWakeup(static_cast(EWakeUp::IdleCheck))); + } + + void RunIdleCheck() { + IdleChecksStarted = false; + + std::vector poolsToDelete; + poolsToDelete.reserve(PoolIdToState.size()); + for (const auto& [poolKey, poolState] : PoolIdToState) { + if (!poolState.InFlightRequests && TInstant::Now() - poolState.LastUpdateTime > IDLE_DURATION) { + CpuQuotaManager->CleanupHandler(poolState.PoolHandler); + Send(poolState.PoolHandler, new TEvPrivate::TEvStopPoolHandler()); + poolsToDelete.emplace_back(poolKey); + } + } + for (const auto& poolKey : poolsToDelete) { + 
PoolIdToState.erase(poolKey); + Counters.ActivePools->Dec(); + } + + if (!PoolIdToState.empty()) { + ScheduleIdleCheck(); + } + } + + void ScheduleCpuLoadRequest() const { + auto delay = CpuQuotaManager->GetCpuLoadRequestDelay(); + if (!delay) { + return; + } + + if (*delay) { + Schedule(*delay, new TEvents::TEvWakeup(static_cast(EWakeUp::StartCpuLoadRequest))); + } else { + RunCpuLoadRequest(); + } + } + + void RunCpuLoadRequest() const { + if (CpuQuotaManager->CpuLoadRequestRunning) { + return; + } + + CpuQuotaManager->CpuLoadRequestTime = TInstant::Zero(); + if (CpuQuotaManager->CpuQuotaManager.GetMonitoringRequestDelay()) { + ScheduleCpuLoadRequest(); + return; + } + + CpuQuotaManager->CpuLoadRequestRunning = true; + Register(CreateCpuLoadFetcherActor(SelfId())); + } + + void ScheduleNodeInfoRequest() const { + Schedule(IDLE_DURATION * 2, new TEvents::TEvWakeup(static_cast(EWakeUp::StartNodeInfoRequest))); + } + + void RunNodeInfoRequest() const { + Send(GetNameserviceActorId(), new TEvInterconnect::TEvListNodes(), IEventHandle::FlagTrackDelivery); } private: @@ -406,23 +492,12 @@ class TKqpWorkloadService : public TActorBootstrapped { Send(replyActorId, new TEvCleanupResponse(status, {NYql::TIssue(message)})); } - static TIntrusivePtr GetUserToken(TIntrusiveConstPtr userToken) { - auto token = MakeIntrusive(userToken ? 
userToken->GetUserSID() : NACLib::TSID(), TVector{}); - - bool hasAllAuthenticatedUsersSID = false; - const auto& allAuthenticatedUsersSID = AppData()->AllAuthenticatedUsers; - if (userToken) { - for (const auto& groupSID : userToken->GetGroupSIDs()) { - token->AddGroupSID(groupSID); - hasAllAuthenticatedUsersSID = hasAllAuthenticatedUsersSID || groupSID == allAuthenticatedUsersSID; - } - } - - if (!hasAllAuthenticatedUsersSID) { - token->AddGroupSID(allAuthenticatedUsersSID); + TDatabaseState* GetOrCreateDatabaseState(const TString& database) { + auto databaseIt = DatabaseToState.find(database); + if (databaseIt != DatabaseToState.end()) { + return &databaseIt->second; } - - return token; + return &DatabaseToState.insert({database, TDatabaseState{.ActorContext = ActorContext(), .EnabledResourcePoolsOnServerless = EnabledResourcePoolsOnServerless}}).first->second; } TPoolState* GetPoolState(const TString& database, const TString& poolId) { @@ -445,27 +520,28 @@ class TKqpWorkloadService : public TActorBootstrapped { return "[Service] "; } - void RegisterCounters() { - ActivePools = Counters->GetCounter("ActivePools", false); - } - private: - NMonitoring::TDynamicCounterPtr Counters; + TCounters Counters; bool EnabledResourcePools = false; + bool EnabledResourcePoolsOnServerless = false; bool ServiceInitialized = false; bool IdleChecksStarted = false; ETablesCreationStatus TablesCreationStatus = ETablesCreationStatus::Cleanup; std::unordered_set PendingHandlers; - std::unordered_set DatabasesWithDefaultPool; + std::unordered_map DatabaseToState; std::unordered_map PoolIdToState; - - NMonitoring::TDynamicCounters::TCounterPtr ActivePools; + std::unique_ptr CpuQuotaManager; + ui32 NodeCount = 0; }; } // anonymous namespace +bool IsWorkloadServiceRequired(const NResourcePool::TPoolSettings& config) { + return config.ConcurrentQueryLimit != -1 || config.DatabaseLoadCpuThreshold >= 0.0 || config.QueryCancelAfter; +} + } // namespace NWorkload IActor* 
CreateKqpWorkloadService(NMonitoring::TDynamicCounterPtr counters) { diff --git a/ydb/core/kqp/workload_service/kqp_workload_service.h b/ydb/core/kqp/workload_service/kqp_workload_service.h index 33371c97a249..5b6015328dd9 100644 --- a/ydb/core/kqp/workload_service/kqp_workload_service.h +++ b/ydb/core/kqp/workload_service/kqp_workload_service.h @@ -1,10 +1,18 @@ #pragma once +#include + #include namespace NKikimr::NKqp { +namespace NWorkload { + +bool IsWorkloadServiceRequired(const NResourcePool::TPoolSettings& config); + +} // namespace NWorkload + NActors::IActor* CreateKqpWorkloadService(NMonitoring::TDynamicCounterPtr counters); } // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/workload_service/kqp_workload_service_impl.h b/ydb/core/kqp/workload_service/kqp_workload_service_impl.h new file mode 100644 index 000000000000..8503a4fb7949 --- /dev/null +++ b/ydb/core/kqp/workload_service/kqp_workload_service_impl.h @@ -0,0 +1,188 @@ +#pragma once + +#include + +#include +#include +#include +#include + + +namespace NKikimr::NKqp::NWorkload { + +constexpr TDuration IDLE_DURATION = TDuration::Seconds(60); + + +struct TDatabaseState { + NActors::TActorContext ActorContext; + bool& EnabledResourcePoolsOnServerless; + + std::vector PendingRequersts = {}; + bool HasDefaultPool = false; + bool Serverless = false; + + TInstant LastUpdateTime = TInstant::Zero(); + + void DoPlaceRequest(TEvPlaceRequestIntoPool::TPtr ev) { + TString database = ev->Get()->Database; + PendingRequersts.emplace_back(std::move(ev)); + + if (!EnabledResourcePoolsOnServerless && (TInstant::Now() - LastUpdateTime) > IDLE_DURATION) { + ActorContext.Register(CreateDatabaseFetcherActor(ActorContext.SelfID, database)); + } else { + StartPendingRequests(); + } + } + + void UpdateDatabaseInfo(const TEvPrivate::TEvFetchDatabaseResponse::TPtr& ev) { + if (ev->Get()->Status != Ydb::StatusIds::SUCCESS) { + ReplyContinueError(ev->Get()->Status, GroupIssues(ev->Get()->Issues, "Failed to fetch database 
info")); + return; + } + + LastUpdateTime = TInstant::Now(); + Serverless = ev->Get()->Serverless; + StartPendingRequests(); + } + +private: + void StartPendingRequests() { + if (!EnabledResourcePoolsOnServerless && Serverless) { + ReplyContinueError(Ydb::StatusIds::UNSUPPORTED, {NYql::TIssue("Resource pools are disabled for serverless domains. Please contact your system administrator to enable it")}); + return; + } + + for (auto& ev : PendingRequersts) { + ActorContext.Register(CreatePoolResolverActor(std::move(ev), HasDefaultPool)); + } + PendingRequersts.clear(); + } + + void ReplyContinueError(Ydb::StatusIds::StatusCode status, NYql::TIssues issues) { + for (const auto& ev : PendingRequersts) { + ActorContext.Send(ev->Sender, new TEvContinueRequest(status, {}, {}, issues)); + } + PendingRequersts.clear(); + } +}; + +struct TPoolState { + NActors::TActorId PoolHandler; + NActors::TActorContext ActorContext; + + std::queue PendingRequests = {}; + bool WaitingInitialization = false; + bool PlaceRequestRunning = false; + std::optional NewPoolHandler = std::nullopt; + + ui64 InFlightRequests = 0; + TInstant LastUpdateTime = TInstant::Now(); + + void UpdateHandler() { + if (PlaceRequestRunning || WaitingInitialization || !NewPoolHandler) { + return; + } + + ActorContext.Send(PoolHandler, new TEvPrivate::TEvStopPoolHandler()); + PoolHandler = *NewPoolHandler; + NewPoolHandler = std::nullopt; + InFlightRequests = 0; + } + + void StartPlaceRequest() { + if (PlaceRequestRunning || PendingRequests.empty()) { + return; + } + + PlaceRequestRunning = true; + InFlightRequests++; + ActorContext.Send(PendingRequests.front()->Forward(PoolHandler)); + PendingRequests.pop(); + } + + void OnRequestFinished() { + Y_ENSURE(InFlightRequests); + InFlightRequests--; + LastUpdateTime = TInstant::Now(); + } +}; + +struct TCpuQuotaManagerState { + TCpuQuotaManager CpuQuotaManager; + NActors::TActorContext ActorContext; + bool CpuLoadRequestRunning = false; + TInstant CpuLoadRequestTime = 
TInstant::Zero(); + + TCpuQuotaManagerState(NActors::TActorContext actorContext, NMonitoring::TDynamicCounterPtr subComponent) + : CpuQuotaManager(TDuration::Seconds(1), TDuration::Seconds(10), IDLE_DURATION, 0.1, true, 0, subComponent) + , ActorContext(actorContext) + {} + + void RequestCpuQuota(TActorId poolHandler, double maxClusterLoad, ui64 coockie) { + auto response = CpuQuotaManager.RequestCpuQuota(0.0, maxClusterLoad); + + bool quotaAccepted = response.Status == NYdb::EStatus::SUCCESS; + ActorContext.Send(poolHandler, new TEvPrivate::TEvCpuQuotaResponse(quotaAccepted), 0, coockie); + + // Schedule notification + if (!quotaAccepted) { + if (auto it = HandlersLimits.find(poolHandler); it != HandlersLimits.end()) { + PendingHandlers[it->second].erase(poolHandler); + } + HandlersLimits[poolHandler] = maxClusterLoad; + PendingHandlers[maxClusterLoad].insert(poolHandler); + } + } + + void UpdateCpuLoad(double instantLoad, ui64 cpuNumber, bool success) { + CpuQuotaManager.UpdateCpuLoad(instantLoad, cpuNumber, success); + CheckPendingQueue(); + } + + void AdjustCpuQuota(TDuration duration, double cpuSecondsConsumed) { + CpuQuotaManager.AdjustCpuQuota(0.0, duration, cpuSecondsConsumed); + CheckPendingQueue(); + } + + std::optional GetCpuLoadRequestDelay() { + if (CpuLoadRequestRunning) { + return std::nullopt; + } + + auto requestTime = CpuQuotaManager.GetMonitoringRequestTime(); + if (!CpuLoadRequestTime || CpuLoadRequestTime > requestTime) { + CpuLoadRequestTime = requestTime; + return CpuLoadRequestTime - TInstant::Now(); + } + return std::nullopt; + } + + void CleanupHandler(TActorId poolHandler) { + if (auto it = HandlersLimits.find(poolHandler); it != HandlersLimits.end()) { + PendingHandlers[it->second].erase(poolHandler); + HandlersLimits.erase(it); + } + } + +private: + void CheckPendingQueue() { + while (!PendingHandlers.empty()) { + const auto& [maxClusterLoad, poolHandlers] = *PendingHandlers.begin(); + if (!CpuQuotaManager.HasCpuQuota(maxClusterLoad)) { 
+ break; + } + + for (const TActorId& poolHandler : poolHandlers) { + ActorContext.Send(poolHandler, new TEvPrivate::TEvRefreshPoolState()); + HandlersLimits.erase(poolHandler); + } + PendingHandlers.erase(PendingHandlers.begin()); + } + } + +private: + std::map> PendingHandlers; + std::unordered_map HandlersLimits; +}; + +} // namespace NKikimr::NKqp::NWorkload diff --git a/ydb/core/kqp/workload_service/ut/common/kqp_workload_service_ut_common.cpp b/ydb/core/kqp/workload_service/ut/common/kqp_workload_service_ut_common.cpp index 7edc17a78525..bfe8fdc9242e 100644 --- a/ydb/core/kqp/workload_service/ut/common/kqp_workload_service_ut_common.cpp +++ b/ydb/core/kqp/workload_service/ut/common/kqp_workload_service_ut_common.cpp @@ -8,9 +8,10 @@ #include #include #include - #include +#include + namespace NKikimr::NKqp::NWorkload { @@ -229,6 +230,9 @@ class TWorkloadServiceYdbSetup : public IYdbSetup { TAppConfig GetAppConfig() const { TAppConfig appConfig; appConfig.MutableFeatureFlags()->SetEnableResourcePools(Settings_.EnableResourcePools_); + appConfig.MutableFeatureFlags()->SetEnableMetadataObjectsOnServerless(Settings_.EnableMetadataObjectsOnServerless_); + appConfig.MutableFeatureFlags()->SetEnableExternalDataSourcesOnServerless(Settings_.EnableExternalDataSourcesOnServerless_); + appConfig.MutableFeatureFlags()->SetEnableExternalDataSources(true); return appConfig; } @@ -236,7 +240,7 @@ class TWorkloadServiceYdbSetup : public IYdbSetup { void SetLoggerSettings(TServerSettings& serverSettings) const { auto loggerInitializer = [](TTestActorRuntime& runtime) { runtime.SetLogPriority(NKikimrServices::KQP_WORKLOAD_SERVICE, NLog::EPriority::PRI_TRACE); - runtime.SetLogPriority(NKikimrServices::KQP_SESSION, NLog::EPriority::PRI_DEBUG); + runtime.SetLogPriority(NKikimrServices::KQP_SESSION, NLog::EPriority::PRI_TRACE); }; serverSettings.SetLoggerInitializer(loggerInitializer); @@ -253,16 +257,50 @@ class TWorkloadServiceYdbSetup : public IYdbSetup { 
.SetAppConfig(appConfig) .SetFeatureFlags(appConfig.GetFeatureFlags()); + if (Settings_.CreateSampleTenants_) { + serverSettings + .SetDynamicNodeCount(2) + .AddStoragePoolType(Settings_.GetDedicatedTenantName()) + .AddStoragePoolType(Settings_.GetSharedTenantName()); + } + SetLoggerSettings(serverSettings); return serverSettings; } + void SetupResourcesTenant(Ydb::Cms::CreateDatabaseRequest& request, Ydb::Cms::StorageUnits* storage, const TString& name) { + request.set_path(name); + storage->set_unit_kind(name); + storage->set_count(1); + } + + void CreateTenants() { + { // Dedicated + Ydb::Cms::CreateDatabaseRequest request; + SetupResourcesTenant(request, request.mutable_resources()->add_storage_units(), Settings_.GetDedicatedTenantName()); + Tenants_->CreateTenant(std::move(request)); + } + + { // Shared + Ydb::Cms::CreateDatabaseRequest request; + SetupResourcesTenant(request, request.mutable_shared_resources()->add_storage_units(), Settings_.GetSharedTenantName()); + Tenants_->CreateTenant(std::move(request)); + } + + { // Serverless + Ydb::Cms::CreateDatabaseRequest request; + request.set_path(Settings_.GetServerlessTenantName()); + request.mutable_serverless_resources()->set_shared_database_path(Settings_.GetSharedTenantName()); + Tenants_->CreateTenant(std::move(request)); + } + } + void InitializeServer() { ui32 grpcPort = PortManager_.GetPort(); TServerSettings serverSettings = GetServerSettings(grpcPort); - Server_ = std::make_unique(serverSettings); + Server_ = MakeIntrusive(serverSettings); Server_->EnableGRpc(grpcPort); GetRuntime()->SetDispatchTimeout(FUTURE_WAIT_TIMEOUT); @@ -275,10 +313,15 @@ class TWorkloadServiceYdbSetup : public IYdbSetup { TableClient_ = std::make_unique(*YdbDriver_, NYdb::NTable::TClientSettings().AuthToken("user@" BUILTIN_SYSTEM_DOMAIN)); TableClientSession_ = std::make_unique(TableClient_->CreateSession().GetValueSync().GetSession()); + + Tenants_ = std::make_unique(Server_); + if (Settings_.CreateSampleTenants_) { + 
CreateTenants(); + } } void CreateSamplePool() const { - if (!Settings_.EnableResourcePools_) { + if (!Settings_.EnableResourcePools_ || Settings_.CreateSampleTenants_) { return; } @@ -287,6 +330,7 @@ class TWorkloadServiceYdbSetup : public IYdbSetup { poolConfig.QueueSize = Settings_.QueueSize_; poolConfig.QueryCancelAfter = Settings_.QueryCancelAfter_; poolConfig.QueryMemoryLimitPercentPerNode = Settings_.QueryMemoryLimitPercentPerNode_; + poolConfig.DatabaseLoadCpuThreshold = Settings_.DatabaseLoadCpuThreshold_; TActorId edgeActor = GetRuntime()->AllocateEdgeActor(); GetRuntime()->Register(CreatePoolCreatorActor(edgeActor, Settings_.DomainName_, Settings_.PoolId_, poolConfig, nullptr, {})); @@ -303,6 +347,41 @@ class TWorkloadServiceYdbSetup : public IYdbSetup { CreateSamplePool(); } + // Cluster helpers + void UpdateNodeCpuInfo(double usage, ui32 threads, ui64 nodeIndex = 0) override { + TVector> pools; + pools.emplace_back("User", usage, threads, threads); + + auto edgeActor = GetRuntime()->AllocateEdgeActor(nodeIndex); + GetRuntime()->Send( + NNodeWhiteboard::MakeNodeWhiteboardServiceId(GetRuntime()->GetNodeId(nodeIndex)), edgeActor, + new NNodeWhiteboard::TEvWhiteboard::TEvSystemStateUpdate(pools), nodeIndex + ); + + WaitFor(FUTURE_WAIT_TIMEOUT, "node cpu usage", [this, usage, threads, nodeIndex, edgeActor](TString& errorString) { + GetRuntime()->Send( + NNodeWhiteboard::MakeNodeWhiteboardServiceId(GetRuntime()->GetNodeId(nodeIndex)), edgeActor, + new NNodeWhiteboard::TEvWhiteboard::TEvSystemStateRequest(), nodeIndex + ); + auto response = GetRuntime()->GrabEdgeEvent(edgeActor, FUTURE_WAIT_TIMEOUT); + + if (!response->Get()->Record.SystemStateInfoSize()) { + errorString = "empty system state info"; + return false; + } + const auto& systemStateInfo = response->Get()->Record.GetSystemStateInfo()[0]; + + if (!systemStateInfo.PoolStatsSize()) { + errorString = "empty pool stats"; + return false; + } + const auto& poolStat = systemStateInfo.GetPoolStats()[0]; + + 
errorString = TStringBuilder() << "usage: " << poolStat.GetUsage() << ", threads: " << poolStat.GetThreads(); + return poolStat.GetUsage() == usage && threads == poolStat.GetThreads(); + }); + } + // Scheme queries helpers NYdb::NScheme::TSchemeClient GetSchemeClient() const override { return NYdb::NScheme::TSchemeClient(*YdbDriver_); @@ -323,21 +402,17 @@ class TWorkloadServiceYdbSetup : public IYdbSetup { void WaitPoolAccess(const TString& userSID, ui32 access, const TString& poolId = "") const override { auto token = NACLib::TUserToken(userSID, {}); - TInstant start = TInstant::Now(); - while (TInstant::Now() - start <= FUTURE_WAIT_TIMEOUT) { - if (auto response = Navigate(TStringBuilder() << ".resource_pools/" << (poolId ? poolId : Settings_.PoolId_))) { - const auto& result = response->ResultSet.at(0); - bool resourcePool = result.Kind == NSchemeCache::TSchemeCacheNavigate::EKind::KindResourcePool; - if (resourcePool && (!result.SecurityObject || result.SecurityObject->CheckAccess(access, token))) { - return; - } - Cerr << "WaitPoolAccess " << TInstant::Now() - start << ": " << (resourcePool ? TStringBuilder() << "access denied" : TStringBuilder() << "unexpected kind " << result.Kind) << "\n"; - } else { - Cerr << "WaitPoolAccess " << TInstant::Now() - start << ": empty response\n"; + WaitFor(FUTURE_WAIT_TIMEOUT, "pool acl", [this, token, access, poolId](TString& errorString) { + auto response = Navigate(TStringBuilder() << ".resource_pools/" << (poolId ? poolId : Settings_.PoolId_)); + if (!response) { + errorString = "empty response"; + return false; } - Sleep(TDuration::Seconds(1)); - } - UNIT_ASSERT_C(false, "Pool version waiting timeout"); + const auto& result = response->ResultSet.at(0); + bool resourcePool = result.Kind == NSchemeCache::TSchemeCacheNavigate::EKind::KindResourcePool; + errorString = (resourcePool ? 
TStringBuilder() << "access denied" : TStringBuilder() << "unexpected kind " << result.Kind); + return resourcePool && (!result.SecurityObject || result.SecurityObject->CheckAccess(access, token)); + }); } // Generic query helpers @@ -390,17 +465,11 @@ class TWorkloadServiceYdbSetup : public IYdbSetup { } void WaitPoolState(const TPoolStateDescription& state, const TString& poolId = "") const override { - TInstant start = TInstant::Now(); - while (TInstant::Now() - start <= FUTURE_WAIT_TIMEOUT) { + WaitFor(FUTURE_WAIT_TIMEOUT, "pool state", [this, state, poolId](TString& errorString) { auto description = GetPoolDescription(TDuration::Zero(), poolId); - if (description.DelayedRequests == state.DelayedRequests && description.RunningRequests == state.RunningRequests) { - return; - } - - Cerr << "WaitPoolState " << TInstant::Now() - start << ": delayed = " << description.DelayedRequests << ", running = " << description.RunningRequests << "\n"; - Sleep(TDuration::Seconds(1)); - } - UNIT_ASSERT_C(false, "Pool state waiting timeout"); + errorString = TStringBuilder() << "delayed = " << description.DelayedRequests << ", running = " << description.RunningRequests; + return description.DelayedRequests == state.DelayedRequests && description.RunningRequests == state.RunningRequests; + }); } void WaitPoolHandlersCount(i64 finalCount, std::optional initialCount = std::nullopt, TDuration timeout = FUTURE_WAIT_TIMEOUT) const override { @@ -410,16 +479,10 @@ class TWorkloadServiceYdbSetup : public IYdbSetup { UNIT_ASSERT_VALUES_EQUAL_C(counter->Val(), *initialCount, "Unexpected pool handlers count"); } - TInstant start = TInstant::Now(); - while (TInstant::Now() - start < timeout) { - if (counter->Val() == finalCount) { - return; - } - - Cerr << "WaitPoolHandlersCount " << TInstant::Now() - start << ": number handlers = " << counter->Val() << "\n"; - Sleep(TDuration::Seconds(1)); - } - UNIT_ASSERT_C(false, "Pool handlers count wait timeout"); + WaitFor(timeout, "pool handlers", 
[counter, finalCount](TString& errorString) { + errorString = TStringBuilder() << "number handlers = " << counter->Val(); + return counter->Val() == finalCount; + }); } void StopWorkloadService(ui64 nodeIndex = 0) const override { @@ -432,13 +495,15 @@ class TWorkloadServiceYdbSetup : public IYdbSetup { auto subgroup = GetWorkloadManagerCounters(nodeIndex) ->GetSubgroup("pool", CanonizePath(TStringBuilder() << Settings_.DomainName_ << "/" << (poolId ? poolId : Settings_.PoolId_))); - CheckCommonCounters(subgroup); + const TString description = TStringBuilder() << "Node id: " << GetRuntime()->GetNodeId(nodeIndex); + CheckCommonCounters(subgroup, description); if (checkTableCounters) { - CheckTableCounters(subgroup); + CheckTableCounters(subgroup, description); } } } + // Common helpers TTestActorRuntime* GetRuntime() const override { return Server_->GetRuntime(); } @@ -464,7 +529,7 @@ class TWorkloadServiceYdbSetup : public IYdbSetup { request->SetQuery(query); request->SetType(NKikimrKqp::QUERY_TYPE_SQL_GENERIC_QUERY); request->SetAction(NKikimrKqp::QUERY_ACTION_EXECUTE); - request->SetDatabase(Settings_.DomainName_); + request->SetDatabase(settings.Database_ ? 
settings.Database_ : Settings_.DomainName_); request->SetPoolId(settings.PoolId_); return event; @@ -475,37 +540,30 @@ class TWorkloadServiceYdbSetup : public IYdbSetup { ->GetSubgroup("subsystem", "workload_manager"); } - static void CheckCommonCounters(NMonitoring::TDynamicCounterPtr subgroup) { - UNIT_ASSERT_VALUES_EQUAL(subgroup->GetCounter("LocalInFly", false)->Val(), 0); - UNIT_ASSERT_VALUES_EQUAL(subgroup->GetCounter("LocalDelayedRequests", false)->Val(), 0); - UNIT_ASSERT_VALUES_EQUAL(subgroup->GetCounter("ContinueOverloaded", true)->Val(), 0); - UNIT_ASSERT_VALUES_EQUAL(subgroup->GetCounter("ContinueError", true)->Val(), 0); - UNIT_ASSERT_VALUES_EQUAL(subgroup->GetCounter("CleanupError", true)->Val(), 0); - UNIT_ASSERT_VALUES_EQUAL(subgroup->GetCounter("Cancelled", true)->Val(), 0); - - UNIT_ASSERT_GE(subgroup->GetCounter("ContinueOk", true)->Val(), 1); - UNIT_ASSERT_VALUES_EQUAL(subgroup->GetCounter("ContinueOk", true)->Val(), subgroup->GetCounter("CleanupOk", true)->Val()); + static void CheckCommonCounters(NMonitoring::TDynamicCounterPtr subgroup, const TString& description) { + UNIT_ASSERT_VALUES_EQUAL_C(subgroup->GetCounter("LocalInFly", false)->Val(), 0, description); + UNIT_ASSERT_VALUES_EQUAL_C(subgroup->GetCounter("LocalDelayedRequests", false)->Val(), 0, description); + UNIT_ASSERT_VALUES_EQUAL_C(subgroup->GetCounter("ContinueOverloaded", true)->Val(), 0, description); + UNIT_ASSERT_VALUES_EQUAL_C(subgroup->GetCounter("ContinueError", true)->Val(), 0, description); + UNIT_ASSERT_VALUES_EQUAL_C(subgroup->GetCounter("CleanupError", true)->Val(), 0, description); + UNIT_ASSERT_VALUES_EQUAL_C(subgroup->GetCounter("Cancelled", true)->Val(), 0, description); } - static void CheckTableCounters(NMonitoring::TDynamicCounterPtr subgroup) { - UNIT_ASSERT_VALUES_EQUAL(subgroup->GetCounter("PendingRequestsCount", false)->Val(), 0); - UNIT_ASSERT_VALUES_EQUAL(subgroup->GetCounter("FinishingRequestsCount", false)->Val(), 0); + static void 
CheckTableCounters(NMonitoring::TDynamicCounterPtr subgroup, const TString& description) { + UNIT_ASSERT_VALUES_EQUAL_C(subgroup->GetCounter("PendingRequestsCount", false)->Val(), 0, description); + UNIT_ASSERT_VALUES_EQUAL_C(subgroup->GetCounter("FinishingRequestsCount", false)->Val(), 0, description); - const std::vector> tableQueries = { - {"TCleanupTablesQuery", false}, - {"TRefreshPoolStateQuery", true}, - {"TDelayRequestQuery", true}, - {"TStartFirstDelayedRequestQuery", true}, - {"TStartRequestQuery", false}, - {"TCleanupRequestsQuery", true}, + const std::vector tableQueries = { + "TCleanupTablesQuery", + "TRefreshPoolStateQuery", + "TDelayRequestQuery", + "TStartFirstDelayedRequestQuery", + "TStartRequestQuery", + "TCleanupRequestsQuery", }; - for (const auto& [operation, runExpected] : tableQueries) { + for (const auto& operation : tableQueries) { auto operationSubgroup = subgroup->GetSubgroup("operation", operation); - - UNIT_ASSERT_VALUES_EQUAL_C(operationSubgroup->GetCounter("FinishError", true)->Val(), 0, TStringBuilder() << "Unexpected vaule for operation " << operation); - if (runExpected) { - UNIT_ASSERT_GE_C(operationSubgroup->GetCounter("FinishOk", true)->Val(), 1, TStringBuilder() << "Unexpected vaule for operation " << operation); - } + UNIT_ASSERT_VALUES_EQUAL_C(operationSubgroup->GetCounter("FinishError", true)->Val(), 0, TStringBuilder() << description << ", unexpected vaule for operation " << operation); } } @@ -513,9 +571,10 @@ class TWorkloadServiceYdbSetup : public IYdbSetup { const TYdbSetupSettings Settings_; TPortManager PortManager_; - std::unique_ptr Server_; + TServer::TPtr Server_; std::unique_ptr Client_; std::unique_ptr YdbDriver_; + std::unique_ptr Tenants_; std::unique_ptr TableClient_; std::unique_ptr TableClientSession_; @@ -564,6 +623,33 @@ TIntrusivePtr TYdbSetupSettings::Create() const { return MakeIntrusive(*this); } +TString TYdbSetupSettings::GetDedicatedTenantName() const { + return TStringBuilder() << 
CanonizePath(DomainName_) << "/test-dedicated"; +} + +TString TYdbSetupSettings::GetSharedTenantName() const { + return TStringBuilder() << CanonizePath(DomainName_) << "/test-shared"; +} + +TString TYdbSetupSettings::GetServerlessTenantName() const { + return TStringBuilder() << CanonizePath(DomainName_) << "/test-serverless"; +} + +//// IYdbSetup + +void IYdbSetup::WaitFor(TDuration timeout, TString description, std::function callback) { + TInstant start = TInstant::Now(); + while (TInstant::Now() - start <= timeout) { + TString errorString; + if (callback(errorString)) { + return; + } + Cerr << "Wait " << description << " " << TInstant::Now() - start << ": " << errorString << "\n"; + Sleep(TDuration::Seconds(1)); + } + UNIT_ASSERT_C(false, "Waiting " << description << " timeout. Spent time " << TInstant::Now() - start << " exceeds limit " << timeout); +} + //// TSampleQueriess void TSampleQueries::CompareYson(const TString& expected, const TString& actual) { diff --git a/ydb/core/kqp/workload_service/ut/common/kqp_workload_service_ut_common.h b/ydb/core/kqp/workload_service/ut/common/kqp_workload_service_ut_common.h index f673f070c11b..15fa13a2ea7d 100644 --- a/ydb/core/kqp/workload_service/ut/common/kqp_workload_service_ut_common.h +++ b/ydb/core/kqp/workload_service/ut/common/kqp_workload_service_ut_common.h @@ -26,6 +26,7 @@ struct TQueryRunnerSettings { FLUENT_SETTING_DEFAULT(ui32, NodeIndex, 0); FLUENT_SETTING_DEFAULT(TString, PoolId, ""); FLUENT_SETTING_DEFAULT(TString, UserSID, "user@" BUILTIN_SYSTEM_DOMAIN); + FLUENT_SETTING_DEFAULT(TString, Database, ""); // Runner settings FLUENT_SETTING_DEFAULT(bool, HangUpDuringExecution, false); @@ -66,7 +67,10 @@ struct TYdbSetupSettings { // Cluster settings FLUENT_SETTING_DEFAULT(ui32, NodeCount, 1); FLUENT_SETTING_DEFAULT(TString, DomainName, "Root"); + FLUENT_SETTING_DEFAULT(bool, CreateSampleTenants, false); FLUENT_SETTING_DEFAULT(bool, EnableResourcePools, true); + FLUENT_SETTING_DEFAULT(bool, 
EnableMetadataObjectsOnServerless, true); + FLUENT_SETTING_DEFAULT(bool, EnableExternalDataSourcesOnServerless, true); // Default pool settings FLUENT_SETTING_DEFAULT(TString, PoolId, "sample_pool_id"); @@ -74,12 +78,20 @@ struct TYdbSetupSettings { FLUENT_SETTING_DEFAULT(i32, QueueSize, -1); FLUENT_SETTING_DEFAULT(TDuration, QueryCancelAfter, FUTURE_WAIT_TIMEOUT); FLUENT_SETTING_DEFAULT(double, QueryMemoryLimitPercentPerNode, -1); + FLUENT_SETTING_DEFAULT(double, DatabaseLoadCpuThreshold, -1); TIntrusivePtr Create() const; + + TString GetDedicatedTenantName() const; + TString GetSharedTenantName() const; + TString GetServerlessTenantName() const; }; class IYdbSetup : public TThrRefBase { public: + // Cluster helpers + virtual void UpdateNodeCpuInfo(double usage, ui32 threads, ui64 nodeIndex = 0) = 0; + // Scheme queries helpers virtual NYdb::NScheme::TSchemeClient GetSchemeClient() const = 0; virtual void ExecuteSchemeQuery(const TString& query, NYdb::EStatus expectedStatus = NYdb::EStatus::SUCCESS, const TString& expectedMessage = "") const = 0; @@ -102,8 +114,10 @@ class IYdbSetup : public TThrRefBase { virtual void StopWorkloadService(ui64 nodeIndex = 0) const = 0; virtual void ValidateWorkloadServiceCounters(bool checkTableCounters = true, const TString& poolId = "") const = 0; + // Common helpers virtual TTestActorRuntime* GetRuntime() const = 0; virtual const TYdbSetupSettings& GetSettings() const = 0; + static void WaitFor(TDuration timeout, TString description, std::function callback); }; // Test queries @@ -126,6 +140,12 @@ struct TSampleQueries { UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), "Request timeout exceeded, cancelling after"); } + template + static void CheckNotFound(const TResult& result, const TString& poolId) { + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), NYdb::EStatus::NOT_FOUND, result.GetIssues().ToString()); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), TStringBuilder() << "Resource pool " << poolId << " 
not found or you don't have access permissions"); + } + struct TSelect42 { static constexpr char Query[] = "SELECT 42;"; diff --git a/ydb/core/kqp/workload_service/ut/kqp_workload_service_actors_ut.cpp b/ydb/core/kqp/workload_service/ut/kqp_workload_service_actors_ut.cpp index 8b9a8262609d..8d6880d3eb58 100644 --- a/ydb/core/kqp/workload_service/ut/kqp_workload_service_actors_ut.cpp +++ b/ydb/core/kqp/workload_service/ut/kqp_workload_service_actors_ut.cpp @@ -16,10 +16,20 @@ TEvPrivate::TEvFetchPoolResponse::TPtr FetchPool(TIntrusivePtr ydb, c auto runtime = ydb->GetRuntime(); const auto& edgeActor = runtime->AllocateEdgeActor(); - runtime->Register(CreatePoolFetcherActor(edgeActor, settings.DomainName_, poolId ? poolId : settings.PoolId_, MakeIntrusive(userSID, TVector{}))); + auto userToken = MakeIntrusive(userSID, TVector{}); + userToken->SaveSerializationInfo(); + runtime->Register(CreatePoolFetcherActor(edgeActor, settings.DomainName_, poolId ? poolId : settings.PoolId_, userToken)); return runtime->GrabEdgeEvent(edgeActor, FUTURE_WAIT_TIMEOUT); } +TEvPrivate::TEvCpuLoadResponse::TPtr FetchCpuInfo(TIntrusivePtr ydb) { + auto runtime = ydb->GetRuntime(); + const auto& edgeActor = runtime->AllocateEdgeActor(); + + runtime->Register(CreateCpuLoadFetcherActor(edgeActor)); + return runtime->GrabEdgeEvent(edgeActor, FUTURE_WAIT_TIMEOUT); +} + } // anonymous namespace Y_UNIT_TEST_SUITE(KqpWorkloadServiceActors) { @@ -100,7 +110,8 @@ Y_UNIT_TEST_SUITE(KqpWorkloadServiceActors) { // Check default pool access TSampleQueries::TSelect42::CheckResult(ydb->ExecuteQuery(TSampleQueries::TSelect42::Query, settings.UserSID(userSID))); - TSampleQueries::TSelect42::CheckResult(ydb->ExecuteQuery(TSampleQueries::TSelect42::Query, settings.UserSID(""))); + TSampleQueries::TSelect42::CheckResult(ydb->ExecuteQuery(TSampleQueries::TSelect42::Query, settings.UserSID(ydb->GetRuntime()->GetAppData().AllAuthenticatedUsers))); + 
TSampleQueries::TSelect42::CheckResult(ydb->ExecuteQuery(TSampleQueries::TSelect42::Query, settings.UserSID(BUILTIN_ACL_ROOT))); } Y_UNIT_TEST(TestDefaultPoolAdminPermissions) { @@ -129,6 +140,28 @@ Y_UNIT_TEST_SUITE(KqpWorkloadServiceActors) { DROP RESOURCE POOL )" << NResourcePool::DEFAULT_POOL_ID << ";" , settings)); } + + Y_UNIT_TEST(TestCpuLoadActor) { + const ui32 nodeCount = 5; + auto ydb = TYdbSetupSettings() + .NodeCount(nodeCount) + .Create(); + + auto response = FetchCpuInfo(ydb); + UNIT_ASSERT_VALUES_EQUAL_C(response->Get()->Status, Ydb::StatusIds::NOT_FOUND, response->Get()->Issues.ToOneLineString()); + UNIT_ASSERT_STRING_CONTAINS(response->Get()->Issues.ToString(), "Cpu info not found"); + + const double usage = 0.25; + const ui32 threads = 2; + for (size_t nodeIndex = 0; nodeIndex < nodeCount; ++nodeIndex) { + ydb->UpdateNodeCpuInfo(usage, threads, nodeIndex); + } + + response = FetchCpuInfo(ydb); + UNIT_ASSERT_VALUES_EQUAL_C(response->Get()->Status, Ydb::StatusIds::SUCCESS, response->Get()->Issues.ToOneLineString()); + UNIT_ASSERT_VALUES_EQUAL(response->Get()->CpuNumber, threads * nodeCount); + UNIT_ASSERT_DOUBLES_EQUAL(response->Get()->InstantLoad, usage, 0.01); + } } } // namespace NKikimr::NKqp diff --git a/ydb/core/kqp/workload_service/ut/kqp_workload_service_tables_ut.cpp b/ydb/core/kqp/workload_service/ut/kqp_workload_service_tables_ut.cpp index 288f3b72edeb..4d37370a8599 100644 --- a/ydb/core/kqp/workload_service/ut/kqp_workload_service_tables_ut.cpp +++ b/ydb/core/kqp/workload_service/ut/kqp_workload_service_tables_ut.cpp @@ -133,22 +133,26 @@ Y_UNIT_TEST_SUITE(KqpWorkloadServiceTables) { Y_UNIT_TEST(TestLeaseExpiration) { auto ydb = TYdbSetupSettings() .ConcurrentQueryLimit(1) + .QueryCancelAfter(TDuration::Zero()) .Create(); // Create tables - TSampleQueries::TSelect42::CheckResult(ydb->ExecuteQuery(TSampleQueries::TSelect42::Query)); + auto hangingRequest = ydb->ExecuteQueryAsync(TSampleQueries::TSelect42::Query, 
TQueryRunnerSettings().HangUpDuringExecution(true)); + ydb->WaitQueryExecution(hangingRequest); - const TDuration leaseDuration = TDuration::Seconds(10); - StartRequest(ydb, "test_session", leaseDuration); - DelayRequest(ydb, "test_session", leaseDuration); - CheckPoolDescription(ydb, 1, 1, leaseDuration); + auto delayedRequest = ydb->ExecuteQueryAsync(TSampleQueries::TSelect42::Query, TQueryRunnerSettings().ExecutionExpected(false)); + ydb->WaitPoolState({.DelayedRequests = 1, .RunningRequests = 1}); ydb->StopWorkloadService(); ydb->WaitPoolHandlersCount(0); // Check that lease expired - Sleep(leaseDuration + TDuration::Seconds(5)); - CheckPoolDescription(ydb, 0, 0); + IYdbSetup::WaitFor(TDuration::Seconds(60), "lease expiration", [ydb](TString& errorString) { + auto description = ydb->GetPoolDescription(TDuration::Zero()); + + errorString = TStringBuilder() << "delayed = " << description.DelayedRequests << ", running = " << description.RunningRequests; + return description.AmountRequests() == 0; + }); } Y_UNIT_TEST(TestLeaseUpdates) { diff --git a/ydb/core/kqp/workload_service/ut/kqp_workload_service_ut.cpp b/ydb/core/kqp/workload_service/ut/kqp_workload_service_ut.cpp index 553cb5b17933..5dda602ba3fc 100644 --- a/ydb/core/kqp/workload_service/ut/kqp_workload_service_ut.cpp +++ b/ydb/core/kqp/workload_service/ut/kqp_workload_service_ut.cpp @@ -123,6 +123,36 @@ Y_UNIT_TEST_SUITE(KqpWorkloadService) { TSampleQueries::TSelect42::CheckResult(hangingRequest.GetResult()); } + Y_UNIT_TEST(TestZeroQueueSizeManyQueries) { + const i32 inFlight = 10; + auto ydb = TYdbSetupSettings() + .ConcurrentQueryLimit(inFlight) + .QueueSize(0) + .QueryCancelAfter(FUTURE_WAIT_TIMEOUT * inFlight) + .Create(); + + auto settings = TQueryRunnerSettings().HangUpDuringExecution(true); + + std::vector asyncResults; + for (size_t i = 0; i < inFlight; ++i) { + asyncResults.emplace_back(ydb->ExecuteQueryAsync(TSampleQueries::TSelect42::Query, settings)); + } + + for (const auto& asyncResult : 
asyncResults) { + ydb->WaitQueryExecution(asyncResult); + } + + TSampleQueries::CheckOverloaded( + ydb->ExecuteQuery(TSampleQueries::TSelect42::Query, TQueryRunnerSettings().ExecutionExpected(false)), + ydb->GetSettings().PoolId_ + ); + + for (const auto& asyncResult : asyncResults) { + ydb->ContinueQueryExecution(asyncResult); + TSampleQueries::TSelect42::CheckResult(asyncResult.GetResult()); + } + } + Y_UNIT_TEST(TestQueryCancelAfterUnlimitedPool) { auto ydb = TYdbSetupSettings() .QueryCancelAfter(TDuration::Seconds(10)) @@ -189,6 +219,38 @@ Y_UNIT_TEST_SUITE(KqpWorkloadService) { UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), TStringBuilder() << "Resource pool " << ydb->GetSettings().PoolId_ << " was disabled due to zero concurrent query limit"); } + Y_UNIT_TEST(TestCpuLoadThreshold) { + auto ydb = TYdbSetupSettings() + .DatabaseLoadCpuThreshold(90) + .QueryCancelAfter(TDuration::Seconds(10)) + .Create(); + + // Simulate load + ydb->UpdateNodeCpuInfo(1.0, 1); + + auto result = ydb->ExecuteQuery(TSampleQueries::TSelect42::Query, TQueryRunnerSettings().ExecutionExpected(false)); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), NYdb::EStatus::CANCELLED, result.GetIssues().ToString()); + UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), TStringBuilder() << "Delay deadline exceeded in pool " << ydb->GetSettings().PoolId_); + } + + Y_UNIT_TEST(TestCpuLoadThresholdRefresh) { + auto ydb = TYdbSetupSettings() + .DatabaseLoadCpuThreshold(90) + .Create(); + + // Simulate load + ydb->UpdateNodeCpuInfo(1.0, 1); + + // Delay request + auto result = ydb->ExecuteQueryAsync(TSampleQueries::TSelect42::Query, TQueryRunnerSettings().ExecutionExpected(false)); + ydb->WaitPoolState({.DelayedRequests = 1, .RunningRequests = 0}); + + // Free load + ydb->ContinueQueryExecution(result); + ydb->UpdateNodeCpuInfo(0.0, 1); + TSampleQueries::TSelect42::CheckResult(result.GetResult(TDuration::Seconds(5))); + } + Y_UNIT_TEST(TestHandlerActorCleanup) { auto ydb = 
TYdbSetupSettings() .ConcurrentQueryLimit(1) @@ -197,7 +259,10 @@ Y_UNIT_TEST_SUITE(KqpWorkloadService) { TSampleQueries::TSelect42::CheckResult(ydb->ExecuteQuery(TSampleQueries::TSelect42::Query)); TSampleQueries::TSelect42::CheckResult(ydb->ExecuteQuery(TSampleQueries::TSelect42::Query, TQueryRunnerSettings().PoolId(NResourcePool::DEFAULT_POOL_ID))); - ydb->WaitPoolHandlersCount(0, 2, TDuration::Seconds(35)); + ydb->WaitPoolHandlersCount(0, 2, TDuration::Seconds(95)); + + // Check pool creation after cleanup + TSampleQueries::TSelect42::CheckResult(ydb->ExecuteQuery(TSampleQueries::TSelect42::Query)); } } @@ -412,19 +477,16 @@ Y_UNIT_TEST_SUITE(ResourcePoolsDdl) { DROP RESOURCE POOL )" << poolId << ";" ); - TInstant start = TInstant::Now(); - while (TInstant::Now() - start <= FUTURE_WAIT_TIMEOUT) { - if (ydb->Navigate(TStringBuilder() << ".resource_pools/" << poolId)->ResultSet.at(0).Kind == NSchemeCache::TSchemeCacheNavigate::EKind::KindUnknown) { - auto result = ydb->ExecuteQuery(TSampleQueries::TSelect42::Query, settings); - UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), NYdb::EStatus::NOT_FOUND, result.GetIssues().ToString()); - UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), TStringBuilder() << "Resource pool " << poolId << " not found"); - return; - } - - Cerr << "WaitPoolDrop " << TInstant::Now() - start << "\n"; - Sleep(TDuration::Seconds(1)); - } - UNIT_ASSERT_C(false, "Pool drop waiting timeout"); + IYdbSetup::WaitFor(FUTURE_WAIT_TIMEOUT, "pool drop", [ydb, poolId](TString& errorString) { + auto kind = ydb->Navigate(TStringBuilder() << ".resource_pools/" << poolId)->ResultSet.at(0).Kind; + + errorString = TStringBuilder() << "kind = " << kind; + return kind == NSchemeCache::TSchemeCacheNavigate::EKind::KindUnknown; + }); + + auto result = ydb->ExecuteQuery(TSampleQueries::TSelect42::Query, settings); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), NYdb::EStatus::NOT_FOUND, result.GetIssues().ToString()); + 
UNIT_ASSERT_STRING_CONTAINS(result.GetIssues().ToString(), TStringBuilder() << "Resource pool " << poolId << " not found"); } Y_UNIT_TEST(TestResourcePoolAcl) { diff --git a/ydb/core/kqp/workload_service/ya.make b/ydb/core/kqp/workload_service/ya.make index 40ee9196cdad..b8b5704044e2 100644 --- a/ydb/core/kqp/workload_service/ya.make +++ b/ydb/core/kqp/workload_service/ya.make @@ -7,7 +7,11 @@ SRCS( PEERDIR( ydb/core/cms/console + ydb/core/fq/libs/compute/common + ydb/core/kqp/workload_service/actors + + ydb/library/actors/interconnect ) YQL_LAST_ABI_VERSION() diff --git a/ydb/core/load_test/aggregated_result.cpp b/ydb/core/load_test/aggregated_result.cpp index 82ae3dee3e68..548a5628b442 100644 --- a/ydb/core/load_test/aggregated_result.cpp +++ b/ydb/core/load_test/aggregated_result.cpp @@ -6,6 +6,7 @@ #include #include +#include namespace NKikimr { @@ -77,55 +78,35 @@ IOutputStream& operator<<(IOutputStream& output, const TAggregatedResult& result return output; } -using TColumnPositions = THashMap; - -TColumnPositions GetColumnPositionsInResponse(const NKikimrMiniKQL::TType& ttype) { - TColumnPositions columnPositions; - for (const NKikimrMiniKQL::TMember& member : ttype.GetStruct().GetMember()) { - if (member.GetName() == "Data") { - const auto& listStruct = member.GetType().GetList().GetItem().GetStruct(); - for (const NKikimrMiniKQL::TMember& listMember : listStruct.GetMember()) { - columnPositions.emplace(listMember.GetName(), columnPositions.size()); - } - break; - } - } - return columnPositions; -} - -NKikimrMiniKQL::TValue GetOptional(const NKikimrMiniKQL::TValue& listItem, ui32 pos) { - return listItem.GetStruct(pos).GetOptional(); -} - template -T ExtractValue(const NKikimrMiniKQL::TValue& listItem, ui32 pos) { - Y_UNUSED(listItem, pos); +T ExtractValue(NYdb::TResultSetParser& parser, const TString& column) { + Y_UNUSED(parser, column); Y_ABORT("unimplemented"); } template<> -ui32 ExtractValue(const NKikimrMiniKQL::TValue& listItem, ui32 pos) { - return 
GetOptional(listItem, pos).GetUint32(); +ui32 ExtractValue(NYdb::TResultSetParser& parser, const TString& column) { + return parser.ColumnParser(column).GetOptionalUint32().GetOrElse(0); } template<> -ui64 ExtractValue(const NKikimrMiniKQL::TValue& listItem, ui32 pos) { - return GetOptional(listItem, pos).GetUint64(); +ui64 ExtractValue(NYdb::TResultSetParser& parser, const TString& column) { + return parser.ColumnParser(column).GetOptionalUint64().GetOrElse(0); } template<> -double ExtractValue(const NKikimrMiniKQL::TValue& listItem, ui32 pos) { - return GetOptional(listItem, pos).GetDouble(); +double ExtractValue(NYdb::TResultSetParser& parser, const TString& column) { + return parser.ColumnParser(column).GetOptionalDouble().GetOrElse(static_cast(0)); } template<> -TString ExtractValue(const NKikimrMiniKQL::TValue& listItem, ui32 pos) { - return GetOptional(listItem, pos).GetBytes(); +TString ExtractValue(NYdb::TResultSetParser& parser, const TString& column) { + return parser.ColumnParser(column).GetOptionalString().GetOrElse(""); } template<> -TInstant ExtractValue(const NKikimrMiniKQL::TValue& listItem, ui32 pos) { - return TInstant::Seconds(GetOptional(listItem, pos).GetUint32()); +TInstant ExtractValue(NYdb::TResultSetParser& parser, const TString& column) { + return TInstant::Seconds(parser.ColumnParser(column).GetOptionalUint32().GetOrElse(0)); } bool GetStatName(TStringBuf columnName, TStringBuf& statName, TStringBuf& suffix) { @@ -161,38 +142,40 @@ void SetInAggregatedField(TStringBuf suffix, T value, TAggregatedField& dst) } } -TAggregatedResult GetResultFromValueListItem(const NKikimrMiniKQL::TValue& listItem, const TColumnPositions& columnPositions) { +TAggregatedResult GetResultFromValueListItem(NYdb::TResultSetParser& parser, const NYdb::TResultSet& rs) { TAggregatedResult result; TStringBuf statName; TStringBuf suffix; TStringBuf levelSb; - for (const auto& [column, pos] : columnPositions) { + for (const auto& columnMeta : rs.GetColumnsMeta()) { + 
TString column = columnMeta.Name; + if (column == "id") { - result.Uuid = ExtractValue(listItem, pos); + result.Uuid = ExtractValue(parser, column); } else if (column == "start") { - result.Start = ExtractValue(listItem, pos); + result.Start = ExtractValue(parser, column); } else if (column == "finish") { - result.Finish = ExtractValue(listItem, pos); + result.Finish = ExtractValue(parser, column); } else if (column == "total_nodes") { - result.Stats.TotalNodes = ExtractValue(listItem, pos); + result.Stats.TotalNodes = ExtractValue(parser, column); } else if (column == "success_nodes") { - result.Stats.SuccessNodes = ExtractValue(listItem, pos); + result.Stats.SuccessNodes = ExtractValue(parser, column); } else if (column == "config") { - result.Config = ExtractValue(listItem, pos); + result.Config = ExtractValue(parser, column); } else if (GetStatName(column, statName, suffix)) { if (statName == "transactions") { if (suffix == "_avg") { - SetInAggregatedField(suffix, ExtractValue(listItem, pos), result.Stats.Transactions); + SetInAggregatedField(suffix, ExtractValue(parser, column), result.Stats.Transactions); } else { - SetInAggregatedField(suffix, ExtractValue(listItem, pos), result.Stats.Transactions); + SetInAggregatedField(suffix, ExtractValue(parser, column), result.Stats.Transactions); } } else if (statName == "transactions_per_sec") { - SetInAggregatedField(suffix, ExtractValue(listItem, pos), result.Stats.TransactionsPerSecond); + SetInAggregatedField(suffix, ExtractValue(parser, column), result.Stats.TransactionsPerSecond); } else if (statName == "errors_per_sec") { - SetInAggregatedField(suffix, ExtractValue(listItem, pos), result.Stats.ErrorsPerSecond); + SetInAggregatedField(suffix, ExtractValue(parser, column), result.Stats.ErrorsPerSecond); } else if (GetPercentileLevel(statName, levelSb)) { auto level = FromString(levelSb); - SetInAggregatedField(suffix, ExtractValue(listItem, pos), result.Stats.Percentiles[level]); + SetInAggregatedField(suffix, 
ExtractValue(parser, column), result.Stats.Percentiles[level]); } } } @@ -200,16 +183,16 @@ TAggregatedResult GetResultFromValueListItem(const NKikimrMiniKQL::TValue& listI } bool LoadResultFromResponseProto(const NKikimrKqp::TQueryResponse& response, TVector& results) { - const auto& ttype = response.GetResults(0).GetType(); - auto columnPositions = GetColumnPositionsInResponse(ttype); - if (columnPositions.empty()) { - return false; - } + Y_ABORT_UNLESS(response.GetYdbResults().size() > 0); + + NYdb::TResultSet rs(response.GetYdbResults(0)); + NYdb::TResultSetParser parser(response.GetYdbResults(0)); results.clear(); - for (const NKikimrMiniKQL::TValue& listItem : response.GetResults(0).GetValue().GetStruct().Get(0).GetList()) { - results.push_back(GetResultFromValueListItem(listItem, columnPositions)); + while(parser.TryNextRow()) { + results.push_back(GetResultFromValueListItem(parser, rs)); } + return true; } diff --git a/ydb/core/local_pgwire/local_pgwire.cpp b/ydb/core/local_pgwire/local_pgwire.cpp index f71cd38d07d5..7dc6f855ce00 100644 --- a/ydb/core/local_pgwire/local_pgwire.cpp +++ b/ydb/core/local_pgwire/local_pgwire.cpp @@ -18,33 +18,8 @@ class TPgYdbProxy : public TActor { using TBase = TActor; struct TSecurityState { - TString Ticket; - Ydb::Auth::LoginResult LoginResult; - TEvTicketParser::TError Error; - TIntrusiveConstPtr Token; TString SerializedToken; - }; - - struct TTokenState { - std::unordered_set Senders; - }; - - struct TEvPrivate { - enum EEv { - EvTokenReady = EventSpaceBegin(NActors::TEvents::ES_PRIVATE), - EvEnd - }; - - static_assert(EvEnd < EventSpaceEnd(NActors::TEvents::ES_PRIVATE), "expect EvEnd < EventSpaceEnd(NActors::TEvents::ES_PRIVATE)"); - - struct TEvTokenReady : TEventLocal { - Ydb::Auth::LoginResult LoginResult; - TActorId Sender; - TString Database; - TString PeerName; - - TEvTokenReady() = default; - }; + TString Ticket; }; struct TConnectionState { @@ -54,7 +29,6 @@ class TPgYdbProxy : public TActor { 
std::unordered_map ConnectionState; std::unordered_map SecurityState; - std::unordered_map TokenState; uint32_t ConnectionNum = 0; public: @@ -63,85 +37,24 @@ class TPgYdbProxy : public TActor { { } - void Handle(TEvTicketParser::TEvAuthorizeTicketResult::TPtr& ev) { - auto token = ev->Get()->Ticket; - auto itTokenState = TokenState.find(token); - if (itTokenState == TokenState.end()) { - BLOG_W("Couldn't find token in reply from TicketParser"); - return; - } - for (auto sender : itTokenState->second.Senders) { - auto& securityState(SecurityState[sender]); - securityState.Ticket = token; - securityState.Error = ev->Get()->Error; - securityState.Token = ev->Get()->Token; - securityState.SerializedToken = ev->Get()->SerializedToken; - auto authResponse = std::make_unique(); - if (ev->Get()->Error) { - authResponse->Error = ev->Get()->Error.Message; - } - Send(sender, authResponse.release()); - } - TokenState.erase(itTokenState); - } - - void Handle(TEvPrivate::TEvTokenReady::TPtr& ev) { - auto token = ev->Get()->LoginResult.token(); - auto itTokenState = TokenState.find(token); - if (itTokenState == TokenState.end()) { - itTokenState = TokenState.insert({token, {}}).first; - } - bool needSend = itTokenState->second.Senders.empty(); - itTokenState->second.Senders.insert(ev->Get()->Sender); - if (needSend) { - Send(MakeTicketParserID(), new TEvTicketParser::TEvAuthorizeTicket({ - .Database = ev->Get()->Database, - .Ticket = token, - .PeerName = ev->Get()->PeerName, - })); - } - SecurityState[ev->Get()->Sender].LoginResult = std::move(ev->Get()->LoginResult); - } - void Handle(NPG::TEvPGEvents::TEvAuth::TPtr& ev) { - std::unordered_map clientParams = ev->Get()->InitialMessage->GetClientParams(); BLOG_D("TEvAuth " << ev->Get()->InitialMessage->Dump() << " cookie " << ev->Cookie); - Ydb::Auth::LoginRequest request; - request.set_user(clientParams["user"]); + std::unordered_map clientParams = ev->Get()->InitialMessage->GetClientParams(); + TPgWireAuthData pgWireAuthData; + 
pgWireAuthData.UserName = clientParams["user"]; if (ev->Get()->PasswordMessage) { - request.set_password(TString(ev->Get()->PasswordMessage->GetPassword())); + pgWireAuthData.Password = TString(ev->Get()->PasswordMessage->GetPassword()); } - TActorSystem* actorSystem = TActivationContext::ActorSystem(); - TActorId sender = ev->Sender; - TString database = clientParams["database"]; - if (database == "/postgres") { + pgWireAuthData.Sender = ev->Sender; + pgWireAuthData.DatabasePath = clientParams["database"]; + if (pgWireAuthData.DatabasePath == "/postgres") { auto authResponse = std::make_unique(); authResponse->Error = Ydb::StatusIds_StatusCode_Name(Ydb::StatusIds_StatusCode::StatusIds_StatusCode_BAD_REQUEST); - actorSystem->Send(sender, authResponse.release()); + Send(pgWireAuthData.Sender, authResponse.release()); } - TString peerName = TStringBuilder() << ev->Get()->Address; + pgWireAuthData.PeerName = TStringBuilder() << ev->Get()->Address; - using TRpcEv = NGRpcService::TGRpcRequestWrapperNoAuth; - auto rpcFuture = NRpcService::DoLocalRpc(std::move(request), database, {}, actorSystem); - rpcFuture.Subscribe([actorSystem, sender, database, peerName, selfId = SelfId()](const NThreading::TFuture& future) { - auto& response = future.GetValueSync(); - if (response.operation().status() == Ydb::StatusIds::SUCCESS) { - auto tokenReady = std::make_unique(); - response.operation().result().UnpackTo(&(tokenReady->LoginResult)); - tokenReady->Sender = sender; - tokenReady->Database = database; - tokenReady->PeerName = peerName; - actorSystem->Send(selfId, tokenReady.release()); - } else { - auto authResponse = std::make_unique(); - if (response.operation().issues_size() > 0) { - authResponse->Error = response.operation().issues(0).message(); - } else { - authResponse->Error = Ydb::StatusIds_StatusCode_Name(response.operation().status()); - } - actorSystem->Send(sender, authResponse.release()); - } - }); + Register(CreateLocalPgWireAuthActor(pgWireAuthData, SelfId())); } 
void Handle(NPG::TEvPGEvents::TEvConnectionOpened::TPtr& ev) { @@ -173,7 +86,6 @@ class TPgYdbProxy : public TActor { } SecurityState.erase(ev->Sender); ConnectionState.erase(itConnection); - // TODO: cleanup TokenState too } void Handle(NPG::TEvPGEvents::TEvQuery::TPtr& ev) { @@ -236,6 +148,18 @@ class TPgYdbProxy : public TActor { } } + void Handle(TEvEvents::TEvAuthResponse::TPtr& ev) { + auto& securityState = SecurityState[ev->Get()->Sender]; + auto authResponse = std::make_unique(); + if (!ev->Get()->ErrorMessage.empty()) { + authResponse->Error = ev->Get()->ErrorMessage; + } else { + securityState.SerializedToken = ev->Get()->SerializedToken; + securityState.Ticket = ev->Get()->Ticket; + } + Send(ev->Get()->Sender, authResponse.release()); + } + STATEFN(StateWork) { switch (ev->GetTypeRewrite()) { hFunc(NPG::TEvPGEvents::TEvAuth, Handle); @@ -248,8 +172,7 @@ class TPgYdbProxy : public TActor { hFunc(NPG::TEvPGEvents::TEvExecute, Handle); hFunc(NPG::TEvPGEvents::TEvClose, Handle); hFunc(NPG::TEvPGEvents::TEvCancelRequest, Handle); - hFunc(TEvPrivate::TEvTokenReady, Handle); - hFunc(TEvTicketParser::TEvAuthorizeTicketResult, Handle); + hFunc(TEvEvents::TEvAuthResponse, Handle); } } }; diff --git a/ydb/core/local_pgwire/local_pgwire.h b/ydb/core/local_pgwire/local_pgwire.h index a5c9cc395793..b9d6588981b3 100644 --- a/ydb/core/local_pgwire/local_pgwire.h +++ b/ydb/core/local_pgwire/local_pgwire.h @@ -1,3 +1,6 @@ +#pragma once + +#include "local_pgwire_util.h" #include namespace NLocalPgWire { @@ -5,4 +8,6 @@ namespace NLocalPgWire { inline NActors::TActorId CreateLocalPgWireProxyId(uint32_t nodeId = 0) { return NActors::TActorId(nodeId, "localpgwire"); } NActors::IActor* CreateLocalPgWireProxy(); +NActors::IActor* CreateLocalPgWireAuthActor(const TPgWireAuthData& pgWireAuthData, const NActors::TActorId& pgYdbProxy); + } diff --git a/ydb/core/local_pgwire/local_pgwire_auth_actor.cpp b/ydb/core/local_pgwire/local_pgwire_auth_actor.cpp new file mode 100644 index 
000000000000..896247f599b9 --- /dev/null +++ b/ydb/core/local_pgwire/local_pgwire_auth_actor.cpp @@ -0,0 +1,194 @@ +#include "log_impl.h" +#include "local_pgwire.h" +#include "local_pgwire_util.h" + +#include +#include +#include +#include + +#include +#include + +#include + +#include + +namespace NLocalPgWire { + +using namespace NActors; +using namespace NKikimr; + +class TPgYdbAuthActor : public NActors::TActorBootstrapped { + using TBase = TActor; + + struct TEvPrivate { + enum EEv { + EvTokenReady = EventSpaceBegin(NActors::TEvents::ES_PRIVATE), + EvAuthFailed, + EvEnd + }; + + static_assert(EvEnd < EventSpaceEnd(NActors::TEvents::ES_PRIVATE), "expect EvEnd < EventSpaceEnd(NActors::TEvents::ES_PRIVATE)"); + + struct TEvTokenReady : TEventLocal { + Ydb::Auth::LoginResult LoginResult; + + TEvTokenReady() = default; + }; + + struct TEvAuthFailed : NActors::TEventLocal { + TString ErrorMessage; + }; + }; + + TPgWireAuthData PgWireAuthData; + TActorId PgYdbProxy; + + TString DatabaseId; + TString FolderId; + TString SerializedToken; + TString Ticket; + +public: + TPgYdbAuthActor(const TPgWireAuthData& pgWireAuthData, const TActorId& pgYdbProxy) + : PgWireAuthData(pgWireAuthData) + , PgYdbProxy(pgYdbProxy) { + } + + /* Starts authentication: the "__ydb_apikey" user goes through SendDescribeRequest(), any other user through SendLoginRequest(). */ + void Bootstrap() { + if (PgWireAuthData.UserName == "__ydb_apikey") { + if (PgWireAuthData.Password.empty()) { + SendResponseAndDie("Invalid password"); + return; /* SendResponseAndDie() has already called PassAway(); do not send the describe request or switch state */ + } + SendDescribeRequest(); + } else { + SendLoginRequest(); + } + + Become(&TPgYdbAuthActor::StateWork); + } + + void Handle(TEvTicketParser::TEvAuthorizeTicketResult::TPtr& ev) { + if (ev->Get()->Error) { + SendResponseAndDie(ev->Get()->Error.Message); + return; + } + + SerializedToken = ev->Get()->SerializedToken; + Ticket = ev->Get()->Ticket; + + SendResponseAndDie(); + } + + void Handle(TEvPrivate::TEvTokenReady::TPtr& ev) { + Send(MakeTicketParserID(), new TEvTicketParser::TEvAuthorizeTicket({ + .Database = PgWireAuthData.DatabasePath, + .Ticket = ev->Get()->LoginResult.token(), + .PeerName = 
PgWireAuthData.PeerName, + })); + } + + void Handle(TEvPrivate::TEvAuthFailed::TPtr& ev) { + SendResponseAndDie(ev->Get()->ErrorMessage); + } + + void Handle(NKikimr::TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { + const NKikimr::NSchemeCache::TSchemeCacheNavigate* navigate = ev->Get()->Request.Get(); + if (navigate->ErrorCount) { + SendResponseAndDie(TStringBuilder() << "Database with path '" << PgWireAuthData.DatabasePath << "' doesn't exists"); + return; + } + Y_ABORT_UNLESS(navigate->ResultSet.size() == 1); + + const auto& entry = navigate->ResultSet.front(); + + for (const auto& attr : entry.Attributes) { + if (attr.first == "folder_id") FolderId = attr.second; + else if (attr.first == "database_id") DatabaseId = attr.second; + } + + SendApiKeyRequest(); + } + + STATEFN(StateWork) { + switch (ev->GetTypeRewrite()) { + hFunc(TEvPrivate::TEvTokenReady, Handle); + hFunc(TEvTicketParser::TEvAuthorizeTicketResult, Handle); + hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, Handle); + hFunc(TEvPrivate::TEvAuthFailed, Handle); + } + } +private: + void SendLoginRequest() { + Ydb::Auth::LoginRequest request; + request.set_user(PgWireAuthData.UserName); + if (!PgWireAuthData.Password.empty()) { + request.set_password(PgWireAuthData.Password); + } + + auto* actorSystem = TActivationContext::ActorSystem();; + + using TRpcEv = NGRpcService::TGRpcRequestWrapperNoAuth; + auto rpcFuture = NRpcService::DoLocalRpc(std::move(request), PgWireAuthData.DatabasePath, {}, actorSystem); + rpcFuture.Subscribe([actorSystem, selfId = SelfId()](const NThreading::TFuture& future) { + auto& response = future.GetValueSync(); + if (response.operation().status() == Ydb::StatusIds::SUCCESS) { + auto tokenReady = std::make_unique(); + response.operation().result().UnpackTo(&(tokenReady->LoginResult)); + actorSystem->Send(selfId, tokenReady.release()); + } else { + auto authFailedEvent = std::make_unique(); + if (response.operation().issues_size() > 0) { + 
authFailedEvent->ErrorMessage = response.operation().issues(0).message(); + } else { + authFailedEvent->ErrorMessage = Ydb::StatusIds_StatusCode_Name(response.operation().status()); + } + actorSystem->Send(selfId, authFailedEvent.release()); + } + }); + } + + void SendApiKeyRequest() { + auto entries = NKikimr::NGRpcProxy::V1::GetTicketParserEntries(DatabaseId, FolderId); + + Send(NKikimr::MakeTicketParserID(), new NKikimr::TEvTicketParser::TEvAuthorizeTicket({ + .Database = PgWireAuthData.DatabasePath, + .Ticket = "ApiKey " + PgWireAuthData.Password, + .PeerName = PgWireAuthData.PeerName, + .Entries = entries + })); + } + + void SendDescribeRequest() { + auto schemeCacheRequest = std::make_unique(); + NKikimr::NSchemeCache::TSchemeCacheNavigate::TEntry entry; + entry.Path = NKikimr::SplitPath(PgWireAuthData.DatabasePath); + entry.Operation = NKikimr::NSchemeCache::TSchemeCacheNavigate::OpPath; + entry.SyncVersion = false; + schemeCacheRequest->ResultSet.emplace_back(entry); + Send(NKikimr::MakeSchemeCacheID(), MakeHolder(schemeCacheRequest.release())); + } + + void SendResponseAndDie(const TString& errorMessage = "") { + std::unique_ptr authResponse; + if (!errorMessage.empty()) { + authResponse = std::make_unique(errorMessage, PgWireAuthData.Sender); + } else { + authResponse = std::make_unique(SerializedToken, Ticket, PgWireAuthData.Sender); + } + + Send(PgYdbProxy, authResponse.release()); + + PassAway(); + } +}; + + +NActors::IActor* CreateLocalPgWireAuthActor(const TPgWireAuthData& pgWireAuthData, const TActorId& pgYdbProxy) { + return new TPgYdbAuthActor(pgWireAuthData, pgYdbProxy); +} + +} diff --git a/ydb/core/local_pgwire/local_pgwire_util.h b/ydb/core/local_pgwire/local_pgwire_util.h index 21ecf6dd88db..0ef16d84b27e 100644 --- a/ydb/core/local_pgwire/local_pgwire_util.h +++ b/ydb/core/local_pgwire/local_pgwire_util.h @@ -30,6 +30,14 @@ struct TConnectionState { uint32_t ConnectionNum = 0; }; +struct TPgWireAuthData { + TActorId Sender; + TString 
UserName; + TString DatabasePath; + TString Password; + TString PeerName; +}; + struct TParsedStatement { NPG::TPGParse::TQueryData QueryData; std::vector ParameterTypes; @@ -56,6 +64,7 @@ struct TEvEvents { EvUpdateStatement, EvSingleQuery, EvCancelRequest, + EvAuthResponse, EvEnd }; @@ -98,6 +107,24 @@ struct TEvEvents { struct TEvCancelRequest : NActors::TEventLocal { TEvCancelRequest() = default; }; + + struct TEvAuthResponse : NActors::TEventLocal { + TString SerializedToken; + TString Ticket; + TString ErrorMessage; + TActorId Sender; + + TEvAuthResponse(const TString& serializedToken, const TString& ticket, const TActorId& sender) + : SerializedToken(serializedToken) + , Ticket(ticket) + , Sender(sender) + {} + + TEvAuthResponse(const TString& errorMessage, const TActorId& sender) + : ErrorMessage(errorMessage) + , Sender(sender) + {} + }; }; TString ColumnPrimitiveValueToString(NYdb::TValueParser& valueParser); diff --git a/ydb/core/local_pgwire/ya.make b/ydb/core/local_pgwire/ya.make index d63b67b22f5e..71b533976682 100644 --- a/ydb/core/local_pgwire/ya.make +++ b/ydb/core/local_pgwire/ya.make @@ -1,6 +1,7 @@ LIBRARY() SRCS( + local_pgwire_auth_actor.cpp local_pgwire_connection.cpp local_pgwire.cpp local_pgwire.h @@ -18,6 +19,7 @@ PEERDIR( ydb/core/kqp/common/events ydb/core/kqp/common/simple ydb/core/kqp/executer_actor + ydb/core/base ydb/core/grpc_services ydb/core/grpc_services/local_rpc ydb/core/protos @@ -25,6 +27,7 @@ PEERDIR( ydb/core/ydb_convert ydb/public/api/grpc ydb/public/lib/operation_id/protos + ydb/services/persqueue_v1/actors ) YQL_LAST_ABI_VERSION() diff --git a/ydb/core/log_backend/log_backend.cpp b/ydb/core/log_backend/log_backend.cpp index 249f1b6a77d1..ad56e773bef8 100644 --- a/ydb/core/log_backend/log_backend.cpp +++ b/ydb/core/log_backend/log_backend.cpp @@ -15,7 +15,14 @@ TAutoPtr CreateLogBackendWithUnifiedAgent( const auto& uaClientConfig = logConfig.GetUAClientConfig(); auto uaCounters = GetServiceCounters(counters, 
"utils")->GetSubgroup("subsystem", "ua_client"); auto logName = uaClientConfig.GetLogName(); - TAutoPtr uaLogBackend = TLogBackendBuildHelper::CreateLogBackendFromUAClientConfig(uaClientConfig, uaCounters, logName); + TAutoPtr uaLogBackend = TLogBackendBuildHelper::CreateLogBackendFromUAClientConfig( + uaClientConfig, + uaCounters, + logName, + runConfig.TenantName == "" ? "static" : "slot", + runConfig.TenantName, + runConfig.ClusterName + ); logBackend = logBackend ? NActors::CreateCompositeLogBackend({logBackend, uaLogBackend}) : uaLogBackend; } if (logBackend) { @@ -52,7 +59,14 @@ TAutoPtr CreateMeteringLogBackendWithUnifiedAgent( auto logName = meteringConfig.HasLogName() ? meteringConfig.GetLogName() : uaClientConfig.GetLogName(); - TAutoPtr uaLogBackend = TLogBackendBuildHelper::CreateLogBackendFromUAClientConfig(uaClientConfig, uaCounters, logName); + TAutoPtr uaLogBackend = TLogBackendBuildHelper::CreateLogBackendFromUAClientConfig( + uaClientConfig, + uaCounters, + logName, + runConfig.TenantName == "" ? "static" : "slot", + runConfig.TenantName, + runConfig.ClusterName + ); logBackend = logBackend ? NActors::CreateCompositeLogBackend({logBackend, uaLogBackend}) : uaLogBackend; } @@ -100,7 +114,14 @@ TAutoPtr CreateAuditLogUnifiedAgentBackend( auto logName = runConfig.AppConfig.GetAuditConfig().GetUnifiedAgentBackend().HasLogName() ? runConfig.AppConfig.GetAuditConfig().GetUnifiedAgentBackend().GetLogName() : uaClientConfig.GetLogName(); - logBackend = TLogBackendBuildHelper::CreateLogBackendFromUAClientConfig(uaClientConfig, uaCounters, logName); + logBackend = TLogBackendBuildHelper::CreateLogBackendFromUAClientConfig( + uaClientConfig, + uaCounters, + logName, + runConfig.TenantName == "" ? 
"static" : "slot", + runConfig.TenantName, + runConfig.ClusterName + ); } return logBackend; diff --git a/ydb/core/log_backend/log_backend_build.cpp b/ydb/core/log_backend/log_backend_build.cpp index 1261f2dde53d..db31ce3722a7 100644 --- a/ydb/core/log_backend/log_backend_build.cpp +++ b/ydb/core/log_backend/log_backend_build.cpp @@ -15,7 +15,11 @@ TAutoPtr TLogBackendBuildHelper::CreateLogBackendFromLogConfig(cons return logBackend; } -TAutoPtr TLogBackendBuildHelper::CreateLogBackendFromUAClientConfig(const NKikimrConfig::TUAClientConfig& uaClientConfig, NMonitoring::TDynamicCounterPtr uaCounters, const TString& logName) { +TAutoPtr TLogBackendBuildHelper::CreateLogBackendFromUAClientConfig(const NKikimrConfig::TUAClientConfig& uaClientConfig, + NMonitoring::TDynamicCounterPtr uaCounters, + const TString& logName, const TString& nodeType = "static", + const TString& tenant = "", + const TString& clusterName = "") { auto parameters = NUnifiedAgent::TClientParameters(uaClientConfig.GetUri()) .SetCounters(uaCounters) .SetMaxInflightBytes(uaClientConfig.GetMaxInflightBytes()); @@ -44,6 +48,18 @@ TAutoPtr TLogBackendBuildHelper::CreateLogBackendFromUAClientConfig (*sessionParameters.Meta)["_log_name"] = logName; } + if (nodeType) { + (*sessionParameters.Meta)["node_type"] = nodeType; + } + + if (tenant) { + (*sessionParameters.Meta)["database"] = tenant; + } + + if (clusterName) { + (*sessionParameters.Meta)["cluster"] = clusterName; + } + TAutoPtr uaLogBackend = MakeLogBackend(parameters, sessionParameters).Release(); return uaLogBackend; } diff --git a/ydb/core/log_backend/log_backend_build.h b/ydb/core/log_backend/log_backend_build.h index ab672bb81961..0577a01004bc 100644 --- a/ydb/core/log_backend/log_backend_build.h +++ b/ydb/core/log_backend/log_backend_build.h @@ -16,6 +16,9 @@ class TLogBackendBuildHelper { static TAutoPtr CreateLogBackendFromUAClientConfig( const NKikimrConfig::TUAClientConfig& uaClientConfig, NMonitoring::TDynamicCounterPtr uaCounters, - 
const TString& logName); + const TString& logName, + const TString& nodeType, + const TString& tenant, + const TString& clusterName); }; } diff --git a/ydb/core/mind/bscontroller/bsc.cpp b/ydb/core/mind/bscontroller/bsc.cpp index 9ce2d1ca2700..b56b6dfe11f4 100644 --- a/ydb/core/mind/bscontroller/bsc.cpp +++ b/ydb/core/mind/bscontroller/bsc.cpp @@ -131,6 +131,8 @@ void TBlobStorageController::Handle(TEvNodeWardenStorageConfig::TPtr ev) { auto prevStaticVSlots = std::exchange(StaticVSlots, {}); StaticVDiskMap.clear(); + const TMonotonic mono = TActivationContext::Monotonic(); + if (StorageConfig.HasBlobStorageConfig()) { if (const auto& bsConfig = StorageConfig.GetBlobStorageConfig(); bsConfig.HasServiceSet()) { const auto& ss = bsConfig.GetServiceSet(); @@ -143,7 +145,7 @@ void TBlobStorageController::Handle(TEvNodeWardenStorageConfig::TPtr ev) { const auto& location = vslot.GetVDiskLocation(); const TPDiskId pdiskId(location.GetNodeID(), location.GetPDiskID()); const TVSlotId vslotId(pdiskId, location.GetVDiskSlotID()); - StaticVSlots.try_emplace(vslotId, vslot, prevStaticVSlots); + StaticVSlots.try_emplace(vslotId, vslot, prevStaticVSlots, mono); const TVDiskID& vdiskId = VDiskIDFromVDiskID(vslot.GetVDiskID()); StaticVDiskMap.emplace(vdiskId, vslotId); StaticVDiskMap.emplace(TVDiskID(vdiskId.GroupID, 0, vdiskId), vslotId); @@ -270,6 +272,13 @@ void TBlobStorageController::Handle(TEvInterconnect::TEvNodesInfo::TPtr &ev) { const bool initial = !HostRecords; HostRecords = std::make_shared(ev->Get()); if (initial) { + if (auto *appData = AppData()) { + if (appData->Icb) { + EnableSelfHealWithDegraded = std::make_shared(0, 0, 1); + appData->Icb->RegisterSharedControl(*EnableSelfHealWithDegraded, + "BlobStorageControllerControls.EnableSelfHealWithDegraded"); + } + } SelfHealId = Register(CreateSelfHealActor()); PushStaticGroupsToSelfHeal(); if (StorageConfigObtained) { @@ -337,7 +346,7 @@ void TBlobStorageController::ValidateInternalState() { 
Y_ABORT_UNLESS(donor->GetShortVDiskId() == vslot->GetShortVDiskId()); } if (vslot->Group) { - if (vslot->Status == NKikimrBlobStorage::EVDiskStatus::READY) { + if (vslot->GetStatus() == NKikimrBlobStorage::EVDiskStatus::READY) { Y_DEBUG_ABORT_UNLESS(vslot->IsReady || vslot->IsInVSlotReadyTimestampQ()); } else { Y_DEBUG_ABORT_UNLESS(!vslot->IsReady && !vslot->IsInVSlotReadyTimestampQ()); @@ -401,7 +410,7 @@ ui32 TBlobStorageController::GetEventPriority(IEventHandle *ev) { const auto& record = msg->Record; for (const auto& item : record.GetVDiskStatus()) { const TVSlotId vslotId(item.GetNodeId(), item.GetPDiskId(), item.GetVSlotId()); - if (TVSlotInfo *slot = FindVSlot(vslotId); slot && slot->Status > item.GetStatus()) { + if (TVSlotInfo *slot = FindVSlot(vslotId); slot && slot->GetStatus() > item.GetStatus()) { return 1; } else if (const auto it = StaticVSlots.find(vslotId); it != StaticVSlots.end() && it->second.VDiskStatus > item.GetStatus()) { return 1; diff --git a/ydb/core/mind/bscontroller/cmds_box.cpp b/ydb/core/mind/bscontroller/cmds_box.cpp index e1e821c5c643..4b7f53c6beb0 100644 --- a/ydb/core/mind/bscontroller/cmds_box.cpp +++ b/ydb/core/mind/bscontroller/cmds_box.cpp @@ -213,7 +213,7 @@ namespace NKikimr::NBsController { for (const auto& [id, slot] : pdisk->VSlotsOnPDisk) { if (slot->Group) { auto *m = VSlots.FindForUpdate(slot->VSlotId); - m->Status = NKikimrBlobStorage::EVDiskStatus::ERROR; + m->VDiskStatus = NKikimrBlobStorage::EVDiskStatus::ERROR; m->IsReady = false; TGroupInfo *group = Groups.FindForUpdate(slot->Group->ID); GroupFailureModelChanged.insert(slot->Group->ID); diff --git a/ydb/core/mind/bscontroller/cmds_storage_pool.cpp b/ydb/core/mind/bscontroller/cmds_storage_pool.cpp index 516ab802d6ad..01a09938030b 100644 --- a/ydb/core/mind/bscontroller/cmds_storage_pool.cpp +++ b/ydb/core/mind/bscontroller/cmds_storage_pool.cpp @@ -557,7 +557,7 @@ namespace NKikimr::NBsController { x->MutableVDiskMetrics()->CopyFrom(*vslot.VDiskMetrics); 
x->MutableVDiskMetrics()->ClearVDiskId(); } - x->SetStatus(NKikimrBlobStorage::EVDiskStatus_Name(vslot.VDiskStatus)); + x->SetStatus(NKikimrBlobStorage::EVDiskStatus_Name(vslot.VDiskStatus.value_or(NKikimrBlobStorage::EVDiskStatus::ERROR))); x->SetReady(vslot.ReadySince <= mono); } if (const auto& s = Self.StorageConfig; s.HasBlobStorageConfig()) { @@ -698,7 +698,7 @@ namespace NKikimr::NBsController { TGroupInfo *group = Groups.FindForUpdate(vslot->GroupId); vslot->Mood = TMood::Wipe; - vslot->Status = NKikimrBlobStorage::EVDiskStatus::ERROR; + vslot->VDiskStatus = NKikimrBlobStorage::EVDiskStatus::ERROR; vslot->IsReady = false; GroupFailureModelChanged.insert(group->ID); group->CalculateGroupStatus(); @@ -744,7 +744,7 @@ namespace NKikimr::NBsController { TGroupInfo *group = Groups.FindForUpdate(vslot->GroupId); vslot->Mood = targetMood; - vslot->Status = NKikimrBlobStorage::EVDiskStatus::ERROR; + vslot->VDiskStatus = NKikimrBlobStorage::EVDiskStatus::ERROR; vslot->IsReady = false; GroupFailureModelChanged.insert(group->ID); group->CalculateGroupStatus(); diff --git a/ydb/core/mind/bscontroller/config.cpp b/ydb/core/mind/bscontroller/config.cpp index 4ed5a2cffc5b..b63680720525 100644 --- a/ydb/core/mind/bscontroller/config.cpp +++ b/ydb/core/mind/bscontroller/config.cpp @@ -486,9 +486,9 @@ namespace NKikimr::NBsController { if (!overlay->second || !overlay->second->Group) { // deleted one (overlay->second ? 
overlay->second : base->second)->DropFromVSlotReadyTimestampQ(); NotReadyVSlotIds.erase(overlay->first); - } else if (overlay->second->Status != NKikimrBlobStorage::EVDiskStatus::READY) { + } else if (overlay->second->GetStatus() != NKikimrBlobStorage::EVDiskStatus::READY) { overlay->second->DropFromVSlotReadyTimestampQ(); - } else if (!base || base->second->Status != NKikimrBlobStorage::EVDiskStatus::READY) { + } else if (!base || base->second->GetStatus() != NKikimrBlobStorage::EVDiskStatus::READY) { overlay->second->PutInVSlotReadyTimestampQ(now); } else { Y_DEBUG_ABORT_UNLESS(overlay->second->IsReady || overlay->second->IsInVSlotReadyTimestampQ()); @@ -998,7 +998,7 @@ namespace NKikimr::NBsController { pb->SetAllocatedSize(vslot.Metrics.GetAllocatedSize()); pb->MutableVDiskMetrics()->CopyFrom(vslot.Metrics); pb->MutableVDiskMetrics()->ClearVDiskId(); - pb->SetStatus(NKikimrBlobStorage::EVDiskStatus_Name(vslot.Status)); + pb->SetStatus(NKikimrBlobStorage::EVDiskStatus_Name(vslot.GetStatus())); for (const TVSlotId& vslotId : vslot.Donors) { auto *item = pb->AddDonors(); Serialize(item->MutableVSlotId(), vslotId); diff --git a/ydb/core/mind/bscontroller/config.h b/ydb/core/mind/bscontroller/config.h index 7352658bc107..8c1bb321e5ec 100644 --- a/ydb/core/mind/bscontroller/config.h +++ b/ydb/core/mind/bscontroller/config.h @@ -101,6 +101,7 @@ namespace NKikimr { // when the config cmd received const TInstant Timestamp; + const TMonotonic Mono; // various settings from controller const bool DonorMode; @@ -124,7 +125,8 @@ namespace NKikimr { bool PushStaticGroupsToSelfHeal = false; public: - TConfigState(TBlobStorageController &controller, const THostRecordMap &hostRecords, TInstant timestamp) + TConfigState(TBlobStorageController &controller, const THostRecordMap &hostRecords, TInstant timestamp, + TMonotonic mono) : Self(controller) , HostConfigs(&controller.HostConfigs) , Boxes(&controller.Boxes) @@ -142,6 +144,7 @@ namespace NKikimr { , 
NextStoragePoolId(&controller.NextStoragePoolId) , HostRecords(hostRecords) , Timestamp(timestamp) + , Mono(mono) , DonorMode(controller.DonorMode) , DefaultMaxSlots(controller.DefaultMaxSlots) , StaticVSlots(controller.StaticVSlots) diff --git a/ydb/core/mind/bscontroller/config_cmd.cpp b/ydb/core/mind/bscontroller/config_cmd.cpp index 88fe17f61f34..ca5e9f91da25 100644 --- a/ydb/core/mind/bscontroller/config_cmd.cpp +++ b/ydb/core/mind/bscontroller/config_cmd.cpp @@ -180,7 +180,7 @@ namespace NKikimr::NBsController { Response->MutableStatus()->RemoveLast(); } - State.emplace(*Self, Self->HostRecords, TActivationContext::Now()); + State.emplace(*Self, Self->HostRecords, TActivationContext::Now(), TActivationContext::Monotonic()); State->CheckConsistency(); TString m; diff --git a/ydb/core/mind/bscontroller/config_fit_groups.cpp b/ydb/core/mind/bscontroller/config_fit_groups.cpp index e72184045d26..e3f1f199de01 100644 --- a/ydb/core/mind/bscontroller/config_fit_groups.cpp +++ b/ydb/core/mind/bscontroller/config_fit_groups.cpp @@ -524,7 +524,7 @@ namespace NKikimr { // also we have to find replicating VSlots on this PDisk and assume they consume up to // max(vslotSize for every slot in group), not their actual AllocatedSize for (const auto& [id, slot] : info.VSlotsOnPDisk) { - if (slot->Group && slot->Status != NKikimrBlobStorage::EVDiskStatus::READY) { + if (slot->Group && slot->GetStatus() != NKikimrBlobStorage::EVDiskStatus::READY) { ui64 maxGroupSlotSize = 0; for (const TVSlotInfo *peer : slot->Group->VDisksInGroup) { maxGroupSlotSize = Max(maxGroupSlotSize, peer->Metrics.GetAllocatedSize()); @@ -607,6 +607,7 @@ namespace NKikimr { groupInfo->ID, 0, groupInfo->Generation, StoragePool.VDiskKind, failRealmIdx, failDomainIdx, vdiskIdx, TMood::Normal, groupInfo, &VSlotReadyTimestampQ, TInstant::Zero(), TDuration::Zero()); + vslotInfo->VDiskStatusTimestamp = State.Mono; // mark as uncommitted State.UncommittedVSlots.insert(vslotId); diff --git 
a/ydb/core/mind/bscontroller/drop_donor.cpp b/ydb/core/mind/bscontroller/drop_donor.cpp index 55558a8322ee..b9b0b9cdd944 100644 --- a/ydb/core/mind/bscontroller/drop_donor.cpp +++ b/ydb/core/mind/bscontroller/drop_donor.cpp @@ -18,7 +18,7 @@ class TBlobStorageController::TTxDropDonor TTxType GetTxType() const override { return NBlobStorageController::TXTYPE_DROP_DONOR; } bool Execute(TTransactionContext &txc, const TActorContext&) override { - State.emplace(*Self, Self->HostRecords, TActivationContext::Now()); + State.emplace(*Self, Self->HostRecords, TActivationContext::Now(), TActivationContext::Monotonic()); State->CheckConsistency(); for (const TVSlotId& vslotId : VSlotIds) { if (const TVSlotInfo *vslot = State->VSlots.Find(vslotId); vslot && !vslot->IsBeingDeleted()) { diff --git a/ydb/core/mind/bscontroller/impl.h b/ydb/core/mind/bscontroller/impl.h index d55e57b34d63..c8e24f664880 100644 --- a/ydb/core/mind/bscontroller/impl.h +++ b/ydb/core/mind/bscontroller/impl.h @@ -124,16 +124,17 @@ class TBlobStorageController : public TActor, public TTa TVSlotReadyTimestampQ::iterator VSlotReadyTimestampIter; public: - NKikimrBlobStorage::EVDiskStatus Status = NKikimrBlobStorage::EVDiskStatus::ERROR; + std::optional VDiskStatus; + TMonotonic VDiskStatusTimestamp; bool IsReady = false; bool OnlyPhantomsRemain = false; public: void SetStatus(NKikimrBlobStorage::EVDiskStatus status, TMonotonic now, TInstant instant, bool onlyPhantomsRemain) { - if (status != Status) { + if (status != VDiskStatus) { if (status == NKikimrBlobStorage::EVDiskStatus::REPLICATING) { // became "replicating" LastGotReplicating = instant; - } else if (Status == NKikimrBlobStorage::EVDiskStatus::REPLICATING) { // was "replicating" + } else if (VDiskStatus == NKikimrBlobStorage::EVDiskStatus::REPLICATING) { // was "replicating" Y_DEBUG_ABORT_UNLESS(LastGotReplicating != TInstant::Zero()); ReplicationTime += instant - LastGotReplicating; LastGotReplicating = {}; @@ -145,7 +146,7 @@ class 
TBlobStorageController : public TActor, public TTa LastSeenReady = instant; } - Status = status; + VDiskStatus = status; IsReady = false; if (status == NKikimrBlobStorage::EVDiskStatus::READY) { PutInVSlotReadyTimestampQ(now); @@ -159,6 +160,10 @@ class TBlobStorageController : public TActor, public TTa } } + NKikimrBlobStorage::EVDiskStatus GetStatus() const { + return VDiskStatus.value_or(NKikimrBlobStorage::EVDiskStatus::ERROR); + } + void PutInVSlotReadyTimestampQ(TMonotonic now) { const TMonotonic readyAfter = now + ReadyStablePeriod; // vdisk will be treated as READY one shortly, but not now Y_ABORT_UNLESS(VSlotReadyTimestampIter == TVSlotReadyTimestampQ::iterator()); @@ -291,15 +296,16 @@ class TBlobStorageController : public TActor, public TTa TString GetStatusString() const { TStringStream s; - s << NKikimrBlobStorage::EVDiskStatus_Name(Status); - if (Status == NKikimrBlobStorage::REPLICATING && OnlyPhantomsRemain) { + const auto status = GetStatus(); + s << NKikimrBlobStorage::EVDiskStatus_Name(status); + if (status == NKikimrBlobStorage::REPLICATING && OnlyPhantomsRemain) { s << "/p"; } return s.Str(); } bool IsOperational() const { - return Status >= NKikimrBlobStorage::REPLICATING; + return GetStatus() >= NKikimrBlobStorage::REPLICATING; } void OnCommit(); @@ -1510,6 +1516,7 @@ class TBlobStorageController : public TActor, public TTa bool AllowMultipleRealmsOccupation = true; bool StorageConfigObtained = false; bool Loaded = false; + std::shared_ptr EnableSelfHealWithDegraded; std::set> GroupToNode; @@ -2276,7 +2283,7 @@ class TBlobStorageController : public TActor, public TTa histo.IncrementFor(passed.Seconds()); TDuration timeBeingReplicating = slot->ReplicationTime; - if (slot->Status == NKikimrBlobStorage::EVDiskStatus::REPLICATING) { + if (slot->GetStatus() == NKikimrBlobStorage::EVDiskStatus::REPLICATING) { timeBeingReplicating += now - slot->LastGotReplicating; } @@ -2301,11 +2308,12 @@ class TBlobStorageController : public TActor, public TTa 
const NKikimrBlobStorage::TVDiskKind::EVDiskKind VDiskKind; std::optional VDiskMetrics; - NKikimrBlobStorage::EVDiskStatus VDiskStatus = NKikimrBlobStorage::EVDiskStatus::ERROR; + std::optional VDiskStatus; + TMonotonic VDiskStatusTimestamp; TMonotonic ReadySince = TMonotonic::Max(); // when IsReady becomes true for this disk; Max() in non-READY state TStaticVSlotInfo(const NKikimrBlobStorage::TNodeWardenServiceSet::TVDisk& vdisk, - std::map& prev) + std::map& prev, TMonotonic mono) : VDiskId(VDiskIDFromVDiskID(vdisk.GetVDiskID())) , VDiskKind(vdisk.GetVDiskKind()) { @@ -2315,7 +2323,10 @@ class TBlobStorageController : public TActor, public TTa TStaticVSlotInfo& item = it->second; VDiskMetrics = std::move(item.VDiskMetrics); VDiskStatus = item.VDiskStatus; + VDiskStatusTimestamp = item.VDiskStatusTimestamp; ReadySince = item.ReadySince; + } else { + VDiskStatusTimestamp = mono; } } }; diff --git a/ydb/core/mind/bscontroller/load_everything.cpp b/ydb/core/mind/bscontroller/load_everything.cpp index b8eb7d59a5e2..a6efc5a39457 100644 --- a/ydb/core/mind/bscontroller/load_everything.cpp +++ b/ydb/core/mind/bscontroller/load_everything.cpp @@ -352,6 +352,7 @@ class TBlobStorageController::TTxLoadEverything : public TTransactionBaseVSlots.clear(); { using T = Schema::VSlot; @@ -374,6 +375,7 @@ class TBlobStorageController::TTxLoadEverything : public TTransactionBaseNotReadyVSlotIds.insert(x.VSlotId); } + x.VDiskStatusTimestamp = mono; if (!slot.Next()) { return false; diff --git a/ydb/core/mind/bscontroller/monitoring.cpp b/ydb/core/mind/bscontroller/monitoring.cpp index 9f6fa7597e6d..374955bc70f7 100644 --- a/ydb/core/mind/bscontroller/monitoring.cpp +++ b/ydb/core/mind/bscontroller/monitoring.cpp @@ -1296,7 +1296,7 @@ void TBlobStorageController::RenderVSlotRow(IOutputStream& out, const TVSlotInfo } TABLED() { TDuration time = vslot.ReplicationTime; - if (vslot.Status == NKikimrBlobStorage::EVDiskStatus::REPLICATING) { + if (vslot.GetStatus() == 
NKikimrBlobStorage::EVDiskStatus::REPLICATING) { time += TActivationContext::Now() - vslot.LastGotReplicating; } out << time; diff --git a/ydb/core/mind/bscontroller/node_report.cpp b/ydb/core/mind/bscontroller/node_report.cpp index 868d8fb98822..ed1b116f1ca9 100644 --- a/ydb/core/mind/bscontroller/node_report.cpp +++ b/ydb/core/mind/bscontroller/node_report.cpp @@ -26,7 +26,7 @@ class TBlobStorageController::TTxNodeReport return true; } - State.emplace(*Self, Self->HostRecords, TActivationContext::Now()); + State.emplace(*Self, Self->HostRecords, TActivationContext::Now(), TActivationContext::Monotonic()); State->CheckConsistency(); NIceDb::TNiceDb db(txc.DB); diff --git a/ydb/core/mind/bscontroller/register_node.cpp b/ydb/core/mind/bscontroller/register_node.cpp index 69d05dcdfd8d..d0952a8bdf0b 100644 --- a/ydb/core/mind/bscontroller/register_node.cpp +++ b/ydb/core/mind/bscontroller/register_node.cpp @@ -160,7 +160,7 @@ class TBlobStorageController::TTxUpdateNodeDrives bool Execute(TTransactionContext& txc, const TActorContext&) override { const TNodeId nodeId = Record.GetNodeId(); - State.emplace(*Self, Self->HostRecords, TActivationContext::Now()); + State.emplace(*Self, Self->HostRecords, TActivationContext::Now(), TActivationContext::Monotonic()); State->CheckConsistency(); auto updateIsSuccessful = true; @@ -547,7 +547,7 @@ void TBlobStorageController::OnWardenDisconnected(TNodeId nodeId, TActorId serve updates.push_back({ .VDiskId = it->second->GetVDiskId(), .IsReady = it->second->IsReady, - .VDiskStatus = it->second->Status, + .VDiskStatus = it->second->GetStatus(), }); ScrubState.UpdateVDiskState(&*it->second); SysViewChangedVSlots.insert(it->second->VSlotId); diff --git a/ydb/core/mind/bscontroller/self_heal.cpp b/ydb/core/mind/bscontroller/self_heal.cpp index 2d13a1d86cfd..d2ff2877b55b 100644 --- a/ydb/core/mind/bscontroller/self_heal.cpp +++ b/ydb/core/mind/bscontroller/self_heal.cpp @@ -43,6 +43,7 @@ namespace NKikimr::NBsController { std::shared_ptr 
Topology; TBlobStorageGroupInfo::TGroupVDisks FailedGroupDisks; const bool IsSelfHealReasonDecommit; + const bool IgnoreDegradedGroupsChecks; const bool DonorMode; THashSet PendingVDisks; THashMap ActorToDiskMap; @@ -51,7 +52,7 @@ namespace NKikimr::NBsController { public: TReassignerActor(TActorId controllerId, TGroupId groupId, TEvControllerUpdateSelfHealInfo::TGroupContent group, std::optional vdiskToReplace, std::shared_ptr topology, - bool isSelfHealReasonDecommit, bool donorMode) + bool isSelfHealReasonDecommit, bool ignoreDegradedGroupsChecks, bool donorMode) : ControllerId(controllerId) , GroupId(groupId) , Group(std::move(group)) @@ -59,6 +60,7 @@ namespace NKikimr::NBsController { , Topology(std::move(topology)) , FailedGroupDisks(Topology.get()) , IsSelfHealReasonDecommit(isSelfHealReasonDecommit) + , IgnoreDegradedGroupsChecks(ignoreDegradedGroupsChecks) , DonorMode(donorMode) {} @@ -166,6 +168,9 @@ namespace NKikimr::NBsController { request->SetIgnoreGroupReserve(true); request->SetSettleOnlyOnOperationalDisks(true); request->SetIsSelfHealReasonDecommit(IsSelfHealReasonDecommit); + if (IgnoreDegradedGroupsChecks) { + request->SetIgnoreDegradedGroupsChecks(IgnoreDegradedGroupsChecks); + } request->SetAllowUnusableDisks(true); if (VDiskToReplace) { ev->SelfHeal = true; @@ -278,6 +283,7 @@ namespace NKikimr::NBsController { bool AllowMultipleRealmsOccupation; bool DonorMode; THostRecordMap HostRecords; + std::shared_ptr EnableSelfHealWithDegraded; using TTopologyDescr = std::tuple; THashMap> Topologies; @@ -289,13 +295,15 @@ namespace NKikimr::NBsController { public: TSelfHealActor(ui64 tabletId, std::shared_ptr unreassignableGroups, THostRecordMap hostRecords, - bool groupLayoutSanitizerEnabled, bool allowMultipleRealmsOccupation, bool donorMode) + bool groupLayoutSanitizerEnabled, bool allowMultipleRealmsOccupation, bool donorMode, + std::shared_ptr enableSelfHealWithDegraded) : TabletId(tabletId) , UnreassignableGroups(std::move(unreassignableGroups)) 
, GroupLayoutSanitizerEnabled(groupLayoutSanitizerEnabled) , AllowMultipleRealmsOccupation(allowMultipleRealmsOccupation) , DonorMode(donorMode) , HostRecords(std::move(hostRecords)) + , EnableSelfHealWithDegraded(std::move(enableSelfHealWithDegraded)) {} void Bootstrap(const TActorId& parentId) { @@ -427,11 +435,35 @@ namespace NKikimr::NBsController { // check if it is possible to move anything out bool isSelfHealReasonDecommit; - if (const auto v = FindVDiskToReplace(group.Content, now, group.Topology.get(), &isSelfHealReasonDecommit)) { + bool ignoreDegradedGroupsChecks; + if (const auto v = FindVDiskToReplace(group.Content, now, group.Topology.get(), &isSelfHealReasonDecommit, + &ignoreDegradedGroupsChecks)) { group.ReassignerActorId = Register(new TReassignerActor(ControllerId, group.GroupId, group.Content, - *v, group.Topology, isSelfHealReasonDecommit, DonorMode)); + *v, group.Topology, isSelfHealReasonDecommit, ignoreDegradedGroupsChecks, DonorMode)); } else { ++counter; // this group can't be reassigned right now + + auto log = [&]() { + TStringStream ss; + ss << "["; + bool first = true; + for (const auto& [vdiskId, vdisk] : group.Content.VDisks) { + if (!std::exchange(first, false)) { + ss << ","; + } + ss << "{"; + ss << vdiskId; + ss << (IsReady(vdisk, now) ? " Ready" : " NotReady"); + ss << (vdisk.Faulty ? " Faulty" : ""); + ss << (vdisk.Bad ? " IsBad" : ""); + ss << (vdisk.Decommitted ? 
" Decommitted" : ""); + ss << "}"; + } + ss << "]"; + return ss.Str(); + }; + + STLOG(PRI_INFO, BS_SELFHEAL, BSSH11, "group can't be reassigned right now " << log(), (GroupId, group.GroupId)); } } @@ -462,7 +494,8 @@ namespace NKikimr::NBsController { ADD_RECORD_WITH_TIMESTAMP_TO_OPERATION_LOG(GroupLayoutSanitizerOperationLog, "Start sanitizing GroupId# " << group.GroupId << " GroupGeneration# " << group.Content.Generation); group.ReassignerActorId = Register(new TReassignerActor(ControllerId, group.GroupId, group.Content, - std::nullopt, group.Topology, false /*isSelfHealReasonDecommit*/, DonorMode)); + std::nullopt, group.Topology, false /*isSelfHealReasonDecommit*/, + false /*ignoreDegradedGroupsChecks*/, DonorMode)); } } } @@ -512,7 +545,8 @@ namespace NKikimr::NBsController { } std::optional FindVDiskToReplace(const TEvControllerUpdateSelfHealInfo::TGroupContent& content, - TMonotonic now, TBlobStorageGroupInfo::TTopology *topology, bool *isSelfHealReasonDecommit) { + TMonotonic now, TBlobStorageGroupInfo::TTopology *topology, bool *isSelfHealReasonDecommit, + bool *ignoreDegradedGroupsChecks) { // main idea of selfhealing is step-by-step healing of bad group; we can allow healing of group with more // than one disk missing, but we should not move next faulty disk until previous one is replicated, at least // partially (meaning only phantoms left) @@ -531,7 +565,7 @@ namespace NKikimr::NBsController { } [[fallthrough]]; case NKikimrBlobStorage::EVDiskStatus::INIT_PENDING: - return std::nullopt; // don't touch group with replicating disks + return std::nullopt; // don't touch group with replicating or starting disks default: break; @@ -557,6 +591,7 @@ namespace NKikimr::NBsController { continue; // this group will become degraded when applying self-heal logic, skip disk } *isSelfHealReasonDecommit = vdisk.IsSelfHealReasonDecommit; + *ignoreDegradedGroupsChecks = checker.IsDegraded(failedByReadiness) && *EnableSelfHealWithDegraded; return vdiskId; } } @@ -864,7 
+899,7 @@ namespace NKikimr::NBsController { IActor *TBlobStorageController::CreateSelfHealActor() { Y_ABORT_UNLESS(HostRecords); return new TSelfHealActor(TabletID(), SelfHealUnreassignableGroups, HostRecords, GroupLayoutSanitizerEnabled, - AllowMultipleRealmsOccupation, DonorMode); + AllowMultipleRealmsOccupation, DonorMode, EnableSelfHealWithDegraded); } void TBlobStorageController::InitializeSelfHealState() { @@ -913,7 +948,7 @@ namespace NKikimr::NBsController { slot->OnlyPhantomsRemain, slot->IsReady, TMonotonic::Zero(), - slot->Status, + slot->GetStatus(), }; } } @@ -960,7 +995,7 @@ namespace NKikimr::NBsController { false, /* OnlyPhantomsRemain */ true, /* IsReady; decision is based on ReadySince */ info.ReadySince, - info.VDiskStatus, + info.VDiskStatus.value_or(NKikimrBlobStorage::EVDiskStatus::ERROR), }; } } @@ -987,7 +1022,7 @@ namespace NKikimr::NBsController { const bool was = slot->IsOperational(); if (const TGroupInfo *group = slot->Group) { const bool wasReady = slot->IsReady; - if (slot->Status != m.GetStatus() || slot->OnlyPhantomsRemain != m.GetOnlyPhantomsRemain()) { + if (slot->GetStatus() != m.GetStatus() || slot->OnlyPhantomsRemain != m.GetOnlyPhantomsRemain()) { slot->SetStatus(m.GetStatus(), mono, now, m.GetOnlyPhantomsRemain()); if (slot->IsReady != wasReady) { ScrubState.UpdateVDiskState(slot); @@ -1001,14 +1036,14 @@ namespace NKikimr::NBsController { .VDiskId = vdiskId, .OnlyPhantomsRemain = slot->OnlyPhantomsRemain, .IsReady = slot->IsReady, - .VDiskStatus = slot->Status, + .VDiskStatus = slot->GetStatus(), }); if (!was && slot->IsOperational() && !group->SeenOperational) { groups.insert(const_cast(group)); } SysViewChangedVSlots.insert(vslotId); } - if (slot->Status == NKikimrBlobStorage::EVDiskStatus::READY) { + if (slot->GetStatus() == NKikimrBlobStorage::EVDiskStatus::READY) { // we can release donor slots without further notice then the VDisk is completely replicated; we // intentionally use GetStatus() here instead of IsReady() 
to prevent waiting for (const TVSlotId& donorVSlotId : slot->Donors) { diff --git a/ydb/core/mind/bscontroller/sys_view.cpp b/ydb/core/mind/bscontroller/sys_view.cpp index 6526402a825c..b9fed5ab6d1d 100644 --- a/ydb/core/mind/bscontroller/sys_view.cpp +++ b/ydb/core/mind/bscontroller/sys_view.cpp @@ -325,7 +325,8 @@ void CopyInfo(NKikimrSysView::TPDiskInfo* info, const THolder status, NKikimrBlobStorage::TVDiskKind::EVDiskKind kind, + bool isBeingDeleted) { pb->SetGroupId(vdiskId.GroupID.GetRawId()); pb->SetGroupGeneration(vdiskId.GroupGeneration); pb->SetFailRealm(vdiskId.FailRealm); @@ -337,7 +338,9 @@ void SerializeVSlotInfo(NKikimrSysView::TVSlotInfo *pb, const TVDiskID& vdiskId, if (m.HasAvailableSize()) { pb->SetAvailableSize(m.GetAvailableSize()); } - pb->SetStatusV2(NKikimrBlobStorage::EVDiskStatus_Name(status)); + if (status) { + pb->SetStatusV2(NKikimrBlobStorage::EVDiskStatus_Name(*status)); + } pb->SetKind(NKikimrBlobStorage::TVDiskKind::EVDiskKind_Name(kind)); if (isBeingDeleted) { pb->SetIsBeingDeleted(true); @@ -345,8 +348,8 @@ void SerializeVSlotInfo(NKikimrSysView::TVSlotInfo *pb, const TVDiskID& vdiskId, } void CopyInfo(NKikimrSysView::TVSlotInfo* info, const THolder& vSlotInfo) { - SerializeVSlotInfo(info, vSlotInfo->GetVDiskId(), vSlotInfo->Metrics, vSlotInfo->Status, vSlotInfo->Kind, - vSlotInfo->IsBeingDeleted()); + SerializeVSlotInfo(info, vSlotInfo->GetVDiskId(), vSlotInfo->Metrics, vSlotInfo->VDiskStatus, + vSlotInfo->Kind, vSlotInfo->IsBeingDeleted()); } void CopyInfo(NKikimrSysView::TGroupInfo* info, const THolder& groupInfo) { @@ -422,6 +425,21 @@ void TBlobStorageController::UpdateSystemViews() { return; } + const TMonotonic now = TActivationContext::Monotonic(); + const TDuration expiration = TDuration::Seconds(15); + for (auto& [key, value] : VSlots) { + if (!value->VDiskStatus && value->VDiskStatusTimestamp + expiration <= now) { + value->VDiskStatus = NKikimrBlobStorage::ERROR; + SysViewChangedVSlots.insert(key); + } + } + for 
(auto& [key, value] : StaticVSlots) { + if (!value.VDiskStatus && value.VDiskStatusTimestamp + expiration <= now) { + value.VDiskStatus = NKikimrBlobStorage::ERROR; + SysViewChangedVSlots.insert(key); + } + } + if (!SysViewChangedPDisks.empty() || !SysViewChangedVSlots.empty() || !SysViewChangedGroups.empty() || !SysViewChangedStoragePools.empty() || SysViewChangedSettings) { auto update = MakeHolder(); diff --git a/ydb/core/mind/bscontroller/virtual_group.cpp b/ydb/core/mind/bscontroller/virtual_group.cpp index 66f3056e06ab..ee3b31fb2ac2 100644 --- a/ydb/core/mind/bscontroller/virtual_group.cpp +++ b/ydb/core/mind/bscontroller/virtual_group.cpp @@ -248,7 +248,7 @@ namespace NKikimr::NBsController { if (const TGroupInfo *group = Self->FindGroup(GroupId); !group || group->VirtualGroupSetupMachineId != MachineId) { return true; // another machine is already running } - State.emplace(*Self, Self->HostRecords, TActivationContext::Now()); + State.emplace(*Self, Self->HostRecords, TActivationContext::Now(), TActivationContext::Monotonic()); TGroupInfo *group = State->Groups.FindForUpdate(GroupId); Y_ABORT_UNLESS(group); if (!Callback(*group, *State)) { @@ -294,7 +294,7 @@ namespace NKikimr::NBsController { if (Token.expired()) { return true; // actor is already dead } - State.emplace(*Self, Self->HostRecords, TActivationContext::Now()); + State.emplace(*Self, Self->HostRecords, TActivationContext::Now(), TActivationContext::Monotonic()); const size_t n = State->BlobDepotDeleteQueue.Unshare().erase(GroupId); Y_ABORT_UNLESS(n == 1); TString error; @@ -897,7 +897,7 @@ namespace NKikimr::NBsController { TTxType GetTxType() const override { return NBlobStorageController::TXTYPE_DECOMMIT_GROUP; } bool Execute(TTransactionContext& txc, const TActorContext&) override { - State.emplace(*Self, Self->HostRecords, TActivationContext::Now()); + State.emplace(*Self, Self->HostRecords, TActivationContext::Now(), TActivationContext::Monotonic()); Action(*State); TString error; if 
(State->Changed() && !Self->CommitConfigUpdates(*State, true, true, true, txc, &error)) { diff --git a/ydb/core/mind/dynamic_nameserver.cpp b/ydb/core/mind/dynamic_nameserver.cpp index 9f31734f3c67..d6d135286404 100644 --- a/ydb/core/mind/dynamic_nameserver.cpp +++ b/ydb/core/mind/dynamic_nameserver.cpp @@ -286,6 +286,7 @@ void TDynamicNameserver::UpdateState(const NKikimrNodeBroker::TNodesInfo &rec, ctx.Schedule(config->Epoch.End - ctx.Now(), new TEvPrivate::TEvUpdateEpoch(domain, config->Epoch.Id + 1)); } else { + // Note: this update may be optimized to only include new nodes for (auto &node : rec.GetNodes()) { auto nodeId = node.GetNodeId(); if (!config->DynamicNodes.contains(nodeId)) diff --git a/ydb/core/mind/hive/balancer.cpp b/ydb/core/mind/hive/balancer.cpp index 637a15f9753b..620f502eead6 100644 --- a/ydb/core/mind/hive/balancer.cpp +++ b/ydb/core/mind/hive/balancer.cpp @@ -62,17 +62,17 @@ void BalanceNodes } template<> -void BalanceTablets(std::vector& tablets, EResourceToBalance resourceToBalance) { +void BalanceTablets(std::vector::iterator first, std::vector::iterator last, EResourceToBalance resourceToBalance) { auto& randGen = *TAppData::RandomProvider.Get(); // weighted random shuffle std::vector weights; - weights.reserve(tablets.size()); - for (auto it = tablets.begin(); it != tablets.end(); ++it) { + weights.reserve(last - first); + for (auto it = first; it != last; ++it) { weights.emplace_back((*it)->GetWeight(resourceToBalance)); } - auto itT = tablets.begin(); + auto itT = first; auto itW = weights.begin(); - while (itT != tablets.end() && itW != weights.end()) { + while (itT != last && itW != weights.end()) { auto idx = std::discrete_distribution(itW, weights.end())(randGen); if (idx != 0) { std::iter_swap(itT, std::next(itT, idx)); @@ -84,32 +84,32 @@ void BalanceTablets -void BalanceTablets(std::vector& tablets, EResourceToBalance resourceToBalance) { - std::sort(tablets.begin(), tablets.end(), [resourceToBalance](const TTabletInfo* a, 
const TTabletInfo* b) -> bool { +void BalanceTablets(std::vector::iterator first, std::vector::iterator last, EResourceToBalance resourceToBalance) { + std::sort(first, last, [resourceToBalance](const TTabletInfo* a, const TTabletInfo* b) -> bool { return a->GetWeight(resourceToBalance) > b->GetWeight(resourceToBalance); }); } template<> -void BalanceTablets(std::vector& tablets, EResourceToBalance) { +void BalanceTablets(std::vector::iterator first, std::vector::iterator last, EResourceToBalance) { auto& randGen = *TAppData::RandomProvider.Get(); - std::shuffle(tablets.begin(), tablets.end(), randGen); + std::shuffle(first, last, randGen); } template<> -void BalanceTablets(std::vector& tablets, EResourceToBalance resourceToBalance) { +void BalanceTablets(std::vector::iterator first, std::vector::iterator last, EResourceToBalance resourceToBalance) { auto& randGen = *TAppData::RandomProvider.Get(); std::vector> weights; - weights.reserve(tablets.size()); - for (TTabletInfo* tablet : tablets) { - double weight = tablet->GetWeight(resourceToBalance); - weights.emplace_back(weight * randGen(), tablet); + weights.reserve(last - first); + for (auto it = first; it != last; ++it) { + double weight = (*it)->GetWeight(resourceToBalance); + weights.emplace_back(weight * randGen(), *it); } std::sort(weights.begin(), weights.end(), [](const auto& a, const auto& b) -> bool { return a.first > b.first; }); for (size_t n = 0; n < weights.size(); ++n) { - tablets[n] = weights[n].second; + first[n] = weights[n].second; } } @@ -252,18 +252,31 @@ class THiveBalancer : public NActors::TActorBootstrapped, public } BLOG_TRACE("Balancer on node " << node->Id << ": " << tablets.size() << "/" << nodeTablets.size() << " tablets are suitable for balancing"); if (!tablets.empty()) { + // avoid moving system tablets if possible + std::vector::iterator partitionIt; + if (Hive->GetLessSystemTabletsMoves()) { + partitionIt = std::partition(tablets.begin(), tablets.end(), [](TTabletInfo* tablet) { 
+ return !THive::IsSystemTablet(tablet->GetTabletType()); + }); + } else { + partitionIt = tablets.end(); + } switch (Hive->GetTabletBalanceStrategy()) { case NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_OLD_WEIGHTED_RANDOM: - BalanceTablets(tablets, Settings.ResourceToBalance); + BalanceTablets(tablets.begin(), partitionIt, Settings.ResourceToBalance); + BalanceTablets(partitionIt, tablets.end(), Settings.ResourceToBalance); break; case NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_WEIGHTED_RANDOM: - BalanceTablets(tablets, Settings.ResourceToBalance); + BalanceTablets(tablets.begin(), partitionIt, Settings.ResourceToBalance); + BalanceTablets(partitionIt, tablets.end(), Settings.ResourceToBalance); break; case NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_HEAVIEST: - BalanceTablets(tablets, Settings.ResourceToBalance); + BalanceTablets(tablets.begin(), partitionIt, Settings.ResourceToBalance); + BalanceTablets(partitionIt, tablets.end(), Settings.ResourceToBalance); break; case NKikimrConfig::THiveConfig::HIVE_TABLET_BALANCE_STRATEGY_RANDOM: - BalanceTablets(tablets, Settings.ResourceToBalance); + BalanceTablets(tablets.begin(), partitionIt, Settings.ResourceToBalance); + BalanceTablets(partitionIt, tablets.end(), Settings.ResourceToBalance); break; } Tablets.clear(); diff --git a/ydb/core/mind/hive/balancer.h b/ydb/core/mind/hive/balancer.h index 62289c4f2401..e5ee1a02820d 100644 --- a/ydb/core/mind/hive/balancer.h +++ b/ydb/core/mind/hive/balancer.h @@ -10,7 +10,7 @@ template& nodes, EResourceToBalance resourceTobalance); template -void BalanceTablets(std::vector& tablets, EResourceToBalance resourceToBalance); +void BalanceTablets(std::vector::iterator first, std::vector::iterator last, EResourceToBalance resourceToBalance); template void BalanceChannels(std::vector& channels, NKikimrConfig::THiveConfig::EHiveStorageBalanceStrategy metricToBalance); diff --git a/ydb/core/mind/hive/hive_impl.cpp 
b/ydb/core/mind/hive/hive_impl.cpp index 8807f3626302..9c250e144647 100644 --- a/ydb/core/mind/hive/hive_impl.cpp +++ b/ydb/core/mind/hive/hive_impl.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -96,10 +97,12 @@ void THive::RestartPipeTx(ui64 tabletId) { bool THive::TryToDeleteNode(TNodeInfo* node) { if (node->CanBeDeleted()) { + BLOG_I("TryToDeleteNode(" << node->Id << "): deleting"); DeleteNode(node->Id); return true; } if (!node->DeletionScheduled) { + BLOG_D("TryToDeleteNode(" << node->Id << "): waiting " << GetNodeDeletePeriod()); Schedule(GetNodeDeletePeriod(), new TEvPrivate::TEvDeleteNode(node->Id)); node->DeletionScheduled = true; } @@ -233,6 +236,7 @@ void THive::ExecuteProcessBootQueue(NIceDb::TNiceDb& db, TSideEffects& sideEffec if (tablet == nullptr) { continue; } + tablet->InWaitQueue = false; if (tablet->IsAlive()) { BLOG_D("tablet " << record.TabletId << " already alive, skipping"); continue; @@ -253,9 +257,10 @@ void THive::ExecuteProcessBootQueue(NIceDb::TNiceDb& db, TSideEffects& sideEffec sideEffects.Send(actorToNotify, new TEvPrivate::TEvRestartComplete(tablet->GetFullTabletId(), "boot delay")); } tablet->ActorsToNotifyOnRestart.clear(); + tablet->InWaitQueue = true; if (tablet->IsFollower()) { TLeaderTabletInfo& leader = tablet->GetLeader(); - UpdateTabletFollowersNumber(leader, db, sideEffects); + UpdateTabletFollowersNumber(leader, db, sideEffects); // this may delete tablet } BootQueue.AddToWaitQueue(record); // waiting for new node continue; @@ -495,6 +500,7 @@ void THive::Handle(TEvPrivate::TEvBootTablets::TPtr&) { for (auto* node : unimportantNodes) { node->Ping(); } + ProcessNodePingQueue(); TVector tabletsToReleaseFromParent; TSideEffects sideEffects; sideEffects.Reset(SelfId()); @@ -685,11 +691,13 @@ void THive::Cleanup() { void THive::Handle(TEvLocal::TEvStatus::TPtr& ev) { BLOG_D("Handle TEvLocal::TEvStatus for Node " << ev->Sender.NodeId() << ": " << ev->Get()->Record.ShortDebugString()); + 
RemoveFromPingInProgress(ev->Sender.NodeId()); Execute(CreateStatus(ev->Sender, ev->Get()->Record)); } void THive::Handle(TEvLocal::TEvSyncTablets::TPtr& ev) { BLOG_D("THive::Handle::TEvSyncTablets"); + RemoveFromPingInProgress(ev->Sender.NodeId()); Execute(CreateSyncTablets(ev->Sender, ev->Get()->Record)); } @@ -743,6 +751,7 @@ void THive::Handle(TEvInterconnect::TEvNodeConnected::TPtr &ev) { void THive::Handle(TEvInterconnect::TEvNodeDisconnected::TPtr &ev) { TNodeId nodeId = ev->Get()->NodeId; BLOG_W("Handle TEvInterconnect::TEvNodeDisconnected, NodeId " << nodeId); + RemoveFromPingInProgress(nodeId); if (ConnectedNodes.erase(nodeId)) { UpdateCounterNodesConnected(-1); } @@ -915,6 +924,7 @@ void THive::Handle(TEvents::TEvUndelivered::TPtr &ev) { case TEvLocal::EvPing: { TNodeId nodeId = ev->Cookie; TNodeInfo* node = FindNode(nodeId); + NodePingsInProgress.erase(nodeId); if (node != nullptr && ev->Sender == node->Local) { if (node->IsDisconnecting()) { // ping continiousily until we fully disconnected from the node @@ -923,6 +933,7 @@ void THive::Handle(TEvents::TEvUndelivered::TPtr &ev) { KillNode(node->Id, node->Local); } } + ProcessNodePingQueue(); break; } }; @@ -981,8 +992,9 @@ void THive::OnActivateExecutor(const TActorContext&) { BuildLocalConfig(); ClusterConfig = AppData()->HiveConfig; SpreadNeighbours = ClusterConfig.GetSpreadNeighbours(); + NodeBrokerEpoch = TDuration::MicroSeconds(NKikimrNodeBroker::TConfig().GetEpochDuration()); Send(NConsole::MakeConfigsDispatcherID(SelfId().NodeId()), - new NConsole::TEvConfigsDispatcher::TEvSetConfigSubscriptionRequest(NKikimrConsole::TConfigItem::HiveConfigItem)); + new NConsole::TEvConfigsDispatcher::TEvSetConfigSubscriptionRequest({NKikimrConsole::TConfigItem::HiveConfigItem, NKikimrConsole::TConfigItem::NodeBrokerConfigItem})); Execute(CreateInitScheme()); if (!ResponsivenessPinger) { ResponsivenessPinger = new TTabletResponsivenessPinger(TabletCounters->Simple()[NHive::COUNTER_RESPONSE_TIME_USEC], 
TDuration::Seconds(1)); @@ -1686,6 +1698,13 @@ void THive::UpdateCounterNodesConnected(i64 nodesConnectedDiff) { } } +void THive::UpdateCounterPingQueueSize() { + if (TabletCounters != nullptr) { + auto& counter = TabletCounters->Simple()[NHive::COUNTER_PINGQUEUE_SIZE]; + counter.Set(NodePingQueue.size()); + } +} + void THive::RecordTabletMove(const TTabletMoveInfo& moveInfo) { TabletMoveHistory.PushBack(moveInfo); TabletCounters->Cumulative()[NHive::COUNTER_TABLETS_MOVED].Increment(1); @@ -1837,6 +1856,9 @@ void THive::FillTabletInfo(NKikimrHive::TEvResponseHiveInfo& response, ui64 tabl if (req.GetReturnMetrics()) { tabletInfo.MutableMetrics()->CopyFrom(info->GetResourceValues()); } + if (info->InWaitQueue) { + tabletInfo.SetInWaitQueue(true); + } if (req.GetReturnChannelHistory()) { for (const auto& channel : info->TabletStorageInfo->Channels) { auto& tabletChannel = *tabletInfo.AddTabletChannels(); @@ -2195,7 +2217,9 @@ void THive::Handle(TEvHive::TEvInitiateTabletExternalBoot::TPtr& ev) { void THive::Handle(NConsole::TEvConsole::TEvConfigNotificationRequest::TPtr& ev) { const NKikimrConsole::TConfigNotificationRequest& record = ev->Get()->Record; ClusterConfig = record.GetConfig().GetHiveConfig(); - BLOG_D("Received TEvConsole::TEvConfigNotificationRequest with update of cluster config: " << ClusterConfig.ShortDebugString()); + NodeBrokerEpoch = TDuration::MicroSeconds(record.GetConfig().GetNodeBrokerConfig().GetEpochDuration()); + BLOG_D("Received TEvConsole::TEvConfigNotificationRequest with update of cluster config: " << ClusterConfig.ShortDebugString() + << "; " << record.GetConfig().GetNodeBrokerConfig().ShortDebugString()); BuildCurrentConfig(); Send(ev->Sender, new NConsole::TEvConsole::TEvConfigNotificationResponse(record), 0, ev->Cookie); } @@ -2327,7 +2351,8 @@ void THive::Handle(TEvPrivate::TEvProcessTabletBalancer::TPtr&) { nodeUsageHistogram.IncrementFor(record.Usage * 100); } - if (stats.MaxUsage >= GetMaxNodeUsageToKick()) { + double 
minUsageToKick = GetMaxNodeUsageToKick() - GetNodeUsageRangeToKick(); + if (stats.MaxUsage >= GetMaxNodeUsageToKick() && stats.MinUsage < minUsageToKick) { std::vector overloadedNodes; for (const auto& [nodeId, nodeInfo] : Nodes) { if (nodeInfo.IsAlive() && !nodeInfo.Down && nodeInfo.IsOverloaded()) { @@ -2661,6 +2686,25 @@ void THive::ExecuteStartTablet(TFullTabletId tabletId, const TActorId& local, ui Execute(CreateStartTablet(tabletId, local, cookie, external)); } +void THive::QueuePing(const TActorId& local) { + NodePingQueue.push(local); +} + +void THive::ProcessNodePingQueue() { + while (!NodePingQueue.empty() && NodePingsInProgress.size() < GetMaxPingsInFlight()) { + TActorId local = NodePingQueue.front(); + TNodeId node = local.NodeId(); + NodePingQueue.pop(); + NodePingsInProgress.insert(node); + SendPing(local, node); + } +} + +void THive::RemoveFromPingInProgress(TNodeId node) { + NodePingsInProgress.erase(node); + ProcessNodePingQueue(); +} + void THive::SendPing(const TActorId& local, TNodeId id) { Send(local, new TEvLocal::TEvPing(HiveId, @@ -2747,6 +2791,7 @@ void THive::UpdateTabletFollowersNumber(TLeaderTabletInfo& tablet, NIceDb::TNice db.Table().Key(tablet.Id, follower.Id).Delete(); follower.InitiateStop(sideEffects); tablet.Followers.erase(std::prev(itFollower.base())); + UpdateCounterTabletsTotal(-1); --followerCount; } } diff --git a/ydb/core/mind/hive/hive_impl.h b/ydb/core/mind/hive/hive_impl.h index 57a0a3dce3d3..24edd21f968d 100644 --- a/ydb/core/mind/hive/hive_impl.h +++ b/ydb/core/mind/hive/hive_impl.h @@ -415,6 +415,8 @@ class THive : public TActor, public TTabletExecutedFlat, public THiveShar TEventPriorityQueue EventQueue{*this}; ui64 OperationsLogIndex = 0; std::vector ActorsWaitingToMoveTablets; + std::queue NodePingQueue; + std::unordered_set NodePingsInProgress; struct TPendingCreateTablet { NKikimrHive::TEvCreateTablet CreateTablet; @@ -443,6 +445,7 @@ class THive : public TActor, public TTabletExecutedFlat, public THiveShar 
NKikimrConfig::THiveConfig ClusterConfig; NKikimrConfig::THiveConfig DatabaseConfig; + TDuration NodeBrokerEpoch; std::unordered_map TabletLimit; // built from CurrentConfig std::unordered_map DefaultDataCentersPreference; std::unordered_map> RegisteredDataCenterNodes; @@ -649,6 +652,7 @@ TTabletInfo* FindTabletEvenInDeleting(TTabletId tabletId, TFollowerId followerId void UpdateCounterBootQueueSize(ui64 bootQueueSize); void UpdateCounterEventQueueSize(i64 eventQueueSizeDiff); void UpdateCounterNodesConnected(i64 nodesConnectedDiff); + void UpdateCounterPingQueueSize(); void RecordTabletMove(const TTabletMoveInfo& info); bool DomainHasNodes(const TSubDomainKey &domainKey) const; void ProcessBootQueue(); @@ -677,7 +681,10 @@ TTabletInfo* FindTabletEvenInDeleting(TTabletId tabletId, TFollowerId followerId void UpdateRegisteredDataCenters(); void AddRegisteredDataCentersNode(TDataCenterId dataCenterId, TNodeId nodeId); void RemoveRegisteredDataCentersNode(TDataCenterId dataCenterId, TNodeId nodeId); + void QueuePing(const TActorId& local); void SendPing(const TActorId& local, TNodeId id); + void RemoveFromPingInProgress(TNodeId node); + void ProcessNodePingQueue(); void SendReconnect(const TActorId& local); static THolder BuildGroupParametersForChannel(const TLeaderTabletInfo& tablet, ui32 channelId); void KickTablet(const TTabletInfo& tablet); @@ -733,7 +740,11 @@ TTabletInfo* FindTabletEvenInDeleting(TTabletId tabletId, TFollowerId followerId } TDuration GetNodeDeletePeriod() const { - return TDuration::Seconds(CurrentConfig.GetNodeDeletePeriod()); + if (CurrentConfig.HasNodeDeletePeriod()) { + return TDuration::Seconds(CurrentConfig.GetNodeDeletePeriod()); + } else { + return NodeBrokerEpoch; + } } ui64 GetDrainInflight() const { @@ -934,6 +945,19 @@ TTabletInfo* FindTabletEvenInDeleting(TTabletId tabletId, TFollowerId followerId return CurrentConfig.GetStorageBalancerInflight(); } + double GetNodeUsageRangeToKick() const { + return 
CurrentConfig.GetNodeUsageRangeToKick(); + } + + ui64 GetMaxPingsInFlight() const { + return CurrentConfig.GetMaxPingsInFlight(); + } + + bool GetLessSystemTabletsMoves() const { + return CurrentConfig.GetLessSystemTabletsMoves(); + + } + static void ActualizeRestartStatistics(google::protobuf::RepeatedField& restartTimestamps, ui64 barrier); static ui64 GetRestartsPerPeriod(const google::protobuf::RepeatedField& restartTimestamps, ui64 barrier); static bool IsSystemTablet(TTabletTypes::EType type); diff --git a/ydb/core/mind/hive/hive_impl_ut.cpp b/ydb/core/mind/hive/hive_impl_ut.cpp index 0f71b8d31503..bf540540a7e0 100644 --- a/ydb/core/mind/hive/hive_impl_ut.cpp +++ b/ydb/core/mind/hive/hive_impl_ut.cpp @@ -109,7 +109,7 @@ Y_UNIT_TEST_SUITE(THiveImplTest) { auto CheckSpeedAndDistribution = []( std::unordered_map& allTablets, - std::function&, EResourceToBalance)> func, + std::function::iterator, std::vector::iterator, EResourceToBalance)> func, EResourceToBalance resource) -> void { std::vector tablets; @@ -119,7 +119,7 @@ Y_UNIT_TEST_SUITE(THiveImplTest) { TProfileTimer timer; - func(tablets, resource); + func(tablets.begin(), tablets.end(), resource); double passed = timer.Get().SecondsFloat(); diff --git a/ydb/core/mind/hive/hive_ut.cpp b/ydb/core/mind/hive/hive_ut.cpp index 1d89e73632ee..18417ce97eeb 100644 --- a/ydb/core/mind/hive/hive_ut.cpp +++ b/ydb/core/mind/hive/hive_ut.cpp @@ -3961,6 +3961,7 @@ Y_UNIT_TEST_SUITE(THiveTest) { // this value of MaxNodeUsageToKick is selected specifically to make test scenario work // in link with number of tablets and values of network usage metrics used below app.HiveConfig.SetMaxNodeUsageToKick(0.01); + app.HiveConfig.SetNodeUsageRangeToKick(0); app.HiveConfig.SetEmergencyBalancerInflight(1); // to ensure fair distribution }); @@ -4776,6 +4777,84 @@ Y_UNIT_TEST_SUITE(THiveTest) { } } + Y_UNIT_TEST(TestHiveBalancerUselessNeighbourMoves) { + // 7 tablets of same object, 3 nodes, one of nodes cannot run them + // 
distribution should be (4, 3, 0) + // this should trigger balancer, but not lead to any moves + static constexpr ui64 NUM_NODES = 3; + static constexpr ui64 NUM_TABLETS = 7; + TTestBasicRuntime runtime(NUM_NODES, false); + Setup(runtime, true, 1, [](TAppPrepare& app) { + app.HiveConfig.SetTabletKickCooldownPeriod(0); + app.HiveConfig.SetResourceChangeReactionPeriod(0); + app.HiveConfig.SetMetricsWindowSize(1); + }); + const int nodeBase = runtime.GetNodeId(0); + TActorId senderA = runtime.AllocateEdgeActor(); + const ui64 hiveTablet = MakeDefaultHiveID(); + const ui64 testerTablet = MakeTabletID(false, 1); + + auto getDistribution = [hiveTablet, nodeBase, senderA, &runtime]() -> std::array, NUM_NODES> { + std::array, NUM_NODES> nodeTablets = {}; + { + runtime.SendToPipe(hiveTablet, senderA, new TEvHive::TEvRequestHiveInfo()); + TAutoPtr handle; + TEvHive::TEvResponseHiveInfo* response = runtime.GrabEdgeEventRethrow(handle); + for (const NKikimrHive::TTabletInfo& tablet : response->Record.GetTablets()) { + UNIT_ASSERT_C(((int)tablet.GetNodeID() - nodeBase >= 0) && (tablet.GetNodeID() - nodeBase < NUM_NODES), + "nodeId# " << tablet.GetNodeID() << " nodeBase# " << nodeBase); + nodeTablets[tablet.GetNodeID() - nodeBase].push_back(tablet.GetTabletID()); + } + } + return nodeTablets; + }; + + CreateTestBootstrapper(runtime, CreateTestTabletInfo(hiveTablet, TTabletTypes::Hive), &CreateDefaultHive); + + // wait for creation of nodes + { + TDispatchOptions options; + options.FinalEvents.emplace_back(TEvLocal::EvStatus, NUM_NODES); + runtime.DispatchEvents(options); + } + + TTabletTypes::EType tabletType = TTabletTypes::Dummy; + std::vector tablets; + tablets.reserve(NUM_TABLETS); + for (size_t i = 0; i < NUM_TABLETS; ++i) { + THolder ev(new TEvHive::TEvCreateTablet(testerTablet, 100500 + i, tabletType, BINDED_CHANNELS)); + ev->Record.SetObjectId(1); + ev->Record.AddAllowedNodeIDs(nodeBase); + ev->Record.AddAllowedNodeIDs(nodeBase + 1); + ui64 tabletId = 
SendCreateTestTablet(runtime, hiveTablet, testerTablet, std::move(ev), 0, true); + MakeSureTabletIsUp(runtime, tabletId, 0); + tablets.push_back(tabletId); + } + + auto initialDistribution = getDistribution(); + + for (auto tablet : tablets) { + THolder metrics = MakeHolder(); + NKikimrHive::TTabletMetrics* metric = metrics->Record.AddTabletMetrics(); + metric->SetTabletID(tablet); + metric->MutableResourceUsage()->SetCPU(0); + metric->MutableResourceUsage()->SetMemory(0); + + runtime.SendToPipe(hiveTablet, senderA, metrics.Release()); + } + + { + TDispatchOptions options; + options.FinalEvents.emplace_back(NHive::TEvPrivate::EvBalancerOut); + runtime.DispatchEvents(options, TDuration::Seconds(10)); + } + + // Check that balancer moved no tablets + auto newDistribution = getDistribution(); + + UNIT_ASSERT_EQUAL(initialDistribution, newDistribution); + } + Y_UNIT_TEST(TestHiveBalancerWithImmovableTablets) { static constexpr ui64 TABLETS_PER_NODE = 10; TTestBasicRuntime runtime(3, false); @@ -4855,6 +4934,75 @@ Y_UNIT_TEST_SUITE(THiveTest) { UNIT_ASSERT_VALUES_EQUAL(newDistribution[1].size(), TABLETS_PER_NODE - 1); } + Y_UNIT_TEST(TestHiveBalancerHighUsage) { + static constexpr ui64 NUM_NODES = 2; + TTestBasicRuntime runtime(2, false); + Setup(runtime, true, 1, [](TAppPrepare& app) { + app.HiveConfig.SetTabletKickCooldownPeriod(0); + app.HiveConfig.SetResourceChangeReactionPeriod(0); + }); + const int nodeBase = runtime.GetNodeId(0); + TActorId senderA = runtime.AllocateEdgeActor(); + const ui64 hiveTablet = MakeDefaultHiveID(); + const ui64 testerTablet = MakeTabletID(false, 1); + + auto getDistribution = [hiveTablet, nodeBase, senderA, &runtime]() -> std::array, NUM_NODES> { + std::array, NUM_NODES> nodeTablets = {}; + { + runtime.SendToPipe(hiveTablet, senderA, new TEvHive::TEvRequestHiveInfo()); + TAutoPtr handle; + TEvHive::TEvResponseHiveInfo* response = runtime.GrabEdgeEventRethrow(handle); + for (const NKikimrHive::TTabletInfo& tablet : 
response->Record.GetTablets()) { + UNIT_ASSERT_C(((int)tablet.GetNodeID() - nodeBase >= 0) && (tablet.GetNodeID() - nodeBase < NUM_NODES), + "nodeId# " << tablet.GetNodeID() << " nodeBase# " << nodeBase); + nodeTablets[tablet.GetNodeID() - nodeBase].push_back(tablet.GetTabletID()); + } + } + return nodeTablets; + }; + + CreateTestBootstrapper(runtime, CreateTestTabletInfo(hiveTablet, TTabletTypes::Hive), &CreateDefaultHive); + + // wait for creation of nodes + { + TDispatchOptions options; + options.FinalEvents.emplace_back(TEvLocal::EvStatus, NUM_NODES); + runtime.DispatchEvents(options); + } + + TTabletTypes::EType tabletType = TTabletTypes::Dummy; + for (size_t i = 0; i < 2; ++i) { + THolder ev(new TEvHive::TEvCreateTablet(testerTablet, 100500 + i, tabletType, BINDED_CHANNELS)); + ev->Record.SetObjectId(i); + ui64 tabletId = SendCreateTestTablet(runtime, hiveTablet, testerTablet, std::move(ev), 0, true); + MakeSureTabletIsUp(runtime, tabletId, 0); + } + + auto initialDistribution = getDistribution(); + + std::array usages = {.89, .91}; + for (ui32 i = 0; i < 2; ++i) { + for (ui32 node = 0; node < NUM_NODES; ++node) { + TActorId sender = runtime.AllocateEdgeActor(node); + THolder metrics = MakeHolder(); + metrics->Record.SetTotalNodeUsage(usages[node]); + + runtime.SendToPipe(hiveTablet, sender, metrics.Release(), node); + } + } + + { + TDispatchOptions options; + options.FinalEvents.emplace_back(NHive::TEvPrivate::EvBalancerOut); + runtime.DispatchEvents(options, TDuration::Seconds(10)); + } + + // Check that balancer moved no tablets + auto newDistribution = getDistribution(); + + UNIT_ASSERT_EQUAL(initialDistribution, newDistribution); + } + Y_UNIT_TEST(TestUpdateTabletsObjectUpdatesMetrics) { TTestBasicRuntime runtime(1, false); Setup(runtime, true); diff --git a/ydb/core/mind/hive/monitoring.cpp b/ydb/core/mind/hive/monitoring.cpp index a1be16436e5d..2a74db07926b 100644 --- a/ydb/core/mind/hive/monitoring.cpp +++ b/ydb/core/mind/hive/monitoring.cpp @@ -794,6 
+794,7 @@ class TTxMonEvent_Settings : public TTransactionBase, public TLoggedMonTr UpdateConfig(db, "MinNetworkScatterToBalance", configUpdates); UpdateConfig(db, "MinCounterScatterToBalance", configUpdates); UpdateConfig(db, "MaxNodeUsageToKick", configUpdates, TSchemeIds::State::MaxNodeUsageToKick); + UpdateConfig(db, "NodeUsageRangeToKick", configUpdates); UpdateConfig(db, "ResourceChangeReactionPeriod", configUpdates, TSchemeIds::State::ResourceChangeReactionPeriod); UpdateConfig(db, "TabletKickCooldownPeriod", configUpdates, TSchemeIds::State::TabletKickCooldownPeriod); UpdateConfig(db, "SpreadNeighbours", configUpdates, TSchemeIds::State::SpreadNeighbours); @@ -835,6 +836,7 @@ class TTxMonEvent_Settings : public TTransactionBase, public TLoggedMonTr UpdateConfig(db, "MinStorageScatterToBalance", configUpdates); UpdateConfig(db, "MinGroupUsageToBalance", configUpdates); UpdateConfig(db, "StorageBalancerInflight", configUpdates); + UpdateConfig(db, "LessSystemTabletsMoves", configUpdates); if (params.contains("BalancerIgnoreTabletTypes")) { auto value = params.Get("BalancerIgnoreTabletTypes"); @@ -1140,6 +1142,7 @@ class TTxMonEvent_Settings : public TTransactionBase, public TLoggedMonTr ShowConfig(out, "MinCounterScatterToBalance"); ShowConfig(out, "MinNodeUsageToBalance"); ShowConfig(out, "MaxNodeUsageToKick"); + ShowConfig(out, "NodeUsageRangeToKick"); ShowConfig(out, "ResourceChangeReactionPeriod"); ShowConfig(out, "TabletKickCooldownPeriod"); ShowConfig(out, "NodeSelectStrategy"); @@ -1180,6 +1183,7 @@ class TTxMonEvent_Settings : public TTransactionBase, public TLoggedMonTr ShowConfig(out, "MinStorageScatterToBalance"); ShowConfig(out, "MinGroupUsageToBalance"); ShowConfig(out, "StorageBalancerInflight"); + ShowConfig(out, "LessSystemTabletsMoves"); ShowConfigForBalancerIgnoreTabletTypes(out); out << "
"; diff --git a/ydb/core/mind/hive/node_info.cpp b/ydb/core/mind/hive/node_info.cpp index 21c92abdce94..6acc1853abae 100644 --- a/ydb/core/mind/hive/node_info.cpp +++ b/ydb/core/mind/hive/node_info.cpp @@ -356,7 +356,7 @@ void TNodeInfo::DeregisterInDomains() { void TNodeInfo::Ping() { Y_ABORT_UNLESS((bool)Local); BLOG_D("Node(" << Id << ") Ping(" << Local << ")"); - Hive.SendPing(Local, Id); + Hive.QueuePing(Local); } void TNodeInfo::SendReconnect(const TActorId& local) { diff --git a/ydb/core/mind/hive/node_info.h b/ydb/core/mind/hive/node_info.h index 961116babce5..afc23ad92318 100644 --- a/ydb/core/mind/hive/node_info.h +++ b/ydb/core/mind/hive/node_info.h @@ -132,7 +132,11 @@ struct TNodeInfo { ui32 GetTabletNeighboursCount(const TTabletInfo& tablet) const { auto it = TabletsOfObject.find(tablet.GetObjectId()); if (it != TabletsOfObject.end()) { - return it->second.size(); + auto count = it->second.size(); + if (tablet.IsAliveOnLocal(Local)) { + --count; + } + return count; } else { return 0; } diff --git a/ydb/core/mind/hive/tablet_info.h b/ydb/core/mind/hive/tablet_info.h index 35920dd1748c..433b5e988bd9 100644 --- a/ydb/core/mind/hive/tablet_info.h +++ b/ydb/core/mind/hive/tablet_info.h @@ -162,6 +162,7 @@ struct TTabletInfo { TInstant PostponedStart; EBalancerPolicy BalancerPolicy; TNodeId FailedNodeId = 0; // last time we tried to start the tablet, we failed on this node + bool InWaitQueue = false; TTabletInfo(ETabletRole role, THive& hive); TTabletInfo(const TTabletInfo&) = delete; diff --git a/ydb/core/mind/hive/tx__load_everything.cpp b/ydb/core/mind/hive/tx__load_everything.cpp index 88967b806b4b..710a14d2bc38 100644 --- a/ydb/core/mind/hive/tx__load_everything.cpp +++ b/ydb/core/mind/hive/tx__load_everything.cpp @@ -318,6 +318,11 @@ class TTxLoadEverything : public TTransactionBase { node.Statistics = nodeRowset.GetValueOrDefault(); node.Name = nodeRowset.GetValueOrDefault(); node.BecomeUpOnRestart = nodeRowset.GetValueOrDefault(false); + if 
(node.BecomeUpOnRestart) { + // If a node must become up on restart, it must have been down + // That was not persisted to avoid issues with downgrades + node.Down = true; + } if (nodeRowset.HaveValue()) { auto location = nodeRowset.GetValue(); if (location.HasDataCenter()) { @@ -333,9 +338,9 @@ class TTxLoadEverything : public TTransactionBase { // it's safe to call here, because there is no any tablets in the node yet node.BecomeDisconnected(); } - if (node.CanBeDeleted()) { + if (Self->TryToDeleteNode(&node)) { + // node is deleted from hashmap db.Table().Key(nodeId).Delete(); - Self->Nodes.erase(nodeId); } else if (node.IsUnknown() && node.LocationAcquired) { Self->AddRegisteredDataCentersNode(node.Location.GetDataCenterId(), node.Id); } diff --git a/ydb/core/mind/hive/tx__register_node.cpp b/ydb/core/mind/hive/tx__register_node.cpp index e03431ef4789..7e8fb49b1f54 100644 --- a/ydb/core/mind/hive/tx__register_node.cpp +++ b/ydb/core/mind/hive/tx__register_node.cpp @@ -87,7 +87,9 @@ class TTxRegisterNode : public TTransactionBase { BLOG_D("THive::TTxRegisterNode(" << Local.NodeId() << ")::Complete"); TNodeInfo* node = Self->FindNode(Local.NodeId()); if (node != nullptr && node->Local) { // we send ping on every RegisterNode because we want to re-sync tablets upon every reconnection + Self->NodePingsInProgress.erase(node->Id); node->Ping(); + Self->ProcessNodePingQueue(); } } }; diff --git a/ydb/core/mind/hive/tx__switch_drain.cpp b/ydb/core/mind/hive/tx__switch_drain.cpp index 1e87a70b1e49..e665b58e2d95 100644 --- a/ydb/core/mind/hive/tx__switch_drain.cpp +++ b/ydb/core/mind/hive/tx__switch_drain.cpp @@ -38,7 +38,10 @@ class TTxSwitchDrainOn : public TTransactionBase { } node->SetDown(true); if (Settings.Persist) { - db.Table().Key(NodeId).Update(true, node->BecomeUpOnRestart); + db.Table().Key(NodeId).Update(node->BecomeUpOnRestart); + if (Settings.DownPolicy == NKikimrHive::DRAIN_POLICY_KEEP_DOWN) { + db.Table().Key(NodeId).Update(true); + } } } 
Self->StartHiveDrain(NodeId, std::move(Settings)); diff --git a/ydb/core/mind/hive/tx__update_tablet_metrics.cpp b/ydb/core/mind/hive/tx__update_tablet_metrics.cpp index 7e6150ec9a83..ed7d689456cc 100644 --- a/ydb/core/mind/hive/tx__update_tablet_metrics.cpp +++ b/ydb/core/mind/hive/tx__update_tablet_metrics.cpp @@ -54,6 +54,7 @@ class TTxUpdateTabletMetrics : public TTransactionBase { } TNodeInfo* node = Self->FindNode(nodeId); if (node != nullptr) { + node->UpdateResourceMaximum(record.GetResourceMaximum()); node->UpdateResourceTotalUsage(record); node->Statistics.SetLastAliveTimestamp(now.MilliSeconds()); node->ActualizeNodeStatistics(now); diff --git a/ydb/core/mind/local.cpp b/ydb/core/mind/local.cpp index ea60b85275a8..ebd564891b0a 100644 --- a/ydb/core/mind/local.cpp +++ b/ydb/core/mind/local.cpp @@ -109,6 +109,7 @@ class TLocalNodeRegistrar : public TActorBootstrapped { ui64 UserPoolUsage = 0; // (usage uS x threads) / sec ui64 MemUsage = 0; ui64 MemLimit = 0; + ui64 CpuLimit = 0; // PotentialMaxThreadCount of UserPool double NodeUsage = 0; bool SentDrainNode = false; @@ -272,28 +273,28 @@ class TLocalNodeRegistrar : public TActorBootstrapped { HandlePipeDestroyed(ctx); } - void SendStatusOk(const TActorContext &ctx) { - LOG_DEBUG_S(ctx, NKikimrServices::LOCAL, "TLocalNodeRegistrar SendStatusOk"); - TAutoPtr eventStatus = new TEvLocal::TEvStatus(TEvLocal::TEvStatus::StatusOk); - auto& record = eventStatus->Record; - record.SetStartTime(StartTime.GetValue()); - record.MutableResourceMaximum()->CopyFrom(ResourceLimit); - if (!record.GetResourceMaximum().HasCPU()) { - TExecutorPoolStats poolStats; - TVector statsCopy; - TVector sharedStatsCopy; - ctx.ExecutorThread.ActorSystem->GetPoolStats(AppData()->UserPoolId, poolStats, statsCopy, sharedStatsCopy); - if (!statsCopy.empty()) { - record.MutableResourceMaximum()->SetCPU(poolStats.CurrentThreadCount * 1000000); + void FillResourceMaximum(NKikimrTabletBase::TMetrics* record) { + record->CopyFrom(ResourceLimit); 
+ if (!record->HasCPU()) { + if (CpuLimit != 0) { + record->SetCPU(CpuLimit); } } - if (!record.GetResourceMaximum().HasMemory()) { + if (!record->HasMemory()) { if (MemLimit != 0) { - record.MutableResourceMaximum()->SetMemory(MemLimit); + record->SetMemory(MemLimit); } else { - record.MutableResourceMaximum()->SetMemory(NSystemInfo::TotalMemorySize()); + record->SetMemory(NSystemInfo::TotalMemorySize()); } } + } + + void SendStatusOk(const TActorContext &ctx) { + LOG_DEBUG_S(ctx, NKikimrServices::LOCAL, "TLocalNodeRegistrar SendStatusOk"); + TAutoPtr eventStatus = new TEvLocal::TEvStatus(TEvLocal::TEvStatus::StatusOk); + auto& record = eventStatus->Record; + record.SetStartTime(StartTime.GetValue()); + FillResourceMaximum(record.MutableResourceMaximum()); NTabletPipe::SendData(ctx, HivePipeClient, eventStatus.Release()); } @@ -587,6 +588,7 @@ class TLocalNodeRegistrar : public TActorBootstrapped { record.MutableTotalResourceUsage()->SetMemory(MemUsage); } record.SetTotalNodeUsage(NodeUsage); + FillResourceMaximum(record.MutableResourceMaximum()); NTabletPipe::SendData(ctx, HivePipeClient, event.Release()); SendTabletMetricsTime = ctx.Now(); } else { @@ -649,7 +651,8 @@ class TLocalNodeRegistrar : public TActorBootstrapped { const NKikimrWhiteboard::TSystemStateInfo& info = record.GetSystemStateInfo(0); if (static_cast(info.PoolStatsSize()) > AppData()->UserPoolId) { const auto& poolStats(info.GetPoolStats(AppData()->UserPoolId)); - UserPoolUsage = poolStats.usage() * poolStats.threads() * 1000000; // uS + CpuLimit = poolStats.limit() * 1'000'000; // microseconds + UserPoolUsage = poolStats.usage() * CpuLimit; // microseconds } // Note: we use allocated memory because MemoryUsed(AnonRSS) has lag diff --git a/ydb/core/mind/node_broker.cpp b/ydb/core/mind/node_broker.cpp index 27f96e66ca40..feb19c94e374 100644 --- a/ydb/core/mind/node_broker.cpp +++ b/ydb/core/mind/node_broker.cpp @@ -7,7 +7,9 @@ #include #include #include +#include #include +#include #include 
#include @@ -66,6 +68,7 @@ void TNodeBroker::OnActivateExecutor(const TActorContext &ctx) EnableStableNodeNames = appData->FeatureFlags.GetEnableStableNodeNames(); + Executor()->RegisterExternalTabletCounters(TabletCountersPtr); ClearState(); ProcessTx(CreateTxInitScheme(), ctx); @@ -287,15 +290,47 @@ void TNodeBroker::AddDelayedListNodesRequest(ui64 epoch, void TNodeBroker::ProcessListNodesRequest(TEvNodeBroker::TEvListNodes::TPtr &ev) { - ui64 version = ev->Get()->Record.GetCachedVersion(); + auto *msg = ev->Get(); NKikimrNodeBroker::TNodesInfo info; Epoch.Serialize(*info.MutableEpoch()); info.SetDomain(AppData()->DomainsInfo->GetDomain()->DomainUid); TAutoPtr resp = new TEvNodeBroker::TEvNodesInfo(info); - if (version != Epoch.Version) + + bool optimized = false; + + if (msg->Record.HasCachedVersion()) { + if (msg->Record.GetCachedVersion() == Epoch.Version) { + // Client has an up-to-date list already + optimized = true; + } else { + // We may be able to only send added nodes in the same epoch when + // all deltas are cached up to the current epoch inclusive. + ui64 neededFirstVersion = msg->Record.GetCachedVersion() + 1; + if (!EpochDeltasVersions.empty() && + EpochDeltasVersions.front() <= neededFirstVersion && + EpochDeltasVersions.back() == Epoch.Version && + neededFirstVersion <= Epoch.Version) + { + ui64 firstIndex = neededFirstVersion - EpochDeltasVersions.front(); + if (firstIndex > 0) { + // Note: usually there is a small number of nodes added + // between subsequent requests, so this substr should be + // very cheap. 
+ resp->PreSerializedData = EpochDeltasCache.substr(EpochDeltasEndOffsets[firstIndex - 1]); + } else { + resp->PreSerializedData = EpochDeltasCache; + } + optimized = true; + } + } + } + + if (!optimized) { resp->PreSerializedData = EpochCache; + } + TabletCounters->Percentile()[COUNTER_LIST_NODES_BYTES].IncrementFor(resp->GetCachedByteSize()); LOG_TRACE_S(TActorContext::AsActorContext(), NKikimrServices::NODE_BROKER, "Send TEvNodesInfo for epoch " << Epoch.ToString()); @@ -304,12 +339,16 @@ void TNodeBroker::ProcessListNodesRequest(TEvNodeBroker::TEvListNodes::TPtr &ev) void TNodeBroker::ProcessDelayedListNodesRequests() { + THashSet processed; while (!DelayedListNodesRequests.empty()) { auto it = DelayedListNodesRequests.begin(); if (it->first > Epoch.Id) break; - ProcessListNodesRequest(it->second); + // Avoid processing more than one request from the same sender + if (processed.insert(it->second->Sender).second) { + ProcessListNodesRequest(it->second); + } DelayedListNodesRequests.erase(it); } } @@ -427,6 +466,12 @@ void TNodeBroker::PrepareEpochCache() FillNodeInfo(entry.second, *info.AddExpiredNodes()); Y_PROTOBUF_SUPPRESS_NODISCARD info.SerializeToString(&EpochCache); + TabletCounters->Simple()[COUNTER_EPOCH_SIZE_BYTES].Set(EpochCache.Size()); + + EpochDeltasCache.clear(); + EpochDeltasVersions.clear(); + EpochDeltasEndOffsets.clear(); + TabletCounters->Simple()[COUNTER_EPOCH_DELTAS_SIZE_BYTES].Set(EpochDeltasCache.size()); } void TNodeBroker::AddNodeToEpochCache(const TNodeInfo &node) @@ -441,6 +486,18 @@ void TNodeBroker::AddNodeToEpochCache(const TNodeInfo &node) Y_PROTOBUF_SUPPRESS_NODISCARD info.SerializeToString(&delta); EpochCache += delta; + TabletCounters->Simple()[COUNTER_EPOCH_SIZE_BYTES].Set(EpochCache.Size()); + + if (!EpochDeltasVersions.empty() && EpochDeltasVersions.back() + 1 != Epoch.Version) { + EpochDeltasCache.clear(); + EpochDeltasVersions.clear(); + EpochDeltasEndOffsets.clear(); + } + + EpochDeltasCache += delta; + 
EpochDeltasVersions.push_back(Epoch.Version); + EpochDeltasEndOffsets.push_back(EpochDeltasCache.size()); + TabletCounters->Simple()[COUNTER_EPOCH_DELTAS_SIZE_BYTES].Set(EpochDeltasCache.size()); } void TNodeBroker::SubscribeForConfigUpdates(const TActorContext &ctx) @@ -822,6 +879,7 @@ void TNodeBroker::Handle(TEvConsole::TEvReplaceConfigSubscriptionsResponse::TPtr void TNodeBroker::Handle(TEvNodeBroker::TEvListNodes::TPtr &ev, const TActorContext &) { + TabletCounters->Cumulative()[COUNTER_LIST_NODES_REQUESTS].Increment(1); auto &rec = ev->Get()->Record; ui64 epoch = rec.GetMinEpoch(); @@ -836,6 +894,7 @@ void TNodeBroker::Handle(TEvNodeBroker::TEvListNodes::TPtr &ev, void TNodeBroker::Handle(TEvNodeBroker::TEvResolveNode::TPtr &ev, const TActorContext &ctx) { + TabletCounters->Cumulative()[COUNTER_RESOLVE_NODE_REQUESTS].Increment(1); ui32 nodeId = ev->Get()->Record.GetNodeId(); TAutoPtr resp = new TEvNodeBroker::TEvResolvedNode; @@ -859,10 +918,11 @@ void TNodeBroker::Handle(TEvNodeBroker::TEvRegistrationRequest::TPtr &ev, { LOG_TRACE_S(ctx, NKikimrServices::NODE_BROKER, "Handle TEvNodeBroker::TEvRegistrationRequest" << ": request# " << ev->Get()->Record.ShortDebugString()); + TabletCounters->Cumulative()[COUNTER_REGISTRATION_REQUESTS].Increment(1); - class TRegisterNodeActor : public TActorBootstrapped { + class TResolveTenantActor : public TActorBootstrapped { TEvNodeBroker::TEvRegistrationRequest::TPtr Ev; - TNodeBroker *Self; + TActorId ReplyTo; NActors::TScopeId ScopeId; TSubDomainKey ServicedSubDomain; @@ -871,9 +931,9 @@ void TNodeBroker::Handle(TEvNodeBroker::TEvRegistrationRequest::TPtr &ev, return NKikimrServices::TActivity::NODE_BROKER_ACTOR; } - TRegisterNodeActor(TEvNodeBroker::TEvRegistrationRequest::TPtr& ev, TNodeBroker *self) + TResolveTenantActor(TEvNodeBroker::TEvRegistrationRequest::TPtr& ev, TActorId replyTo) : Ev(ev) - , Self(self) + , ReplyTo(replyTo) {} void Bootstrap(const TActorContext& ctx) { @@ -930,7 +990,7 @@ void 
TNodeBroker::Handle(TEvNodeBroker::TEvRegistrationRequest::TPtr &ev, << ": scope id# " << ScopeIdToString(ScopeId) << ": serviced subdomain# " << ServicedSubDomain); - Self->ProcessTx(Self->CreateTxRegisterNode(Ev, ScopeId, ServicedSubDomain), ctx); + Send(ReplyTo, new TEvPrivate::TEvResolvedRegistrationRequest(Ev, ScopeId, ServicedSubDomain)); Die(ctx); } @@ -939,12 +999,13 @@ void TNodeBroker::Handle(TEvNodeBroker::TEvRegistrationRequest::TPtr &ev, CFunc(TEvents::TSystem::Undelivered, HandleUndelivered) }) }; - ctx.RegisterWithSameMailbox(new TRegisterNodeActor(ev, this)); + ctx.RegisterWithSameMailbox(new TResolveTenantActor(ev, SelfId())); } void TNodeBroker::Handle(TEvNodeBroker::TEvExtendLeaseRequest::TPtr &ev, const TActorContext &ctx) { + TabletCounters->Cumulative()[COUNTER_EXTEND_LEASE_REQUESTS].Increment(1); ui32 nodeId = ev->Get()->Record.GetNodeId(); ProcessTx(nodeId, CreateTxExtendLease(ev), ctx); } @@ -989,6 +1050,29 @@ void TNodeBroker::Handle(TEvPrivate::TEvUpdateEpoch::TPtr &ev, ProcessTx(CreateTxUpdateEpoch(), ctx); } +void TNodeBroker::Handle(TEvPrivate::TEvResolvedRegistrationRequest::TPtr &ev, + const TActorContext &ctx) +{ + ProcessTx(CreateTxRegisterNode(ev), ctx); +} + +TNodeBroker::TNodeBroker(const TActorId &tablet, TTabletStorageInfo *info) + : TActor(&TThis::StateInit) + , TTabletExecutedFlat(info, tablet, new NMiniKQL::TMiniKQLFactory) + , EpochDuration(TDuration::Hours(1)) + , ConfigSubscriptionId(0) + , StableNodeNamePrefix("slot-") + , TxProcessor(new TTxProcessor(*this, "root", NKikimrServices::NODE_BROKER)) +{ + TabletCountersPtr.Reset(new TProtobufTabletCounters< + ESimpleCounters_descriptor, + ECumulativeCounters_descriptor, + EPercentileCounters_descriptor, + ETxTypes_descriptor + >()); + TabletCounters = TabletCountersPtr.Get(); +} + IActor *CreateNodeBroker(const TActorId &tablet, TTabletStorageInfo *info) { diff --git a/ydb/core/mind/node_broker__extend_lease.cpp b/ydb/core/mind/node_broker__extend_lease.cpp index 
a9845d7ceff0..7788593e0257 100644 --- a/ydb/core/mind/node_broker__extend_lease.cpp +++ b/ydb/core/mind/node_broker__extend_lease.cpp @@ -2,6 +2,7 @@ #include "node_broker__scheme.h" #include +#include namespace NKikimr { namespace NNodeBroker { @@ -17,6 +18,8 @@ class TNodeBroker::TTxExtendLease : public TTransactionBase { { } + TTxType GetTxType() const override { return TXTYPE_EXTEND_LEASE; } + bool Error(TStatus::ECode code, const TString &reason, const TActorContext &ctx) diff --git a/ydb/core/mind/node_broker__init_scheme.cpp b/ydb/core/mind/node_broker__init_scheme.cpp index bc0c011c34ad..8e44825ce559 100644 --- a/ydb/core/mind/node_broker__init_scheme.cpp +++ b/ydb/core/mind/node_broker__init_scheme.cpp @@ -1,6 +1,8 @@ #include "node_broker_impl.h" #include "node_broker__scheme.h" +#include + namespace NKikimr { namespace NNodeBroker { @@ -11,6 +13,8 @@ class TNodeBroker::TTxInitScheme : public TTransactionBase { { } + TTxType GetTxType() const override { return TXTYPE_INIT_SCHEME; } + bool Execute(TTransactionContext &txc, const TActorContext &ctx) override { LOG_DEBUG(ctx, NKikimrServices::NODE_BROKER, "TTxInitScheme Execute"); diff --git a/ydb/core/mind/node_broker__load_state.cpp b/ydb/core/mind/node_broker__load_state.cpp index 7cca5bea2c8d..8a58c91c6c99 100644 --- a/ydb/core/mind/node_broker__load_state.cpp +++ b/ydb/core/mind/node_broker__load_state.cpp @@ -2,6 +2,7 @@ #include "node_broker__scheme.h" #include +#include namespace NKikimr { namespace NNodeBroker { @@ -13,6 +14,8 @@ class TNodeBroker::TTxLoadState : public TTransactionBase { { } + TTxType GetTxType() const override { return TXTYPE_LOAD_STATE; } + bool Execute(TTransactionContext &txc, const TActorContext &ctx) override { LOG_DEBUG(ctx, NKikimrServices::NODE_BROKER, "TTxLoadState Execute"); diff --git a/ydb/core/mind/node_broker__register_node.cpp b/ydb/core/mind/node_broker__register_node.cpp index 1b4ea33e47b0..e10237f4e614 100644 --- a/ydb/core/mind/node_broker__register_node.cpp +++ 
b/ydb/core/mind/node_broker__register_node.cpp @@ -2,6 +2,7 @@ #include "node_broker__scheme.h" #include +#include namespace NKikimr { namespace NNodeBroker { @@ -10,18 +11,19 @@ using namespace NKikimrNodeBroker; class TNodeBroker::TTxRegisterNode : public TTransactionBase { public: - TTxRegisterNode(TNodeBroker *self, TEvNodeBroker::TEvRegistrationRequest::TPtr &ev, - const NActors::TScopeId& scopeId, const TSubDomainKey& servicedSubDomain) + TTxRegisterNode(TNodeBroker *self, TEvPrivate::TEvResolvedRegistrationRequest::TPtr &resolvedEv) : TBase(self) - , Event(ev) - , ScopeId(scopeId) - , ServicedSubDomain(servicedSubDomain) + , Event(resolvedEv->Get()->Request) + , ScopeId(resolvedEv->Get()->ScopeId) + , ServicedSubDomain(resolvedEv->Get()->ServicedSubDomain) , NodeId(0) , ExtendLease(false) , FixNodeId(false) { } + TTxType GetTxType() const override { return TXTYPE_REGISTER_NODE; } + bool Error(TStatus::ECode code, const TString &reason, const TActorContext &ctx) @@ -186,11 +188,9 @@ class TNodeBroker::TTxRegisterNode : public TTransactionBase { bool FixNodeId; }; -ITransaction *TNodeBroker::CreateTxRegisterNode(TEvNodeBroker::TEvRegistrationRequest::TPtr &ev, - const NActors::TScopeId& scopeId, - const TSubDomainKey& servicedSubDomain) +ITransaction *TNodeBroker::CreateTxRegisterNode(TEvPrivate::TEvResolvedRegistrationRequest::TPtr &ev) { - return new TTxRegisterNode(this, ev, scopeId, servicedSubDomain); + return new TTxRegisterNode(this, ev); } } // NNodeBroker diff --git a/ydb/core/mind/node_broker__update_config.cpp b/ydb/core/mind/node_broker__update_config.cpp index 2211b447ac1e..c5a891f83b03 100644 --- a/ydb/core/mind/node_broker__update_config.cpp +++ b/ydb/core/mind/node_broker__update_config.cpp @@ -1,6 +1,8 @@ #include "node_broker_impl.h" #include "node_broker__scheme.h" +#include + namespace NKikimr { namespace NNodeBroker { @@ -24,6 +26,8 @@ class TNodeBroker::TTxUpdateConfig : public TTransactionBase { { } + TTxType GetTxType() const override { 
return TXTYPE_UPDATE_CONFIG; } + bool ProcessNotification(const TActorContext &ctx) { auto &rec = Notification->Get()->Record; diff --git a/ydb/core/mind/node_broker__update_config_subscription.cpp b/ydb/core/mind/node_broker__update_config_subscription.cpp index 8fa376ec85b8..dec51fc71f14 100644 --- a/ydb/core/mind/node_broker__update_config_subscription.cpp +++ b/ydb/core/mind/node_broker__update_config_subscription.cpp @@ -1,6 +1,8 @@ #include "node_broker_impl.h" #include "node_broker__scheme.h" +#include + namespace NKikimr { namespace NNodeBroker { @@ -14,6 +16,8 @@ class TNodeBroker::TTxUpdateConfigSubscription : public TTransactionBase + namespace NKikimr { namespace NNodeBroker { @@ -11,6 +13,8 @@ class TNodeBroker::TTxUpdateEpoch : public TTransactionBase { { } + TTxType GetTxType() const override { return TXTYPE_UPDATE_EPOCH; } + bool Execute(TTransactionContext &txc, const TActorContext &ctx) override { LOG_DEBUG_S(ctx, NKikimrServices::NODE_BROKER, "TTxUpdateEpoch Execute"); diff --git a/ydb/core/mind/node_broker_impl.h b/ydb/core/mind/node_broker_impl.h index 9efe587947b2..6e522e8b8792 100644 --- a/ydb/core/mind/node_broker_impl.h +++ b/ydb/core/mind/node_broker_impl.h @@ -46,6 +46,7 @@ class TNodeBroker : public TActor struct TEvPrivate { enum EEv { EvUpdateEpoch = EventSpaceBegin(TEvents::ES_PRIVATE), + EvResolvedRegistrationRequest, EvEnd }; @@ -53,6 +54,22 @@ class TNodeBroker : public TActor static_assert(EvEnd < EventSpaceEnd(TKikimrEvents::ES_PRIVATE), "expect EvEnd < EventSpaceEnd(TKikimrEvents::ES_PRIVATE)"); struct TEvUpdateEpoch : public TEventLocal {}; + + struct TEvResolvedRegistrationRequest : public TEventLocal { + + TEvResolvedRegistrationRequest( + TEvNodeBroker::TEvRegistrationRequest::TPtr request, + NActors::TScopeId scopeId, + TSubDomainKey servicedSubDomain) + : Request(request) + , ScopeId(scopeId) + , ServicedSubDomain(servicedSubDomain) + {} + + TEvNodeBroker::TEvRegistrationRequest::TPtr Request; + NActors::TScopeId ScopeId; 
+ TSubDomainKey ServicedSubDomain; + }; }; private: @@ -138,9 +155,7 @@ class TNodeBroker : public TActor ITransaction *CreateTxExtendLease(TEvNodeBroker::TEvExtendLeaseRequest::TPtr &ev); ITransaction *CreateTxInitScheme(); ITransaction *CreateTxLoadState(); - ITransaction *CreateTxRegisterNode(TEvNodeBroker::TEvRegistrationRequest::TPtr &ev, - const NActors::TScopeId& scopeId, - const TSubDomainKey& servicedSubDomain); + ITransaction *CreateTxRegisterNode(TEvPrivate::TEvResolvedRegistrationRequest::TPtr &ev); ITransaction *CreateTxUpdateConfig(TEvConsole::TEvConfigNotificationRequest::TPtr &ev); ITransaction *CreateTxUpdateConfig(TEvNodeBroker::TEvSetConfigRequest::TPtr &ev); ITransaction *CreateTxUpdateConfigSubscription(TEvConsole::TEvReplaceConfigSubscriptionsResponse::TPtr &ev); @@ -192,6 +207,7 @@ class TNodeBroker : public TActor HFuncTraced(TEvNodeBroker::TEvGetConfigRequest, Handle); HFuncTraced(TEvNodeBroker::TEvSetConfigRequest, Handle); HFuncTraced(TEvPrivate::TEvUpdateEpoch, Handle); + HFuncTraced(TEvPrivate::TEvResolvedRegistrationRequest, Handle); IgnoreFunc(TEvTabletPipe::TEvServerConnected); IgnoreFunc(TEvTabletPipe::TEvServerDisconnected); IgnoreFunc(NConsole::TEvConfigsDispatcher::TEvSetConfigSubscriptionResponse); @@ -293,6 +309,8 @@ class TNodeBroker : public TActor const TActorContext &ctx); void Handle(TEvPrivate::TEvUpdateEpoch::TPtr &ev, const TActorContext &ctx); + void Handle(TEvPrivate::TEvResolvedRegistrationRequest::TPtr &ev, + const TActorContext &ctx); // All registered dynamic nodes. 
THashMap Nodes; @@ -323,16 +341,15 @@ class TNodeBroker : public TActor TSchedulerCookieHolder EpochTimerCookieHolder; TString EpochCache; + TString EpochDeltasCache; + TVector EpochDeltasVersions; + TVector EpochDeltasEndOffsets; + + TTabletCountersBase* TabletCounters; + TAutoPtr TabletCountersPtr; + public: - TNodeBroker(const TActorId &tablet, TTabletStorageInfo *info) - : TActor(&TThis::StateInit) - , TTabletExecutedFlat(info, tablet, new NMiniKQL::TMiniKQLFactory) - , EpochDuration(TDuration::Hours(1)) - , ConfigSubscriptionId(0) - , StableNodeNamePrefix("slot-") - , TxProcessor(new TTxProcessor(*this, "root", NKikimrServices::NODE_BROKER)) - { - } + TNodeBroker(const TActorId &tablet, TTabletStorageInfo *info); static constexpr NKikimrServices::TActivity::EType ActorActivityType() { diff --git a/ydb/core/mind/node_broker_ut.cpp b/ydb/core/mind/node_broker_ut.cpp index 33156cd3af8a..55404659b077 100644 --- a/ydb/core/mind/node_broker_ut.cpp +++ b/ydb/core/mind/node_broker_ut.cpp @@ -858,6 +858,48 @@ Y_UNIT_TEST_SUITE(TNodeBrokerTest) { UNIT_ASSERT_VALUES_EQUAL(epoch1.GetId(), epoch.GetId() + 5); } + Y_UNIT_TEST(TestListNodesEpochDeltas) + { + TTestBasicRuntime runtime(8, false); + Setup(runtime, 10); + TActorId sender = runtime.AllocateEdgeActor(); + + WaitForEpochUpdate(runtime, sender); + WaitForEpochUpdate(runtime, sender); + + auto epoch0 = GetEpoch(runtime, sender); + CheckRegistration(runtime, sender, "host1", 1001, "host1.yandex.net", "1.2.3.4", + 1, 2, 3, 4, TStatus::OK, NODE1, epoch0.GetNextEnd()); + auto epoch1 = CheckFilteredNodesList(runtime, sender, {NODE1}, {}, 0, epoch0.GetVersion()); + CheckRegistration(runtime, sender, "host2", 1001, "host2.yandex.net", "1.2.3.5", + 1, 2, 3, 5, TStatus::OK, NODE2, epoch1.GetNextEnd()); + auto epoch2 = CheckFilteredNodesList(runtime, sender, {NODE2}, {}, 0, epoch1.GetVersion()); + CheckRegistration(runtime, sender, "host3", 1001, "host3.yandex.net", "1.2.3.6", + 1, 2, 3, 6, TStatus::OK, NODE3, 
epoch2.GetNextEnd()); + auto epoch3 = CheckFilteredNodesList(runtime, sender, {NODE3}, {}, 0, epoch2.GetVersion()); + + CheckFilteredNodesList(runtime, sender, {NODE1, NODE2, NODE3}, {}, 0, epoch0.GetVersion()); + CheckFilteredNodesList(runtime, sender, {NODE2, NODE3}, {}, 0, epoch1.GetVersion()); + CheckFilteredNodesList(runtime, sender, {}, {}, 0, epoch3.GetVersion()); + + RebootTablet(runtime, MakeNodeBrokerID(), sender); + CheckFilteredNodesList(runtime, sender, {}, {}, 0, epoch3.GetVersion()); + + CheckRegistration(runtime, sender, "host4", 1001, "host4.yandex.net", "1.2.3.7", + 1, 2, 3, 7, TStatus::OK, NODE4, epoch3.GetNextEnd()); + auto epoch4 = CheckFilteredNodesList(runtime, sender, {NODE4}, {}, 0, epoch3.GetVersion()); + + // NodeBroker doesn't have enough history in memory and replies with the full node list + CheckFilteredNodesList(runtime, sender, {NODE1, NODE2, NODE3, NODE4}, {}, 0, epoch2.GetVersion()); + + WaitForEpochUpdate(runtime, sender); + auto epoch5 = GetEpoch(runtime, sender); + CheckFilteredNodesList(runtime, sender, {}, {}, 0, epoch5.GetVersion()); + + // New epoch may remove nodes, so deltas are not returned on epoch change + CheckFilteredNodesList(runtime, sender, {NODE1, NODE2, NODE3, NODE4}, {}, 0, epoch3.GetVersion()); + } + Y_UNIT_TEST(TestRandomActions) { TTestBasicRuntime runtime(8, false); diff --git a/ydb/core/mon/async_http_mon.cpp b/ydb/core/mon/async_http_mon.cpp index 7eabb9f8409b..8df05b03fbc9 100644 --- a/ydb/core/mon/async_http_mon.cpp +++ b/ydb/core/mon/async_http_mon.cpp @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -145,6 +146,11 @@ class THttpMonRequest : public NMonitoring::IMonHttpRequest { return {}; } + bool AcceptsJsonResponse() { + TStringBuf acceptHeader = GetHeader("Accept"); + return acceptHeader.find(TStringBuf("application/json")) != TStringBuf::npos; + } + virtual TStringBuf GetCookie(TStringBuf name) const override { NHttp::TCookies cookies(GetHeader("Cookie")); return 
cookies.Get(name); @@ -213,7 +219,8 @@ class THttpMonLegacyActorRequest : public TActorBootstrappedGet()->Request->URL.Before('?')); TString status(response->Status); - NMonitoring::THistogramPtr ResponseTimeHgram = NKikimr::GetServiceCounters(NKikimr::AppData()->Counters, "utils") + NMonitoring::THistogramPtr ResponseTimeHgram = NKikimr::GetServiceCounters(NKikimr::AppData()->Counters, + ActorMonPage->MonServiceName) ->GetSubgroup("subsystem", "mon") ->GetSubgroup("url", url) ->GetSubgroup("status", status) @@ -239,18 +246,24 @@ class THttpMonLegacyActorRequest : public TActorBootstrappedCreateResponseString(response)); PassAway(); } + bool CredentialsProvided() { + return Container.GetCookie("ydb_session_id") || Container.GetHeader("Authorization"); + } + TString YdbToHttpError(Ydb::StatusIds::StatusCode status) { switch (status) { case Ydb::StatusIds::UNAUTHORIZED: - return "401 Unauthorized"; + // YDB status UNAUTHORIZED is used for both access denied case and if no credentials were provided. + return CredentialsProvided() ? "403 Forbidden" : "401 Unauthorized"; case Ydb::StatusIds::INTERNAL_ERROR: return "500 Internal Server Error"; case Ydb::StatusIds::UNAVAILABLE: @@ -267,26 +280,45 @@ class THttpMonLegacyActorRequest : public TActorBootstrappedGet()->Request; - NHttp::THeaders headers(request->Headers); TStringBuilder response; TStringBuilder body; - const TString httpError = YdbToHttpError(result.Status); - body << "

" << httpError << "

"; - if (result.Issues) { - body << "

" << result.Issues.ToString() << "

"; - } - body << ""; - TString origin = TString(headers["Origin"]); - if (origin.empty()) { - origin = "*"; + TStringBuf contentType; + const TString httpError = YdbToHttpError(status); + + if (Container.AcceptsJsonResponse()) { + contentType = "application/json"; + NJson::TJsonValue json; + TString message; + MakeJsonErrorReply(json, message, issues, NYdb::EStatus(status)); + NJson::WriteJson(&body.Out, &json); + } else { + contentType = "text/html"; + body << "

" << httpError << "

"; + if (issues) { + body << "

" << issues.ToString() << "

"; + } + body << ""; } + response << "HTTP/1.1 " << httpError << "\r\n"; - response << "Access-Control-Allow-Origin: " << origin << "\r\n"; - response << "Access-Control-Allow-Credentials: true\r\n"; - response << "Access-Control-Allow-Headers: Content-Type,Authorization,Origin,Accept\r\n"; - response << "Access-Control-Allow-Methods: OPTIONS, GET, POST, PUT, DELETE\r\n"; - response << "Content-Type: text/html\r\n"; + if (addAccessControlHeaders) { + NHttp::THeaders headers(request->Headers); + TString origin = TString(headers["Origin"]); + if (origin.empty()) { + origin = "*"; + } + response << "Access-Control-Allow-Origin: " << origin << "\r\n"; + response << "Access-Control-Allow-Credentials: true\r\n"; + response << "Access-Control-Allow-Headers: Content-Type,Authorization,Origin,Accept\r\n"; + response << "Access-Control-Allow-Methods: OPTIONS, GET, POST, PUT, DELETE\r\n"; + } + + response << "Content-Type: " << contentType << "\r\n"; response << "Content-Length: " << body.Size() << "\r\n"; response << "\r\n"; response << body; @@ -295,21 +327,9 @@ class THttpMonLegacyActorRequest : public TActorBootstrappedGet()->Request; - TStringBuilder response; - TStringBuilder body; - body << "

403 Forbidden

"; - if (!error.empty()) { - body << "

" << error << "

"; - } - body << ""; - response << "HTTP/1.1 403 Forbidden\r\n"; - response << "Content-Type: text/html\r\n"; - response << "Content-Length: " << body.Size() << "\r\n"; - response << "\r\n"; - response << body; - ReplyWith(request->CreateResponseString(response)); - PassAway(); + NYql::TIssues issues; + issues.AddIssue(error); + ReplyErrorAndPassAway(Ydb::StatusIds::UNAUTHORIZED, issues, false); } void SendRequest(const NKikimr::NGRpcService::TEvRequestAuthAndCheckResult* result = nullptr) { @@ -355,13 +375,15 @@ class THttpMonLegacyActorRequest : public TActorBootstrappedAllowedSIDs) { - if (result.UserToken->IsExist(sid)) { - found = true; - break; + if (result.UserToken) { + for (const TString& sid : ActorMonPage->AllowedSIDs) { + if (result.UserToken->IsExist(sid)) { + found = true; + break; + } } } - if (found || ActorMonPage->AllowedSIDs.empty()) { + if (found || ActorMonPage->AllowedSIDs.empty() || !result.UserToken) { SendRequest(&result); } else { return ReplyForbiddenAndPassAway("SID is not allowed"); @@ -533,10 +555,26 @@ class THttpMonServiceNodeRequest : public TActorBootstrapped parser(response); + + NHttp::THeadersBuilder headers(parser.Headers); + headers.Set("X-Forwarded-From-Node", TStringBuilder() << Event->Sender.NodeId()); + + NHttp::THttpRenderer renderer; + renderer.InitRequest(parser.Method, parser.URL, parser.Protocol, parser.Version); + renderer.Set(headers); + if (parser.HaveBody()) { + renderer.SetBody(parser.Body); // it shouldn't be here, 30x with a body is a bad idea + } + renderer.Finish(); + return renderer.AsString(); + } + void Bootstrap() { NHttp::THttpConfig::SocketAddressType address; FromProto(address, Event->Get()->Record.GetAddress()); - NHttp::THttpIncomingRequestPtr request = new NHttp::THttpIncomingRequest(Event->Get()->Record.GetHttpRequest(), Endpoint, address); + NHttp::THttpIncomingRequestPtr request = new NHttp::THttpIncomingRequest(RewriteWithForwardedFromNode(Event->Get()->Record.GetHttpRequest()), Endpoint, 
address); TStringBuilder prefix; prefix << "/node/" << TActivationContext::ActorSystem()->NodeId; if (request->URL.SkipPrefix(prefix)) { @@ -551,9 +589,36 @@ class THttpMonServiceNodeRequest : public TActorBootstrapped parser(response); + + NHttp::THeadersBuilder headers(parser.Headers); + headers.Set("Location", TStringBuilder() << "/node/" << TActivationContext::ActorSystem()->NodeId << headers["Location"]); + + NHttp::THttpRenderer renderer; + renderer.InitResponse(parser.Protocol, parser.Version, parser.Status, parser.Message); + renderer.Set(headers); + if (parser.HaveBody()) { + renderer.SetBody(parser.Body); // it shouldn't be here, 30x with a body is a bad idea + } + renderer.Finish(); + return renderer.AsString(); + } + void Handle(NHttp::TEvHttpProxy::TEvHttpOutgoingResponse::TPtr& ev) { + TString httpResponse = ev->Get()->Response->AsString(); + switch (FromStringWithDefault(ev->Get()->Response->Status)) { + case 301: + case 303: + case 307: + case 308: + if (!NHttp::THeaders(ev->Get()->Response->Headers).Get("Location").starts_with("/node/")) { + httpResponse = RewriteLocationWithNode(httpResponse); + } + break; + } auto response = std::make_unique(); - response->Record.SetHttpResponse(ev->Get()->Response->AsString()); + response->Record.SetHttpResponse(httpResponse); Send(Event->Sender, response.release(), 0, Event->Cookie); PassAway(); } @@ -823,7 +888,8 @@ NMonitoring::IMonPage* TAsyncHttpMon::RegisterActorPage(TRegisterActorPageFields fields.ActorSystem, fields.ActorId, fields.AllowedSIDs ? fields.AllowedSIDs : Config.AllowedSIDs, - fields.UseAuth ? Config.Authorizer : TRequestAuthorizer()); + fields.UseAuth ? 
Config.Authorizer : TRequestAuthorizer(), + fields.MonServiceName); if (fields.Index) { fields.Index->Register(page); if (fields.SortPages) { diff --git a/ydb/core/mon/mon.cpp b/ydb/core/mon/mon.cpp index 4d7b8c797e2c..311132a04fda 100644 --- a/ydb/core/mon/mon.cpp +++ b/ydb/core/mon/mon.cpp @@ -8,6 +8,7 @@ #include #include +#include #include @@ -88,6 +89,48 @@ NActors::IEventHandle* GetAuthorizeTicketResult(const NActors::TActorId& owner) } } +void MakeJsonErrorReply(NJson::TJsonValue& jsonResponse, TString& message, const NYdb::TStatus& status) { + MakeJsonErrorReply(jsonResponse, message, status.GetIssues(), status.GetStatus()); +} + +void MakeJsonErrorReply(NJson::TJsonValue& jsonResponse, TString& message, const NYql::TIssues& issues, NYdb::EStatus status) { + google::protobuf::RepeatedPtrField protoIssues; + NYql::IssuesToMessage(issues, &protoIssues); + + message.clear(); + + NJson::TJsonValue& jsonIssues = jsonResponse["issues"]; + for (const auto& queryIssue : protoIssues) { + NJson::TJsonValue& issue = jsonIssues.AppendValue({}); + NProtobufJson::Proto2Json(queryIssue, issue); + } + + TString textStatus = TStringBuilder() << status; + jsonResponse["status"] = textStatus; + + // find first deepest error + std::stable_sort(protoIssues.begin(), protoIssues.end(), [](const Ydb::Issue::IssueMessage& a, const Ydb::Issue::IssueMessage& b) -> bool { + return a.severity() < b.severity(); + }); + + const google::protobuf::RepeatedPtrField* protoIssuesPtr = &protoIssues; + while (protoIssuesPtr->size() > 0 && protoIssuesPtr->at(0).issuesSize() > 0) { + protoIssuesPtr = &protoIssuesPtr->at(0).issues(); + } + + if (protoIssuesPtr->size() > 0) { + const Ydb::Issue::IssueMessage& issue = protoIssuesPtr->at(0); + NProtobufJson::Proto2Json(issue, jsonResponse["error"]); + message = issue.message(); + } else { + jsonResponse["error"]["message"] = textStatus; + } + + if (message.empty()) { + message = textStatus; + } +} + IMonPage* TMon::RegisterActorPage(TIndexMonPage* 
index, const TString& relPath, const TString& title, bool preTag, TActorSystem* actorSystem, const TActorId& actorId, bool useAuth, bool sortPages) { return RegisterActorPage({ diff --git a/ydb/core/mon/mon.h b/ydb/core/mon/mon.h index 69373811a836..bdf30bc354ba 100644 --- a/ydb/core/mon/mon.h +++ b/ydb/core/mon/mon.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -10,12 +11,17 @@ #include #include +#include +#include namespace NActors { IEventHandle* SelectAuthorizationScheme(const NActors::TActorId& owner, NMonitoring::IMonHttpRequest& request); IEventHandle* GetAuthorizeTicketResult(const NActors::TActorId& owner); +void MakeJsonErrorReply(NJson::TJsonValue& jsonResponse, TString& message, const NYql::TIssues& issues, NYdb::EStatus status); +void MakeJsonErrorReply(NJson::TJsonValue& jsonResponse, TString& message, const NYdb::TStatus& status); + class TActorSystem; struct TActorId; @@ -54,6 +60,7 @@ class TMon { bool UseAuth = true; TVector AllowedSIDs; bool SortPages = true; + TString MonServiceName = "utils"; }; virtual NMonitoring::IMonPage* RegisterActorPage(TRegisterActorPageFields fields) = 0; diff --git a/ydb/core/mon/mon_impl.h b/ydb/core/mon/mon_impl.h index 135b95c69ef9..99ea18cba1d7 100644 --- a/ydb/core/mon/mon_impl.h +++ b/ydb/core/mon/mon_impl.h @@ -360,7 +360,7 @@ class TActorMonPage: public IMonPage { public: TActorMonPage(const TString &path, const TString &title, const TString &host, bool preTag, TActorSystem *actorSystem, const TActorId &actorId, const TVector &sids, - TMon::TRequestAuthorizer authorizer) + TMon::TRequestAuthorizer authorizer, TString monServiceName = "utils") : IMonPage(path, title) , Host(host) , PreTag(preTag) @@ -368,6 +368,7 @@ class TActorMonPage: public IMonPage { , TargetActorId(actorId) , AllowedSIDs(sids) , Authorizer(std::move(authorizer)) + , MonServiceName(monServiceName) { } @@ -405,6 +406,7 @@ class TActorMonPage: public IMonPage { TActorId TargetActorId; const TVector AllowedSIDs; 
TMon::TRequestAuthorizer Authorizer; + TString MonServiceName; }; inline TString GetPageFullPath(const NMonitoring::IMonPage* page) { diff --git a/ydb/core/mon/sync_http_mon.cpp b/ydb/core/mon/sync_http_mon.cpp index 822cd7e3389e..4fc334def9d8 100644 --- a/ydb/core/mon/sync_http_mon.cpp +++ b/ydb/core/mon/sync_http_mon.cpp @@ -81,7 +81,8 @@ namespace NActors { fields.ActorSystem, fields.ActorId, fields.AllowedSIDs ? fields.AllowedSIDs : Config.AllowedSIDs, - fields.UseAuth ? Config.Authorizer : TRequestAuthorizer()); + fields.UseAuth ? Config.Authorizer : TRequestAuthorizer(), + fields.MonServiceName); if (fields.Index) { fields.Index->Register(page); if (fields.SortPages) { diff --git a/ydb/core/mon/ya.make b/ydb/core/mon/ya.make index 86f82b217bf8..25a2e6b3c283 100644 --- a/ydb/core/mon/ya.make +++ b/ydb/core/mon/ya.make @@ -14,6 +14,7 @@ SRCS( PEERDIR( library/cpp/json library/cpp/lwtrace/mon + library/cpp/protobuf/json library/cpp/string_utils/url ydb/core/base ydb/core/grpc_services/base @@ -21,6 +22,8 @@ PEERDIR( ydb/library/aclib ydb/library/actors/core ydb/library/actors/http + ydb/library/yql/public/issue + ydb/public/sdk/cpp/client/ydb_types/status ) END() diff --git a/ydb/core/node_whiteboard/node_whiteboard.h b/ydb/core/node_whiteboard/node_whiteboard.h index 2b697e1a81ca..ba0d5e999850 100644 --- a/ydb/core/node_whiteboard/node_whiteboard.h +++ b/ydb/core/node_whiteboard/node_whiteboard.h @@ -361,23 +361,18 @@ struct TEvWhiteboard{ } } - TEvSystemStateUpdate(const TVector>& poolStats) { + TEvSystemStateUpdate(const TVector>& poolStats) { for (const auto& row : poolStats) { auto& pb = *Record.AddPoolStats(); pb.SetName(std::get<0>(row)); pb.SetUsage(std::get<1>(row)); pb.SetThreads(std::get<2>(row)); + pb.SetLimit(std::get<3>(row)); } } TEvSystemStateUpdate(const TNodeLocation& systemLocation) { systemLocation.Serialize(Record.MutableLocation(), false); - const auto& x = systemLocation.GetLegacyValue(); - auto *pb = Record.MutableSystemLocation(); - 
pb->SetDataCenter(x.DataCenter); - pb->SetRoom(x.Room); - pb->SetRack(x.Rack); - pb->SetBody(x.Body); } TEvSystemStateUpdate(const NKikimrWhiteboard::TSystemStateInfo& systemStateInfo) { @@ -506,5 +501,41 @@ inline TActorId MakeNodeWhiteboardServiceId(ui32 node) { IActor* CreateNodeWhiteboardService(); -} // NTabletState +template +struct WhiteboardResponse {}; + +template<> +struct WhiteboardResponse { + using Type = TEvWhiteboard::TEvTabletStateResponse; +}; + +template<> +struct WhiteboardResponse { + using Type = TEvWhiteboard::TEvPDiskStateResponse; +}; + +template<> +struct WhiteboardResponse { + using Type = TEvWhiteboard::TEvVDiskStateResponse; +}; + +template<> +struct WhiteboardResponse { + using Type = TEvWhiteboard::TEvSystemStateResponse; +}; + +template<> +struct WhiteboardResponse { + using Type = TEvWhiteboard::TEvBSGroupStateResponse; +}; + +template<> +struct WhiteboardResponse { + using Type = TEvWhiteboard::TEvNodeStateResponse; +}; + +template +::google::protobuf::RepeatedField GetDefaultWhiteboardFields(); + +} // NNodeWhiteboard } // NKikimr diff --git a/ydb/core/persqueue/blob.cpp b/ydb/core/persqueue/blob.cpp index 80027c17577b..5564d7cd03e2 100644 --- a/ydb/core/persqueue/blob.cpp +++ b/ydb/core/persqueue/blob.cpp @@ -422,7 +422,7 @@ void TBatch::Unpack() { PackedData.Clear(); } -void TBatch::UnpackTo(TVector *blobs) +void TBatch::UnpackTo(TVector *blobs) const { Y_ABORT_UNLESS(PackedData.size()); auto type = Header.GetFormat(); @@ -446,7 +446,7 @@ NScheme::TDataRef GetChunk(const char*& data, const char *end) return NScheme::TDataRef(data - size, size); } -void TBatch::UnpackToType1(TVector *blobs) { +void TBatch::UnpackToType1(TVector *blobs) const { Y_ABORT_UNLESS(Header.GetFormat() == NKikimrPQ::TBatchHeader::ECompressed); Y_ABORT_UNLESS(PackedData.size()); ui32 totalBlobs = Header.GetCount() + Header.GetInternalPartsCount(); @@ -606,7 +606,7 @@ void TBatch::UnpackToType1(TVector *blobs) { } } -void TBatch::UnpackToType0(TVector 
*blobs) { +void TBatch::UnpackToType0(TVector *blobs) const { Y_ABORT_UNLESS(Header.GetFormat() == NKikimrPQ::TBatchHeader::EUncompressed); Y_ABORT_UNLESS(PackedData.size()); ui32 shift = 0; @@ -640,7 +640,7 @@ ui32 TBatch::FindPos(const ui64 offset, const ui16 partNo) const { void THead::Clear() { Offset = PartNo = PackedSize = 0; - Batches.clear(); + ClearBatches(); } ui64 THead::GetNextOffset() const @@ -650,11 +650,7 @@ ui64 THead::GetNextOffset() const ui16 THead::GetInternalPartsCount() const { - ui16 res = 0; - for (auto& b : Batches) { - res += b.GetInternalPartsCount(); - } - return res; + return InternalPartsCount; } ui32 THead::GetCount() const @@ -675,15 +671,73 @@ IOutputStream& operator <<(IOutputStream& out, const THead& value) } ui32 THead::FindPos(const ui64 offset, const ui16 partNo) const { - ui32 i = 0; - for (; i < Batches.size(); ++i) { - //this batch contains blobs with position bigger than requested - if (Batches[i].GetOffset() > offset || Batches[i].GetOffset() == offset && Batches[i].GetPartNo() > partNo) - break; - } - if (i == 0) + if (Batches.empty()) { return Max(); - return i - 1; + } + + ui32 i = Batches.size() - 1; + while (i > 0 && Batches[i].IsGreaterThan(offset, partNo)) { + --i; + } + + if (i == 0) { + if (Batches[i].IsGreaterThan(offset, partNo)) { + return Max(); + } else { + return 0; + } + } + + return i; +} + +void THead::AddBatch(const TBatch& batch) { + auto& b = Batches.emplace_back(batch); + InternalPartsCount += b.GetInternalPartsCount(); +} + +void THead::ClearBatches() { + Batches.clear(); + InternalPartsCount = 0; +} + +const std::deque& THead::GetBatches() const { + return Batches; +} + +const TBatch& THead::GetBatch(ui32 idx) const { + return Batches.at(idx); +} + +const TBatch& THead::GetLastBatch() const { + Y_ABORT_UNLESS(!Batches.empty()); + return Batches.back(); +} + +TBatch THead::ExtractFirstBatch() { + Y_ABORT_UNLESS(!Batches.empty()); + auto batch = std::move(Batches.front()); + InternalPartsCount -= 
batch.GetInternalPartsCount(); + Batches.pop_front(); + return batch; +} + +THead::TBatchAccessor THead::MutableBatch(ui32 idx) { + Y_ABORT_UNLESS(idx < Batches.size()); + return TBatchAccessor(Batches[idx]); +} + +THead::TBatchAccessor THead::MutableLastBatch() { + Y_ABORT_UNLESS(!Batches.empty()); + return TBatchAccessor(Batches.back()); +} + +void THead::AddBlob(const TClientBlob& blob) { + Y_ABORT_UNLESS(!Batches.empty()); + auto& batch = Batches.back(); + InternalPartsCount -= batch.GetInternalPartsCount(); + batch.AddBlob(blob); + InternalPartsCount += batch.GetInternalPartsCount(); } TPartitionedBlob::TRenameFormedBlobInfo::TRenameFormedBlobInfo(const TKey& oldKey, const TKey& newKey, ui32 size) : @@ -832,7 +886,7 @@ auto TPartitionedBlob::CreateFormedBlob(ui32 size, bool useRename) -> std::optio GlueHead = GlueNewHead = false; if (!Blobs.empty()) { - TBatch batch{Offset, Blobs.front().GetPartNo(), std::move(Blobs)}; + auto batch = TBatch::FromBlobs(Offset, std::move(Blobs)); Blobs.clear(); batch.Pack(); Y_ABORT_UNLESS(batch.Packed); diff --git a/ydb/core/persqueue/blob.h b/ydb/core/persqueue/blob.h index 24aa479a2eaa..6ad52f28e42c 100644 --- a/ydb/core/persqueue/blob.h +++ b/ydb/core/persqueue/blob.h @@ -121,38 +121,30 @@ struct TBatch { TVector InternalPartsPos; NKikimrPQ::TBatchHeader Header; TBuffer PackedData; + TBatch() : Packed(false) { PackedData.Reserve(8_MB); } - TBatch(const ui64 offset, const ui16 partNo, const TVector& blobs) - : Packed(false) + TBatch(const ui64 offset, const ui16 partNo) + : TBatch() { - PackedData.Reserve(8_MB); Header.SetOffset(offset); Header.SetPartNo(partNo); Header.SetUnpackedSize(0); Header.SetCount(0); Header.SetInternalPartsCount(0); - for (auto& b : blobs) { - AddBlob(b); - } } - TBatch(const ui64 offset, const ui16 partNo, const std::deque& blobs) - : Packed(false) - { - PackedData.Reserve(8_MB); - Header.SetOffset(offset); - Header.SetPartNo(partNo); - Header.SetUnpackedSize(0); - Header.SetCount(0); - 
Header.SetInternalPartsCount(0); + static TBatch FromBlobs(const ui64 offset, std::deque&& blobs) { + Y_ABORT_UNLESS(!blobs.empty()); + TBatch batch(offset, blobs.front().GetPartNo()); for (auto& b : blobs) { - AddBlob(b); + batch.AddBlob(b); } + return batch; } void AddBlob(const TClientBlob &b) { @@ -187,6 +179,9 @@ struct TBatch { ui16 GetInternalPartsCount() const { return Header.GetInternalPartsCount(); } + bool IsGreaterThan(ui64 offset, ui16 partNo) const { + return GetOffset() > offset || GetOffset() == offset && GetPartNo() > partNo; + } TBatch(const NKikimrPQ::TBatchHeader &header, const char* data) : Packed(true) @@ -198,9 +193,9 @@ struct TBatch { ui32 GetPackedSize() const { Y_ABORT_UNLESS(Packed); return sizeof(ui16) + PackedData.size() + Header.ByteSize(); } void Pack(); void Unpack(); - void UnpackTo(TVector *result); - void UnpackToType0(TVector *result); - void UnpackToType1(TVector *result); + void UnpackTo(TVector *result) const; + void UnpackToType0(TVector *result) const; + void UnpackToType1(TVector *result) const; void SerializeTo(TString& res) const; @@ -232,14 +227,39 @@ class TBlobIterator { ui16 InternalPartsCount; }; +class TPartitionedBlob; + //THead represents bathes, stored in head(at most 8 Mb) struct THead { - std::deque Batches; //all batches except last must be packed // BlobsSize <= 512Kb // size of Blobs after packing must be <= BlobsSize //otherwise head will be compacted not in total, some blobs will still remain in head //PackedSize + BlobsSize must be <= 8Mb +private: + std::deque Batches; + ui16 InternalPartsCount = 0; + + friend class TPartitionedBlob; + + class TBatchAccessor { + TBatch& Batch; + + public: + explicit TBatchAccessor(TBatch& batch) + : Batch(batch) + {} + + void Pack() { + Batch.Pack(); + } + + void Unpack() { + Batch.Unpack(); + } + }; + +public: ui64 Offset; ui16 PartNo; ui32 PackedSize; @@ -261,6 +281,18 @@ struct THead { //return Max if not such pos in head //returns batch with such position ui32 
FindPos(const ui64 offset, const ui16 partNo) const; + + void AddBatch(const TBatch& batch); + void ClearBatches(); + const std::deque& GetBatches() const; + const TBatch& GetBatch(ui32 idx) const; + const TBatch& GetLastBatch() const; + TBatchAccessor MutableBatch(ui32 idx); + TBatchAccessor MutableLastBatch(); + TBatch ExtractFirstBatch(); + void AddBlob(const TClientBlob& blob); + + friend IOutputStream& operator <<(IOutputStream& out, const THead& value); }; IOutputStream& operator <<(IOutputStream& out, const THead& value); diff --git a/ydb/core/persqueue/cluster_tracker.cpp b/ydb/core/persqueue/cluster_tracker.cpp index 4b1d757b1fca..2b0d15f693e5 100644 --- a/ydb/core/persqueue/cluster_tracker.cpp +++ b/ydb/core/persqueue/cluster_tracker.cpp @@ -13,6 +13,8 @@ #include #include +#include + namespace NKikimr::NPQ::NClusterTracker { inline auto& Ctx() { @@ -132,6 +134,7 @@ class TClusterTracker: public TActorBootstrapped { req->Record.MutableRequest()->SetKeepSession(false); req->Record.MutableRequest()->SetQuery(MakeListClustersQuery()); req->Record.MutableRequest()->SetDatabase(GetDatabase()); + req->Record.MutableRequest()->SetUsePublicResponseDataFormat(true); // useless without explicit session // req->Record.MutableRequest()->MutableQueryCachePolicy()->set_keep_in_cache(true); req->Record.MutableRequest()->MutableTxControl()->mutable_begin_tx()->mutable_serializable_read_write(); @@ -144,45 +147,52 @@ class TClusterTracker: public TActorBootstrapped { LOG_DEBUG_S(Ctx(), NKikimrServices::PERSQUEUE_CLUSTER_TRACKER, "HandleWhileWorking TEvQueryResponse"); const auto& record = ev->Get()->Record.GetRef(); - if (record.GetYdbStatus() == Ydb::StatusIds::SUCCESS && record.GetResponse().GetResults(0).GetValue().GetStruct(0).ListSize()) { - LOG_DEBUG_S(Ctx(), NKikimrServices::PERSQUEUE_CLUSTER_TRACKER, "HandleWhileWorking TEvQueryResponse UpdateClustersList"); - UpdateClustersList(record); + if (record.GetYdbStatus() == Ydb::StatusIds::SUCCESS) { + 
NYdb::TResultSetParser parser(record.GetResponse().GetYdbResults(0)); + if (parser.RowsCount()) { + LOG_DEBUG_S(Ctx(), NKikimrServices::PERSQUEUE_CLUSTER_TRACKER, "HandleWhileWorking TEvQueryResponse UpdateClustersList"); + UpdateClustersList(parser); - Y_ABORT_UNLESS(ClustersList); - Y_ABORT_UNLESS(ClustersList->Clusters.size()); - Y_ABORT_UNLESS(ClustersListUpdateTimestamp && *ClustersListUpdateTimestamp); + Y_ABORT_UNLESS(ClustersList); + Y_ABORT_UNLESS(ClustersList->Clusters.size()); + Y_ABORT_UNLESS(ClustersListUpdateTimestamp && *ClustersListUpdateTimestamp); - BroadcastClustersUpdate(); + BroadcastClustersUpdate(); - Schedule(TDuration::Seconds(Cfg().GetClustersUpdateTimeoutSec()), new TEvents::TEvWakeup); - } else { - LOG_ERROR_S(Ctx(), NKikimrServices::PERSQUEUE_CLUSTER_TRACKER, "failed to list clusters: " << record); + Schedule(TDuration::Seconds(Cfg().GetClustersUpdateTimeoutSec()), new TEvents::TEvWakeup); + return; + } + } - ClustersList = nullptr; + LOG_ERROR_S(Ctx(), NKikimrServices::PERSQUEUE_CLUSTER_TRACKER, "failed to list clusters: " << record); - Schedule(TDuration::Seconds(Cfg().GetClustersUpdateTimeoutOnErrorSec()), new TEvents::TEvWakeup); - } + ClustersList = nullptr; + Schedule(TDuration::Seconds(Cfg().GetClustersUpdateTimeoutOnErrorSec()), new TEvents::TEvWakeup); } template - void UpdateClustersList(const TProtoRecord& record) { + void UpdateClustersList(TProtoRecord& parser) { auto clustersList = MakeIntrusive(); - auto& t = record.GetResponse().GetResults(0).GetValue().GetStruct(0); - clustersList->Clusters.resize(t.ListSize()); + clustersList->Clusters.resize(parser.RowsCount()); - for (size_t i = 0; i < t.ListSize(); ++i) { + bool firstRow = parser.TryNextRow(); + YQL_ENSURE(firstRow); + clustersList->Version = *parser.ColumnParser(5).GetOptionalInt64(); + size_t i = 0; + + do { auto& cluster = clustersList->Clusters[i]; - cluster.Name = t.GetList(i).GetStruct(0).GetOptional().GetText(); + cluster.Name = 
*parser.ColumnParser(0).GetOptionalUtf8(); cluster.Datacenter = cluster.Name; - cluster.Balancer = t.GetList(i).GetStruct(1).GetOptional().GetText(); + cluster.Balancer = *parser.ColumnParser(1).GetOptionalUtf8(); - cluster.IsLocal = t.GetList(i).GetStruct(2).GetOptional().GetBool(); - cluster.IsEnabled = t.GetList(i).GetStruct(3).GetOptional().GetBool(); - cluster.Weight = t.GetList(i).GetStruct(4).GetOptional().GetUint64(); - } + cluster.IsLocal = *parser.ColumnParser(2).GetOptionalBool(); + cluster.IsEnabled = *parser.ColumnParser(3).GetOptionalBool(); + cluster.Weight = *parser.ColumnParser(4).GetOptionalUint64(); - clustersList->Version = t.GetList(0).GetStruct(5).GetOptional().GetInt64(); + ++i; + } while (parser.TryNextRow()); ClustersList = std::move(clustersList); ClustersListUpdateTimestamp = Ctx().Now(); diff --git a/ydb/core/persqueue/dread_cache_service/caching_service.cpp b/ydb/core/persqueue/dread_cache_service/caching_service.cpp index 6f8951df4aa0..7181e7161385 100644 --- a/ydb/core/persqueue/dread_cache_service/caching_service.cpp +++ b/ydb/core/persqueue/dread_cache_service/caching_service.cpp @@ -475,6 +475,10 @@ class TPQDirectReadCacheService : public TActorBootstrapped { TEvUpdateConfig() {} }; + struct TEvUpdateConfigBuilder: public TEvUpdateConfig { + using TBase::Record; + }; + struct TEvUpdateBalancerConfig: public TEventPB { TEvUpdateBalancerConfig() {} @@ -245,7 +249,11 @@ struct TEvPersQueue { {} }; - struct TEvProposeTransaction : public TEventPB { + struct TEvProposeTransaction : public TEventPreSerializedPB { + }; + + struct TEvProposeTransactionBuilder: public TEvProposeTransaction { + using TBase::Record; }; struct TEvProposeTransactionResult : public TEventPB { diff --git a/ydb/core/persqueue/events/internal.h b/ydb/core/persqueue/events/internal.h index b588ba9c32b8..99c05a742c31 100644 --- a/ydb/core/persqueue/events/internal.h +++ b/ydb/core/persqueue/events/internal.h @@ -576,13 +576,15 @@ struct TEvPQ { }; struct 
TEvChangePartitionConfig : public TEventLocal { - TEvChangePartitionConfig(const NPersQueue::TTopicConverterPtr& topicConverter, const NKikimrPQ::TPQTabletConfig& config) + TEvChangePartitionConfig(const NPersQueue::TTopicConverterPtr& topicConverter, const NKikimrPQ::TPQTabletConfig& config, const NKikimrPQ::TBootstrapConfig& bootstrapConfig) : TopicConverter(topicConverter) , Config(config) + , BootstrapConfig(bootstrapConfig) {} NPersQueue::TTopicConverterPtr TopicConverter; NKikimrPQ::TPQTabletConfig Config; + NKikimrPQ::TBootstrapConfig BootstrapConfig; }; struct TEvPartitionConfigChanged : public TEventLocal { @@ -820,7 +822,7 @@ struct TEvPQ { }; struct TEvTxCalcPredicateResult : public TEventLocal { - TEvTxCalcPredicateResult(ui64 step, ui64 txId, const NPQ::TPartitionId& partition, bool predicate) : + TEvTxCalcPredicateResult(ui64 step, ui64 txId, const NPQ::TPartitionId& partition, TMaybe predicate) : Step(step), TxId(txId), Partition(partition), @@ -831,7 +833,7 @@ struct TEvPQ { ui64 Step; ui64 TxId; NPQ::TPartitionId Partition; - bool Predicate = false; + TMaybe Predicate; }; struct TEvProposePartitionConfig : public TEventLocal { @@ -845,6 +847,7 @@ struct TEvPQ { ui64 TxId; NPersQueue::TTopicConverterPtr TopicConverter; NKikimrPQ::TPQTabletConfig Config; + NKikimrPQ::TBootstrapConfig BootstrapConfig; }; struct TEvProposePartitionConfigResult : public TEventLocal { diff --git a/ydb/core/persqueue/fetch_request_actor.cpp b/ydb/core/persqueue/fetch_request_actor.cpp index a5d259a6a2aa..42533fc525ef 100644 --- a/ydb/core/persqueue/fetch_request_actor.cpp +++ b/ydb/core/persqueue/fetch_request_actor.cpp @@ -93,6 +93,9 @@ struct TEvPrivate { TActorId RequesterId; ui64 PendingQuotaAmount; + std::unordered_map PrivateTopicPathToCdcPath; + std::unordered_map CdcPathToPrivateTopicPath; + public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::PQ_FETCH_REQUEST; @@ -176,16 +179,24 @@ struct TEvPrivate { 
schemeCacheRequest->DatabaseName = Settings.Database; THashSet topicsRequested; - for (const auto& part : Settings.Partitions) { - auto ins = topicsRequested.insert(part.Topic).second; - if (!ins) - continue; - auto split = NKikimr::SplitPath(part.Topic); + + if (PrivateTopicPathToCdcPath.empty()) { + for (const auto& part : Settings.Partitions) { + topicsRequested.insert(part.Topic); + } + } else { + for (const auto& [key, value] : PrivateTopicPathToCdcPath) { + topicsRequested.insert(key); + } + } + + for (const auto& topicName : topicsRequested) { + auto split = NKikimr::SplitPath(topicName); TSchemeCacheNavigate::TEntry entry; entry.Path.insert(entry.Path.end(), split.begin(), split.end()); entry.SyncVersion = true; - entry.ShowPrivatePath = false; + entry.ShowPrivatePath = true; entry.Operation = NSchemeCache::TSchemeCacheNavigate::OpList; schemeCacheRequest->ResultSet.emplace_back(std::move(entry)); @@ -197,6 +208,7 @@ struct TEvPrivate { void HandleSchemeCacheResponse(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev, const TActorContext& ctx) { LOG_DEBUG_S(ctx, NKikimrServices::PQ_FETCH_REQUEST, "Handle SchemeCache response"); auto& result = ev->Get()->Request; + bool anyCdcTopicInRequest = false; for (const auto& entry : result->ResultSet) { auto path = CanonizePath(NKikimr::JoinPath(entry.Path)); switch (entry.Status) { @@ -219,6 +231,16 @@ struct TEvPrivate { ), ctx ); } + if (entry.Kind == NSchemeCache::TSchemeCacheNavigate::KindCdcStream) { + anyCdcTopicInRequest = true; + Y_ABORT_UNLESS(entry.ListNodeEntry->Children.size() == 1); + auto privateTopicPath = CanonizePath(JoinPath(ChildPath(NKikimr::SplitPath(path), entry.ListNodeEntry->Children.at(0).Name))); + PrivateTopicPathToCdcPath[privateTopicPath] = path; + CdcPathToPrivateTopicPath[path] = privateTopicPath; + TopicInfo[privateTopicPath] = TopicInfo[path]; + TopicInfo.erase(path); + continue; + } if (entry.Kind != TSchemeCacheNavigate::EKind::KindTopic) { return SendReplyAndDie( 
CreateErrorReply( @@ -256,6 +278,12 @@ struct TEvPrivate { topicInfo.BalancerTabletId = description.GetBalancerTabletID(); topicInfo.PQInfo = entry.PQGroupInfo; } + + if (anyCdcTopicInRequest) { + SendSchemeCacheRequest(ctx); + return; + } + for (auto& p: TopicInfo) { ProcessMetadata(p.first, p.second, ctx); } @@ -393,8 +421,15 @@ struct TEvPrivate { return SendReplyAndDie(std::move(Response), ctx); } Y_ABORT_UNLESS(FetchRequestReadsDone < Settings.Partitions.size()); - const auto& req = Settings.Partitions[FetchRequestReadsDone]; - const auto& topic = req.Topic; + auto& req = Settings.Partitions[FetchRequestReadsDone]; + + auto& topic = req.Topic; + + auto cdcToPrivateIt = CdcPathToPrivateTopicPath.find(req.Topic); + if (cdcToPrivateIt != CdcPathToPrivateTopicPath.end()) { + topic = cdcToPrivateIt->second; + } + const auto& offset = req.Offset; const auto& part = req.Partition; const auto& maxBytes = req.MaxBytes; @@ -462,7 +497,13 @@ struct TEvPrivate { const auto& topic = req.Topic; const auto& part = req.Partition; - res->SetTopic(topic); + auto privateTopicToCdcIt = PrivateTopicPathToCdcPath.find(topic); + if (privateTopicToCdcIt == PrivateTopicPathToCdcPath.end()) { + res->SetTopic(topic); + } else { + res->SetTopic(PrivateTopicPathToCdcPath[topic]); + } + res->SetPartition(part); auto read = res->MutableReadResult(); if (record.HasPartitionResponse() && record.GetPartitionResponse().HasCmdReadResult()) diff --git a/ydb/core/persqueue/partition.cpp b/ydb/core/persqueue/partition.cpp index 8a16e3f7ec04..f0694691f0a5 100644 --- a/ydb/core/persqueue/partition.cpp +++ b/ydb/core/persqueue/partition.cpp @@ -55,8 +55,8 @@ auto GetStepAndTxId(const E& event) return GetStepAndTxId(event.Step, event.TxId); } -bool TPartition::LastOffsetHasBeenCommited(const TUserInfo& userInfo) const { - return !IsActive() && static_cast(std::max(userInfo.Offset, 0)) == EndOffset; +bool TPartition::LastOffsetHasBeenCommited(const TUserInfoBase& userInfo) const { + return !IsActive() 
&& (static_cast(std::max(userInfo.Offset, 0)) == EndOffset || StartOffset == EndOffset); } struct TMirrorerInfo { @@ -82,7 +82,7 @@ TString TPartition::LogPrefix() const { } else { state = "Unknown"; } - return TStringBuilder() << "[Partition:" << Partition << ", State:" << state << "] "; + return TStringBuilder() << "[PQ: " << TabletID << ", Partition: " << Partition << ", State: " << state << "] "; } bool TPartition::IsActive() const { @@ -245,7 +245,7 @@ void TPartition::EmplaceResponse(TMessage&& message, const TActorContext& ctx) { ); } -ui64 TPartition::MeteringDataSize() const { +ui64 TPartition::UserDataSize() const { if (DataKeysBody.size() <= 1) { // tiny optimization - we do not meter very small queues up to 16MB return 0; @@ -260,25 +260,39 @@ ui64 TPartition::MeteringDataSize() const { return size >= lastBlobSize ? size - lastBlobSize : 0; } +ui64 TPartition::MeteringDataSize(TInstant now) const { + if (IsActive() || NKikimrPQ::TPQTabletConfig::METERING_MODE_REQUEST_UNITS == Config.GetMeteringMode()) { + return UserDataSize(); + } else { + // We only add the amount of data that is blocked by an important consumer. + ui64 size = 0; + auto expirationTimestamp = now - TDuration::Seconds(Config.GetPartitionConfig().GetLifetimeSeconds()) - WAKE_TIMEOUT; + for (size_t i = 1; i < DataKeysBody.size() && DataKeysBody[i].Timestamp < expirationTimestamp; ++i) { + size += DataKeysBody[i].Size; + } + return size; + } +} + ui64 TPartition::ReserveSize() const { - return TopicPartitionReserveSize(Config); + return IsActive() ? 
TopicPartitionReserveSize(Config) : 0; } ui64 TPartition::StorageSize(const TActorContext&) const { - return std::max(MeteringDataSize(), ReserveSize()); + return std::max(UserDataSize(), ReserveSize()); } ui64 TPartition::UsedReserveSize(const TActorContext&) const { - return std::min(MeteringDataSize(), ReserveSize()); + return std::min(UserDataSize(), ReserveSize()); } ui64 TPartition::GetUsedStorage(const TInstant& now) { const auto duration = now - LastUsedStorageMeterTimestamp; LastUsedStorageMeterTimestamp = now; - auto dataSize = MeteringDataSize(); + auto dataSize = MeteringDataSize(now); auto reservedSize = ReserveSize(); - ui64 size = dataSize > reservedSize ? dataSize - reservedSize : 0; + auto size = dataSize > reservedSize ? dataSize - reservedSize : 0; return size * duration.MilliSeconds() / 1000 / 1_MB; // mb*seconds } @@ -493,8 +507,8 @@ void TPartition::DestroyActor(const TActorContext& ctx) UsersInfoStorage->Clear(ctx); } + Send(ReadQuotaTrackerActor, new TEvents::TEvPoisonPill()); if (!IsSupportive()) { - Send(ReadQuotaTrackerActor, new TEvents::TEvPoisonPill()); Send(WriteQuotaTrackerActor, new TEvents::TEvPoisonPill()); } @@ -749,7 +763,7 @@ void TPartition::Handle(TEvPQ::TEvPartitionStatus::TPtr& ev, const TActorContext result.SetReadBytesQuota(maxQuota); - result.SetPartitionSize(MeteringDataSize()); + result.SetPartitionSize(UserDataSize()); result.SetUsedReserveSize(UsedReserveSize(ctx)); result.SetLastWriteTimestampMs(WriteTimestamp.MilliSeconds()); @@ -885,7 +899,7 @@ void TPartition::Handle(TEvPQ::TEvUpdateWriteTimestamp::TPtr& ev, const TActorCo void TPartition::Handle(TEvPersQueue::TEvProposeTransaction::TPtr& ev, const TActorContext& ctx) { - const NKikimrPQ::TEvProposeTransaction& event = ev->Get()->Record; + const NKikimrPQ::TEvProposeTransaction& event = ev->Get()->GetRecord(); Y_ABORT_UNLESS(event.GetTxBodyCase() == NKikimrPQ::TEvProposeTransaction::kData); Y_ABORT_UNLESS(event.HasData()); const NKikimrPQ::TDataTransaction& 
txBody = event.GetData(); @@ -913,6 +927,10 @@ void TPartition::Handle(TEvPersQueue::TEvProposeTransaction::TPtr& ev, const TAc void TPartition::Handle(TEvPQ::TEvProposePartitionConfig::TPtr& ev, const TActorContext& ctx) { + PQ_LOG_D("Handle TEvPQ::TEvProposePartitionConfig" << + " Step " << ev->Get()->Step << + ", TxId " << ev->Get()->TxId); + PushBackDistrTx(ev->Release()); ProcessTxsAndUserActs(ctx); @@ -925,6 +943,10 @@ void TPartition::HandleOnInit(TEvPQ::TEvTxCalcPredicate::TPtr& ev, const TActorC void TPartition::HandleOnInit(TEvPQ::TEvTxCommit::TPtr& ev, const TActorContext&) { + PQ_LOG_D("HandleOnInit TEvPQ::TEvTxCommit" << + " Step " << ev->Get()->Step << + ", TxId " << ev->Get()->TxId); + PendingEvents.emplace_back(ev->ReleaseBase().Release()); } @@ -935,11 +957,30 @@ void TPartition::HandleOnInit(TEvPQ::TEvTxRollback::TPtr& ev, const TActorContex void TPartition::HandleOnInit(TEvPQ::TEvProposePartitionConfig::TPtr& ev, const TActorContext&) { + PQ_LOG_D("HandleOnInit TEvPQ::TEvProposePartitionConfig" << + " Step " << ev->Get()->Step << + ", TxId " << ev->Get()->TxId); + PendingEvents.emplace_back(ev->ReleaseBase().Release()); } void TPartition::Handle(TEvPQ::TEvTxCalcPredicate::TPtr& ev, const TActorContext& ctx) { + PQ_LOG_D("Handle TEvPQ::TEvTxCalcPredicate" << + " Step " << ev->Get()->Step << + ", TxId " << ev->Get()->TxId); + + if (PlanStep.Defined() && TxId.Defined()) { + if (GetStepAndTxId(*ev->Get()) < GetStepAndTxId(*PlanStep, *TxId)) { + Send(Tablet, + MakeHolder(ev->Get()->Step, + ev->Get()->TxId, + Partition, + Nothing()).Release()); + return; + } + } + PushBackDistrTx(ev->Release()); ProcessTxsAndUserActs(ctx); @@ -947,8 +988,15 @@ void TPartition::Handle(TEvPQ::TEvTxCalcPredicate::TPtr& ev, const TActorContext void TPartition::Handle(TEvPQ::TEvTxCommit::TPtr& ev, const TActorContext& ctx) { + PQ_LOG_D("Handle TEvPQ::TEvTxCommit" << + " Step " << ev->Get()->Step << + ", TxId " << ev->Get()->TxId); + if (PlanStep.Defined() && TxId.Defined()) 
{ if (GetStepAndTxId(*ev->Get()) < GetStepAndTxId(*PlanStep, *TxId)) { + PQ_LOG_D("Send TEvTxCommitDone" << + " Step " << ev->Get()->Step << + ", TxId " << ev->Get()->TxId); ctx.Send(Tablet, MakeCommitDone(ev->Get()->Step, ev->Get()->TxId).Release()); return; } @@ -956,11 +1004,20 @@ void TPartition::Handle(TEvPQ::TEvTxCommit::TPtr& ev, const TActorContext& ctx) auto txIter = TransactionsInflight.begin(); if (ChangeConfig) { - Y_ABORT_UNLESS(TransactionsInflight.size() == 1); + Y_ABORT_UNLESS(TransactionsInflight.size() == 1, + "PQ: %" PRIu64 ", Partition: %" PRIu32 ", Step: %" PRIu64 ", TxId: %" PRIu64, + TabletID, Partition.OriginalPartitionId, + ev->Get()->Step, ev->Get()->TxId); } else { - Y_ABORT_UNLESS(!TransactionsInflight.empty()); + Y_ABORT_UNLESS(!TransactionsInflight.empty(), + "PQ: %" PRIu64 ", Partition: %" PRIu32 ", Step: %" PRIu64 ", TxId: %" PRIu64, + TabletID, Partition.OriginalPartitionId, + ev->Get()->Step, ev->Get()->TxId); txIter = TransactionsInflight.find(ev->Get()->TxId); - Y_ABORT_UNLESS(!txIter.IsEnd()); + Y_ABORT_UNLESS(!txIter.IsEnd(), + "PQ: %" PRIu64 ", Partition: %" PRIu32 ", Step: %" PRIu64 ", TxId: %" PRIu64, + TabletID, Partition.OriginalPartitionId, + ev->Get()->Step, ev->Get()->TxId); } Y_ABORT_UNLESS(txIter->second->State == ECommitState::Pending); @@ -973,18 +1030,29 @@ void TPartition::Handle(TEvPQ::TEvTxRollback::TPtr& ev, const TActorContext& ctx auto* event = ev->Get(); if (PlanStep.Defined() && TxId.Defined()) { if (GetStepAndTxId(*event) < GetStepAndTxId(*PlanStep, *TxId)) { + PQ_LOG_D("Rollback for" << + " Step " << ev->Get()->Step << + ", TxId " << ev->Get()->TxId); return; } } auto txIter = TransactionsInflight.begin(); if (ChangeConfig) { - Y_ABORT_UNLESS(TransactionsInflight.size() == 1); + Y_ABORT_UNLESS(TransactionsInflight.size() == 1, + "PQ: %" PRIu64 ", Partition: %" PRIu32, + TabletID, Partition.OriginalPartitionId); } else { + Y_ABORT_UNLESS(!TransactionsInflight.empty(), + "PQ: %" PRIu64 ", Partition: %" 
PRIu32, + TabletID, Partition.OriginalPartitionId); txIter = TransactionsInflight.find(ev->Get()->TxId); - Y_ABORT_UNLESS(!txIter.IsEnd()); + Y_ABORT_UNLESS(!txIter.IsEnd(), + "PQ: %" PRIu64 ", Partition: %" PRIu32, + TabletID, Partition.OriginalPartitionId); } Y_ABORT_UNLESS(txIter->second->State == ECommitState::Pending); + txIter->second->State = ECommitState::Aborted; ProcessTxsAndUserActs(ctx); } @@ -1109,19 +1177,20 @@ void TPartition::Handle(TEvPQ::TEvGetWriteInfoError::TPtr& ev, const TActorConte void TPartition::ReplyToProposeOrPredicate(TSimpleSharedPtr& tx, bool isPredicate) { if (isPredicate) { - auto insRes = TransactionsInflight.insert(std::make_pair(tx->Tx->TxId, tx)); + auto insRes = TransactionsInflight.emplace(tx->Tx->TxId, tx); Y_ABORT_UNLESS(insRes.second); - Send(Tablet, MakeHolder(tx->Tx->Step, - tx->Tx->TxId, - Partition, - *tx->Predicate).Release()); + Send(Tablet, + MakeHolder(tx->Tx->Step, + tx->Tx->TxId, + Partition, + *tx->Predicate).Release()); } else { - auto insRes = TransactionsInflight.insert(std::make_pair(tx->ProposeConfig->TxId, tx)); + auto insRes = TransactionsInflight.emplace(tx->ProposeConfig->TxId, tx); Y_ABORT_UNLESS(insRes.second); Send(Tablet, - MakeHolder(tx->ProposeConfig->Step, - tx->ProposeConfig->TxId, - Partition).Release()); + MakeHolder(tx->ProposeConfig->Step, + tx->ProposeConfig->TxId, + Partition).Release()); } } @@ -1921,7 +1990,7 @@ TPartition::EProcessResult TPartition::PreProcessUserActionOrTransaction(TSimple return EProcessResult::Continue; } t->Predicate.ConstructInPlace(true); - return PreProcessImmediateTx(t->ProposeTransaction->Record); + return PreProcessImmediateTx(t->ProposeTransaction->GetRecord()); } else if (t->Tx) { // Distributed TX if (t->Predicate.Defined()) { // Predicate defined - either failed previously or Tx created with predicate defined. 
@@ -1983,7 +2052,8 @@ bool TPartition::ExecUserActionOrTransaction(TSimpleSharedPtr& t, } else if (t->ProposeConfig) { Y_ABORT_UNLESS(ChangingConfig); ChangeConfig = MakeSimpleShared(TopicConverter, - t->ProposeConfig->Config); + t->ProposeConfig->Config, + t->ProposeConfig->BootstrapConfig); PendingPartitionConfig = GetPartitionConfig(ChangeConfig->Config); SendChangeConfigReply = false; } @@ -2069,7 +2139,8 @@ bool TPartition::BeginTransaction(const TEvPQ::TEvProposePartitionConfig& event) { ChangeConfig = MakeSimpleShared(TopicConverter, - event.Config); + event.Config, + event.BootstrapConfig); PendingPartitionConfig = GetPartitionConfig(ChangeConfig->Config); SendChangeConfigReply = false; @@ -2078,6 +2149,8 @@ bool TPartition::BeginTransaction(const TEvPQ::TEvProposePartitionConfig& event) void TPartition::CommitWriteOperations(TTransaction& t) { + PQ_LOG_D("TPartition::CommitWriteOperations TxId: " << t.GetTxId()); + Y_ABORT_UNLESS(PersistRequest); Y_ABORT_UNLESS(!PartitionedBlob.IsInited()); @@ -2095,6 +2168,10 @@ void TPartition::CommitWriteOperations(TTransaction& t) HaveWriteMsg = true; } + PQ_LOG_D("t.WriteInfo->BodyKeys.size=" << t.WriteInfo->BodyKeys.size() << + ", t.WriteInfo->BlobsFromHead.size=" << t.WriteInfo->BlobsFromHead.size()); + PQ_LOG_D("Head=" << Head << ", NewHead=" << NewHead); + if (!t.WriteInfo->BodyKeys.empty()) { PartitionedBlob = TPartitionedBlob(Partition, NewHead.Offset, @@ -2109,6 +2186,7 @@ void TPartition::CommitWriteOperations(TTransaction& t) MaxBlobSize); for (auto& k : t.WriteInfo->BodyKeys) { + PQ_LOG_D("add key " << k.Key.ToString()); auto write = PartitionedBlob.Add(k.Key, k.Size); if (write && !write->Value.empty()) { AddCmdWrite(write, PersistRequest.Get(), ctx); @@ -2117,18 +2195,17 @@ void TPartition::CommitWriteOperations(TTransaction& t) } } - } - if (const auto& formedBlobs = PartitionedBlob.GetFormedBlobs(); !formedBlobs.empty()) { - ui32 curWrites = RenameTmpCmdWrites(PersistRequest.Get()); - 
RenameFormedBlobs(formedBlobs, - *Parameters, - curWrites, - PersistRequest.Get(), - ctx); - } + PQ_LOG_D("PartitionedBlob.GetFormedBlobs().size=" << PartitionedBlob.GetFormedBlobs().size()); + if (const auto& formedBlobs = PartitionedBlob.GetFormedBlobs(); !formedBlobs.empty()) { + ui32 curWrites = RenameTmpCmdWrites(PersistRequest.Get()); + RenameFormedBlobs(formedBlobs, + *Parameters, + curWrites, + PersistRequest.Get(), + ctx); + } - if (!t.WriteInfo->BodyKeys.empty()) { const auto& last = t.WriteInfo->BodyKeys.back(); NewHead.Offset += (last.Key.GetOffset() + last.Key.GetCount()); @@ -2175,6 +2252,7 @@ void TPartition::CommitWriteOperations(TTransaction& t) }, std::nullopt}; msg.Internal = true; + WriteInflightSize += msg.Msg.Data.size(); ExecRequest(msg, *Parameters, PersistRequest.Get()); auto& info = TxSourceIdForPostPersist[blob.SourceId]; @@ -2305,6 +2383,7 @@ void TPartition::OnProcessTxsAndUserActsWriteComplete(const TActorContext& ctx) if (ChangeConfig) { EndChangePartitionConfig(std::move(ChangeConfig->Config), + std::move(ChangeConfig->BootstrapConfig), ChangeConfig->TopicConverter, ctx); } @@ -2371,12 +2450,24 @@ void TPartition::OnProcessTxsAndUserActsWriteComplete(const TActorContext& ctx) } void TPartition::EndChangePartitionConfig(NKikimrPQ::TPQTabletConfig&& config, + NKikimrPQ::TBootstrapConfig&& bootstrapConfig, NPersQueue::TTopicConverterPtr topicConverter, const TActorContext& ctx) { Config = std::move(config); PartitionConfig = GetPartitionConfig(Config); PartitionGraph = MakePartitionGraph(Config); + + for (const auto& mg : bootstrapConfig.GetExplicitMessageGroups()) { + TMaybe keyRange; + if (mg.HasKeyRange()) { + keyRange = TPartitionKeyRange::Parse(mg.GetKeyRange()); + } + + TSourceIdInfo sourceId(0, 0, ctx.Now(), std::move(keyRange), false); + SourceIdStorage.RegisterSourceIdInfo(mg.GetId(), std::move(sourceId), true); + } + TopicConverter = topicConverter; NewPartition = false; @@ -2386,14 +2477,15 @@ void 
TPartition::EndChangePartitionConfig(NKikimrPQ::TPQTabletConfig&& config, InitSplitMergeSlidingWindow(); } - Send(ReadQuotaTrackerActor, new TEvPQ::TEvChangePartitionConfig(TopicConverter, Config)); - Send(WriteQuotaTrackerActor, new TEvPQ::TEvChangePartitionConfig(TopicConverter, Config)); + Send(ReadQuotaTrackerActor, new TEvPQ::TEvChangePartitionConfig(TopicConverter, Config, bootstrapConfig)); + Send(WriteQuotaTrackerActor, new TEvPQ::TEvChangePartitionConfig(TopicConverter, Config, bootstrapConfig)); TotalPartitionWriteSpeed = config.GetPartitionConfig().GetWriteSpeedInBytesPerSecond(); if (Config.GetPartitionConfig().HasMirrorFrom()) { if (Mirrorer) { ctx.Send(Mirrorer->Actor, new TEvPQ::TEvChangePartitionConfig(TopicConverter, - Config)); + Config, + bootstrapConfig)); } else { CreateMirrorerActor(); } @@ -2481,7 +2573,7 @@ TPartition::EProcessResult TPartition::PreProcessImmediateTx(const NKikimrPQ::TE void TPartition::ExecImmediateTx(TTransaction& t) { --ImmediateTxCount; - auto& record = t.ProposeTransaction->Record; + const auto& record = t.ProposeTransaction->GetRecord(); Y_ABORT_UNLESS(record.GetTxBodyCase() == NKikimrPQ::TEvProposeTransaction::kData); Y_ABORT_UNLESS(record.HasData()); @@ -2494,7 +2586,7 @@ void TPartition::ExecImmediateTx(TTransaction& t) t.Message); return; } - for (auto& operation : record.GetData().GetOperations()) { + for (const auto& operation : record.GetData().GetOperations()) { if (!operation.HasBegin() || !operation.HasEnd() || !operation.HasConsumer()) { continue; //Write operation - handled separately via WriteInfo } @@ -2843,6 +2935,10 @@ void TPartition::EmulatePostProcessUserAct(const TEvPQ::TEvSetClientInfo& act, userInfo.Offset = offset; + if (LastOffsetHasBeenCommited(userInfo)) { + SendReadingFinished(user); + } + auto counter = setSession ? COUNTER_PQ_CREATE_SESSION_OK : (dropSession ? 
COUNTER_PQ_DELETE_SESSION_OK : COUNTER_PQ_SET_CLIENT_OFFSET_OK); TabletCounters.Cumulative()[counter].Increment(1); } @@ -2880,6 +2976,10 @@ void TPartition::ScheduleReplyPropose(const NKikimrPQ::TEvProposeTransaction& ev NKikimrPQ::TError::EKind kind, const TString& reason) { + PQ_LOG_D("schedule TEvPersQueue::TEvProposeTransactionResult(" << + NKikimrPQ::TEvProposeTransactionResult_EStatus_Name(statusCode) << + ")" << + ", reason=" << reason); Replies.emplace_back(ActorIdFromProto(event.GetSourceActor()), MakeReplyPropose(event, statusCode, @@ -3295,6 +3395,8 @@ void TPartition::Handle(TEvPQ::TEvCheckPartitionStatusRequest::TPtr& ev, const T void TPartition::HandleOnInit(TEvPQ::TEvDeletePartition::TPtr& ev, const TActorContext&) { + PQ_LOG_D("HandleOnInit TEvPQ::TEvDeletePartition"); + Y_ABORT_UNLESS(IsSupportive()); PendingEvents.emplace_back(ev->ReleaseBase().Release()); @@ -3302,6 +3404,8 @@ void TPartition::HandleOnInit(TEvPQ::TEvDeletePartition::TPtr& ev, const TActorC void TPartition::Handle(TEvPQ::TEvDeletePartition::TPtr&, const TActorContext& ctx) { + PQ_LOG_D("Handle TEvPQ::TEvDeletePartition"); + Y_ABORT_UNLESS(IsSupportive()); Y_ABORT_UNLESS(DeletePartitionState == DELETION_NOT_INITED); @@ -3314,20 +3418,25 @@ void TPartition::ScheduleNegativeReplies() { auto processQueue = [&](std::deque& queue) { for (auto& event : queue) { - if (auto* setInfo = std::get_if<0>(&event.Event)) { - ScheduleNegativeReply(*setInfo->Get()); - } else if (auto* tx = std::get_if<1>(&event.Event)) { - if (tx->Get()->ProposeTransaction) { - ScheduleNegativeReply(*tx->Get()->ProposeTransaction); - } else { - ScheduleNegativeReply(*tx->Get()); + std::visit(TOverloaded{ + [this](TSimpleSharedPtr& v) { + ScheduleNegativeReply(*v); + }, + [this](TSimpleSharedPtr& v) { + if (v->ProposeTransaction) { + ScheduleNegativeReply(*v->ProposeTransaction); + } else { + ScheduleNegativeReply(*v); + } + }, + [this](TMessage& v) { + ScheduleNegativeReply(v); } - } else { - 
ScheduleNegativeReply(*(std::get_if<2>(&event.Event))); - } + }, event.Event); } queue.clear(); }; + processQueue(UserActionAndTransactionEvents); processQueue(UserActionAndTxPendingCommit); } diff --git a/ydb/core/persqueue/partition.h b/ydb/core/persqueue/partition.h index f907594bbfbb..c954012caa66 100644 --- a/ydb/core/persqueue/partition.h +++ b/ydb/core/persqueue/partition.h @@ -66,7 +66,6 @@ struct TTransaction { explicit TTransaction(TSimpleSharedPtr proposeConfig) : ProposeConfig(proposeConfig) { - Y_ABORT_UNLESS(ProposeConfig); } @@ -74,8 +73,9 @@ struct TTransaction { : ProposeTransaction(proposeTx) , State(ECommitState::Committed) { - if (proposeTx->Record.HasSupportivePartitionActor()) { - SupportivePartitionActor = ActorIdFromProto(proposeTx->Record.GetSupportivePartitionActor()); + const auto& record = proposeTx->GetRecord(); + if (record.HasSupportivePartitionActor()) { + SupportivePartitionActor = ActorIdFromProto(record.GetSupportivePartitionActor()); } Y_ABORT_UNLESS(ProposeTransaction); } @@ -148,7 +148,7 @@ class TPartition : public TActorBootstrapped { bool CanWrite() const; bool CanEnqueue() const; - bool LastOffsetHasBeenCommited(const TUserInfo& userInfo) const; + bool LastOffsetHasBeenCommited(const TUserInfoBase& userInfo) const; void ReplyError(const TActorContext& ctx, const ui64 dst, NPersQueue::NErrorCode::EErrorCode errorCode, const TString& error); void ReplyPropose(const TActorContext& ctx, const NKikimrPQ::TEvProposeTransaction& event, NKikimrPQ::TEvProposeTransactionResult::EStatus statusCode, @@ -391,6 +391,7 @@ class TPartition : public TActorBootstrapped { void OnProcessTxsAndUserActsWriteComplete(const TActorContext& ctx); void EndChangePartitionConfig(NKikimrPQ::TPQTabletConfig&& config, + NKikimrPQ::TBootstrapConfig&& bootstrapConfig, NPersQueue::TTopicConverterPtr topicConverter, const TActorContext& ctx); TString GetKeyConfig() const; @@ -458,7 +459,9 @@ class TPartition : public TActorBootstrapped { } // The size of the 
data realy was persisted in the storage by the partition - ui64 MeteringDataSize() const; + ui64 UserDataSize() const; + // The size of the data was metered to user + ui64 MeteringDataSize(TInstant now) const; // The size of the storage that was reserved by the partition ui64 ReserveSize() const; // The size of the storage that usud by the partition. That included combination of the reserver and realy persisted data. @@ -915,6 +918,8 @@ class TPartition : public TActorBootstrapped { TDeque> PendingEvents; TRowVersion LastEmittedHeartbeat; + TLastCounter SourceIdCounter; + const NKikimrPQ::TPQTabletConfig::TPartition* GetPartitionConfig(const NKikimrPQ::TPQTabletConfig& config); bool ClosedInternalPartition = false; @@ -952,4 +957,3 @@ class TPartition : public TActorBootstrapped { }; } // namespace NKikimr::NPQ - diff --git a/ydb/core/persqueue/partition_id.h b/ydb/core/persqueue/partition_id.h index 5ef5c4fa75e2..0c1dbb8d3afd 100644 --- a/ydb/core/persqueue/partition_id.h +++ b/ydb/core/persqueue/partition_id.h @@ -7,6 +7,7 @@ #include #include #include +#include #include @@ -51,6 +52,13 @@ class TPartitionId { } } + TString ToString() const + { + TStringBuilder s; + s << *this; + return s; + } + bool IsSupportivePartition() const { return WriteId.Defined(); diff --git a/ydb/core/persqueue/partition_init.cpp b/ydb/core/persqueue/partition_init.cpp index 2f2b9b4c6f42..e17775cbb93c 100644 --- a/ydb/core/persqueue/partition_init.cpp +++ b/ydb/core/persqueue/partition_init.cpp @@ -169,15 +169,14 @@ void TInitConfigStep::Handle(TEvKeyValue::TEvResponse::TPtr& ev, const TActorCon if (Partition()->Config.GetVersion() < Partition()->TabletConfig.GetVersion()) { auto event = MakeHolder(Partition()->TopicConverter, - Partition()->TabletConfig); + Partition()->TabletConfig, + NKikimrPQ::TBootstrapConfig()); Partition()->PushFrontDistrTx(event.Release()); } break; case NKikimrProto::NODATA: Partition()->Config = Partition()->TabletConfig; - Partition()->PartitionConfig = 
GetPartitionConfig(Partition()->Config, Partition()->Partition.OriginalPartitionId); - Partition()->PartitionGraph = MakePartitionGraph(Partition()->Config); break; case NKikimrProto::ERROR: @@ -191,6 +190,9 @@ void TInitConfigStep::Handle(TEvKeyValue::TEvResponse::TPtr& ev, const TActorCon Y_ABORT("bad status"); }; + Partition()->PartitionConfig = GetPartitionConfig(Partition()->Config, Partition()->Partition.OriginalPartitionId); + Partition()->PartitionGraph = MakePartitionGraph(Partition()->Config); + Done(ctx); } @@ -632,7 +634,7 @@ void TInitDataStep::Handle(TEvKeyValue::TEvResponse::TPtr &ev, const TActorConte Y_ABORT_UNLESS(size == read.GetValue().size()); for (TBlobIterator it(key, read.GetValue()); it.IsValid(); it.Next()) { - head.Batches.emplace_back(it.GetBatch()); + head.AddBatch(it.GetBatch()); } head.PackedSize += size; diff --git a/ydb/core/persqueue/partition_read.cpp b/ydb/core/persqueue/partition_read.cpp index 100ce3e6a827..387b51ae1938 100644 --- a/ydb/core/persqueue/partition_read.cpp +++ b/ydb/core/persqueue/partition_read.cpp @@ -626,13 +626,13 @@ TVector TPartition::GetReadRequestFromHead( Y_ABORT_UNLESS(pos != Max()); } ui32 lastBlobSize = 0; - for (;pos < Head.Batches.size(); ++pos) { + for (;pos < Head.GetBatches().size(); ++pos) { TVector blobs; - Head.Batches[pos].UnpackTo(&blobs); + Head.GetBatch(pos).UnpackTo(&blobs); ui32 i = 0; - ui64 offset = Head.Batches[pos].GetOffset(); - ui16 pno = Head.Batches[pos].GetPartNo(); + ui64 offset = Head.GetBatch(pos).GetOffset(); + ui16 pno = Head.GetBatch(pos).GetPartNo(); for (; i < blobs.size(); ++i) { ui64 curOffset = offset; @@ -796,12 +796,11 @@ void TPartition::DoRead(TEvPQ::TEvRead::TPtr&& readEvent, TDuration waitQuotaTim return; } - if (offset > EndOffset) { + if (offset >= EndOffset) { ReplyError(ctx, read->Cookie, NPersQueue::NErrorCode::BAD_REQUEST, TStringBuilder() << "Offset more than EndOffset. 
Offset=" << offset << ", EndOffset=" << EndOffset); return; } - Y_ABORT_UNLESS(offset < EndOffset); ProcessRead(ctx, std::move(info), cookie, false); } diff --git a/ydb/core/persqueue/partition_scale_manager.cpp b/ydb/core/persqueue/partition_scale_manager.cpp index 8d03b87e1d04..b36fdb18584e 100644 --- a/ydb/core/persqueue/partition_scale_manager.cpp +++ b/ydb/core/persqueue/partition_scale_manager.cpp @@ -8,12 +8,14 @@ namespace NPQ { TPartitionScaleManager::TPartitionScaleManager( const TString& topicName, + const TString& topicPath, const TString& databasePath, ui64 pathId, int version, const NKikimrPQ::TPQTabletConfig& config ) : TopicName(topicName) + , TopicPath(topicPath) , DatabasePath(databasePath) , BalancerConfig(pathId, version, config) { } @@ -45,6 +47,7 @@ void TPartitionScaleManager::TrySendScaleRequest(const TActorContext& ctx) { << "send split request"); CurrentScaleRequest = ctx.Register(new TPartitionScaleRequest( TopicName, + TopicPath, DatabasePath, BalancerConfig.PathId, BalancerConfig.PathVersion, diff --git a/ydb/core/persqueue/partition_scale_manager.h b/ydb/core/persqueue/partition_scale_manager.h index dc46b38f0831..39251d5b0610 100644 --- a/ydb/core/persqueue/partition_scale_manager.h +++ b/ydb/core/persqueue/partition_scale_manager.h @@ -47,7 +47,7 @@ class TPartitionScaleManager { }; public: - TPartitionScaleManager(const TString& topicPath, const TString& databasePath, ui64 pathId, int version, const NKikimrPQ::TPQTabletConfig& config); + TPartitionScaleManager(const TString& topicName, const TString& topicPath, const TString& databasePath, ui64 pathId, int version, const NKikimrPQ::TPQTabletConfig& config); public: void HandleScaleStatusChange(const ui32 partition, NKikimrPQ::EScaleStatus scaleStatus, const TActorContext& ctx); @@ -71,6 +71,7 @@ class TPartitionScaleManager { static const ui32 MAX_SCALE_REQUEST_REPEAT_SECONDS_TIMEOUT = 1000; const TString TopicName; + const TString TopicPath; TString DatabasePath = ""; TActorId 
CurrentScaleRequest; TDuration RequestTimeout = TDuration::MilliSeconds(0); diff --git a/ydb/core/persqueue/partition_scale_request.cpp b/ydb/core/persqueue/partition_scale_request.cpp index 28e7d8f7a595..4697a57b14fc 100644 --- a/ydb/core/persqueue/partition_scale_request.cpp +++ b/ydb/core/persqueue/partition_scale_request.cpp @@ -4,15 +4,17 @@ namespace NKikimr { namespace NPQ { TPartitionScaleRequest::TPartitionScaleRequest( - TString topicName, - TString databasePath, + const TString& topicName, + const TString& topicPath, + const TString& databasePath, ui64 pathId, ui64 pathVersion, - std::vector splits, - const std::vector merges, - NActors::TActorId parentActorId + const std::vector& splits, + const std::vector& merges, + const NActors::TActorId& parentActorId ) : Topic(topicName) + , TopicPath(topicPath) , DatabasePath(databasePath) , PathId(pathId) , PathVersion(pathVersion) @@ -30,14 +32,17 @@ void TPartitionScaleRequest::Bootstrap(const NActors::TActorContext &ctx) { void TPartitionScaleRequest::SendProposeRequest(const NActors::TActorContext &ctx) { auto proposal = std::make_unique(); proposal->Record.SetDatabaseName(CanonizePath(DatabasePath)); - FillProposeRequest(*proposal, DatabasePath, Topic, ctx); + FillProposeRequest(*proposal, ctx); ctx.Send(MakeTxProxyID(), proposal.release()); } -void TPartitionScaleRequest::FillProposeRequest(TEvTxUserProxy::TEvProposeTransaction& proposal, const TString& workingDir, const TString& topicName, const NActors::TActorContext &ctx) { +void TPartitionScaleRequest::FillProposeRequest(TEvTxUserProxy::TEvProposeTransaction& proposal, const NActors::TActorContext &ctx) { + auto workingDir = TopicPath.substr(0, TopicPath.size() - Topic.size()); + auto& modifyScheme = *proposal.Record.MutableTransaction()->MutableModifyScheme(); modifyScheme.SetOperationType(NKikimrSchemeOp::ESchemeOpAlterPersQueueGroup); modifyScheme.SetWorkingDir(workingDir); + modifyScheme.SetInternal(true); auto applyIf = modifyScheme.AddApplyIf(); 
applyIf->SetPathId(PathId); @@ -45,9 +50,9 @@ void TPartitionScaleRequest::FillProposeRequest(TEvTxUserProxy::TEvProposeTransa applyIf->SetCheckEntityVersion(true); NKikimrSchemeOp::TPersQueueGroupDescription groupDescription; - groupDescription.SetName(topicName); + groupDescription.SetName(Topic); TStringBuilder logMessage; - logMessage << "TPartitionScaleRequest::FillProposeRequest trying to scale partitions. Spilts: "; + logMessage << "TPartitionScaleRequest::FillProposeRequest trying to scale partitions of '" << workingDir << "/" << Topic << "'. Spilts: "; for(const auto& split: Splits) { auto* newSplit = groupDescription.AddSplit(); logMessage << "partition: " << split.GetPartition() << " boundary: '" << split.GetSplitBoundary() << "' "; diff --git a/ydb/core/persqueue/partition_scale_request.h b/ydb/core/persqueue/partition_scale_request.h index 017825e78d82..764c8f033eb0 100644 --- a/ydb/core/persqueue/partition_scale_request.h +++ b/ydb/core/persqueue/partition_scale_request.h @@ -26,7 +26,10 @@ class TPartitionScaleRequest: public NActors::TActorBootstrapped splits, const std::vector merges, NActors::TActorId parentActorId); + TPartitionScaleRequest(const TString& topicName, const TString& topicPath, const TString& databasePath, ui64 pathId, ui64 pathVersion, + const std::vector& splits, + const std::vector& merges, + const NActors::TActorId& parentActorId); public: void Bootstrap(const NActors::TActorContext &ctx); @@ -48,10 +51,11 @@ class TPartitionScaleRequest: public NActors::TActorBootstrapped SplitPath(const TString& path); void SendProposeRequest(const NActors::TActorContext &ctx); - void FillProposeRequest(TEvTxUserProxy::TEvProposeTransaction& proposal, const TString& workingDir, const TString& topicName, const NActors::TActorContext &ctx); + void FillProposeRequest(TEvTxUserProxy::TEvProposeTransaction& proposal, const NActors::TActorContext &ctx); private: const TString Topic; + const TString TopicPath; const TString DatabasePath; const ui64 
PathId; const ui64 PathVersion; diff --git a/ydb/core/persqueue/partition_sourcemanager.cpp b/ydb/core/persqueue/partition_sourcemanager.cpp index f81b3a7da3b2..c9214300384a 100644 --- a/ydb/core/persqueue/partition_sourcemanager.cpp +++ b/ydb/core/persqueue/partition_sourcemanager.cpp @@ -81,7 +81,8 @@ void TPartitionSourceManager::TModificationBatch::Cancel() { } bool TPartitionSourceManager::TModificationBatch::HasModifications() const { - return !SourceIdWriter.GetSourceIdsToWrite().empty(); + return !SourceIdWriter.GetSourceIdsToWrite().empty() + || !SourceIdWriter.GetSourceIdsToDelete().empty(); } void TPartitionSourceManager::TModificationBatch::FillRequest(TEvKeyValue::TEvRequest* request) { diff --git a/ydb/core/persqueue/partition_write.cpp b/ydb/core/persqueue/partition_write.cpp index e0346e2f9010..92c4d24830a0 100644 --- a/ydb/core/persqueue/partition_write.cpp +++ b/ydb/core/persqueue/partition_write.cpp @@ -166,6 +166,12 @@ void TPartition::ProcessReserveRequests(const TActorContext& ctx) { const ui64& cookie = ReserveRequests.front()->Cookie; const bool& lastRequest = ReserveRequests.front()->LastRequest; + if (!IsActive()) { + ReplyOk(ctx, cookie); + ReserveRequests.pop_front(); + continue; + } + auto it = Owners.find(owner); if (ClosedInternalPartition) { ReplyError(ctx, cookie, NPersQueue::NErrorCode::BAD_REQUEST, "ReserveRequest to closed supportive partition"); @@ -239,13 +245,14 @@ void TPartition::HandleOnIdle(TEvPQ::TEvWrite::TPtr& ev, const TActorContext& ct void TPartition::AnswerCurrentWrites(const TActorContext& ctx) { PQ_LOG_T("TPartition::AnswerCurrentWrites. 
Responses.size()=" << Responses.size()); + const auto now = ctx.Now(); ui64 offset = EndOffset; while (!Responses.empty()) { const auto& response = Responses.front(); const TDuration queueTime = response.QueueTime; - const TDuration writeTime = ctx.Now() - response.WriteTimeBaseline; + const TDuration writeTime = now - response.WriteTimeBaseline; if (response.IsWrite()) { const auto& writeResponse = response.GetWrite(); @@ -257,6 +264,7 @@ void TPartition::AnswerCurrentWrites(const TActorContext& ctx) { bool already = false; + SourceIdCounter.Use(s, now); auto it = SourceIdStorage.GetInMemorySourceIds().find(s); ui64 maxSeqNo = 0; @@ -397,7 +405,7 @@ void TPartition::SyncMemoryStateWithKVState(const TActorContext& ctx) { Head.PackedSize = 0; Head.Offset = NewHead.Offset; Head.PartNo = NewHead.PartNo; //no partNo at this point - Head.Batches.clear(); + Head.ClearBatches(); } while (!CompactedKeys.empty()) { @@ -420,9 +428,8 @@ void TPartition::SyncMemoryStateWithKVState(const TActorContext& ctx) { } // head cleared, all data moved to body //append Head with newHead - while (!NewHead.Batches.empty()) { - Head.Batches.push_back(NewHead.Batches.front()); - NewHead.Batches.pop_front(); + while (!NewHead.GetBatches().empty()) { + Head.AddBatch(NewHead.ExtractFirstBatch()); } Head.PackedSize += NewHead.PackedSize; @@ -462,20 +469,14 @@ void TPartition::UpdateAfterWriteCounters(bool writeComplete) { // If supportive - update counters only prior to write, otherwise - only after writes; return; } - if (BytesWrittenGrpc) - BytesWrittenGrpc.Inc(WriteNewSizeInternal); - if (BytesWrittenTotal) - BytesWrittenTotal.Inc(WriteNewSize); - - if (BytesWrittenUncompressed) - BytesWrittenUncompressed.Inc(WriteNewSizeUncompressed); + BytesWrittenGrpc.Inc(WriteNewSizeInternal); + BytesWrittenTotal.Inc(WriteNewSize); + BytesWrittenUncompressed.Inc(WriteNewSizeUncompressed); if (BytesWrittenComp) BytesWrittenComp.Inc(WriteCycleSize); - if (MsgsWrittenGrpc) - 
MsgsWrittenGrpc.Inc(WriteNewMessagesInternal); - if (MsgsWrittenTotal) { - MsgsWrittenTotal.Inc(WriteNewMessages); - } + + MsgsWrittenGrpc.Inc(WriteNewMessagesInternal); + MsgsWrittenTotal.Inc(WriteNewMessages); } void TPartition::HandleWriteResponse(const TActorContext& ctx) { @@ -485,13 +486,15 @@ void TPartition::HandleWriteResponse(const TActorContext& ctx) { } HaveWriteMsg = false; + const auto now = ctx.Now(); + for (auto& [sourceId, info] : TxSourceIdForPostPersist) { auto it = SourceIdStorage.GetInMemorySourceIds().find(sourceId); if (it.IsEnd()) { - SourceIdStorage.RegisterSourceId(sourceId, info.SeqNo, info.Offset, ctx.Now()); + SourceIdStorage.RegisterSourceId(sourceId, info.SeqNo, info.Offset, now); } else { ui64 seqNo = std::max(info.SeqNo, it->second.SeqNo); - SourceIdStorage.RegisterSourceId(sourceId, it->second.Updated(seqNo, info.Offset, ctx.Now())); + SourceIdStorage.RegisterSourceId(sourceId, it->second.Updated(seqNo, info.Offset, now)); } } TxSourceIdForPostPersist.clear(); @@ -505,8 +508,8 @@ void TPartition::HandleWriteResponse(const TActorContext& ctx) { } ui64 prevEndOffset = EndOffset; - ui32 totalLatencyMs = (ctx.Now() - WriteCycleStartTime).MilliSeconds(); - ui32 writeLatencyMs = (ctx.Now() - WriteStartTime).MilliSeconds(); + ui32 totalLatencyMs = (now - WriteCycleStartTime).MilliSeconds(); + ui32 writeLatencyMs = (now - WriteStartTime).MilliSeconds(); WriteLatency.IncFor(writeLatencyMs, 1); if (writeLatencyMs >= AppData(ctx)->PQConfig.GetWriteLatencyBigMs()) { @@ -522,7 +525,6 @@ void TPartition::HandleWriteResponse(const TActorContext& ctx) { UpdateAfterWriteCounters(true); //All ok - auto now = ctx.Now(); for (auto& avg : AvgWriteBytes) { avg.Update(WriteNewSize, now); } @@ -538,11 +540,9 @@ void TPartition::HandleWriteResponse(const TActorContext& ctx) { if (SupportivePartitionTimeLag) { SupportivePartitionTimeLag->UpdateTimestamp(now.MilliSeconds()); } - if (SplitMergeEnabled(Config)) { - 
SplitMergeAvgWriteBytes->Update(WriteNewSizeFull, now); - auto needScaling = CheckScaleStatus(ctx); - ChangeScaleStatusIfNeeded(needScaling); - } + + auto writeNewSizeFull = WriteNewSizeFull; + WriteCycleSize = 0; WriteNewSize = 0; WriteNewSizeFull = 0; @@ -556,6 +556,12 @@ void TPartition::HandleWriteResponse(const TActorContext& ctx) { AnswerCurrentWrites(ctx); SyncMemoryStateWithKVState(ctx); + if (SplitMergeEnabled(Config)) { + SplitMergeAvgWriteBytes->Update(writeNewSizeFull, now); + auto needScaling = CheckScaleStatus(ctx); + ChangeScaleStatusIfNeeded(needScaling); + } + //if EndOffset changed there could be subscriptions witch could be completed TVector> reads = Subscriber.GetReads(EndOffset); for (auto& read : reads) { @@ -569,7 +575,10 @@ void TPartition::HandleWriteResponse(const TActorContext& ctx) { } NKikimrPQ::EScaleStatus TPartition::CheckScaleStatus(const TActorContext& ctx) { - auto const writeSpeedUsagePercent = SplitMergeAvgWriteBytes->GetValue() * 100.0 / Config.GetPartitionStrategy().GetScaleThresholdSeconds() / TotalPartitionWriteSpeed; + const auto writeSpeedUsagePercent = SplitMergeAvgWriteBytes->GetValue() * 100.0 / Config.GetPartitionStrategy().GetScaleThresholdSeconds() / TotalPartitionWriteSpeed; + const auto sourceIdWindow = TDuration::Seconds(std::min(5, Config.GetPartitionStrategy().GetScaleThresholdSeconds())); + const auto sourceIdCount = SourceIdCounter.Count(ctx.Now() - sourceIdWindow); + LOG_DEBUG_S( ctx, NKikimrServices::PERSQUEUE, "TPartition::CheckScaleStatus" @@ -577,15 +586,17 @@ NKikimrPQ::EScaleStatus TPartition::CheckScaleStatus(const TActorContext& ctx) { << " writeSpeedUsagePercent# " << writeSpeedUsagePercent << " scaleThresholdSeconds# " << Config.GetPartitionStrategy().GetScaleThresholdSeconds() << " totalPartitionWriteSpeed# " << TotalPartitionWriteSpeed + << " sourceIdCount=" << sourceIdCount << " Topic: \"" << TopicName() << "\"." 
<< " Partition: " << Partition ); + auto splitEnabled = Config.GetPartitionStrategy().GetPartitionStrategyType() == ::NKikimrPQ::TPQTabletConfig_TPartitionStrategyType::TPQTabletConfig_TPartitionStrategyType_CAN_SPLIT || Config.GetPartitionStrategy().GetPartitionStrategyType() == ::NKikimrPQ::TPQTabletConfig_TPartitionStrategyType::TPQTabletConfig_TPartitionStrategyType_CAN_SPLIT_AND_MERGE; auto mergeEnabled = Config.GetPartitionStrategy().GetPartitionStrategyType() == ::NKikimrPQ::TPQTabletConfig_TPartitionStrategyType::TPQTabletConfig_TPartitionStrategyType_CAN_SPLIT_AND_MERGE; - if (splitEnabled && writeSpeedUsagePercent >= Config.GetPartitionStrategy().GetScaleUpPartitionWriteSpeedThresholdPercent()) { + if (splitEnabled && writeSpeedUsagePercent >= Config.GetPartitionStrategy().GetScaleUpPartitionWriteSpeedThresholdPercent() && sourceIdCount > 1) { LOG_DEBUG_S( ctx, NKikimrServices::PERSQUEUE, "TPartition::CheckScaleStatus NEED_SPLIT" << " Topic: \"" << TopicName() << "\"." << @@ -596,7 +607,8 @@ NKikimrPQ::EScaleStatus TPartition::CheckScaleStatus(const TActorContext& ctx) { LOG_DEBUG_S( ctx, NKikimrServices::PERSQUEUE, "TPartition::CheckScaleStatus NEED_MERGE" << " Topic: \"" << TopicName() << "\"." 
<< - " Partition: " << Partition + " Partition: " << Partition << " writeSpeedUsagePercent: " << writeSpeedUsagePercent << + " Threshold: " << Config.GetPartitionStrategy().GetScaleDownPartitionWriteSpeedThresholdPercent() ); return NKikimrPQ::EScaleStatus::NEED_MERGE; } @@ -1045,16 +1057,16 @@ void TPartition::RenameFormedBlobs(const std::deque= p.Msg.Data.size(), + "PQ %" PRIu64 ", Partition {%" PRIu32 ", %" PRIu32 "}, WriteInflightSize=%" PRIu64 ", p.Msg.Data.size=%" PRISZT, + TabletID, Partition.OriginalPartitionId, Partition.InternalPartitionId, + WriteInflightSize, p.Msg.Data.size()); WriteInflightSize -= p.Msg.Data.size(); TabletCounters.Percentile()[COUNTER_LATENCY_PQ_RECEIVE_QUEUE].IncrementFor(ctx.Now().MilliSeconds() - p.Msg.ReceiveTimestamp); @@ -1307,22 +1323,22 @@ bool TPartition::ExecRequest(TWriteMsg& p, ProcessParameters& parameters, TEvKey ctx); ui32 countOfLastParts = 0; for (auto& x : PartitionedBlob.GetClientBlobs()) { - if (NewHead.Batches.empty() || NewHead.Batches.back().Packed) { - NewHead.Batches.emplace_back(curOffset, x.GetPartNo(), TVector()); + if (NewHead.GetBatches().empty() || NewHead.GetLastBatch().Packed) { + NewHead.AddBatch(TBatch(curOffset, x.GetPartNo())); NewHead.PackedSize += GetMaxHeaderSize(); //upper bound for packed size } if (x.IsLastPart()) { ++countOfLastParts; } - Y_ABORT_UNLESS(!NewHead.Batches.back().Packed); - NewHead.Batches.back().AddBlob(x); + Y_ABORT_UNLESS(!NewHead.GetLastBatch().Packed); + NewHead.AddBlob(x); NewHead.PackedSize += x.GetBlobSize(); - if (NewHead.Batches.back().GetUnpackedSize() >= BATCH_UNPACK_SIZE_BORDER) { - NewHead.Batches.back().Pack(); - NewHead.PackedSize += NewHead.Batches.back().GetPackedSize(); //add real packed size for this blob + if (NewHead.GetLastBatch().GetUnpackedSize() >= BATCH_UNPACK_SIZE_BORDER) { + NewHead.MutableLastBatch().Pack(); + NewHead.PackedSize += NewHead.GetLastBatch().GetPackedSize(); //add real packed size for this blob NewHead.PackedSize -= GetMaxHeaderSize(); 
//instead of upper bound - NewHead.PackedSize -= NewHead.Batches.back().GetUnpackedSize(); + NewHead.PackedSize -= NewHead.GetLastBatch().GetUnpackedSize(); } } @@ -1399,15 +1415,15 @@ void TPartition::AddNewWriteBlob(std::pair& res, TEvKeyValue::TEvReq valueD.reserve(res.second); ui32 pp = Head.FindPos(key.GetOffset(), key.GetPartNo()); if (pp < Max() && key.GetOffset() < EndOffset) { //this batch trully contains this offset - Y_ABORT_UNLESS(pp < Head.Batches.size()); - Y_ABORT_UNLESS(Head.Batches[pp].GetOffset() == key.GetOffset()); - Y_ABORT_UNLESS(Head.Batches[pp].GetPartNo() == key.GetPartNo()); - for (; pp < Head.Batches.size(); ++pp) { //TODO - merge small batches here - Y_ABORT_UNLESS(Head.Batches[pp].Packed); - Head.Batches[pp].SerializeTo(valueD); + Y_ABORT_UNLESS(pp < Head.GetBatches().size()); + Y_ABORT_UNLESS(Head.GetBatch(pp).GetOffset() == key.GetOffset()); + Y_ABORT_UNLESS(Head.GetBatch(pp).GetPartNo() == key.GetPartNo()); + for (; pp < Head.GetBatches().size(); ++pp) { //TODO - merge small batches here + Y_ABORT_UNLESS(Head.GetBatch(pp).Packed); + Head.GetBatch(pp).SerializeTo(valueD); } } - for (auto& b : NewHead.Batches) { + for (auto& b : NewHead.GetBatches()) { Y_ABORT_UNLESS(b.Packed); b.SerializeTo(valueD); } @@ -1519,6 +1535,10 @@ void TPartition::FilterDeadlinedWrites(const TActorContext& ctx, TMessageQueue& TabletCounters.Cumulative()[COUNTER_PQ_WRITE_ERROR].Increment(1); TabletCounters.Cumulative()[COUNTER_PQ_WRITE_BYTES_ERROR].Increment(msg.Data.size() + msg.SourceId.size()); + Y_DEBUG_ABORT_UNLESS(WriteInflightSize >= msg.Data.size(), + "PQ %" PRIu64 ", Partition {%" PRIu32 ", %" PRIu32 "}, WriteInflightSize=%" PRIu64 ", msg.Data.size=%" PRISZT, + TabletID, Partition.OriginalPartitionId, Partition.InternalPartitionId, + WriteInflightSize, msg.Data.size()); WriteInflightSize -= msg.Data.size(); } @@ -1680,7 +1700,7 @@ void TPartition::BeginAppendHeadWithNewWrites(const TActorContext& ctx) NewHead.PartNo = 0; NewHead.PackedSize = 0; - 
Y_ABORT_UNLESS(NewHead.Batches.empty()); + Y_ABORT_UNLESS(NewHead.GetBatches().empty()); Parameters->OldPartsCleared = false; Parameters->HeadCleared = (Head.PackedSize == 0); @@ -1725,12 +1745,12 @@ void TPartition::EndAppendHeadWithNewWrites(TEvKeyValue::TEvRequest* request, co UpdateWriteBufferIsFullState(ctx.Now()); - if (!NewHead.Batches.empty() && !NewHead.Batches.back().Packed) { - NewHead.Batches.back().Pack(); - NewHead.PackedSize += NewHead.Batches.back().GetPackedSize(); //add real packed size for this blob + if (!NewHead.GetBatches().empty() && !NewHead.GetLastBatch().Packed) { + NewHead.MutableLastBatch().Pack(); + NewHead.PackedSize += NewHead.GetLastBatch().GetPackedSize(); //add real packed size for this blob NewHead.PackedSize -= GetMaxHeaderSize(); //instead of upper bound - NewHead.PackedSize -= NewHead.Batches.back().GetUnpackedSize(); + NewHead.PackedSize -= NewHead.GetLastBatch().GetUnpackedSize(); } Y_ABORT_UNLESS((Parameters->HeadCleared ? 0 : Head.PackedSize) + NewHead.PackedSize <= MaxBlobSize); //otherwise last PartitionedBlob.Add must compact all except last cl @@ -1764,7 +1784,7 @@ bool TPartition::WaitingForSubDomainQuota(const TActorContext& /*ctx*/, const ui return withSize > 0 || Size() > 0; } - return MeteringDataSize() + withSize > ReserveSize(); + return UserDataSize() + withSize > ReserveSize(); } void TPartition::RequestBlobQuota(size_t quotaSize) diff --git a/ydb/core/persqueue/percentile_counter.cpp b/ydb/core/persqueue/percentile_counter.cpp index 85edf4a950e4..dec88c19391d 100644 --- a/ydb/core/persqueue/percentile_counter.cpp +++ b/ydb/core/persqueue/percentile_counter.cpp @@ -177,6 +177,9 @@ void TPartitionHistogramWrapper::Setup(bool isSupportivePartition, std::unique_p } } void TPartitionHistogramWrapper::IncFor(ui64 key, ui64 value) { + if (!Inited) + return; + if (!IsSupportivePartition) { return Histogram->IncFor(key, value); } diff --git a/ydb/core/persqueue/percentile_counter.h 
b/ydb/core/persqueue/percentile_counter.h index 89ec2a2dddc9..cb78e7e49b54 100644 --- a/ydb/core/persqueue/percentile_counter.h +++ b/ydb/core/persqueue/percentile_counter.h @@ -59,8 +59,8 @@ NKikimr::NPQ::TPercentileCounter CreateSLIDurationCounter( class TPartitionCounterWrapper { private: - bool DoSave; - bool DoReport; + bool DoSave = false; + bool DoReport = false; TMaybe Counter; ui64 CounterValue = 0; bool Inited = false; diff --git a/ydb/core/persqueue/pq_impl.cpp b/ydb/core/persqueue/pq_impl.cpp index 0a7c0ff0d000..aac7db2e1d7d 100644 --- a/ydb/core/persqueue/pq_impl.cpp +++ b/ydb/core/persqueue/pq_impl.cpp @@ -554,7 +554,7 @@ class TMonitoringProxy : public TActorBootstrapped { , TabletID(tabletId) , Inflight(inflight) { - for (auto& p: Partitions) { + for (auto& p : Partitions) { Results[p.first].push_back(Sprintf("Partition %u: NO DATA", p.first)); } } @@ -691,7 +691,11 @@ void TPersQueue::ApplyNewConfigAndReply(const TActorContext& ctx) ClearNewConfig(); for (auto& p : Partitions) { //change config for already created partitions - ctx.Send(p.second.Actor, new TEvPQ::TEvChangePartitionConfig(TopicConverter, Config)); + if (p.first.IsSupportivePartition()) { + continue; + } + + ctx.Send(p.second.Actor, new TEvPQ::TEvChangePartitionConfig(TopicConverter, Config, BootstrapConfigTx ? 
*BootstrapConfigTx : NKikimrPQ::TBootstrapConfig())); } ChangePartitionConfigInflight += Partitions.size(); @@ -715,6 +719,8 @@ void TPersQueue::ApplyNewConfig(const NKikimrPQ::TPQTabletConfig& newConfig, { Config = newConfig; + PQ_LOG_D("Apply new config " << Config.ShortDebugString()); + ui32 cacheSize = CACHE_SIZE; if (Config.HasCacheSize()) { cacheSize = Config.GetCacheSize(); @@ -782,7 +788,7 @@ void TPersQueue::EndWriteConfig(const NKikimrClient::TResponse& resp, const TAct NewConfigShouldBeApplied = true; //when config will be inited with old value new config will be applied } -void TPersQueue::HandleConfigReadResponse(const NKikimrClient::TResponse& resp, const TActorContext& ctx) +void TPersQueue::HandleConfigReadResponse(NKikimrClient::TResponse&& resp, const TActorContext& ctx) { bool ok = (resp.GetStatus() == NMsgBusProxy::MSTATUS_OK) && @@ -790,16 +796,68 @@ void TPersQueue::HandleConfigReadResponse(const NKikimrClient::TResponse& resp, (resp.HasSetExecutorFastLogPolicyResult()) && (resp.GetSetExecutorFastLogPolicyResult().GetStatus() == NKikimrProto::OK); if (!ok) { - LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, - "Tablet " << TabletID() << " Config read error: " << resp.DebugString() << " " << ctx.SelfID); - ctx.Send(ctx.SelfID, new TEvents::TEvPoisonPill()); + PQ_LOG_ERROR_AND_DIE("Config read error: " << resp.ShortDebugString()); + return; + } + + ConfigReadResponse = std::move(resp); + + BeginInitTransactions(); + SendTransactionsReadRequest(GetTxKey(Min()), true, ctx); +} + +void TPersQueue::SendTransactionsReadRequest(const TString& fromKey, bool includeFrom, + const TActorContext& ctx) +{ + THolder request(new TEvKeyValue::TEvRequest); + request->Record.SetCookie(READ_TXS_COOKIE); + + AddCmdReadTransactionRange(*request, fromKey, includeFrom); + + request->Record.MutableCmdSetExecutorFastLogPolicy() + ->SetIsAllowed(AppData(ctx)->PQConfig.GetTactic() == NKikimrClient::TKeyValueRequest::MIN_LATENCY); + ctx.Send(ctx.SelfID, request.Release()); +} 
+ +TString GetLastKey(const NKikimrClient::TKeyValueResponse::TReadRangeResult& result) +{ + if (!result.PairSize()) { + return {}; + } + + return result.GetPair(result.PairSize() - 1).GetKey(); +} + +void TPersQueue::HandleTransactionsReadResponse(NKikimrClient::TResponse&& resp, const TActorContext& ctx) +{ + bool ok = + (resp.GetStatus() == NMsgBusProxy::MSTATUS_OK) && + (resp.ReadRangeResultSize() == 1) && + (resp.HasSetExecutorFastLogPolicyResult()) && + (resp.GetSetExecutorFastLogPolicyResult().GetStatus() == NKikimrProto::OK); + const auto& result = resp.GetReadRangeResult(0); + auto status = result.GetStatus(); + if (status != NKikimrProto::OVERRUN && + status != NKikimrProto::OK && + status != NKikimrProto::NODATA) { + ok = false; + } + if (!ok) { + PQ_LOG_ERROR_AND_DIE("Transactions read error: " << resp.ShortDebugString()); + return; + } + + TransactionsReadResults.emplace_back(std::move(result)); + + if (status == NKikimrProto::OVERRUN) { + SendTransactionsReadRequest(GetLastKey(result), false, ctx); return; } - ReadTxInfo(resp.GetReadResult(2), ctx); - ReadConfig(resp.GetReadResult(0), resp.GetReadRangeResult(0), ctx); - ReadTxWrites(resp.GetReadResult(2), ctx); - ReadState(resp.GetReadResult(1), ctx); + ReadTxInfo(ConfigReadResponse.GetReadResult(2), ctx); + ReadConfig(ConfigReadResponse.GetReadResult(0), TransactionsReadResults, ctx); + ReadTxWrites(ConfigReadResponse.GetReadResult(2), ctx); + ReadState(ConfigReadResponse.GetReadResult(1), ctx); } void TPersQueue::ReadTxInfo(const NKikimrClient::TKeyValueResponse::TReadResult& read, @@ -815,7 +873,7 @@ void TPersQueue::ReadTxInfo(const NKikimrClient::TKeyValueResponse::TReadResult& switch (read.GetStatus()) { case NKikimrProto::OK: { - LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() << " has a tx info"); + PQ_LOG_D("has a tx info"); NKikimrPQ::TTabletTxInfo info; Y_ABORT_UNLESS(info.ParseFromString(read.GetValue())); @@ -825,7 +883,7 @@ void TPersQueue::ReadTxInfo(const 
NKikimrClient::TKeyValueResponse::TReadResult& break; } case NKikimrProto::NODATA: { - LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() << " doesn't have tx info"); + PQ_LOG_D("doesn't have tx info"); InitPlanStep(); @@ -907,6 +965,37 @@ void TPersQueue::CreateOriginalPartition(const NKikimrPQ::TPQTabletConfig& confi ++OriginalPartitionsCount; } +void TPersQueue::MoveTopTxToCalculating(TDistributedTransaction& tx, + const TActorContext& ctx) +{ + std::tie(ExecStep, ExecTxId) = TxQueue.front(); + PQ_LOG_D("New ExecStep " << ExecStep << ", ExecTxId " << ExecTxId); + + switch (tx.Kind) { + case NKikimrPQ::TTransaction::KIND_DATA: + SendEvTxCalcPredicateToPartitions(ctx, tx); + break; + case NKikimrPQ::TTransaction::KIND_CONFIG: { + NPersQueue::TConverterFactoryPtr converterFactory; + CreateTopicConverter(tx.TabletConfig, + converterFactory, + tx.TopicConverter, + ctx); + CreateNewPartitions(tx.TabletConfig, + tx.TopicConverter, + ctx); + SendEvProposePartitionConfig(ctx, tx); + break; + } + case NKikimrPQ::TTransaction::KIND_UNKNOWN: + Y_ABORT_UNLESS(false); + } + + tx.State = NKikimrPQ::TTransaction::CALCULATING; + PQ_LOG_D("TxId " << tx.TxId << + ", NewState " << NKikimrPQ::TTransaction_EState_Name(tx.State)); +} + void TPersQueue::AddSupportivePartition(const TPartitionId& partitionId) { Partitions.emplace(partitionId, @@ -958,23 +1047,28 @@ void TPersQueue::InitTxWrites(const NKikimrPQ::TTabletTxInfo& info, for (size_t i = 0; i != info.TxWritesSize(); ++i) { auto& txWrite = info.GetTxWrites(i); const TWriteId writeId = GetWriteId(txWrite); - ui32 partitionId = txWrite.GetOriginalPartitionId(); - TPartitionId shadowPartitionId(partitionId, writeId, txWrite.GetInternalPartitionId()); - TxWrites[writeId].Partitions.emplace(partitionId, shadowPartitionId); + TTxWriteInfo& writeInfo = TxWrites[writeId]; + if (txWrite.HasOriginalPartitionId()) { + ui32 partitionId = txWrite.GetOriginalPartitionId(); + TPartitionId shadowPartitionId(partitionId, 
writeId, txWrite.GetInternalPartitionId()); - AddSupportivePartition(shadowPartitionId); - CreateSupportivePartitionActor(shadowPartitionId, ctx); - SubscribeWriteId(writeId, ctx); + writeInfo.Partitions.emplace(partitionId, shadowPartitionId); - NextSupportivePartitionId = Max(NextSupportivePartitionId, shadowPartitionId.InternalPartitionId + 1); + AddSupportivePartition(shadowPartitionId); + CreateSupportivePartitionActor(shadowPartitionId, ctx); + + NextSupportivePartitionId = Max(NextSupportivePartitionId, shadowPartitionId.InternalPartitionId + 1); + } + + SubscribeWriteId(writeId, ctx); } NewSupportivePartitions.clear(); } void TPersQueue::ReadConfig(const NKikimrClient::TKeyValueResponse::TReadResult& read, - const NKikimrClient::TKeyValueResponse::TReadRangeResult& readRange, + const TVector& readRanges, const TActorContext& ctx) { Y_ABORT_UNLESS(read.HasStatus()); @@ -986,15 +1080,6 @@ void TPersQueue::ReadConfig(const NKikimrClient::TKeyValueResponse::TReadResult& return; } - Y_ABORT_UNLESS(readRange.HasStatus()); - if (readRange.GetStatus() != NKikimrProto::OK && readRange.GetStatus() != NKikimrProto::NODATA) { - LOG_ERROR_S(ctx, NKikimrServices::PERSQUEUE, - "Tablet " << TabletID() << " Transactions read error " << ctx.SelfID << - " Error status code " << readRange.GetStatus()); - ctx.Send(ctx.SelfID, new TEvents::TEvPoisonPill()); - return; - } - Y_ABORT_UNLESS(!ConfigInited); if (read.GetStatus() == NKikimrProto::OK) { @@ -1027,15 +1112,54 @@ void TPersQueue::ReadConfig(const NKikimrClient::TKeyValueResponse::TReadResult& Y_ABORT_UNLESS(TopicName.size(), "Need topic name here"); ctx.Send(CacheActor, new TEvPQ::TEvChangeCacheConfig(TopicName, cacheSize)); } else if (read.GetStatus() == NKikimrProto::NODATA) { - LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() << " no config, start with empty partitions and default config"); + PQ_LOG_D("no config, start with empty partitions and default config"); } else { 
PQ_LOG_ERROR_AND_DIE("Unexpected config read status: " << read.GetStatus()); return; } - THashMap> partitionTxs; - InitTransactions(readRange, partitionTxs); + for (const auto& readRange : readRanges) { + Y_ABORT_UNLESS(readRange.HasStatus()); + if (readRange.GetStatus() != NKikimrProto::OK && + readRange.GetStatus() != NKikimrProto::OVERRUN && + readRange.GetStatus() != NKikimrProto::NODATA) { + PQ_LOG_ERROR_AND_DIE("Transactions read error: " << readRange.GetStatus()); + return; + } + + for (size_t i = 0; i < readRange.PairSize(); ++i) { + const auto& pair = readRange.GetPair(i); + + PQ_LOG_D("ReadRange pair." << + " Key " << (pair.HasKey() ? pair.GetKey() : "unknown") << + ", Status " << pair.GetStatus()); + + NKikimrPQ::TTransaction tx; + Y_ABORT_UNLESS(tx.ParseFromString(pair.GetValue())); + + PQ_LOG_D("Load tx " << tx.ShortDebugString()); + + if (tx.GetState() == NKikimrPQ::TTransaction::CALCULATED) { + PQ_LOG_D("fix tx state"); + tx.SetState(NKikimrPQ::TTransaction::PLANNED); + } + + Txs.emplace(tx.GetTxId(), tx); + + if (tx.HasStep()) { + if (std::make_pair(tx.GetStep(), tx.GetTxId()) >= std::make_pair(ExecStep, ExecTxId)) { + PlannedTxs.emplace_back(tx.GetStep(), tx.GetTxId()); + } + } + } + } + + EndInitTransactions(); + EndReadConfig(ctx); +} +void TPersQueue::EndReadConfig(const TActorContext& ctx) +{ for (const auto& partition : Config.GetPartitions()) { // no partitions will be created with empty config const TPartitionId partitionId(partition.GetPartitionId()); CreateOriginalPartition(Config, @@ -1140,7 +1264,6 @@ void TPersQueue::InitializeMeteringSink(const TActorContext& ctx) { return result; }; - MeteringSink.Create(ctx.Now(), { .FlushInterval = TDuration::Seconds(pqConfig.GetBillingMeteringConfig().GetFlushIntervalSec()), .TabletId = ToString(TabletID()), @@ -1149,7 +1272,7 @@ void TPersQueue::InitializeMeteringSink(const TActorContext& ctx) { .YdbDatabaseId = Config.GetYdbDatabaseId(), .StreamName = streamName, .ResourceId = streamPath, - 
.PartitionsSize = Config.PartitionsSize(), + .PartitionsSize = CountActivePartitions(Config.GetPartitions()), .WriteQuota = Config.GetPartitionConfig().GetWriteSpeedInBytesPerSecond(), .ReservedSpace = storageLimitBytes, .ConsumersCount = countReadRulesWithPricing(ctx, Config), @@ -1224,12 +1347,16 @@ void TPersQueue::Handle(TEvKeyValue::TEvResponse::TPtr& ev, const TActorContext& break; case READ_CONFIG_COOKIE: // read is only for config - is signal to create interal actors - HandleConfigReadResponse(resp, ctx); + HandleConfigReadResponse(std::move(resp), ctx); + break; + case READ_TXS_COOKIE: + HandleTransactionsReadResponse(std::move(resp), ctx); break; case WRITE_STATE_COOKIE: EndWriteTabletState(resp, ctx); break; case WRITE_TX_COOKIE: + PQ_LOG_D("Handle TEvKeyValue::TEvResponse (WRITE_TX_COOKIE)"); EndWriteTxs(resp, ctx); break; default: @@ -1262,6 +1389,9 @@ TPartitionInfo& TPersQueue::GetPartitionInfo(const TPartitionId& partitionId) void TPersQueue::Handle(TEvPQ::TEvPartitionCounters::TPtr& ev, const TActorContext& ctx) { + PQ_LOG_D("Handle TEvPQ::TEvPartitionCounters" << + " PartitionId " << ev->Get()->Partition); + const auto& partitionId = ev->Get()->Partition; auto& partition = GetPartitionInfo(partitionId); auto diff = ev->Get()->Counters.MakeDiffForAggr(partition.Baseline); @@ -1387,9 +1517,10 @@ void TPersQueue::Handle(TEvPQ::TEvInitComplete::TPtr& ev, const TActorContext& c ctx); } partition.PendingRequests.clear(); + } else { + ++PartitionsInited; } - ++PartitionsInited; Y_ABORT_UNLESS(ConfigInited);//partitions are inited only after config auto allInitialized = AllOriginalPartitionsInited(); @@ -1412,6 +1543,9 @@ void TPersQueue::Handle(TEvPQ::TEvInitComplete::TPtr& ev, const TActorContext& c void TPersQueue::Handle(TEvPQ::TEvError::TPtr& ev, const TActorContext& ctx) { + PQ_LOG_D("Handle TEvPQ::TEvError" << + " Cookie " << ev->Get()->Cookie << + ", Error " << ev->Get()->Error); auto it = ResponseProxy.find(ev->Get()->Cookie); if (it == 
ResponseProxy.end()) @@ -1447,6 +1581,7 @@ void TPersQueue::FinishResponse(THashMap>::iter void TPersQueue::Handle(TEvPersQueue::TEvUpdateConfig::TPtr& ev, const TActorContext& ctx) { + PQ_LOG_D("Handle TEvPersQueue::TEvUpdateConfig"); if (!ConfigInited) { UpdateConfigRequests.emplace_back(ev->Release(), ev->Sender); return; @@ -1457,6 +1592,8 @@ void TPersQueue::Handle(TEvPersQueue::TEvUpdateConfig::TPtr& ev, const TActorCon void TPersQueue::Handle(TEvPQ::TEvPartitionConfigChanged::TPtr&, const TActorContext& ctx) { + PQ_LOG_D("Handle TEvPQ::TEvPartitionConfigChanged"); + Y_ABORT_UNLESS(ChangePartitionConfigInflight > 0); --ChangePartitionConfigInflight; @@ -1499,9 +1636,35 @@ void TPersQueue::CreateTopicConverter(const NKikimrPQ::TPQTabletConfig& config, Y_ABORT_UNLESS(topicConverter->IsValid(), "%s", topicConverter->GetReason().c_str()); } +void TPersQueue::UpdateReadRuleGenerations(NKikimrPQ::TPQTabletConfig& cfg) const +{ + Y_ABORT_UNLESS(cfg.HasVersion()); + const int curConfigVersion = cfg.GetVersion(); + + // set rr generation for provided read rules + THashMap> existed; // map name -> rrVersion, rrGeneration + for (const auto& c : Config.GetConsumers()) { + existed[c.GetName()] = std::make_pair(c.GetVersion(), c.GetGeneration()); + } + + for (auto& c : *cfg.MutableConsumers()) { + auto it = existed.find(c.GetName()); + ui64 generation = 0; + if (it != existed.end() && it->second.first == c.GetVersion()) { + generation = it->second.second; + } else { + generation = curConfigVersion; + } + c.SetGeneration(generation); + if (ReadRuleCompatible()) { + cfg.AddReadRuleGenerations(generation); + } + } +} + void TPersQueue::ProcessUpdateConfigRequest(TAutoPtr ev, const TActorId& sender, const TActorContext& ctx) { - auto& record = ev->Record; + const auto& record = ev->GetRecord(); int oldConfigVersion = Config.HasVersion() ? Config.GetVersion() : -1; int newConfigVersion = NewConfig.HasVersion() ? 
NewConfig.GetVersion() : oldConfigVersion; @@ -1511,7 +1674,7 @@ void TPersQueue::ProcessUpdateConfigRequest(TAutoPtr> existed; // map name -> rrVersion, rrGeneration - for (const auto& c : Config.GetConsumers()) { - existed[c.GetName()] = std::make_pair(c.GetVersion(), c.GetGeneration()); - } - - for (auto& c : *cfg.MutableConsumers()) { - auto it = existed.find(c.GetName()); - ui64 generation = 0; - if (it != existed.end() && it->second.first == c.GetVersion()) { - generation = it->second.second; - } else { - generation = curConfigVersion; - } - c.SetGeneration(generation); - if (ReadRuleCompatible()) { - cfg.AddReadRuleGenerations(generation); - } - } - } + UpdateReadRuleGenerations(cfg); LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() << " Config update version " << cfg.GetVersion() << "(current " << Config.GetVersion() << ") received from actor " << sender @@ -1682,7 +1825,7 @@ void TPersQueue::AddCmdWriteConfig(TEvKeyValue::TEvRequest* request, keyRange = TPartitionKeyRange::Parse(mg.GetKeyRange()); } - sourceIdWriter.RegisterSourceId(mg.GetId(), 0, 0, ctx.Now(), std::move(keyRange)); + sourceIdWriter.RegisterSourceId(mg.GetId(), 0, 0, ctx.Now(), std::move(keyRange), false); } for (const auto& partition : cfg.GetPartitions()) { @@ -1698,6 +1841,8 @@ void TPersQueue::ClearNewConfig() void TPersQueue::Handle(TEvPersQueue::TEvDropTablet::TPtr& ev, const TActorContext& ctx) { + PQ_LOG_D("Handle TEvPersQueue::TEvDropTablet"); + auto& record = ev->Get()->Record; ui64 txId = record.GetTxId(); @@ -1732,13 +1877,19 @@ void TPersQueue::Handle(TEvPersQueue::TEvOffsets::TPtr& ev, const TActorContext& } ui32 cnt = 0; for (auto& p : Partitions) { - cnt += p.second.InitDone; + if (p.first.IsSupportivePartition()) { + continue; + } + + cnt += p.second.InitDone; } TActorId ans = CreateOffsetsProxyActor(TabletID(), ev->Sender, cnt, ctx); for (auto& p : Partitions) { - if (!p.second.InitDone) + if (!p.second.InitDone || p.first.IsSupportivePartition()) { 
continue; + } + THolder event = MakeHolder(ans, ev->Get()->Record.HasClientId() ? ev->Get()->Record.GetClientId() : ""); ctx.Send(p.second.Actor, event.Release()); @@ -1782,23 +1933,34 @@ void TPersQueue::ProcessStatusRequests(const TActorContext &ctx) { void TPersQueue::Handle(TEvPersQueue::TEvStatus::TPtr& ev, const TActorContext& ctx) { + PQ_LOG_D("Handle TEvPersQueue::TEvStatus"); + ReadBalancerActorId = ev->Sender; if (!ConfigInited || !AllOriginalPartitionsInited()) { - StatusRequests.push_back(ev); - return; + PQ_LOG_D("Postpone the request." << + " ConfigInited " << static_cast(ConfigInited) << + ", PartitionsInited " << PartitionsInited << + ", OriginalPartitionsCount " << OriginalPartitionsCount); + StatusRequests.push_back(ev); + return; } ui32 cnt = 0; - for (auto& [_, partitionInfo] : Partitions) { - cnt += partitionInfo.InitDone; + for (auto& [partitionId, partitionInfo] : Partitions) { + if (partitionId.IsSupportivePartition()) { + continue; + } + + cnt += partitionInfo.InitDone; } TActorId ans = CreateStatusProxyActor(TabletID(), ev->Sender, cnt, ev->Cookie, ctx); for (auto& p : Partitions) { - if (!p.second.InitDone) { + if (!p.second.InitDone || p.first.IsSupportivePartition()) { continue; } + THolder event; if (ev->Get()->Record.GetConsumers().empty()) { event = MakeHolder(ans, ev->Get()->Record.HasClientId() ? 
ev->Get()->Record.GetClientId() : "", @@ -2542,7 +2704,9 @@ const TPartitionInfo& TPersQueue::GetPartitionInfo(const NKikimrClient::TPersQue const TWriteId writeId = GetWriteId(req); ui32 originalPartitionId = req.GetPartition(); - Y_ABORT_UNLESS(TxWrites.contains(writeId) && TxWrites.at(writeId).Partitions.contains(originalPartitionId)); + Y_ABORT_UNLESS(TxWrites.contains(writeId) && TxWrites.at(writeId).Partitions.contains(originalPartitionId), + "PQ %" PRIu64 ", WriteId {%" PRIu64 ", %" PRIu64 "}, Partition %" PRIu32, + TabletID(), writeId.NodeId, writeId.KeyId, originalPartitionId); const TPartitionId& partitionId = TxWrites.at(writeId).Partitions.at(originalPartitionId); Y_ABORT_UNLESS(Partitions.contains(partitionId)); @@ -2837,6 +3001,8 @@ void TPersQueue::Handle(TEvPersQueue::TEvRequest::TPtr& ev, const TActorContext& void TPersQueue::Handle(TEvTabletPipe::TEvServerConnected::TPtr& ev, const TActorContext& ctx) { + PQ_LOG_D("Handle TEvTabletPipe::TEvServerConnected"); + auto it = PipesInfo.insert({ev->Get()->ClientId, {}}).first; it->second.ServerActors++; LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() << " server connected, pipe " @@ -2848,6 +3014,8 @@ void TPersQueue::Handle(TEvTabletPipe::TEvServerConnected::TPtr& ev, const TActo void TPersQueue::Handle(TEvTabletPipe::TEvServerDisconnected::TPtr& ev, const TActorContext& ctx) { + PQ_LOG_D("Handle TEvTabletPipe::TEvServerDisconnected"); + //inform partition if needed; auto it = PipesInfo.find(ev->Get()->ClientId); if (it != PipesInfo.end()) { @@ -2871,6 +3039,8 @@ void TPersQueue::Handle(TEvTabletPipe::TEvServerDisconnected::TPtr& ev, const TA void TPersQueue::Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const TActorContext& ctx) { + PQ_LOG_D("Handle TEvTabletPipe::TEvClientConnected"); + Y_ABORT_UNLESS(ev->Get()->Leader, "Unexpectedly connected to follower of tablet %" PRIu64, ev->Get()->TabletId); if (PipeClientCache->OnConnect(ev)) { @@ -2885,6 +3055,8 @@ void 
TPersQueue::Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev, const TActo void TPersQueue::Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const TActorContext& ctx) { + PQ_LOG_D("Handle TEvTabletPipe::TEvClientDestroyed"); + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() << " Client pipe to tablet " << ev->Get()->TabletId << " is reset"); @@ -3032,14 +3204,15 @@ void TPersQueue::Handle(TEvMediatorTimecast::TEvRegisterTabletResult::TPtr& ev, MediatorTimeCastEntry = message->Entry; Y_ABORT_UNLESS(MediatorTimeCastEntry); - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() << - "Registered with mediator time cast"); + PQ_LOG_D("Registered with mediator time cast"); TryWriteTxs(ctx); } void TPersQueue::Handle(TEvInterconnect::TEvNodeInfo::TPtr& ev, const TActorContext& ctx) { + PQ_LOG_D("Handle TEvInterconnect::TEvNodeInfo"); + Y_ABORT_UNLESS(ev->Get()->Node); DCId = ev->Get()->Node->Location.GetDataCenterId(); ResourceMetrics = Executor()->GetResourceMetrics(); @@ -3051,19 +3224,28 @@ void TPersQueue::Handle(TEvInterconnect::TEvNodeInfo::TPtr& ev, const TActorCont request->Record.AddCmdRead()->SetKey(KeyState()); request->Record.AddCmdRead()->SetKey(KeyTxInfo()); - auto cmd = request->Record.AddCmdReadRange(); - cmd->MutableRange()->SetFrom(GetTxKey(Min())); - cmd->MutableRange()->SetIncludeFrom(true); - cmd->MutableRange()->SetTo(GetTxKey(Max())); - cmd->MutableRange()->SetIncludeTo(true); - cmd->SetIncludeData(true); - request->Record.MutableCmdSetExecutorFastLogPolicy() ->SetIsAllowed(AppData(ctx)->PQConfig.GetTactic() == NKikimrClient::TKeyValueRequest::MIN_LATENCY); ctx.Send(ctx.SelfID, request.Release()); + ctx.Schedule(TDuration::Seconds(5), new TEvents::TEvWakeup()); } +void TPersQueue::AddCmdReadTransactionRange(TEvKeyValue::TEvRequest& request, + const TString& fromKey, bool includeFrom) +{ + auto cmd = request.Record.AddCmdReadRange(); + cmd->MutableRange()->SetFrom(fromKey); + 
cmd->MutableRange()->SetIncludeFrom(includeFrom); + cmd->MutableRange()->SetTo(GetTxKey(Max())); + cmd->MutableRange()->SetIncludeTo(true); + cmd->SetIncludeData(true); + + PQ_LOG_D("Transactions request." << + " From " << cmd->MutableRange()->GetFrom() << + ", To " << cmd->MutableRange()->GetTo()); +} + void TPersQueue::HandleWakeup(const TActorContext& ctx) { THashSet groups; for (auto& p : Partitions) { @@ -3098,7 +3280,7 @@ void TPersQueue::DeleteExpiredTransactions(const TActorContext& ctx) void TPersQueue::Handle(TEvPersQueue::TEvCancelTransactionProposal::TPtr& ev, const TActorContext& ctx) { - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() << " Handle TEvPersQueue::TEvCancelTransactionProposal"); + PQ_LOG_D("Handle TEvPersQueue::TEvCancelTransactionProposal"); NKikimrPQ::TEvCancelTransactionProposal& event = ev->Get()->Record; Y_ABORT_UNLESS(event.HasTxId()); @@ -3114,9 +3296,9 @@ void TPersQueue::Handle(TEvPersQueue::TEvCancelTransactionProposal::TPtr& ev, co void TPersQueue::Handle(TEvPersQueue::TEvProposeTransaction::TPtr& ev, const TActorContext& ctx) { - PQ_LOG_D("Handle TEvPersQueue::TEvProposeTransaction " << ev->Get()->Record.DebugString()); + const NKikimrPQ::TEvProposeTransaction& event = ev->Get()->GetRecord(); + PQ_LOG_D("Handle TEvPersQueue::TEvProposeTransaction " << event.ShortDebugString()); - NKikimrPQ::TEvProposeTransaction& event = ev->Get()->Record; switch (event.GetTxBodyCase()) { case NKikimrPQ::TEvProposeTransaction::kData: HandleDataTransaction(ev->Release(), ctx); @@ -3141,7 +3323,7 @@ bool TPersQueue::CheckTxWriteOperation(const NKikimrPQ::TPartitionOperation& ope TPartitionId partitionId(operation.GetPartitionId(), writeId, operation.GetSupportivePartition()); - PQ_LOG_D("partitionId=" << partitionId); + PQ_LOG_D("PartitionId " << partitionId << " for WriteId " << writeId); return Partitions.contains(partitionId); } @@ -3152,7 +3334,6 @@ bool TPersQueue::CheckTxWriteOperations(const 
NKikimrPQ::TDataTransaction& txBod } const TWriteId writeId = GetWriteId(txBody); - PQ_LOG_D("writeId=" << writeId); for (auto& operation : txBody.GetOperations()) { auto isWrite = [](const NKikimrPQ::TPartitionOperation& o) { @@ -3172,13 +3353,13 @@ bool TPersQueue::CheckTxWriteOperations(const NKikimrPQ::TDataTransaction& txBod void TPersQueue::HandleDataTransaction(TAutoPtr ev, const TActorContext& ctx) { - NKikimrPQ::TEvProposeTransaction& event = ev->Record; + NKikimrPQ::TEvProposeTransaction& event = *ev->MutableRecord(); Y_ABORT_UNLESS(event.GetTxBodyCase() == NKikimrPQ::TEvProposeTransaction::kData); Y_ABORT_UNLESS(event.HasData()); const NKikimrPQ::TDataTransaction& txBody = event.GetData(); if (TabletState != NKikimrPQ::ENormal) { - PQ_LOG_D("invalid PQ tablet state (" << NKikimrPQ::ETabletState_Name(TabletState) << ")"); + PQ_LOG_D("TxId " << event.GetTxId() << " invalid PQ tablet state (" << NKikimrPQ::ETabletState_Name(TabletState) << ")"); SendProposeTransactionAbort(ActorIdFromProto(event.GetSourceActor()), event.GetTxId(), NKikimrPQ::TError::ERROR, @@ -3192,7 +3373,7 @@ void TPersQueue::HandleDataTransaction(TAutoPtr partitionId = FindPartitionId(txBody); if (!partitionId.Defined()) { - PQ_LOG_D("unknown partition for WriteId " << txBody.GetWriteId()); + PQ_LOG_W("TxId " << event.GetTxId() << " unknown partition for WriteId " << txBody.GetWriteId()); SendProposeTransactionAbort(ActorIdFromProto(event.GetSourceActor()), event.GetTxId(), NKikimrPQ::TError::INTERNAL, @@ -3256,7 +3464,7 @@ void TPersQueue::HandleDataTransaction(TAutoPtr ev, const TActorContext& ctx) { - NKikimrPQ::TEvProposeTransaction& event = ev->Record; + const NKikimrPQ::TEvProposeTransaction& event = ev->GetRecord(); Y_ABORT_UNLESS(event.GetTxBodyCase() == NKikimrPQ::TEvProposeTransaction::kConfig); Y_ABORT_UNLESS(event.HasConfig()); @@ -3267,14 +3475,7 @@ void TPersQueue::HandleConfigTransaction(TAutoPtrGet()->Record; - - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, - "Tablet: " 
<< TabletID() << - ", PlanStep: " << event.GetStep() << - ", Mediator: " << event.GetMediatorID()); + PQ_LOG_D("Handle TEvTxProcessing::TEvPlanStep " << ev->Get()->Record.ShortDebugString()); EvPlanStepQueue.emplace_back(ev->Sender, ev->Release().Release()); @@ -3283,7 +3484,7 @@ void TPersQueue::Handle(TEvTxProcessing::TEvPlanStep::TPtr& ev, const TActorCont void TPersQueue::Handle(TEvTxProcessing::TEvReadSet::TPtr& ev, const TActorContext& ctx) { - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() << " Handle TEvTxProcessing::TEvReadSet"); + PQ_LOG_D("Handle TEvTxProcessing::TEvReadSet " << ev->Get()->Record.ShortDebugString()); NKikimrTx::TEvReadSet& event = ev->Get()->Record; Y_ABORT_UNLESS(event.HasTxId()); @@ -3293,7 +3494,7 @@ void TPersQueue::Handle(TEvTxProcessing::TEvReadSet::TPtr& ev, const TActorConte ack = std::make_unique(*ev->Get(), TabletID()); } - if (auto tx = GetTransaction(ctx, event.GetTxId()); tx && tx->Senders.contains(event.GetTabletProducer())) { + if (auto tx = GetTransaction(ctx, event.GetTxId()); tx && tx->PredicatesReceived.contains(event.GetTabletProducer())) { tx->OnReadSet(event, ev->Sender, std::move(ack)); if (tx->State == NKikimrPQ::TTransaction::WAIT_RS) { @@ -3302,6 +3503,7 @@ void TPersQueue::Handle(TEvTxProcessing::TEvReadSet::TPtr& ev, const TActorConte TryWriteTxs(ctx); } } else if (ack) { + PQ_LOG_D("send TEvReadSetAck to " << event.GetTabletProducer()); // // для неизвестных транзакций подтверждение отправляется сразу // @@ -3311,7 +3513,7 @@ void TPersQueue::Handle(TEvTxProcessing::TEvReadSet::TPtr& ev, const TActorConte void TPersQueue::Handle(TEvTxProcessing::TEvReadSetAck::TPtr& ev, const TActorContext& ctx) { - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() << " Handle TEvTxProcessing::TEvReadSetAck"); + PQ_LOG_D("Handle TEvTxProcessing::TEvReadSetAck " << ev->Get()->Record.ShortDebugString()); NKikimrTx::TEvReadSetAck& event = ev->Get()->Record; 
Y_ABORT_UNLESS(event.HasTxId()); @@ -3324,7 +3526,7 @@ void TPersQueue::Handle(TEvTxProcessing::TEvReadSetAck::TPtr& ev, const TActorCo tx->OnReadSetAck(event); tx->UnbindMsgsFromPipe(event.GetTabletConsumer()); - if (tx->State == NKikimrPQ::TTransaction::EXECUTED) { + if (tx->State == NKikimrPQ::TTransaction::WAIT_RS_ACKS) { CheckTxState(ctx, *tx); TryWriteTxs(ctx); @@ -3335,19 +3537,19 @@ void TPersQueue::Handle(TEvPQ::TEvTxCalcPredicateResult::TPtr& ev, const TActorC { const TEvPQ::TEvTxCalcPredicateResult& event = *ev->Get(); - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, - "Tablet " << TabletID() << - " Handle TEvPQ::TEvTxCalcPredicateResult" << - " Step " << event.Step << - " TxId " << event.TxId << - " Partition " << event.Partition << - " Predicate " << (event.Predicate ? "true" : "false")); + PQ_LOG_D("Handle TEvPQ::TEvTxCalcPredicateResult" << + " Step " << event.Step << + ", TxId " << event.TxId << + ", Partition " << event.Partition << + ", Predicate " << event.Predicate); auto tx = GetTransaction(ctx, event.TxId); if (!tx) { return; } + Y_ABORT_UNLESS(tx->State == NKikimrPQ::TTransaction::CALCULATING); + tx->OnTxCalcPredicateResult(event); CheckTxState(ctx, *tx); @@ -3359,6 +3561,11 @@ void TPersQueue::Handle(TEvPQ::TEvProposePartitionConfigResult::TPtr& ev, const { const TEvPQ::TEvProposePartitionConfigResult& event = *ev->Get(); + PQ_LOG_D("Handle TEvPQ::TEvProposePartitionConfigResult" << + " Step " << event.Step << + ", TxId " << event.TxId << + ", Partition " << event.Partition); + auto tx = GetTransaction(ctx, event.TxId); if (!tx) { return; @@ -3375,10 +3582,13 @@ void TPersQueue::Handle(TEvPQ::TEvProposePartitionConfigResult::TPtr& ev, const void TPersQueue::Handle(TEvPQ::TEvTxCommitDone::TPtr& ev, const TActorContext& ctx) { - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() << " Handle TEvPQ::TEvTxCommitDone"); - const TEvPQ::TEvTxCommitDone& event = *ev->Get(); + PQ_LOG_D("Handle TEvPQ::TEvTxCommitDone" << + " Step " << 
event.Step << + ", TxId " << event.TxId << + ", Partition " << event.Partition); + auto tx = GetTransaction(ctx, event.TxId); if (!tx) { return; @@ -3424,6 +3634,7 @@ bool TPersQueue::CanProcessTxWrites() const void TPersQueue::SubscribeWriteId(const TWriteId& writeId, const TActorContext& ctx) { + PQ_LOG_D("send TEvSubscribeLock for WriteId " << writeId); ctx.Send(NLongTxService::MakeLongTxServiceID(writeId.NodeId), new NLongTxService::TEvLongTxService::TEvSubscribeLock(writeId.KeyId, writeId.NodeId)); } @@ -3431,6 +3642,7 @@ void TPersQueue::SubscribeWriteId(const TWriteId& writeId, void TPersQueue::UnsubscribeWriteId(const TWriteId& writeId, const TActorContext& ctx) { + PQ_LOG_D("send TEvUnsubscribeLock for WriteId " << writeId); ctx.Send(NLongTxService::MakeLongTxServiceID(writeId.NodeId), new NLongTxService::TEvLongTxService::TEvUnsubscribeLock(writeId.KeyId, writeId.NodeId)); } @@ -3475,6 +3687,7 @@ void TPersQueue::BeginWriteTxs(const TActorContext& ctx) PendingSupportivePartitions = std::move(NewSupportivePartitions); NewSupportivePartitions.clear(); + PQ_LOG_D("Send TEvKeyValue::TEvRequest (WRITE_TX_COOKIE)"); ctx.Send(ctx.SelfID, request.Release()); TryReturnTabletStateAll(ctx); @@ -3529,7 +3742,7 @@ void TPersQueue::ProcessProposeTransactionQueue(const TActorContext& ctx) const auto front = std::move(EvProposeTransactionQueue.front()); EvProposeTransactionQueue.pop_front(); - const NKikimrPQ::TEvProposeTransaction& event = front->Record; + const NKikimrPQ::TEvProposeTransaction& event = front->GetRecord(); TDistributedTransaction& tx = Txs[event.GetTxId()]; switch (tx.State) { @@ -3537,9 +3750,15 @@ void TPersQueue::ProcessProposeTransactionQueue(const TActorContext& ctx) tx.OnProposeTransaction(event, GetAllowedStep(), TabletID()); + if (tx.Kind == NKikimrPQ::TTransaction::KIND_CONFIG) { + UpdateReadRuleGenerations(tx.TabletConfig); + } + if (tx.WriteId.Defined()) { const TWriteId& writeId = *tx.WriteId; - Y_ABORT_UNLESS(TxWrites.contains(writeId)); + 
Y_ABORT_UNLESS(TxWrites.contains(writeId), + "PQ %" PRIu64 ", TxId %" PRIu64 ", WriteId {%" PRIu64 ", %" PRIu64 "}", + TabletID(), tx.TxId, writeId.NodeId, writeId.KeyId); TTxWriteInfo& writeInfo = TxWrites.at(writeId); writeInfo.TxId = tx.TxId; } @@ -3657,20 +3876,34 @@ void TPersQueue::ProcessWriteTxs(const TActorContext& ctx, void TPersQueue::ProcessDeleteTxs(const TActorContext& ctx, NKikimrClient::TKeyValueRequest& request) { - Y_ABORT_UNLESS(!WriteTxsInProgress); + Y_ABORT_UNLESS(!WriteTxsInProgress, + "PQ %" PRIu64, + TabletID()); for (ui64 txId : DeleteTxs) { - auto tx = GetTransaction(ctx, txId); - Y_ABORT_UNLESS(tx); + PQ_LOG_D("delete key for TxId " << txId); + AddCmdDeleteTx(request, txId); - tx->AddCmdDelete(request); - - Txs.erase(tx->TxId); + auto tx = GetTransaction(ctx, txId); + if (tx) { + ChangedTxs.insert(txId); + } } DeleteTxs.clear(); } +void TPersQueue::AddCmdDeleteTx(NKikimrClient::TKeyValueRequest& request, + ui64 txId) +{ + TString key = GetTxKey(txId); + auto range = request.AddCmdDeleteRange()->MutableRange(); + range->SetFrom(key); + range->SetIncludeFrom(true); + range->SetTo(key); + range->SetIncludeTo(true); +} + void TPersQueue::ProcessConfigTx(const TActorContext& ctx, TEvKeyValue::TEvRequest* request) { @@ -3715,11 +3948,16 @@ void TPersQueue::SavePlanStep(NKikimrPQ::TTabletTxInfo& info) void TPersQueue::SaveTxWrites(NKikimrPQ::TTabletTxInfo& info) { for (auto& [writeId, write] : TxWrites) { - for (auto [partitionId, shadowPartitionId] : write.Partitions) { + if (write.Partitions.empty()) { auto* txWrite = info.MutableTxWrites()->Add(); SetWriteId(*txWrite, writeId); - txWrite->SetOriginalPartitionId(partitionId); - txWrite->SetInternalPartitionId(shadowPartitionId.InternalPartitionId); + } else { + for (auto [partitionId, shadowPartitionId] : write.Partitions) { + auto* txWrite = info.MutableTxWrites()->Add(); + SetWriteId(*txWrite, writeId); + txWrite->SetOriginalPartitionId(partitionId); + 
txWrite->SetInternalPartitionId(shadowPartitionId.InternalPartitionId); + } } } @@ -3773,8 +4011,8 @@ void TPersQueue::SendEvReadSetToReceivers(const TActorContext& ctx, TString body; Y_ABORT_UNLESS(data.SerializeToString(&body)); - PQ_LOG_D("Send TEvTxProcessing::TEvReadSet to " << tx.Receivers.size() << " receivers. Wait TEvTxProcessing::TEvReadSet from " << tx.Senders.size() << " senders."); - for (ui64 receiverId : tx.Receivers) { + PQ_LOG_D("Send TEvTxProcessing::TEvReadSet to " << tx.PredicateRecipients.size() << " receivers. Wait TEvTxProcessing::TEvReadSet from " << tx.PredicatesReceived.size() << " senders."); + for (auto& [receiverId, _] : tx.PredicateRecipients) { if (receiverId != TabletID()) { auto event = std::make_unique(tx.Step, tx.TxId, @@ -3783,6 +4021,7 @@ void TPersQueue::SendEvReadSetToReceivers(const TActorContext& ctx, TabletID(), body, 0); + PQ_LOG_D("Send TEvReadSet to tablet " << receiverId); SendToPipe(receiverId, tx, std::move(event), ctx); } } @@ -3791,7 +4030,9 @@ void TPersQueue::SendEvReadSetToReceivers(const TActorContext& ctx, void TPersQueue::SendEvReadSetAckToSenders(const TActorContext& ctx, TDistributedTransaction& tx) { + PQ_LOG_D("TPersQueue::SendEvReadSetAckToSenders"); for (auto& [target, event] : tx.ReadSetAcks) { + PQ_LOG_D("Send TEvTxProcessing::TEvReadSetAck " << event->ToString()); ctx.Send(target, event.release()); } } @@ -3849,7 +4090,9 @@ void TPersQueue::SendEvTxCalcPredicateToPartitions(const TActorContext& ctx, if (tx.WriteId.Defined()) { const TWriteId& writeId = *tx.WriteId; - Y_ABORT_UNLESS(TxWrites.contains(writeId)); + Y_ABORT_UNLESS(TxWrites.contains(writeId), + "PQ %" PRIu64 ", TxId %" PRIu64 ", WriteId {%" PRIu64 ", %" PRIu64 "}", + TabletID(), tx.TxId, writeId.NodeId, writeId.KeyId); const TTxWriteInfo& writeInfo = TxWrites.at(writeId); for (auto& [originalPartitionId, partitionId] : writeInfo.Partitions) { @@ -3886,7 +4129,9 @@ void TPersQueue::SendEvTxCommitToPartitions(const TActorContext& ctx, auto 
event = std::make_unique(tx.Step, tx.TxId); auto p = Partitions.find(TPartitionId(partitionId)); - Y_ABORT_UNLESS(p != Partitions.end()); + Y_ABORT_UNLESS(p != Partitions.end(), + "Tablet %" PRIu64 ", Partition %" PRIu32 ", TxId %" PRIu64, + TabletID(), partitionId, tx.TxId); ctx.Send(p->second.Actor, event.release()); } @@ -3969,11 +4214,10 @@ const THashSet& TPersQueue::GetBindedTxs(ui64 tabletId) TDistributedTransaction* TPersQueue::GetTransaction(const TActorContext& ctx, ui64 txId) { + Y_UNUSED(ctx); auto p = Txs.find(txId); if (p == Txs.end()) { - LOG_WARN_S(ctx, NKikimrServices::PERSQUEUE, - "Tablet " << TabletID() << - " Unknown transaction " << txId); + PQ_LOG_W("Unknown transaction " << txId); return nullptr; } return &p->second; @@ -3982,116 +4226,95 @@ TDistributedTransaction* TPersQueue::GetTransaction(const TActorContext& ctx, void TPersQueue::CheckTxState(const TActorContext& ctx, TDistributedTransaction& tx) { + PQ_LOG_D("TxId " << tx.TxId << + ", State " << NKikimrPQ::TTransaction_EState_Name(tx.State)); + switch (tx.State) { case NKikimrPQ::TTransaction::UNKNOWN: - Y_ABORT_UNLESS(tx.TxId != Max()); - - PQ_LOG_T("TxId="<< tx.TxId << ", State=UNKNOWN"); + Y_ABORT_UNLESS(tx.TxId != Max(), + "PQ %" PRIu64 ", TxId %" PRIu64, + TabletID(), tx.TxId); WriteTx(tx, NKikimrPQ::TTransaction::PREPARED); ScheduleProposeTransactionResult(tx); tx.State = NKikimrPQ::TTransaction::PREPARING; + PQ_LOG_D("TxId " << tx.TxId << + ", NewState " << NKikimrPQ::TTransaction_EState_Name(tx.State)); break; case NKikimrPQ::TTransaction::PREPARING: - Y_ABORT_UNLESS(tx.WriteInProgress); - - PQ_LOG_T("TxId="<< tx.TxId << ", State=PREPARING"); + Y_ABORT_UNLESS(tx.WriteInProgress, + "PQ %" PRIu64 ", TxId %" PRIu64, + TabletID(), tx.TxId); tx.WriteInProgress = false; - // - // запланированные события будут отправлены в EndWriteTxs - // + // scheduled events will be sent to EndWriteTxs tx.State = NKikimrPQ::TTransaction::PREPARED; + PQ_LOG_D("TxId " << tx.TxId << + ", NewState " << 
NKikimrPQ::TTransaction_EState_Name(tx.State)); break; case NKikimrPQ::TTransaction::PREPARED: - Y_ABORT_UNLESS(tx.Step != Max()); - - PQ_LOG_T("TxId="<< tx.TxId << ", State=PREPARED"); + Y_ABORT_UNLESS(tx.Step != Max(), + "PQ %" PRIu64 ", TxId %" PRIu64, + TabletID(), tx.TxId); WriteTx(tx, NKikimrPQ::TTransaction::PLANNED); tx.State = NKikimrPQ::TTransaction::PLANNING; + PQ_LOG_D("TxId " << tx.TxId << + ", NewState " << NKikimrPQ::TTransaction_EState_Name(tx.State)); break; case NKikimrPQ::TTransaction::PLANNING: - Y_ABORT_UNLESS(tx.WriteInProgress); - - PQ_LOG_T("TxId="<< tx.TxId << ", State=PLANNING"); + Y_ABORT_UNLESS(tx.WriteInProgress, + "PQ %" PRIu64 ", TxId %" PRIu64, + TabletID(), tx.TxId); tx.WriteInProgress = false; - // - // запланированные события будут отправлены в EndWriteTxs - // + // scheduled events will be sent to EndWriteTxs tx.State = NKikimrPQ::TTransaction::PLANNED; + PQ_LOG_D("TxId " << tx.TxId << + ", NewState " << NKikimrPQ::TTransaction_EState_Name(tx.State)); [[fallthrough]]; case NKikimrPQ::TTransaction::PLANNED: - PQ_LOG_T("TxId="<< tx.TxId << ", State=PLANNED" << - ", (!TxQueue.empty())=" << !TxQueue.empty()); + PQ_LOG_D("TxQueue.size " << TxQueue.size()); if (!TxQueue.empty() && (TxQueue.front().second == tx.TxId)) { - std::tie(ExecStep, ExecTxId) = TxQueue.front(); - PQ_LOG_D("ExecStep " << ExecStep << ", ExecTxId " << ExecTxId); - - switch (tx.Kind) { - case NKikimrPQ::TTransaction::KIND_DATA: - SendEvTxCalcPredicateToPartitions(ctx, tx); - break; - case NKikimrPQ::TTransaction::KIND_CONFIG: { - NPersQueue::TConverterFactoryPtr converterFactory; - CreateTopicConverter(tx.TabletConfig, - converterFactory, - tx.TopicConverter, - ctx); - CreateNewPartitions(tx.TabletConfig, - tx.TopicConverter, - ctx); - SendEvProposePartitionConfig(ctx, tx); - break; - } - case NKikimrPQ::TTransaction::KIND_UNKNOWN: - Y_ABORT_UNLESS(false); - } - - tx.State = NKikimrPQ::TTransaction::CALCULATING; + MoveTopTxToCalculating(tx, ctx); } break; case 
NKikimrPQ::TTransaction::CALCULATING: - Y_ABORT_UNLESS(tx.PartitionRepliesCount <= tx.PartitionRepliesExpected); + Y_ABORT_UNLESS(tx.PartitionRepliesCount <= tx.PartitionRepliesExpected, + "PQ %" PRIu64 ", TxId %" PRIu64 ", PartitionRepliesCount %" PRISZT ", PartitionRepliesExpected %" PRISZT, + TabletID(), tx.TxId, + tx.PartitionRepliesCount, tx.PartitionRepliesExpected); - PQ_LOG_T("TxId="<< tx.TxId << ", State=CALCULATING" << - ", tx.PartitionRepliesCount=" << tx.PartitionRepliesCount << - ", tx.PartitionRepliesExpected=" << tx.PartitionRepliesExpected); + PQ_LOG_D("Received " << tx.PartitionRepliesCount << + ", Expected " << tx.PartitionRepliesExpected); if (tx.PartitionRepliesCount == tx.PartitionRepliesExpected) { switch (tx.Kind) { case NKikimrPQ::TTransaction::KIND_DATA: - SendEvReadSetToReceivers(ctx, tx); - - WriteTx(tx, NKikimrPQ::TTransaction::WAIT_RS); - - tx.State = NKikimrPQ::TTransaction::CALCULATED; - break; - case NKikimrPQ::TTransaction::KIND_CONFIG: - SendEvReadSetToReceivers(ctx, tx); + WriteTx(tx, NKikimrPQ::TTransaction::CALCULATED); - tx.State = NKikimrPQ::TTransaction::WAIT_RS; + tx.State = NKikimrPQ::TTransaction::CALCULATED; + PQ_LOG_D("TxId " << tx.TxId << + ", NewState " << NKikimrPQ::TTransaction_EState_Name(tx.State)); - CheckTxState(ctx, tx); break; case NKikimrPQ::TTransaction::KIND_UNKNOWN: @@ -4102,25 +4325,25 @@ void TPersQueue::CheckTxState(const TActorContext& ctx, break; case NKikimrPQ::TTransaction::CALCULATED: - Y_ABORT_UNLESS(tx.WriteInProgress); - - PQ_LOG_T("TxId="<< tx.TxId << ", State=CALCULATED"); - - tx.WriteInProgress = false; - tx.State = NKikimrPQ::TTransaction::WAIT_RS; + PQ_LOG_D("TxId " << tx.TxId << + ", NewState " << NKikimrPQ::TTransaction_EState_Name(tx.State)); - [[fallthrough]]; - - case NKikimrPQ::TTransaction::WAIT_RS: // // the number of TEvReadSetAck sent should not be greater than the number of senders // from TEvProposeTransaction // - Y_ABORT_UNLESS(tx.ReadSetAcks.size() <= tx.Senders.size()); + 
Y_ABORT_UNLESS(tx.ReadSetAcks.size() <= tx.PredicatesReceived.size(), + "PQ %" PRIu64 ", TxId %" PRIu64 ", ReadSetAcks.size %" PRISZT ", PredicatesReceived.size %" PRISZT, + TabletID(), tx.TxId, + tx.ReadSetAcks.size(), tx.PredicatesReceived.size()); - PQ_LOG_T("TxId="<< tx.TxId << ", State=WAIT_RS" << - ", tx.HaveParticipantsDecision()=" << tx.HaveParticipantsDecision()); + SendEvReadSetToReceivers(ctx, tx); + + [[fallthrough]]; + + case NKikimrPQ::TTransaction::WAIT_RS: + PQ_LOG_D("HaveParticipantsDecision " << tx.HaveParticipantsDecision()); if (tx.HaveParticipantsDecision()) { if (tx.GetDecision() == NKikimrTx::TReadSetData::DECISION_COMMIT) { @@ -4130,6 +4353,8 @@ void TPersQueue::CheckTxState(const TActorContext& ctx, } tx.State = NKikimrPQ::TTransaction::EXECUTING; + PQ_LOG_D("TxId " << tx.TxId << + ", NewState " << NKikimrPQ::TTransaction_EState_Name(tx.State)); } else { break; } @@ -4137,23 +4362,28 @@ void TPersQueue::CheckTxState(const TActorContext& ctx, [[fallthrough]]; case NKikimrPQ::TTransaction::EXECUTING: - Y_ABORT_UNLESS(tx.PartitionRepliesCount <= tx.PartitionRepliesExpected); + Y_ABORT_UNLESS(tx.PartitionRepliesCount <= tx.PartitionRepliesExpected, + "PQ %" PRIu64 ", TxId %" PRIu64 ", PartitionRepliesCount %" PRISZT ", PartitionRepliesExpected %" PRISZT, + TabletID(), tx.TxId, + tx.PartitionRepliesCount, tx.PartitionRepliesExpected); + + PQ_LOG_D("Received " << tx.PartitionRepliesCount << + ", Expected " << tx.PartitionRepliesExpected); - PQ_LOG_T("TxId="<< tx.TxId << ", State=EXECUTING" << - ", tx.PartitionRepliesCount=" << tx.PartitionRepliesCount << - ", tx.PartitionRepliesExpected=" << tx.PartitionRepliesExpected); if (tx.PartitionRepliesCount == tx.PartitionRepliesExpected) { - Y_ABORT_UNLESS(!TxQueue.empty()); - Y_ABORT_UNLESS(TxQueue.front().second == tx.TxId); + Y_ABORT_UNLESS(!TxQueue.empty(), + "PQ %" PRIu64 ", TxId %" PRIu64, + TabletID(), tx.TxId); + Y_ABORT_UNLESS(TxQueue.front().second == tx.TxId, + "PQ %" PRIu64 ", TxId %" 
PRIu64, + TabletID(), tx.TxId); SendEvProposeTransactionResult(ctx, tx); switch (tx.Kind) { case NKikimrPQ::TTransaction::KIND_DATA: - SendEvReadSetAckToSenders(ctx, tx); break; case NKikimrPQ::TTransaction::KIND_CONFIG: - SendEvReadSetAckToSenders(ctx, tx); ApplyNewConfig(tx.TabletConfig, ctx); TabletConfigTx = tx.TabletConfig; BootstrapConfigTx = tx.BootstrapConfig; @@ -4162,30 +4392,85 @@ void TPersQueue::CheckTxState(const TActorContext& ctx, Y_ABORT_UNLESS(false); } - tx.State = NKikimrPQ::TTransaction::EXECUTED; + WriteTx(tx, NKikimrPQ::TTransaction::EXECUTED); - TxQueue.pop(); - TryStartTransaction(ctx); - } else { - break; + PQ_LOG_D("delete partitions for TxId " << tx.TxId); + BeginDeletePartitions(tx); + + tx.State = NKikimrPQ::TTransaction::EXECUTED; + PQ_LOG_D("TxId " << tx.TxId << + ", NewState " << NKikimrPQ::TTransaction_EState_Name(tx.State)); } - [[fallthrough]]; + break; case NKikimrPQ::TTransaction::EXECUTED: - PQ_LOG_T("TxId="<< tx.TxId << ", State=EXECUTED, tx.HaveAllRecipientsReceive()=" << tx.HaveAllRecipientsReceive()); - if (tx.HaveAllRecipientsReceive()) { - if (tx.WriteId.Defined()) { - BeginDeleteTx(tx); - } else { - DeleteTx(tx); - } + SendEvReadSetAckToSenders(ctx, tx); + + tx.State = NKikimrPQ::TTransaction::WAIT_RS_ACKS; + PQ_LOG_D("TxId " << tx.TxId << + ", NewState " << NKikimrPQ::TTransaction_EState_Name(tx.State)); + + [[fallthrough]]; + + case NKikimrPQ::TTransaction::WAIT_RS_ACKS: + PQ_LOG_D("HaveAllRecipientsReceive " << tx.HaveAllRecipientsReceive() << + ", AllSupportivePartitionsHaveBeenDeleted " << AllSupportivePartitionsHaveBeenDeleted(tx.WriteId)); + if (tx.HaveAllRecipientsReceive() && AllSupportivePartitionsHaveBeenDeleted(tx.WriteId)) { + DeleteTx(tx); + // implicitly switch to the state DELETING } break; + + case NKikimrPQ::TTransaction::DELETING: + // The PQ tablet has persisted its state. Now she can delete the transaction and take the next one. 
+ if (!TxQueue.empty() && (TxQueue.front().second == tx.TxId)) { + TxQueue.pop(); + TryStartTransaction(ctx); + } + + DeleteWriteId(tx.WriteId); + PQ_LOG_D("delete TxId " << tx.TxId); + Txs.erase(tx.TxId); + + // If this was the last transaction, then you need to send responses to messages about changes + // in the status of the PQ tablet (if they came) + TryReturnTabletStateAll(ctx); + break; } } +bool TPersQueue::AllSupportivePartitionsHaveBeenDeleted(const TMaybe& writeId) const +{ + if (!writeId.Defined()) { + return true; + } + + Y_ABORT_UNLESS(TxWrites.contains(*writeId), + "PQ %" PRIu64 ", WriteId {%" PRIu64 ", %" PRIu64 "}", + TabletID(), writeId->NodeId, writeId->KeyId); + const TTxWriteInfo& writeInfo = TxWrites.at(*writeId); + + PQ_LOG_D("WriteId " << *writeId << + " Partitions.size=" << writeInfo.Partitions.size()); + bool deleted = + writeInfo.Partitions.empty() + ; + + return deleted; +} + +void TPersQueue::DeleteWriteId(const TMaybe& writeId) +{ + if (!writeId.Defined() || !TxWrites.contains(*writeId)) { + return; + } + + PQ_LOG_D("delete WriteId " << *writeId); + TxWrites.erase(*writeId); +} + void TPersQueue::WriteTx(TDistributedTransaction& tx, NKikimrPQ::TTransaction::EState state) { WriteTxs[tx.TxId] = state; @@ -4195,8 +4480,14 @@ void TPersQueue::WriteTx(TDistributedTransaction& tx, NKikimrPQ::TTransaction::E void TPersQueue::DeleteTx(TDistributedTransaction& tx) { + PQ_LOG_D("add an TxId " << tx.TxId << " to the list for deletion"); + DeleteTxs.insert(tx.TxId); + tx.State = NKikimrPQ::TTransaction::DELETING; + PQ_LOG_D("TxId " << tx.TxId << + ", NewState " << NKikimrPQ::TTransaction_EState_Name(tx.State)); + tx.WriteInProgress = true; } @@ -4212,7 +4503,9 @@ void TPersQueue::CheckChangedTxStates(const TActorContext& ctx) { for (ui64 txId : ChangedTxs) { auto tx = GetTransaction(ctx, txId); - Y_ABORT_UNLESS(tx); + Y_ABORT_UNLESS(tx, + "PQ %" PRIu64 ", TxId %" PRIu64, + TabletID(), txId); CheckTxState(ctx, *tx); } @@ -4278,17 +4571,22 @@ void 
TPersQueue::SendProposeTransactionAbort(const TActorId& target, void TPersQueue::SendEvProposePartitionConfig(const TActorContext& ctx, TDistributedTransaction& tx) { - for (auto& [_, partition] : Partitions) { + for (auto& [partitionId, partition] : Partitions) { + if (partitionId.IsSupportivePartition()) { + continue; + } + auto event = std::make_unique(tx.Step, tx.TxId); event->TopicConverter = tx.TopicConverter; event->Config = tx.TabletConfig; + event->BootstrapConfig = tx.BootstrapConfig; ctx.Send(partition.Actor, std::move(event)); } tx.PartitionRepliesCount = 0; - tx.PartitionRepliesExpected = Partitions.size(); + tx.PartitionRepliesExpected = OriginalPartitionsCount; } TActorId TPersQueue::GetPartitionQuoter(const TPartitionId& partition) { @@ -4371,49 +4669,32 @@ void TPersQueue::EnsurePartitionsAreNotDeleted(const NKikimrPQ::TPQTabletConfig& } } -void TPersQueue::InitTransactions(const NKikimrClient::TKeyValueResponse::TReadRangeResult& readRange, - THashMap>& partitionTxs) +void TPersQueue::BeginInitTransactions() { Txs.clear(); TxQueue.clear(); - std::deque> plannedTxs; - const auto& ctx = ActorContext(); - - for (size_t i = 0; i < readRange.PairSize(); ++i) { - auto& pair = readRange.GetPair(i); - - NKikimrPQ::TTransaction tx; - Y_ABORT_UNLESS(tx.ParseFromString(pair.GetValue())); - - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() << " " << - "Tx: " << tx.DebugString()); - - Txs.emplace(tx.GetTxId(), tx); + PlannedTxs.clear(); +} - if (tx.HasStep()) { - if (std::make_pair(tx.GetStep(), tx.GetTxId()) >= std::make_pair(ExecStep, ExecTxId)) { - plannedTxs.emplace_back(tx.GetStep(), tx.GetTxId()); - } - } - } +void TPersQueue::EndInitTransactions() +{ + PQ_LOG_D("Txs.size=" << Txs.size() << ", PlannedTxs.size=" << PlannedTxs.size()); - std::sort(plannedTxs.begin(), plannedTxs.end()); - for (auto& item : plannedTxs) { + std::sort(PlannedTxs.begin(), PlannedTxs.end()); + for (auto& item : PlannedTxs) { TxQueue.push(item); } if 
(!TxQueue.empty()) { - LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE, "Tablet " << TabletID() << " " << - "top tx queue (" << TxQueue.front().first << ", " << TxQueue.front().second << ")"); + PQ_LOG_D("top tx queue (" << TxQueue.front().first << ", " << TxQueue.front().second << ")"); } - - Y_UNUSED(partitionTxs); } void TPersQueue::TryStartTransaction(const TActorContext& ctx) { if (TxQueue.empty()) { + PQ_LOG_D("empty tx queue"); return; } @@ -4421,6 +4702,8 @@ void TPersQueue::TryStartTransaction(const TActorContext& ctx) Y_ABORT_UNLESS(next); CheckTxState(ctx, *next); + + TryWriteTxs(ctx); } void TPersQueue::OnInitComplete(const TActorContext& ctx) @@ -4463,6 +4746,8 @@ void TPersQueue::Handle(TEvPQ::TEvSubDomainStatus::TPtr& ev, const TActorContext void TPersQueue::Handle(TEvPersQueue::TEvProposeTransactionAttach::TPtr &ev, const TActorContext &ctx) { + PQ_LOG_D("Handle TEvPersQueue::TEvProposeTransactionAttach " << ev->Get()->Record.ShortDebugString()); + const ui64 txId = ev->Get()->Record.GetTxId(); NKikimrProto::EReplyStatus status = NKikimrProto::NODATA; @@ -4487,17 +4772,17 @@ void TPersQueue::Handle(TEvPQ::TEvCheckPartitionStatusRequest::TPtr& ev, const T { auto& record = ev->Get()->Record; auto it = Partitions.find(TPartitionId(TPartitionId(record.GetPartition()))); - if (it == Partitions.end()) { + if (InitCompleted && it == Partitions.end()) { LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE, "Unknown partition " << record.GetPartition()); - auto response = THolder(); + auto response = MakeHolder(); response->Record.SetStatus(NKikimrPQ::ETopicPartitionStatus::Deleted); Send(ev->Sender, response.Release()); return; } - if (it->second.InitDone) { + if (it != Partitions.end() && it->second.InitDone) { Forward(ev, it->second.Actor); } else { CheckPartitionStatusRequests[record.GetPartition()].push_back(ev); @@ -4518,13 +4803,16 @@ void TPersQueue::ProcessCheckPartitionStatusRequests(const TPartitionId& partiti } } -void 
TPersQueue::Handle(NLongTxService::TEvLongTxService::TEvLockStatus::TPtr& ev, const TActorContext&) +void TPersQueue::Handle(NLongTxService::TEvLongTxService::TEvLockStatus::TPtr& ev) { + PQ_LOG_D("Handle TEvLongTxService::TEvLockStatus " << ev->Get()->Record.ShortDebugString()); + auto& record = ev->Get()->Record; const TWriteId writeId(record.GetLockNode(), record.GetLockId()); if (!TxWrites.contains(writeId)) { // the transaction has already been completed + PQ_LOG_D("unknown WriteId " << writeId); return; } @@ -4532,13 +4820,18 @@ void TPersQueue::Handle(NLongTxService::TEvLongTxService::TEvLockStatus::TPtr& e writeInfo.LongTxSubscriptionStatus = record.GetStatus(); if (writeInfo.LongTxSubscriptionStatus == NKikimrLongTxService::TEvLockStatus::STATUS_SUBSCRIBED) { + PQ_LOG_D("subscribed WriteId " << writeId); return; } - if (!writeInfo.TxId.Defined()) { - // the message TEvProposeTransaction will not come anymore - BeginDeletePartitions(writeInfo); + if (writeInfo.TxId.Defined()) { + // the message `TEvProposeTransaction` has already arrived + PQ_LOG_D("there is already a transaction TxId " << writeInfo.TxId << " for WriteId " << writeId); + return; } + + PQ_LOG_D("delete partitions for WriteId " << writeId); + BeginDeletePartitions(writeInfo); } void TPersQueue::Handle(TEvPQ::TEvReadingPartitionStatusRequest::TPtr& ev, const TActorContext& ctx) @@ -4555,12 +4848,29 @@ void TPersQueue::Handle(TEvPQ::TEvPartitionScaleStatusChanged::TPtr& ev, const T } } +void TPersQueue::DeletePartition(const TPartitionId& partitionId, const TActorContext& ctx) +{ + auto p = Partitions.find(partitionId); + if (p == Partitions.end()) { + return; + } + + const TPartitionInfo& partition = p->second; + ctx.Send(partition.Actor, new TEvents::TEvPoisonPill()); + + Partitions.erase(partitionId); +} + void TPersQueue::Handle(TEvPQ::TEvDeletePartitionDone::TPtr& ev, const TActorContext& ctx) { + PQ_LOG_D("Handle TEvPQ::TEvDeletePartitionDone " << ev->Get()->PartitionId); + auto* event = 
ev->Get(); Y_ABORT_UNLESS(event->PartitionId.WriteId.Defined()); const TWriteId& writeId = *event->PartitionId.WriteId; - Y_ABORT_UNLESS(TxWrites.contains(writeId)); + Y_ABORT_UNLESS(TxWrites.contains(writeId), + "PQ %" PRIu64 ", WriteId {%" PRIu64 ", %" PRIu64 "}", + TabletID(), writeId.NodeId, writeId.KeyId); TTxWriteInfo& writeInfo = TxWrites.at(writeId); Y_ABORT_UNLESS(writeInfo.Partitions.contains(event->PartitionId.OriginalPartitionId)); const TPartitionId& partitionId = writeInfo.Partitions.at(event->PartitionId.OriginalPartitionId); @@ -4568,18 +4878,18 @@ void TPersQueue::Handle(TEvPQ::TEvDeletePartitionDone::TPtr& ev, const TActorCon Y_ABORT_UNLESS(partitionId.IsSupportivePartition()); Y_ABORT_UNLESS(Partitions.contains(partitionId)); - Partitions.erase(partitionId); + DeletePartition(partitionId, ctx); writeInfo.Partitions.erase(partitionId.OriginalPartitionId); if (writeInfo.Partitions.empty()) { UnsubscribeWriteId(writeId, ctx); if (writeInfo.TxId.Defined()) { if (auto tx = GetTransaction(ctx, *writeInfo.TxId); tx) { - DeleteTx(*tx); + if (tx->State == NKikimrPQ::TTransaction::WAIT_RS_ACKS) { + CheckTxState(ctx, *tx); + } } } - PQ_LOG_D("delete WriteId " << writeId); - TxWrites.erase(writeId); } TxWritesChanged = true; @@ -4588,49 +4898,49 @@ void TPersQueue::Handle(TEvPQ::TEvDeletePartitionDone::TPtr& ev, const TActorCon void TPersQueue::Handle(TEvPQ::TEvTransactionCompleted::TPtr& ev, const TActorContext&) { + PQ_LOG_D("Handle TEvPQ::TEvTransactionCompleted" << + " WriteId " << ev->Get()->WriteId); + auto* event = ev->Get(); if (!event->WriteId.Defined()) { return; } const TWriteId& writeId = *event->WriteId; - Y_ABORT_UNLESS(TxWrites.contains(writeId)); + Y_ABORT_UNLESS(TxWrites.contains(writeId), + "PQ %" PRIu64 ", WriteId {%" PRIu64 ", %" PRIu64 "}", + TabletID(), writeId.NodeId, writeId.KeyId); TTxWriteInfo& writeInfo = TxWrites.at(writeId); Y_ABORT_UNLESS(writeInfo.Partitions.size() == 1); BeginDeletePartitions(writeInfo); } -void 
TPersQueue::BeginDeleteTx(const TDistributedTransaction& tx) -{ - Y_ABORT_UNLESS(tx.WriteId.Defined()); - const TWriteId& writeId = *tx.WriteId; - if (!TxWrites.contains(writeId)) { - // the transaction has already been completed - return; - } - - TTxWriteInfo& writeInfo = TxWrites.at(writeId); - if (writeInfo.LongTxSubscriptionStatus == NKikimrLongTxService::TEvLockStatus::STATUS_SUBSCRIBED) { - return; - } - - BeginDeletePartitions(writeInfo); -} - void TPersQueue::BeginDeletePartitions(TTxWriteInfo& writeInfo) { if (writeInfo.Deleting) { + PQ_LOG_D("Already deleting WriteInfo"); return; } for (auto& [_, partitionId] : writeInfo.Partitions) { Y_ABORT_UNLESS(Partitions.contains(partitionId)); const TPartitionInfo& partition = Partitions.at(partitionId); + PQ_LOG_D("send TEvPQ::TEvDeletePartition to partition " << partitionId); Send(partition.Actor, new TEvPQ::TEvDeletePartition); } writeInfo.Deleting = true; } +void TPersQueue::BeginDeletePartitions(const TDistributedTransaction& tx) +{ + if (!tx.WriteId.Defined() || !TxWrites.contains(*tx.WriteId)) { + return; + } + + TTxWriteInfo& writeInfo = TxWrites.at(*tx.WriteId); + BeginDeletePartitions(writeInfo); +} + TString TPersQueue::LogPrefix() const { return TStringBuilder() << "[PQ: " << TabletID() << "] "; } @@ -4685,7 +4995,7 @@ bool TPersQueue::HandleHook(STFUNC_SIG) HFuncTraced(TEvMediatorTimecast::TEvRegisterTabletResult, Handle); HFuncTraced(TEvPQ::TEvCheckPartitionStatusRequest, Handle); HFuncTraced(TEvPQ::TEvPartitionScaleStatusChanged, Handle); - HFuncTraced(NLongTxService::TEvLongTxService::TEvLockStatus, Handle); + hFuncTraced(NLongTxService::TEvLongTxService::TEvLockStatus, Handle); HFuncTraced(TEvPQ::TEvReadingPartitionStatusRequest, Handle); HFuncTraced(TEvPQ::TEvDeletePartitionDone, Handle); HFuncTraced(TEvPQ::TEvTransactionCompleted, Handle); diff --git a/ydb/core/persqueue/pq_impl.h b/ydb/core/persqueue/pq_impl.h index a754c6828749..e7fabe310422 100644 --- a/ydb/core/persqueue/pq_impl.h +++ 
b/ydb/core/persqueue/pq_impl.h @@ -35,6 +35,7 @@ class TPersQueue : public NKeyValue::TKeyValueFlat { READ_CONFIG_COOKIE = 3, WRITE_STATE_COOKIE = 4, WRITE_TX_COOKIE = 5, + READ_TXS_COOKIE = 6, }; void CreatedHook(const TActorContext& ctx) override; @@ -97,7 +98,8 @@ class TPersQueue : public NKeyValue::TKeyValueFlat { //response from KV on READ or WRITE config request void Handle(TEvKeyValue::TEvResponse::TPtr& ev, const TActorContext& ctx); - void HandleConfigReadResponse(const NKikimrClient::TResponse& resp, const TActorContext& ctx); + void HandleConfigReadResponse(NKikimrClient::TResponse&& resp, const TActorContext& ctx); + void HandleTransactionsReadResponse(NKikimrClient::TResponse&& resp, const TActorContext& ctx); void ApplyNewConfigAndReply(const TActorContext& ctx); void ApplyNewConfig(const NKikimrPQ::TPQTabletConfig& newConfig, const TActorContext& ctx); @@ -108,7 +110,7 @@ class TPersQueue : public NKeyValue::TKeyValueFlat { void ReadTxWrites(const NKikimrClient::TKeyValueResponse::TReadResult& read, const TActorContext& ctx); void ReadConfig(const NKikimrClient::TKeyValueResponse::TReadResult& read, - const NKikimrClient::TKeyValueResponse::TReadRangeResult& readRange, + const TVector& readRanges, const TActorContext& ctx); void ReadState(const NKikimrClient::TKeyValueResponse::TReadResult& read, const TActorContext& ctx); @@ -493,16 +495,43 @@ class TPersQueue : public NKeyValue::TKeyValueFlat { bool AllOriginalPartitionsInited() const; - void Handle(NLongTxService::TEvLongTxService::TEvLockStatus::TPtr& ev, const TActorContext& ctx); + void Handle(NLongTxService::TEvLongTxService::TEvLockStatus::TPtr& ev); void Handle(TEvPQ::TEvDeletePartitionDone::TPtr& ev, const TActorContext& ctx); void Handle(TEvPQ::TEvTransactionCompleted::TPtr& ev, const TActorContext& ctx); - void BeginDeleteTx(const TDistributedTransaction& tx); void BeginDeletePartitions(TTxWriteInfo& writeInfo); + void BeginDeletePartitions(const TDistributedTransaction& tx); bool 
CheckTxWriteOperation(const NKikimrPQ::TPartitionOperation& operation, const TWriteId& writeId) const; bool CheckTxWriteOperations(const NKikimrPQ::TDataTransaction& txBody) const; + + void MoveTopTxToCalculating(TDistributedTransaction& tx, const TActorContext& ctx); + void DeletePartition(const TPartitionId& partitionId, const TActorContext& ctx); + + std::deque> PlannedTxs; + + void BeginInitTransactions(); + void EndInitTransactions(); + + void EndReadConfig(const TActorContext& ctx); + + void AddCmdReadTransactionRange(TEvKeyValue::TEvRequest& request, + const TString& fromKey, bool includeFrom); + + NKikimrClient::TResponse ConfigReadResponse; + TVector TransactionsReadResults; + + void SendTransactionsReadRequest(const TString& fromKey, bool includeFrom, + const TActorContext& ctx); + + void AddCmdDeleteTx(NKikimrClient::TKeyValueRequest& request, + ui64 txId); + + bool AllSupportivePartitionsHaveBeenDeleted(const TMaybe& writeId) const; + void DeleteWriteId(const TMaybe& writeId); + + void UpdateReadRuleGenerations(NKikimrPQ::TPQTabletConfig& cfg) const; }; diff --git a/ydb/core/persqueue/read_balancer.cpp b/ydb/core/persqueue/read_balancer.cpp index 495277081072..5fc814b94bc0 100644 --- a/ydb/core/persqueue/read_balancer.cpp +++ b/ydb/core/persqueue/read_balancer.cpp @@ -21,10 +21,12 @@ static constexpr TDuration ACL_SUCCESS_RETRY_TIMEOUT = TDuration::Seconds(30); static constexpr TDuration ACL_ERROR_RETRY_TIMEOUT = TDuration::Seconds(5); static constexpr TDuration ACL_EXPIRATION_TIMEOUT = TDuration::Minutes(5); -NKikimrPQ::EConsumerScalingSupport DefaultScalingSupport() { - // TODO fix me after support of paremeter ConsumerScalingSupport - return AppData()->FeatureFlags.GetEnableTopicSplitMerge() ? 
NKikimrPQ::EConsumerScalingSupport::FULL_SUPPORT - : NKikimrPQ::EConsumerScalingSupport::NOT_SUPPORT; +TString EncodeAnchor(const TString& v) { + auto r = Base64Encode(v); + while (r.EndsWith('=')) { + r.resize(r.size() - 1); + } + return r; } TPersQueueReadBalancer::TPersQueueReadBalancer(const TActorId &tablet, TTabletStorageInfo *info) @@ -185,110 +187,142 @@ bool TPersQueueReadBalancer::OnRenderAppHtmlPage(NMon::TEvRemoteHttpInfo::TPtr e TString TPersQueueReadBalancer::GenerateStat() { auto& metrics = AggregatedStats.Metrics; - auto balancerStatistcs = Balancer->GetStatistics(); TStringStream str; HTML(str) { - TAG(TH2) {str << "PersQueueReadBalancer Tablet";} - TAG(TH3) {str << "Topic: " << Topic;} - TAG(TH3) {str << "Generation: " << Generation;} - TAG(TH3) {str << "Inited: " << Inited;} - TAG(TH3) {str << "ActivePipes: " << balancerStatistcs.Sessions.size();} - if (Inited) { - TAG(TH3) {str << "Active partitions: " << NumActiveParts;} - TAG(TH3) {str << "[Total/Max/Avg]WriteSpeedSec: " << metrics.TotalAvgWriteSpeedPerSec << "/" << metrics.MaxAvgWriteSpeedPerSec << "/" << metrics.TotalAvgWriteSpeedPerSec / NumActiveParts;} - TAG(TH3) {str << "[Total/Max/Avg]WriteSpeedMin: " << metrics.TotalAvgWriteSpeedPerMin << "/" << metrics.MaxAvgWriteSpeedPerMin << "/" << metrics.TotalAvgWriteSpeedPerMin / NumActiveParts;} - TAG(TH3) {str << "[Total/Max/Avg]WriteSpeedHour: " << metrics.TotalAvgWriteSpeedPerHour << "/" << metrics.MaxAvgWriteSpeedPerHour << "/" << metrics.TotalAvgWriteSpeedPerHour / NumActiveParts;} - TAG(TH3) {str << "[Total/Max/Avg]WriteSpeedDay: " << metrics.TotalAvgWriteSpeedPerDay << "/" << metrics.MaxAvgWriteSpeedPerDay << "/" << metrics.TotalAvgWriteSpeedPerDay / NumActiveParts;} - TAG(TH3) {str << "TotalDataSize: " << AggregatedStats.TotalDataSize;} - TAG(TH3) {str << "ReserveSize: " << PartitionReserveSize();} - TAG(TH3) {str << "TotalUsedReserveSize: " << AggregatedStats.TotalUsedReserveSize;} - } + str << ""; + + TAG(TH3) {str << 
"PersQueueReadBalancer " << TabletID() << " (" << Path << ")";} + + auto property = [&](const TString& name, const auto value) { + TABLER() { + TABLED() { str << name;} + TABLED() { str << value; } + } + }; UL_CLASS("nav nav-tabs") { LI_CLASS("active") { - str << "partitions"; + str << "Generic Info"; } - for (auto& consumer : balancerStatistcs.Consumers) { + LI() { + str << "Partitions"; + } + for (auto& [consumerName, _] : Balancer->GetConsumers()) { LI() { - str << "" << NPersQueue::ConvertOldConsumerName(consumer.ConsumerName) << ""; + str << "" << NPersQueue::ConvertOldConsumerName(consumerName) << ""; } } } + DIV_CLASS("tab-content") { - DIV_CLASS_ID("tab-pane fade in active", "main") { - TABLE_SORTABLE_CLASS("table") { - TABLEHEAD() { - TABLER() { - TABLEH() {str << "partition";} - TABLEH() {str << "tabletId";} - } - } + DIV_CLASS_ID("tab-pane fade in active", "generic") { + TABLE_CLASS("tgrid") { TABLEBODY() { - for (auto& p : PartitionsInfo) { - TABLER() { - TABLED() { str << p.first;} - TABLED() { str << p.second.TabletId;} + TABLER() { + TABLED() { + TABLE_CLASS("properties") { + CAPTION() { str << "Tablet info"; } + TABLEBODY() { + property("Topic", Topic); + property("Path", Path); + property("Initialized", Inited ? 
"yes" : "no"); + property("SchemeShard", TStringBuilder() << "" << SchemeShardId << ""); + property("PathId", PathId); + property("Version", Version); + property("Generation", Generation); + } + } + } + TABLED() { + if (Inited) { + TABLE_CLASS("properties") { + CAPTION() { str << "Statistics"; } + TABLEBODY() { + property("Active pipes", Balancer->GetSessions().size()); + property("Active partitions", NumActiveParts); + property("Total data size", AggregatedStats.TotalDataSize); + property("Reserve size", PartitionReserveSize()); + property("Used reserve size", AggregatedStats.TotalUsedReserveSize); + property("[Total/Max/Avg]WriteSpeedSec", TStringBuilder() << metrics.TotalAvgWriteSpeedPerSec << "/" << metrics.MaxAvgWriteSpeedPerSec << "/" << metrics.TotalAvgWriteSpeedPerSec / NumActiveParts); + property("[Total/Max/Avg]WriteSpeedMin", TStringBuilder() << metrics.TotalAvgWriteSpeedPerMin << "/" << metrics.MaxAvgWriteSpeedPerMin << "/" << metrics.TotalAvgWriteSpeedPerMin / NumActiveParts); + property("[Total/Max/Avg]WriteSpeedHour", TStringBuilder() << metrics.TotalAvgWriteSpeedPerHour << "/" << metrics.MaxAvgWriteSpeedPerHour << "/" << metrics.TotalAvgWriteSpeedPerHour / NumActiveParts); + property("[Total/Max/Avg]WriteSpeedDay", TStringBuilder() << metrics.TotalAvgWriteSpeedPerDay << "/" << metrics.MaxAvgWriteSpeedPerDay << "/" << metrics.TotalAvgWriteSpeedPerDay / NumActiveParts); + } + } + } } } } } } - for (auto& consumer : balancerStatistcs.Consumers) { - DIV_CLASS_ID("tab-pane fade", "client_" + Base64Encode(consumer.ConsumerName)) { - TABLE_SORTABLE_CLASS("table") { - TABLEHEAD() { - TABLER() { - TABLEH() {str << "partition";} - TABLEH() {str << "tabletId";} - TABLEH() {str << "state";} - TABLEH() {str << "session";} - } - } - TABLEBODY() { - for (auto& partition : consumer.Partitions) { - TABLER() { - TABLED() { str << partition.PartitionId;} - TABLED() { str << partition.TabletId;} - TABLED() { str << partition.State;} - TABLED() { str << 
partition.Session;} - } - } + + DIV_CLASS_ID("tab-pane fade", "partitions") { + auto partitionAnchor = [&](const ui32 partitionId) { + return TStringBuilder() << "P" << partitionId; + }; + + TABLE_CLASS("table") { + TABLEHEAD() { + TABLER() { + TABLEH() { str << "Partition"; } + TABLEH() { str << "Status"; } + TABLEH() { str << "TabletId"; } + TABLEH() { str << "Parents"; } + TABLEH() { str << "Children"; } + TABLEH() { str << "Size"; } } } + TABLEBODY() { + for (auto& [partitionId, partitionInfo] : PartitionsInfo) { + const auto& stats = AggregatedStats.Stats[partitionId]; + const auto* node = PartitionGraph.GetPartition(partitionId); + TString style = node && node->Children.empty() ? "text-success" : "text-muted"; - TABLE_SORTABLE_CLASS("table") { - TABLEHEAD() { TABLER() { - TABLEH() {str << "session";} - TABLEH() {str << "suspended partitions";} - TABLEH() {str << "active partitions";} - TABLEH() {str << "inactive partitions";} - TABLEH() {str << "total partitions";} - } - } - TABLEBODY() { - - for (auto& session : balancerStatistcs.Sessions) { - TABLER() { - TABLED() { str << session.Session;} - TABLED() { str << session.SuspendedPartitionCount;} - TABLED() { str << session.ActivePartitionCount;} - TABLED() { str << session.InactivePartitionCount;} - TABLED() { str << session.TotalPartitionCount;} + TABLED() { + DIV_CLASS_ID(style, partitionAnchor(partitionId)) { + str << partitionId; + } } - } - - TABLER() { - TABLED() { str << "FREE";} - TABLED() { str << 0;} - TABLED() { str << balancerStatistcs.FreePartitions;} - TABLED() { str << balancerStatistcs.FreePartitions;} + TABLED() { + if (node) { + str << (node->Children.empty() ? 
"Active" : "Inactive"); + if (node->IsRoot()) { + str << " (root)"; + } + } + } + TABLED() { HREF(TStringBuilder() << "?TabletID=" << partitionInfo.TabletId) { str << partitionInfo.TabletId; } } + TABLED() { + if (node) { + for (auto* parent : node->Parents) { + HREF("#" + partitionAnchor(parent->Id)) { str << parent->Id; } + str << ", "; + } + } + } + TABLED() { + if (node) { + for (auto* child : node->Children) { + HREF("#" + partitionAnchor(child->Id)) { str << child->Id; } + str << ", "; + } + } + } + TABLED() { str << stats.DataSize; } } } } } } + + Balancer->RenderApp(str); } } return str.Str(); @@ -503,7 +537,7 @@ void TPersQueueReadBalancer::Handle(TEvPersQueue::TEvUpdateBalancerConfig::TPtr if (SplitMergeEnabled(TabletConfig)) { if (!PartitionsScaleManager) { - PartitionsScaleManager = std::make_unique(Topic, DatabasePath, PathId, Version, TabletConfig); + PartitionsScaleManager = std::make_unique(Topic, Path, DatabasePath, PathId, Version, TabletConfig); } else { PartitionsScaleManager->UpdateBalancerConfig(PathId, Version, TabletConfig); } @@ -689,14 +723,6 @@ void TPersQueueReadBalancer::Handle(TEvPersQueue::TEvStatusResponse::TPtr& ev, c continue; } - auto generation = partRes.GetGeneration(); - auto cookie = partRes.GetCookie(); - for (const auto& consumer : partRes.GetConsumerResult()) { - if (consumer.GetReadingFinished()) { - Balancer->SetCommittedState(consumer.GetConsumer(), partitionId, generation, cookie, ctx); - } - } - if (SplitMergeEnabled(TabletConfig) && PartitionsScaleManager) { PartitionsScaleManager->HandleScaleStatusChange(partitionId, partRes.GetScaleStatus(), ctx); } @@ -708,8 +734,11 @@ void TPersQueueReadBalancer::Handle(TEvPersQueue::TEvStatusResponse::TPtr& ev, c AggregatedStats.Stats[partitionId].HasCounters = true; } + Balancer->Handle(ev, ctx); + if (AggregatedStats.Cookies.empty()) { CheckStat(ctx); + Balancer->ProcessPendingStats(ctx); } } @@ -813,7 +842,6 @@ void TPersQueueReadBalancer::CheckStat(const TActorContext& ctx) { 
NTabletPipe::SendData(ctx, GetPipeClient(SchemeShardId, ctx), ev); - UpdateCounters(ctx); } @@ -1238,16 +1266,20 @@ void TPersQueueReadBalancer::Handle(TEvPersQueue::TEvGetReadSessionsInfo::TPtr& void TPersQueueReadBalancer::Handle(TEvPQ::TEvPartitionScaleStatusChanged::TPtr& ev, const TActorContext& ctx) { if (!SplitMergeEnabled(TabletConfig)) { + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, "Skip TEvPartitionScaleStatusChanged: autopartitioning disabled."); return; } auto& record = ev->Get()->Record; auto* node = PartitionGraph.GetPartition(record.GetPartitionId()); if (!node) { + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, "Skip TEvPartitionScaleStatusChanged: partition " << record.GetPartitionId() << " not found."); return; } if (PartitionsScaleManager) { PartitionsScaleManager->HandleScaleStatusChange(record.GetPartitionId(), record.GetScaleStatus(), ctx); + } else { + LOG_NOTICE_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, "Skip TEvPartitionScaleStatusChanged: scale manager isn`t initialized."); } } diff --git a/ydb/core/persqueue/read_balancer.h b/ydb/core/persqueue/read_balancer.h index e91c14d7b734..3f79f7b75b38 100644 --- a/ydb/core/persqueue/read_balancer.h +++ b/ydb/core/persqueue/read_balancer.h @@ -353,7 +353,7 @@ class TPersQueueReadBalancer : public TActor, public TTa }; -NKikimrPQ::EConsumerScalingSupport DefaultScalingSupport(); +TString EncodeAnchor(const TString& value); } } diff --git a/ydb/core/persqueue/read_balancer__balancing.cpp b/ydb/core/persqueue/read_balancer__balancing.cpp index 4130dd83b749..138d5608e373 100644 --- a/ydb/core/persqueue/read_balancer__balancing.cpp +++ b/ydb/core/persqueue/read_balancer__balancing.cpp @@ -6,6 +6,14 @@ namespace NKikimr::NPQ::NBalancing { +struct LowLoadSessionComparator { + bool operator()(const TSession* lhs, const TSession* rhs) const; +}; + +using TLowLoadOrderedSessions = std::set; + + + // // TPartition // @@ -65,6 +73,7 @@ bool TPartition::Reset() { bool result = 
IsInactive(); ScaleAwareSDK = false; + StartedReadingFromEndOffset = false; ReadingFinished = false; Commited = false; ++Cookie; @@ -284,6 +293,8 @@ void TPartitionFamily::AfterRelease() { Partitions.clear(); Partitions.insert(Partitions.end(), RootPartitions.begin(), RootPartitions.end()); + LockedPartitions.clear(); + ClassifyPartitions(); UpdatePartitionMapping(Partitions); // After reducing the number of partitions in the family, the list of reading sessions that can read this family may expand. @@ -338,7 +349,7 @@ void TPartitionFamily::AttachePartitions(const std::vector& partitions, co } auto [activePartitionCount, inactivePartitionCount] = ClassifyPartitions(newPartitions); - ChangePartitionCounters(activePartitionCount, activePartitionCount); + ChangePartitionCounters(activePartitionCount, inactivePartitionCount); if (IsActive()) { if (!Session->AllPartitionsReadable(newPartitions)) { @@ -388,7 +399,7 @@ void TPartitionFamily::InactivatePartition(ui32 partitionId) { ActivePartitionCount += active; InactivePartitionCount += inactive; - if (IsActive()) { + if (IsActive() && Session) { Session->ActivePartitionCount += active; Session->InactivePartitionCount += inactive; } @@ -408,7 +419,10 @@ void TPartitionFamily::Merge(TPartitionFamily* other) { other->RootPartitions.clear(); WantedPartitions.insert(other->WantedPartitions.begin(), other->WantedPartitions.end()); - WantedPartitions.clear(); + other->WantedPartitions.clear(); + + LockedPartitions.insert(other->LockedPartitions.begin(), other->LockedPartitions.end()); + other->LockedPartitions.clear(); ChangePartitionCounters(other->ActivePartitionCount, other->InactivePartitionCount); other->ChangePartitionCounters(-other->ActivePartitionCount, -other->InactivePartitionCount); @@ -463,7 +477,7 @@ bool TPartitionFamily::PossibleForBalance(TSession* session) { void TPartitionFamily::ClassifyPartitions() { auto [activePartitionCount, inactivePartitionCount] = ClassifyPartitions(Partitions); - 
ChangePartitionCounters(activePartitionCount, inactivePartitionCount); + ChangePartitionCounters(activePartitionCount - ActivePartitionCount, inactivePartitionCount - InactivePartitionCount); } template @@ -533,6 +547,7 @@ std::unique_ptr TPartitionFamily::MakeEvRelea r.SetPath(TopicPath()); r.SetGeneration(TabletGeneration()); r.SetClientId(Session->ClientId); + r.SetCount(1); r.SetGroup(partitionId + 1); ActorIdToProto(Session->Pipe, r.MutablePipeClient()); @@ -703,13 +718,13 @@ bool TConsumer::BreakUpFamily(TPartitionFamily* family, ui32 partitionId, bool d } std::vector members; - GetPartitionGraph().Travers(id, [&](auto childId) { if (partitions.contains(childId)) { - members.push_back(childId); auto [_, i] = processedPartitions.insert(childId); if (!i) { familiesIntersect = true; + } else { + members.push_back(childId); } return true; @@ -717,16 +732,25 @@ bool TConsumer::BreakUpFamily(TPartitionFamily* family, ui32 partitionId, bool d return false; }); - auto* f = CreateFamily({id}, family->Status, ctx); - f->Partitions.insert(f->Partitions.end(), members.begin(), members.end()); + bool locked = family->Session && (family->LockedPartitions.contains(id) || + std::any_of(members.begin(), members.end(), [family](auto id) { return family->LockedPartitions.contains(id); })); + auto* f = CreateFamily({id}, locked ? 
family->Status : TPartitionFamily::EStatus::Free, ctx); f->TargetStatus = family->TargetStatus; - f->Session = family->Session; - f->LockedPartitions = Intercept(family->LockedPartitions, f->Partitions); + f->Partitions.insert(f->Partitions.end(), members.begin(), members.end()); f->LastPipe = family->LastPipe; - if (f->Session) { + f->UpdatePartitionMapping(f->Partitions); + f->ClassifyPartitions(); + if (locked) { + f->LockedPartitions = Intercept(family->LockedPartitions, f->Partitions); + + f->Session = family->Session; f->Session->Families.try_emplace(f->Id, f); + f->Session->ActivePartitionCount += f->ActivePartitionCount; + f->Session->InactivePartitionCount += f->InactivePartitionCount; if (f->IsActive()) { ++f->Session->ActiveFamilyCount; + } else if (f->IsRelesing()) { + ++f->Session->ReleasingFamilyCount; } } @@ -1094,12 +1118,14 @@ void TConsumer::FinishReading(TEvPersQueue::TEvReadingPartitionFinishedRequest:: LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, GetPrefix() << "Reading of the partition " << partitionId << " was finished by " << ConsumerName << " but the partition hasn't family"); + return; } if (!family->Session) { LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, GetPrefix() << "Reading of the partition " << partitionId << " was finished by " << ConsumerName << " but the partition hasn't reading session"); + return; } auto& partition = Partitions[partitionId]; @@ -1139,11 +1165,11 @@ void TConsumer::ScheduleBalance(const TActorContext& ctx) { ctx.Send(Balancer.TopicActor.SelfId(), new TEvPQ::TEvBalanceConsumer(ConsumerName)); } -TOrderedSessions OrderSessions( +TLowLoadOrderedSessions OrderSessions( const std::unordered_map& values, std::function predicate = [](const TSession*) { return true; } ) { - TOrderedSessions result; + TLowLoadOrderedSessions result; for (auto& [_, v] : values) { if (predicate(v)) { result.insert(v); @@ -1227,7 +1253,7 @@ void TConsumer::Balance(const TActorContext& ctx) { } } - TOrderedSessions 
commonSessions = OrderSessions(Sessions, [](auto* session) { + TLowLoadOrderedSessions commonSessions = OrderSessions(Sessions, [](auto* session) { return !session->WithGroups(); }); @@ -1236,7 +1262,7 @@ void TConsumer::Balance(const TActorContext& ctx) { auto families = OrderFamilies(UnreadableFamilies); for (auto it = families.rbegin(); it != families.rend(); ++it) { auto* family = *it; - TOrderedSessions specialSessions; + TLowLoadOrderedSessions specialSessions; auto& sessions = (family->IsCommon()) ? commonSessions : (specialSessions = OrderSessions(family->SpecialSessions)); auto sit = sessions.begin(); @@ -1280,7 +1306,9 @@ void TConsumer::Balance(const TActorContext& ctx) { GetPrefix() << "start rebalancing. familyCount=" << familyCount << ", sessionCount=" << commonSessions.size() << ", desiredFamilyCount=" << desiredFamilyCount << ", allowPlusOne=" << allowPlusOne); - for (auto it = commonSessions.rbegin(); it != commonSessions.rend(); ++it) { + TOrderedSessions orderedSession; + orderedSession.insert(commonSessions.begin(), commonSessions.end()); + for (auto it = orderedSession.begin(); it != orderedSession.end(); ++it) { auto* session = *it; auto targerFamilyCount = desiredFamilyCount + (allowPlusOne ? 
1 : 0); auto families = OrderFamilies(session->Families); @@ -1291,7 +1319,7 @@ void TConsumer::Balance(const TActorContext& ctx) { } } - if (session->ActiveFamilyCount > desiredFamilyCount) { + if (allowPlusOne) { --allowPlusOne; } } @@ -1380,7 +1408,8 @@ TSession::TSession(const TActorId& pipe) , InactivePartitionCount(0) , ReleasingPartitionCount(0) , ActiveFamilyCount(0) - , ReleasingFamilyCount(0) { + , ReleasingFamilyCount(0) + , Order(RandomNumber()) { } bool TSession::WithGroups() const { return !Partitions.empty(); } @@ -1461,50 +1490,15 @@ TConsumer* TBalancer::GetConsumer(const TString& consumerName) { return it->second.get(); } -const TStatistics TBalancer::GetStatistics() const { - TStatistics result; - - result.Consumers.reserve(Consumers.size()); - for (auto& [_, consumer] : Consumers) { - result.Consumers.push_back(TStatistics::TConsumerStatistics()); - auto& c = result.Consumers.back(); - - c.ConsumerName = consumer->ConsumerName; - c.Partitions.reserve(GetPartitionsInfo().size()); - for (auto [partitionId, partitionInfo] : GetPartitionsInfo()) { - c.Partitions.push_back(TStatistics::TConsumerStatistics::TPartitionStatistics()); - auto& p = c.Partitions.back(); - p.PartitionId = partitionId; - p.TabletId = partitionInfo.TabletId; - - auto* family = consumer->FindFamily(partitionId); - if (family && family->Session && family->LockedPartitions.contains(partitionId)) { - p.Session = family->Session->SessionName; - p.State = 1; - } - } - } - - size_t readablePartitionCount = 0; - - result.Sessions.reserve(Sessions.size()); - for (auto& [_, session] : Sessions) { - result.Sessions.push_back(TStatistics::TSessionStatistics()); - auto& s = result.Sessions.back(); - s.Session = session->SessionName; - s.ActivePartitionCount = session->ActivePartitionCount; - s.InactivePartitionCount = session->InactivePartitionCount; - s.SuspendedPartitionCount = session->ReleasingPartitionCount; - s.TotalPartitionCount = s.ActivePartitionCount + s.InactivePartitionCount; 
- - readablePartitionCount += s.TotalPartitionCount; - } - - result.FreePartitions = GetPartitionsInfo().size() - readablePartitionCount; +const std::unordered_map>& TBalancer::GetConsumers() const { + return Consumers; +} - return result; +const std::unordered_map>& TBalancer::GetSessions() const { + return Sessions; } + void TBalancer::UpdateConfig(std::vector addedPartitions, std::vector deletedPartitions, const TActorContext& ctx) { LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, GetPrefix() << "updating configuration. Deleted partitions [" << JoinRange(", ", deletedPartitions.begin(), deletedPartitions.end()) @@ -1629,6 +1623,12 @@ void TBalancer::Handle(TEvPQ::TEvWakeupReleasePartition::TPtr &ev, const TActorC return; } + if (partition->Commited) { + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, + GetPrefix() << "skip releasing partition " << msg->PartitionId << " of consumer \"" << msg->Consumer << "\" by reading finished timeout because offset is commited"); + return; + } + LOG_INFO_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, GetPrefix() << "releasing partition " << msg->PartitionId << " of consumer \"" << msg->Consumer << "\" by reading finished timeout"); @@ -1782,10 +1782,9 @@ void TBalancer::Handle(TEvPersQueue::TEvGetReadSessionsInfo::TPtr& ev, const TAc pi->SetPartition(partitionId); auto* family = consumer->FindFamily(partitionId); - if (family && family->LockedPartitions.contains(partitionId)) { + if (family && family->Session && family->LockedPartitions.contains(partitionId)) { auto* session = family->Session; - Y_ABORT_UNLESS(session != nullptr); pi->SetClientNode(session->ClientNode); pi->SetProxyNodeId(session->ProxyNodeId); pi->SetSession(session->SessionName); @@ -1818,6 +1817,31 @@ void TBalancer::Handle(TEvPQ::TEvBalanceConsumer::TPtr& ev, const TActorContext& } } +void TBalancer::Handle(TEvPersQueue::TEvStatusResponse::TPtr& ev, const TActorContext&) { + const auto& record = ev->Get()->Record; + for (const auto& 
partResult : record.GetPartResult()) { + for (const auto& consumerResult : partResult.GetConsumerResult()) { + PendingUpdates[partResult.GetPartition()].push_back(TData{partResult.GetGeneration(), partResult.GetCookie(), consumerResult.GetConsumer(), consumerResult.GetReadingFinished()}); + } + } +} + +void TBalancer::ProcessPendingStats(const TActorContext& ctx) { + LOG_DEBUG_S(ctx, NKikimrServices::PERSQUEUE_READ_BALANCER, + GetPrefix() << "ProcessPendingStats. PendingUpdates size " << PendingUpdates.size()); + + GetPartitionGraph().Travers([&](ui32 id) { + for (auto& d : PendingUpdates[id]) { + if (d.Commited) { + SetCommittedState(d.Consumer, id, d.Generation, d.Cookie, ctx); + } + } + return true; + }); + + PendingUpdates.clear(); +} + TString TBalancer::GetPrefix() const { return TStringBuilder() << "balancer: [" << TopicActor.TabletID() << "] topic " << Topic() << " "; } @@ -1838,18 +1862,23 @@ bool TPartitionFamilyComparator::operator()(const TPartitionFamily* lhs, const T } bool SessionComparator::operator()(const TSession* lhs, const TSession* rhs) const { + if (lhs->Order != rhs->Order) { + return lhs->Order < rhs->Order; + } + return lhs->SessionName < rhs->SessionName; +} + + +bool LowLoadSessionComparator::operator()(const TSession* lhs, const TSession* rhs) const { if (lhs->ActiveFamilyCount != rhs->ActiveFamilyCount) { return lhs->ActiveFamilyCount < rhs->ActiveFamilyCount; } - if (lhs->ActivePartitionCount != rhs->ActivePartitionCount) { - return lhs->ActivePartitionCount < rhs->ActivePartitionCount; - } - if (lhs->InactivePartitionCount != rhs->InactivePartitionCount) { - return lhs->InactivePartitionCount < rhs->InactivePartitionCount; - } if (lhs->Partitions.size() != rhs->Partitions.size()) { return lhs->Partitions.size() < rhs->Partitions.size(); } + if (lhs->Order != rhs->Order) { + return lhs->Order < rhs->Order; + } return lhs->SessionName < rhs->SessionName; } diff --git a/ydb/core/persqueue/read_balancer__balancing.h 
b/ydb/core/persqueue/read_balancer__balancing.h index a48c2d6637f6..6f98329f3350 100644 --- a/ydb/core/persqueue/read_balancer__balancing.h +++ b/ydb/core/persqueue/read_balancer__balancing.h @@ -276,6 +276,8 @@ struct TSession { // The partition families that are being read by this session. std::unordered_map Families; + size_t Order; + // true if client connected to read from concret partitions bool WithGroups() const; @@ -285,32 +287,6 @@ struct TSession { TString DebugStr() const; }; -struct TStatistics { - struct TConsumerStatistics { - struct TPartitionStatistics { - ui32 PartitionId; - ui64 TabletId = 0; - ui32 State = 0; - TString Session; - }; - - TString ConsumerName; - std::vector Partitions; - }; - - struct TSessionStatistics { - TString Session; - size_t ActivePartitionCount; - size_t InactivePartitionCount; - size_t SuspendedPartitionCount; - size_t TotalPartitionCount; - }; - - std::vector Consumers; - std::vector Sessions; - - size_t FreePartitions; -}; class TBalancer { friend struct TConsumer; @@ -328,7 +304,8 @@ class TBalancer { i32 GetLifetimeSeconds() const; TConsumer* GetConsumer(const TString& consumerName); - const TStatistics GetStatistics() const; + const std::unordered_map>& GetConsumers() const; + const std::unordered_map>& GetSessions() const; void UpdateConfig(std::vector addedPartitions, std::vector deletedPartitions, const TActorContext& ctx); bool SetCommittedState(const TString& consumer, ui32 partitionId, ui32 generation, ui64 cookie, const TActorContext& ctx); @@ -350,6 +327,11 @@ class TBalancer { void Handle(TEvPQ::TEvBalanceConsumer::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPersQueue::TEvStatusResponse::TPtr& ev, const TActorContext& ctx); + void ProcessPendingStats(const TActorContext& ctx); + + void RenderApp(TStringStream& str) const; + private: TString GetPrefix() const; ui32 NextStep(); @@ -361,6 +343,14 @@ class TBalancer { std::unordered_map> Consumers; ui32 Step; + + struct TData { + ui32 Generation; + 
ui64 Cookie; + const TString Consumer; + bool Commited; + }; + std::unordered_map> PendingUpdates; }; } diff --git a/ydb/core/persqueue/read_balancer__balancing_app.cpp b/ydb/core/persqueue/read_balancer__balancing_app.cpp new file mode 100644 index 000000000000..30b64fd1db22 --- /dev/null +++ b/ydb/core/persqueue/read_balancer__balancing_app.cpp @@ -0,0 +1,228 @@ +#include "read_balancer__balancing.h" + +#include + +#define DEBUG(message) + + +namespace NKikimr::NPQ::NBalancing { + +void TBalancer::RenderApp(TStringStream& str) const { + auto& __stream = str; + + for (auto& [consumerName, consumer] : Consumers) { + auto consumerAnchor = "c_" + EncodeAnchor(consumerName); + + auto familyAnchor = [&](const size_t familyId) { + return TStringBuilder() << consumerAnchor << "_F" << familyId; + }; + auto partitionAnchor = [&](const ui32 partitionId) { + return TStringBuilder() << consumerAnchor << "_P" << partitionId; + }; + + DIV_CLASS_ID("tab-pane fade", consumerAnchor) { + TABLE_CLASS("table") { + CAPTION() { str << "Families"; } + TABLEHEAD() { + TABLER() { + TABLEH() { str << "Id"; } + TABLEH() { str << "Status"; } + TABLEH() { str << "Partitions"; } + TABLEH() { str << "Session"; } + TABLEH() { str << "Statistics"; } + } + } + + TABLEBODY() { + for (auto& [familyId, family] : consumer->Families) { + TABLER() { + TABLED() { DIV_CLASS_ID("text-info", familyAnchor(familyId)) { str << familyId; } } + TABLED() { str << family->Status; } + TABLED() { + for (auto partitionId : family->Partitions) { + HREF("#" + partitionAnchor(partitionId)) { str << partitionId; } + str << ", "; + } + } + TABLED() { str << (family->Session ? 
family->Session->SessionName : ""); } + TABLED() { str << "Active " << family->ActivePartitionCount << " / Inactive " << family->InactivePartitionCount << " / Locked " << family->LockedPartitions.size(); } + } + } + } + } + + size_t free = 0; + size_t finished = 0; + size_t read = 0; + size_t ready = 0; + + TABLE_CLASS("table") { + CAPTION() { str << "Partitions"; } + TABLEHEAD() { + TABLER() { + TABLEH() { str << "Id"; } + TABLEH() { str << "Family"; } + TABLEH() { str << "Status"; }; + TABLEH() { str << "Parents"; } + TABLEH() { str << "Description"; } + TABLEH() { str << "P Generation"; } + TABLEH() { str << "P Cookie"; } + } + } + + TABLEBODY() { + for (auto& [partitionId, partition] : consumer->Partitions) { + const auto* family = consumer->FindFamily(partitionId); + const auto* node = consumer->GetPartitionGraph().GetPartition(partitionId); + TString style = node && node->Children.empty() ? "text-success" : "text-muted"; + auto* partitionInfo = GetPartitionInfo(partitionId); + + TABLER() { + TABLED() { DIV_CLASS_ID(style, partitionAnchor(partitionId)) { + str << partitionId << " "; + if (partitionInfo) { + HREF(TStringBuilder() << "?TabletID=" << partitionInfo->TabletId) { str << "#"; } + } + } } + TABLED() { + if (family) { + HREF("#" + familyAnchor(family->Id)) { str << family->Id; } + } + } + TABLED() { + if (family) { + if (partition.IsInactive()) { + str << "Finished"; + ++finished; + } else { + str << "Read"; + ++read; + } + } else if (consumer->IsReadable(partitionId)) { + str << "Ready"; + ++ready; + } else { + str << "Free"; + ++free; + } + } + TABLED() { + if (node) { + for (auto* parent : node->Parents) { + HREF("#" + partitionAnchor(parent->Id)) { str << parent->Id; } + str << ", "; + } + } else { + str << "error: not found"; + } + } + TABLED() { + if (partition.Commited) { + str << "commited"; + } else if (partition.ReadingFinished) { + if (partition.ScaleAwareSDK) { + str << "reading child"; + } else if (partition.StartedReadingFromEndOffset) { 
+ str << "finished"; + } else { + str << "scheduled. iteration: " << partition.Iteration; + } + } else if (partition.Iteration) { + str << "iteration: " << partition.Iteration; + } + } + TABLED() { str << partition.PartitionGeneration; } + TABLED() { str << partition.PartitionCookie; } + } + } + } + } + + TABLE_CLASS("table") { + CAPTION() { str << "Statistics"; } + TABLEBODY() { + TABLER() { + TABLED() { str << "Free"; } + TABLED() { str << free; } + } + TABLER() { + TABLED() { str << "Ready"; } + TABLED() { str << ready; } + } + TABLER() { + TABLED() { str << "Read"; } + TABLED() { str << read; } + } + TABLER() { + TABLED() { str << "Finished"; } + TABLED() { str << finished; } + } + TABLER() { + TABLED() { STRONG() { str << "Total"; }} + TABLED() { str << (finished + read + ready + free); } + } + } + } + + TABLE_CLASS("table") { + CAPTION() { str << "Sessions"; } + TABLEHEAD() { + TABLER() { + TABLEH() { } + TABLEH() { str << "Id"; } + TABLEH() { str << "Partitions"; } + TABLEH() { str << "Families"; } + TABLEH() { str << "Statistics"; }; + TABLEH() { str << "Client node"; } + TABLEH() { str << "Proxy node"; } + } + } + TABLEBODY() { + size_t familyAllCount = 0; + size_t activeFamilyCount = 0; + size_t releasingFamilyCount = 0; + size_t activePartitionCount = 0; + size_t inactivePartitionCount = 0; + size_t releasingPartitionCount = 0; + + size_t i = 0; + for (auto& [pipe, session] : Sessions) { + if (session->ClientId != consumerName) { + continue; + } + + familyAllCount += session->Families.size(); + activeFamilyCount += session->ActiveFamilyCount; + releasingFamilyCount += session->ReleasingFamilyCount; + activePartitionCount += session->ActivePartitionCount; + inactivePartitionCount += session->InactivePartitionCount; + releasingPartitionCount += session->ReleasingPartitionCount; + + TABLER() { + TABLED() { str << ++i; } + TABLED() { str << session->SessionName; } + TABLED() { str << (session->Partitions.empty() ? 
"" : JoinRange(", ", session->Partitions.begin(), session->Partitions.end())); } + TABLED() { str << session->Families.size() << " / " << session->ActiveFamilyCount << " / " << session->ReleasingFamilyCount; } + TABLED() { str << (session->ActivePartitionCount + session->InactivePartitionCount + session->ReleasingPartitionCount) + << " / " << session->ActivePartitionCount << " / " << session->InactivePartitionCount << " / " << session->ReleasingPartitionCount; } + TABLED() { str << session->ClientNode; } + TABLED() { str << session->ProxyNodeId; } + } + } + TABLER() { + TABLED() { } + TABLED() { str << "Total:"; } + TABLED() { } + TABLED() { str << familyAllCount << " / " << activeFamilyCount << " / " << releasingFamilyCount; } + TABLED() { str << (activePartitionCount + inactivePartitionCount + releasingPartitionCount) << " / " << activePartitionCount << " / " + << inactivePartitionCount << " / " << releasingPartitionCount; } + TABLED() { } + TABLED() { } + } + } + } + } + } +} + +} diff --git a/ydb/core/persqueue/read_balancer__txinit.h b/ydb/core/persqueue/read_balancer__txinit.h index cc9a26ad4678..8b2367a15489 100644 --- a/ydb/core/persqueue/read_balancer__txinit.h +++ b/ydb/core/persqueue/read_balancer__txinit.h @@ -60,7 +60,7 @@ struct TPersQueueReadBalancer::TTxInit : public ITransaction { Self->PartitionGraph = MakePartitionGraph(Self->TabletConfig); if (SplitMergeEnabled(Self->TabletConfig)) { - Self->PartitionsScaleManager = std::make_unique(Self->Topic, Self->DatabasePath, Self->PathId, Self->Version, Self->TabletConfig); + Self->PartitionsScaleManager = std::make_unique(Self->Topic, Self->Path, Self->DatabasePath, Self->PathId, Self->Version, Self->TabletConfig); } Self->UpdateConfigCounters(); } diff --git a/ydb/core/persqueue/sourceid.cpp b/ydb/core/persqueue/sourceid.cpp index e73e950b8917..3ffae841c6c0 100644 --- a/ydb/core/persqueue/sourceid.cpp +++ b/ydb/core/persqueue/sourceid.cpp @@ -530,55 +530,60 @@ void THeartbeatEmitter::Process(const 
TString& sourceId, THeartbeat&& heartbeat) TMaybe THeartbeatEmitter::CanEmit() const { if (Storage.ExplicitSourceIds.size() != (Storage.SourceIdsWithHeartbeat.size() + NewSourceIdsWithHeartbeat.size())) { + // there is no quorum return Nothing(); } if (SourceIdsByHeartbeat.empty()) { + // there is no new heartbeats, nothing to emit return Nothing(); } - if (!NewSourceIdsWithHeartbeat.empty()) { // just got quorum - if (!Storage.SourceIdsByHeartbeat.empty() && Storage.SourceIdsByHeartbeat.begin()->first < SourceIdsByHeartbeat.begin()->first) { + if (Storage.SourceIdsByHeartbeat.empty()) { + // got quorum, memory state + return GetFromDiff(SourceIdsByHeartbeat.begin()); + } + + if (!NewSourceIdsWithHeartbeat.empty()) { + // got quorum, mixed state + if (Storage.SourceIdsByHeartbeat.begin()->first < SourceIdsByHeartbeat.begin()->first) { return GetFromStorage(Storage.SourceIdsByHeartbeat.begin()); } else { return GetFromDiff(SourceIdsByHeartbeat.begin()); } - } else if (SourceIdsByHeartbeat.begin()->first > Storage.SourceIdsByHeartbeat.begin()->first) { - auto storage = Storage.SourceIdsByHeartbeat.begin(); - auto diff = SourceIdsByHeartbeat.begin(); - - TMaybe newVersion; - while (storage != Storage.SourceIdsByHeartbeat.end()) { - const auto& [version, sourceIds] = *storage; - - auto rest = sourceIds.size(); - for (const auto& sourceId : sourceIds) { - auto it = Heartbeats.find(sourceId); - if (it != Heartbeats.end() && it->second.Version > version && version <= diff->first) { - --rest; - } else { - break; - } - } + } - if (!rest) { - if (++storage != Storage.SourceIdsByHeartbeat.end()) { - newVersion = storage->first; - } else { - newVersion = diff->first; - } + TMaybe emitVersion; + + for (auto it = Storage.SourceIdsByHeartbeat.begin(), end = Storage.SourceIdsByHeartbeat.end(); it != end; ++it) { + const auto& [version, sourceIds] = *it; + auto rest = sourceIds.size(); + + for (const auto& sourceId : sourceIds) { + if (Heartbeats.contains(sourceId) && 
Heartbeats.at(sourceId).Version > version) { + --rest; } else { break; } } - if (newVersion) { - storage = Storage.SourceIdsByHeartbeat.find(*newVersion); - if (storage != Storage.SourceIdsByHeartbeat.end()) { - return GetFromStorage(storage); - } else { - return GetFromDiff(diff); - } + if (rest) { + break; + } + + if (auto next = std::next(it); next != end && next->first < SourceIdsByHeartbeat.begin()->first) { + emitVersion = next->first; + } else { + emitVersion = SourceIdsByHeartbeat.begin()->first; + break; + } + } + + if (emitVersion) { + if (auto it = Storage.SourceIdsByHeartbeat.find(*emitVersion); it != Storage.SourceIdsByHeartbeat.end()) { + return GetFromStorage(it); + } else { + return GetFromDiff(SourceIdsByHeartbeat.begin()); } } diff --git a/ydb/core/persqueue/sourceid.h b/ydb/core/persqueue/sourceid.h index 992e1271c847..775b5878c8fd 100644 --- a/ydb/core/persqueue/sourceid.h +++ b/ydb/core/persqueue/sourceid.h @@ -51,6 +51,7 @@ class TSourceIdStorage: private THeartbeatProcessor { void RegisterSourceId(const TString& sourceId, Args&&... args) { RegisterSourceIdInfo(sourceId, TSourceIdInfo(std::forward(args)...), false); } + void RegisterSourceIdInfo(const TString& sourceId, TSourceIdInfo&& sourceIdInfo, bool load); void DeregisterSourceId(const TString& sourceId); @@ -65,7 +66,6 @@ class TSourceIdStorage: private THeartbeatProcessor { private: void LoadRawSourceIdInfo(const TString& key, const TString& data, TInstant now); void LoadProtoSourceIdInfo(const TString& key, const TString& data); - void RegisterSourceIdInfo(const TString& sourceId, TSourceIdInfo&& sourceIdInfo, bool load); private: TSourceIdMap InMemorySourceIds; @@ -85,6 +85,10 @@ class TSourceIdWriter { return Registrations; } + const THashSet& GetSourceIdsToDelete() const { + return Deregistrations; + } + template void RegisterSourceId(const TString& sourceId, Args&&... 
args) { Registrations[sourceId] = TSourceIdInfo(std::forward(args)...); diff --git a/ydb/core/persqueue/transaction.cpp b/ydb/core/persqueue/transaction.cpp index 50fc8e21bfc6..8bc361b938a1 100644 --- a/ydb/core/persqueue/transaction.cpp +++ b/ydb/core/persqueue/transaction.cpp @@ -16,11 +16,29 @@ TDistributedTransaction::TDistributedTransaction(const NKikimrPQ::TTransaction& MinStep = tx.GetMinStep(); MaxStep = tx.GetMaxStep(); - for (ui64 tabletId : tx.GetSenders()) { - Senders.insert(tabletId); + ReadSetCount = 0; + + for (auto& p : tx.GetPredicatesReceived()) { + PredicatesReceived[p.GetTabletId()] = p; + + if (p.HasPredicate()) { + SetDecision(ParticipantsDecision, + p.GetPredicate() ? NKikimrTx::TReadSetData::DECISION_COMMIT : NKikimrTx::TReadSetData::DECISION_ABORT); + ++ReadSetCount; + } } - for (ui64 tabletId : tx.GetReceivers()) { - Receivers.insert(tabletId); + + PredicateAcksCount = 0; + + for (ui64 tabletId : tx.GetPredicateRecipients()) { + PredicateRecipients[tabletId] = false; + } + + if (tx.HasPredicate()) { + SelfDecision = + tx.GetPredicate() ? NKikimrTx::TReadSetData::DECISION_COMMIT : NKikimrTx::TReadSetData::DECISION_ABORT; + } else { + SelfDecision = NKikimrTx::TReadSetData::DECISION_UNKNOWN; } switch (Kind) { @@ -34,15 +52,6 @@ TDistributedTransaction::TDistributedTransaction(const NKikimrPQ::TTransaction& Y_FAIL_S("unknown transaction type"); } - if (tx.HasSelfPredicate()) { - SelfDecision = - tx.GetSelfPredicate() ? NKikimrTx::TReadSetData::DECISION_COMMIT : NKikimrTx::TReadSetData::DECISION_ABORT; - } - if (tx.HasAggrPredicate()) { - ParticipantsDecision = - tx.GetAggrPredicate() ? 
NKikimrTx::TReadSetData::DECISION_COMMIT : NKikimrTx::TReadSetData::DECISION_ABORT; - } - Y_ABORT_UNLESS(tx.HasSourceActor()); SourceActor = ActorIdFromProto(tx.GetSourceActor()); @@ -51,6 +60,11 @@ TDistributedTransaction::TDistributedTransaction(const NKikimrPQ::TTransaction& } } +TString TDistributedTransaction::LogPrefix() const +{ + return TStringBuilder() << "[TxId: " << TxId << "] "; +} + void TDistributedTransaction::InitDataTransaction(const NKikimrPQ::TTransaction& tx) { InitPartitions(tx.GetOperations()); @@ -115,6 +129,11 @@ void TDistributedTransaction::OnProposeTransaction(const NKikimrPQ::TEvProposeTr Y_FAIL_S("unknown TxBody case"); } + PartitionRepliesCount = 0; + PartitionRepliesExpected = 0; + + ReadSetCount = 0; + Y_ABORT_UNLESS(event.HasSourceActor()); SourceActor = ActorIdFromProto(event.GetSourceActor()); } @@ -124,15 +143,15 @@ void TDistributedTransaction::OnProposeTransaction(const NKikimrPQ::TDataTransac { Kind = NKikimrPQ::TTransaction::KIND_DATA; - for (ui64 tablet : txBody.GetSendingShards()) { - if (tablet != extractTabletId) { - Senders.insert(tablet); + for (ui64 tabletId : txBody.GetSendingShards()) { + if (tabletId != extractTabletId) { + PredicatesReceived[tabletId].SetTabletId(tabletId); } } - for (ui64 tablet : txBody.GetReceivingShards()) { - if (tablet != extractTabletId) { - Receivers.insert(tablet); + for (ui64 tabletId : txBody.GetReceivingShards()) { + if (tabletId != extractTabletId) { + PredicateRecipients[tabletId] = false; } } @@ -143,11 +162,6 @@ void TDistributedTransaction::OnProposeTransaction(const NKikimrPQ::TDataTransac } else { WriteId = Nothing(); } - - PartitionRepliesCount = 0; - PartitionRepliesExpected = 0; - - ReadSetCount = 0; } void TDistributedTransaction::OnProposeTransaction(const NKikimrPQ::TConfigTransaction& txBody, @@ -172,7 +186,7 @@ void TDistributedTransaction::OnProposeTransaction(const NKikimrPQ::TConfigTrans if (node->Children.empty()) { for (const auto* r : node->Parents) { if 
(extractTabletId != r->TabletId) { - Senders.insert(r->TabletId); + PredicatesReceived[r->TabletId].SetTabletId(r->TabletId); } } } @@ -180,18 +194,13 @@ void TDistributedTransaction::OnProposeTransaction(const NKikimrPQ::TConfigTrans for (const auto* r : node->Children) { if (r->Children.empty()) { if (extractTabletId != r->TabletId) { - Receivers.insert(r->TabletId); + PredicateRecipients[r->TabletId] = false; } } } } InitPartitions(); - - PartitionRepliesCount = 0; - PartitionRepliesExpected = 0; - - ReadSetCount = 0; } void TDistributedTransaction::OnPlanStep(ui64 step) @@ -204,44 +213,65 @@ void TDistributedTransaction::OnPlanStep(ui64 step) void TDistributedTransaction::OnTxCalcPredicateResult(const TEvPQ::TEvTxCalcPredicateResult& event) { - OnPartitionResult(event, - event.Predicate ? NKikimrTx::TReadSetData::DECISION_COMMIT : NKikimrTx::TReadSetData::DECISION_ABORT); + PQ_LOG_D("Handle TEvTxCalcPredicateResult"); + + TMaybe decision; + + if (event.Predicate.Defined()) { + decision = *event.Predicate ? 
NKikimrTx::TReadSetData::DECISION_COMMIT : NKikimrTx::TReadSetData::DECISION_ABORT; + } + + OnPartitionResult(event, decision); } void TDistributedTransaction::OnProposePartitionConfigResult(const TEvPQ::TEvProposePartitionConfigResult& event) { + PQ_LOG_D("Handle TEvProposePartitionConfigResult"); + OnPartitionResult(event, NKikimrTx::TReadSetData::DECISION_COMMIT); } template -void TDistributedTransaction::OnPartitionResult(const E& event, EDecision decision) +void TDistributedTransaction::OnPartitionResult(const E& event, TMaybe decision) { Y_ABORT_UNLESS(Step == event.Step); Y_ABORT_UNLESS(TxId == event.TxId); Y_ABORT_UNLESS(Partitions.contains(event.Partition.OriginalPartitionId)); - SetDecision(SelfDecision, decision); + if (decision.Defined()) { + SetDecision(SelfDecision, *decision); + } ++PartitionRepliesCount; + + PQ_LOG_D("Partition responses " << PartitionRepliesCount << "/" << PartitionRepliesExpected); } void TDistributedTransaction::OnReadSet(const NKikimrTx::TEvReadSet& event, const TActorId& sender, std::unique_ptr ack) { + PQ_LOG_D("Handle TEvReadSet"); + Y_ABORT_UNLESS((Step == Max()) || (event.HasStep() && (Step == event.GetStep()))); Y_ABORT_UNLESS(event.HasTxId() && (TxId == event.GetTxId())); - if (Senders.contains(event.GetTabletProducer())) { + if (PredicatesReceived.contains(event.GetTabletProducer())) { NKikimrTx::TReadSetData data; Y_ABORT_UNLESS(event.HasReadSet() && data.ParseFromString(event.GetReadSet())); SetDecision(ParticipantsDecision, data.GetDecision()); ReadSetAcks[sender] = std::move(ack); - ++ReadSetCount; + auto& p = PredicatesReceived[event.GetTabletProducer()]; + if (!p.HasPredicate()) { + p.SetPredicate(data.GetDecision() == NKikimrTx::TReadSetData::DECISION_COMMIT); + ++ReadSetCount; + + PQ_LOG_D("Predicates " << ReadSetCount << "/" << PredicatesReceived.size()); + } } else { Y_DEBUG_ABORT("unknown sender tablet %" PRIu64, event.GetTabletProducer()); } @@ -249,10 +279,17 @@ void TDistributedTransaction::OnReadSet(const 
NKikimrTx::TEvReadSet& event, void TDistributedTransaction::OnReadSetAck(const NKikimrTx::TEvReadSetAck& event) { + PQ_LOG_D("Handle TEvReadSetAck"); + Y_ABORT_UNLESS(event.HasStep() && (Step == event.GetStep())); Y_ABORT_UNLESS(event.HasTxId() && (TxId == event.GetTxId())); - Receivers.erase(event.GetTabletConsumer()); + if (PredicateRecipients.contains(event.GetTabletConsumer())) { + PredicateRecipients[event.GetTabletConsumer()] = true; + ++PredicateAcksCount; + + PQ_LOG_D("Predicate acks " << PredicateAcksCount << "/" << PredicateRecipients.size()); + } } void TDistributedTransaction::OnTxCommitDone(const TEvPQ::TEvTxCommitDone& event) @@ -271,7 +308,7 @@ auto TDistributedTransaction::GetDecision() const -> EDecision constexpr EDecision abort = NKikimrTx::TReadSetData::DECISION_ABORT; constexpr EDecision unknown = NKikimrTx::TReadSetData::DECISION_UNKNOWN; - EDecision aggrDecision = Senders.empty() ? commit : ParticipantsDecision; + const EDecision aggrDecision = PredicatesReceived.empty() ? 
commit : ParticipantsDecision; if ((SelfDecision == commit) && (aggrDecision == commit)) { return commit; @@ -286,14 +323,15 @@ auto TDistributedTransaction::GetDecision() const -> EDecision bool TDistributedTransaction::HaveParticipantsDecision() const { return - (Senders.size() == ReadSetCount) && + (PredicatesReceived.size() == ReadSetCount) && (ParticipantsDecision != NKikimrTx::TReadSetData::DECISION_UNKNOWN) || - Senders.empty(); + PredicatesReceived.empty(); } bool TDistributedTransaction::HaveAllRecipientsReceive() const { - return Receivers.empty(); + PQ_LOG_D("PredicateAcks: " << PredicateAcksCount << "/" << PredicateRecipients.size()); + return PredicateRecipients.size() == PredicateAcksCount; } void TDistributedTransaction::AddCmdWrite(NKikimrClient::TKeyValueRequest& request, @@ -321,6 +359,18 @@ void TDistributedTransaction::AddCmdWrite(NKikimrClient::TKeyValueRequest& reque Y_FAIL_S("unknown transaction type"); } + tx.MutableOperations()->Add(Operations.begin(), Operations.end()); + if (SelfDecision != NKikimrTx::TReadSetData::DECISION_UNKNOWN) { + tx.SetPredicate(SelfDecision == NKikimrTx::TReadSetData::DECISION_COMMIT); + } + + for (auto& [_, predicate] : PredicatesReceived) { + *tx.AddPredicatesReceived() = predicate; + } + for (auto& [tabletId, _] : PredicateRecipients) { + tx.AddPredicateRecipients(tabletId); + } + Y_ABORT_UNLESS(SourceActor != TActorId()); ActorIdToProto(SourceActor, tx.MutableSourceActor()); @@ -336,19 +386,6 @@ void TDistributedTransaction::AddCmdWrite(NKikimrClient::TKeyValueRequest& reque void TDistributedTransaction::AddCmdWriteDataTx(NKikimrPQ::TTransaction& tx) { - for (ui64 tabletId : Senders) { - tx.AddSenders(tabletId); - } - for (ui64 tabletId : Receivers) { - tx.AddReceivers(tabletId); - } - tx.MutableOperations()->Add(Operations.begin(), Operations.end()); - if (SelfDecision != NKikimrTx::TReadSetData::DECISION_UNKNOWN) { - tx.SetSelfPredicate(SelfDecision == NKikimrTx::TReadSetData::DECISION_COMMIT); - } - if 
(ParticipantsDecision != NKikimrTx::TReadSetData::DECISION_UNKNOWN) { - tx.SetAggrPredicate(ParticipantsDecision == NKikimrTx::TReadSetData::DECISION_COMMIT); - } if (WriteId.Defined()) { SetWriteId(tx, *WriteId); } @@ -360,16 +397,6 @@ void TDistributedTransaction::AddCmdWriteConfigTx(NKikimrPQ::TTransaction& tx) *tx.MutableBootstrapConfig() = BootstrapConfig; } -void TDistributedTransaction::AddCmdDelete(NKikimrClient::TKeyValueRequest& request) -{ - TString key = GetKey(); - auto range = request.AddCmdDeleteRange()->MutableRange(); - range->SetFrom(key); - range->SetIncludeFrom(true); - range->SetTo(key); - range->SetIncludeTo(true); -} - void TDistributedTransaction::SetDecision(NKikimrTx::TReadSetData::EDecision& var, NKikimrTx::TReadSetData::EDecision value) { if ((var == NKikimrTx::TReadSetData::DECISION_UNKNOWN) || (value == NKikimrTx::TReadSetData::DECISION_ABORT)) { diff --git a/ydb/core/persqueue/transaction.h b/ydb/core/persqueue/transaction.h index 151dac913233..549a10252062 100644 --- a/ydb/core/persqueue/transaction.h +++ b/ydb/core/persqueue/transaction.h @@ -46,8 +46,8 @@ struct TDistributedTransaction { EState State = NKikimrPQ::TTransaction::UNKNOWN; ui64 MinStep = Max(); ui64 MaxStep = Max(); - THashSet Senders; // список отправителей TEvReadSet - THashSet Receivers; // список получателей TEvReadSet + THashMap PredicatesReceived; + THashMap PredicateRecipients; TVector Operations; TMaybe WriteId; @@ -75,7 +75,6 @@ struct TDistributedTransaction { bool HaveAllRecipientsReceive() const; void AddCmdWrite(NKikimrClient::TKeyValueRequest& request, EState state); - void AddCmdDelete(NKikimrClient::TKeyValueRequest& request); static void SetDecision(NKikimrTx::TReadSetData::EDecision& var, NKikimrTx::TReadSetData::EDecision value); @@ -91,7 +90,9 @@ struct TDistributedTransaction { void InitPartitions(); template - void OnPartitionResult(const E& event, EDecision decision); + void OnPartitionResult(const E& event, TMaybe decision); + + TString 
LogPrefix() const; struct TSerializedMessage { ui32 Type; @@ -111,6 +112,7 @@ struct TDistributedTransaction { const TVector& GetBindedMsgs(ui64 tabletId); bool HasWriteOperations = false; + size_t PredicateAcksCount = 0; }; } diff --git a/ydb/core/persqueue/ut/common/autoscaling_ut_common.cpp b/ydb/core/persqueue/ut/common/autoscaling_ut_common.cpp index 4f01439c8601..db447a2c5558 100644 --- a/ydb/core/persqueue/ut/common/autoscaling_ut_common.cpp +++ b/ydb/core/persqueue/ut/common/autoscaling_ut_common.cpp @@ -96,7 +96,7 @@ TTopicSdkTestSetup CreateSetup() { NKikimrConfig::TFeatureFlags ff; ff.SetEnableTopicSplitMerge(true); ff.SetEnablePQConfigTransactionsAtSchemeShard(true); - //ff.SetEnableTopicServiceTx(true); + ff.SetEnableTopicServiceTx(true); auto settings = TTopicSdkTestSetup::MakeServerSettings(); settings.SetFeatureFlags(ff); diff --git a/ydb/core/persqueue/ut/common/pq_ut_common.cpp b/ydb/core/persqueue/ut/common/pq_ut_common.cpp index b0f3c7339d15..9c2ba84317ca 100644 --- a/ydb/core/persqueue/ut/common/pq_ut_common.cpp +++ b/ydb/core/persqueue/ut/common/pq_ut_common.cpp @@ -44,7 +44,7 @@ void PQTabletPrepare(const TTabletPreparationParameters& parameters, try { runtime.ResetScheduledCount(); - THolder request(new TEvPersQueue::TEvUpdateConfig()); + auto request = MakeHolder(); for (ui32 i = 0; i < parameters.partitions; ++i) { request->Record.MutableTabletConfig()->AddPartitionIds(i); } diff --git a/ydb/core/persqueue/ut/internals_ut.cpp b/ydb/core/persqueue/ut/internals_ut.cpp index 14c3414e2bbb..a151447b4f3d 100644 --- a/ydb/core/persqueue/ut/internals_ut.cpp +++ b/ydb/core/persqueue/ut/internals_ut.cpp @@ -39,38 +39,38 @@ void Test(bool headCompacted, ui32 parts, ui32 partSize, ui32 leftInHead) THead head; head.Offset = 100; TString value(100_KB, 'a'); - head.Batches.push_back(TBatch(head.Offset, 0, TVector())); + head.AddBatch(TBatch(head.Offset, 0)); for (ui32 i = 0; i < 50; ++i) { - head.Batches.back().AddBlob(TClientBlob( + 
head.AddBlob(TClientBlob( "sourceId" + TString(1,'a' + rand() % 26), i + 1, value, TMaybe(), TInstant::MilliSeconds(i + 1), TInstant::MilliSeconds(i + 1), 1, "", "" )); if (!headCompacted) - all.push_back(head.Batches.back().Blobs.back()); + all.push_back(head.GetLastBatch().Blobs.back()); } - head.Batches.back().Pack(); - UNIT_ASSERT(head.Batches.back().Header.GetFormat() == NKikimrPQ::TBatchHeader::ECompressed); - head.Batches.back().Unpack(); - head.Batches.back().Pack(); + head.MutableLastBatch().Pack(); + UNIT_ASSERT(head.GetLastBatch().Header.GetFormat() == NKikimrPQ::TBatchHeader::ECompressed); + head.MutableLastBatch().Unpack(); + head.MutableLastBatch().Pack(); TString str; - head.Batches.back().SerializeTo(str); + head.GetLastBatch().SerializeTo(str); auto header = ExtractHeader(str.c_str(), str.size()); TBatch batch(header, str.c_str() + header.ByteSize() + sizeof(ui16)); batch.Unpack(); - head.PackedSize = head.Batches.back().GetPackedSize(); - UNIT_ASSERT(head.Batches.back().GetUnpackedSize() + GetMaxHeaderSize() >= head.Batches.back().GetPackedSize()); + head.PackedSize = head.GetLastBatch().GetPackedSize(); + UNIT_ASSERT(head.GetLastBatch().GetUnpackedSize() + GetMaxHeaderSize() >= head.GetLastBatch().GetPackedSize()); THead newHead; newHead.Offset = head.GetNextOffset(); - newHead.Batches.push_back(TBatch(newHead.Offset, 0, TVector())); + newHead.AddBatch(TBatch(newHead.Offset, 0)); for (ui32 i = 0; i < 10; ++i) { - newHead.Batches.back().AddBlob(TClientBlob( + newHead.AddBlob(TClientBlob( "sourceId2", i + 1, value, TMaybe(), TInstant::MilliSeconds(i + 1000), TInstant::MilliSeconds(i + 1000), 1, "", "" )); - all.push_back(newHead.Batches.back().Blobs.back()); //newHead always glued + all.push_back(newHead.GetLastBatch().Blobs.back()); //newHead always glued } - newHead.PackedSize = newHead.Batches.back().GetUnpackedSize(); + newHead.PackedSize = newHead.GetLastBatch().GetUnpackedSize(); TString value2(partSize, 'b'); ui32 maxBlobSize = 8 << 20; 
TPartitionedBlob blob(TPartitionId(0), newHead.GetNextOffset(), "sourceId3", 1, parts, parts * value2.size(), head, newHead, headCompacted, false, maxBlobSize); @@ -125,16 +125,16 @@ void Test(bool headCompacted, ui32 parts, ui32 partSize, ui32 leftInHead) if (formed.empty()) { //nothing compacted - newHead must be here if (!headCompacted) { - for (auto& p : head.Batches) { - p.Unpack(); - for (const auto& b : p.Blobs) + for (ui32 pp = 0; pp < head.GetBatches().size(); ++pp) { + head.MutableBatch(pp).Unpack(); + for (const auto& b : head.GetBatch(pp).Blobs) real.push_back(b); } } - for (auto& p : newHead.Batches) { - p.Unpack(); - for (const auto& b : p.Blobs) + for (ui32 pp = 0; pp < newHead.GetBatches().size(); ++pp) { + newHead.MutableBatch(pp).Unpack(); + for (const auto& b : newHead.GetBatch(pp).Blobs) real.push_back(b); } } diff --git a/ydb/core/persqueue/ut/make_config.cpp b/ydb/core/persqueue/ut/make_config.cpp index c3cbe325cc75..eb9ae5e6486f 100644 --- a/ydb/core/persqueue/ut/make_config.cpp +++ b/ydb/core/persqueue/ut/make_config.cpp @@ -1,27 +1,44 @@ #include "make_config.h" #include +#include #include namespace NKikimr::NPQ::NHelpers { -NKikimrPQ::TPQTabletConfig MakeConfig(ui64 version, - const TVector& consumers, - ui32 partitionsCount, - NKikimrPQ::TPQTabletConfig::EMeteringMode meteringMode) +NKikimrPQ::TPQTabletConfig MakeConfig(const TMakeConfigParams& params) { NKikimrPQ::TPQTabletConfig config; - config.SetVersion(version); + config.SetVersion(params.Version); - for (auto& c : consumers) { + for (auto& c : params.Consumers) { config.AddReadRules(c.Consumer); config.AddReadRuleGenerations(c.Generation); } - for (ui32 id = 0; id < partitionsCount; ++id) { - config.AddPartitionIds(id); + for (const auto& e : params.AllPartitions) { + auto* p = config.AddAllPartitions(); + p->SetPartitionId(e.Id); + p->SetTabletId(e.TabletId); + for (auto t : e.Children) { + p->AddChildPartitionIds(t); + } + for (auto t : e.Parents) { + p->AddParentPartitionIds(t); 
+ } + } + + for (const auto& e : params.Partitions) { + auto* p = config.AddPartitions(); + p->SetPartitionId(e.Id); + } + + if (params.AllPartitions.empty() && params.Partitions.empty()) { + for (ui32 id = 0; id < params.PartitionsCount; ++id) { + config.AddPartitionIds(id); + } } config.SetTopicName("rt3.dc1--account--topic"); @@ -30,15 +47,37 @@ NKikimrPQ::TPQTabletConfig MakeConfig(ui64 version, config.SetLocalDC(true); config.SetYdbDatabasePath(""); - config.SetMeteringMode(meteringMode); + config.SetMeteringMode(params.MeteringMode); config.MutablePartitionConfig()->SetLifetimeSeconds(TDuration::Hours(24).Seconds()); config.MutablePartitionConfig()->SetWriteSpeedInBytesPerSecond(10 << 20); + if (params.HugeConfig) { + for (size_t i = 0; i < 2'500; ++i) { + TString name = Sprintf("fake-consumer-%s-%" PRISZT, + TString(3'000, 'a').data(), i); + config.AddReadRules(name); + config.AddReadRuleGenerations(1); + } + } + Migrate(config); return config; } +NKikimrPQ::TPQTabletConfig MakeConfig(ui64 version, + const TVector& consumers, + ui32 partitionsCount, + NKikimrPQ::TPQTabletConfig::EMeteringMode meteringMode) +{ + TMakeConfigParams params; + params.Version = version; + params.Consumers = consumers; + params.PartitionsCount = partitionsCount; + params.MeteringMode = meteringMode; + return MakeConfig(params); +} + NKikimrPQ::TBootstrapConfig MakeBootstrapConfig() { return {}; diff --git a/ydb/core/persqueue/ut/make_config.h b/ydb/core/persqueue/ut/make_config.h index bdfec27058bc..9b072116be93 100644 --- a/ydb/core/persqueue/ut/make_config.h +++ b/ydb/core/persqueue/ut/make_config.h @@ -18,6 +18,25 @@ struct TCreateConsumerParams { ui64 ReadRuleGeneration = 0; }; +struct TPartitionParams { + ui32 Id = Max(); + ui64 TabletId = Max(); + TVector Children; + TVector Parents; +}; + +struct TMakeConfigParams { + ui64 Version = 0; + TVector Consumers; + TVector Partitions; + TVector AllPartitions; + ui32 PartitionsCount = 1; + NKikimrPQ::TPQTabletConfig::EMeteringMode 
MeteringMode = NKikimrPQ::TPQTabletConfig::METERING_MODE_REQUEST_UNITS; + bool HugeConfig = false; +}; + +NKikimrPQ::TPQTabletConfig MakeConfig(const TMakeConfigParams& params); + NKikimrPQ::TPQTabletConfig MakeConfig(ui64 version, const TVector& consumers, ui32 partitionsCount = 1, diff --git a/ydb/core/persqueue/ut/partition_ut.cpp b/ydb/core/persqueue/ut/partition_ut.cpp index 8d93159cee4b..1235bb85526a 100644 --- a/ydb/core/persqueue/ut/partition_ut.cpp +++ b/ydb/core/persqueue/ut/partition_ut.cpp @@ -882,7 +882,7 @@ void TPartitionFixture::SendProposeTransactionRequest(ui32 partition, bool immediate, ui64 txId) { - auto event = MakeHolder(); + auto event = MakeHolder(); ActorIdToProto(Ctx->Edge, event->Record.MutableSourceActor()); auto* body = event->Record.MutableData(); @@ -988,7 +988,8 @@ void TPartitionFixture::SendChangePartitionConfig(const TConfigParams& config) auto event = MakeHolder(TopicConverter, MakeConfig(config.Version, config.Consumers, 1, - config.MeteringMode)); + config.MeteringMode), + NKikimrPQ::TBootstrapConfig()); Ctx->Runtime->SingleSys()->Send(new IEventHandle(ActorId, Ctx->Edge, event.Release())); } @@ -1605,7 +1606,7 @@ ui64 TPartitionTxTestHelper::MakeAndSendWriteTx(const TSrcIdMap& srcIdsAffected) ui64 TPartitionTxTestHelper::MakeAndSendImmediateTx(const TSrcIdMap& srcIdsAffected) { auto actIter = AddWriteTxImpl(srcIdsAffected, NextActId++, 0); - auto event = MakeHolder(); + auto event = MakeHolder(); ActorIdToProto(Ctx->Edge, event->Record.MutableSourceActor()); auto* body = event->Record.MutableData(); diff --git a/ydb/core/persqueue/ut/partitiongraph_ut.cpp b/ydb/core/persqueue/ut/partitiongraph_ut.cpp index 9067d76fece7..eb8d1cff01cf 100644 --- a/ydb/core/persqueue/ut/partitiongraph_ut.cpp +++ b/ydb/core/persqueue/ut/partitiongraph_ut.cpp @@ -43,8 +43,7 @@ Y_UNIT_TEST_SUITE(TPartitionGraphTest) { p5->AddParentPartitionIds(3); p5->AddParentPartitionIds(4); - TPartitionGraph graph; - graph = 
std::move(MakePartitionGraph(config)); + TPartitionGraph graph = MakePartitionGraph(config); const auto n0 = graph.GetPartition(0); const auto n1 = graph.GetPartition(1); @@ -76,5 +75,45 @@ Y_UNIT_TEST_SUITE(TPartitionGraphTest) { UNIT_ASSERT(std::find(n5->HierarhicalParents.cbegin(), n5->HierarhicalParents.cend(), n2) != n5->HierarhicalParents.end()); UNIT_ASSERT(std::find(n5->HierarhicalParents.cbegin(), n5->HierarhicalParents.cend(), n3) != n5->HierarhicalParents.end()); UNIT_ASSERT(std::find(n5->HierarhicalParents.cbegin(), n5->HierarhicalParents.cend(), n4) != n5->HierarhicalParents.end()); + + { + std::set traversedNodes; + graph.Travers([&](ui32 id) { + traversedNodes.insert(id); + return true; + }); + UNIT_ASSERT_VALUES_EQUAL(traversedNodes.size(), 6); + } + + { + std::set traversedNodes; + graph.Travers(0,[&](ui32 id) { + traversedNodes.insert(id); + return true; + }); + UNIT_ASSERT_VALUES_EQUAL(traversedNodes.size(), 0); + } + + { + std::set traversedNodes; + graph.Travers(0,[&](ui32 id) { + traversedNodes.insert(id); + return true; + }, true); + UNIT_ASSERT_VALUES_EQUAL(traversedNodes.size(), 1); + UNIT_ASSERT(traversedNodes.contains(0)); + } + + { + std::set traversedNodes; + graph.Travers(2,[&](ui32 id) { + traversedNodes.insert(id); + return true; + }, true); + UNIT_ASSERT_VALUES_EQUAL(traversedNodes.size(), 3); + UNIT_ASSERT(traversedNodes.contains(2)); + UNIT_ASSERT(traversedNodes.contains(3)); + UNIT_ASSERT(traversedNodes.contains(5)); + } } } diff --git a/ydb/core/persqueue/ut/pqtablet_ut.cpp b/ydb/core/persqueue/ut/pqtablet_ut.cpp index 6159a8780b0c..f6388c452904 100644 --- a/ydb/core/persqueue/ut/pqtablet_ut.cpp +++ b/ydb/core/persqueue/ut/pqtablet_ut.cpp @@ -177,6 +177,7 @@ class TPQTabletFixture : public NUnitTest::TBaseFixture { void SetUp(NUnitTest::TTestContext&) override; void TearDown(NUnitTest::TTestContext&) override; + void ResetPipe(); void EnsurePipeExist(); void SendToPipe(const TActorId& sender, IEventBase* event, @@ -206,7 +207,9 
@@ class TPQTabletFixture : public NUnitTest::TBaseFixture { void StartPQWriteTxsObserver(); void WaitForPQWriteTxs(); - void WaitForCalcPredicateResult(); + template void WaitForEvent(size_t count); + void WaitForCalcPredicateResult(size_t count = 1); + void WaitForProposePartitionConfigResult(size_t count = 1); void TestWaitingForTEvReadSet(size_t senders, size_t receivers); @@ -244,6 +247,8 @@ class TPQTabletFixture : public NUnitTest::TBaseFixture { void TPQTabletFixture::SetUp(NUnitTest::TTestContext&) { Ctx.ConstructInPlace(); + Ctx->EnableDetailedPQLog = true; + Finalizer.ConstructInPlace(*Ctx); Ctx->Prepare(); @@ -251,9 +256,15 @@ void TPQTabletFixture::SetUp(NUnitTest::TTestContext&) } void TPQTabletFixture::TearDown(NUnitTest::TTestContext&) +{ + ResetPipe(); +} + +void TPQTabletFixture::ResetPipe() { if (Pipe != TActorId()) { Ctx->Runtime->ClosePipe(Pipe, Ctx->Edge, 0); + Pipe = TActorId(); } } @@ -283,7 +294,7 @@ void TPQTabletFixture::SendToPipe(const TActorId& sender, void TPQTabletFixture::SendProposeTransactionRequest(const TProposeTransactionParams& params) { - auto event = MakeHolder(); + auto event = MakeHolder(); THashSet partitions; ActorIdToProto(Ctx->Edge, event->Record.MutableSourceActor()); @@ -406,33 +417,36 @@ void TPQTabletFixture::WaitReadSet(NHelpers::TPQTabletMock& tablet, const TReadS UNIT_ASSERT(Ctx->Runtime->DispatchEvents(options)); } + auto readSet = std::move(*tablet.ReadSet); + tablet.ReadSet = Nothing(); + if (matcher.Step.Defined()) { - UNIT_ASSERT(tablet.ReadSet->HasStep()); - UNIT_ASSERT_VALUES_EQUAL(*matcher.Step, tablet.ReadSet->GetStep()); + UNIT_ASSERT(readSet.HasStep()); + UNIT_ASSERT_VALUES_EQUAL(*matcher.Step, readSet.GetStep()); } if (matcher.TxId.Defined()) { - UNIT_ASSERT(tablet.ReadSet->HasTxId()); - UNIT_ASSERT_VALUES_EQUAL(*matcher.TxId, tablet.ReadSet->GetTxId()); + UNIT_ASSERT(readSet.HasTxId()); + UNIT_ASSERT_VALUES_EQUAL(*matcher.TxId, readSet.GetTxId()); } if (matcher.Source.Defined()) { - 
UNIT_ASSERT(tablet.ReadSet->HasTabletSource()); - UNIT_ASSERT_VALUES_EQUAL(*matcher.Source, tablet.ReadSet->GetTabletSource()); + UNIT_ASSERT(readSet.HasTabletSource()); + UNIT_ASSERT_VALUES_EQUAL(*matcher.Source, readSet.GetTabletSource()); } if (matcher.Target.Defined()) { - UNIT_ASSERT(tablet.ReadSet->HasTabletDest()); - UNIT_ASSERT_VALUES_EQUAL(*matcher.Target, tablet.ReadSet->GetTabletDest()); + UNIT_ASSERT(readSet.HasTabletDest()); + UNIT_ASSERT_VALUES_EQUAL(*matcher.Target, readSet.GetTabletDest()); } if (matcher.Decision.Defined()) { - UNIT_ASSERT(tablet.ReadSet->HasReadSet()); + UNIT_ASSERT(readSet.HasReadSet()); NKikimrTx::TReadSetData data; - Y_ABORT_UNLESS(data.ParseFromString(tablet.ReadSet->GetReadSet())); + Y_ABORT_UNLESS(data.ParseFromString(readSet.GetReadSet())); UNIT_ASSERT_EQUAL(*matcher.Decision, data.GetDecision()); } if (matcher.Producer.Defined()) { - UNIT_ASSERT(tablet.ReadSet->HasTabletProducer()); - UNIT_ASSERT_VALUES_EQUAL(*matcher.Producer, tablet.ReadSet->GetTabletProducer()); + UNIT_ASSERT(readSet.HasTabletProducer()); + UNIT_ASSERT_VALUES_EQUAL(*matcher.Producer, readSet.GetTabletProducer()); } } @@ -521,19 +535,23 @@ void TPQTabletFixture::WaitDropTabletReply(const TDropTabletReplyMatcher& matche } } -void TPQTabletFixture::WaitForCalcPredicateResult() +template +void TPQTabletFixture::WaitForEvent(size_t count) { bool found = false; + size_t received = 0; - auto observer = [&found](TAutoPtr& event) { - if (auto* msg = event->CastAsLocal()) { - found = true; + TTestActorRuntimeBase::TEventObserver prev; + auto observer = [&found, &prev, &received, count](TAutoPtr& event) { + if (auto* msg = event->CastAsLocal()) { + ++received; + found = (received >= count); } - return TTestActorRuntimeBase::EEventAction::PROCESS; + return prev ? 
prev(event) : TTestActorRuntimeBase::EEventAction::PROCESS; }; - Ctx->Runtime->SetObserverFunc(observer); + prev = Ctx->Runtime->SetObserverFunc(observer); TDispatchOptions options; options.CustomFinalCondition = [&found]() { @@ -541,6 +559,18 @@ void TPQTabletFixture::WaitForCalcPredicateResult() }; UNIT_ASSERT(Ctx->Runtime->DispatchEvents(options)); + + Ctx->Runtime->SetObserverFunc(prev); +} + +void TPQTabletFixture::WaitForCalcPredicateResult(size_t count) +{ + WaitForEvent(count); +} + +void TPQTabletFixture::WaitForProposePartitionConfigResult(size_t count) +{ + WaitForEvent(count); } std::unique_ptr TPQTabletFixture::MakeGetOwnershipRequest(const TGetOwnershipRequestParams& params, @@ -669,20 +699,40 @@ void TPQTabletFixture::SendWriteRequest(const TWriteRequestParams& params) void TPQTabletFixture::WaitWriteResponse(const TWriteResponseMatcher& matcher) { - auto event = Ctx->Runtime->GrabEdgeEvent(); - UNIT_ASSERT(event != nullptr); + bool found = false; - if (matcher.Cookie.Defined()) { - UNIT_ASSERT(event->Record.HasCookie()); - UNIT_ASSERT_VALUES_EQUAL(*matcher.Cookie, event->Record.GetCookie()); - } + auto observer = [&found, &matcher](TAutoPtr& event) { + if (auto* msg = event->CastAsLocal()) { + if (matcher.Cookie.Defined()) { + if (msg->Record.HasCookie() && (*matcher.Cookie == msg->Record.GetCookie())) { + found = true; + } + } + } + + return TTestActorRuntimeBase::EEventAction::PROCESS; + }; + + auto prev = Ctx->Runtime->SetObserverFunc(observer); + + TDispatchOptions options; + options.CustomFinalCondition = [&found]() { + return found; + }; + + UNIT_ASSERT(Ctx->Runtime->DispatchEvents(options)); + + Ctx->Runtime->SetObserverFunc(prev); } void TPQTabletFixture::StartPQWriteObserver(bool& flag, unsigned cookie) { flag = false; + auto observer = [&flag, cookie](TAutoPtr& event) { if (auto* kvResponse = event->CastAsLocal()) { + if (kvResponse->Record.HasCookie()) { + } if ((event->Sender == event->Recipient) && kvResponse->Record.HasCookie() && 
(kvResponse->Record.GetCookie() == cookie)) { @@ -692,6 +742,7 @@ void TPQTabletFixture::StartPQWriteObserver(bool& flag, unsigned cookie) return TTestActorRuntimeBase::EEventAction::PROCESS; }; + Ctx->Runtime->SetObserverFunc(observer); } @@ -797,8 +848,8 @@ Y_UNIT_TEST_F(Multiple_PQTablets, TPQTabletFixture) WaitProposeTransactionResponse({.TxId=txId_2, .Status=NKikimrPQ::TEvProposeTransactionResult::COMPLETE}); - WaitReadSetAck(*tablet, {.Step=100, .TxId=txId_2, .Source=22222, .Target=Ctx->TabletId, .Consumer=Ctx->TabletId}); tablet->SendReadSetAck(*Ctx->Runtime, {.Step=100, .TxId=txId_2, .Source=Ctx->TabletId}); + WaitReadSetAck(*tablet, {.Step=100, .TxId=txId_2, .Source=22222, .Target=Ctx->TabletId, .Consumer=Ctx->TabletId}); // // TODO(abcdef): проверить, что удалена информация о транзакции @@ -872,8 +923,8 @@ Y_UNIT_TEST_F(PQTablet_Send_RS_With_Abort, TPQTabletFixture) WaitProposeTransactionResponse({.TxId=txId, .Status=NKikimrPQ::TEvProposeTransactionResult::ABORTED}); - WaitReadSetAck(*tablet, {.Step=100, .TxId=txId, .Source=22222, .Target=Ctx->TabletId, .Consumer=Ctx->TabletId}); tablet->SendReadSetAck(*Ctx->Runtime, {.Step=100, .TxId=txId, .Source=Ctx->TabletId}); + WaitReadSetAck(*tablet, {.Step=100, .TxId=txId, .Source=22222, .Target=Ctx->TabletId, .Consumer=Ctx->TabletId}); // // TODO(abcdef): проверить, что удалена информация о транзакции @@ -911,8 +962,8 @@ Y_UNIT_TEST_F(Partition_Send_Predicate_With_False, TPQTabletFixture) WaitProposeTransactionResponse({.TxId=txId, .Status=NKikimrPQ::TEvProposeTransactionResult::ABORTED}); - WaitReadSetAck(*tablet, {.Step=100, .TxId=txId, .Source=22222, .Target=Ctx->TabletId, .Consumer=Ctx->TabletId}); tablet->SendReadSetAck(*Ctx->Runtime, {.Step=100, .TxId=txId, .Source=Ctx->TabletId}); + WaitReadSetAck(*tablet, {.Step=100, .TxId=txId, .Source=22222, .Target=Ctx->TabletId, .Consumer=Ctx->TabletId}); // // TODO(abcdef): проверить, что удалена информация о транзакции @@ -1313,6 +1364,271 @@ 
Y_UNIT_TEST_F(ProposeTx_Command_After_Propose, TPQTabletFixture) .Status=NMsgBusProxy::MSTATUS_ERROR}); } +Y_UNIT_TEST_F(Read_TEvTxCommit_After_Restart, TPQTabletFixture) +{ + const ui64 txId = 67890; + const ui64 mockTabletId = 22222; + + NHelpers::TPQTabletMock* tablet = CreatePQTabletMock(mockTabletId); + PQTabletPrepare({.partitions=1}, {}, *Ctx); + + SendProposeTransactionRequest({.TxId=txId, + .Senders={mockTabletId}, .Receivers={mockTabletId}, + .TxOps={ + {.Partition=0, .Consumer="user", .Begin=0, .End=0, .Path="/topic"}, + }}); + WaitProposeTransactionResponse({.TxId=txId, + .Status=NKikimrPQ::TEvProposeTransactionResult::PREPARED}); + + SendPlanStep({.Step=100, .TxIds={txId}}); + + WaitForCalcPredicateResult(); + + // the transaction is now in the WAIT_RS state in memory and PLANNED state in disk + + PQTabletRestart(*Ctx); + + tablet->SendReadSet(*Ctx->Runtime, {.Step=100, .TxId=txId, .Target=Ctx->TabletId, .Decision=NKikimrTx::TReadSetData::DECISION_COMMIT}); + + WaitProposeTransactionResponse({.TxId=txId, + .Status=NKikimrPQ::TEvProposeTransactionResult::COMPLETE}); + + tablet->SendReadSetAck(*Ctx->Runtime, {.Step=100, .TxId=txId, .Source=Ctx->TabletId}); + WaitReadSetAck(*tablet, {.Step=100, .TxId=txId, .Source=mockTabletId, .Target=Ctx->TabletId, .Consumer=Ctx->TabletId}); +} + +Y_UNIT_TEST_F(Config_TEvTxCommit_After_Restart, TPQTabletFixture) +{ + const ui64 txId = 67890; + const ui64 mockTabletId = 22222; + + NHelpers::TPQTabletMock* tablet = CreatePQTabletMock(mockTabletId); + PQTabletPrepare({.partitions=1}, {}, *Ctx); + + auto tabletConfig = NHelpers::MakeConfig({.Version=2, + .Consumers={ + {.Consumer="client-1", .Generation=0}, + {.Consumer="client-3", .Generation=7} + }, + .Partitions={ + {.Id=0} + }, + .AllPartitions={ + {.Id=0, .TabletId=Ctx->TabletId, .Children={}, .Parents={1}}, + {.Id=1, .TabletId=mockTabletId, .Children={0}, .Parents={}} + }}); + + SendProposeTransactionRequest({.TxId=txId, + .Configs=NHelpers::TConfigParams{ + 
.Tablet=tabletConfig, + .Bootstrap=NHelpers::MakeBootstrapConfig(), + }}); + WaitProposeTransactionResponse({.TxId=txId, + .Status=NKikimrPQ::TEvProposeTransactionResult::PREPARED}); + + SendPlanStep({.Step=100, .TxIds={txId}}); + + WaitForProposePartitionConfigResult(); + + // the transaction is now in the WAIT_RS state in memory and PLANNED state in disk + + PQTabletRestart(*Ctx); + + tablet->SendReadSet(*Ctx->Runtime, {.Step=100, .TxId=txId, .Target=Ctx->TabletId, .Decision=NKikimrTx::TReadSetData::DECISION_COMMIT}); + + WaitProposeTransactionResponse({.TxId=txId, + .Status=NKikimrPQ::TEvProposeTransactionResult::COMPLETE}); + + tablet->SendReadSetAck(*Ctx->Runtime, {.Step=100, .TxId=txId, .Source=Ctx->TabletId}); + WaitReadSetAck(*tablet, {.Step=100, .TxId=txId, .Source=mockTabletId, .Target=Ctx->TabletId, .Consumer=Ctx->TabletId}); +} + +Y_UNIT_TEST_F(One_Tablet_For_All_Partitions, TPQTabletFixture) +{ + const ui64 txId = 67890; + + PQTabletPrepare({.partitions=1}, {}, *Ctx); + + auto tabletConfig = NHelpers::MakeConfig({.Version=2, + .Consumers={ + {.Consumer="client-1", .Generation=0}, + {.Consumer="client-3", .Generation=7} + }, + .Partitions={ + {.Id=0}, + {.Id=1}, + {.Id=2} + }, + .AllPartitions={ + {.Id=0, .TabletId=Ctx->TabletId, .Children={1, 2}, .Parents={}}, + {.Id=1, .TabletId=Ctx->TabletId, .Children={}, .Parents={0}}, + {.Id=2, .TabletId=Ctx->TabletId, .Children={}, .Parents={0}} + }}); + + SendProposeTransactionRequest({.TxId=txId, + .Configs=NHelpers::TConfigParams{ + .Tablet=tabletConfig, + .Bootstrap=NHelpers::MakeBootstrapConfig(), + }}); + WaitProposeTransactionResponse({.TxId=txId, + .Status=NKikimrPQ::TEvProposeTransactionResult::PREPARED}); + + SendPlanStep({.Step=100, .TxIds={txId}}); + + WaitForProposePartitionConfigResult(2); + + // the transaction is now in the WAIT_RS state in memory and PLANNED state in disk + + PQTabletRestart(*Ctx); + + WaitProposeTransactionResponse({.TxId=txId, + 
.Status=NKikimrPQ::TEvProposeTransactionResult::COMPLETE}); +} + +Y_UNIT_TEST_F(One_New_Partition_In_Another_Tablet, TPQTabletFixture) +{ + const ui64 txId = 67890; + const ui64 mockTabletId = 22222; + + NHelpers::TPQTabletMock* tablet = CreatePQTabletMock(mockTabletId); + PQTabletPrepare({.partitions=1}, {}, *Ctx); + + auto tabletConfig = NHelpers::MakeConfig({.Version=2, + .Consumers={ + {.Consumer="client-1", .Generation=0}, + {.Consumer="client-3", .Generation=7} + }, + .Partitions={ + {.Id=0}, + {.Id=1}, + }, + .AllPartitions={ + {.Id=0, .TabletId=Ctx->TabletId, .Children={1, 2}, .Parents={}}, + {.Id=1, .TabletId=Ctx->TabletId, .Children={}, .Parents={0}}, + {.Id=2, .TabletId=mockTabletId, .Children={}, .Parents={0}} + }}); + + SendProposeTransactionRequest({.TxId=txId, + .Configs=NHelpers::TConfigParams{ + .Tablet=tabletConfig, + .Bootstrap=NHelpers::MakeBootstrapConfig(), + }}); + WaitProposeTransactionResponse({.TxId=txId, + .Status=NKikimrPQ::TEvProposeTransactionResult::PREPARED}); + + SendPlanStep({.Step=100, .TxIds={txId}}); + + WaitForProposePartitionConfigResult(2); + + // the transaction is now in the WAIT_RS state in memory and PLANNED state in disk + + PQTabletRestart(*Ctx); + + tablet->SendReadSet(*Ctx->Runtime, {.Step=100, .TxId=txId, .Target=Ctx->TabletId, .Decision=NKikimrTx::TReadSetData::DECISION_COMMIT}); + + WaitProposeTransactionResponse({.TxId=txId, + .Status=NKikimrPQ::TEvProposeTransactionResult::COMPLETE}); + + tablet->SendReadSetAck(*Ctx->Runtime, {.Step=100, .TxId=txId, .Source=Ctx->TabletId}); + WaitReadSetAck(*tablet, {.Step=100, .TxId=txId, .Source=mockTabletId, .Target=Ctx->TabletId, .Consumer=Ctx->TabletId}); +} + +Y_UNIT_TEST_F(All_New_Partitions_In_Another_Tablet, TPQTabletFixture) +{ + const ui64 txId = 67890; + const ui64 mockTabletId = 22222; + + NHelpers::TPQTabletMock* tablet = CreatePQTabletMock(mockTabletId); + PQTabletPrepare({.partitions=1}, {}, *Ctx); + + auto tabletConfig = NHelpers::MakeConfig({.Version=2, + 
.Consumers={ + {.Consumer="client-1", .Generation=0}, + {.Consumer="client-3", .Generation=7} + }, + .Partitions={ + {.Id=0}, + {.Id=1}, + }, + .AllPartitions={ + {.Id=0, .TabletId=Ctx->TabletId, .Children={}, .Parents={2}}, + {.Id=1, .TabletId=Ctx->TabletId, .Children={}, .Parents={2}}, + {.Id=2, .TabletId=mockTabletId, .Children={0, 1}, .Parents={}} + }}); + + SendProposeTransactionRequest({.TxId=txId, + .Configs=NHelpers::TConfigParams{ + .Tablet=tabletConfig, + .Bootstrap=NHelpers::MakeBootstrapConfig(), + }}); + WaitProposeTransactionResponse({.TxId=txId, + .Status=NKikimrPQ::TEvProposeTransactionResult::PREPARED}); + + SendPlanStep({.Step=100, .TxIds={txId}}); + + WaitForProposePartitionConfigResult(2); + + // the transaction is now in the WAIT_RS state in memory and PLANNED state in disk + + PQTabletRestart(*Ctx); + + tablet->SendReadSet(*Ctx->Runtime, {.Step=100, .TxId=txId, .Target=Ctx->TabletId, .Decision=NKikimrTx::TReadSetData::DECISION_COMMIT}); + + WaitProposeTransactionResponse({.TxId=txId, + .Status=NKikimrPQ::TEvProposeTransactionResult::COMPLETE}); + + tablet->SendReadSetAck(*Ctx->Runtime, {.Step=100, .TxId=txId, .Source=Ctx->TabletId}); + WaitReadSetAck(*tablet, {.Step=100, .TxId=txId, .Source=mockTabletId, .Target=Ctx->TabletId, .Consumer=Ctx->TabletId}); +} + +Y_UNIT_TEST_F(Huge_ProposeTransacton, TPQTabletFixture) +{ + const ui64 mockTabletId = 22222; + + PQTabletPrepare({.partitions=1}, {}, *Ctx); + + auto tabletConfig = NHelpers::MakeConfig({.Version=2, + .Consumers={ + {.Consumer="client-1", .Generation=0}, + {.Consumer="client-3", .Generation=7}, + }, + .Partitions={ + {.Id=0}, + {.Id=1}, + }, + .AllPartitions={ + {.Id=0, .TabletId=Ctx->TabletId, .Children={}, .Parents={2}}, + {.Id=1, .TabletId=Ctx->TabletId, .Children={}, .Parents={2}}, + {.Id=2, .TabletId=mockTabletId, .Children={0, 1}, .Parents={}} + }, + .HugeConfig = true}); + + const ui64 txId_1 = 67890; + SendProposeTransactionRequest({.TxId=txId_1, + 
.Configs=NHelpers::TConfigParams{ + .Tablet=tabletConfig, + .Bootstrap=NHelpers::MakeBootstrapConfig(), + }}); + WaitProposeTransactionResponse({.TxId=txId_1, + .Status=NKikimrPQ::TEvProposeTransactionResult::PREPARED}); + + const ui64 txId_2 = 67891; + SendProposeTransactionRequest({.TxId=txId_2, + .Configs=NHelpers::TConfigParams{ + .Tablet=tabletConfig, + .Bootstrap=NHelpers::MakeBootstrapConfig(), + }}); + WaitProposeTransactionResponse({.TxId=txId_2, + .Status=NKikimrPQ::TEvProposeTransactionResult::PREPARED}); + + PQTabletRestart(*Ctx); + ResetPipe(); + + SendPlanStep({.Step=100, .TxIds={txId_1, txId_2}}); + WaitPlanStepAck({.Step=100, .TxIds={txId_1, txId_2}}); + WaitPlanStepAccepted({.Step=100}); +} + } } diff --git a/ydb/core/persqueue/ut/sourceid_ut.cpp b/ydb/core/persqueue/ut/sourceid_ut.cpp index 254cf2465281..8f96c33bd06b 100644 --- a/ydb/core/persqueue/ut/sourceid_ut.cpp +++ b/ydb/core/persqueue/ut/sourceid_ut.cpp @@ -458,6 +458,40 @@ Y_UNIT_TEST_SUITE(TSourceIdTests) { emitter.Process(TestSourceId(2), MakeHeartbeat(4)); UNIT_ASSERT(!emitter.CanEmit().Defined()); } + + // gaps + TSourceIdStorage storage2; + storage2.RegisterSourceId(TestSourceId(1), MakeExplicitSourceIdInfo(++offset, MakeHeartbeat(1))); + storage2.RegisterSourceId(TestSourceId(2), MakeExplicitSourceIdInfo(++offset, MakeHeartbeat(3))); + { + THeartbeatEmitter emitter(storage2); + UNIT_ASSERT(!emitter.CanEmit().Defined()); + + emitter.Process(TestSourceId(1), MakeHeartbeat(2)); + emitter.Process(TestSourceId(2), MakeHeartbeat(4)); + { + const auto heartbeat = emitter.CanEmit(); + UNIT_ASSERT(heartbeat.Defined()); + UNIT_ASSERT_VALUES_EQUAL(heartbeat->Version, MakeHeartbeat(2).Version); + } + } + + // full update + TSourceIdStorage storage3; + storage3.RegisterSourceId(TestSourceId(1), MakeExplicitSourceIdInfo(++offset, MakeHeartbeat(1))); + storage3.RegisterSourceId(TestSourceId(2), MakeExplicitSourceIdInfo(++offset, MakeHeartbeat(2))); + { + THeartbeatEmitter emitter(storage3); + 
UNIT_ASSERT(!emitter.CanEmit().Defined()); + + emitter.Process(TestSourceId(1), MakeHeartbeat(3)); + emitter.Process(TestSourceId(2), MakeHeartbeat(4)); + { + const auto heartbeat = emitter.CanEmit(); + UNIT_ASSERT(heartbeat.Defined()); + UNIT_ASSERT_VALUES_EQUAL(heartbeat->Version, MakeHeartbeat(3).Version); + } + } } Y_UNIT_TEST(SourceIdMinSeqNo) { diff --git a/ydb/core/persqueue/ut/user_action_processor_ut.cpp b/ydb/core/persqueue/ut/user_action_processor_ut.cpp index c45bc09741bb..0796c620f39b 100644 --- a/ydb/core/persqueue/ut/user_action_processor_ut.cpp +++ b/ydb/core/persqueue/ut/user_action_processor_ut.cpp @@ -647,7 +647,7 @@ void TUserActionProcessorFixture::SendProposeTransactionRequest(ui32 partition, bool immediate, ui64 txId) { - auto event = MakeHolder(); + auto event = MakeHolder(); ActorIdToProto(Ctx->Edge, event->Record.MutableSource()); auto* body = event->Record.MutableTxBody(); @@ -665,7 +665,7 @@ void TUserActionProcessorFixture::SendProposeTransactionRequest(ui32 partition, void TUserActionProcessorFixture::SendProposeTransactionRequest(const TProposeTransactionParams& params) { - auto event = MakeHolder(); + auto event = MakeHolder(); // // Source diff --git a/ydb/core/persqueue/ut/ut_with_sdk/autoscaling_ut.cpp b/ydb/core/persqueue/ut/ut_with_sdk/autoscaling_ut.cpp index 80de3bd5b9c5..e8dc6c2a3de7 100644 --- a/ydb/core/persqueue/ut/ut_with_sdk/autoscaling_ut.cpp +++ b/ydb/core/persqueue/ut/ut_with_sdk/autoscaling_ut.cpp @@ -400,9 +400,9 @@ Y_UNIT_TEST_SUITE(TopicAutoscaling) { } - writeSession1->Close(TDuration::Seconds(1)); - writeSession2->Close(TDuration::Seconds(1)); - writeSession3->Close(TDuration::Seconds(1)); + writeSession1->Close(TDuration::Seconds(2)); + writeSession2->Close(TDuration::Seconds(2)); + writeSession3->Close(TDuration::Seconds(2)); readSession.Close(); } @@ -723,16 +723,74 @@ Y_UNIT_TEST_SUITE(TopicAutoscaling) { f.Wait(); auto v = f.GetValueSync(); - UNIT_ASSERT_C(!v.IsSuccess(), "Must receve error becuse 
max-partition is not 0"); + UNIT_ASSERT_C(!v.IsSuccess(), "Must receve error becuse disabling is not supported"); + } + } + + Y_UNIT_TEST(ControlPlane_BackCompatibility) { + auto topicName = "back-compatibility-test"; + + TTopicSdkTestSetup setup = CreateSetup(); + TTopicClient client = setup.MakeClient(); + + { + TCreateTopicSettings createSettings; + createSettings + .BeginConfigurePartitioningSettings() + .MinActivePartitions(3) + .EndConfigurePartitioningSettings(); + client.CreateTopic(topicName, createSettings).Wait(); + } + + { + auto describeAfterAlter = client.DescribeTopic(topicName).GetValueSync(); + + UNIT_ASSERT_VALUES_EQUAL(describeAfterAlter.GetTopicDescription().GetPartitioningSettings().GetMinActivePartitions(), 3); } { TAlterTopicSettings alterSettings; alterSettings .BeginAlterPartitioningSettings() - .MaxActivePartitions(0) + .MinActivePartitions(5) + .EndAlterTopicPartitioningSettings(); + client.AlterTopic(topicName, alterSettings).Wait(); + } + + { + auto describeAfterAlter = client.DescribeTopic(topicName).GetValueSync(); + + UNIT_ASSERT_VALUES_EQUAL(describeAfterAlter.GetTopicDescription().GetPartitioningSettings().GetMinActivePartitions(), 5); + } + } + + Y_UNIT_TEST(ControlPlane_PauseAutoPartitioning) { + auto topicName = "autoscalit-topic"; + + TTopicSdkTestSetup setup = CreateSetup(); + TTopicClient client = setup.MakeClient(); + + { + TCreateTopicSettings createSettings; + createSettings + .BeginConfigurePartitioningSettings() + .MinActivePartitions(1) + .MaxActivePartitions(100) + .BeginConfigureAutoPartitioningSettings() + .Strategy(EAutoPartitioningStrategy::ScaleUp) + .EndConfigureAutoPartitioningSettings() + .EndConfigurePartitioningSettings(); + client.CreateTopic(topicName, createSettings).Wait(); + } + + { + TAlterTopicSettings alterSettings; + alterSettings + .BeginAlterPartitioningSettings() + .MinActivePartitions(3) + .MaxActivePartitions(107) .BeginAlterAutoPartitioningSettings() - 
.Strategy(EAutoPartitioningStrategy::Disabled) + .Strategy(EAutoPartitioningStrategy::Paused) .EndAlterAutoPartitioningSettings() .EndAlterTopicPartitioningSettings(); auto f = client.AlterTopic(topicName, alterSettings); @@ -741,6 +799,14 @@ Y_UNIT_TEST_SUITE(TopicAutoscaling) { auto v = f.GetValueSync(); UNIT_ASSERT_C(v.IsSuccess(), "Error: " << v); } + + { + auto describeAfterAlter = client.DescribeTopic(topicName).GetValueSync(); + + UNIT_ASSERT_VALUES_EQUAL(describeAfterAlter.GetTopicDescription().GetPartitioningSettings().GetMinActivePartitions(), 3); + UNIT_ASSERT_VALUES_EQUAL(describeAfterAlter.GetTopicDescription().GetPartitioningSettings().GetMaxActivePartitions(), 107); + UNIT_ASSERT_VALUES_EQUAL(describeAfterAlter.GetTopicDescription().GetPartitioningSettings().GetAutoPartitioningSettings().GetStrategy(), EAutoPartitioningStrategy::Paused); + } } Y_UNIT_TEST(ControlPlane_AutoscalingWithStorageSizeRetention) { @@ -785,7 +851,7 @@ Y_UNIT_TEST_SUITE(TopicAutoscaling) { .BeginConfigureAutoPartitioningSettings() .UpUtilizationPercent(2) .DownUtilizationPercent(1) - .StabilizationWindow(TDuration::Seconds(1)) + .StabilizationWindow(TDuration::Seconds(2)) .Strategy(EAutoPartitioningStrategy::ScaleUp) .EndConfigureAutoPartitioningSettings() .EndConfigurePartitioningSettings(); @@ -793,19 +859,93 @@ Y_UNIT_TEST_SUITE(TopicAutoscaling) { auto msg = TString(1_MB, 'a'); - auto writeSession = CreateWriteSession(client, "producer-1", 0, TEST_TOPIC, false); - UNIT_ASSERT(writeSession->Write(Msg(msg, 1))); - UNIT_ASSERT(writeSession->Write(Msg(msg, 2))); - Sleep(TDuration::Seconds(5)); - auto describe = client.DescribeTopic(TEST_TOPIC).GetValueSync(); - UNIT_ASSERT_EQUAL(describe.GetTopicDescription().GetPartitions().size(), 3); - - auto writeSession2 = CreateWriteSession(client, "producer-1", 1, TEST_TOPIC, false); - UNIT_ASSERT(writeSession2->Write(Msg(msg, 3))); - UNIT_ASSERT(writeSession2->Write(Msg(msg, 4))); - Sleep(TDuration::Seconds(5)); - auto describe2 = 
client.DescribeTopic(TEST_TOPIC).GetValueSync(); - UNIT_ASSERT_EQUAL(describe2.GetTopicDescription().GetPartitions().size(), 5); + auto writeSession_1 = CreateWriteSession(client, "producer-1", 0, TEST_TOPIC, false); + auto writeSession_2 = CreateWriteSession(client, "producer-2", 0, TEST_TOPIC, false); + + { + UNIT_ASSERT(writeSession_1->Write(Msg(msg, 1))); + UNIT_ASSERT(writeSession_1->Write(Msg(msg, 2))); + Sleep(TDuration::Seconds(5)); + auto describe = client.DescribeTopic(TEST_TOPIC).GetValueSync(); + UNIT_ASSERT_EQUAL(describe.GetTopicDescription().GetPartitions().size(), 1); + } + + { + UNIT_ASSERT(writeSession_1->Write(Msg(msg, 3))); + UNIT_ASSERT(writeSession_2->Write(Msg(msg, 4))); + UNIT_ASSERT(writeSession_1->Write(Msg(msg, 5))); + UNIT_ASSERT(writeSession_2->Write(Msg(msg, 6))); + Sleep(TDuration::Seconds(5)); + auto describe = client.DescribeTopic(TEST_TOPIC).GetValueSync(); + UNIT_ASSERT_EQUAL(describe.GetTopicDescription().GetPartitions().size(), 3); + } + + auto writeSession2_1 = CreateWriteSession(client, "producer-1", 1, TEST_TOPIC, false); + auto writeSession2_2 = CreateWriteSession(client, "producer-2", 1, TEST_TOPIC, false); + + { + UNIT_ASSERT(writeSession2_1->Write(Msg(msg, 7))); + UNIT_ASSERT(writeSession2_2->Write(Msg(msg, 8))); + UNIT_ASSERT(writeSession2_1->Write(Msg(msg, 9))); + UNIT_ASSERT(writeSession2_2->Write(Msg(msg, 10))); + Sleep(TDuration::Seconds(5)); + auto describe2 = client.DescribeTopic(TEST_TOPIC).GetValueSync(); + UNIT_ASSERT_EQUAL(describe2.GetTopicDescription().GetPartitions().size(), 5); + } + } + + void ExecuteQuery(NYdb::NTable::TSession& session, const TString& query ) { + const auto result = session.ExecuteSchemeQuery(query).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), NYdb::EStatus::SUCCESS, result.GetIssues().ToString()); + } + + Y_UNIT_TEST(WithDir_PartitionSplit_AutosplitByLoad) { + TTopicSdkTestSetup setup = CreateSetup(); + auto client = setup.MakeClient(); + auto tableClient = 
setup.MakeTableClient(); + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + + setup.GetServer().AnnoyingClient->MkDir("/Root", "dir"); + + ExecuteQuery(session, R"( + --!syntax_v1 + CREATE TOPIC `/Root/dir/origin` + WITH ( + AUTO_PARTITIONING_STRATEGY = 'SCALE_UP', + MAX_ACTIVE_PARTITIONS = 50 + ); + )"); + + { + auto describe = client.DescribeTopic("/Root/dir/origin").GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL(describe.GetTopicDescription().GetPartitions().size(), 1); + } + + ui64 balancerTabletId; + { + auto pathDescr = setup.GetServer().AnnoyingClient->Ls("/Root/dir/origin")->Record.GetPathDescription().GetSelf(); + balancerTabletId = pathDescr.GetBalancerTabletID(); + Cerr << ">>>>> BalancerTabletID=" << balancerTabletId << Endl << Flush; + UNIT_ASSERT(balancerTabletId); + } + + { + const auto edge = setup.GetRuntime().AllocateEdgeActor(); + setup.GetRuntime().SendToPipe(balancerTabletId, edge, new TEvPQ::TEvPartitionScaleStatusChanged(0, NKikimrPQ::EScaleStatus::NEED_SPLIT)); + } + + { + size_t partitionCount = 0; + for (size_t i = 0; i < 10; ++i) { + Sleep(TDuration::Seconds(1)); + auto describe = client.DescribeTopic("/Root/dir/origin").GetValueSync(); + partitionCount = describe.GetTopicDescription().GetPartitions().size(); + if (partitionCount == 3) { + break; + } + } + UNIT_ASSERT_VALUES_EQUAL(partitionCount, 3); + } } Y_UNIT_TEST(MidOfRange) { diff --git a/ydb/core/persqueue/ut/utils_ut.cpp b/ydb/core/persqueue/ut/utils_ut.cpp new file mode 100644 index 000000000000..85513ea70a94 --- /dev/null +++ b/ydb/core/persqueue/ut/utils_ut.cpp @@ -0,0 +1,81 @@ +#include + +#include + +namespace NKikimr::NPQ { + +Y_UNIT_TEST_SUITE(TPQUtilsTest) { + Y_UNIT_TEST(TLastCounter) { + TLastCounter counter; + + TInstant now = TInstant::Now(); + + { + auto r = counter.Count(now); + UNIT_ASSERT_VALUES_EQUAL(r, 0); + } + + { + counter.Use("v-1", now); + auto r = counter.Count(now); + UNIT_ASSERT_VALUES_EQUAL(r, 1); + } + + { + counter.Use("v-1", now); 
+ auto r = counter.Count(now); + UNIT_ASSERT_VALUES_EQUAL(r, 1); + } + + now += TDuration::Seconds(1); + + { + counter.Use("v-1", now); + auto r = counter.Count(now - TDuration::Seconds(10)); + UNIT_ASSERT_VALUES_EQUAL(r, 1); + } + + { + auto r = counter.Count(now); + UNIT_ASSERT_VALUES_EQUAL(r, 1); + } + + { + counter.Use("v-2", now); + auto r = counter.Count(now - TDuration::Seconds(10)); + UNIT_ASSERT_VALUES_EQUAL(r, 2); + } + + { + counter.Use("v-1", now); + auto r = counter.Count(now - TDuration::Seconds(10)); + UNIT_ASSERT_VALUES_EQUAL(r, 2); + } + + now += TDuration::Seconds(1); + + { + counter.Use("v-3", now); + auto r = counter.Count(now - TDuration::Seconds(10)); + UNIT_ASSERT_VALUES_EQUAL(r, 2); + } + + now += TDuration::Seconds(1); + + { + counter.Use("v-3", now); + auto r = counter.Count(now - TDuration::Seconds(10)); + UNIT_ASSERT_VALUES_EQUAL(r, 2); + } + + now += TDuration::Seconds(1); + + { + counter.Use("v-2", now); + auto r = counter.Count(now - TDuration::Seconds(10)); + UNIT_ASSERT_VALUES_EQUAL(r, 2); + } + } +} + +} diff --git a/ydb/core/persqueue/ut/ya.make b/ydb/core/persqueue/ut/ya.make index 8b3e64ac2351..c91c10ecd965 100644 --- a/ydb/core/persqueue/ut/ya.make +++ b/ydb/core/persqueue/ut/ya.make @@ -45,6 +45,7 @@ SRCS( pqrb_describes_ut.cpp microseconds_sliding_window_ut.cpp fetch_request_ut.cpp + utils_ut.cpp ) RESOURCE( diff --git a/ydb/core/persqueue/utils.cpp b/ydb/core/persqueue/utils.cpp index 4decfc2a6c7c..1003a8003a00 100644 --- a/ydb/core/persqueue/utils.cpp +++ b/ydb/core/persqueue/utils.cpp @@ -36,6 +36,12 @@ bool SplitMergeEnabled(const NKikimrPQ::TPQTabletConfig& config) { return config.has_partitionstrategy() && config.partitionstrategy().has_partitionstrategytype() && config.partitionstrategy().partitionstrategytype() != ::NKikimrPQ::TPQTabletConfig_TPartitionStrategyType::TPQTabletConfig_TPartitionStrategyType_DISABLED; } +size_t CountActivePartitions(const ::google::protobuf::RepeatedPtrField< 
::NKikimrPQ::TPQTabletConfig_TPartition >& partitions) { + return std::count_if(partitions.begin(), partitions.end(), [](const auto& p) { + return p.GetStatus() == ::NKikimrPQ::ETopicPartitionStatus::Active; + }); +} + static constexpr ui64 PUT_UNIT_SIZE = 40960u; // 40Kb ui64 PutUnitsSize(const ui64 size) { @@ -159,7 +165,36 @@ std::set TPartitionGraph::GetActiveChildren(ui32 id) const { return result; } -void TPartitionGraph::Travers(ui32 id, std::function func, bool includeSelf) const { +void Travers0(std::deque& queue, const std::function& func) { + while(!queue.empty()) { + auto* node = queue.front(); + queue.pop_front(); + + if (func(node->Id)) { + queue.insert(queue.end(), node->Children.begin(), node->Children.end()); + } + } +} + +void TPartitionGraph::Travers(const std::function& func) const { + std::deque queue; + + for (auto& [id, n] : Partitions) { + if (!n.IsRoot()) { + continue; + } + + if (!func(id)) { + continue; + } + + queue.insert(queue.end(), n.Children.begin(), n.Children.end()); + } + + Travers0(queue, func); +} + +void TPartitionGraph::Travers(ui32 id, const std::function& func, bool includeSelf) const { auto* n = GetPartition(id); if (!n) { return; @@ -172,17 +207,9 @@ void TPartitionGraph::Travers(ui32 id, std::function func, bool std::deque queue; queue.insert(queue.end(), n->Children.begin(), n->Children.end()); - while(!queue.empty()) { - auto* node = queue.front(); - queue.pop_front(); - - if (func(node->Id)) { - queue.insert(queue.end(), node->Children.begin(), node->Children.end()); - } - } + Travers0(queue, func); } - template inline int GetPartitionId(TPartition p) { return p.GetPartitionId(); @@ -271,4 +298,31 @@ TPartitionGraph MakePartitionGraph(const NKikimrSchemeOp::TPersQueueGroupDescrip return TPartitionGraph(BuildGraph(config.GetPartitions())); } +void TLastCounter::Use(const TString& value, const TInstant& now) { + const auto full = MaxValueCount == Values.size(); + if (!Values.empty() && Values[0].Value == value) { + 
auto& v0 = Values[0]; + if (v0.LastUseTime < now) { + v0.LastUseTime = now; + if (full && Values[1].LastUseTime != now) { + Values.push_back(std::move(v0)); + Values.pop_front(); + } + } + } else if (full && Values[1].Value == value) { + Values[1].LastUseTime = now; + } else if (!full || Values[0].LastUseTime < now) { + if (full) { + Values.pop_front(); + } + Values.push_back(Data{now, value}); + } +} + +size_t TLastCounter::Count(const TInstant& expirationTime) { + return std::count_if(Values.begin(), Values.end(), [&](const auto& i) { + return i.LastUseTime >= expirationTime; + }); +} + } // NKikimr::NPQ diff --git a/ydb/core/persqueue/utils.h b/ydb/core/persqueue/utils.h index c81442f1128c..7c42e70ff59c 100644 --- a/ydb/core/persqueue/utils.h +++ b/ydb/core/persqueue/utils.h @@ -1,5 +1,7 @@ #pragma once +#include +#include #include #include #include @@ -11,6 +13,8 @@ ui64 TopicPartitionReserveThroughput(const NKikimrPQ::TPQTabletConfig& config); bool SplitMergeEnabled(const NKikimrPQ::TPQTabletConfig& config); +size_t CountActivePartitions(const ::google::protobuf::RepeatedPtrField< ::NKikimrPQ::TPQTabletConfig_TPartition >& partitions); + ui64 PutUnitsSize(const ui64 size); TString SourceIdHash(const TString& sourceId); @@ -55,7 +59,9 @@ class TPartitionGraph { const Node* GetPartition(ui32 id) const; std::set GetActiveChildren(ui32 id) const; - void Travers(ui32 id, std::function func, bool includeSelf = false) const; + + void Travers(const std::function& func) const; + void Travers(ui32 id, const std::function& func, bool includeSelf = false) const; private: std::unordered_map Partitions; @@ -65,4 +71,20 @@ TPartitionGraph MakePartitionGraph(const NKikimrPQ::TPQTabletConfig& config); TPartitionGraph MakePartitionGraph(const NKikimrPQ::TUpdateBalancerConfig& config); TPartitionGraph MakePartitionGraph(const NKikimrSchemeOp::TPersQueueGroupDescription& config); +class TLastCounter { + static constexpr size_t MaxValueCount = 2; + +public: + void Use(const 
TString& value, const TInstant& now); + size_t Count(const TInstant& expirationTime); + +private: + struct Data { + TInstant LastUseTime; + TString Value; + }; + std::deque Values; +}; + + } // NKikimr::NPQ diff --git a/ydb/core/persqueue/writer/metadata_initializers.cpp b/ydb/core/persqueue/writer/metadata_initializers.cpp index dff6ade3ada7..b9e3103f59d3 100644 --- a/ydb/core/persqueue/writer/metadata_initializers.cpp +++ b/ydb/core/persqueue/writer/metadata_initializers.cpp @@ -74,6 +74,16 @@ void TSrcIdMetaInitializer::DoPrepare(NInitializer::IInitializerInput::TPtr cont result.emplace_back(new NInitializer::TGenericTableModifier(request, "add_column_SeqNo")); } + + { + Ydb::Table::AlterTableRequest request; + request.set_session_id(""); + request.set_path(tablePath); + request.mutable_alter_partitioning_settings()->set_min_partitions_count(50); + request.mutable_alter_partitioning_settings()->set_partitioning_by_load(::Ydb::FeatureFlag_Status::FeatureFlag_Status_ENABLED); + + result.emplace_back(new NInitializer::TGenericTableModifier(request, "enable_autopartitioning_by_load")); + } } result.emplace_back(NInitializer::TACLModifierConstructor::GetReadOnlyModifier(tablePath, "acl")); controller->OnPreparationFinished(result); diff --git a/ydb/core/persqueue/writer/partition_chooser_impl__abstract_chooser_actor.h b/ydb/core/persqueue/writer/partition_chooser_impl__abstract_chooser_actor.h index b6cb8688a8fb..ce9f1b20c2c1 100644 --- a/ydb/core/persqueue/writer/partition_chooser_impl__abstract_chooser_actor.h +++ b/ydb/core/persqueue/writer/partition_chooser_impl__abstract_chooser_actor.h @@ -217,7 +217,7 @@ class TAbstractPartitionChooserActor: public TActorBootstrapped { TThis::Become(&TThis::StateCheckPartition); if (!Partition) { - return ReplyError(ErrorCode::INITIALIZING, "Partition not choosed", ctx); + return ReplyError(TThis::PreferedPartition ? 
ErrorCode::WRITE_ERROR_PARTITION_INACTIVE : ErrorCode::INITIALIZING, "Partition not choosed", ctx); } PartitionHelper.Open(Partition->TabletId, ctx); diff --git a/ydb/core/persqueue/writer/partition_chooser_impl__old_chooser_actor.h b/ydb/core/persqueue/writer/partition_chooser_impl__old_chooser_actor.h index 33e1f3dd2786..0b89f1e2039f 100644 --- a/ydb/core/persqueue/writer/partition_chooser_impl__old_chooser_actor.h +++ b/ydb/core/persqueue/writer/partition_chooser_impl__old_chooser_actor.h @@ -100,6 +100,7 @@ class TPartitionChooserActor: public TAbstractPartitionChooserActorGetTypeRewrite()) { HFunc(TEvPersQueue::TEvGetPartitionIdForWriteResponse, Handle); + HFunc(TEvTabletPipe::TEvClientConnected, Handle); HFunc(TEvTabletPipe::TEvClientDestroyed, Handle); SFunc(TEvents::TEvPoison, TThis::Die); } diff --git a/ydb/core/persqueue/writer/partition_chooser_impl__sm_chooser_actor.h b/ydb/core/persqueue/writer/partition_chooser_impl__sm_chooser_actor.h index 2342ae49f308..c44afa64c239 100644 --- a/ydb/core/persqueue/writer/partition_chooser_impl__sm_chooser_actor.h +++ b/ydb/core/persqueue/writer/partition_chooser_impl__sm_chooser_actor.h @@ -71,7 +71,7 @@ class TSMPartitionChooserActor: public TAbstractPartitionChooserActor GetPropertiesMap(TPoolSettings& settings, std::unordered_map properties = { {"concurrent_query_limit", &settings.ConcurrentQueryLimit}, {"queue_size", &settings.QueueSize}, - {"query_memory_limit_percent_per_node", &settings.QueryMemoryLimitPercentPerNode} + {"query_memory_limit_percent_per_node", &settings.QueryMemoryLimitPercentPerNode}, + {"database_load_cpu_threshold", &settings.DatabaseLoadCpuThreshold} }; if (!restricted) { properties.insert({"query_cancel_after_seconds", &settings.QueryCancelAfter}); diff --git a/ydb/core/resource_pools/resource_pool_settings.h b/ydb/core/resource_pools/resource_pool_settings.h index c2dc319838e1..cecfd4eefb59 100644 --- a/ydb/core/resource_pools/resource_pool_settings.h +++ 
b/ydb/core/resource_pools/resource_pool_settings.h @@ -17,6 +17,8 @@ struct TPoolSettings { TPercent QueryMemoryLimitPercentPerNode = -1; // Percent from node memory capacity, -1 = disabled + TPercent DatabaseLoadCpuThreshold = -1; // -1 = disabled + bool operator==(const TPoolSettings& other) const = default; }; diff --git a/ydb/core/scheme/scheme_tablecell.cpp b/ydb/core/scheme/scheme_tablecell.cpp index 2ee86265b1a4..c2b541bb7984 100644 --- a/ydb/core/scheme/scheme_tablecell.cpp +++ b/ydb/core/scheme/scheme_tablecell.cpp @@ -307,46 +307,6 @@ bool TSerializedCellMatrix::DoTryParse(const TString& data) { return TryDeserializeCellMatrix(data, Buf, Cells, RowCount, ColCount); } -TCellsBatcher::TCellsBatcher(ui16 colCount, ui64 maxBytesPerBatch) - : ColCount(colCount) - , MaxBytesPerBatch(maxBytesPerBatch) { -} - -bool TCellsBatcher::IsEmpty() const { - return Batches.empty(); -} - -TCellsBatcher::TBatch TCellsBatcher::Flush(bool force) { - TBatch res; - if ((!Batches.empty() && force) || Batches.size() > 1) { - res = std::move(Batches.front()); - Batches.pop_front(); - } - return res; -} - -ui64 TCellsBatcher::AddRow(TArrayRef cells) { - Y_ABORT_UNLESS(cells.size() == ColCount); - ui64 newMemory = 0; - for (const auto& cell : cells) { - newMemory += cell.Size(); - } - if (Batches.empty() || newMemory + sizeof(TCellHeader) * ColCount + Batches.back().MemorySerialized > MaxBytesPerBatch) { - Batches.emplace_back(); - Batches.back().Memory = 0; - Batches.back().MemorySerialized = CellMatrixHeaderSize; - } - - for (auto& cell : cells) { - Batches.back().Data.emplace_back(std::move(cell)); - } - - Batches.back().Memory += newMemory; - Batches.back().MemorySerialized += newMemory + sizeof(TCellHeader) * ColCount; - - return newMemory; -} - void TCellsStorage::Reset(TArrayRef cells) { size_t cellsSize = cells.size(); @@ -499,5 +459,13 @@ TString DbgPrintTuple(const TDbTupleRef& row, const NScheme::TTypeRegistry& type return res; } +size_t GetCellMatrixHeaderSize() { + 
return CellMatrixHeaderSize; +} + +size_t GetCellHeaderSize() { + return sizeof(TCellHeader); +} + } // namespace NKikimr diff --git a/ydb/core/scheme/scheme_tablecell.h b/ydb/core/scheme/scheme_tablecell.h index b7635a424725..0e501c10d587 100644 --- a/ydb/core/scheme/scheme_tablecell.h +++ b/ydb/core/scheme/scheme_tablecell.h @@ -541,6 +541,10 @@ class TSerializedCellVec { return Cells; } + explicit operator bool() const { + return !Cells.empty(); + } + static void Serialize(TString& res, TConstArrayRef cells); static TString Serialize(TConstArrayRef cells); @@ -653,29 +657,6 @@ class TSerializedCellMatrix { ui16 ColCount; }; -class TCellsBatcher { -public: - explicit TCellsBatcher(ui16 colCount, ui64 maxBytesPerBatch); - - bool IsEmpty() const; - - struct TBatch { - ui64 Memory = 0; - ui64 MemorySerialized = 0; - TVector Data; - }; - - TBatch Flush(bool force); - - ui64 AddRow(TArrayRef cells); - -private: - std::deque Batches; - - ui16 ColCount; - ui64 MaxBytesPerBatch; -}; - class TCellsStorage { public: @@ -760,4 +741,7 @@ void DbgPrintValue(TString&, const TCell&, NScheme::TTypeInfo typeInfo); TString DbgPrintCell(const TCell& r, NScheme::TTypeInfo typeInfo, const NScheme::TTypeRegistry& typeRegistry); TString DbgPrintTuple(const TDbTupleRef& row, const NScheme::TTypeRegistry& typeRegistry); +size_t GetCellMatrixHeaderSize(); +size_t GetCellHeaderSize(); + } diff --git a/ydb/core/security/certificate_check/cert_auth_processor.cpp b/ydb/core/security/certificate_check/cert_auth_processor.cpp index f2effa29e9a7..f2275b35a441 100644 --- a/ydb/core/security/certificate_check/cert_auth_processor.cpp +++ b/ydb/core/security/certificate_check/cert_auth_processor.cpp @@ -1,6 +1,7 @@ #include "cert_auth_processor.h" #include +#include #include #include #include @@ -100,6 +101,56 @@ TVector> X509CertificateReader::ReadIssuerTerms(cons return ReadTerms(name); } +static void FreeList(GENERAL_NAMES* list) { + sk_GENERAL_NAME_pop_free(list, GENERAL_NAME_free); +} + 
+TVector X509CertificateReader::ReadSubjectDns(const X509Ptr& x509, const std::vector>& subjectTerms) { + TVector result; + // 1. Subject's common name (CN) must be a subject DNS name, so add it to DNS names of subject first + for (const auto& [k, v] : subjectTerms) { + if (k == "CN") { + result.emplace_back(v); + } + } + + using TGeneralNamesPtr = std::unique_ptr>; + TGeneralNamesPtr subjectAltNames((GENERAL_NAMES*)X509_get_ext_d2i(x509.get(), NID_subject_alt_name, NULL, NULL)); + if (!subjectAltNames) { + return result; + } + const int subjectAltNamesCount = sk_GENERAL_NAME_num(subjectAltNames.get()); + if (subjectAltNamesCount <= 0) { + return result; + } + + result.reserve(static_cast(subjectAltNamesCount) + result.size()); + // 2. Additionally find subject alternative names with type=DNS + for (int i = 0; i < subjectAltNamesCount; ++i) { + const GENERAL_NAME* name = sk_GENERAL_NAME_value(subjectAltNames.get(), i); + if (!name) { + continue; + } + if (name->type == GEN_DNS) { + const ASN1_STRING* value = name->d.dNSName; + if (!value) { + continue; + } + + const char* data = reinterpret_cast(ASN1_STRING_get0_data(value)); + if (!data) { + continue; + } + int size = ASN1_STRING_length(value); + if (size <= 0) { + continue; + } + result.emplace_back(data, static_cast(size)); + } + } + return result; +} + TString X509CertificateReader::GetFingerprint(const X509Ptr& x509) { static constexpr size_t FINGERPRINT_LENGTH = SHA_DIGEST_LENGTH; unsigned char fingerprint[FINGERPRINT_LENGTH]; @@ -109,14 +160,16 @@ TString X509CertificateReader::GetFingerprint(const X509Ptr& x509) { return HexEncode(fingerprint, FINGERPRINT_LENGTH); } -TCertificateAuthorizationParams::TCertificateAuthorizationParams(const TDN& dn, bool requireSameIssuer, const std::vector& groups) +TCertificateAuthorizationParams::TCertificateAuthorizationParams(const TDN& dn, const std::optional& subjectDns, bool requireSameIssuer, const std::vector& groups) : SubjectDn(dn) + , SubjectDns(subjectDns) , 
RequireSameIssuer(requireSameIssuer) , Groups(groups) {} -TCertificateAuthorizationParams::TCertificateAuthorizationParams(TDN&& dn, bool requireSameIssuer, std::vector&& groups) +TCertificateAuthorizationParams::TCertificateAuthorizationParams(TDN&& dn, std::optional&& subjectDns, bool requireSameIssuer, std::vector&& groups) : SubjectDn(std::move(dn)) + , SubjectDns(std::move(subjectDns)) , RequireSameIssuer(requireSameIssuer) , Groups(std::move(groups)) {} @@ -127,59 +180,44 @@ TCertificateAuthorizationParams::TDN& TCertificateAuthorizationParams::TDN::AddR } TCertificateAuthorizationParams::operator bool() const { - return SubjectDn; + return SubjectDn || SubjectDns; } -bool TCertificateAuthorizationParams::CheckSubject(const std::unordered_map>& subjectDescription) const { - bool isDescriptionMatched = false; - for (const auto& rdn: SubjectDn.RDNs) { - isDescriptionMatched = false; +bool TCertificateAuthorizationParams::CheckSubject(const std::unordered_map>& subjectDescription, const std::vector& subjectDns) const { + for (const TRDN& rdn: SubjectDn.RDNs) { auto fieldIt = subjectDescription.find(rdn.Attribute); if (fieldIt == subjectDescription.cend()) { - break; + return false; } const auto& attributeValues = fieldIt->second; - bool attributeMatched = false; - for (const auto& attributeValue : attributeValues) { - attributeMatched = false; - for (const auto& value: rdn.Values) { - if (value == attributeValue) { - attributeMatched = true; - break; - } - } - if (!attributeMatched) { - for (const auto& suffix: rdn.Suffixes) { - if (attributeValue.EndsWith(suffix)) { - attributeMatched = true; - break; - } - } - } - if (!attributeMatched) { + if (!rdn.Match(attributeValues)) { + return false; + } + } + + if (SubjectDns) { + bool dnsMatched = false; + for (const TString& dns : subjectDns) { + if (SubjectDns->Match(dns)) { + dnsMatched = true; break; } } - if (!attributeMatched) { - isDescriptionMatched = false; - break; + if (!dnsMatched) { + return false; } - 
isDescriptionMatched = true; } - if (isDescriptionMatched) { - return true; - } - return false; + return true; } TCertificateAuthorizationParams::TDN::operator bool() const { return !RDNs.empty(); } -TCertificateAuthorizationParams::TRDN::TRDN(const TString& Attribute) - :Attribute(Attribute) +TCertificateAuthorizationParams::TRDN::TRDN(const TString& attribute) + : Attribute(attribute) {} TCertificateAuthorizationParams::TRDN& TCertificateAuthorizationParams::TRDN::AddValue(const TString& val) @@ -194,4 +232,30 @@ TCertificateAuthorizationParams::TRDN& TCertificateAuthorizationParams::TRDN::Ad return *this; } +bool TCertificateAuthorizationParams::TRDN::Match(const TString& value) const +{ + for (const auto& v : Values) { + if (value == v) { + return true; + } + } + for (const auto& s : Suffixes) { + if (value.EndsWith(s)) { + return true; + } + } + + return false; +} + +bool TCertificateAuthorizationParams::TRDN::Match(const std::vector& values) const +{ + for (const auto& value : values) { + if (!Match(value)) { + return false; + } + } + return true; +} + } //namespace NKikimr { diff --git a/ydb/core/security/certificate_check/cert_auth_processor.h b/ydb/core/security/certificate_check/cert_auth_processor.h index 42ec4bf3981c..bcbc1bb7c8a1 100644 --- a/ydb/core/security/certificate_check/cert_auth_processor.h +++ b/ydb/core/security/certificate_check/cert_auth_processor.h @@ -15,9 +15,11 @@ struct TCertificateAuthorizationParams { TVector Values; TVector Suffixes; - TRDN(const TString& Attribute); + TRDN(const TString& attribute); TRDN& AddValue(const TString& val); TRDN& AddSuffix(const TString& suffix); + bool Match(const std::vector& values) const; + bool Match(const TString& value) const; }; struct TDN { @@ -27,11 +29,11 @@ struct TCertificateAuthorizationParams { operator bool () const; }; - TCertificateAuthorizationParams(const TDN& dn = TDN(), bool requireSameIssuer = true, const std::vector& groups = {}); - TCertificateAuthorizationParams(TDN&& dn, bool 
requireSameIssuer = true, std::vector&& groups = {}); + TCertificateAuthorizationParams(const TDN& dn = TDN(), const std::optional& subjectDns = std::nullopt, bool requireSameIssuer = true, const std::vector& groups = {}); + TCertificateAuthorizationParams(TDN&& dn, std::optional&& subjectDns, bool requireSameIssuer = true, std::vector&& groups = {}); operator bool () const; - bool CheckSubject(const std::unordered_map>& subjectDescription) const; + bool CheckSubject(const std::unordered_map>& subjectDescription, const std::vector& subjectDns) const; void SetSubjectDn(const TDN& subjectDn) { SubjectDn = subjectDn; } @@ -42,6 +44,7 @@ struct TCertificateAuthorizationParams { bool CanCheckNodeByAttributeCN = false; TDN SubjectDn; + std::optional SubjectDns; bool RequireSameIssuer = true; std::vector Groups; }; @@ -61,6 +64,7 @@ struct X509CertificateReader { static X509Ptr ReadCertAsPEM(const TStringBuf& cert); static TVector> ReadSubjectTerms(const X509Ptr& x509); + static TVector ReadSubjectDns(const X509Ptr& x509, const std::vector>& subjectTerms); static TVector> ReadAllSubjectTerms(const X509Ptr& x509); static TVector> ReadIssuerTerms(const X509Ptr& x509); static TString GetFingerprint(const X509Ptr& x509); diff --git a/ydb/core/security/certificate_check/cert_auth_utils.cpp b/ydb/core/security/certificate_check/cert_auth_utils.cpp index a310c197d2f0..82c380d9df50 100644 --- a/ydb/core/security/certificate_check/cert_auth_utils.cpp +++ b/ydb/core/security/certificate_check/cert_auth_utils.cpp @@ -17,7 +17,7 @@ namespace NKikimr { -std::vector GetCertificateAuthorizationParams(const NKikimrConfig::TClientCertificateAuthorization &clientCertificateAuth) { +std::vector GetCertificateAuthorizationParams(const NKikimrConfig::TClientCertificateAuthorization& clientCertificateAuth) { std::vector certAuthParams; certAuthParams.reserve(clientCertificateAuth.ClientCertificateDefinitionsSize()); @@ -33,9 +33,19 @@ std::vector GetCertificateAuthorizationParams(c } 
dn.AddRDN(std::move(rdn)); } - if (dn) { + std::optional subjectDns; + if (const auto& subjectDnsCfg = clientCertificateDefinition.GetSubjectDns(); subjectDnsCfg.ValuesSize() || subjectDnsCfg.SuffixesSize()) { + TCertificateAuthorizationParams::TRDN& dns = subjectDns.emplace(TString()); + for (const auto& value: subjectDnsCfg.GetValues()) { + dns.AddValue(value); + } + for (const auto& suffix: subjectDnsCfg.GetSuffixes()) { + dns.AddSuffix(suffix); + } + } + if (dn || subjectDns) { std::vector groups(clientCertificateDefinition.GetMemberGroups().cbegin(), clientCertificateDefinition.GetMemberGroups().cend()); - certAuthParams.emplace_back(std::move(dn), clientCertificateDefinition.GetRequireSameIssuer(), std::move(groups)); + certAuthParams.emplace_back(std::move(dn), std::move(subjectDns), clientCertificateDefinition.GetRequireSameIssuer(), std::move(groups)); } } @@ -130,8 +140,8 @@ int FillNameFromProps(X509_NAME* name, const TProps& props) { return 1; } - if (!props.Coutry.empty()) { - X509_NAME_add_entry_by_txt(name, SN_countryName, MBSTRING_ASC, (const unsigned char*)props.Coutry.c_str(), -1, -1, 0); + if (!props.Country.empty()) { + X509_NAME_add_entry_by_txt(name, SN_countryName, MBSTRING_ASC, (const unsigned char*)props.Country.c_str(), -1, -1, 0); } if (!props.State.empty()) { @@ -377,7 +387,7 @@ X509REQPtr GenerateRequest(PKeyPtr& pkey, const TProps& props) { return std::move(request); } -X509Ptr SingRequest(X509REQPtr& request, X509Ptr& rootCert, PKeyPtr& rootKey, const TProps& props) { +X509Ptr SignRequest(X509REQPtr& request, X509Ptr& rootCert, PKeyPtr& rootKey, const TProps& props) { auto* pktmp = X509_REQ_get0_pubkey(request.get()); // X509_REQ_get0_pubkey returns the key, that shouldn't freed CHECK(pktmp, "Error unpacking public key from request."); @@ -455,7 +465,7 @@ TCertAndKey GenerateSignedCert(const TCertAndKey& rootCA, const TProps& props) { auto rootCert = ReadCertAsPEM(rootCA.Certificate); auto rootKey = 
ReadPrivateKeyAsPEM(rootCA.PrivateKey); - auto cert = SingRequest(request, rootCert, rootKey, props); // NID_authority_key_identifier must see ca + auto cert = SignRequest(request, rootCert, rootKey, props); // NID_authority_key_identifier must see ca TCertAndKey result; result.Certificate = WriteAsPEM(cert); @@ -475,7 +485,7 @@ TProps TProps::AsCA() { TProps props; props.SecondsValid = 3*365 * 24 * 60 *60; // 3 years - props.Coutry = "RU"; + props.Country = "RU"; props.State = "MSK"; props.Location = "MSK"; props.Organization = "YA"; diff --git a/ydb/core/security/certificate_check/cert_auth_utils.h b/ydb/core/security/certificate_check/cert_auth_utils.h index 9bffa64093db..f76a94665029 100644 --- a/ydb/core/security/certificate_check/cert_auth_utils.h +++ b/ydb/core/security/certificate_check/cert_auth_utils.h @@ -22,7 +22,7 @@ struct TCertAndKey { struct TProps { long SecondsValid = 0; - std::string Coutry; // C + std::string Country; // C std::string State; // ST std::string Location; // L std::string Organization; // O diff --git a/ydb/core/security/certificate_check/cert_check.cpp b/ydb/core/security/certificate_check/cert_check.cpp index 18808760c350..85b6c928c27e 100644 --- a/ydb/core/security/certificate_check/cert_check.cpp +++ b/ydb/core/security/certificate_check/cert_check.cpp @@ -71,6 +71,7 @@ TCertificateChecker::TReadClientSubjectResult TCertificateChecker::ReadSubjectFr result.Error = { .Message = "Cannot extract subject from client certificate", .Retryable = false }; return result; } + result.SubjectDns = X509CertificateReader::ReadSubjectDns(pemCertificates.ClientCertX509, result.SubjectDn); return result; } @@ -84,14 +85,14 @@ TString TCertificateChecker::CreateUserSidFromSubjectDn(const std::vector>& subjectDn, const TCertificateAuthorizationParams& authParams) const { +TEvTicketParser::TError TCertificateChecker::CheckClientSubject(const TReadClientSubjectResult& subjectInfo, const TCertificateAuthorizationParams& authParams) const { 
std::unordered_map> subjectDescription; - for (const auto& [attribute, value] : subjectDn) { + for (const auto& [attribute, value] : subjectInfo.SubjectDn) { auto& attributeValues = subjectDescription[attribute]; attributeValues.push_back(value); } - if (!authParams.CheckSubject(subjectDescription)) { + if (!authParams.CheckSubject(subjectDescription, subjectInfo.SubjectDns)) { return { .Message = "Client certificate failed verification", .Retryable = false }; } return {}; @@ -128,7 +129,7 @@ TCertificateChecker::TCertificateCheckResult TCertificateChecker::CheckClientCer continue; } - auto checkClientSubjectError = CheckClientSubject(readClientSubjectResult.SubjectDn, authParams); + auto checkClientSubjectError = CheckClientSubject(readClientSubjectResult, authParams); if (!checkClientSubjectError.empty()) { continue; } diff --git a/ydb/core/security/certificate_check/cert_check.h b/ydb/core/security/certificate_check/cert_check.h index 7525cc0fb600..84cb978e32fc 100644 --- a/ydb/core/security/certificate_check/cert_check.h +++ b/ydb/core/security/certificate_check/cert_check.h @@ -27,6 +27,7 @@ class TCertificateChecker { struct TReadClientSubjectResult { std::vector> SubjectDn; + std::vector SubjectDns; // Subject alternative names, DNS TEvTicketParser::TError Error; }; @@ -47,7 +48,7 @@ class TCertificateChecker { TEvTicketParser::TError CheckIssuers(const TPemCertificates& pemCertificates) const; TReadClientSubjectResult ReadSubjectFromClientCertificate(const TPemCertificates& pemCertificates) const; TString CreateUserSidFromSubjectDn(const std::vector>& subjectDn) const; - TEvTicketParser::TError CheckClientSubject(const std::vector>& subjectDn, const TCertificateAuthorizationParams& authParams) const; + TEvTicketParser::TError CheckClientSubject(const TReadClientSubjectResult& subjectInfo, const TCertificateAuthorizationParams& authParams) const; TCertificateCheckResult DefaultCheckClientCertificate(const TPemCertificates& pemCertificates) const; 
TCertificateCheckResult CheckClientCertificate(const TPemCertificates& pemCertificates) const; TString GetDefaultGroup() const; diff --git a/ydb/core/security/certificate_check/cert_check_ut.cpp b/ydb/core/security/certificate_check/cert_check_ut.cpp new file mode 100644 index 000000000000..d9d1d48c0912 --- /dev/null +++ b/ydb/core/security/certificate_check/cert_check_ut.cpp @@ -0,0 +1,282 @@ +#include "cert_check.h" +#include "cert_auth_utils.h" + +#include + +#include +#include + +namespace NKikimr { + +TTempFile SaveToTempFile(const std::string& content, const char* prefix = "cert") { + TTempFile file = MakeTempName(nullptr, prefix); + TUnbufferedFileOutput(file.Name()).Write(content); + return file; +} + +Y_UNIT_TEST_SUITE(TCertificateCheckerTest) { + Y_UNIT_TEST(CheckSubjectDns) { + using TTestSubjectTerm = std::pair>; + struct TTestSubjectDnsData { + TString CommonName = "localhost"; + std::vector AltNames; + std::vector DnsValues; + std::vector DnsSuffixes; + std::optional SubjectTerm; // one is enough, because we test DNS now + bool CheckResult = false; + }; + + std::vector tests = { + { + .AltNames = { + "IP:1.2.3.4", + "DNS:other.name.net", + "DNS:node-1.cluster.net", + "DNS:*.cluster.net", + "DNS:balancer.cluster.net", + }, + .DnsSuffixes = { + ".cluster.net", + }, + .CheckResult = true, + }, + { + .AltNames = { + "IP:1.2.3.4", // not DNS + }, + .DnsValues = { + "1.2.3.4", + }, + .CheckResult = false, + }, + { + .CommonName = "the.only.name.net", // CN is also FQDN + .DnsValues = { + "the.only.name.net", + }, + .CheckResult = true, + }, + { + .CommonName = "the.only.name.net", // CN is also FQDN + .DnsSuffixes = { + ".name.net", + ".some.other.domain.net", + }, + .CheckResult = true, + }, + { + .CommonName = "", // no DNS in cert + .DnsSuffixes = { + ".cluster.net", + }, + .CheckResult = false, + }, + { + .CommonName = "", // no DNS in cert + .DnsValues = { + "node-1.cluster.net", + }, + .CheckResult = false, + }, + { + // Complex matching + .AltNames = 
{ + "IP:1.2.3.4", + "DNS:other.name.net", + "DNS:node-1.cluster.net", + "DNS:*.cluster.net", + "DNS:balancer.cluster.net", + "DNS:my-host.us", + }, + .DnsValues = { + "hello.su", + "balancer.cluster.net", + }, + .DnsSuffixes = { + ".123.us", + ".cluster-0.net", + ".cluster-1.net", + }, + .CheckResult = true, + }, + { + // Complex matching + .AltNames = { + "IP:1.2.3.4", + "DNS:other.name.net", + "DNS:node-1.cluster.net", + "DNS:*.cluster.net", + "DNS:balancer.cluster.net", + "DNS:my-host.us", + }, + .DnsValues = { + "hello.su", + "no-name", + }, + .DnsSuffixes = { + ".123.us", + ".cluster-0.net", + ".cluster-1.net", + "my-host.us", + }, + .CheckResult = true, + }, + { + // Additional conditions + // No DNS + // Subject OK + .AltNames = { + "DNS:other.name.net", + "DNS:node-1.cluster.net", + "DNS:*.cluster.net", + "DNS:balancer.cluster.net", + }, + .SubjectTerm = TTestSubjectTerm{ + "L", + {"TLV", "MSK"}, + }, + .CheckResult = true, + }, + { + // Additional conditions + // No DNS + // Subject not OK + .AltNames = { + "DNS:other.name.net", + "DNS:node-1.cluster.net", + "DNS:*.cluster.net", + "DNS:balancer.cluster.net", + }, + .SubjectTerm = TTestSubjectTerm{ + "O", + {"Google", "Meta"}, + }, + .CheckResult = false, + }, + { + // Additional conditions + // DNS OK + // Subject OK + .AltNames = { + "DNS:other.name.net", + "DNS:node-1.cluster.net", + "DNS:*.cluster.net", + "DNS:balancer.cluster.net", + }, + .DnsValues = { + "node-1.cluster.net", + }, + .SubjectTerm = TTestSubjectTerm{ + "L", + {"TLV", "MSK"}, + }, + .CheckResult = true, + }, + { + // Additional conditions + // DNS not OK + // Subject OK + .AltNames = { + "DNS:other.name.net", + "DNS:node-1.cluster.net", + "DNS:*.cluster.net", + "DNS:balancer.cluster.net", + }, + .DnsSuffixes = { + ".my-cluster.net", + }, + .SubjectTerm = TTestSubjectTerm{ + "L", + {"TLV", "MSK"}, + }, + .CheckResult = false, + }, + { + // Additional conditions + // DNS not OK + // Subject not OK + .AltNames = { + "DNS:other.name.net", + 
"DNS:node-1.cluster.net", + "DNS:*.cluster.net", + "DNS:balancer.cluster.net", + }, + .DnsSuffixes = { + ".my-cluster.net", + }, + .SubjectTerm = TTestSubjectTerm{ + "O", + {"Google", "Meta"}, + }, + .CheckResult = false, + }, + { + // Additional conditions + // DNS OK + // Subject not OK + .AltNames = { + "DNS:other.name.net", + "DNS:node-1.cluster.net", + "DNS:*.cluster.net", + "DNS:balancer.cluster.net", + "DNS:balancer.my-cluster.net", + }, + .DnsSuffixes = { + ".my-cluster.net", + }, + .SubjectTerm = TTestSubjectTerm{ + "O", + {"Google", "Meta"}, + }, + .CheckResult = false, + }, + }; + + TCertAndKey ca = GenerateCA(TProps::AsCA()); + + for (size_t testNumber = 0; testNumber < tests.size(); ++testNumber) { + const TTestSubjectDnsData& test = tests[testNumber]; + TProps props = TProps::AsClientServer(); + props.CommonName = test.CommonName; + props.AltNames = test.AltNames; + TCertAndKey clientServer = GenerateSignedCert(ca, props); + VerifyCert(clientServer.Certificate, ca.Certificate); + + TCertificateAuthValues opts; + opts.Domain = "cert"; + TTempFile serverCert = SaveToTempFile(clientServer.Certificate); + opts.ServerCertificateFilePath = serverCert.Name(); + auto* defs = opts.ClientCertificateAuthorization.AddClientCertificateDefinitions(); + defs->AddMemberGroups("ClusterNodeGroup@cert"); + + if (!test.DnsValues.empty() || !test.DnsSuffixes.empty()) { + auto* dnsCondition = defs->MutableSubjectDns(); + for (const TString& v : test.DnsValues) { + dnsCondition->AddValues(v); + } + for (const TString& s : test.DnsSuffixes) { + dnsCondition->AddSuffixes(s); + } + } + if (test.SubjectTerm) { + auto* t = defs->AddSubjectTerms(); + t->SetShortName(test.SubjectTerm->first); + for (const TString& v : test.SubjectTerm->second) { + t->AddValues(v); + } + } + TCertificateChecker checker(opts); + + TCertificateChecker::TCertificateCheckResult result = checker.Check(TString(clientServer.Certificate)); + if (test.CheckResult) { + UNIT_ASSERT_C(result.Error.empty(), 
"Test number: " << testNumber << ". Error: " << result.Error); + UNIT_ASSERT_VALUES_EQUAL_C(result.Groups.size(), 1, "Test number: " << testNumber); + UNIT_ASSERT_VALUES_EQUAL_C(result.Groups[0], "ClusterNodeGroup@cert", "Test number: " << testNumber); + } else { + UNIT_ASSERT_C(!result.Error.empty(), "Test number: " << testNumber); + } + } + } +} + +} // namespace NKikimr diff --git a/ydb/core/security/certificate_check/cert_utils_ut.cpp b/ydb/core/security/certificate_check/cert_utils_ut.cpp index 583182da7360..9dfce0ebb59a 100644 --- a/ydb/core/security/certificate_check/cert_utils_ut.cpp +++ b/ydb/core/security/certificate_check/cert_utils_ut.cpp @@ -29,7 +29,7 @@ Y_UNIT_TEST_SUITE(TCertificateAuthUtilsTest) { .AddRDN(TCertificateAuthorizationParams::TRDN("O").AddValue("YA")) .AddRDN(TCertificateAuthorizationParams::TRDN("OU").AddValue("UtTest").AddValue("OtherUnit")) .AddRDN(TCertificateAuthorizationParams::TRDN("CN").AddValue("localhost").AddSuffix(".yandex.ru")); - TCertificateAuthorizationParams authParams(std::move(dn)); + TCertificateAuthorizationParams authParams(std::move(dn), std::nullopt); { std::unordered_map> subjectTerms; @@ -40,7 +40,7 @@ Y_UNIT_TEST_SUITE(TCertificateAuthUtilsTest) { subjectTerms["OU"].push_back("UtTest"); subjectTerms["CN"].push_back("localhost"); - UNIT_ASSERT(authParams.CheckSubject(subjectTerms)); + UNIT_ASSERT(authParams.CheckSubject(subjectTerms, {})); } { @@ -53,7 +53,7 @@ Y_UNIT_TEST_SUITE(TCertificateAuthUtilsTest) { subjectTerms["OU"].push_back("OtherUnit"); subjectTerms["CN"].push_back("localhost"); - UNIT_ASSERT(authParams.CheckSubject(subjectTerms)); + UNIT_ASSERT(authParams.CheckSubject(subjectTerms, {})); } { @@ -66,7 +66,7 @@ Y_UNIT_TEST_SUITE(TCertificateAuthUtilsTest) { subjectTerms["OU"].push_back("WrongUnit"); subjectTerms["CN"].push_back("localhost"); - UNIT_ASSERT(!authParams.CheckSubject(subjectTerms)); + UNIT_ASSERT(!authParams.CheckSubject(subjectTerms, {})); } { @@ -78,7 +78,7 @@ 
Y_UNIT_TEST_SUITE(TCertificateAuthUtilsTest) { subjectTerms["OU"].push_back("UtTest"); subjectTerms["CN"].push_back("test.yandex.ru"); - UNIT_ASSERT(authParams.CheckSubject(subjectTerms)); + UNIT_ASSERT(authParams.CheckSubject(subjectTerms, {})); } { @@ -91,7 +91,7 @@ Y_UNIT_TEST_SUITE(TCertificateAuthUtilsTest) { subjectTerms["CN"].push_back("test.yandex.ru"); subjectTerms["ELSE"].push_back("WhatEver"); - UNIT_ASSERT(authParams.CheckSubject(subjectTerms)); + UNIT_ASSERT(authParams.CheckSubject(subjectTerms, {})); } { @@ -103,7 +103,7 @@ Y_UNIT_TEST_SUITE(TCertificateAuthUtilsTest) { subjectTerms["OU"].push_back("UtTest"); subjectTerms["CN"].push_back("test.yandex.ru"); - UNIT_ASSERT(!authParams.CheckSubject(subjectTerms)); + UNIT_ASSERT(!authParams.CheckSubject(subjectTerms, {})); } { @@ -115,7 +115,7 @@ Y_UNIT_TEST_SUITE(TCertificateAuthUtilsTest) { subjectTerms["OU"].push_back("UtTest"); subjectTerms["CN"].push_back("test.not-yandex.ru"); - UNIT_ASSERT(!authParams.CheckSubject(subjectTerms)); + UNIT_ASSERT(!authParams.CheckSubject(subjectTerms, {})); } { @@ -127,7 +127,19 @@ Y_UNIT_TEST_SUITE(TCertificateAuthUtilsTest) { subjectTerms["OU"].push_back("UtTest"); subjectTerms["CN"].push_back("test.yandex.ru"); - UNIT_ASSERT(!authParams.CheckSubject(subjectTerms)); + UNIT_ASSERT(!authParams.CheckSubject(subjectTerms, {})); + } + + { + std::unordered_map> subjectTerms; + //subjectTerms["C"] = "RU"; + subjectTerms["ST"].push_back("MSK"); + subjectTerms["L"].push_back("MSK"); + subjectTerms["O"].push_back("YA"); + subjectTerms["OU"].push_back("UtTest"); + subjectTerms["CN"].push_back("test.yandex.ru"); + + UNIT_ASSERT(!authParams.CheckSubject(subjectTerms, {"test.yandex.ru"})); } } } diff --git a/ydb/core/security/certificate_check/ut/ya.make b/ydb/core/security/certificate_check/ut/ya.make index 228fa7162126..5e0c281d3093 100644 --- a/ydb/core/security/certificate_check/ut/ya.make +++ b/ydb/core/security/certificate_check/ut/ya.make @@ -13,6 +13,7 @@ PEERDIR( 
YQL_LAST_ABI_VERSION() SRCS( + cert_check_ut.cpp cert_utils_ut.cpp ) diff --git a/ydb/core/security/ldap_auth_provider/ldap_auth_provider.cpp b/ydb/core/security/ldap_auth_provider/ldap_auth_provider.cpp index a3ec33e9adf5..857015d85c8e 100644 --- a/ydb/core/security/ldap_auth_provider/ldap_auth_provider.cpp +++ b/ydb/core/security/ldap_auth_provider/ldap_auth_provider.cpp @@ -2,6 +2,8 @@ #include #include #include +#include +#include #include "ldap_auth_provider.h" #include "ldap_utils.h" @@ -69,6 +71,7 @@ class TLdapAuthProvider : public NActors::TActorBootstrapped TLdapAuthProvider(const NKikimrProto::TLdapAuthentication& settings) : Settings(settings) , FilterCreator(Settings) + , UrisCreator(Settings, Settings.GetPort() != 0 ? Settings.GetPort() : NKikimrLdap::GetPort(Settings.GetScheme())) { const TString& requestedGroupAttribute = Settings.GetRequestedGroupAttribute(); RequestedAttributes[0] = const_cast(requestedGroupAttribute.empty() ? "memberOf" : requestedGroupAttribute.c_str()); @@ -135,18 +138,33 @@ class TLdapAuthProvider : public NActors::TActorBootstrapped } LDAPMessage* entry = NKikimrLdap::FirstEntry(ld, searchUserResponse.SearchMessage); BerElement* ber = nullptr; - std::vector groupsDn; + std::vector directUserGroups; char* attribute = NKikimrLdap::FirstAttribute(ld, entry, &ber); if (attribute != nullptr) { - groupsDn = NKikimrLdap::GetAllValuesOfAttribute(ld, entry, attribute); + directUserGroups = NKikimrLdap::GetAllValuesOfAttribute(ld, entry, attribute); NKikimrLdap::MemFree(attribute); } if (ber) { NKikimrLdap::BerFree(ber, 0); } + std::vector allUserGroups; + auto& extendedSettings = Settings.GetExtendedSettings(); + if (extendedSettings.GetEnableNestedGroupsSearch() && !directUserGroups.empty()) { + // Active Directory has special matching rule to fetch nested groups in one request it is MatchingRuleInChain + // We don`t know what is ldap server. Is it Active Directory or OpenLdap or other server? 
+ // If using MatchingRuleInChain return empty list of groups it means that ldap server isn`t Active Directory + // but it is known that there are groups and we are trying to do tree traversal + allUserGroups = TryToGetGroupsUseMatchingRuleInChain(ld, entry); + if (allUserGroups.empty()) { + allUserGroups = std::move(directUserGroups); + GetNestedGroups(ld, &allUserGroups); + } + } else { + allUserGroups = std::move(directUserGroups); + } NKikimrLdap::MsgFree(entry); NKikimrLdap::Unbind(ld); - Send(ev->Sender, new TEvLdapAuthProvider::TEvEnrichGroupsResponse(request->Key, request->User, groupsDn)); + Send(ev->Sender, new TEvLdapAuthProvider::TEvEnrichGroupsResponse(request->Key, request->User, allUserGroups)); } TInitAndBindResponse InitAndBind(LDAP** ld, std::function(const TEvLdapAuthProvider::EStatus&, const TEvLdapAuthProvider::TError&)> eventFabric) { @@ -173,7 +191,7 @@ class TLdapAuthProvider : public NActors::TActorBootstrapped result = NKikimrLdap::Bind(*ld, Settings.GetBindDn(), Settings.GetBindPassword()); if (!NKikimrLdap::IsSuccess(result)) { TEvLdapAuthProvider::TError error { - .Message = "Could not perform initial LDAP bind for dn " + Settings.GetBindDn() + " on server " + UrisList + "\n" + .Message = "Could not perform initial LDAP bind for dn " + Settings.GetBindDn() + " on server " + UrisCreator.GetUris() + "\n" + NKikimrLdap::ErrorToString(result), .Retryable = NKikimrLdap::IsRetryableError(result) }; @@ -202,12 +220,10 @@ class TLdapAuthProvider : public NActors::TActorBootstrapped } } - const ui32 port = Settings.GetPort() != 0 ? 
Settings.GetPort() : NKikimrLdap::GetPort(Settings.GetScheme()); - UrisList = GetUris(port); - result = NKikimrLdap::Init(ld, Settings.GetScheme(), UrisList, port); + result = NKikimrLdap::Init(ld, Settings.GetScheme(), UrisCreator.GetUris(), UrisCreator.GetConfiguredPort()); if (!NKikimrLdap::IsSuccess(result)) { return {{TEvLdapAuthProvider::EStatus::UNAVAILABLE, - {.Message = "Could not initialize LDAP connection for uris: " + UrisList + ". " + NKikimrLdap::LdapError(*ld), + {.Message = "Could not initialize LDAP connection for uris: " + UrisCreator.GetUris() + ". " + NKikimrLdap::LdapError(*ld), .Retryable = false}}}; } @@ -237,14 +253,20 @@ class TLdapAuthProvider : public NActors::TActorBootstrapped char* dn = NKikimrLdap::GetDn(*request.Ld, request.Entry); if (dn == nullptr) { return {{TEvLdapAuthProvider::EStatus::UNAUTHORIZED, - {.Message = "Could not get dn for the first entry matching " + FilterCreator.GetFilter(request.Login) + " on server " + UrisList + "\n" + {.Message = "Could not get dn for the first entry matching " + FilterCreator.GetFilter(request.Login) + " on server " + UrisCreator.GetUris() + "\n" + NKikimrLdap::LdapError(*request.Ld), .Retryable = false}}}; } + if (request.Password.Empty()) { + NKikimrLdap::MemFree(dn); + return {{TEvLdapAuthProvider::EStatus::UNAUTHORIZED, + {.Message = "LDAP login failed. 
Empty password", + .Retryable = false}}}; + } TEvLdapAuthProvider::TError error; int result = NKikimrLdap::Bind(*request.Ld, dn, request.Password); if (!NKikimrLdap::IsSuccess(result)) { - error.Message = "LDAP login failed for user " + TString(dn) + " on server " + UrisList + "\n" + error.Message = "LDAP login failed for user " + TString(dn) + " on server " + UrisCreator.GetUris() + "\n" + NKikimrLdap::ErrorToString((result)); error.Retryable = NKikimrLdap::IsRetryableError(result); } @@ -266,7 +288,7 @@ class TLdapAuthProvider : public NActors::TActorBootstrapped TSearchUserResponse response; if (!NKikimrLdap::IsSuccess(result)) { response.Status = NKikimrLdap::ErrorToStatus(result); - response.Error = {.Message = "Could not search for filter " + searchFilter + " on server " + UrisList + "\n" + response.Error = {.Message = "Could not search for filter " + searchFilter + " on server " + UrisCreator.GetUris() + "\n" + NKikimrLdap::ErrorToString(result), .Retryable = NKikimrLdap::IsRetryableError(result)}; return response; @@ -275,11 +297,11 @@ class TLdapAuthProvider : public NActors::TActorBootstrapped if (countEntries != 1) { if (countEntries == 0) { response.Error = {.Message = "LDAP user " + request.User + " does not exist. " - "LDAP search for filter " + searchFilter + " on server " + UrisList + " return no entries", + "LDAP search for filter " + searchFilter + " on server " + UrisCreator.GetUris() + " return no entries", .Retryable = false}; } else { response.Error = {.Message = "LDAP user " + request.User + " is not unique. 
" - "LDAP search for filter " + searchFilter + " on server " + UrisList + " return " + countEntries + " entries", + "LDAP search for filter " + searchFilter + " on server " + UrisCreator.GetUris() + " return " + countEntries + " entries", .Retryable = false}; } response.Status = TEvLdapAuthProvider::EStatus::UNAUTHORIZED; @@ -290,6 +312,85 @@ class TLdapAuthProvider : public NActors::TActorBootstrapped return response; } + std::vector TryToGetGroupsUseMatchingRuleInChain(LDAP* ld, LDAPMessage* entry) const { + static const TString matchingRuleInChain = "1.2.840.113556.1.4.1941"; // Only Active Directory supports + TStringBuilder filter; + char* dn = NKikimrLdap::GetDn(ld, entry); + filter << "(member:" << matchingRuleInChain << ":=" << dn << ')'; + NKikimrLdap::MemFree(dn); + dn = nullptr; + LDAPMessage* searchMessage = nullptr; + int result = NKikimrLdap::Search(ld, Settings.GetBaseDn(), NKikimrLdap::EScope::SUBTREE, filter, NKikimrLdap::noAttributes, 0, &searchMessage); + if (!NKikimrLdap::IsSuccess(result)) { + return {}; + } + const int countEntries = NKikimrLdap::CountEntries(ld, searchMessage); + if (countEntries == 0) { + NKikimrLdap::MsgFree(searchMessage); + return {}; + } + std::vector groups; + groups.reserve(countEntries); + for (LDAPMessage* groupEntry = NKikimrLdap::FirstEntry(ld, searchMessage); groupEntry != nullptr; groupEntry = NKikimrLdap::NextEntry(ld, groupEntry)) { + dn = NKikimrLdap::GetDn(ld, groupEntry); + groups.push_back(dn); + NKikimrLdap::MemFree(dn); + dn = nullptr; + } + NKikimrLdap::MsgFree(searchMessage); + return groups; + } + + void GetNestedGroups(LDAP* ld, std::vector* groups) { + std::unordered_set viewedGroups(groups->cbegin(), groups->cend()); + std::queue queue; + for (const auto& group : *groups) { + queue.push(group); + } + while (!queue.empty()) { + TStringBuilder filter; + filter << "(|"; + filter << "(entryDn=" << queue.front() << ')'; + queue.pop(); + //should filter string is separated into several batches + while 
(!queue.empty()) { + // entryDn specific for OpenLdap, may get this value from config + filter << "(entryDn=" << queue.front() << ')'; + queue.pop(); + } + filter << ')'; + LDAPMessage* searchMessage = nullptr; + int result = NKikimrLdap::Search(ld, Settings.GetBaseDn(), NKikimrLdap::EScope::SUBTREE, filter, RequestedAttributes, 0, &searchMessage); + if (!NKikimrLdap::IsSuccess(result)) { + return; + } + if (NKikimrLdap::CountEntries(ld, searchMessage) == 0) { + NKikimrLdap::MsgFree(searchMessage); + return; + } + for (LDAPMessage* groupEntry = NKikimrLdap::FirstEntry(ld, searchMessage); groupEntry != nullptr; groupEntry = NKikimrLdap::NextEntry(ld, groupEntry)) { + BerElement* ber = nullptr; + std::vector foundGroups; + char* attribute = NKikimrLdap::FirstAttribute(ld, groupEntry, &ber); + if (attribute != nullptr) { + foundGroups = NKikimrLdap::GetAllValuesOfAttribute(ld, groupEntry, attribute); + NKikimrLdap::MemFree(attribute); + } + if (ber) { + NKikimrLdap::BerFree(ber, 0); + } + for (const auto& newGroup : foundGroups) { + if (!viewedGroups.contains(newGroup)) { + viewedGroups.insert(newGroup); + queue.push(newGroup); + groups->push_back(newGroup); + } + } + } + NKikimrLdap::MsgFree(searchMessage); + } + } + TInitializeLdapConnectionResponse CheckRequiredSettingsParameters() const { if (Settings.GetHosts().empty() && Settings.GetHost().empty()) { return {TEvLdapAuthProvider::EStatus::UNAVAILABLE, {.Message = "List of ldap server hosts is empty", .Retryable = false}}; @@ -306,42 +407,11 @@ class TLdapAuthProvider : public NActors::TActorBootstrapped return {TEvLdapAuthProvider::EStatus::SUCCESS, {}}; } - TString GetUris(ui32 port) const { - TStringBuilder uris; - if (Settings.HostsSize() > 0) { - for (const auto& host : Settings.GetHosts()) { - uris << CreateUri(host, port) << " "; - } - uris.remove(uris.size() - 1); - } else { - uris << CreateUri(Settings.GetHost(), port); - } - return uris; - } - - TString CreateUri(const TString& endpoint, ui32 port) const 
{ - TStringBuilder uri; - uri << Settings.GetScheme() << "://" << endpoint; - if (!HasEndpointPort(endpoint)) { - uri << ':' << port; - } - return uri; - } - - static bool HasEndpointPort(const TString& endpoint) { - size_t colonPos = endpoint.rfind(':'); - if (colonPos == TString::npos) { - return false; - } - ++colonPos; - return (endpoint.size() - colonPos) > 0; - } - private: const NKikimrProto::TLdapAuthentication Settings; const TSearchFilterCreator FilterCreator; + const TLdapUrisCreator UrisCreator; char* RequestedAttributes[2]; - TString UrisList; }; IActor* CreateLdapAuthProvider(const NKikimrProto::TLdapAuthentication& settings) { diff --git a/ydb/core/security/ldap_auth_provider/ldap_auth_provider_linux.cpp b/ydb/core/security/ldap_auth_provider/ldap_auth_provider_linux.cpp index f7826f019a44..4125182f3d74 100644 --- a/ydb/core/security/ldap_auth_provider/ldap_auth_provider_linux.cpp +++ b/ydb/core/security/ldap_auth_provider/ldap_auth_provider_linux.cpp @@ -77,6 +77,10 @@ LDAPMessage* FirstEntry(LDAP* ld, LDAPMessage* chain) { return ldap_first_entry(ld, chain); } +LDAPMessage* NextEntry(LDAP* ld, LDAPMessage* entry) { + return ldap_next_entry(ld, entry); +} + char* FirstAttribute(LDAP* ld, LDAPMessage* entry, BerElement** berout) { return ldap_first_attribute(ld, entry, berout); } diff --git a/ydb/core/security/ldap_auth_provider/ldap_auth_provider_ut.cpp b/ydb/core/security/ldap_auth_provider/ldap_auth_provider_ut.cpp index 5c5ac98a27ae..6ceddc0c5749 100644 --- a/ydb/core/security/ldap_auth_provider/ldap_auth_provider_ut.cpp +++ b/ydb/core/security/ldap_auth_provider/ldap_auth_provider_ut.cpp @@ -47,6 +47,8 @@ void InitLdapSettings(NKikimrProto::TLdapAuthentication* ldapSettings, ui16 ldap ldapSettings->SetBindDn("cn=robouser,dc=search,dc=yandex,dc=net"); ldapSettings->SetBindPassword("robouserPassword"); ldapSettings->SetSearchFilter("uid=$username"); + auto extendedSettings = ldapSettings->MutableExtendedSettings(); + 
extendedSettings->SetEnableNestedGroupsSearch(true); const auto setCertificate = [&ldapSettings] (bool useStartTls, TTempFileHandle& certificateFile) { auto useTls = ldapSettings->MutableUseTls(); @@ -64,6 +66,12 @@ void InitLdapSettings(NKikimrProto::TLdapAuthentication* ldapSettings, ui16 ldap } } +void InitLdapSettingsDisableSearchNestedGroups(NKikimrProto::TLdapAuthentication* ldapSettings, ui16 ldapPort, TTempFileHandle& certificateFile, const ESecurityConnectionType& securityConnectionType) { + InitLdapSettings(ldapSettings, ldapPort, certificateFile, securityConnectionType); + auto extendedSettings = ldapSettings->MutableExtendedSettings(); + extendedSettings->SetEnableNestedGroupsSearch(false); +} + void InitLdapSettingsWithInvalidRobotUserLogin(NKikimrProto::TLdapAuthentication* ldapSettings, ui16 ldapPort, TTempFileHandle& certificateFile, const ESecurityConnectionType& securityConnectionType) { InitLdapSettings(ldapSettings, ldapPort, certificateFile, securityConnectionType); ldapSettings->SetBindDn("cn=invalidRobouser,dc=search,dc=yandex,dc=net"); @@ -184,23 +192,424 @@ TAutoPtr LdapAuthenticate(TLdapKikimrServer& server, const TString return handle; } +// Scheme of groups +// *-> cn=people,ou=groups,dc=search,dc=yandex,dc=net +// | +// |*-> cn=managers,cn=people,ou=groups,dc=search,dc=yandex,dc=net +// | | +// | |*-> cn=managerOfProject1,cn=managers,cn=people,ou=groups,dc=search,dc=yandex,dc=net +// | | +// | |*-> uid=ldapuser,dc=search,dc=yandex,dc=net +// | +// |*-> cn=developers,cn=people,ou=groups,dc=search,dc=yandex,dc=net +// | +// |*-> cn=project1,cn=developers,cn=people,ou=groups,dc=search,dc=yandex,dc=net +// | +// |*-> uid=ldapuser,dc=search,dc=yandex,dc=net + class TCorrectLdapResponse { public: - static std::vector Groups; - static LdapMock::TLdapMockResponses GetResponses(const TString& login, const TString& groupAttribute = "memberOf"); + static std::vector DirectGroups; + static std::vector UpdatedDirectGroups; + static std::vector 
ManagerGroups; + static std::vector DevelopersGroups; + static std::vector PeopleGroups; + static LdapMock::TLdapMockResponses GetResponses(const TString& login, bool doReturnDirectedGroups = false, const TString& groupAttribute = "memberOf"); + static LdapMock::TLdapMockResponses GetAdResponses(const TString& login, bool doReturnDirectedGroups = false, const TString& groupAttribute = "memberOf"); + static LdapMock::TLdapMockResponses GetUpdatedResponses(const TString& login, bool doReturnDirectedGroups = false, const TString& groupAttribute = "memberOf"); + static THashSet GetAllGroups(const TString& domain) { + THashSet result; + auto AddGroups = [&result, &domain] (const std::vector& groups) { + std::transform(groups.begin(), groups.end(), std::inserter(result, result.end()), [&domain](const TString& group) { + return TString(group).append(domain); + }); + }; + AddGroups(DirectGroups); + AddGroups(ManagerGroups); + AddGroups(DevelopersGroups); + AddGroups(PeopleGroups); + return result; + } + + static THashSet GetDirectedGroups(const TString& domain) { + THashSet result; + auto AddGroups = [&result, &domain] (const std::vector& groups) { + std::transform(groups.begin(), groups.end(), std::inserter(result, result.end()), [&domain](const TString& group) { + return TString(group).append(domain); + }); + }; + AddGroups(DirectGroups); + return result; + } + + static THashSet GetAllUpdatedGroups(const TString& domain) { + THashSet result; + auto AddGroups = [&result, &domain] (const std::vector& groups) { + std::transform(groups.begin(), groups.end(), std::inserter(result, result.end()), [&domain](const TString& group) { + return TString(group).append(domain); + }); + }; + AddGroups(UpdatedDirectGroups); + AddGroups(DevelopersGroups); + AddGroups(PeopleGroups); + return result; + } + + static THashSet GetUpdatedDirectedGroups(const TString& domain) { + THashSet result; + auto AddGroups = [&result, &domain] (const std::vector& groups) { + std::transform(groups.begin(), 
groups.end(), std::inserter(result, result.end()), [&domain](const TString& group) { + return TString(group).append(domain); + }); + }; + AddGroups(UpdatedDirectGroups); + return result; + } +}; + +std::vector TCorrectLdapResponse::DirectGroups { + "cn=managerOfProject1,cn=managers,cn=people,ou=groups,dc=search,dc=yandex,dc=net", + "cn=project1,cn=developers,cn=people,ou=groups,dc=search,dc=yandex,dc=net" +}; + +std::vector TCorrectLdapResponse::UpdatedDirectGroups { + "cn=project1,cn=developers,cn=people,ou=groups,dc=search,dc=yandex,dc=net" +}; + +std::vector TCorrectLdapResponse::ManagerGroups { + "cn=managers,cn=people,ou=groups,dc=search,dc=yandex,dc=net", +}; + +std::vector TCorrectLdapResponse::DevelopersGroups { + "cn=developers,cn=people,ou=groups,dc=search,dc=yandex,dc=net", }; -std::vector TCorrectLdapResponse::Groups { - "ou=groups,dc=search,dc=yandex,dc=net", +std::vector TCorrectLdapResponse::PeopleGroups { "cn=people,ou=groups,dc=search,dc=yandex,dc=net", - "cn=developers,ou=groups,dc=search,dc=yandex,dc=net" }; -LdapMock::TLdapMockResponses TCorrectLdapResponse::GetResponses(const TString& login, const TString& groupAttribute) { +LdapMock::TLdapMockResponses TCorrectLdapResponse::GetResponses(const TString& login, bool doReturnDirectedGroups, const TString& groupAttribute) { + LdapMock::TLdapMockResponses responses; + responses.BindResponses.push_back({{{.Login = "cn=robouser,dc=search,dc=yandex,dc=net", .Password = "robouserPassword"}}, {.Status = LdapMock::EStatus::SUCCESS}}); + + LdapMock::TSearchRequestInfo requestDirectedUserGroups { + { + .BaseDn = "dc=search,dc=yandex,dc=net", + .Scope = 2, + .DerefAliases = 0, + .Filter = {.Type = LdapMock::EFilterType::LDAP_FILTER_EQUALITY, .Attribute = "uid", .Value = login}, + .Attributes = {groupAttribute} + } + }; + + std::vector responseDirectedUserGroupsEntries { + { + .Dn = "uid=" + login + ",dc=search,dc=yandex,dc=net", + .AttributeList = { + {groupAttribute, TCorrectLdapResponse::DirectGroups} + } 
+ }, + }; + + LdapMock::TSearchResponseInfo responseDirectedUserGroups { + .ResponseEntries = responseDirectedUserGroupsEntries, + .ResponseDone = {.Status = LdapMock::EStatus::SUCCESS} + }; + responses.SearchResponses.push_back({requestDirectedUserGroups, responseDirectedUserGroups}); + + if (doReturnDirectedGroups) { + return responses; + } + + std::shared_ptr filterToGetGroupOfManagers = std::make_shared(); + filterToGetGroupOfManagers->Type = LdapMock::EFilterType::LDAP_FILTER_EQUALITY; + filterToGetGroupOfManagers->Attribute = "entryDn"; + filterToGetGroupOfManagers->Value = "cn=managerOfProject1,cn=managers,cn=people,ou=groups,dc=search,dc=yandex,dc=net"; + + std::shared_ptr filterToGetGroupOfDevelopers = std::make_shared(); + filterToGetGroupOfDevelopers->Type = LdapMock::EFilterType::LDAP_FILTER_EQUALITY; + filterToGetGroupOfDevelopers->Attribute = "entryDn"; + filterToGetGroupOfDevelopers->Value = "cn=project1,cn=developers,cn=people,ou=groups,dc=search,dc=yandex,dc=net"; + + std::vector> nestedFiltersToGetGroupsOfManagersAndDevelopers = { + filterToGetGroupOfManagers, + filterToGetGroupOfDevelopers + }; + LdapMock::TSearchRequestInfo requestToGetGroupsOfManagersAndDevelopers { + { + .BaseDn = "dc=search,dc=yandex,dc=net", + .Scope = 2, + .DerefAliases = 0, + .Filter = {.Type = LdapMock::EFilterType::LDAP_FILTER_OR, .NestedFilters = nestedFiltersToGetGroupsOfManagersAndDevelopers}, + .Attributes = {groupAttribute} + } + }; + + std::vector responseEntriesWithGroupsOfManagersAndDevelopers { + { + .Dn = "cn=managerOfProject1,cn=managers,cn=people,ou=groups,dc=search,dc=yandex,dc=net", + .AttributeList = { + {groupAttribute, TCorrectLdapResponse::ManagerGroups} + } + }, + { + .Dn = "cn=project1,cn=developers,cn=people,ou=groups,dc=search,dc=yandex,dc=net", + .AttributeList = { + {groupAttribute, TCorrectLdapResponse::DevelopersGroups} + } + }, + }; + + LdapMock::TSearchResponseInfo responseWithGroupsOfManagersAndDevelopers { + .ResponseEntries = 
responseEntriesWithGroupsOfManagersAndDevelopers, + .ResponseDone = {.Status = LdapMock::EStatus::SUCCESS} + }; + responses.SearchResponses.push_back({requestToGetGroupsOfManagersAndDevelopers, responseWithGroupsOfManagersAndDevelopers}); + + std::shared_ptr filterToGetGroupPeopleFromManagers = std::make_shared(); + filterToGetGroupPeopleFromManagers->Type = LdapMock::EFilterType::LDAP_FILTER_EQUALITY; + filterToGetGroupPeopleFromManagers->Attribute = "entryDn"; + filterToGetGroupPeopleFromManagers->Value = "cn=managers,cn=people,ou=groups,dc=search,dc=yandex,dc=net"; + + std::shared_ptr filterToGetGroupPeopleFromDevelopers = std::make_shared(); + filterToGetGroupPeopleFromDevelopers->Type = LdapMock::EFilterType::LDAP_FILTER_EQUALITY; + filterToGetGroupPeopleFromDevelopers->Attribute = "entryDn"; + filterToGetGroupPeopleFromDevelopers->Value = "cn=developers,cn=people,ou=groups,dc=search,dc=yandex,dc=net"; + + std::vector> nestedFiltersToGetGroupOfPeople = { + filterToGetGroupPeopleFromManagers, filterToGetGroupPeopleFromDevelopers + }; + LdapMock::TSearchRequestInfo requestToGetGroupOfPeople { + { + .BaseDn = "dc=search,dc=yandex,dc=net", + .Scope = 2, + .DerefAliases = 0, + .Filter = {.Type = LdapMock::EFilterType::LDAP_FILTER_OR, .NestedFilters = nestedFiltersToGetGroupOfPeople}, + .Attributes = {groupAttribute} + } + }; + + std::vector responseWithGroupOfPeopleEntries { + { + .Dn = "cn=managers,cn=people,ou=groups,dc=search,dc=yandex,dc=net", + .AttributeList = { + {groupAttribute, TCorrectLdapResponse::PeopleGroups} + } + }, + { + .Dn = "cn=developers,cn=people,ou=groups,dc=search,dc=yandex,dc=net", + .AttributeList = { + {groupAttribute, TCorrectLdapResponse::PeopleGroups} + } + }, + }; + + LdapMock::TSearchResponseInfo responseWithGroupOfPeople { + .ResponseEntries = responseWithGroupOfPeopleEntries, + .ResponseDone = {.Status = LdapMock::EStatus::SUCCESS} + }; + responses.SearchResponses.push_back({requestToGetGroupOfPeople, responseWithGroupOfPeople}); + 
+ std::shared_ptr filterToGetParentGroupOfPeople = std::make_shared(); + filterToGetParentGroupOfPeople->Type = LdapMock::EFilterType::LDAP_FILTER_EQUALITY; + filterToGetParentGroupOfPeople->Attribute = "entryDn"; + filterToGetParentGroupOfPeople->Value = "cn=people,ou=groups,dc=search,dc=yandex,dc=net"; + + std::vector> nestedFiltersToGetParentGroupOfPeople = { + filterToGetParentGroupOfPeople + }; + LdapMock::TSearchRequestInfo requestToGetParentGroupOfPeople { + { + .BaseDn = "dc=search,dc=yandex,dc=net", + .Scope = 2, + .DerefAliases = 0, + .Filter = {.Type = LdapMock::EFilterType::LDAP_FILTER_OR, .NestedFilters = nestedFiltersToGetParentGroupOfPeople}, + .Attributes = {groupAttribute} + } + }; + + LdapMock::TSearchResponseInfo responseWithParentGroupOfPeople { + .ResponseEntries = {}, + .ResponseDone = {.Status = LdapMock::EStatus::SUCCESS} + }; + responses.SearchResponses.push_back({requestToGetParentGroupOfPeople, responseWithParentGroupOfPeople}); + + LdapMock::TSearchRequestInfo requestToGetAllNestedGroupsFromAd { + { + .BaseDn = "dc=search,dc=yandex,dc=net", + .Scope = 2, + .DerefAliases = 0, + .Filter = {.Type = LdapMock::EFilterType::LDAP_FILTER_EXT, + .Attribute = "member", + .Value = "uid=ldapuser,dc=search,dc=yandex,dc=net", + .MatchingRule = "1.2.840.113556.1.4.1941", + .DnAttributes = false, + .NestedFilters = {}}, + .Attributes = {"1.1"} + } + }; + + LdapMock::TSearchResponseInfo responseWithAllNestedGroupsFromAd { + .ResponseEntries = {}, // LDAP server is not Active Directory. 
Return empty entries + .ResponseDone = {.Status = LdapMock::EStatus::SUCCESS} + }; + responses.SearchResponses.push_back({requestToGetAllNestedGroupsFromAd, responseWithAllNestedGroupsFromAd}); + + return responses; +} + +LdapMock::TLdapMockResponses TCorrectLdapResponse::GetUpdatedResponses(const TString& login, bool doReturnDirectedGroups, const TString& groupAttribute) { + LdapMock::TLdapMockResponses responses; + responses.BindResponses.push_back({{{.Login = "cn=robouser,dc=search,dc=yandex,dc=net", .Password = "robouserPassword"}}, {.Status = LdapMock::EStatus::SUCCESS}}); + + LdapMock::TSearchRequestInfo requestDirectedUserGroups { + { + .BaseDn = "dc=search,dc=yandex,dc=net", + .Scope = 2, + .DerefAliases = 0, + .Filter = {.Type = LdapMock::EFilterType::LDAP_FILTER_EQUALITY, .Attribute = "uid", .Value = login}, + .Attributes = {groupAttribute} + } + }; + + std::vector responseDirectedUserGroupsEntries { + { + .Dn = "uid=" + login + ",dc=search,dc=yandex,dc=net", + .AttributeList = { + {groupAttribute, UpdatedDirectGroups} + } + }, + }; + + LdapMock::TSearchResponseInfo responseDirectedUserGroups { + .ResponseEntries = responseDirectedUserGroupsEntries, + .ResponseDone = {.Status = LdapMock::EStatus::SUCCESS} + }; + responses.SearchResponses.push_back({requestDirectedUserGroups, responseDirectedUserGroups}); + + if (doReturnDirectedGroups) { + return responses; + } + + std::shared_ptr filterToGetGroupOfDevelopers = std::make_shared(); + filterToGetGroupOfDevelopers->Type = LdapMock::EFilterType::LDAP_FILTER_EQUALITY; + filterToGetGroupOfDevelopers->Attribute = "entryDn"; + filterToGetGroupOfDevelopers->Value = "cn=project1,cn=developers,cn=people,ou=groups,dc=search,dc=yandex,dc=net"; + + std::vector> nestedFiltersToGetGroupsOfDevelopers = { + filterToGetGroupOfDevelopers + }; + LdapMock::TSearchRequestInfo requestToGetGroupsOfDevelopers { + { + .BaseDn = "dc=search,dc=yandex,dc=net", + .Scope = 2, + .DerefAliases = 0, + .Filter = {.Type = 
LdapMock::EFilterType::LDAP_FILTER_OR, .NestedFilters = nestedFiltersToGetGroupsOfDevelopers}, + .Attributes = {groupAttribute} + } + }; + + std::vector responseEntriesWithGroupsOfDevelopers { + { + .Dn = "cn=project1,cn=developers,cn=people,ou=groups,dc=search,dc=yandex,dc=net", + .AttributeList = { + {groupAttribute, TCorrectLdapResponse::DevelopersGroups} + } + }, + }; + + LdapMock::TSearchResponseInfo responseWithGroupsOfDevelopers { + .ResponseEntries = responseEntriesWithGroupsOfDevelopers, + .ResponseDone = {.Status = LdapMock::EStatus::SUCCESS} + }; + responses.SearchResponses.push_back({requestToGetGroupsOfDevelopers, responseWithGroupsOfDevelopers}); + + std::shared_ptr filterToGetGroupPeopleFromDevelopers = std::make_shared(); + filterToGetGroupPeopleFromDevelopers->Type = LdapMock::EFilterType::LDAP_FILTER_EQUALITY; + filterToGetGroupPeopleFromDevelopers->Attribute = "entryDn"; + filterToGetGroupPeopleFromDevelopers->Value = "cn=developers,cn=people,ou=groups,dc=search,dc=yandex,dc=net"; + + std::vector> nestedFiltersToGetGroupOfPeople = { + filterToGetGroupPeopleFromDevelopers + }; + LdapMock::TSearchRequestInfo requestToGetGroupOfPeople { + { + .BaseDn = "dc=search,dc=yandex,dc=net", + .Scope = 2, + .DerefAliases = 0, + .Filter = {.Type = LdapMock::EFilterType::LDAP_FILTER_OR, .NestedFilters = nestedFiltersToGetGroupOfPeople}, + .Attributes = {groupAttribute} + } + }; + + std::vector responseWithGroupOfPeopleEntries { + { + .Dn = "cn=developers,cn=people,ou=groups,dc=search,dc=yandex,dc=net", + .AttributeList = { + {groupAttribute, TCorrectLdapResponse::PeopleGroups} + } + }, + }; + + LdapMock::TSearchResponseInfo responseWithGroupOfPeople { + .ResponseEntries = responseWithGroupOfPeopleEntries, + .ResponseDone = {.Status = LdapMock::EStatus::SUCCESS} + }; + responses.SearchResponses.push_back({requestToGetGroupOfPeople, responseWithGroupOfPeople}); + + std::shared_ptr filterToGetParentGroupOfPeople = std::make_shared(); + 
filterToGetParentGroupOfPeople->Type = LdapMock::EFilterType::LDAP_FILTER_EQUALITY; + filterToGetParentGroupOfPeople->Attribute = "entryDn"; + filterToGetParentGroupOfPeople->Value = "cn=people,ou=groups,dc=search,dc=yandex,dc=net"; + + std::vector> nestedFiltersToGetParentGroupOfPeople = { + filterToGetParentGroupOfPeople + }; + LdapMock::TSearchRequestInfo requestToGetParentGroupOfPeople { + { + .BaseDn = "dc=search,dc=yandex,dc=net", + .Scope = 2, + .DerefAliases = 0, + .Filter = {.Type = LdapMock::EFilterType::LDAP_FILTER_OR, .NestedFilters = nestedFiltersToGetParentGroupOfPeople}, + .Attributes = {groupAttribute} + } + }; + + LdapMock::TSearchResponseInfo responseWithParentGroupOfPeople { + .ResponseEntries = {}, + .ResponseDone = {.Status = LdapMock::EStatus::SUCCESS} + }; + responses.SearchResponses.push_back({requestToGetParentGroupOfPeople, responseWithParentGroupOfPeople}); + + LdapMock::TSearchRequestInfo requestToGetAllNestedGroupsFromAd { + { + .BaseDn = "dc=search,dc=yandex,dc=net", + .Scope = 2, + .DerefAliases = 0, + .Filter = {.Type = LdapMock::EFilterType::LDAP_FILTER_EXT, + .Attribute = "member", + .Value = "uid=ldapuser,dc=search,dc=yandex,dc=net", + .MatchingRule = "1.2.840.113556.1.4.1941", + .DnAttributes = false, + .NestedFilters = {}}, + .Attributes = {"1.1"} + } + }; + + LdapMock::TSearchResponseInfo responseWithAllNestedGroupsFromAd { + .ResponseEntries = {}, // LDAP server is not Active Directory. 
Return empty entries + .ResponseDone = {.Status = LdapMock::EStatus::SUCCESS} + }; + responses.SearchResponses.push_back({requestToGetAllNestedGroupsFromAd, responseWithAllNestedGroupsFromAd}); + + return responses; +} + +LdapMock::TLdapMockResponses TCorrectLdapResponse::GetAdResponses(const TString& login, bool doReturnDirectedGroups, const TString& groupAttribute) { LdapMock::TLdapMockResponses responses; responses.BindResponses.push_back({{{.Login = "cn=robouser,dc=search,dc=yandex,dc=net", .Password = "robouserPassword"}}, {.Status = LdapMock::EStatus::SUCCESS}}); - LdapMock::TSearchRequestInfo fetchGroupsSearchRequestInfo { + LdapMock::TSearchRequestInfo requestDirectedUserGroups { { .BaseDn = "dc=search,dc=yandex,dc=net", .Scope = 2, @@ -210,20 +619,69 @@ LdapMock::TLdapMockResponses TCorrectLdapResponse::GetResponses(const TString& l } }; - std::vector fetchGroupsSearchResponseEntries { + std::vector responseDirectedUserGroupsEntries { { .Dn = "uid=" + login + ",dc=search,dc=yandex,dc=net", .AttributeList = { - {groupAttribute, TCorrectLdapResponse::Groups} + {groupAttribute, TCorrectLdapResponse::DirectGroups} } + }, + }; + + LdapMock::TSearchResponseInfo responseDirectedUserGroups { + .ResponseEntries = responseDirectedUserGroupsEntries, + .ResponseDone = {.Status = LdapMock::EStatus::SUCCESS} + }; + responses.SearchResponses.push_back({requestDirectedUserGroups, responseDirectedUserGroups}); + + if (doReturnDirectedGroups) { + return responses; + } + + LdapMock::TSearchRequestInfo requestToGetAllNestedGroupsFromAd { + { + .BaseDn = "dc=search,dc=yandex,dc=net", + .Scope = 2, + .DerefAliases = 0, + .Filter = {.Type = LdapMock::EFilterType::LDAP_FILTER_EXT, + .Attribute = "member", + .Value = "uid=ldapuser,dc=search,dc=yandex,dc=net", + .MatchingRule = "1.2.840.113556.1.4.1941", + .DnAttributes = false, + .NestedFilters = {}}, + .Attributes = {"1.1"} } }; - LdapMock::TSearchResponseInfo fetchGroupsSearchResponseInfo { - .ResponseEntries = 
fetchGroupsSearchResponseEntries, + std::vector responseWithAllNestedGroupsFromAdEntries { + { + .Dn = "cn=managerOfProject1,cn=managers,cn=people,ou=groups,dc=search,dc=yandex,dc=net", + .AttributeList = {} + }, + { + .Dn = "cn=project1,cn=developers,cn=people,ou=groups,dc=search,dc=yandex,dc=net", + .AttributeList = {} + }, + { + .Dn = "cn=managers,cn=people,ou=groups,dc=search,dc=yandex,dc=net", + .AttributeList = {} + }, + { + .Dn = "cn=developers,cn=people,ou=groups,dc=search,dc=yandex,dc=net", + .AttributeList = {} + }, + { + .Dn = "cn=people,ou=groups,dc=search,dc=yandex,dc=net", + .AttributeList = {} + }, + }; + + LdapMock::TSearchResponseInfo responseWithAllNestedGroupsFromAd { + .ResponseEntries = responseWithAllNestedGroupsFromAdEntries, .ResponseDone = {.Status = LdapMock::EStatus::SUCCESS} }; - responses.SearchResponses.push_back({fetchGroupsSearchRequestInfo, fetchGroupsSearchResponseInfo}); + responses.SearchResponses.push_back({requestToGetAllNestedGroupsFromAd, responseWithAllNestedGroupsFromAd}); + return responses; } @@ -248,10 +706,7 @@ void CheckRequiredLdapSettings(std::functionToken->GetGroupSIDs(); THashSet groups(fetchedGroups.begin(), fetchedGroups.end()); - THashSet expectedGroups; - std::transform(TCorrectLdapResponse::Groups.begin(), TCorrectLdapResponse::Groups.end(), std::inserter(expectedGroups, expectedGroups.end()), [&ldapDomain](TString& group) { - return group.append(ldapDomain); - }); + THashSet expectedGroups = TCorrectLdapResponse::GetAllGroups(ldapDomain); expectedGroups.insert("all-users@well-known"); UNIT_ASSERT_VALUES_EQUAL(fetchedGroups.size(), expectedGroups.size()); @@ -281,24 +733,12 @@ void LdapFetchGroupsWithDefaultGroupAttributeGood(const ESecurityConnectionType& ldapServer.Stop(); } - Y_UNIT_TEST(LdapFetchGroupsWithDefaultGroupAttributeGood_nonSecure) { - LdapFetchGroupsWithDefaultGroupAttributeGood(ESecurityConnectionType::NON_SECURE); - } - - Y_UNIT_TEST(LdapFetchGroupsWithDefaultGroupAttributeGood_StartTls) { - 
LdapFetchGroupsWithDefaultGroupAttributeGood(ESecurityConnectionType::START_TLS); - } - - Y_UNIT_TEST(LdapFetchGroupsWithDefaultGroupAttributeGood_LdapsScheme) { - LdapFetchGroupsWithDefaultGroupAttributeGood(ESecurityConnectionType::LDAPS_SCHEME); - } - - void LdapFetchGroupsWithDefaultGroupAttributeGoodUseListOfHosts(const ESecurityConnectionType& secureType) { + void LdapFetchGroupsWithDefaultGroupAttributeDisableNestedGroupsGood(const ESecurityConnectionType& secureType) { TString login = "ldapuser"; TString password = "ldapUserPassword"; - TLdapKikimrServer server(InitLdapSettingsWithListOfHosts, secureType); - LdapMock::TLdapSimpleServer ldapServer(server.GetLdapPort(), TCorrectLdapResponse::GetResponses(login), secureType == ESecurityConnectionType::LDAPS_SCHEME); + TLdapKikimrServer server(InitLdapSettingsDisableSearchNestedGroups, secureType); + LdapMock::TLdapSimpleServer ldapServer(server.GetLdapPort(), TCorrectLdapResponse::GetResponses(login, true), secureType == ESecurityConnectionType::LDAPS_SCHEME); TAutoPtr handle = LdapAuthenticate(server, login, password); TEvTicketParser::TEvAuthorizeTicketResult* ticketParserResult = handle->Get(); @@ -309,10 +749,7 @@ void LdapFetchGroupsWithDefaultGroupAttributeGood(const ESecurityConnectionType& const auto& fetchedGroups = ticketParserResult->Token->GetGroupSIDs(); THashSet groups(fetchedGroups.begin(), fetchedGroups.end()); - THashSet expectedGroups; - std::transform(TCorrectLdapResponse::Groups.begin(), TCorrectLdapResponse::Groups.end(), std::inserter(expectedGroups, expectedGroups.end()), [&ldapDomain](TString& group) { - return group.append(ldapDomain); - }); + THashSet expectedGroups = TCorrectLdapResponse::GetDirectedGroups(ldapDomain); expectedGroups.insert("all-users@well-known"); UNIT_ASSERT_VALUES_EQUAL(fetchedGroups.size(), expectedGroups.size()); @@ -323,24 +760,12 @@ void LdapFetchGroupsWithDefaultGroupAttributeGood(const ESecurityConnectionType& ldapServer.Stop(); } - 
Y_UNIT_TEST(LdapFetchGroupsWithDefaultGroupAttributeGoodUseListOfHosts_nonSecure) { - LdapFetchGroupsWithDefaultGroupAttributeGoodUseListOfHosts(ESecurityConnectionType::NON_SECURE); - } - - Y_UNIT_TEST(LdapFetchGroupsWithDefaultGroupAttributeGoodUseListOfHosts_StartTls) { - LdapFetchGroupsWithDefaultGroupAttributeGoodUseListOfHosts(ESecurityConnectionType::START_TLS); - } - - Y_UNIT_TEST(LdapFetchGroupsWithDefaultGroupAttributeGoodUseListOfHosts_LdapsScheme) { - LdapFetchGroupsWithDefaultGroupAttributeGoodUseListOfHosts(ESecurityConnectionType::LDAPS_SCHEME); - } - - void LdapFetchGroupsWithCustomGroupAttributeGood(const ESecurityConnectionType& secureType) { + void LdapFetchGroupsFromAdLdapServer(const ESecurityConnectionType& secureType) { TString login = "ldapuser"; TString password = "ldapUserPassword"; - TLdapKikimrServer server(InitLdapSettingsWithCustomGroupAttribute, secureType); - LdapMock::TLdapSimpleServer ldapServer(server.GetLdapPort(), TCorrectLdapResponse::GetResponses(login, "groupDN"), secureType == ESecurityConnectionType::LDAPS_SCHEME); + TLdapKikimrServer server(InitLdapSettings, secureType); + LdapMock::TLdapSimpleServer ldapServer(server.GetLdapPort(), TCorrectLdapResponse::GetAdResponses(login), secureType == ESecurityConnectionType::LDAPS_SCHEME); TAutoPtr handle = LdapAuthenticate(server, login, password); TEvTicketParser::TEvAuthorizeTicketResult* ticketParserResult = handle->Get(); @@ -351,10 +776,7 @@ void LdapFetchGroupsWithDefaultGroupAttributeGood(const ESecurityConnectionType& const auto& fetchedGroups = ticketParserResult->Token->GetGroupSIDs(); THashSet groups(fetchedGroups.begin(), fetchedGroups.end()); - THashSet expectedGroups; - std::transform(TCorrectLdapResponse::Groups.begin(), TCorrectLdapResponse::Groups.end(), std::inserter(expectedGroups, expectedGroups.end()), [&ldapDomain](TString& group) { - return group.append(ldapDomain); - }); + THashSet expectedGroups = TCorrectLdapResponse::GetAllGroups(ldapDomain); 
expectedGroups.insert("all-users@well-known"); UNIT_ASSERT_VALUES_EQUAL(fetchedGroups.size(), expectedGroups.size()); @@ -365,17 +787,86 @@ void LdapFetchGroupsWithDefaultGroupAttributeGood(const ESecurityConnectionType& ldapServer.Stop(); } - Y_UNIT_TEST(LdapFetchGroupsWithCustomGroupAttributeGood_nonSecure) { - LdapFetchGroupsWithCustomGroupAttributeGood(ESecurityConnectionType::NON_SECURE); - } - - Y_UNIT_TEST(LdapFetchGroupsWithCustomGroupAttributeGood_StartTls) { - LdapFetchGroupsWithCustomGroupAttributeGood(ESecurityConnectionType::START_TLS); - } + void LdapFetchGroupsDisableRequestToAD(const ESecurityConnectionType& secureType) { + TString login = "ldapuser"; + TString password = "ldapUserPassword"; - Y_UNIT_TEST(LdapFetchGroupsWithCustomGroupAttributeGood_LdapsScheme) { - LdapFetchGroupsWithCustomGroupAttributeGood(ESecurityConnectionType::LDAPS_SCHEME); - } + TLdapKikimrServer server(InitLdapSettingsDisableSearchNestedGroups, secureType); + LdapMock::TLdapSimpleServer ldapServer(server.GetLdapPort(), TCorrectLdapResponse::GetAdResponses(login, true), secureType == ESecurityConnectionType::LDAPS_SCHEME); + + TAutoPtr handle = LdapAuthenticate(server, login, password); + TEvTicketParser::TEvAuthorizeTicketResult* ticketParserResult = handle->Get(); + UNIT_ASSERT_C(ticketParserResult->Error.empty(), ticketParserResult->Error); + UNIT_ASSERT(ticketParserResult->Token != nullptr); + const TString ldapDomain = "@ldap"; + UNIT_ASSERT_VALUES_EQUAL(ticketParserResult->Token->GetUserSID(), login + ldapDomain); + const auto& fetchedGroups = ticketParserResult->Token->GetGroupSIDs(); + THashSet groups(fetchedGroups.begin(), fetchedGroups.end()); + + THashSet expectedGroups = TCorrectLdapResponse::GetDirectedGroups(ldapDomain); + expectedGroups.insert("all-users@well-known"); + + UNIT_ASSERT_VALUES_EQUAL(fetchedGroups.size(), expectedGroups.size()); + for (const auto& expectedGroup : expectedGroups) { + UNIT_ASSERT_C(groups.contains(expectedGroup), "Can not find " + 
expectedGroup); + } + + ldapServer.Stop(); + } + + void LdapFetchGroupsWithDefaultGroupAttributeGoodUseListOfHosts(const ESecurityConnectionType& secureType) { + TString login = "ldapuser"; + TString password = "ldapUserPassword"; + + TLdapKikimrServer server(InitLdapSettingsWithListOfHosts, secureType); + LdapMock::TLdapSimpleServer ldapServer(server.GetLdapPort(), TCorrectLdapResponse::GetResponses(login), secureType == ESecurityConnectionType::LDAPS_SCHEME); + + TAutoPtr handle = LdapAuthenticate(server, login, password); + TEvTicketParser::TEvAuthorizeTicketResult* ticketParserResult = handle->Get(); + UNIT_ASSERT_C(ticketParserResult->Error.empty(), ticketParserResult->Error); + UNIT_ASSERT(ticketParserResult->Token != nullptr); + const TString ldapDomain = "@ldap"; + UNIT_ASSERT_VALUES_EQUAL(ticketParserResult->Token->GetUserSID(), login + ldapDomain); + const auto& fetchedGroups = ticketParserResult->Token->GetGroupSIDs(); + THashSet groups(fetchedGroups.begin(), fetchedGroups.end()); + + THashSet expectedGroups = TCorrectLdapResponse::GetAllGroups(ldapDomain); + expectedGroups.insert("all-users@well-known"); + + UNIT_ASSERT_VALUES_EQUAL(fetchedGroups.size(), expectedGroups.size()); + for (const auto& expectedGroup : expectedGroups) { + UNIT_ASSERT_C(groups.contains(expectedGroup), "Can not find " + expectedGroup); + } + + ldapServer.Stop(); + } + + void LdapFetchGroupsWithCustomGroupAttributeGood(const ESecurityConnectionType& secureType) { + TString login = "ldapuser"; + TString password = "ldapUserPassword"; + + TLdapKikimrServer server(InitLdapSettingsWithCustomGroupAttribute, secureType); + LdapMock::TLdapSimpleServer ldapServer(server.GetLdapPort(), TCorrectLdapResponse::GetResponses(login, false, "groupDN"), secureType == ESecurityConnectionType::LDAPS_SCHEME); + + TAutoPtr handle = LdapAuthenticate(server, login, password); + TEvTicketParser::TEvAuthorizeTicketResult* ticketParserResult = handle->Get(); + 
UNIT_ASSERT_C(ticketParserResult->Error.empty(), ticketParserResult->Error); + UNIT_ASSERT(ticketParserResult->Token != nullptr); + const TString ldapDomain = "@ldap"; + UNIT_ASSERT_VALUES_EQUAL(ticketParserResult->Token->GetUserSID(), login + ldapDomain); + const auto& fetchedGroups = ticketParserResult->Token->GetGroupSIDs(); + THashSet groups(fetchedGroups.begin(), fetchedGroups.end()); + + THashSet expectedGroups = TCorrectLdapResponse::GetAllGroups(ldapDomain); + expectedGroups.insert("all-users@well-known"); + + UNIT_ASSERT_VALUES_EQUAL(fetchedGroups.size(), expectedGroups.size()); + for (const auto& expectedGroup : expectedGroups) { + UNIT_ASSERT_C(groups.contains(expectedGroup), "Can not find " + expectedGroup); + } + + ldapServer.Stop(); + } void LdapFetchGroupsWithDontExistGroupAttribute(const ESecurityConnectionType& secureType) { TString login = "ldapuser"; @@ -424,18 +915,6 @@ void LdapFetchGroupsWithDefaultGroupAttributeGood(const ESecurityConnectionType& ldapServer.Stop(); } - Y_UNIT_TEST(LdapFetchGroupsWithDontExistGroupAttribute_nonSecure) { - LdapFetchGroupsWithDontExistGroupAttribute(ESecurityConnectionType::NON_SECURE); - } - - Y_UNIT_TEST(LdapFetchGroupsWithDontExistGroupAttribute_StartTls) { - LdapFetchGroupsWithDontExistGroupAttribute(ESecurityConnectionType::START_TLS); - } - - Y_UNIT_TEST(LdapFetchGroupsWithDontExistGroupAttribute_LdapsScheme) { - LdapFetchGroupsWithDontExistGroupAttribute(ESecurityConnectionType::LDAPS_SCHEME); - } - void LdapFetchGroupsWithInvalidRobotUserLoginBad(const ESecurityConnectionType& secureType) { TString login = "ldapuser"; TString password = "ldapUserPassword"; @@ -459,18 +938,6 @@ void LdapFetchGroupsWithDefaultGroupAttributeGood(const ESecurityConnectionType& ldapServer.Stop(); } - Y_UNIT_TEST(LdapFetchGroupsWithInvalidRobotUserLoginBad_nonSecure) { - LdapFetchGroupsWithInvalidRobotUserLoginBad(ESecurityConnectionType::NON_SECURE); - } - - Y_UNIT_TEST(LdapFetchGroupsWithInvalidRobotUserLoginBad_StartTls) { 
- LdapFetchGroupsWithInvalidRobotUserLoginBad(ESecurityConnectionType::START_TLS); - } - - Y_UNIT_TEST(LdapFetchGroupsWithInvalidRobotUserLoginBad_LdapsScheme) { - LdapFetchGroupsWithInvalidRobotUserLoginBad(ESecurityConnectionType::LDAPS_SCHEME); - } - void LdapFetchGroupsWithInvalidRobotUserPasswordBad(const ESecurityConnectionType& secureType) { TString login = "ldapuser"; TString password = "ldapUserPassword"; @@ -494,18 +961,6 @@ void LdapFetchGroupsWithDefaultGroupAttributeGood(const ESecurityConnectionType& ldapServer.Stop(); } - Y_UNIT_TEST(LdapFetchGroupsWithInvalidRobotUserPasswordBad_nonSecure) { - LdapFetchGroupsWithInvalidRobotUserPasswordBad(ESecurityConnectionType::NON_SECURE); - } - - Y_UNIT_TEST(LdapFetchGroupsWithInvalidRobotUserPasswordBad_StartTls) { - LdapFetchGroupsWithInvalidRobotUserPasswordBad(ESecurityConnectionType::START_TLS); - } - - Y_UNIT_TEST(LdapFetchGroupsWithInvalidRobotUserPasswordBad_LdapsScheme) { - LdapFetchGroupsWithInvalidRobotUserPasswordBad(ESecurityConnectionType::LDAPS_SCHEME); - } - void LdapFetchGroupsWithRemovedUserCredentialsBad(const ESecurityConnectionType& secureType) { TString removedUserLogin = "ldapuser"; TString removedUserPassword = "ldapUserPassword"; @@ -544,18 +999,6 @@ void LdapFetchGroupsWithDefaultGroupAttributeGood(const ESecurityConnectionType& ldapServer.Stop(); } - Y_UNIT_TEST(LdapFetchGroupsWithRemovedUserCredentialsBad_nonSecure) { - LdapFetchGroupsWithRemovedUserCredentialsBad(ESecurityConnectionType::NON_SECURE); - } - - Y_UNIT_TEST(LdapFetchGroupsWithRemovedUserCredentialsBad_StartTls) { - LdapFetchGroupsWithRemovedUserCredentialsBad(ESecurityConnectionType::START_TLS); - } - - Y_UNIT_TEST(LdapFetchGroupsWithRemovedUserCredentialsBad_LdapsScheme) { - LdapFetchGroupsWithRemovedUserCredentialsBad(ESecurityConnectionType::LDAPS_SCHEME); - } - void LdapFetchGroupsUseInvalidSearchFilterBad(const ESecurityConnectionType& secureType) { TString login = "ldapuser"; TString password = "ldapUserPassword"; 
@@ -577,76 +1020,70 @@ void LdapFetchGroupsWithDefaultGroupAttributeGood(const ESecurityConnectionType& ldapServer.Stop(); } - Y_UNIT_TEST(LdapFetchGroupsUseInvalidSearchFilterBad_nonSecure) { - LdapFetchGroupsUseInvalidSearchFilterBad(ESecurityConnectionType::NON_SECURE); - } + void LdapRefreshGroupsInfoGood(const ESecurityConnectionType& secureType) { + TString login = "ldapuser"; + TString password = "ldapUserPassword"; - Y_UNIT_TEST(LdapFetchGroupsUseInvalidSearchFilterBad_StartTls) { - LdapFetchGroupsUseInvalidSearchFilterBad(ESecurityConnectionType::START_TLS); - } + auto responses = TCorrectLdapResponse::GetResponses(login); + LdapMock::TLdapMockResponses updatedResponses = TCorrectLdapResponse::GetUpdatedResponses(login); + const TString ldapDomain = "@ldap"; - Y_UNIT_TEST(LdapFetchGroupsUseInvalidSearchFilterBad_LdapsScheme) { - LdapFetchGroupsUseInvalidSearchFilterBad(ESecurityConnectionType::LDAPS_SCHEME); - } + TLdapKikimrServer server(InitLdapSettings, secureType); + LdapMock::TLdapSimpleServer ldapServer(server.GetLdapPort(), {responses, updatedResponses}, secureType == ESecurityConnectionType::LDAPS_SCHEME); - Y_UNIT_TEST(LdapServerIsUnavailable) { - CheckRequiredLdapSettings(InitLdapSettingsWithUnavailableHost, "Could not start TLS\nCan't contact LDAP server", ESecurityConnectionType::START_TLS); - } + auto loginResponse = GetLoginResponse(server, login, password); + TTestActorRuntime* runtime = server.GetRuntime(); + TActorId sender = runtime->AllocateEdgeActor(); + runtime->Send(new IEventHandle(MakeTicketParserID(), sender, new TEvTicketParser::TEvAuthorizeTicket(loginResponse.Token)), 0); + TAutoPtr handle; + TEvTicketParser::TEvAuthorizeTicketResult* ticketParserResult = runtime->GrabEdgeEvent(handle); - Y_UNIT_TEST(LdapRequestWithEmptyHost) { - CheckRequiredLdapSettings(InitLdapSettingsWithEmptyHost, "List of ldap server hosts is empty"); - } + UNIT_ASSERT_C(ticketParserResult->Error.empty(), ticketParserResult->Error); + 
UNIT_ASSERT(ticketParserResult->Token != nullptr); + UNIT_ASSERT_VALUES_EQUAL(ticketParserResult->Token->GetUserSID(), login + ldapDomain); + const auto& fetchedGroups = ticketParserResult->Token->GetGroupSIDs(); + THashSet groups(fetchedGroups.begin(), fetchedGroups.end()); - Y_UNIT_TEST(LdapRequestWithEmptyBaseDn) { - CheckRequiredLdapSettings(InitLdapSettingsWithEmptyBaseDn, "Parameter BaseDn is empty"); - } + THashSet expectedGroups = TCorrectLdapResponse::GetAllGroups(ldapDomain); + expectedGroups.insert("all-users@well-known"); - Y_UNIT_TEST(LdapRequestWithEmptyBindDn) { - CheckRequiredLdapSettings(InitLdapSettingsWithEmptyBindDn, "Parameter BindDn is empty"); - } + UNIT_ASSERT_VALUES_EQUAL(fetchedGroups.size(), expectedGroups.size()); + for (const auto& expectedGroup : expectedGroups) { + UNIT_ASSERT_C(groups.contains(expectedGroup), "Can not find " + expectedGroup); + } - Y_UNIT_TEST(LdapRequestWithEmptyBindPassword) { - CheckRequiredLdapSettings(InitLdapSettingsWithEmptyBindPassword, "Parameter BindPassword is empty"); - } + ldapServer.UpdateResponses(); + Sleep(TDuration::Seconds(10)); - void LdapRefreshGroupsInfoGood(const ESecurityConnectionType& secureType) { - TString login = "ldapuser"; - TString password = "ldapUserPassword"; + runtime->Send(new IEventHandle(MakeTicketParserID(), sender, new TEvTicketParser::TEvAuthorizeTicket(loginResponse.Token)), 0); + ticketParserResult = runtime->GrabEdgeEvent(handle); + UNIT_ASSERT_C(ticketParserResult->Error.empty(), ticketParserResult->Error); + UNIT_ASSERT(ticketParserResult->Token != nullptr); + UNIT_ASSERT_VALUES_EQUAL(ticketParserResult->Token->GetUserSID(), login + "@ldap"); + const auto& newFetchedGroups = ticketParserResult->Token->GetGroupSIDs(); + THashSet newGroups(newFetchedGroups.begin(), newFetchedGroups.end()); - auto responses = TCorrectLdapResponse::GetResponses(login); - LdapMock::TLdapMockResponses updatedResponses = responses; + THashSet newExpectedGroups = 
TCorrectLdapResponse::GetAllUpdatedGroups(ldapDomain); + newExpectedGroups.insert("all-users@well-known"); - std::vector newLdapGroups { - "ou=groups,dc=search,dc=yandex,dc=net", - "cn=people,ou=groups,dc=search,dc=yandex,dc=net", - "cn=designers,ou=groups,dc=search,dc=yandex,dc=net" - }; - std::vector newFetchGroupsSearchResponseEntries { - { - .Dn = "uid=" + login + ",dc=search,dc=yandex,dc=net", - .AttributeList = { - {"memberOf", newLdapGroups} - } - } - }; + UNIT_ASSERT_VALUES_EQUAL(newFetchedGroups.size(), newExpectedGroups.size()); + for (const auto& expectedGroup : newExpectedGroups) { + UNIT_ASSERT_C(newGroups.contains(expectedGroup), "Can not find " + expectedGroup); + } - const TString ldapDomain = "@ldap"; - THashSet newExpectedGroups; - std::transform(newLdapGroups.begin(), newLdapGroups.end(), std::inserter(newExpectedGroups, newExpectedGroups.end()), [&ldapDomain](TString& group) { - return group.append(ldapDomain); - }); - newExpectedGroups.insert("all-users@well-known"); + ldapServer.Stop(); + } - LdapMock::TSearchResponseInfo newFetchGroupsSearchResponseInfo { - .ResponseEntries = newFetchGroupsSearchResponseEntries, - .ResponseDone = {.Status = LdapMock::EStatus::SUCCESS} - }; + void LdapRefreshGroupsInfoDisableNestedGroupsGood(const ESecurityConnectionType& secureType) { + TString login = "ldapuser"; + TString password = "ldapUserPassword"; - auto& searchResponse = updatedResponses.SearchResponses.front(); - searchResponse.second = newFetchGroupsSearchResponseInfo; + auto responses = TCorrectLdapResponse::GetResponses(login, true); + LdapMock::TLdapMockResponses updatedResponses = TCorrectLdapResponse::GetUpdatedResponses(login, true); + const TString ldapDomain = "@ldap"; - TLdapKikimrServer server(InitLdapSettings, secureType); + TLdapKikimrServer server(InitLdapSettingsDisableSearchNestedGroups, secureType); LdapMock::TLdapSimpleServer ldapServer(server.GetLdapPort(), {responses, updatedResponses}, secureType == 
ESecurityConnectionType::LDAPS_SCHEME); auto loginResponse = GetLoginResponse(server, login, password); @@ -662,10 +1099,7 @@ void LdapFetchGroupsWithDefaultGroupAttributeGood(const ESecurityConnectionType& const auto& fetchedGroups = ticketParserResult->Token->GetGroupSIDs(); THashSet groups(fetchedGroups.begin(), fetchedGroups.end()); - THashSet expectedGroups; - std::transform(TCorrectLdapResponse::Groups.begin(), TCorrectLdapResponse::Groups.end(), std::inserter(expectedGroups, expectedGroups.end()), [&ldapDomain](TString& group) { - return group.append(ldapDomain); - }); + THashSet expectedGroups = TCorrectLdapResponse::GetDirectedGroups(ldapDomain); expectedGroups.insert("all-users@well-known"); UNIT_ASSERT_VALUES_EQUAL(fetchedGroups.size(), expectedGroups.size()); @@ -684,6 +1118,10 @@ void LdapFetchGroupsWithDefaultGroupAttributeGood(const ESecurityConnectionType& UNIT_ASSERT_VALUES_EQUAL(ticketParserResult->Token->GetUserSID(), login + "@ldap"); const auto& newFetchedGroups = ticketParserResult->Token->GetGroupSIDs(); THashSet newGroups(newFetchedGroups.begin(), newFetchedGroups.end()); + + THashSet newExpectedGroups = TCorrectLdapResponse::GetUpdatedDirectedGroups(ldapDomain); + newExpectedGroups.insert("all-users@well-known"); + UNIT_ASSERT_VALUES_EQUAL(newFetchedGroups.size(), newExpectedGroups.size()); for (const auto& expectedGroup : newExpectedGroups) { UNIT_ASSERT_C(newGroups.contains(expectedGroup), "Can not find " + expectedGroup); @@ -692,18 +1130,6 @@ void LdapFetchGroupsWithDefaultGroupAttributeGood(const ESecurityConnectionType& ldapServer.Stop(); } - Y_UNIT_TEST(LdapRefreshGroupsInfoGood_nonSecure) { - LdapRefreshGroupsInfoGood(ESecurityConnectionType::NON_SECURE); - } - - Y_UNIT_TEST(LdapRefreshGroupsInfoGood_StartTls) { - LdapRefreshGroupsInfoGood(ESecurityConnectionType::START_TLS); - } - - Y_UNIT_TEST(LdapRefreshGroupsInfoGood_LdapsScheme) { - LdapRefreshGroupsInfoGood(ESecurityConnectionType::LDAPS_SCHEME); - } - void 
LdapRefreshRemoveUserBad(const ESecurityConnectionType& secureType) { TString login = "ldapuser"; TString password = "ldapUserPassword"; @@ -734,10 +1160,7 @@ void LdapFetchGroupsWithDefaultGroupAttributeGood(const ESecurityConnectionType& const auto& fetchedGroups = ticketParserResult->Token->GetGroupSIDs(); THashSet groups(fetchedGroups.begin(), fetchedGroups.end()); - THashSet expectedGroups; - std::transform(TCorrectLdapResponse::Groups.begin(), TCorrectLdapResponse::Groups.end(), std::inserter(expectedGroups, expectedGroups.end()), [&ldapDomain](TString& group) { - return group.append(ldapDomain); - }); + THashSet expectedGroups = TCorrectLdapResponse::GetAllGroups(ldapDomain); expectedGroups.insert("all-users@well-known"); UNIT_ASSERT_VALUES_EQUAL(fetchedGroups.size(), expectedGroups.size()); @@ -763,18 +1186,200 @@ void LdapFetchGroupsWithDefaultGroupAttributeGood(const ESecurityConnectionType& ldapServer.Stop(); } - Y_UNIT_TEST(LdapRefreshRemoveUserBad_nonSecure) { - LdapRefreshRemoveUserBad(ESecurityConnectionType::NON_SECURE); +Y_UNIT_TEST_SUITE(LdapAuthProviderTest) { + Y_UNIT_TEST(LdapServerIsUnavailable) { + CheckRequiredLdapSettings(InitLdapSettingsWithUnavailableHost, "Could not start TLS\nCan't contact LDAP server", ESecurityConnectionType::START_TLS); } - Y_UNIT_TEST(LdapRefreshRemoveUserBad_StartTls) { - LdapRefreshRemoveUserBad(ESecurityConnectionType::START_TLS); + Y_UNIT_TEST(LdapRequestWithEmptyHost) { + CheckRequiredLdapSettings(InitLdapSettingsWithEmptyHost, "List of ldap server hosts is empty"); + } + + Y_UNIT_TEST(LdapRequestWithEmptyBaseDn) { + CheckRequiredLdapSettings(InitLdapSettingsWithEmptyBaseDn, "Parameter BaseDn is empty"); + } + + Y_UNIT_TEST(LdapRequestWithEmptyBindDn) { + CheckRequiredLdapSettings(InitLdapSettingsWithEmptyBindDn, "Parameter BindDn is empty"); + } + + Y_UNIT_TEST(LdapRequestWithEmptyBindPassword) { + CheckRequiredLdapSettings(InitLdapSettingsWithEmptyBindPassword, "Parameter BindPassword is empty"); + } +} + 
+Y_UNIT_TEST_SUITE(LdapAuthProviderTest_LdapsScheme) { + Y_UNIT_TEST(LdapFetchGroupsFromAdLdapServer) { + LdapFetchGroupsFromAdLdapServer(ESecurityConnectionType::LDAPS_SCHEME); + } + + Y_UNIT_TEST(LdapFetchGroupsDisableRequestToAD) { + LdapFetchGroupsDisableRequestToAD(ESecurityConnectionType::LDAPS_SCHEME); + } + + Y_UNIT_TEST(LdapFetchGroupsWithDefaultGroupAttributeGood) { + LdapFetchGroupsWithDefaultGroupAttributeGood(ESecurityConnectionType::LDAPS_SCHEME); + } + + Y_UNIT_TEST(LdapFetchGroupsWithDefaultGroupAttributeDisableNestedGroupsGood) { + LdapFetchGroupsWithDefaultGroupAttributeDisableNestedGroupsGood(ESecurityConnectionType::LDAPS_SCHEME); + } + + Y_UNIT_TEST(LdapFetchGroupsWithDefaultGroupAttributeGoodUseListOfHosts) { + LdapFetchGroupsWithDefaultGroupAttributeGoodUseListOfHosts(ESecurityConnectionType::LDAPS_SCHEME); + } + + Y_UNIT_TEST(LdapFetchGroupsWithCustomGroupAttributeGood) { + LdapFetchGroupsWithCustomGroupAttributeGood(ESecurityConnectionType::LDAPS_SCHEME); + } + + Y_UNIT_TEST(LdapFetchGroupsWithDontExistGroupAttribute) { + LdapFetchGroupsWithDontExistGroupAttribute(ESecurityConnectionType::LDAPS_SCHEME); + } + + Y_UNIT_TEST(LdapFetchGroupsWithInvalidRobotUserLoginBad) { + LdapFetchGroupsWithInvalidRobotUserLoginBad(ESecurityConnectionType::LDAPS_SCHEME); + } + + Y_UNIT_TEST(LdapFetchGroupsWithInvalidRobotUserPasswordBad) { + LdapFetchGroupsWithInvalidRobotUserPasswordBad(ESecurityConnectionType::LDAPS_SCHEME); + } + + Y_UNIT_TEST(LdapFetchGroupsWithRemovedUserCredentialsBad) { + LdapFetchGroupsWithRemovedUserCredentialsBad(ESecurityConnectionType::LDAPS_SCHEME); + } + + Y_UNIT_TEST(LdapFetchGroupsUseInvalidSearchFilterBad) { + LdapFetchGroupsUseInvalidSearchFilterBad(ESecurityConnectionType::LDAPS_SCHEME); } - Y_UNIT_TEST(LdapRefreshRemoveUserBad_LdapsScheme) { + Y_UNIT_TEST(LdapRefreshGroupsInfoGood) { + LdapRefreshGroupsInfoGood(ESecurityConnectionType::LDAPS_SCHEME); + } + + Y_UNIT_TEST(LdapRefreshGroupsInfoDisableNestedGroupsGood) { + 
LdapRefreshGroupsInfoDisableNestedGroupsGood(ESecurityConnectionType::LDAPS_SCHEME); + } + + Y_UNIT_TEST(LdapRefreshRemoveUserBad) { LdapRefreshRemoveUserBad(ESecurityConnectionType::LDAPS_SCHEME); } +} + +Y_UNIT_TEST_SUITE(LdapAuthProviderTest_StartTls) { + Y_UNIT_TEST(LdapFetchGroupsFromAdLdapServer) { + LdapFetchGroupsFromAdLdapServer(ESecurityConnectionType::START_TLS); + } + + Y_UNIT_TEST(LdapFetchGroupsDisableRequestToAD) { + LdapFetchGroupsDisableRequestToAD(ESecurityConnectionType::START_TLS); + } + + Y_UNIT_TEST(LdapFetchGroupsWithDefaultGroupAttributeGood) { + LdapFetchGroupsWithDefaultGroupAttributeGood(ESecurityConnectionType::START_TLS); + } + + Y_UNIT_TEST(LdapFetchGroupsWithDefaultGroupAttributeDisableNestedGroupsGood) { + LdapFetchGroupsWithDefaultGroupAttributeDisableNestedGroupsGood(ESecurityConnectionType::START_TLS); + } + + Y_UNIT_TEST(LdapFetchGroupsWithDefaultGroupAttributeGoodUseListOfHosts) { + LdapFetchGroupsWithDefaultGroupAttributeGoodUseListOfHosts(ESecurityConnectionType::START_TLS); + } + + Y_UNIT_TEST(LdapFetchGroupsWithCustomGroupAttributeGood) { + LdapFetchGroupsWithCustomGroupAttributeGood(ESecurityConnectionType::START_TLS); + } + + Y_UNIT_TEST(LdapFetchGroupsWithDontExistGroupAttribute) { + LdapFetchGroupsWithDontExistGroupAttribute(ESecurityConnectionType::START_TLS); + } + + Y_UNIT_TEST(LdapFetchGroupsWithInvalidRobotUserLoginBad) { + LdapFetchGroupsWithInvalidRobotUserLoginBad(ESecurityConnectionType::START_TLS); + } + + Y_UNIT_TEST(LdapFetchGroupsWithInvalidRobotUserPasswordBad) { + LdapFetchGroupsWithInvalidRobotUserPasswordBad(ESecurityConnectionType::START_TLS); + } + + Y_UNIT_TEST(LdapFetchGroupsWithRemovedUserCredentialsBad) { + LdapFetchGroupsWithRemovedUserCredentialsBad(ESecurityConnectionType::START_TLS); + } + + Y_UNIT_TEST(LdapFetchGroupsUseInvalidSearchFilterBad) { + LdapFetchGroupsUseInvalidSearchFilterBad(ESecurityConnectionType::START_TLS); + } + + Y_UNIT_TEST(LdapRefreshGroupsInfoGood) { + 
LdapRefreshGroupsInfoGood(ESecurityConnectionType::START_TLS); + } + + Y_UNIT_TEST(LdapRefreshGroupsInfoDisableNestedGroupsGood) { + LdapRefreshGroupsInfoDisableNestedGroupsGood(ESecurityConnectionType::START_TLS); + } + Y_UNIT_TEST(LdapRefreshRemoveUserBad) { + LdapRefreshRemoveUserBad(ESecurityConnectionType::START_TLS); + } +} + +Y_UNIT_TEST_SUITE(LdapAuthProviderTest_nonSecure) { + Y_UNIT_TEST(LdapFetchGroupsFromAdLdapServer) { + LdapFetchGroupsFromAdLdapServer(ESecurityConnectionType::NON_SECURE); + } + + Y_UNIT_TEST(LdapFetchGroupsDisableRequestToAD) { + LdapFetchGroupsDisableRequestToAD(ESecurityConnectionType::NON_SECURE); + } + + Y_UNIT_TEST(LdapFetchGroupsWithDefaultGroupAttributeGood) { + LdapFetchGroupsWithDefaultGroupAttributeGood(ESecurityConnectionType::NON_SECURE); + } + + Y_UNIT_TEST(LdapFetchGroupsWithDefaultGroupAttributeDisableNestedGroupsGood) { + LdapFetchGroupsWithDefaultGroupAttributeDisableNestedGroupsGood(ESecurityConnectionType::NON_SECURE); + } + + Y_UNIT_TEST(LdapFetchGroupsWithDefaultGroupAttributeGoodUseListOfHosts) { + LdapFetchGroupsWithDefaultGroupAttributeGoodUseListOfHosts(ESecurityConnectionType::NON_SECURE); + } + + Y_UNIT_TEST(LdapFetchGroupsWithCustomGroupAttributeGood) { + LdapFetchGroupsWithCustomGroupAttributeGood(ESecurityConnectionType::NON_SECURE); + } + + Y_UNIT_TEST(LdapFetchGroupsWithDontExistGroupAttribute) { + LdapFetchGroupsWithDontExistGroupAttribute(ESecurityConnectionType::NON_SECURE); + } + + Y_UNIT_TEST(LdapFetchGroupsWithInvalidRobotUserLoginBad) { + LdapFetchGroupsWithInvalidRobotUserLoginBad(ESecurityConnectionType::NON_SECURE); + } + + Y_UNIT_TEST(LdapFetchGroupsWithInvalidRobotUserPasswordBad) { + LdapFetchGroupsWithInvalidRobotUserPasswordBad(ESecurityConnectionType::NON_SECURE); + } + + Y_UNIT_TEST(LdapFetchGroupsWithRemovedUserCredentialsBad) { + LdapFetchGroupsWithRemovedUserCredentialsBad(ESecurityConnectionType::NON_SECURE); + } + + Y_UNIT_TEST(LdapFetchGroupsUseInvalidSearchFilterBad) { + 
LdapFetchGroupsUseInvalidSearchFilterBad(ESecurityConnectionType::NON_SECURE); + } + + Y_UNIT_TEST(LdapRefreshGroupsInfoGood) { + LdapRefreshGroupsInfoGood(ESecurityConnectionType::NON_SECURE); + } + + Y_UNIT_TEST(LdapRefreshGroupsInfoDisableNestedGroupsGood) { + LdapRefreshGroupsInfoDisableNestedGroupsGood(ESecurityConnectionType::NON_SECURE); + } + + Y_UNIT_TEST(LdapRefreshRemoveUserBad) { + LdapRefreshRemoveUserBad(ESecurityConnectionType::NON_SECURE); + } } } // NKikimr diff --git a/ydb/core/security/ldap_auth_provider/ldap_auth_provider_win.cpp b/ydb/core/security/ldap_auth_provider/ldap_auth_provider_win.cpp index 399a1f2916cd..d1fc38a449ce 100644 --- a/ydb/core/security/ldap_auth_provider/ldap_auth_provider_win.cpp +++ b/ydb/core/security/ldap_auth_provider/ldap_auth_provider_win.cpp @@ -1,7 +1,7 @@ #include #include #include -#include "ticket_parser_log.h" +#include #include "ldap_auth_provider.h" #include @@ -66,6 +66,10 @@ LDAPMessage* FirstEntry(LDAP* ld, LDAPMessage* chain) { return ldap_first_entry(ld, chain); } +LDAPMessage* NextEntry(LDAP* ld, LDAPMessage* entry) { + return ldap_next_entry(ld, entry); +} + char* FirstAttribute(LDAP* ld, LDAPMessage* entry, BerElement** berout) { return ldap_first_attribute(ld, entry, berout); } diff --git a/ydb/core/security/ldap_auth_provider/ldap_compat.h b/ydb/core/security/ldap_auth_provider/ldap_compat.h index 15aee21e9c7f..48e04441f082 100644 --- a/ydb/core/security/ldap_auth_provider/ldap_compat.h +++ b/ydb/core/security/ldap_auth_provider/ldap_compat.h @@ -37,6 +37,7 @@ int Search(LDAP* ld, TString LdapError(LDAP* ld); TString ErrorToString(int err); LDAPMessage* FirstEntry(LDAP* ld, LDAPMessage* chain); +LDAPMessage* NextEntry(LDAP* ld, LDAPMessage* entry); char* FirstAttribute(LDAP* ld, LDAPMessage* entry, BerElement** berout); void MemFree(char* p); void BerFree(BerElement* ber, int freebuf); diff --git a/ydb/core/security/ldap_auth_provider/ldap_utils.cpp 
b/ydb/core/security/ldap_auth_provider/ldap_utils.cpp index 3087cbaf137a..355c77aba9f7 100644 --- a/ydb/core/security/ldap_auth_provider/ldap_utils.cpp +++ b/ydb/core/security/ldap_auth_provider/ldap_utils.cpp @@ -1,11 +1,13 @@ #include +#include +#include #include "ldap_utils.h" namespace NKikimr { TSearchFilterCreator::TSearchFilterCreator(const NKikimrProto::TLdapAuthentication& settings) : Settings(settings) - {} +{} TString TSearchFilterCreator::GetFilter(const TString& userName) const { if (!Settings.GetSearchFilter().empty()) { @@ -34,4 +36,70 @@ TString TSearchFilterCreator::GetFormatSearchFilter(const TString& userName) con return result.Str(); } +TLdapUrisCreator::TLdapUrisCreator(const NKikimrProto::TLdapAuthentication& settings, ui32 configuredPort) + : Settings(settings) + , Scheme(Settings.GetScheme() == "ldaps" ? Settings.GetScheme() : "ldap") + , ConfiguredPort(configuredPort) +{} + +TString TLdapUrisCreator::GetUris() const { + if (Uris.empty()) { + Uris = CreateUrisList(); + } + return Uris; +} + +ui32 TLdapUrisCreator::GetConfiguredPort() const { + return ConfiguredPort; +} + +TString TLdapUrisCreator::CreateUrisList() const { + TStringBuilder uris; + if (Settings.HostsSize() > 0) { + for (const auto& host : Settings.GetHosts()) { + uris << CreateUri(host) << " "; + } + uris.remove(uris.size() - 1); + } else { + uris << CreateUri(Settings.GetHost()); + } + return uris; +} + +TString TLdapUrisCreator::CreateUri(const TString& address) const { + TString hostname; + ui32 port = 0; + size_t first_colon_pos = address.find(':'); + if (first_colon_pos != TString::npos) { + size_t last_colon_pos = address.rfind(':'); + if (last_colon_pos == first_colon_pos) { + // only one colon, simple case + try { + port = FromString(address.substr(first_colon_pos + 1)); + } catch (TFromStringException& ex) { + port = 0; + } + hostname = address.substr(0, first_colon_pos); + } else { + // ipv6? 
+ size_t closing_bracket_pos = address.rfind(']'); + if (closing_bracket_pos == TString::npos || closing_bracket_pos > last_colon_pos) { + // whole address is ipv6 host + hostname = address; + } else { + try { + port = FromString(address.substr(last_colon_pos + 1)); + } catch (TFromStringException& ex) { + port = 0; + } + hostname = address.substr(0, last_colon_pos); + } + } + } else { + hostname = address; + } + port = (port != 0) ? port : ConfiguredPort; + return TStringBuilder() << Scheme << "://" << hostname << ':' << port; +} + } // namespace NKikimr diff --git a/ydb/core/security/ldap_auth_provider/ldap_utils.h b/ydb/core/security/ldap_auth_provider/ldap_utils.h index 62fd188d825e..5cfb3f957251 100644 --- a/ydb/core/security/ldap_auth_provider/ldap_utils.h +++ b/ydb/core/security/ldap_auth_provider/ldap_utils.h @@ -16,4 +16,22 @@ class TSearchFilterCreator { const NKikimrProto::TLdapAuthentication& Settings; }; +class TLdapUrisCreator { +public: + TLdapUrisCreator(const NKikimrProto::TLdapAuthentication& settings, ui32 configuredPort); + + TString GetUris() const; + ui32 GetConfiguredPort() const; + +private: + TString CreateUrisList() const; + TString CreateUri(const TString& address) const; + +private: + const NKikimrProto::TLdapAuthentication& Settings; + const TString Scheme; + const ui32 ConfiguredPort; + mutable TString Uris; +}; + } // namespace NKikimr diff --git a/ydb/core/security/ldap_auth_provider/ldap_utils_ut.cpp b/ydb/core/security/ldap_auth_provider/ldap_utils_ut.cpp index 26a0cb5a5caf..c615e18ab164 100644 --- a/ydb/core/security/ldap_auth_provider/ldap_utils_ut.cpp +++ b/ydb/core/security/ldap_auth_provider/ldap_utils_ut.cpp @@ -3,7 +3,7 @@ namespace NKikimr { -Y_UNIT_TEST_SUITE(TLdapUtilsTest) { +Y_UNIT_TEST_SUITE(TLdapUtilsSearchFilterCreatorTest) { Y_UNIT_TEST(GetDefaultFilter) { NKikimrProto::TLdapAuthentication settings; TSearchFilterCreator filterCreator(settings); @@ -62,4 +62,58 @@ Y_UNIT_TEST_SUITE(TLdapUtilsTest) { } } 
+Y_UNIT_TEST_SUITE(TLdapUtilsUrisCreatorTest) { + Y_UNIT_TEST(CreateUrisFromHostnames) { + NKikimrProto::TLdapAuthentication settings; + *settings.AddHosts() = "test.hostname-001"; + *settings.AddHosts() = "test.hostname-002:1234"; + *settings.AddHosts() = "test.hostname-003:"; + + TLdapUrisCreator urisCreator(settings, 389); + UNIT_ASSERT_VALUES_EQUAL("ldap://test.hostname-001:389 ldap://test.hostname-002:1234 ldap://test.hostname-003:389", urisCreator.GetUris()); + } + + Y_UNIT_TEST(CreateUrisFromIpV4List) { + NKikimrProto::TLdapAuthentication settings; + *settings.AddHosts() = "192.168.0.1"; + *settings.AddHosts() = "192.168.0.2:1234"; + *settings.AddHosts() = "192.168.0.3:"; + + TLdapUrisCreator urisCreator(settings, 389); + UNIT_ASSERT_VALUES_EQUAL("ldap://192.168.0.1:389 ldap://192.168.0.2:1234 ldap://192.168.0.3:389", urisCreator.GetUris()); + } + + Y_UNIT_TEST(CreateUrisFromIpV6List) { + NKikimrProto::TLdapAuthentication settings; + *settings.AddHosts() = "[2a02:6b8:bf00::]"; + *settings.AddHosts() = "[2a02:6b8:bf01::]:1234"; + *settings.AddHosts() = "[2a02:6b8:bf02::]:"; + + TLdapUrisCreator urisCreator(settings, 389); + UNIT_ASSERT_VALUES_EQUAL("ldap://[2a02:6b8:bf00::]:389 ldap://[2a02:6b8:bf01::]:1234 ldap://[2a02:6b8:bf02::]:389", urisCreator.GetUris()); + } + + Y_UNIT_TEST(CreateUrisFromHostnamesLdapsScheme) { + NKikimrProto::TLdapAuthentication settings; + *settings.AddHosts() = "test.hostname-001"; + *settings.AddHosts() = "test.hostname-002:1234"; + *settings.AddHosts() = "test.hostname-003:"; + settings.SetScheme("ldaps"); + + TLdapUrisCreator urisCreator(settings, 389); + UNIT_ASSERT_VALUES_EQUAL("ldaps://test.hostname-001:389 ldaps://test.hostname-002:1234 ldaps://test.hostname-003:389", urisCreator.GetUris()); + } + + Y_UNIT_TEST(CreateUrisFromHostnamesUnknownScheme) { + NKikimrProto::TLdapAuthentication settings; + *settings.AddHosts() = "test.hostname-001"; + *settings.AddHosts() = "test.hostname-002:1234"; + *settings.AddHosts() = 
"test.hostname-003:"; + settings.SetScheme("http"); + + TLdapUrisCreator urisCreator(settings, 389); + UNIT_ASSERT_VALUES_EQUAL("ldap://test.hostname-001:389 ldap://test.hostname-002:1234 ldap://test.hostname-003:389", urisCreator.GetUris()); + } +} + } // namespace NKikimr diff --git a/ydb/core/security/ldap_auth_provider/ya.make b/ydb/core/security/ldap_auth_provider/ya.make index abdc3f210725..a0c9671eda1e 100644 --- a/ydb/core/security/ldap_auth_provider/ya.make +++ b/ydb/core/security/ldap_auth_provider/ya.make @@ -25,6 +25,7 @@ ENDIF() PEERDIR( ydb/core/base ydb/core/protos + ydb/core/util ) END() diff --git a/ydb/core/security/login_page.cpp b/ydb/core/security/login_page.cpp index cb6db091f421..88d0c1765190 100644 --- a/ydb/core/security/login_page.cpp +++ b/ydb/core/security/login_page.cpp @@ -6,6 +6,8 @@ #include #include +#include +#include #include #include #include @@ -21,6 +23,24 @@ using namespace NKikimr; using namespace NSchemeShard; using namespace NMonitoring; +void AuditLogWebUILogout(const NHttp::THttpIncomingRequest& request, const TString& userSID) { + static const TString WebLoginComponentName = "web-login"; + static const TString LogoutOperationName = "LOGOUT"; + static const TString EmptyValue = "{none}"; + + auto remoteAddress = NKikimr::NAddressClassifier::ExtractAddress(request.Address->ToString()); + + // NOTE: audit field set here must be in sync with ydb/core/tx/schemeshard/schemeshard_audit_log.h, AuditLogLogin() + AUDIT_LOG( + AUDIT_PART("component", WebLoginComponentName) + AUDIT_PART("remote_address", (!remoteAddress.empty() ? remoteAddress : EmptyValue)) + AUDIT_PART("subject", (!userSID.empty() ? 
userSID : EmptyValue)) + //NOTE: no database specified as web logout considered cluster-wide + AUDIT_PART("operation", LogoutOperationName) + AUDIT_PART("status", TString("SUCCESS")) + ); +} + using THttpResponsePtr = THolder; class TLoginRequest : public NActors::TActorBootstrapped { @@ -96,17 +116,7 @@ class TLoginRequest : public NActors::TActorBootstrapped { ALOG_DEBUG(NActorsServices::HTTP, "Login: Requesting LDAP provider for user " << AuthCredentials.Login); Send(MakeLdapAuthProviderID(), new TEvLdapAuthProvider::TEvAuthenticateRequest(AuthCredentials.Login, AuthCredentials.Password)); } else { - auto *domain = AppData()->DomainsInfo->GetDomain(); - TString rootDatabase = "/" + domain->Name; - ui64 rootSchemeShardTabletId = domain->SchemeRoot; - if (!Database.empty() && Database != rootDatabase) { - Database = rootDatabase; - ALOG_DEBUG(NActorsServices::HTTP, "Login: Requesting schemecache for database " << Database); - Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(CreateNavigateKeySetRequest(Database).Release())); - } else { - Database = rootDatabase; - RequestSchemeShard(rootSchemeShardTabletId); - } + RequestLoginProvider(); } Become(&TThis::StateWork, Timeout, new TEvents::TEvWakeup()); } @@ -123,6 +133,7 @@ class TLoginRequest : public NActors::TActorBootstrapped { PipeClient = RegisterWithSameMailbox(pipe); THolder request = MakeHolder(); request.Get()->Record = CreateLoginRequest(AuthCredentials, AppData()->AuthConfig); + request.Get()->Record.SetPeerName(Request->Address->ToString()); NTabletPipe::SendData(SelfId(), PipeClient, request.Release()); } @@ -146,10 +157,23 @@ class TLoginRequest : public NActors::TActorBootstrapped { void Handle(TEvLdapAuthProvider::TEvAuthenticateResponse::TPtr& ev) { TEvLdapAuthProvider::TEvAuthenticateResponse* response = ev->Get(); if (response->Status == TEvLdapAuthProvider::EStatus::SUCCESS) { + RequestLoginProvider(); + } else { + ReplyErrorAndPassAway("403", "Forbidden", 
response->Error.Message); + } + } + + void RequestLoginProvider() { + auto *domain = AppData()->DomainsInfo->GetDomain(); + TString rootDatabase = "/" + domain->Name; + ui64 rootSchemeShardTabletId = domain->SchemeRoot; + if (!Database.empty() && Database != rootDatabase) { + Database = rootDatabase; ALOG_DEBUG(NActorsServices::HTTP, "Login: Requesting schemecache for database " << Database); Send(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(CreateNavigateKeySetRequest(Database).Release())); } else { - ReplyErrorAndPassAway("403", "Forbidden", response->Error.Message); + Database = rootDatabase; + RequestSchemeShard(rootSchemeShardTabletId); } } @@ -247,6 +271,8 @@ class TLogoutRequest : public NActors::TActorBootstrapped { STATEFN(StateWork) { switch (ev->GetTypeRewrite()) { hFunc(TEvents::TEvPoisonPill, HandlePoisonPill); + hFunc(TEvTicketParser::TEvAuthorizeTicketResult, Handle); + cFunc(TEvents::TSystem::Wakeup, HandleTimeout); } } @@ -261,13 +287,42 @@ class TLogoutRequest : public NActors::TActorBootstrapped { return ReplyErrorAndPassAway("400", "Bad Request", "Invalid method"); } - ReplyDeleteCookieAndPassAway(); + NHttp::TCookies cookies(NHttp::THeaders(Request->Headers)["Cookie"]); + TStringBuf ydbSessionId = cookies["ydb_session_id"]; + if (ydbSessionId.empty()) { + return ReplyErrorAndPassAway("401", "Unauthorized", "No ydb_session_id cookie"); + } + + Send(NKikimr::MakeTicketParserID(), new NKikimr::TEvTicketParser::TEvAuthorizeTicket({ + .Database = TString(), + .Ticket = TString("Login ") + ydbSessionId, + .PeerName = Request->Address->ToString(), + })); + + Become(&TThis::StateWork, Timeout, new TEvents::TEvWakeup()); + } + + void Handle(TEvTicketParser::TEvAuthorizeTicketResult::TPtr& ev) { + const TEvTicketParser::TEvAuthorizeTicketResult& result = *ev->Get(); + if (result.Error) { + return ReplyErrorAndPassAway("403", "Forbidden", result.Error.Message); + } + if (result.Token == nullptr) { + return ReplyErrorAndPassAway("403", 
"Forbidden", "Empty token"); + } + + ReplyDeleteCookieAndPassAway(result.Token->GetUserSID()); } void HandlePoisonPill(TEvents::TEvPoisonPill::TPtr&) { PassAway(); } + void HandleTimeout() { + ALOG_ERROR(NActorsServices::HTTP, Request->Address << " " << Request->Method << " " << Request->URL << " timeout"); + ReplyErrorAndPassAway("504", "Gateway Timeout", "Timeout"); + } + void ReplyOptionsAndPassAway() { NHttp::THeadersBuilder headers; headers.Set("Allow", "OPTIONS, POST"); @@ -287,12 +342,15 @@ class TLogoutRequest : public NActors::TActorBootstrapped { headers.Set("Access-Control-Allow-Methods", "OPTIONS, GET, POST"); } - void ReplyDeleteCookieAndPassAway() { + void ReplyDeleteCookieAndPassAway(const TString& userSID) { ALOG_DEBUG(NActorsServices::HTTP, "Logout success"); NHttp::THeadersBuilder headers; SetCORS(headers); headers.Set("Set-Cookie", "ydb_session_id=; Max-Age=0"); Send(Sender, new NHttp::TEvHttpProxy::TEvHttpOutgoingResponse(Request->CreateResponse("200", "OK", headers))); + + AuditLogWebUILogout(*Request, userSID); + PassAway(); } @@ -310,6 +368,7 @@ class TLogoutRequest : public NActors::TActorBootstrapped { protected: TActorId Sender; NHttp::THttpIncomingRequestPtr Request; + TDuration Timeout = TDuration::Seconds(5); }; class TLoginService : public TActor { diff --git a/ydb/core/security/login_shared_func.cpp b/ydb/core/security/login_shared_func.cpp index 95ea9ee15914..f013534bf821 100644 --- a/ydb/core/security/login_shared_func.cpp +++ b/ydb/core/security/login_shared_func.cpp @@ -19,9 +19,9 @@ THolder CreateNavigateKeySetRequest(const TS TAuthCredentials PrepareCredentials(const TString& login, const TString& password, const NKikimrProto::TAuthConfig& config) { if (config.HasLdapAuthentication() && !config.GetLdapAuthenticationDomain().empty()) { - size_t n = login.find("@" + config.GetLdapAuthenticationDomain()); - if (n != TString::npos) { - return {.AuthType = TAuthCredentials::EAuthType::Ldap, .Login = login.substr(0, n), .Password = 
password}; + const TString domain = "@" + config.GetLdapAuthenticationDomain(); + if (login.EndsWith(domain)) { + return {.AuthType = TAuthCredentials::EAuthType::Ldap, .Login = login.substr(0, login.size() - domain.size()), .Password = password}; } } return {.AuthType = TAuthCredentials::EAuthType::Internal, .Login = login, .Password = password}; @@ -38,6 +38,9 @@ NKikimrScheme::TEvLogin CreateLoginRequest(const TAuthCredentials& credentials, } default: {} } + if (config.HasLoginTokenExpireTime()) { + record.SetExpiresAfterMs(TDuration::Parse(config.GetLoginTokenExpireTime()).MilliSeconds()); + } return record; } diff --git a/ydb/core/security/ticket_parser_impl.h b/ydb/core/security/ticket_parser_impl.h index b6c842470fbd..8fa41b6086f9 100644 --- a/ydb/core/security/ticket_parser_impl.h +++ b/ydb/core/security/ticket_parser_impl.h @@ -1721,6 +1721,8 @@ class TTicketParserImpl : public TActorBootstrapped { record.RefreshRetryableErrorImmediately = false; GetDerived()->CanRefreshTicket(key, record); Respond(record); + CounterTicketsErrors->Inc(); + return; } } else { record.UnsetToken(); diff --git a/ydb/core/security/ya.make b/ydb/core/security/ya.make index b4dd549e0dac..d9361c2de4d7 100644 --- a/ydb/core/security/ya.make +++ b/ydb/core/security/ya.make @@ -16,6 +16,7 @@ PEERDIR( ydb/library/grpc/actor_client library/cpp/monlib/service/pages library/cpp/openssl/io + ydb/core/audit ydb/core/base ydb/core/protos ydb/library/aclib diff --git a/ydb/core/statistics/stat_service.cpp b/ydb/core/statistics/stat_service.cpp index 548a80c89028..6bb451822593 100644 --- a/ydb/core/statistics/stat_service.cpp +++ b/ydb/core/statistics/stat_service.cpp @@ -298,7 +298,7 @@ class TStatService : public TActorBootstrapped { request.StatType = ev->Get()->StatType; request.StatRequests.swap(ev->Get()->StatRequests); - if (!EnableStatistics) { + if (!EnableStatistics || IsStatisticsDisabledInSA) { ReplyFailed(requestId, true); return; } @@ -463,6 +463,8 @@ class TStatService : public 
TActorBootstrapped { Send(ev->Sender, new TEvStatistics::TEvPropagateStatisticsResponse); + IsStatisticsDisabledInSA = false; + auto* record = ev->Get()->MutableRecord(); for (const auto& entry : record->GetEntries()) { ui64 schemeShardId = entry.GetSchemeShardId(); @@ -547,6 +549,7 @@ class TStatService : public TActorBootstrapped { << ", status = " << ev->Get()->Status); if (ev->Get()->Status != NKikimrProto::OK) { + IsStatisticsDisabledInSA = false; SAPipeClientId = TActorId(); ConnectToSA(); SyncNode(); @@ -560,12 +563,14 @@ class TStatService : public TActorBootstrapped { << ", client id = " << ev->Get()->ClientId << ", server id = " << ev->Get()->ServerId); + IsStatisticsDisabledInSA = false; SAPipeClientId = TActorId(); ConnectToSA(); SyncNode(); } void Handle(TEvStatistics::TEvStatisticsIsDisabled::TPtr&) { + IsStatisticsDisabledInSA = true; ReplyAllFailed(); } @@ -839,6 +844,7 @@ class TStatService : public TActorBootstrapped { private: bool EnableStatistics = false; bool EnableColumnStatistics = false; + bool IsStatisticsDisabledInSA = false; static constexpr size_t StatFanOut = 10; diff --git a/ydb/core/sys_view/common/schema.h b/ydb/core/sys_view/common/schema.h index c11266041cf1..81542a645969 100644 --- a/ydb/core/sys_view/common/schema.h +++ b/ydb/core/sys_view/common/schema.h @@ -415,7 +415,7 @@ struct Schema : NIceDb::Schema { struct BlobId : Column<10, NScheme::NTypeIds::Utf8> {}; struct BlobRangeOffset : Column<11, NScheme::NTypeIds::Uint64> {}; struct BlobRangeSize : Column<12, NScheme::NTypeIds::Uint64> {}; - struct Activity : Column<13, NScheme::NTypeIds::Bool> {}; + struct Activity : Column<13, NScheme::NTypeIds::Uint8> {}; struct TierName: Column<14, NScheme::NTypeIds::Utf8> {}; struct EntityType: Column<15, NScheme::NTypeIds::Utf8> {}; @@ -525,9 +525,10 @@ struct Schema : NIceDb::Schema { struct ColumnBlobBytes: Column<7, NScheme::NTypeIds::Uint64> {}; struct IndexBlobBytes: Column<8, NScheme::NTypeIds::Uint64> {}; struct PortionId: 
Column<9, NScheme::NTypeIds::Uint64> {}; - struct Activity: Column<10, NScheme::NTypeIds::Bool> {}; + struct Activity: Column<10, NScheme::NTypeIds::Uint8> {}; struct TierName: Column<11, NScheme::NTypeIds::Utf8> {}; struct Stats: Column<12, NScheme::NTypeIds::Utf8> {}; + struct Optimized: Column<13, NScheme::NTypeIds::Uint8> {}; using TKey = TableKey; using TColumns = TableColumns< @@ -542,7 +543,8 @@ struct Schema : NIceDb::Schema { PortionId, Activity, TierName, - Stats + Stats, + Optimized >; }; diff --git a/ydb/core/sys_view/sessions/sessions.cpp b/ydb/core/sys_view/sessions/sessions.cpp index f9e81b1c27ab..c9593fecaba6 100644 --- a/ydb/core/sys_view/sessions/sessions.cpp +++ b/ydb/core/sys_view/sessions/sessions.cpp @@ -178,6 +178,7 @@ class TSessionsScan : public NKikimr::NSysView::TScanActorBase { const auto& nodeId = PendingNodes.front(); auto kqpProxyId = NKqp::MakeKqpProxyID(nodeId); auto req = std::make_unique(); + req->Record.SetTenantName(TenantName); if (!ContinuationToken.empty()) { req->Record.SetSessionIdStart(ContinuationToken); req->Record.SetSessionIdStartInclusive(true); diff --git a/ydb/core/sys_view/ut_kqp.cpp b/ydb/core/sys_view/ut_kqp.cpp index 9a919a7fa0d7..d418c3667e6f 100644 --- a/ydb/core/sys_view/ut_kqp.cpp +++ b/ydb/core/sys_view/ut_kqp.cpp @@ -999,7 +999,7 @@ Y_UNIT_TEST_SUITE(SystemView) { check.String("Default"); // Kind check.Uint64(env.GetServer().GetRuntime()->GetNodeId(0)); // NodeId check.Uint64(1u); // PDiskId - check.String("ERROR"); // Status + check.Null(); // Status check.Uint64(0u); // VDisk check.Uint64(1000u); // VSlotId } diff --git a/ydb/core/tablet/node_whiteboard.cpp b/ydb/core/tablet/node_whiteboard.cpp index f9ffcfb1fd73..900f0e1134e0 100644 --- a/ydb/core/tablet/node_whiteboard.cpp +++ b/ydb/core/tablet/node_whiteboard.cpp @@ -392,6 +392,117 @@ class TNodeWhiteboardService : public TActorBootstrapped return modified; } + static void CopyField(::google::protobuf::Message& protoTo, + const 
::google::protobuf::Message& protoFrom, + const ::google::protobuf::Reflection& reflectionTo, + const ::google::protobuf::Reflection& reflectionFrom, + const ::google::protobuf::FieldDescriptor* field) { + using namespace ::google::protobuf; + if (field->is_repeated()) { + FieldDescriptor::CppType type = field->cpp_type(); + int size = reflectionFrom.FieldSize(protoFrom, field); + if (size != 0) { + reflectionTo.ClearField(&protoTo, field); + for (int i = 0; i < size; ++i) { + switch (type) { + case FieldDescriptor::CPPTYPE_INT32: + reflectionTo.AddInt32(&protoTo, field, reflectionFrom.GetRepeatedInt32(protoFrom, field, i)); + break; + case FieldDescriptor::CPPTYPE_INT64: + reflectionTo.AddInt64(&protoTo, field, reflectionFrom.GetRepeatedInt64(protoFrom, field, i)); + break; + case FieldDescriptor::CPPTYPE_UINT32: + reflectionTo.AddUInt32(&protoTo, field, reflectionFrom.GetRepeatedUInt32(protoFrom, field, i)); + break; + case FieldDescriptor::CPPTYPE_UINT64: + reflectionTo.AddUInt64(&protoTo, field, reflectionFrom.GetRepeatedUInt64(protoFrom, field, i)); + break; + case FieldDescriptor::CPPTYPE_DOUBLE: + reflectionTo.AddDouble(&protoTo, field, reflectionFrom.GetRepeatedDouble(protoFrom, field, i)); + break; + case FieldDescriptor::CPPTYPE_FLOAT: + reflectionTo.AddFloat(&protoTo, field, reflectionFrom.GetRepeatedFloat(protoFrom, field, i)); + break; + case FieldDescriptor::CPPTYPE_BOOL: + reflectionTo.AddBool(&protoTo, field, reflectionFrom.GetRepeatedBool(protoFrom, field, i)); + break; + case FieldDescriptor::CPPTYPE_ENUM: + reflectionTo.AddEnum(&protoTo, field, reflectionFrom.GetRepeatedEnum(protoFrom, field, i)); + break; + case FieldDescriptor::CPPTYPE_STRING: + reflectionTo.AddString(&protoTo, field, reflectionFrom.GetRepeatedString(protoFrom, field, i)); + break; + case FieldDescriptor::CPPTYPE_MESSAGE: + reflectionTo.AddMessage(&protoTo, field)->CopyFrom(reflectionFrom.GetRepeatedMessage(protoFrom, field, i)); + break; + } + } + } + } else { + if 
(reflectionFrom.HasField(protoFrom, field)) { + FieldDescriptor::CppType type = field->cpp_type(); + switch (type) { + case FieldDescriptor::CPPTYPE_INT32: + reflectionTo.SetInt32(&protoTo, field, reflectionFrom.GetInt32(protoFrom, field)); + break; + case FieldDescriptor::CPPTYPE_INT64: + reflectionTo.SetInt64(&protoTo, field, reflectionFrom.GetInt64(protoFrom, field)); + break; + case FieldDescriptor::CPPTYPE_UINT32: + reflectionTo.SetUInt32(&protoTo, field, reflectionFrom.GetUInt32(protoFrom, field)); + break; + case FieldDescriptor::CPPTYPE_UINT64: + reflectionTo.SetUInt64(&protoTo, field, reflectionFrom.GetUInt64(protoFrom, field)); + break; + case FieldDescriptor::CPPTYPE_DOUBLE: + reflectionTo.SetDouble(&protoTo, field, reflectionFrom.GetDouble(protoFrom, field)); + break; + case FieldDescriptor::CPPTYPE_FLOAT: + reflectionTo.SetFloat(&protoTo, field, reflectionFrom.GetFloat(protoFrom, field)); + break; + case FieldDescriptor::CPPTYPE_BOOL: + reflectionTo.SetBool(&protoTo, field, reflectionFrom.GetBool(protoFrom, field)); + break; + case FieldDescriptor::CPPTYPE_ENUM: + reflectionTo.SetEnum(&protoTo, field, reflectionFrom.GetEnum(protoFrom, field)); + break; + case FieldDescriptor::CPPTYPE_STRING: + reflectionTo.SetString(&protoTo, field, reflectionFrom.GetString(protoFrom, field)); + break; + case FieldDescriptor::CPPTYPE_MESSAGE: + reflectionTo.MutableMessage(&protoTo, field)->CopyFrom(reflectionFrom.GetMessage(protoFrom, field)); + break; + } + } + } + } + + static void SelectiveCopy(::google::protobuf::Message& protoTo, const ::google::protobuf::Message& protoFrom, const ::google::protobuf::RepeatedField& fields) { + using namespace ::google::protobuf; + const Descriptor& descriptor = *protoTo.GetDescriptor(); + const Reflection& reflectionTo = *protoTo.GetReflection(); + const Reflection& reflectionFrom = *protoFrom.GetReflection(); + for (auto fieldNumber : fields) { + const FieldDescriptor* field = descriptor.FindFieldByNumber(fieldNumber); + if 
(field) { + CopyField(protoTo, protoFrom, reflectionTo, reflectionFrom, field); + } + } + } + + template + static void Copy(TMessage& to, const TMessage& from, const TRequest& request) { + if (request.FieldsRequiredSize() > 0) { + if (request.FieldsRequiredSize() == 1 && request.GetFieldsRequired(0) == -1) { // all fields + to.CopyFrom(from); + } else { + SelectiveCopy(to, from, request.GetFieldsRequired()); + } + } else { + SelectiveCopy(to, from, GetDefaultWhiteboardFields()); + } + } + void SetRole(TStringBuf roleName) { for (const auto& role : SystemStateInfo.GetRoles()) { if (role == roleName) { @@ -686,14 +797,6 @@ class TNodeWhiteboardService : public TActorBootstrapped } } - static void CopyTabletStateInfo( - NKikimrWhiteboard::TTabletStateInfo& dst, - const NKikimrWhiteboard::TTabletStateInfo& src, - const NKikimrWhiteboard::TEvTabletStateRequest&) - { - dst = src; - } - void Handle(TEvWhiteboard::TEvTabletStateRequest::TPtr &ev, const TActorContext &ctx) { auto now = TMonotonic::Now(); const auto& request = ev->Get()->Record; @@ -716,7 +819,7 @@ class TNodeWhiteboardService : public TActorBootstrapped for (const auto& pr : TabletStateInfo) { if (pr.second.changetime() >= changedSince) { NKikimrWhiteboard::TTabletStateInfo& tabletStateInfo = *record.add_tabletstateinfo(); - CopyTabletStateInfo(tabletStateInfo, pr.second, request); + Copy(tabletStateInfo, pr.second, request); } } } else { @@ -725,12 +828,12 @@ class TNodeWhiteboardService : public TActorBootstrapped if (it != TabletStateInfo.end()) { if (it->second.changetime() >= changedSince) { NKikimrWhiteboard::TTabletStateInfo& tabletStateInfo = *record.add_tabletstateinfo(); - CopyTabletStateInfo(tabletStateInfo, it->second, request); + Copy(tabletStateInfo, it->second, request); } } } } - } else if (request.groupby() == "Type,State") { // the only supported group-by for now + } else if (request.groupby() == "Type,State" || request.groupby() == "NodeId,Type,State") { // the only supported group-by for 
now std::unordered_map, NKikimrWhiteboard::TTabletStateInfo> stateGroupBy; for (const auto& [id, stateInfo] : TabletStateInfo) { @@ -761,7 +864,7 @@ class TNodeWhiteboardService : public TActorBootstrapped for (const auto& pr : NodeStateInfo) { if (pr.second.GetChangeTime() >= changedSince) { NKikimrWhiteboard::TNodeStateInfo &nodeStateInfo = *record.AddNodeStateInfo(); - nodeStateInfo.CopyFrom(pr.second); + Copy(nodeStateInfo, pr.second, request); } } response->Record.SetResponseTime(ctx.Now().MilliSeconds()); @@ -792,7 +895,7 @@ class TNodeWhiteboardService : public TActorBootstrapped for (const auto& pr : PDiskStateInfo) { if (pr.second.GetChangeTime() >= changedSince) { NKikimrWhiteboard::TPDiskStateInfo &pDiskStateInfo = *record.AddPDiskStateInfo(); - pDiskStateInfo.CopyFrom(pr.second); + Copy(pDiskStateInfo, pr.second, request); } } response->Record.SetResponseTime(ctx.Now().MilliSeconds()); @@ -816,7 +919,7 @@ class TNodeWhiteboardService : public TActorBootstrapped for (const auto& pr : VDiskStateInfo) { if (pr.second.GetChangeTime() >= changedSince) { NKikimrWhiteboard::TVDiskStateInfo &vDiskStateInfo = *record.AddVDiskStateInfo(); - vDiskStateInfo.CopyFrom(pr.second); + Copy(vDiskStateInfo, pr.second, request); } } response->Record.SetResponseTime(ctx.Now().MilliSeconds()); @@ -831,7 +934,7 @@ class TNodeWhiteboardService : public TActorBootstrapped for (const auto& pr : BSGroupStateInfo) { if (pr.second.GetChangeTime() >= changedSince) { NKikimrWhiteboard::TBSGroupStateInfo &bSGroupStateInfo = *record.AddBSGroupStateInfo(); - bSGroupStateInfo.CopyFrom(pr.second); + Copy(bSGroupStateInfo, pr.second, request); } } response->Record.SetResponseTime(ctx.Now().MilliSeconds()); @@ -845,7 +948,7 @@ class TNodeWhiteboardService : public TActorBootstrapped auto& record = response->Record; if (SystemStateInfo.GetChangeTime() >= changedSince) { NKikimrWhiteboard::TSystemStateInfo &systemStateInfo = *record.AddSystemStateInfo(); - 
systemStateInfo.CopyFrom(SystemStateInfo); + Copy(systemStateInfo, SystemStateInfo, request); } response->Record.SetResponseTime(ctx.Now().MilliSeconds()); ctx.Send(ev->Sender, response.Release(), 0, ev->Cookie); @@ -999,6 +1102,30 @@ class TNodeWhiteboardService : public TActorBootstrapped } }; +template +::google::protobuf::RepeatedField InitDefaultWhiteboardFields() { + using namespace ::google::protobuf; + const Descriptor& descriptor = *TMessage::GetDescriptor(); + ::google::protobuf::RepeatedField defaultFields; + int fieldCount = descriptor.field_count(); + for (int index = 0; index < fieldCount; ++index) { + const FieldDescriptor* field = descriptor.field(index); + const auto& options(field->options()); + if (options.HasExtension(NKikimrWhiteboard::DefaultField)) { + if (options.GetExtension(NKikimrWhiteboard::DefaultField)) { + defaultFields.Add(field->number()); + } + } + } + return defaultFields; +} + +template +::google::protobuf::RepeatedField GetDefaultWhiteboardFields() { + static ::google::protobuf::RepeatedField defaultFields = InitDefaultWhiteboardFields(); + return defaultFields; +} + IActor* CreateNodeWhiteboardService() { return new TNodeWhiteboardService(); } diff --git a/ydb/core/tablet/private/aggregated_counters.h b/ydb/core/tablet/private/aggregated_counters.h index c099942d8e7b..0aa7012c8110 100644 --- a/ydb/core/tablet/private/aggregated_counters.h +++ b/ydb/core/tablet/private/aggregated_counters.h @@ -140,7 +140,7 @@ class TAggregatedHistogramCounters { TVector CountersByTabletId; // each index is map from tablet to counter value }; -class TAggregatedLabeledCounters { +class TAggregatedLabeledCounters : public TThrRefBase { public: // TAggregatedLabeledCounters(ui32 count, const ui8* aggrFunc, const char * const * names, const ui8* types, const TString& groupNames); diff --git a/ydb/core/tablet/private/labeled_db_counters.cpp b/ydb/core/tablet/private/labeled_db_counters.cpp index d1de85c23708..8bb365fceab7 100644 --- 
a/ydb/core/tablet/private/labeled_db_counters.cpp +++ b/ydb/core/tablet/private/labeled_db_counters.cpp @@ -31,7 +31,7 @@ void TPQCounters::Apply(ui64 tabletId, const NKikimr::TTabletLabeledCountersBase } } - auto& el = LabeledCountersByGroup.InsertIfAbsent(group, new TAggregatedLabeledCounters( + auto el = LabeledCountersByGroup.InsertIfAbsent(group, new TAggregatedLabeledCounters( labeledCounters->GetCounters().Size(), labeledCounters->GetAggrFuncs(), labeledCounters->GetNames(), labeledCounters->GetTypes(), groupNames)); diff --git a/ydb/core/tablet/private/labeled_db_counters.h b/ydb/core/tablet/private/labeled_db_counters.h index deb8c6e2ece6..799c8e1ed050 100644 --- a/ydb/core/tablet/private/labeled_db_counters.h +++ b/ydb/core/tablet/private/labeled_db_counters.h @@ -13,7 +13,7 @@ namespace NKikimr::NPrivate { class TPQCounters : public ILabeledCounters { protected: - TConcurrentRWHashMap, 256> LabeledCountersByGroup; + TConcurrentRWHashMap, 256> LabeledCountersByGroup; NMonitoring::TDynamicCounterPtr Group; public: diff --git a/ydb/core/tablet/resource_broker.cpp b/ydb/core/tablet/resource_broker.cpp index 86d08828ffcd..7f4d73d78c19 100644 --- a/ydb/core/tablet/resource_broker.cpp +++ b/ydb/core/tablet/resource_broker.cpp @@ -1,6 +1,7 @@ #include "resource_broker_impl.h" #include +#include #include @@ -1269,10 +1270,10 @@ NKikimrResourceBroker::TResourceBrokerConfig MakeDefaultConfig() const ui64 KqpRmQueueCPU = 4; const ui64 KqpRmQueueMemory = 10ULL << 30; - const ui64 CSTTLCompactionMemoryLimit = 1ULL << 30; - const ui64 CSInsertCompactionMemoryLimit = 1ULL << 30; - const ui64 CSGeneralCompactionMemoryLimit = 3ULL << 30; - const ui64 CSScanMemoryLimit = 3ULL << 30; + const ui64 CSTTLCompactionMemoryLimit = NOlap::TGlobalLimits::TTLCompactionMemoryLimit; + const ui64 CSInsertCompactionMemoryLimit = NOlap::TGlobalLimits::InsertCompactionMemoryLimit; + const ui64 CSGeneralCompactionMemoryLimit = NOlap::TGlobalLimits::GeneralCompactionMemoryLimit; + const 
ui64 CSScanMemoryLimit = NOlap::TGlobalLimits::ScanMemoryLimit; const ui64 TotalCPU = 20; const ui64 TotalMemory = 16ULL << 30; diff --git a/ydb/core/tablet/tablet_counters_aggregator.cpp b/ydb/core/tablet/tablet_counters_aggregator.cpp index bf77a032fa90..28e7497d2529 100644 --- a/ydb/core/tablet/tablet_counters_aggregator.cpp +++ b/ydb/core/tablet/tablet_counters_aggregator.cpp @@ -159,7 +159,7 @@ class TTabletMon { } for (ui32 i = 0, e = labeledCounters->GetCounters().Size(); i < e; ++i) { - if(!strlen(labeledCounters->GetCounterName(i))) + if(!strlen(labeledCounters->GetCounterName(i))) continue; const ui64& value = labeledCounters->GetCounters()[i].Get(); const ui64& id = labeledCounters->GetIds()[i].Get(); @@ -765,10 +765,16 @@ class TTabletMon { TCounterPtr DatashardSizeBytes; TCounterPtr DatashardCacheHitBytes; TCounterPtr DatashardCacheMissBytes; + TCounterPtr ColumnShardReadRows_; + TCounterPtr ColumnShardReadBytes_; TCounterPtr ColumnShardScanRows_; TCounterPtr ColumnShardScanBytes_; + TCounterPtr ColumnShardWriteRows_; + TCounterPtr ColumnShardWriteBytes_; TCounterPtr ColumnShardBulkUpsertRows_; TCounterPtr ColumnShardBulkUpsertBytes_; + TCounterPtr ColumnShardEraseRows_; + TCounterPtr ColumnShardEraseBytes_; TCounterPtr ResourcesStorageUsedBytes; TCounterPtr ResourcesStorageUsedBytesOnSsd; TCounterPtr ResourcesStorageUsedBytesOnHdd; @@ -787,6 +793,7 @@ class TTabletMon { TCounterPtr ResourcesStreamReservedStorageLimit; THistogramPtr ShardCpuUtilization; + THistogramPtr ColumnShardCpuUtilization; TCounterPtr RowUpdates; TCounterPtr RowUpdateBytes; @@ -808,8 +815,11 @@ class TTabletMon { TCounterPtr ColumnShardScannedBytes_; TCounterPtr ColumnShardScannedRows_; - TCounterPtr ColumnShardUpsertBlobsWritten_; - TCounterPtr ColumnShardUpsertBytesWritten_; + TCounterPtr ColumnShardOperationsRowsWritten_; + TCounterPtr ColumnShardOperationsBytesWritten_; + TCounterPtr ColumnShardErasedBytes_; + TCounterPtr ColumnShardErasedRows_; + THistogramPtr 
ColumnShardConsumedCpuHistogram; TCounterPtr DiskSpaceTablesTotalBytes; TCounterPtr DiskSpaceTablesTotalBytesOnSsd; @@ -859,14 +869,26 @@ class TTabletMon { DatashardCacheMissBytes = ydbGroup->GetNamedCounter("name", "table.datashard.cache_miss.bytes", true); + ColumnShardReadRows_ = ydbGroup->GetNamedCounter("name", + "table.columnshard.read.rows", true); + ColumnShardReadBytes_ = ydbGroup->GetNamedCounter("name", + "table.columnshard.read.bytes", true); ColumnShardScanRows_ = ydbGroup->GetNamedCounter("name", "table.columnshard.scan.rows", true); ColumnShardScanBytes_ = ydbGroup->GetNamedCounter("name", "table.columnshard.scan.bytes", true); + ColumnShardWriteRows_ = ydbGroup->GetNamedCounter("name", + "table.columnshard.write.rows", true); + ColumnShardWriteBytes_ = ydbGroup->GetNamedCounter("name", + "table.columnshard.write.bytes", true); ColumnShardBulkUpsertRows_ = ydbGroup->GetNamedCounter("name", "table.columnshard.bulk_upsert.rows", true); ColumnShardBulkUpsertBytes_ = ydbGroup->GetNamedCounter("name", "table.columnshard.bulk_upsert.bytes", true); + ColumnShardEraseRows_ = ydbGroup->GetNamedCounter("name", + "table.columnshard.erase.rows", true); + ColumnShardEraseBytes_ = ydbGroup->GetNamedCounter("name", + "table.columnshard.erase.bytes", true); ResourcesStorageUsedBytes = ydbGroup->GetNamedCounter("name", "resources.storage.used_bytes", false); @@ -908,6 +930,8 @@ class TTabletMon { ShardCpuUtilization = ydbGroup->GetNamedHistogram("name", "table.datashard.used_core_percents", NMonitoring::LinearHistogram(12, 0, 10), false); + ColumnShardCpuUtilization = ydbGroup->GetNamedHistogram("name", + "table.columnshard.used_core_percents", NMonitoring::LinearHistogram(12, 0, 10), false); }; void Initialize(::NMonitoring::TDynamicCounterPtr counters, bool hasDatashard, bool hasSchemeshard, bool hasColumnShard) { @@ -943,8 +967,11 @@ class TTabletMon { ColumnShardScannedBytes_ = appGroup->GetCounter("ColumnShard/ScannedBytes"); ColumnShardScannedRows_ = 
appGroup->GetCounter("ColumnShard/ScannedRows"); - ColumnShardUpsertBlobsWritten_ = appGroup->GetCounter("ColumnShard/UpsertBlobsWritten"); - ColumnShardUpsertBytesWritten_ = appGroup->GetCounter("ColumnShard/UpsertBytesWritten"); + ColumnShardOperationsRowsWritten_ = appGroup->GetCounter("ColumnShard/OperationsRowsWritten"); + ColumnShardOperationsBytesWritten_ = appGroup->GetCounter("ColumnShard/OperationsBytesWritten"); + ColumnShardErasedBytes_ = appGroup->GetCounter("ColumnShard/BytesErased"); + ColumnShardErasedRows_ = appGroup->GetCounter("ColumnShard/RowsErased"); + ColumnShardConsumedCpuHistogram = appGroup->FindHistogram("HIST(ConsumedCPU)"); } if (hasSchemeshard && !DiskSpaceTablesTotalBytes) { @@ -990,10 +1017,20 @@ class TTabletMon { } if (ColumnShardScannedBytes_) { + ColumnShardReadRows_->Set(0); + ColumnShardReadBytes_->Set(0); ColumnShardScanRows_->Set(ColumnShardScannedRows_->Val()); ColumnShardScanBytes_->Set(ColumnShardScannedBytes_->Val()); - ColumnShardBulkUpsertRows_->Set(ColumnShardUpsertBlobsWritten_->Val()); - ColumnShardBulkUpsertBytes_->Set(ColumnShardUpsertBytesWritten_->Val()); + ColumnShardWriteRows_->Set(ColumnShardOperationsRowsWritten_->Val()); + ColumnShardWriteBytes_->Set(ColumnShardOperationsBytesWritten_->Val()); + ColumnShardBulkUpsertRows_->Set(ColumnShardOperationsRowsWritten_->Val()); + ColumnShardBulkUpsertBytes_->Set(ColumnShardOperationsBytesWritten_->Val()); + ColumnShardEraseRows_->Set(ColumnShardErasedRows_->Val()); + ColumnShardEraseBytes_->Set(ColumnShardErasedBytes_->Val()); + + if (ColumnShardConsumedCpuHistogram) { + TransferBuckets(ColumnShardCpuUtilization, ColumnShardConsumedCpuHistogram); + } } if (DiskSpaceTablesTotalBytes) { @@ -1464,7 +1501,8 @@ TTabletCountersAggregatorActor::HandleWork(TEvTabletCounters::TEvTabletLabeledCo continue; } if (groupNames[j] == "Client") { - group = group->GetSubgroup("ConsumerPath", NPersQueue::ConvertOldConsumerName(groups[j], ctx)); + group = 
group->GetSubgroup("ConsumerPath", + NPersQueue::ConvertOldConsumerName(groups[j], AppData(ctx)->PQConfig)); continue; } } @@ -2058,7 +2096,7 @@ class TClusterLabeledCountersAggregatorActorV3 : public TClusterLabeledCountersA if (groups.size() == 1) { //topic case ff = groups[0]; } else if (groups.size() == 3) { //client important topic - res = NPersQueue::ConvertOldConsumerName(groups[0], ctx) + "|" + groups[1] + "|"; + res = NPersQueue::ConvertOldConsumerName(groups[0], AppData(ctx)->PQConfig) + "|" + groups[1] + "|"; ff = groups[2]; } else { continue; diff --git a/ydb/core/tablet_flat/flat_database.cpp b/ydb/core/tablet_flat/flat_database.cpp index b87ee8056d59..ef50fab90283 100644 --- a/ydb/core/tablet_flat/flat_database.cpp +++ b/ydb/core/tablet_flat/flat_database.cpp @@ -494,7 +494,8 @@ ui64 TDatabase::GetTableMemOpsCount(ui32 tableId) const { } ui64 TDatabase::GetTableIndexSize(ui32 tableId) const { - return Require(tableId)->Stat().Parts.IndexBytes; + const auto& partStats = Require(tableId)->Stat().Parts; + return partStats.FlatIndexBytes + partStats.BTreeIndexBytes; } ui64 TDatabase::GetTableSearchHeight(ui32 tableId) const { diff --git a/ydb/core/tablet_flat/flat_dbase_misc.h b/ydb/core/tablet_flat/flat_dbase_misc.h index 1163eb05f9ba..4c3fe35fd252 100644 --- a/ydb/core/tablet_flat/flat_dbase_misc.h +++ b/ydb/core/tablet_flat/flat_dbase_misc.h @@ -14,7 +14,7 @@ namespace NTable { void Describe(IOutputStream &out) const noexcept { - const ui64 sys = Parts.IndexBytes + Parts.ByKeyBytes + Parts.OtherBytes; + const ui64 sys = Parts.FlatIndexBytes + Parts.BTreeIndexBytes + Parts.ByKeyBytes + Parts.OtherBytes; out << "DBase{" << Tables << "t " << Parts.PartsCount << "p" diff --git a/ydb/core/tablet_flat/flat_executor.cpp b/ydb/core/tablet_flat/flat_executor.cpp index 8d73dd4cc04a..152064a6d907 100644 --- a/ydb/core/tablet_flat/flat_executor.cpp +++ b/ydb/core/tablet_flat/flat_executor.cpp @@ -3556,23 +3556,23 @@ void TExecutor::Handle(NOps::TEvResult *ops, 
TProdCompact *msg, bool cancelled) } void TExecutor::UpdateUsedTabletMemory() { - UsedTabletMemory = 0; - // Estimate memory usage for internal executor structures. - UsedTabletMemory += 50 << 10; // 50kb - // Count the number of bytes exclusive to private cache. + // Estimate memory usage for internal executor structures: + UsedTabletMemory = 50 << 10; // 50kb + + // Count the number of bytes kept in private cache (can't be offloaded right now): if (PrivatePageCache) { - UsedTabletMemory += PrivatePageCache->GetStats().TotalExclusive; + UsedTabletMemory += PrivatePageCache->GetStats().TotalPinnedBody; + UsedTabletMemory += PrivatePageCache->GetStats().PinnedLoadSize; } - // Estimate memory used by database structures. + + // Estimate memory used by internal database structures: auto &counters = Database->Counters(); UsedTabletMemory += counters.MemTableWaste; UsedTabletMemory += counters.MemTableBytes; - UsedTabletMemory += counters.Parts.IndexBytes; UsedTabletMemory += counters.Parts.OtherBytes; - UsedTabletMemory += counters.Parts.ByKeyBytes; UsedTabletMemory += Stats->PacksMetaBytes; - // Add tablet memory usage. 
+ // Add tablet memory usage: UsedTabletMemory += Owner->GetMemoryUsage(); } @@ -3590,7 +3590,9 @@ void TExecutor::UpdateCounters(const TActorContext &ctx) { { /* Memory consumption of common for leader and follower components */ Counters->Simple()[TExecutorCounters::DB_WARM_BYTES].Set(dbCounters.MemTableBytes); Counters->Simple()[TExecutorCounters::DB_META_BYTES].Set(Stats->PacksMetaBytes); - Counters->Simple()[TExecutorCounters::DB_INDEX_BYTES].Set(dbCounters.Parts.IndexBytes); + Counters->Simple()[TExecutorCounters::DB_FLAT_INDEX_BYTES].Set(dbCounters.Parts.FlatIndexBytes); + Counters->Simple()[TExecutorCounters::DB_B_TREE_INDEX_BYTES].Set(dbCounters.Parts.BTreeIndexBytes); + Counters->Simple()[TExecutorCounters::DB_INDEX_BYTES].Set(dbCounters.Parts.FlatIndexBytes + dbCounters.Parts.BTreeIndexBytes); Counters->Simple()[TExecutorCounters::DB_OTHER_BYTES].Set(dbCounters.Parts.OtherBytes); Counters->Simple()[TExecutorCounters::DB_BYKEY_BYTES].Set(dbCounters.Parts.ByKeyBytes); Counters->Simple()[TExecutorCounters::USED_TABLET_MEMORY].Set(UsedTabletMemory); diff --git a/ydb/core/tablet_flat/flat_executor_counters.h b/ydb/core/tablet_flat/flat_executor_counters.h index 914fdad59995..22a1dc7b03b2 100644 --- a/ydb/core/tablet_flat/flat_executor_counters.h +++ b/ydb/core/tablet_flat/flat_executor_counters.h @@ -62,6 +62,8 @@ namespace NTabletFlatExecutor { XX(CONSUMED_STORAGE, "ConsumedStorage") \ XX(CONSUMED_MEMORY, "ConsumedMemory") \ XX(COMPACTION_READ_IN_FLY, "CompactionReadInFly") \ + XX(DB_FLAT_INDEX_BYTES, "DbFlatIndexBytes") \ + XX(DB_B_TREE_INDEX_BYTES, "DbBTreeIndexBytes") \ // don't change order! 
#define FLAT_EXECUTOR_CUMULATIVE_COUNTERS_MAP(XX) \ diff --git a/ydb/core/tablet_flat/flat_executor_ut.cpp b/ydb/core/tablet_flat/flat_executor_ut.cpp index 5f9552f6f833..7583f10e790e 100644 --- a/ydb/core/tablet_flat/flat_executor_ut.cpp +++ b/ydb/core/tablet_flat/flat_executor_ut.cpp @@ -6228,13 +6228,15 @@ Y_UNIT_TEST_SUITE(TFlatTableExecutorBTreeIndex) { } }; - Y_UNIT_TEST(EnableLocalDBBtreeIndex_Default) { // uses b-tree index + Y_UNIT_TEST(EnableLocalDBBtreeIndex_Default) { // uses flat index TMyEnvBase env; TRowsModel rows; auto &appData = env->GetAppData(); UNIT_ASSERT_VALUES_EQUAL(appData.FeatureFlags.HasEnableLocalDBBtreeIndex(), false); UNIT_ASSERT_VALUES_EQUAL(appData.FeatureFlags.HasEnableLocalDBFlatIndex(), false); + UNIT_ASSERT_VALUES_EQUAL(appData.FeatureFlags.GetEnableLocalDBBtreeIndex(), false); + UNIT_ASSERT_VALUES_EQUAL(appData.FeatureFlags.GetEnableLocalDBFlatIndex(), true); auto counters = MakeIntrusive(env->GetDynamicCounters()); int readRows = 0, failedAttempts = 0; @@ -6250,8 +6252,8 @@ Y_UNIT_TEST_SUITE(TFlatTableExecutorBTreeIndex) { env.SendSync(new NFake::TEvCompact(TRowsModel::TableId)); env.WaitFor(); - // all pages are always kept in shared cache (except flat index) - UNIT_ASSERT_VALUES_EQUAL(counters->ActivePages->Val(), 334); + // all pages are always kept in shared cache + UNIT_ASSERT_VALUES_EQUAL(counters->ActivePages->Val(), 290); env.SendSync(new NFake::TEvExecute{ new TTxFullScan(readRows, failedAttempts) }); UNIT_ASSERT_VALUES_EQUAL(readRows, 1000); @@ -6264,7 +6266,7 @@ Y_UNIT_TEST_SUITE(TFlatTableExecutorBTreeIndex) { // after restart we have no pages in private cache env.SendSync(new NFake::TEvExecute{ new TTxFullScan(readRows, failedAttempts) }, true); UNIT_ASSERT_VALUES_EQUAL(readRows, 1000); - UNIT_ASSERT_VALUES_EQUAL(failedAttempts, 330); + UNIT_ASSERT_VALUES_EQUAL(failedAttempts, 288); } Y_UNIT_TEST(EnableLocalDBBtreeIndex_True) { // uses b-tree index @@ -6305,6 +6307,45 @@ 
Y_UNIT_TEST_SUITE(TFlatTableExecutorBTreeIndex) { UNIT_ASSERT_VALUES_EQUAL(failedAttempts, 330); } + Y_UNIT_TEST(EnableLocalDBBtreeIndex_False) { // uses flat index + TMyEnvBase env; + TRowsModel rows; + + auto &appData = env->GetAppData(); + + appData.FeatureFlags.SetEnableLocalDBBtreeIndex(false); + auto counters = MakeIntrusive(env->GetDynamicCounters()); + int readRows = 0, failedAttempts = 0; + + env.FireDummyTablet(ui32(NFake::TDummy::EFlg::Comp)); + + auto policy = MakeIntrusive(); + policy->MinBTreeIndexNodeSize = 128; + env.SendSync(rows.MakeScheme(std::move(policy))); + + env.SendSync(rows.VersionTo(TRowVersion(1, 10)).RowTo(0).MakeRows(1000, 950)); + env.SendSync(rows.VersionTo(TRowVersion(2, 20)).RowTo(0).MakeRows(1000, 950)); + + env.SendSync(new NFake::TEvCompact(TRowsModel::TableId)); + env.WaitFor(); + + // all pages are always kept in shared cache + UNIT_ASSERT_VALUES_EQUAL(counters->ActivePages->Val(), 290); + + env.SendSync(new NFake::TEvExecute{ new TTxFullScan(readRows, failedAttempts) }); + UNIT_ASSERT_VALUES_EQUAL(readRows, 1000); + UNIT_ASSERT_VALUES_EQUAL(failedAttempts, 0); + + // restart tablet + env.SendSync(new TEvents::TEvPoison, false, true); + env.FireDummyTablet(ui32(NFake::TDummy::EFlg::Comp)); + + // after restart we have no pages in private cache + env.SendSync(new NFake::TEvExecute{ new TTxFullScan(readRows, failedAttempts) }, true); + UNIT_ASSERT_VALUES_EQUAL(readRows, 1000); + UNIT_ASSERT_VALUES_EQUAL(failedAttempts, 288); + } + Y_UNIT_TEST(EnableLocalDBBtreeIndex_True_EnableLocalDBFlatIndex_False) { // uses b-tree index TMyEnvBase env; TRowsModel rows; diff --git a/ydb/core/tablet_flat/flat_part_loader.cpp b/ydb/core/tablet_flat/flat_part_loader.cpp index b031c698cce2..33c69b131c87 100644 --- a/ydb/core/tablet_flat/flat_part_loader.cpp +++ b/ydb/core/tablet_flat/flat_part_loader.cpp @@ -210,17 +210,22 @@ TAutoPtr TLoader::StageCreatePartView() noexcept TEpoch epoch = Epoch != TEpoch::Max() ? 
Epoch : TEpoch(Root.GetEpoch()); // TODO: put index size to stat? - // TODO: include history indexes bytes size_t indexesRawSize = 0; if (BTreeGroupIndexes) { for (const auto &meta : BTreeGroupIndexes) { indexesRawSize += meta.IndexSize; } + for (const auto &meta : BTreeHistoricIndexes) { + indexesRawSize += meta.IndexSize; + } // Note: although we also have flat index, it shouldn't be loaded; so let's not count it here } else { for (auto indexPage : FlatGroupIndexes) { indexesRawSize += Packs[0]->GetPageSize(indexPage); } + for (auto indexPage : FlatHistoricIndexes) { + indexesRawSize += Packs[0]->GetPageSize(indexPage); + } } auto *partStore = new TPartStore( diff --git a/ydb/core/tablet_flat/flat_stat_table.cpp b/ydb/core/tablet_flat/flat_stat_table.cpp index 08a850256e90..afacbe7f3747 100644 --- a/ydb/core/tablet_flat/flat_stat_table.cpp +++ b/ydb/core/tablet_flat/flat_stat_table.cpp @@ -17,9 +17,6 @@ bool BuildStats(const TSubset& subset, TStats& stats, ui64 rowCountResolution, u } } - // TODO: enable b-tree index after benchmarks - mixedIndex = true; - return mixedIndex ? 
BuildStatsMixedIndex(subset, stats, rowCountResolution, dataSizeResolution, env, yieldHandler) : BuildStatsBTreeIndex(subset, stats, histogramBucketsCount, env, yieldHandler); diff --git a/ydb/core/tablet_flat/flat_stat_table_btree_index.cpp b/ydb/core/tablet_flat/flat_stat_table_btree_index.cpp new file mode 100644 index 000000000000..e25f160cfef3 --- /dev/null +++ b/ydb/core/tablet_flat/flat_stat_table_btree_index.cpp @@ -0,0 +1,217 @@ +#include "flat_stat_table.h" +#include "flat_table_subset.h" +#include "flat_stat_table_btree_index.h" + +namespace NKikimr::NTable { + +namespace { + +using TGroupId = NPage::TGroupId; +using TFrames = NPage::TFrames; +using TBtreeIndexNode = NPage::TBtreeIndexNode; +using TChild = TBtreeIndexNode::TChild; +using TColumns = TBtreeIndexNode::TColumns; +using TCells = NPage::TCells; + +ui64 GetPrevDataSize(const TPart* part, TGroupId groupId, TRowId rowId, IPages* env, bool& ready) { + auto& meta = part->IndexPages.GetBTree(groupId); + + if (rowId == 0) { + return 0; + } + if (rowId >= meta.GetRowCount()) { + return meta.GetDataSize(); + } + + TPageId pageId = meta.GetPageId(); + ui64 prevDataSize = 0; + + for (ui32 height = 0; height < meta.LevelCount; height++) { + auto page = env->TryGetPage(part, pageId, {}); + if (!page) { + ready = false; + return prevDataSize; + } + auto node = TBtreeIndexNode(*page); + auto pos = node.Seek(rowId); + + pageId = node.GetShortChild(pos).GetPageId(); + if (pos) { + prevDataSize = node.GetShortChild(pos - 1).GetDataSize(); + } + } + + return prevDataSize; +} + +ui64 GetPrevHistoricDataSize(const TPart* part, TGroupId groupId, TRowId rowId, IPages* env, TRowId& historicRowId, bool& ready) { + Y_ABORT_UNLESS(groupId == TGroupId(0, true)); + + auto& meta = part->IndexPages.GetBTree(groupId); + + if (rowId == 0) { + historicRowId = 0; + return 0; + } + if (rowId >= part->IndexPages.GetBTree({}).GetRowCount()) { + historicRowId = meta.GetRowCount(); + return meta.GetDataSize(); + } + + TPageId pageId 
= meta.GetPageId(); + ui64 prevDataSize = 0; + historicRowId = 0; + + // Minimum key is (startRowId, max, max) + ui64 startStep = Max(); + ui64 startTxId = Max(); + TCell key1Cells[3] = { + TCell::Make(rowId), + TCell::Make(startStep), + TCell::Make(startTxId), + }; + TCells key1{ key1Cells, 3 }; + + for (ui32 height = 0; height < meta.LevelCount; height++) { + auto page = env->TryGetPage(part, pageId, {}); + if (!page) { + ready = false; + return prevDataSize; + } + auto node = TBtreeIndexNode(*page); + auto pos = node.Seek(ESeek::Lower, key1, part->Scheme->HistoryGroup.ColsKeyIdx, part->Scheme->HistoryKeys.Get()); + + pageId = node.GetShortChild(pos).GetPageId(); + if (pos) { + const auto& prevChild = node.GetShortChild(pos - 1); + prevDataSize = prevChild.GetDataSize(); + historicRowId = prevChild.GetRowCount(); + } + } + + return prevDataSize; +} + +void AddBlobsSize(const TPart* part, TChanneledDataSize& stats, const TFrames* frames, ELargeObj lob, TRowId beginRowId, TRowId endRowId) noexcept { + ui32 page = frames->Lower(beginRowId, 0, Max()); + + while (auto &rel = frames->Relation(page)) { + if (rel.Row < endRowId) { + auto channel = part->GetPageChannel(lob, page); + stats.Add(rel.Size, channel); + ++page; + } else if (!rel.IsHead()) { + Y_ABORT("Got unaligned TFrames head record"); + } else { + break; + } + } +} + +bool AddDataSize(const TPartView& part, TStats& stats, IPages* env, TBuildStatsYieldHandler yieldHandler) { + bool ready = true; + + if (!part.Slices || part.Slices->empty()) { + return true; + } + + if (part->GroupsCount) { // main group + TGroupId groupId{}; + auto channel = part->GetGroupChannel(groupId); + + for (const auto& slice : *part.Slices) { + yieldHandler(); + + stats.RowCount += slice.EndRowId() - slice.BeginRowId(); + + ui64 beginDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.BeginRowId(), env, ready); + ui64 endDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.EndRowId(), env, ready); + if (ready && 
endDataSize > beginDataSize) { + stats.DataSize.Add(endDataSize - beginDataSize, channel); + } + + if (part->Small) { + AddBlobsSize(part.Part.Get(), stats.DataSize, part->Small.Get(), ELargeObj::Outer, slice.BeginRowId(), slice.EndRowId()); + } + if (part->Large) { + AddBlobsSize(part.Part.Get(), stats.DataSize, part->Large.Get(), ELargeObj::Extern, slice.BeginRowId(), slice.EndRowId()); + } + } + } + + for (ui32 groupIndex : xrange(1, part->GroupsCount)) { + TGroupId groupId{groupIndex}; + auto channel = part->GetGroupChannel(groupId); + for (const auto& slice : *part.Slices) { + yieldHandler(); + + ui64 beginDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.BeginRowId(), env, ready); + ui64 endDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.EndRowId(), env, ready); + if (ready && endDataSize > beginDataSize) { + stats.DataSize.Add(endDataSize - beginDataSize, channel); + } + } + } + + TVector> historicSlices; + + if (part->HistoricGroupsCount) { // main historic group + TGroupId groupId{0, true}; + auto channel = part->GetGroupChannel(groupId); + for (const auto& slice : *part.Slices) { + yieldHandler(); + + TRowId beginRowId, endRowId; + bool readySlice = true; + ui64 beginDataSize = GetPrevHistoricDataSize(part.Part.Get(), groupId, slice.BeginRowId(), env, beginRowId, readySlice); + ui64 endDataSize = GetPrevHistoricDataSize(part.Part.Get(), groupId, slice.EndRowId(), env, endRowId, readySlice); + ready &= readySlice; + if (ready && endDataSize > beginDataSize) { + stats.DataSize.Add(endDataSize - beginDataSize, channel); + } + if (readySlice && endRowId > beginRowId) { + historicSlices.emplace_back(beginRowId, endRowId); + } + } + } + + for (ui32 groupIndex : xrange(1, part->HistoricGroupsCount)) { + TGroupId groupId{groupIndex, true}; + auto channel = part->GetGroupChannel(groupId); + for (const auto& slice : historicSlices) { + yieldHandler(); + + ui64 beginDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.first, env, ready); + 
ui64 endDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.second, env, ready); + if (ready && endDataSize > beginDataSize) { + stats.DataSize.Add(endDataSize - beginDataSize, channel); + } + } + } + + return ready; +} + +} + +bool BuildStatsBTreeIndex(const TSubset& subset, TStats& stats, ui32 histogramBucketsCount, IPages* env, TBuildStatsYieldHandler yieldHandler) { + stats.Clear(); + + bool ready = true; + for (const auto& part : subset.Flatten) { + stats.IndexSize.Add(part->IndexesRawSize, part->Label.Channel()); + ready &= AddDataSize(part, stats, env, yieldHandler); + } + + if (!ready) { + return false; + } + + ready &= BuildStatsHistogramsBTreeIndex(subset, stats, + stats.RowCount / histogramBucketsCount, stats.DataSize.Size / histogramBucketsCount, + env, yieldHandler); + + return ready; +} + +} diff --git a/ydb/core/tablet_flat/flat_stat_table_btree_index.h b/ydb/core/tablet_flat/flat_stat_table_btree_index.h index a82c8e70a41f..a01a92d8890b 100644 --- a/ydb/core/tablet_flat/flat_stat_table_btree_index.h +++ b/ydb/core/tablet_flat/flat_stat_table_btree_index.h @@ -2,216 +2,12 @@ #include "flat_stat_table.h" #include "flat_table_subset.h" -#include "flat_stat_table_btree_index_histogram.h" namespace NKikimr::NTable { -namespace { +bool BuildStatsBTreeIndex(const TSubset& subset, TStats& stats, ui32 histogramBucketsCount, IPages* env, TBuildStatsYieldHandler yieldHandler); -using TGroupId = NPage::TGroupId; -using TFrames = NPage::TFrames; -using TBtreeIndexNode = NPage::TBtreeIndexNode; -using TChild = TBtreeIndexNode::TChild; -using TColumns = TBtreeIndexNode::TColumns; -using TCells = NPage::TCells; +bool BuildStatsHistogramsBTreeIndex(const TSubset& subset, TStats& stats, ui64 rowCountResolution, ui64 dataSizeResolution, IPages* env, TBuildStatsYieldHandler yieldHandler); -ui64 GetPrevDataSize(const TPart* part, TGroupId groupId, TRowId rowId, IPages* env, bool& ready) { - auto& meta = part->IndexPages.GetBTree(groupId); - - if (rowId == 0) { - 
return 0; - } - if (rowId >= meta.GetRowCount()) { - return meta.GetDataSize(); - } - - TPageId pageId = meta.GetPageId(); - ui64 prevDataSize = 0; - - for (ui32 height = 0; height < meta.LevelCount; height++) { - auto page = env->TryGetPage(part, pageId, {}); - if (!page) { - ready = false; - return prevDataSize; - } - auto node = TBtreeIndexNode(*page); - auto pos = node.Seek(rowId); - - pageId = node.GetShortChild(pos).GetPageId(); - if (pos) { - prevDataSize = node.GetShortChild(pos - 1).GetDataSize(); - } - } - - return prevDataSize; -} - -ui64 GetPrevHistoricDataSize(const TPart* part, TGroupId groupId, TRowId rowId, IPages* env, TRowId& historicRowId, bool& ready) { - Y_ABORT_UNLESS(groupId == TGroupId(0, true)); - - auto& meta = part->IndexPages.GetBTree(groupId); - - if (rowId == 0) { - historicRowId = 0; - return 0; - } - if (rowId >= part->IndexPages.GetBTree({}).GetRowCount()) { - historicRowId = meta.GetRowCount(); - return meta.GetDataSize(); - } - - TPageId pageId = meta.GetPageId(); - ui64 prevDataSize = 0; - historicRowId = 0; - - // Minimum key is (startRowId, max, max) - ui64 startStep = Max(); - ui64 startTxId = Max(); - TCell key1Cells[3] = { - TCell::Make(rowId), - TCell::Make(startStep), - TCell::Make(startTxId), - }; - TCells key1{ key1Cells, 3 }; - - for (ui32 height = 0; height < meta.LevelCount; height++) { - auto page = env->TryGetPage(part, pageId, {}); - if (!page) { - ready = false; - return prevDataSize; - } - auto node = TBtreeIndexNode(*page); - auto pos = node.Seek(ESeek::Lower, key1, part->Scheme->HistoryGroup.ColsKeyIdx, part->Scheme->HistoryKeys.Get()); - - pageId = node.GetShortChild(pos).GetPageId(); - if (pos) { - const auto& prevChild = node.GetShortChild(pos - 1); - prevDataSize = prevChild.GetDataSize(); - historicRowId = prevChild.GetRowCount(); - } - } - - return prevDataSize; -} - -void AddBlobsSize(const TPart* part, TChanneledDataSize& stats, const TFrames* frames, ELargeObj lob, TRowId beginRowId, TRowId endRowId) 
noexcept { - ui32 page = frames->Lower(beginRowId, 0, Max()); - - while (auto &rel = frames->Relation(page)) { - if (rel.Row < endRowId) { - auto channel = part->GetPageChannel(lob, page); - stats.Add(rel.Size, channel); - ++page; - } else if (!rel.IsHead()) { - Y_ABORT("Got unaligned TFrames head record"); - } else { - break; - } - } -} - -bool AddDataSize(const TPartView& part, TStats& stats, IPages* env, TBuildStatsYieldHandler yieldHandler) { - bool ready = true; - - if (!part.Slices || part.Slices->empty()) { - return true; - } - - if (part->GroupsCount) { // main group - TGroupId groupId{}; - auto channel = part->GetGroupChannel(groupId); - - for (const auto& slice : *part.Slices) { - yieldHandler(); - - stats.RowCount += slice.EndRowId() - slice.BeginRowId(); - - ui64 beginDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.BeginRowId(), env, ready); - ui64 endDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.EndRowId(), env, ready); - if (ready && endDataSize > beginDataSize) { - stats.DataSize.Add(endDataSize - beginDataSize, channel); - } - - if (part->Small) { - AddBlobsSize(part.Part.Get(), stats.DataSize, part->Small.Get(), ELargeObj::Outer, slice.BeginRowId(), slice.EndRowId()); - } - if (part->Large) { - AddBlobsSize(part.Part.Get(), stats.DataSize, part->Large.Get(), ELargeObj::Extern, slice.BeginRowId(), slice.EndRowId()); - } - } - } - - for (ui32 groupIndex : xrange(1, part->GroupsCount)) { - TGroupId groupId{groupIndex}; - auto channel = part->GetGroupChannel(groupId); - for (const auto& slice : *part.Slices) { - yieldHandler(); - - ui64 beginDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.BeginRowId(), env, ready); - ui64 endDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.EndRowId(), env, ready); - if (ready && endDataSize > beginDataSize) { - stats.DataSize.Add(endDataSize - beginDataSize, channel); - } - } - } - - TVector> historicSlices; - - if (part->HistoricGroupsCount) { // main historic group - 
TGroupId groupId{0, true}; - auto channel = part->GetGroupChannel(groupId); - for (const auto& slice : *part.Slices) { - yieldHandler(); - - TRowId beginRowId, endRowId; - bool readySlice = true; - ui64 beginDataSize = GetPrevHistoricDataSize(part.Part.Get(), groupId, slice.BeginRowId(), env, beginRowId, readySlice); - ui64 endDataSize = GetPrevHistoricDataSize(part.Part.Get(), groupId, slice.EndRowId(), env, endRowId, readySlice); - ready &= readySlice; - if (ready && endDataSize > beginDataSize) { - stats.DataSize.Add(endDataSize - beginDataSize, channel); - } - if (readySlice && endRowId > beginRowId) { - historicSlices.emplace_back(beginRowId, endRowId); - } - } - } - - for (ui32 groupIndex : xrange(1, part->HistoricGroupsCount)) { - TGroupId groupId{groupIndex, true}; - auto channel = part->GetGroupChannel(groupId); - for (const auto& slice : historicSlices) { - yieldHandler(); - - ui64 beginDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.first, env, ready); - ui64 endDataSize = GetPrevDataSize(part.Part.Get(), groupId, slice.second, env, ready); - if (ready && endDataSize > beginDataSize) { - stats.DataSize.Add(endDataSize - beginDataSize, channel); - } - } - } - - return ready; -} - -} - -inline bool BuildStatsBTreeIndex(const TSubset& subset, TStats& stats, ui32 histogramBucketsCount, IPages* env, TBuildStatsYieldHandler yieldHandler) { - stats.Clear(); - - bool ready = true; - for (const auto& part : subset.Flatten) { - stats.IndexSize.Add(part->IndexesRawSize, part->Label.Channel()); - ready &= AddDataSize(part, stats, env, yieldHandler); - } - - if (!ready) { - return false; - } - - ready &= BuildStatsHistogramsBTreeIndex(subset, stats, histogramBucketsCount, env, yieldHandler); - - return ready; -} } diff --git a/ydb/core/tablet_flat/flat_stat_table_btree_index_histogram.cpp b/ydb/core/tablet_flat/flat_stat_table_btree_index_histogram.cpp new file mode 100644 index 000000000000..4c060d594780 --- /dev/null +++ 
b/ydb/core/tablet_flat/flat_stat_table_btree_index_histogram.cpp @@ -0,0 +1,533 @@ +#include "flat_stat_table.h" +#include "flat_table_subset.h" +#include "flat_page_btree_index_writer.h" + +namespace NKikimr::NTable { + +namespace { + +using TGroupId = NPage::TGroupId; +using TFrames = NPage::TFrames; +using TBtreeIndexNode = NPage::TBtreeIndexNode; +using TChild = TBtreeIndexNode::TChild; +using TColumns = TBtreeIndexNode::TColumns; +using TCells = NPage::TCells; +using TCellsIterable = TBtreeIndexNode::TCellsIterable; +using TCellsIter = TBtreeIndexNode::TCellsIter; + +const static TCellsIterable EmptyKey(static_cast(nullptr), TColumns()); + +enum class ENodeState : ui8 { + Initial, + Opened, + Closed, + Ignored, +}; + +class TTableHistogramBuilderBtreeIndex { + struct TNodeState { + const TPart* Part; + TPageId PageId; + ui32 Level; + TRowId BeginRowId, EndRowId; + ui64 BeginDataSize, EndDataSize; + TCellsIterable BeginKey, EndKey; + ENodeState State = ENodeState::Initial; + + TNodeState(const TPart* part, TPageId pageId, ui32 level, TRowId beginRowId, TRowId endRowId, TRowId beginDataSize, TRowId endDataSize, TCellsIterable beginKey, TCellsIterable endKey) + : Part(part) + , PageId(pageId) + , Level(level) + , BeginRowId(beginRowId) + , EndRowId(endRowId) + , BeginDataSize(beginDataSize) + , EndDataSize(endDataSize) + , BeginKey(beginKey) + , EndKey(endKey) + { + } + + TRowId GetRowCount() const noexcept { + return EndRowId - BeginRowId; + } + + ui64 GetDataSize() const noexcept { + return EndDataSize - BeginDataSize; + } + + // usually a node state goes in order: + // 1. Initial + // 2. Opened - after processing TEvent.IsBegin = true + // 3. Closed - after processing TEvent.IsBegin = false + // if an opened node is being loaded, its state goes in order: + // 1. Initial + // 2. Opened - after processing TEvent.IsBegin = true + // 3. 
Ignored - after have been loaded + // in a case when a node EndKey >= BeginKey a node state goes in order: + // (which is theoretically possible scenario because of slice bounds) + // 1. Initial + // 2. Closed - after processing TEvent.IsBegin = false + + bool Open(ui64& openedRowCount, ui64& openedDataSize) noexcept { + if (Y_LIKELY(State == ENodeState::Initial)) { + State = ENodeState::Opened; + openedRowCount += GetRowCount(); + openedDataSize += GetDataSize(); + return true; + } + return false; + } + + bool Close(ui64& openedRowCount, ui64& closedRowCount, ui64& openedDataSize, ui64& closedDataSize) noexcept { + if (State == ENodeState::Opened) { + State = ENodeState::Closed; + ui64 rowCount = GetRowCount(); + ui64 dataSize = GetDataSize(); + Y_ABORT_UNLESS(openedRowCount >= rowCount); + Y_ABORT_UNLESS(openedDataSize >= dataSize); + openedRowCount -= rowCount; + openedDataSize -= dataSize; + closedRowCount += rowCount; + closedDataSize += dataSize; + return true; + } else if (Y_UNLIKELY(State == ENodeState::Initial)) { + State = ENodeState::Closed; + closedRowCount += GetRowCount(); + closedDataSize += GetDataSize(); + return true; + } + return false; + } + + bool IgnoreOpened(ui64& openedRowCount, ui64& openedDataSize) noexcept { + if (Y_LIKELY(State == ENodeState::Opened)) { + State = ENodeState::Ignored; + ui64 rowCount = GetRowCount(); + ui64 dataSize = GetDataSize(); + Y_ABORT_UNLESS(openedRowCount >= rowCount); + Y_ABORT_UNLESS(openedDataSize >= dataSize); + openedRowCount -= rowCount; + openedDataSize -= dataSize; + return true; + } + return false; + } + }; + + struct TEvent { + TCellsIterable Key; + bool IsBegin; + TNodeState* Node; + }; + + struct TNodeEventKeyGreater { + const TKeyCellDefaults& KeyDefaults; + + bool operator ()(const TEvent& a, const TEvent& b) const noexcept { + return Compare(a, b) > 0; + } + + i8 Compare(const TEvent& a, const TEvent& b) const noexcept { + // events go in order: + // - Key = {}, IsBegin = true + // - ... 
+ // - Key = {'c'}, IsBegin = false + // - Key = {'c'}, IsBegin = true + // - ... + // - Key = {'d'}, IsBegin = false + // - Key = {'d'}, IsBegin = true + // - ... + // - Key = {}, IsBegin = false + + if (a.Key && b.Key) { // compare by keys + auto cmp = CompareKeys(a.Key, b.Key, KeyDefaults); + if (cmp != 0) { + return cmp; + } + // keys are the same, compare by begin flag, end events first: + return Compare(a.IsBegin ? 1 : -1, b.IsBegin ? 1 : -1); + } + + // category = -1 for Key = { }, IsBegin = true + // category = 0 for Key = {*}, IsBegin = * + // category = +1 for Key = { }, IsBegin = false + return Compare(GetCategory(a), GetCategory(b)); + } + + private: + static i8 GetCategory(const TEvent& a) noexcept { + if (a.Key) { + return 0; + } + return a.IsBegin ? -1 : +1; + } + + static i8 Compare(i8 a, i8 b) noexcept { + if (a < b) return -1; + if (a > b) return +1; + return 0; + } + }; + + struct TNodeRowCountLess { + bool operator ()(const TNodeState* a, const TNodeState* b) const noexcept { + return a->GetRowCount() < b->GetRowCount(); + } + }; + + struct TNodeDataSizeLess { + bool operator ()(const TNodeState* a, const TNodeState* b) const noexcept { + return a->GetDataSize() < b->GetDataSize(); + } + }; + +public: + TTableHistogramBuilderBtreeIndex(const TSubset& subset, ui64 rowCountResolution, ui64 dataSizeResolution, IPages* env, TBuildStatsYieldHandler yieldHandler) + : Subset(subset) + , KeyDefaults(*Subset.Scheme->Keys) + , RowCountResolution(rowCountResolution) + , DataSizeResolution(dataSizeResolution) + , RowCountResolutionGap(RowCountResolution / 2) + , DataSizeResolutionGap(DataSizeResolution / 2) + , Env(env) + , YieldHandler(yieldHandler) + , NodeEventKeyGreater{KeyDefaults} + , FutureEvents(NodeEventKeyGreater) + { + } + + bool Build(TStats& stats) { + bool ready = true; + + for (auto index : xrange(Subset.Flatten.size())) { + auto& part = Subset.Flatten[index]; + auto& meta = part->IndexPages.GetBTree({}); + TCellsIterable beginKey = EmptyKey; 
+ if (part.Slices && part.Slices->front().FirstKey.GetCells()) { + beginKey = MakeCellsIterableKey(part.Part.Get(), part.Slices->front().FirstKey); + } + TCellsIterable endKey = EmptyKey; + if (part.Slices && part.Slices->back().LastKey.GetCells()) { + endKey = MakeCellsIterableKey(part.Part.Get(), part.Slices->back().LastKey); + } + LoadedStateNodes.emplace_back(part.Part.Get(), meta.GetPageId(), meta.LevelCount, 0, meta.GetRowCount(), 0, meta.GetDataSize(), beginKey, endKey); + ready &= SlicePart(*part.Slices, LoadedStateNodes.back()); + } + + if (!ready) { + return false; + } + + ready &= BuildIterate(stats); + + FutureEvents.clear(); + LoadedBTreeNodes.clear(); + LoadedStateNodes.clear(); + + return ready; + } + +private: + bool SlicePart(const TSlices& slices, TNodeState& node) { + YieldHandler(); + + // TODO: avoid binary search for each call (we may intersect slices with nodes in linear time actually) + auto it = slices.LookupBackward(slices.end(), node.EndRowId - 1); + + if (it == slices.end() || node.EndRowId <= it->BeginRowId() || it->EndRowId() <= node.BeginRowId) { + // skip the node + return true; + } + + if (it->BeginRowId() <= node.BeginRowId && node.EndRowId <= it->EndRowId()) { + // take the node + AddFutureEvents(node); + return true; + } + + // split the node + + if (node.Level == 0) { + // can't split, decide by node.EndRowId - 1 + // TODO: decide by non-empty slice and node intersection, but this requires size calculation changes too + if (it->Has(node.EndRowId - 1)) { + AddFutureEvents(node); + } + return true; + } + + bool ready = true; + + const auto addNode = [&](TNodeState& child) { + ready &= SlicePart(slices, child); + }; + if (!TryLoadNode(node, addNode)) { + return false; + } + + return ready; + } + + bool BuildIterate(TStats& stats) { + // The idea is the following: + // - move a key pointer through all parts simultaneously + // keeping all nodes that contain current key pointer in opened heaps (sorted by size descending) + // all 
nodes that ended before current key pointer are considered as closed + // - keep an invariant that size of closed and opened nodes don't exceed next histogram bucket values + // otherwise, load opened nodes + // - because histogram is approximate each its value is allowed to be in a range + // [next value - gap, next value + gap] + + // next histogram keys are been looking for: + ui64 nextHistogramRowCount = RowCountResolution, nextHistogramDataSize = DataSizeResolution; + + // closed nodes stats: + ui64 closedRowCount = 0, closedDataSize = 0; + + // opened nodes stats and heaps: + ui64 openedRowCount = 0, openedDataSize = 0; + TPriorityQueue, TNodeRowCountLess> openedSortedByRowCount; + TPriorityQueue, TNodeDataSizeLess> openedSortedByDataSize; + + // will additionally save list of all nodes that start at current key pointer: + TVector currentKeyPointerOpens; + + while (FutureEvents && (nextHistogramRowCount != Max() || nextHistogramDataSize != Max())) { + YieldHandler(); + + auto currentKeyPointer = FutureEvents.top(); + currentKeyPointerOpens.clear(); + + auto processEvent = [&](const TEvent& event) { + Y_DEBUG_ABORT_UNLESS(NodeEventKeyGreater.Compare(event, currentKeyPointer) <= 0, "Can't process future events"); + if (event.IsBegin) { + if (event.Node->Open(openedRowCount, openedDataSize)) { + openedSortedByRowCount.push(event.Node); + openedSortedByDataSize.push(event.Node); + } + } else { + event.Node->Close(openedRowCount, closedRowCount, openedDataSize, closedDataSize); + } + }; + + // process all events with the same key and type as current key pointer: + do { + const TEvent& event = FutureEvents.top(); + processEvent(event); + if (event.IsBegin) { + currentKeyPointerOpens.push_back(event.Node); + } + FutureEvents.pop(); + } while (FutureEvents && NodeEventKeyGreater.Compare(FutureEvents.top(), currentKeyPointer) == 0); + + const auto addEvent = [&](TEvent event) { + // TODO: skip all closed nodes and don't process them here + // TODO: don't compare each 
node key and replace it with parentNode.Seek(currentKeyPointer) + auto cmp = NodeEventKeyGreater.Compare(event, currentKeyPointer); + if (cmp <= 0) { // event happened + processEvent(event); + if (cmp == 0) { + currentKeyPointerOpens.push_back(event.Node); + } + } else { // event didn't yet happen + FutureEvents.push(event); + } + }; + const auto addNode = [&](TNodeState& node) { + addEvent(TEvent{node.BeginKey, true, &node}); + addEvent(TEvent{node.EndKey, false, &node}); + }; + + // may safely skip current key pointer and go further only if at the next iteration + // sum of sizes of closed and opened nodes don't exceed next histogram bucket values (plus their gaps) + // otherwise, load opened nodes right now + // in that case, next level nodes will be converted to begin and end events + // and then either processed or been postponed to future events according to current key pointer position + while (nextHistogramRowCount != Max() && closedRowCount + openedRowCount > nextHistogramRowCount + RowCountResolutionGap && openedSortedByRowCount) { + auto node = openedSortedByRowCount.top(); + openedSortedByRowCount.pop(); + + // may have already closed or ignored nodes in the heap, just skip them + // leaf nodes will be closed later + if (node->Level && node->IgnoreOpened(openedRowCount, openedDataSize)) { + if (!TryLoadNode(*node, addNode)) { + return false; + } + } + } + while (nextHistogramDataSize != Max() && closedDataSize + openedDataSize > nextHistogramDataSize + DataSizeResolutionGap && openedSortedByDataSize) { + auto node = openedSortedByDataSize.top(); + openedSortedByDataSize.pop(); + + // may have already closed or ignored nodes in the heap, just skip them + // leaf nodes will be closed later + if (node->Level && node->IgnoreOpened(openedRowCount, openedDataSize)) { + if (!TryLoadNode(*node, addNode)) { + return false; + } + } + } + + // add current key pointer to a histogram if we either: + // - failed to split opened nodes and may exceed a next histogram 
bucket value (plus its gaps) + // - have enough closed nodes (more than a next histogram bucket value (minus its gap)) + // current key pointer value is calculated as follows: + // - size of all closed nodes + // - minus size of all nodes that start at current key pointer + // - plus half of size of all ohter opened nodes (as they exact position is unknown) + // also check that current key pointer value is > then last presented value in a histogram + if (currentKeyPointer.Key) { + if (nextHistogramRowCount != Max()) { + if (closedRowCount + openedRowCount > nextHistogramRowCount + RowCountResolutionGap || closedRowCount > nextHistogramRowCount - RowCountResolutionGap) { + ui64 currentKeyRowCountOpens = 0; + for (auto* node : currentKeyPointerOpens) { + if (node->State == ENodeState::Opened) { + currentKeyRowCountOpens += node->GetRowCount(); + } + } + Y_ABORT_UNLESS(currentKeyRowCountOpens <= openedRowCount); + ui64 currentKeyPointerRowCount = closedRowCount + (openedRowCount - currentKeyRowCountOpens) / 2; + if ((stats.RowCountHistogram.empty() ? 
0 : stats.RowCountHistogram.back().Value) < currentKeyPointerRowCount && currentKeyPointerRowCount < stats.RowCount) { + AddKey(stats.RowCountHistogram, currentKeyPointer.Key, currentKeyPointerRowCount); + nextHistogramRowCount = Max(currentKeyPointerRowCount + 1, nextHistogramRowCount + RowCountResolution); + if (nextHistogramRowCount + RowCountResolutionGap > stats.RowCount) { + nextHistogramRowCount = Max(); + } + } + } + } + if (nextHistogramDataSize != Max()) { + if (closedDataSize + openedDataSize > nextHistogramDataSize + DataSizeResolutionGap || closedDataSize > nextHistogramDataSize - DataSizeResolutionGap) { + ui64 currentKeyDataSizeOpens = 0; + for (auto* node : currentKeyPointerOpens) { + if (node->State == ENodeState::Opened) { + currentKeyDataSizeOpens += node->GetDataSize(); + } + } + Y_ABORT_UNLESS(currentKeyDataSizeOpens <= openedDataSize); + ui64 currentKeyPointerDataSize = closedDataSize + (openedDataSize - currentKeyDataSizeOpens) / 2; + if ((stats.DataSizeHistogram.empty() ? 
0 : stats.DataSizeHistogram.back().Value) < currentKeyPointerDataSize && currentKeyPointerDataSize < stats.DataSize.Size) { + AddKey(stats.DataSizeHistogram, currentKeyPointer.Key, currentKeyPointerDataSize); + nextHistogramDataSize = Max(currentKeyPointerDataSize + 1, nextHistogramDataSize + DataSizeResolution); + if (nextHistogramDataSize + DataSizeResolutionGap > stats.DataSize.Size) { + nextHistogramDataSize = Max(); + } + } + } + } + } + } + + return true; + } + + void AddKey(THistogram& histogram, TCellsIterable& key, ui64 value) { + TVector keyCells; + + // add columns that are present in the part: + auto iter = key.Iter(); + for (TPos pos : xrange(iter.Count())) { + Y_UNUSED(pos); + keyCells.push_back(iter.Next()); + } + + // extend with default values if needed: + for (TPos index = keyCells.size(); index < KeyDefaults.Defs.size(); ++index) { + keyCells.push_back(KeyDefaults.Defs[index]); + } + + TString serializedKey = TSerializedCellVec::Serialize(keyCells); + + histogram.push_back({serializedKey, value}); + } + + bool TryLoadNode(const TNodeState& parent, const auto& addNode) { + Y_ABORT_UNLESS(parent.Level); + + auto page = Env->TryGetPage(parent.Part, parent.PageId, {}); + if (!page) { + return false; + } + + LoadedBTreeNodes.emplace_back(*page); + auto &bTreeNode = LoadedBTreeNodes.back(); + auto& groupInfo = parent.Part->Scheme->GetLayout({}); + + for (auto pos : xrange(bTreeNode.GetChildrenCount())) { + auto& child = bTreeNode.GetChild(pos); + + LoadedStateNodes.emplace_back(parent.Part, child.GetPageId(), parent.Level - 1, + pos ? bTreeNode.GetChild(pos - 1).GetRowCount() : parent.BeginRowId, child.GetRowCount(), + pos ? bTreeNode.GetChild(pos - 1).GetTotalDataSize() : parent.BeginDataSize, child.GetTotalDataSize(), + pos ? bTreeNode.GetKeyCellsIterable(pos - 1, groupInfo.ColsKeyData) : parent.BeginKey, + pos < bTreeNode.GetKeysCount() ? 
bTreeNode.GetKeyCellsIterable(pos, groupInfo.ColsKeyData) : parent.EndKey); + + addNode(LoadedStateNodes.back()); + } + + return true; + } + + void AddFutureEvents(TNodeState& node) { + FutureEvents.push(TEvent{node.BeginKey, true, &node}); + FutureEvents.push(TEvent{node.EndKey, false, &node}); + } + +private: + TCellsIterable MakeCellsIterableKey(const TPart* part, TSerializedCellVec serializedKey) { + // Note: this method is only called for root nodes and don't worth optimizing + // so let's simply create a new fake b-tree index node with a given key + NPage::TBtreeIndexNodeWriter writer(part->Scheme, {}); + writer.AddChild({1, 1, 1, 0, 0}); + writer.AddKey(serializedKey.GetCells()); + writer.AddChild({2, 2, 2, 0, 0}); + TSharedData serializedNode = writer.Finish(); + LoadedBTreeNodes.emplace_back(serializedNode); + return LoadedBTreeNodes.back().GetKeyCellsIterable(0, part->Scheme->GetLayout({}).ColsKeyData); + } + + static int CompareKeys(const TCellsIterable& left_, const TCellsIterable& right_, const TKeyCellDefaults& keyDefaults) { + Y_ABORT_UNLESS(left_); + Y_ABORT_UNLESS(right_); + + auto left = left_.Iter(), right = right_.Iter(); + size_t end = Max(left.Count(), right.Count()); + Y_ABORT_UNLESS(end <= keyDefaults.Size(), "Key schema is smaller than compared keys"); + + for (size_t pos = 0; pos < end; ++pos) { + const auto& leftCell = pos < left.Count() ? left.Next() : keyDefaults.Defs[pos]; + const auto& rightCell = pos < right.Count() ? 
right.Next() : keyDefaults.Defs[pos]; + if (int cmp = CompareTypedCells(leftCell, rightCell, keyDefaults.Types[pos])) { + return cmp; + } + } + + return 0; + } + +private: + const TSubset& Subset; + const TKeyCellDefaults& KeyDefaults; + ui64 RowCountResolution, DataSizeResolution; + ui64 RowCountResolutionGap, DataSizeResolutionGap; + IPages* const Env; + TBuildStatsYieldHandler YieldHandler; + TDeque LoadedBTreeNodes; // keep nodes to use TCellsIterable references + TDeque LoadedStateNodes; // keep nodes to use their references + TNodeEventKeyGreater NodeEventKeyGreater; + TPriorityQueue, TNodeEventKeyGreater> FutureEvents; +}; + +} + +bool BuildStatsHistogramsBTreeIndex(const TSubset& subset, TStats& stats, ui64 rowCountResolution, ui64 dataSizeResolution, IPages* env, TBuildStatsYieldHandler yieldHandler) { + TTableHistogramBuilderBtreeIndex builder(subset, rowCountResolution, dataSizeResolution, env, yieldHandler); + + if (!builder.Build(stats)) { + return false; + } + + return true; +} + +} diff --git a/ydb/core/tablet_flat/flat_stat_table_btree_index_histogram.h b/ydb/core/tablet_flat/flat_stat_table_btree_index_histogram.h deleted file mode 100644 index 78bb64b69ccb..000000000000 --- a/ydb/core/tablet_flat/flat_stat_table_btree_index_histogram.h +++ /dev/null @@ -1,525 +0,0 @@ -#pragma once - -#include "flat_stat_table.h" -#include "flat_table_subset.h" - -namespace NKikimr::NTable { - -namespace { - -using TGroupId = NPage::TGroupId; -using TFrames = NPage::TFrames; -using TBtreeIndexNode = NPage::TBtreeIndexNode; -using TChild = TBtreeIndexNode::TChild; -using TColumns = TBtreeIndexNode::TColumns; -using TCells = NPage::TCells; -using TCellsIterable = TBtreeIndexNode::TCellsIterable; -using TCellsIter = TBtreeIndexNode::TCellsIter; - -const static TCellsIterable EmptyKey(static_cast(nullptr), TColumns()); - -class TTableHistogramBuilderBtreeIndex { -public: - struct TNodeState : public TIntrusiveListItem { - TPageId PageId; - ui32 Level; - TRowId 
BeginRowId, EndRowId; - TCellsIterable BeginKey, EndKey; - ui64 BeginSize, EndSize; - - TNodeState(TPageId pageId, ui32 level, TRowId beginRowId, TRowId endRowId, TCellsIterable beginKey, TCellsIterable endKey, TRowId beginSize, TRowId endSize) - : PageId(pageId) - , Level(level) - , BeginRowId(beginRowId) - , EndRowId(endRowId) - , BeginKey(beginKey) - , EndKey(endKey) - , BeginSize(beginSize) - , EndSize(endSize) - { - } - - ui64 GetSize() const noexcept { - return EndSize - BeginSize; - } - }; - - struct TGetRowCount { - static ui64 Get(const TChild& child) noexcept { - return child.GetRowCount(); - } - }; - - struct TGetDataSize { - static ui64 Get(const TChild& child) noexcept { - return child.GetTotalDataSize(); - } - }; - -private: - struct TPartNodes { - TPartNodes(const TPart* part, size_t index) - : Part(part) - , Index(index) - { - } - - const TPart* GetPart() const noexcept { - return Part; - } - - size_t GetIndex() const noexcept { - return Index; - } - - size_t GetCount() const noexcept { - return Count; - } - - ui64 GetSize() const noexcept { - return Size; - } - - const TIntrusiveList& GetNodes() const noexcept { - return Nodes; - } - - TNodeState* PopFront() noexcept { - auto result = Nodes.PopFront(); - - Count--; - Size -= result->GetSize(); - - return result; - } - - TNodeState* PopBack() noexcept { - auto result = Nodes.PopBack(); - - Count--; - Size -= result->GetSize(); - - return result; - } - - void PushFront(TNodeState* item) noexcept { - Count++; - Size += item->GetSize(); - Nodes.PushFront(item); - } - - void PushBack(TNodeState* item) noexcept { - Count++; - Size += item->GetSize(); - Nodes.PushBack(item); - } - - bool operator < (const TPartNodes& other) const noexcept { - return Size < other.Size; - } - - private: - const TPart* Part; - size_t Index; - size_t Count = 0; - ui64 Size = 0; - TIntrusiveList Nodes; - }; - -public: - TTableHistogramBuilderBtreeIndex(const TSubset& subset, IPages* env, ui32 histogramBucketsCount, 
TBuildStatsYieldHandler yieldHandler) - : Subset(subset) - , KeyDefaults(*Subset.Scheme->Keys) - , Env(env) - , HistogramBucketsCount(histogramBucketsCount) - , YieldHandler(yieldHandler) - { - } - - template - bool Build(THistogram& histogram, ui64 statTotalSize) { - if (!HistogramBucketsCount) { - return true; - } - - Resolution = statTotalSize / HistogramBucketsCount; - StatTotalSize = statTotalSize; - - bool ready = true; - ui64 endSize = 0; - TVector parts; - - for (auto index : xrange(Subset.Flatten.size())) { - auto& part = Subset.Flatten[index]; - auto& meta = part->IndexPages.GetBTree({}); - parts.emplace_back(part.Part.Get(), index); - LoadedStateNodes.emplace_back(meta.GetPageId(), meta.LevelCount, 0, meta.GetRowCount(), EmptyKey, EmptyKey, 0, TGetSize::Get(meta)); - ready &= SlicePart(parts.back(), *part.Slices, LoadedStateNodes.back()); - endSize += parts.back().GetSize(); - } - - if (!ready) { - return false; - } - - if (endSize) { - ready &= BuildHistogramRecursive(histogram, parts, 0, endSize, 0); - } - - LoadedBTreeNodes.clear(); - LoadedStateNodes.clear(); - - return ready; - } - -private: - template - bool SlicePart(TPartNodes& part, const TSlices& slices, TNodeState& node) { - YieldHandler(); - - auto it = slices.LookupBackward(slices.end(), node.EndRowId - 1); - - if (it == slices.end() || node.EndRowId <= it->BeginRowId() || it->EndRowId() <= node.BeginRowId) { - // skip the node - return true; - } - - if (it->BeginRowId() <= node.BeginRowId && node.EndRowId <= it->EndRowId()) { - // take the node - part.PushBack(&node); - return true; - } - - // split the node - - if (node.Level == 0) { - // can't split, decide by node.EndRowId - 1 - if (it->Has(node.EndRowId - 1)) { - part.PushBack(&node); - } - return true; - } - - bool ready = true; - - const auto addNode = [&](TNodeState& child) { - ready &= SlicePart(part, slices, child); - }; - if (!TryLoadNode(part.GetPart(), node, addNode)) { - return false; - } - - return ready; - } - - template - 
bool BuildHistogramRecursive(THistogram& histogram, TVector& parts, ui64 beginSize, ui64 endSize, ui32 depth) { - const static ui32 MaxDepth = 100; - - YieldHandler(); - -#ifndef NDEBUG - { - Y_DEBUG_ABORT_UNLESS(beginSize < endSize); - ui64 size = 0; - for (const auto& part : parts) { - size += part.GetSize(); - } - Y_DEBUG_ABORT_UNLESS(size == endSize - beginSize); - } -#endif - - if (SafeDiff(endSize, beginSize) <= Resolution || depth > MaxDepth) { - Y_DEBUG_ABORT_UNLESS(depth <= MaxDepth, "Shouldn't normally happen"); - return true; - } - - auto biggestPart = std::max_element(parts.begin(), parts.end()); - if (Y_UNLIKELY(biggestPart == parts.end())) { - Y_DEBUG_ABORT("Invalid part states"); - return true; - } - Y_ABORT_UNLESS(biggestPart->GetCount()); - - if (biggestPart->GetCount() == 1 && biggestPart->GetNodes().Front()->Level > 0) { - const auto addNode = [&biggestPart](TNodeState& child) { - biggestPart->PushBack(&child); - }; - if (!TryLoadNode(biggestPart->GetPart(), *biggestPart->PopFront(), addNode)) { - return false; - } - } - TCellsIterable splitKey = biggestPart->GetCount() > 1 - ? 
FindMedianPartKey(*biggestPart) - : FindMedianTableKey(parts); - - if (!splitKey) { - return true; - } - - ui64 leftSize = 0, middleSize = 0, rightSize = 0; - TVector leftParts, middleParts, rightParts; - - for (auto& part : parts) { - auto& leftNodes = PushNextPartNodes(part, leftParts); - auto& middleNodes = PushNextPartNodes(part, middleParts); - auto& rightNodes = PushNextPartNodes(part, rightParts); - - while (part.GetCount()) { - auto& node = *part.PopFront(); - if (node.EndKey && CompareKeys(node.EndKey, splitKey) <= 0) { - leftNodes.PushBack(&node); - } else if (node.BeginKey && CompareKeys(node.BeginKey, splitKey) >= 0) { - rightNodes.PushBack(&node); - } else { - middleNodes.PushBack(&node); - } - } - - Y_DEBUG_ABORT_UNLESS(middleNodes.GetCount() <= 1); - leftSize += leftNodes.GetSize(); - middleSize += middleNodes.GetSize(); - rightSize += rightNodes.GetSize(); - } - - if (middleSize > Resolution / 2) { - std::make_heap(middleParts.begin(), middleParts.end()); - - while (middleSize > Resolution / 2 && middleParts.size()) { - std::pop_heap(middleParts.begin(), middleParts.end()); - auto& middleNodes = middleParts.back(); - auto& leftNodes = GetNextPartNodes(middleNodes, leftParts); - auto& rightNodes = GetNextPartNodes(middleNodes, rightParts); - TIntrusiveList rightNodesBuffer; - - leftSize -= leftNodes.GetSize(); - middleSize -= middleNodes.GetSize(); - rightSize -= rightNodes.GetSize(); - - auto count = middleNodes.GetCount(); - bool hasChanges = false; - for (auto index : xrange(count)) { - Y_UNUSED(index); - auto& node = *middleNodes.PopFront(); - if (!node.Level) { // can't be splitted, return as-is - middleNodes.PushBack(&node); - continue; - } - const auto addNode = [&](TNodeState& node) { - if (node.EndKey && CompareKeys(node.EndKey, splitKey) <= 0) { - leftNodes.PushBack(&node); - } else if (node.BeginKey && CompareKeys(node.BeginKey, splitKey) >= 0) { - rightNodesBuffer.PushBack(&node); - } else { - middleNodes.PushBack(&node); - } - }; - if 
(!TryLoadNode(middleNodes.GetPart(), node, addNode)) { - return false; - } - hasChanges = true; - } - - while (!rightNodesBuffer.Empty()) { // reverse right part new nodes - rightNodes.PushFront(rightNodesBuffer.PopBack()); - } - - Y_DEBUG_ABORT_UNLESS(middleNodes.GetCount() <= 1); - leftSize += leftNodes.GetSize(); - middleSize += middleNodes.GetSize(); - rightSize += rightNodes.GetSize(); - - if (hasChanges) { // return updated nodes to the heap - std::push_heap(middleParts.begin(), middleParts.end()); - } else { // can't be splitted, ignore - middleParts.pop_back(); - } - } - } - - if (middleSize == 0 && (leftSize == 0 || rightSize == 0)) { - // no progress, don't continue - return true; - } - - bool ready = true; - - if (leftSize) { - ready &= BuildHistogramRecursive(histogram, leftParts, beginSize, beginSize + leftSize, depth + 1); - } - - ui64 splitSize = beginSize + leftSize + middleSize / 2; - // Note: due to different calculation approaches splitSize may exceed StatTotalSize, ignore them - if (beginSize < splitSize && splitSize < Min(endSize, StatTotalSize)) { - AddBucket(histogram, splitKey, splitSize); - } - - if (rightSize) { - ready &= BuildHistogramRecursive(histogram, rightParts, SafeDiff(endSize, rightSize), endSize, depth + 1); - } - - return ready; - } - - TCellsIterable FindMedianPartKey(const TPartNodes& part) { - Y_ABORT_UNLESS(part.GetCount() > 1, "It's impossible to split part with only one node"); - - TCellsIterable splitKey = EmptyKey; - ui64 splitSize = 0, currentSize = 0; - const ui64 middleSize = part.GetSize() / 2; - - for (const auto& node : part.GetNodes()) { - if (currentSize) { // can't split with the first key, skip it - if (!splitSize || AbsDifference(currentSize, middleSize) < AbsDifference(splitSize, middleSize)) { - splitKey = node.BeginKey; - splitSize = currentSize; - } - } - - currentSize += node.GetSize(); - } - - Y_ABORT_UNLESS(splitKey); - - return splitKey; - } - - TCellsIterable FindMedianTableKey(const TVector& parts) 
{ - TVector keys; - for (const auto& part : parts) { - for (const auto& node : part.GetNodes()) { - if (node.BeginKey) { - keys.push_back(node.BeginKey); - } - } - } - - auto median = keys.begin() + (keys.size() + 1) / 2; - - if (median == keys.end()) { - return EmptyKey; - } - - // Note: may work badly in case when all begin keys are the same - // however such cases are rare and don't worth optimizing with sort+unique complex code - // also this method is only called when we couldn't split the biggest part - std::nth_element(keys.begin(), median, keys.end(), [this](const TCellsIterable& left, const TCellsIterable& right) { - return CompareKeys(left, right) < 0; - }); - - return *median; - } - - void AddBucket(THistogram& histogram, TCellsIterable key, ui64 size) { - TVector splitKeyCells; - - // Add columns that are present in the part - auto iter = key.Iter(); - for (TPos pos : xrange(iter.Count())) { - Y_UNUSED(pos); - splitKeyCells.push_back(iter.Next()); - } - - // Extend with default values if needed - for (TPos index = splitKeyCells.size(); index < KeyDefaults.Defs.size(); ++index) { - splitKeyCells.push_back(KeyDefaults.Defs[index]); - } - - TString serializedSplitKey = TSerializedCellVec::Serialize(splitKeyCells); - - histogram.push_back({serializedSplitKey, size}); - } - - template - bool TryLoadNode(const TPart* part, const TNodeState& parent, const auto& addNode) { - Y_ABORT_UNLESS(parent.Level); - - auto page = Env->TryGetPage(part, parent.PageId, {}); - if (!page) { - return false; - } - - LoadedBTreeNodes.emplace_back(*page); - auto &bTreeNode = LoadedBTreeNodes.back(); - auto& groupInfo = part->Scheme->GetLayout({}); - - for (auto pos : xrange(bTreeNode.GetChildrenCount())) { - auto& child = bTreeNode.GetChild(pos); - - LoadedStateNodes.emplace_back(child.GetPageId(), parent.Level - 1, - pos ? bTreeNode.GetChild(pos - 1).GetRowCount() : parent.BeginRowId, child.GetRowCount(), - pos ? 
bTreeNode.GetKeyCellsIterable(pos - 1, groupInfo.ColsKeyData) : parent.BeginKey, - pos < bTreeNode.GetKeysCount() ? bTreeNode.GetKeyCellsIterable(pos, groupInfo.ColsKeyData) : parent.EndKey, - pos ? TGetSize::Get(bTreeNode.GetChild(pos - 1)) : parent.BeginSize, TGetSize::Get(child)); - - addNode(LoadedStateNodes.back()); - } - - return true; - } - - TPartNodes& PushNextPartNodes(const TPartNodes& part, TVector& list) const { - Y_ABORT_UNLESS(part.GetIndex() == list.size()); - list.emplace_back(part.GetPart(), part.GetIndex()); - return list.back(); - } - - TPartNodes& GetNextPartNodes(const TPartNodes& part, TVector& list) const { - Y_ABORT_UNLESS(part.GetPart() == list[part.GetIndex()].GetPart()); - return list[part.GetIndex()]; - } - -private: - int CompareKeys(const TCellsIterable& left_, const TCellsIterable& right_) const { - Y_DEBUG_ABORT_UNLESS(left_); - Y_DEBUG_ABORT_UNLESS(right_); - - auto left = left_.Iter(), right = right_.Iter(); - size_t end = Max(left.Count(), right.Count()); - Y_DEBUG_ABORT_UNLESS(end <= KeyDefaults.Size(), "Key schema is smaller than compared keys"); - - - for (size_t pos = 0; pos < end; ++pos) { - const auto& leftCell = pos < left.Count() ? left.Next() : KeyDefaults.Defs[pos]; - const auto& rightCell = pos < right.Count() ? 
right.Next() : KeyDefaults.Defs[pos]; - if (int cmp = CompareTypedCells(leftCell, rightCell, KeyDefaults.Types[pos])) { - return cmp; - } - } - - return 0; - } - - ui64 AbsDifference(ui64 a, ui64 b) const { - return static_cast(std::abs(static_cast(a) - static_cast(b))); - } - - ui64 SafeDiff(ui64 a, ui64 b) const { - return a - Min(a, b); - } - -private: - const TSubset& Subset; - const TKeyCellDefaults& KeyDefaults; - IPages* const Env; - ui32 HistogramBucketsCount; - TBuildStatsYieldHandler YieldHandler; - ui64 Resolution, StatTotalSize; - TDeque LoadedBTreeNodes; // keep nodes to use TCellsIterable key refs - TDeque LoadedStateNodes; // keep nodes to use TIntrusiveList -}; - -} - -inline bool BuildStatsHistogramsBTreeIndex(const TSubset& subset, TStats& stats, ui32 histogramBucketsCount, IPages* env, TBuildStatsYieldHandler yieldHandler) { - bool ready = true; - - TTableHistogramBuilderBtreeIndex builder(subset, env, histogramBucketsCount, yieldHandler); - - ready &= builder.Build(stats.RowCountHistogram, stats.RowCount); - ready &= builder.Build(stats.DataSizeHistogram, stats.DataSize.Size); - - return ready; -} - -} diff --git a/ydb/core/tablet_flat/flat_table.cpp b/ydb/core/tablet_flat/flat_table.cpp index 354bccbcaefc..ca0b9e58533d 100644 --- a/ydb/core/tablet_flat/flat_table.cpp +++ b/ydb/core/tablet_flat/flat_table.cpp @@ -1415,7 +1415,11 @@ void TTable::SetTableObserver(TIntrusivePtr ptr) noexcept void TPartStats::Add(const TPartView& partView) { PartsCount += 1; - IndexBytes += partView->IndexesRawSize; + if (partView->IndexPages.HasBTree()) { + BTreeIndexBytes += partView->IndexesRawSize; + } else { + FlatIndexBytes += partView->IndexesRawSize; + } ByKeyBytes += partView->ByKey ? 
partView->ByKey->Raw.size() : 0; PlainBytes += partView->Stat.Bytes; CodedBytes += partView->Stat.Coded; @@ -1434,7 +1438,11 @@ void TPartStats::Add(const TPartView& partView) bool TPartStats::Remove(const TPartView& partView) { NUtil::SubSafe(PartsCount, ui64(1)); - NUtil::SubSafe(IndexBytes, partView->IndexesRawSize); + if (partView->IndexPages.HasBTree()) { + NUtil::SubSafe(BTreeIndexBytes, partView->IndexesRawSize); + } else { + NUtil::SubSafe(FlatIndexBytes, partView->IndexesRawSize); + } NUtil::SubSafe(ByKeyBytes, partView->ByKey ? partView->ByKey->Raw.size() : 0); NUtil::SubSafe(PlainBytes, partView->Stat.Bytes); NUtil::SubSafe(CodedBytes, partView->Stat.Coded); @@ -1463,7 +1471,8 @@ bool TPartStats::Remove(const TPartView& partView) TPartStats& TPartStats::operator+=(const TPartStats& rhs) { PartsCount += rhs.PartsCount; - IndexBytes += rhs.IndexBytes; + FlatIndexBytes += rhs.FlatIndexBytes; + BTreeIndexBytes += rhs.BTreeIndexBytes; OtherBytes += rhs.OtherBytes; ByKeyBytes += rhs.ByKeyBytes; PlainBytes += rhs.PlainBytes; @@ -1480,7 +1489,8 @@ TPartStats& TPartStats::operator+=(const TPartStats& rhs) TPartStats& TPartStats::operator-=(const TPartStats& rhs) { NUtil::SubSafe(PartsCount, rhs.PartsCount); - NUtil::SubSafe(IndexBytes, rhs.IndexBytes); + NUtil::SubSafe(FlatIndexBytes, rhs.FlatIndexBytes); + NUtil::SubSafe(BTreeIndexBytes, rhs.BTreeIndexBytes); NUtil::SubSafe(OtherBytes, rhs.OtherBytes); NUtil::SubSafe(ByKeyBytes, rhs.ByKeyBytes); NUtil::SubSafe(PlainBytes, rhs.PlainBytes); diff --git a/ydb/core/tablet_flat/flat_table_stats.h b/ydb/core/tablet_flat/flat_table_stats.h index 172c777ac3dd..4d735d3fccdc 100644 --- a/ydb/core/tablet_flat/flat_table_stats.h +++ b/ydb/core/tablet_flat/flat_table_stats.h @@ -8,7 +8,8 @@ namespace NTable { struct TPartStats { ui64 PartsCount = 0; /* Total used TPart units in db */ - ui64 IndexBytes = 0; + ui64 FlatIndexBytes = 0; + ui64 BTreeIndexBytes = 0; ui64 OtherBytes = 0; /* Other metadata and sys. 
indexes */ ui64 ByKeyBytes = 0; ui64 PlainBytes = 0; /* Plain data pages size */ diff --git a/ydb/core/tablet_flat/test/libs/table/test_mixer.h b/ydb/core/tablet_flat/test/libs/table/test_mixer.h index 458e4b7070ae..7c82abb30a2d 100644 --- a/ydb/core/tablet_flat/test/libs/table/test_mixer.h +++ b/ydb/core/tablet_flat/test/libs/table/test_mixer.h @@ -45,7 +45,7 @@ namespace NTest { if (CurrentBucketRemainingRows-- == 0) { // start next bucket with CurrentBucketRemainingRows rows ui64 one = (Skip && Skip > Random.Uniform(Buckets) ? 1 : 0); - CurrentBucketRemainingRows = RowsPerBucket + one, Skip -= one, CurrentBucket++; + CurrentBucketRemainingRows = RowsPerBucket + one - 1, Skip -= one, CurrentBucket++; } return Min(CurrentBucket, Buckets - 1); diff --git a/ydb/core/tablet_flat/test/libs/table/test_store.h b/ydb/core/tablet_flat/test/libs/table/test_store.h index e699fa92e811..761d5c939cbf 100644 --- a/ydb/core/tablet_flat/test/libs/table/test_store.h +++ b/ydb/core/tablet_flat/test/libs/table/test_store.h @@ -24,8 +24,8 @@ namespace NTest { struct TEggs { bool Rooted; - TVector GroupIndexes; - TVector HistoricIndexes; + TVector FlatGroupIndexes; + TVector FlatHistoricIndexes; TVector BTreeGroupIndexes; TVector BTreeHistoricIndexes; TData *Scheme; diff --git a/ydb/core/tablet_flat/test/libs/table/test_writer.h b/ydb/core/tablet_flat/test/libs/table/test_writer.h index 70e916d7b65c..f710071fb6c9 100644 --- a/ydb/core/tablet_flat/test/libs/table/test_writer.h +++ b/ydb/core/tablet_flat/test/libs/table/test_writer.h @@ -77,8 +77,14 @@ namespace NTest { for (const auto &meta : eggs.BTreeGroupIndexes) { indexesRawSize += meta.IndexSize; } + for (const auto &meta : eggs.BTreeHistoricIndexes) { + indexesRawSize += meta.IndexSize; + } } else { - for (auto indexPage : eggs.GroupIndexes) { + for (auto indexPage : eggs.FlatGroupIndexes) { + indexesRawSize += Store->GetPageSize(0, indexPage); + } + for (auto indexPage : eggs.FlatHistoricIndexes) { indexesRawSize += 
Store->GetPageSize(0, indexPage); } } @@ -90,7 +96,7 @@ namespace NTest { { epoch, TPartScheme::Parse(*eggs.Scheme, eggs.Rooted), - { eggs.GroupIndexes, eggs.HistoricIndexes, eggs.BTreeGroupIndexes, eggs.BTreeHistoricIndexes }, + { eggs.FlatGroupIndexes, eggs.FlatHistoricIndexes, eggs.BTreeGroupIndexes, eggs.BTreeHistoricIndexes }, eggs.Blobs ? new TExtBlobs(*eggs.Blobs, { }) : nullptr, eggs.ByKey ? new TBloom(*eggs.ByKey) : nullptr, eggs.Large ? new TFrames(*eggs.Large) : nullptr, diff --git a/ydb/core/tablet_flat/ut/ut_db_iface.cpp b/ydb/core/tablet_flat/ut/ut_db_iface.cpp index b82252553f46..39fb1473ebe0 100644 --- a/ydb/core/tablet_flat/ut/ut_db_iface.cpp +++ b/ydb/core/tablet_flat/ut/ut_db_iface.cpp @@ -276,7 +276,8 @@ Y_UNIT_TEST_SUITE(DBase) { UNIT_ASSERT(me->Counters().Parts.RowsErase == 0); UNIT_ASSERT(me->Counters().Parts.PartsCount == 0); UNIT_ASSERT(me->Counters().Parts.PlainBytes == 0); - UNIT_ASSERT(me->Counters().Parts.IndexBytes == 0); + UNIT_ASSERT(me->Counters().Parts.FlatIndexBytes == 0); + UNIT_ASSERT(me->Counters().Parts.BTreeIndexBytes == 0); UNIT_ASSERT(me->Counters().Parts.OtherBytes == 0); } diff --git a/ydb/core/tablet_flat/ut/ut_stat.cpp b/ydb/core/tablet_flat/ut/ut_stat.cpp index a02151472585..ec265efe8daa 100644 --- a/ydb/core/tablet_flat/ut/ut_stat.cpp +++ b/ydb/core/tablet_flat/ut/ut_stat.cpp @@ -2,6 +2,8 @@ #include "flat_stat_table.h" #include "flat_stat_table_mixed_index.h" #include "flat_stat_table_btree_index.h" +#include +#include #include #include #include @@ -141,14 +143,14 @@ Y_UNIT_TEST_SUITE(BuildStatsFlatIndex) { Y_UNIT_TEST(Single_History) { auto subset = TMake(Mass0, PageConf(Mass0.Model->Scheme->Families.size(), WriteBTreeIndex)).Mixed(0, 1, TMixerOne{ }, 0.3); - CheckMixedIndex(*subset, 24000, 3547100, 31242); + CheckMixedIndex(*subset, 24000, 3547100, 49916); } Y_UNIT_TEST(Single_History_Slices) { auto subset = TMake(Mass0, PageConf(Mass0.Model->Scheme->Families.size(), WriteBTreeIndex)).Mixed(0, 1, TMixerOne{ }, 0.3, 
13); subset->Flatten.begin()->Slices->Describe(Cerr); Cerr << Endl; - CheckMixedIndex(*subset, 9582, 1425198, 31242); + CheckMixedIndex(*subset, 9582, 1425198, 49916); } Y_UNIT_TEST(Single_Groups) @@ -167,14 +169,14 @@ Y_UNIT_TEST_SUITE(BuildStatsFlatIndex) { Y_UNIT_TEST(Single_Groups_History) { auto subset = TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size(), WriteBTreeIndex)).Mixed(0, 1, TMixerOne{ }, 0.3); - CheckMixedIndex(*subset, 24000, 4054050, 18810); + CheckMixedIndex(*subset, 24000, 4054050, 29361); } Y_UNIT_TEST(Single_Groups_History_Slices) { auto subset = TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size(), WriteBTreeIndex)).Mixed(0, 1, TMixerOne{ }, 0.3, 13); subset->Flatten.begin()->Slices->Describe(Cerr); Cerr << Endl; - CheckMixedIndex(*subset, 13570, 2277890, 18810); + CheckMixedIndex(*subset, 13570, 2277890, 29361); } Y_UNIT_TEST(Mixed) @@ -192,14 +194,14 @@ Y_UNIT_TEST_SUITE(BuildStatsFlatIndex) { Y_UNIT_TEST(Mixed_Groups_History) { auto subset = TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size(), WriteBTreeIndex)).Mixed(0, 4, TMixerRnd(4), 0.3); - CheckMixedIndex(*subset, 24000, 4054270, 19152); + CheckMixedIndex(*subset, 24000, 4054270, 29970); } Y_UNIT_TEST(Serial) { TMixerSeq mixer(4, Mass0.Saved.Size()); auto subset = TMake(Mass0, PageConf(Mass0.Model->Scheme->Families.size(), WriteBTreeIndex)).Mixed(0, 4, mixer); - CheckMixedIndex(*subset, 24000, 2106459, 25428); + CheckMixedIndex(*subset, 24000, 2106479, 25458); } Y_UNIT_TEST(Serial_Groups) @@ -213,7 +215,7 @@ Y_UNIT_TEST_SUITE(BuildStatsFlatIndex) { { TMixerSeq mixer(4, Mass1.Saved.Size()); auto subset = TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size(), WriteBTreeIndex)).Mixed(0, 4, mixer, 0.3); - CheckMixedIndex(*subset, 24000, 4054290, 19168); + CheckMixedIndex(*subset, 24000, 4054290, 30013); } } @@ -237,14 +239,14 @@ Y_UNIT_TEST_SUITE(BuildStatsMixedIndex) { Y_UNIT_TEST(Single_History) { auto subset = TMake(Mass0, 
PageConf(Mass0.Model->Scheme->Families.size(), WriteBTreeIndex)).Mixed(0, 1, TMixerOne{ }, 0.3); - CheckMixedIndex(*subset, 24000, 3547100, 61162); + CheckMixedIndex(*subset, 24000, 3547100, 81694); } Y_UNIT_TEST(Single_History_Slices) { auto subset = TMake(Mass0, PageConf(Mass0.Model->Scheme->Families.size(), WriteBTreeIndex)).Mixed(0, 1, TMixerOne{ }, 0.3, 13); subset->Flatten.begin()->Slices->Describe(Cerr); Cerr << Endl; - CheckMixedIndex(*subset, 9582, 1425198, 61162); + CheckMixedIndex(*subset, 9582, 1425198, 81694); } Y_UNIT_TEST(Single_Groups) @@ -263,14 +265,14 @@ Y_UNIT_TEST_SUITE(BuildStatsMixedIndex) { Y_UNIT_TEST(Single_Groups_History) { auto subset = TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size(), WriteBTreeIndex)).Mixed(0, 1, TMixerOne{ }, 0.3); - CheckMixedIndex(*subset, 24000, 4054050, 34837); + CheckMixedIndex(*subset, 24000, 4054050, 46562); } Y_UNIT_TEST(Single_Groups_History_Slices) { auto subset = TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size(), WriteBTreeIndex)).Mixed(0, 1, TMixerOne{ }, 0.3, 13); subset->Flatten.begin()->Slices->Describe(Cerr); Cerr << Endl; - CheckMixedIndex(*subset, 13570, 2277890, 34837); + CheckMixedIndex(*subset, 13570, 2277890, 46562); } Y_UNIT_TEST(Mixed) @@ -288,14 +290,14 @@ Y_UNIT_TEST_SUITE(BuildStatsMixedIndex) { Y_UNIT_TEST(Mixed_Groups_History) { auto subset = TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size(), WriteBTreeIndex)).Mixed(0, 4, TMixerRnd(4), 0.3); - CheckMixedIndex(*subset, 24000, 4054270, 34579); + CheckMixedIndex(*subset, 24000, 4054270, 46543); } Y_UNIT_TEST(Serial) { TMixerSeq mixer(4, Mass0.Saved.Size()); auto subset = TMake(Mass0, PageConf(Mass0.Model->Scheme->Families.size(), WriteBTreeIndex)).Mixed(0, 4, mixer); - CheckMixedIndex(*subset, 24000, 2106459, 49502); + CheckMixedIndex(*subset, 24000, 2106479, 49555); } Y_UNIT_TEST(Serial_Groups) @@ -309,7 +311,7 @@ Y_UNIT_TEST_SUITE(BuildStatsMixedIndex) { { TMixerSeq mixer(4, Mass1.Saved.Size()); auto subset = 
TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size(), WriteBTreeIndex)).Mixed(0, 4, mixer, 0.3); - CheckMixedIndex(*subset, 24000, 4054290, 34652); + CheckMixedIndex(*subset, 24000, 4054290, 46640); } Y_UNIT_TEST(Single_LowResolution) @@ -341,14 +343,14 @@ Y_UNIT_TEST_SUITE(BuildStatsMixedIndex) { Y_UNIT_TEST(Single_Groups_History_LowResolution) { auto subset = TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size(), true, WriteBTreeIndex)).Mixed(0, 1, TMixerOne{ }, 0.3); - CheckMixedIndex(*subset, 24000, 4054050, 48540, 5310, 531050); + CheckMixedIndex(*subset, 24000, 4054050, 64742, 5310, 531050); } Y_UNIT_TEST(Single_Groups_History_Slices_LowResolution) { auto subset = TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size(), true, WriteBTreeIndex)).Mixed(0, 1, TMixerOne{ }, 0.3, 13); subset->Flatten.begin()->Slices->Describe(Cerr); Cerr << Endl; - CheckMixedIndex(*subset, 13570, 2234982 /* ~2277890 */, 48540, 5310, 531050); + CheckMixedIndex(*subset, 13570, 2234982 /* ~2277890 */, 64742, 5310, 531050); } } @@ -372,14 +374,14 @@ Y_UNIT_TEST_SUITE(BuildStatsBTreeIndex) { Y_UNIT_TEST(Single_History) { auto subset = TMake(Mass0, PageConf(Mass0.Model->Scheme->Families.size(), WriteBTreeIndex)).Mixed(0, 1, TMixerOne{ }, 0.3); - CheckBTreeIndex(*subset, 24000, 3547100, 61162); + CheckBTreeIndex(*subset, 24000, 3547100, 81694); } Y_UNIT_TEST(Single_History_Slices) { auto subset = TMake(Mass0, PageConf(Mass0.Model->Scheme->Families.size(), WriteBTreeIndex)).Mixed(0, 1, TMixerOne{ }, 0.3, 13); subset->Flatten.begin()->Slices->Describe(Cerr); Cerr << Endl; - CheckBTreeIndex(*subset, 9582, 1425282, 61162); + CheckBTreeIndex(*subset, 9582, 1425282, 81694); } Y_UNIT_TEST(Single_Groups) @@ -398,14 +400,14 @@ Y_UNIT_TEST_SUITE(BuildStatsBTreeIndex) { Y_UNIT_TEST(Single_Groups_History) { auto subset = TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size(), WriteBTreeIndex)).Mixed(0, 1, TMixerOne{ }, 0.3); - CheckBTreeIndex(*subset, 24000, 4054050, 34837); + 
CheckBTreeIndex(*subset, 24000, 4054050, 46562); } Y_UNIT_TEST(Single_Groups_History_Slices) { auto subset = TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size(), WriteBTreeIndex)).Mixed(0, 1, TMixerOne{ }, 0.3, 13); subset->Flatten.begin()->Slices->Describe(Cerr); Cerr << Endl; - CheckBTreeIndex(*subset, 13570, 2273213, 34837); + CheckBTreeIndex(*subset, 13570, 2273213, 46562); } Y_UNIT_TEST(Mixed) @@ -423,7 +425,7 @@ Y_UNIT_TEST_SUITE(BuildStatsBTreeIndex) { Y_UNIT_TEST(Mixed_Groups_History) { auto subset = TMake(Mass1, PageConf(Mass1.Model->Scheme->Families.size(), WriteBTreeIndex)).Mixed(0, 4, TMixerRnd(4), 0.3); - CheckBTreeIndex(*subset, 24000, 4054270, 34579); + CheckBTreeIndex(*subset, 24000, 4054270, 46543); } } @@ -455,14 +457,14 @@ Y_UNIT_TEST_SUITE(BuildStatsHistogram) { { const ui32 samples = 5; - Cerr << "Parts:" << Endl; + Cerr << subset.Flatten.size() << " parts:" << Endl; for (auto &part : subset.Flatten) { TTestEnv env; auto index = CreateIndexIter(part.Part.Get(), &env, {}); Cerr << " " << index->GetEndRowId() << " rows, " << IndexTools::CountMainPages(*part.Part) << " pages, " << (part->IndexPages.HasBTree() ? 
part->IndexPages.GetBTree({}).LevelCount : -1) << " levels: "; - for (ui32 sample : xrange(samples + 1)) { + for (ui32 sample : xrange(1u, samples + 1)) { TRowId rowId((index->GetEndRowId() - 1) * sample / samples); Y_ABORT_UNLESS(index->Seek(rowId) == EReady::Data); TSmallVec keyCells; @@ -492,7 +494,33 @@ Y_UNIT_TEST_SUITE(BuildStatsHistogram) { UNIT_ASSERT_LE(std::abs(percent), allowed); } - void CalcDataBefore(const TSubset& subset, TSerializedCellVec key, ui64& bytes, ui64& rows) { + void CalcDataBeforeIterate(const TSubset& subset, TSerializedCellVec key, ui64& bytes, ui64& rows) { + NTest::TChecker wrap(subset, { new TTouchEnv }); + auto env = wrap.GetEnv(); + env->Faulty = false; + + bytes = 0; + rows = 0; + wrap.Seek({}, ESeek::Lower); + + while (wrap.GetReady() == EReady::Data) { + ui64 prevBytes = env->TouchedBytes; + + wrap.Next(); + + if (wrap.GetReady() == EReady::Data && key.GetCells()) { + auto cmp = CompareTypedCellVectors(key.GetCells().data(), wrap->GetKey().Cells().data(), subset.Scheme->Keys->Types.data(), Min(key.GetCells().size(), wrap->GetKey().Cells().size())); + if (cmp < 0) { + break; + } + } + + rows++; + bytes = prevBytes; + } + } + + void CalcDataBeforePrecharge(const TSubset& subset, TSerializedCellVec key, ui64& bytes, ui64& rows) { TTouchEnv env; env.Faulty = false; @@ -513,6 +541,23 @@ Y_UNIT_TEST_SUITE(BuildStatsHistogram) { rows = env.TouchedRows; } + void CalcDataBefore(const TSubset& subset, TSerializedCellVec key, ui64& bytes, ui64& rows) { + bool groups = false; + rows = 0; + for (const auto& part : subset.Flatten) { + TTestEnv env; + auto index = CreateIndexIter(part.Part.Get(), &env, {}); + rows += index->GetEndRowId(); + groups |= part->GroupsCount > 1 || part->HistoricGroupsCount > 0; + } + + if (groups || rows > 10000) { + CalcDataBeforePrecharge(subset, key, bytes, rows); + } else { + CalcDataBeforeIterate(subset, key, bytes, rows); + } + } + void CheckHistogram(const TSubset& subset, THistogram histogram, bool isBytes, 
ui64 total, bool verifyPercents) { Cerr << " " << (isBytes ? "DataSizeHistogram:" : "RowCountHistogram:") << Endl; @@ -550,11 +595,11 @@ Y_UNIT_TEST_SUITE(BuildStatsHistogram) { ui64 delta = total - prevValue, actualDelta = total - prevActualValue; Cerr << " " << FormatPercent(delta, total) << " (actual " << FormatPercent(actualDelta, total) << ")" << Endl; if (verifyPercents) VerifyPercent(delta, total, 20); - UNIT_ASSERT_GT(total, prevValue); + UNIT_ASSERT_GE(total, prevValue); } } - void Check(const TSubset& subset, TMode mode, ui32 histogramBucketsCount = 10, bool verifyPercents = true) { + void Check(const TSubset& subset, TMode mode, ui32 histogramBucketsCount = 10, bool verifyPercents = true, bool faulty = true) { if (mode == 0) { Dump(subset); } @@ -571,6 +616,7 @@ Y_UNIT_TEST_SUITE(BuildStatsHistogram) { ui64 dataSizeResolution = totalBytes / histogramBucketsCount; TTouchEnv env; + env.Faulty = faulty; // env.Faulty = false; // uncomment for debug TStats stats; auto buildStats = [&]() { @@ -581,7 +627,7 @@ Y_UNIT_TEST_SUITE(BuildStatsHistogram) { } }; - const ui32 attempts = 35; + const ui32 attempts = 100; for (ui32 attempt : xrange(attempts)) { if (buildStats()) { break; @@ -593,6 +639,11 @@ Y_UNIT_TEST_SUITE(BuildStatsHistogram) { CheckHistogram(subset, stats.RowCountHistogram, false, totalRows, verifyPercents); CheckHistogram(subset, stats.DataSizeHistogram, true, totalBytes, verifyPercents); + + if (mode == BTreeIndex && verifyPercents && histogramBucketsCount != 1000) { + UNIT_ASSERT_VALUES_EQUAL(stats.RowCountHistogram.size(), histogramBucketsCount - 1); + UNIT_ASSERT_VALUES_EQUAL(stats.DataSizeHistogram.size(), histogramBucketsCount - 1); + } } Y_UNIT_TEST(Single) @@ -906,6 +957,14 @@ Y_UNIT_TEST_SUITE(BuildStatsHistogram) { } } + Y_UNIT_TEST(Single_Small_2_Levels_3_Buckets) + { + for (auto mode : {BTreeIndex, FlatIndex, MixedIndex}) { + auto subset = TMake(Mass3, PageConf(Mass3.Model->Scheme->Families.size(), mode)).Mixed(0, 1, TMixerOne{ }); + 
Check(*subset, mode, 5, false); + } + } + Y_UNIT_TEST(Single_Small_1_Level) { for (auto mode : {BTreeIndex, FlatIndex, MixedIndex}) { @@ -938,6 +997,14 @@ Y_UNIT_TEST_SUITE(BuildStatsHistogram) { } } + Y_UNIT_TEST(Three_Mixed_Small_2_Levels_3_Buckets) + { + for (auto mode : {BTreeIndex, FlatIndex, MixedIndex}) { + auto subset = TMake(Mass3, PageConf(Mass3.Model->Scheme->Families.size(), mode)).Mixed(0, 3, TMixerRnd(3)); + Check(*subset, mode, 5, false); + } + } + Y_UNIT_TEST(Three_Mixed_Small_1_Level) { for (auto mode : {BTreeIndex, FlatIndex, MixedIndex}) { @@ -970,6 +1037,14 @@ Y_UNIT_TEST_SUITE(BuildStatsHistogram) { } } + Y_UNIT_TEST(Three_Serial_Small_2_Levels_3_Buckets) + { + for (auto mode : {BTreeIndex, FlatIndex, MixedIndex}) { + auto subset = TMake(Mass3, PageConf(Mass3.Model->Scheme->Families.size(), mode)).Mixed(0, 3, TMixerSeq(3, Mass3.Saved.Size())); + Check(*subset, mode, 5, false); + } + } + Y_UNIT_TEST(Three_Serial_Small_1_Level) { for (auto mode : {BTreeIndex, FlatIndex, MixedIndex}) { @@ -1033,6 +1108,46 @@ Y_UNIT_TEST_SUITE(BuildStatsHistogram) { Check(*subset, mode, 10, false); } } + + Y_UNIT_TEST(Many_Mixed) + { + const ui32 partsCount = 1000; + const ui64 rowsCount = 100000; + + TAutoPtr mass = new NTest::TMass(new NTest::TModelStd(false), rowsCount); + + for (auto mode : {BTreeIndex, FlatIndex, MixedIndex}) { + NPage::TConf conf; + conf.Groups.resize(mass->Model->Scheme->Families.size()); + conf.Group(0).PageRows = 1; // we don't care about pages actual size + conf.Group(0).BTreeIndexNodeKeysMin = conf.Group(0).BTreeIndexNodeKeysMax = 2; + conf.WriteBTreeIndex = (mode == FlatIndex ? 
false : true); + + TAutoPtr subset = TMake(*mass, conf).Mixed(0, partsCount, TMixerRnd(partsCount)); + + Check(*subset, mode, 10, false, false); + } + } + + Y_UNIT_TEST(Many_Serial) + { + const ui32 partsCount = 1000; + const ui64 rowsCount = 100000; + + TAutoPtr mass = new NTest::TMass(new NTest::TModelStd(false), rowsCount); + + for (auto mode : {BTreeIndex, FlatIndex, MixedIndex}) { + NPage::TConf conf; + conf.Groups.resize(mass->Model->Scheme->Families.size()); + conf.Group(0).PageRows = 1; // we don't care about pages actual size + conf.Group(0).BTreeIndexNodeKeysMin = conf.Group(0).BTreeIndexNodeKeysMax = 2; + conf.WriteBTreeIndex = (mode == FlatIndex ? false : true); + + TAutoPtr subset = TMake(*mass, conf).Mixed(0, partsCount, TMixerSeq(partsCount, mass->Saved.Size())); + + Check(*subset, mode, 10, false, false); + } + } } } diff --git a/ydb/core/tablet_flat/ya.make b/ydb/core/tablet_flat/ya.make index e5f02f16af21..0355f40f09f2 100644 --- a/ydb/core/tablet_flat/ya.make +++ b/ydb/core/tablet_flat/ya.make @@ -56,6 +56,8 @@ SRCS( flat_stat_part.h flat_stat_table.h flat_stat_table.cpp + flat_stat_table_btree_index.cpp + flat_stat_table_btree_index_histogram.cpp flat_stat_part_group_iter_create.cpp flat_store_hotdog.cpp flat_table.cpp diff --git a/ydb/core/testlib/actors/block_events.cpp b/ydb/core/testlib/actors/block_events.cpp new file mode 100644 index 000000000000..801f1c5bf126 --- /dev/null +++ b/ydb/core/testlib/actors/block_events.cpp @@ -0,0 +1 @@ +#include "block_events.h" diff --git a/ydb/core/testlib/actors/block_events.h b/ydb/core/testlib/actors/block_events.h new file mode 100644 index 000000000000..f54d525cf6cb --- /dev/null +++ b/ydb/core/testlib/actors/block_events.h @@ -0,0 +1,90 @@ +#include "test_runtime.h" + +#include +#include + +namespace NActors { + + /** + * Easy blocking for events under the test actor runtime + * + * Matching events are blocked just before they are processed and stashed + * into a deque. 
+ */ + template + class TBlockEvents : public std::deque { + public: + TBlockEvents(TTestActorRuntime& runtime, std::function condition = {}) + : Runtime(runtime) + , Condition(std::move(condition)) + , Holder(Runtime.AddObserver( + [this](typename TEvType::TPtr& ev) { + this->Process(ev); + })) + {} + + /** + * Unblocks up to count events at the front of the deque, allowing them + * to be handled by the destination actor. + */ + TBlockEvents& Unblock(size_t count = Max()) { + while (!this->empty() && count > 0) { + auto& ev = this->front(); + if (!Stopped) { + IEventHandle* ptr = ev.Get(); + UnblockedOnce.insert(ptr); + } + ui32 nodeId = ev->GetRecipientRewrite().NodeId(); + ui32 nodeIdx = nodeId - Runtime.GetFirstNodeId(); + Cerr << "... unblocking " << (ev->HasEvent() ? TypeName(*ev->GetBase()) : TypeName()) + << " from " << Runtime.FindActorName(ev->Sender) + << " to " << Runtime.FindActorName(ev->GetRecipientRewrite()) + << Endl; + Runtime.Send(ev.Release(), nodeIdx, /* viaActorSystem */ true); + this->pop_front(); + --count; + } + return *this; + } + + /** + * Stops blocking any new events. Events currently in the deque are + * not unblocked, but may be unblocked at a later time if needed. + */ + TBlockEvents& Stop() { + UnblockedOnce.clear(); + Holder.Remove(); + Stopped = true; + return *this; + } + + private: + void Process(typename TEvType::TPtr& ev) { + IEventHandle* ptr = ev.Get(); + auto it = UnblockedOnce.find(ptr); + if (it != UnblockedOnce.end()) { + UnblockedOnce.erase(it); + return; + } + + if (Condition && !Condition(ev)) { + return; + } + + Cerr << "... blocking " << (ev->HasEvent() ? 
TypeName(*ev->GetBase()) : TypeName()) + << " from " << Runtime.FindActorName(ev->Sender) + << " to " << Runtime.FindActorName(ev->GetRecipientRewrite()) + << Endl; + this->emplace_back(std::move(ev)); + } + + private: + TTestActorRuntime& Runtime; + std::function Condition; + TTestActorRuntime::TEventObserverHolder Holder; + THashSet UnblockedOnce; + bool Stopped = false; + }; + + +} // namespace NActors diff --git a/ydb/core/testlib/actors/test_runtime.cpp b/ydb/core/testlib/actors/test_runtime.cpp index 22201081f16f..645a9368b69e 100644 --- a/ydb/core/testlib/actors/test_runtime.cpp +++ b/ydb/core/testlib/actors/test_runtime.cpp @@ -23,10 +23,11 @@ namespace NActors { void TTestActorRuntime::TNodeData::Stop() { - TNodeDataBase::Stop(); if (Mon) { Mon->Stop(); + GetAppData()->Mon = nullptr; } + TNodeDataBase::Stop(); } TTestActorRuntime::TNodeData::~TNodeData() { diff --git a/ydb/core/testlib/actors/test_runtime.h b/ydb/core/testlib/actors/test_runtime.h index 3016bc441417..d57fcada8d28 100644 --- a/ydb/core/testlib/actors/test_runtime.h +++ b/ydb/core/testlib/actors/test_runtime.h @@ -68,22 +68,42 @@ namespace NActors { void SimulateSleep(TDuration duration); template - inline TResult WaitFuture(NThreading::TFuture f) { + inline TResult WaitFuture(NThreading::TFuture f, TDuration simTimeout = TDuration::Max()) { if (!f.HasValue() && !f.HasException()) { TDispatchOptions options; options.CustomFinalCondition = [&]() { return f.HasValue() || f.HasException(); }; - options.FinalEvents.emplace_back([&](IEventHandle&) { - return f.HasValue() || f.HasException(); - }); + // Quirk: non-empty FinalEvents enables full simulation + options.FinalEvents.emplace_back([](IEventHandle&) { return false; }); - this->DispatchEvents(options); + this->DispatchEvents(options, simTimeout); Y_ABORT_UNLESS(f.HasValue() || f.HasException()); } - return f.ExtractValue(); + if constexpr (!std::is_same_v) { + return f.ExtractValue(); + } else { + return f.GetValue(); + } + } + + template + 
inline void WaitFor(const TString& description, const TCondition& condition, TDuration simTimeout = TDuration::Max()) { + if (!condition()) { + TDispatchOptions options; + options.CustomFinalCondition = [&]() { + return condition(); + }; + // Quirk: non-empty FinalEvents enables full simulation + options.FinalEvents.emplace_back([](IEventHandle&) { return false; }); + + Cerr << "... waiting for " << description << Endl; + this->DispatchEvents(options, simTimeout); + + Y_ABORT_UNLESS(condition(), "Timeout while waiting for %s", description.c_str()); + } } TIntrusivePtr GetMemObserver(ui32 nodeIndex = 0) { diff --git a/ydb/core/testlib/actors/test_runtime_ut.cpp b/ydb/core/testlib/actors/test_runtime_ut.cpp index d649df72fc89..54cde3af42fd 100644 --- a/ydb/core/testlib/actors/test_runtime_ut.cpp +++ b/ydb/core/testlib/actors/test_runtime_ut.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -622,6 +623,209 @@ Y_UNIT_TEST_SUITE(TActorTest) { UNIT_ASSERT_VALUES_EQUAL(event->Get()->Index, 12u); } } -}; + + Y_UNIT_TEST(TestWaitFuture) { + enum EEv { + EvTrigger = EventSpaceBegin(TEvents::ES_PRIVATE) + }; + + struct TEvTrigger : public TEventLocal { + TEvTrigger() = default; + }; + + class TTriggerActor : public TActorBootstrapped { + public: + TTriggerActor(NThreading::TPromise promise) + : Promise(std::move(promise)) + {} + + void Bootstrap() { + Schedule(TDuration::Seconds(1), new TEvTrigger); + Become(&TThis::StateWork); + } + + private: + STFUNC(StateWork) { + switch (ev->GetTypeRewrite()) { + hFunc(TEvTrigger, Handle); + } + } + + void Handle(TEvTrigger::TPtr&) { + Promise.SetValue(); + PassAway(); + } + + private: + NThreading::TPromise Promise; + }; + + TTestActorRuntime runtime; + runtime.Initialize(MakeEgg()); + + NThreading::TPromise promise = NThreading::NewPromise(); + NThreading::TFuture future = promise.GetFuture(); + + auto actor = runtime.Register(new TTriggerActor(std::move(promise))); + runtime.EnableScheduleForActor(actor); + + 
runtime.WaitFuture(std::move(future)); + } + + Y_UNIT_TEST(TestWaitFor) { + enum EEv { + EvTrigger = EventSpaceBegin(TEvents::ES_PRIVATE) + }; + + struct TEvTrigger : public TEventLocal { + TEvTrigger() = default; + }; + + class TTriggerActor : public TActorBootstrapped { + public: + TTriggerActor(int* ptr) + : Ptr(ptr) + {} + + void Bootstrap() { + Schedule(TDuration::Seconds(1), new TEvTrigger); + Become(&TThis::StateWork); + } + + private: + STFUNC(StateWork) { + switch (ev->GetTypeRewrite()) { + hFunc(TEvTrigger, Handle); + } + } + + void Handle(TEvTrigger::TPtr&) { + *Ptr = 42; + PassAway(); + } + + private: + int* Ptr; + }; + + TTestActorRuntime runtime; + runtime.Initialize(MakeEgg()); + + int value = 0; + auto actor = runtime.Register(new TTriggerActor(&value)); + runtime.EnableScheduleForActor(actor); + + runtime.WaitFor("value = 42", [&]{ return value == 42; }); + UNIT_ASSERT_VALUES_EQUAL(value, 42); + } + + Y_UNIT_TEST(TestBlockEvents) { + enum EEv { + EvTrigger = EventSpaceBegin(TEvents::ES_PRIVATE) + }; + + struct TEvTrigger : public TEventLocal { + int Value; + + TEvTrigger(int value) + : Value(value) + {} + }; + + class TTargetActor : public TActorBootstrapped { + public: + TTargetActor(std::vector* ptr) + : Ptr(ptr) + {} + + void Bootstrap() { + Become(&TThis::StateWork); + } + + private: + STFUNC(StateWork) { + switch (ev->GetTypeRewrite()) { + hFunc(TEvTrigger, Handle); + } + } + + void Handle(TEvTrigger::TPtr& ev) { + Ptr->push_back(ev->Get()->Value); + } + + private: + std::vector* Ptr; + }; + + class TSourceActor : public TActorBootstrapped { + public: + TSourceActor(const TActorId& target) + : Target(target) + {} + + void Bootstrap() { + Become(&TThis::StateWork); + Schedule(TDuration::Seconds(1), new TEvents::TEvWakeup); + } + + private: + STFUNC(StateWork) { + switch (ev->GetTypeRewrite()) { + hFunc(TEvents::TEvWakeup, Handle); + } + } + + void Handle(TEvents::TEvWakeup::TPtr&) { + Send(Target, new TEvTrigger(++Counter)); + 
Schedule(TDuration::Seconds(1), new TEvents::TEvWakeup); + } + + private: + TActorId Target; + int Counter = 0; + }; + + TTestActorRuntime runtime(2); + runtime.Initialize(MakeEgg()); + + std::vector values; + auto target = runtime.Register(new TTargetActor(&values), /* nodeIdx */ 1); + auto source = runtime.Register(new TSourceActor(target), /* nodeIdx */ 1); + runtime.EnableScheduleForActor(source); + + TBlockEvents block(runtime, [&](const TEvTrigger::TPtr& ev){ return ev->GetRecipientRewrite() == target; }); + runtime.WaitFor("blocked 3 events", [&]{ return block.size() >= 3; }); + UNIT_ASSERT_VALUES_EQUAL(block.size(), 3u); + UNIT_ASSERT_VALUES_EQUAL(values.size(), 0u); + + block.Unblock(2); + UNIT_ASSERT_VALUES_EQUAL(block.size(), 1u); + UNIT_ASSERT_VALUES_EQUAL(values.size(), 0u); + + runtime.WaitFor("blocked 1 more event", [&]{ return block.size() >= 2; }); + UNIT_ASSERT_VALUES_EQUAL(block.size(), 2u); + UNIT_ASSERT_VALUES_EQUAL(values.size(), 2u); + UNIT_ASSERT_VALUES_EQUAL(values.at(0), 1); + UNIT_ASSERT_VALUES_EQUAL(values.at(1), 2); + values.clear(); + + block.Stop(); + runtime.WaitFor("processed 2 more events", [&]{ return values.size() >= 2; }); + UNIT_ASSERT_VALUES_EQUAL(block.size(), 2u); + UNIT_ASSERT_VALUES_EQUAL(values.size(), 2u); + UNIT_ASSERT_VALUES_EQUAL(values.at(0), 5); + UNIT_ASSERT_VALUES_EQUAL(values.at(1), 6); + values.clear(); + + block.Unblock(); + UNIT_ASSERT_VALUES_EQUAL(block.size(), 0u); + UNIT_ASSERT_VALUES_EQUAL(values.size(), 0u); + runtime.WaitFor("processed 3 more events", [&]{ return values.size() >= 3; }); + UNIT_ASSERT_VALUES_EQUAL(values.size(), 3u); + UNIT_ASSERT_VALUES_EQUAL(values.at(0), 3); + UNIT_ASSERT_VALUES_EQUAL(values.at(1), 4); + UNIT_ASSERT_VALUES_EQUAL(values.at(2), 7); + } +} } diff --git a/ydb/core/testlib/actors/ya.make b/ydb/core/testlib/actors/ya.make index 25f814605794..9c7caacf445d 100644 --- a/ydb/core/testlib/actors/ya.make +++ b/ydb/core/testlib/actors/ya.make @@ -1,7 +1,10 @@ LIBRARY() SRCS( + 
block_events.cpp + block_events.h test_runtime.cpp + test_runtime.h ) PEERDIR( diff --git a/ydb/core/testlib/basics/feature_flags.h b/ydb/core/testlib/basics/feature_flags.h index 1270874e4a02..04395a54f3fd 100644 --- a/ydb/core/testlib/basics/feature_flags.h +++ b/ydb/core/testlib/basics/feature_flags.h @@ -9,13 +9,13 @@ class TTestFeatureFlagsHolder { public: TFeatureFlags FeatureFlags; - #define FEATURE_FLAG_SETTER(name) \ - TDerived& Set##name(std::optional value) { \ - if (value) { \ - FeatureFlags.Set##name(*value); \ - } \ - return *static_cast(this); \ - } +#define FEATURE_FLAG_SETTER(name) \ + TDerived& Set##name(std::optional value) { \ + if (value) { \ + FeatureFlags.Set##name(*value); \ + } \ + return *static_cast(this); \ + } FEATURE_FLAG_SETTER(AllowYdbRequestsWithoutDatabase) FEATURE_FLAG_SETTER(EnableSystemViews) @@ -61,8 +61,12 @@ class TTestFeatureFlagsHolder { FEATURE_FLAG_SETTER(EnableCMSRequestPriorities) FEATURE_FLAG_SETTER(EnableTableDatetime64) FEATURE_FLAG_SETTER(EnableResourcePools) + FEATURE_FLAG_SETTER(EnableChangefeedsOnIndexTables) + FEATURE_FLAG_SETTER(EnablePgSyntax) + FEATURE_FLAG_SETTER(EnableTieringInColumnShard) + FEATURE_FLAG_SETTER(EnableOlapCompression) - #undef FEATURE_FLAG_SETTER +#undef FEATURE_FLAG_SETTER }; -} // NKikimr +} // namespace NKikimr diff --git a/ydb/core/testlib/basics/runtime.cpp b/ydb/core/testlib/basics/runtime.cpp index 520181dc05ee..0c9921bf59b4 100644 --- a/ydb/core/testlib/basics/runtime.cpp +++ b/ydb/core/testlib/basics/runtime.cpp @@ -17,6 +17,7 @@ namespace NActors { void TTestBasicRuntime::Initialize(TEgg egg) { AddICStuff(); + AddAuditLogStuff(); TTestActorRuntime::Initialize(std::move(egg)); } @@ -76,4 +77,21 @@ namespace NActors { } } } + + void TTestBasicRuntime::AddAuditLogStuff() + { + if (AuditLogBackends) { + for (ui32 nodeIndex = 0; nodeIndex < GetNodeCount(); ++nodeIndex) { + AddLocalService( + NKikimr::NAudit::MakeAuditServiceID(), + TActorSetupCmd( + 
NKikimr::NAudit::CreateAuditWriter(std::move(AuditLogBackends)).Release(), + TMailboxType::HTSwap, + 0 + ), + nodeIndex + ); + } + } + } } diff --git a/ydb/core/testlib/basics/runtime.h b/ydb/core/testlib/basics/runtime.h index 863f12286fa1..33126d920f53 100644 --- a/ydb/core/testlib/basics/runtime.h +++ b/ydb/core/testlib/basics/runtime.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include @@ -12,9 +13,13 @@ namespace NActors { using TNodeLocationCallback = std::function; TNodeLocationCallback LocationCallback; + NKikimr::NAudit::TAuditLogBackends AuditLogBackends; + ~TTestBasicRuntime(); void Initialize(TEgg) override; + void AddICStuff(); + void AddAuditLogStuff(); }; } diff --git a/ydb/core/testlib/common_helper.cpp b/ydb/core/testlib/common_helper.cpp index d5267342d540..8e92ccd15c1b 100644 --- a/ydb/core/testlib/common_helper.cpp +++ b/ydb/core/testlib/common_helper.cpp @@ -22,6 +22,9 @@ const std::vector TLoggerInit::KqpServices = { const std::vector TLoggerInit::CSServices = { NKikimrServices::TX_COLUMNSHARD, + NKikimrServices::TX_COLUMNSHARD_BLOBS, + NKikimrServices::TX_COLUMNSHARD_BLOBS_BS, + NKikimrServices::TX_COLUMNSHARD_BLOBS_TIER, NKikimrServices::TX_COLUMNSHARD_SCAN, NKikimrServices::TX_CONVEYOR }; diff --git a/ydb/core/testlib/cs_helper.cpp b/ydb/core/testlib/cs_helper.cpp index c6f05ec8c86f..dd26da35fa74 100644 --- a/ydb/core/testlib/cs_helper.cpp +++ b/ydb/core/testlib/cs_helper.cpp @@ -183,7 +183,7 @@ std::shared_ptr THelper::TestArrowBatch(ui64 pathIdBegin, ui TString THelper::GetTestTableSchema() const { TStringBuilder sb; sb << R"(Columns{ Name: "timestamp" Type : "Timestamp" NotNull : true })"; - sb << R"(Columns{ Name: "resource_id" Type : "Utf8" })"; + sb << R"(Columns{ Name: "resource_id" Type : "Utf8" DataAccessorConstructor{ ClassName: "SPARSED" } })"; sb << "Columns{ Name: \"uid\" Type : \"Utf8\" NotNull : true StorageId : \"" + OptionalStorageId + "\" }"; sb << R"(Columns{ Name: "level" Type : "Int32" })"; sb << "Columns{ Name: 
\"message\" Type : \"Utf8\" StorageId : \"" + OptionalStorageId + "\" }"; @@ -198,7 +198,7 @@ TString THelper::GetTestTableSchema() const { return sb; } -void THelper::CreateOlapTableWithStore(TString tableName /*= "olapTable"*/, TString storeName /*= "olapStore"*/, ui32 storeShardsCount /*= 4*/, ui32 tableShardsCount /*= 3*/) { +void THelper::CreateSchemaOlapTablesWithStore(const TString tableSchema, TVector tableNames /*= "olapTable"*/, TString storeName /*= "olapStore"*/, ui32 storeShardsCount /*= 4*/, ui32 tableShardsCount /*= 3*/) { TActorId sender = Server.GetRuntime()->AllocateEdgeActor(); CreateTestOlapStore(sender, Sprintf(R"( Name: "%s" @@ -209,19 +209,25 @@ void THelper::CreateOlapTableWithStore(TString tableName /*= "olapTable"*/, TStr %s } } - )", storeName.c_str(), storeShardsCount, GetTestTableSchema().data())); + )", storeName.c_str(), storeShardsCount, tableSchema.data())); const TString shardingColumns = "[\"" + JoinSeq("\",\"", GetShardingColumns()) + "\"]"; - TBase::CreateTestOlapTable(sender, storeName, Sprintf(R"( - Name: "%s" - ColumnShardCount: %d - Sharding { - HashSharding { - Function: %s - Columns: %s - } - })", tableName.c_str(), tableShardsCount, ShardingMethod.data(), shardingColumns.c_str())); + for (const TString& tableName : tableNames) { + TBase::CreateTestOlapTable(sender, storeName, Sprintf(R"( + Name: "%s" + ColumnShardCount: %d + Sharding { + HashSharding { + Function: %s + Columns: %s + } + })", tableName.c_str(), tableShardsCount, ShardingMethod.data(), shardingColumns.c_str())); + } +} + +void THelper::CreateOlapTablesWithStore(TVector tableNames /*= {"olapTable"}*/, TString storeName /*= "olapStore"*/, ui32 storeShardsCount /*= 4*/, ui32 tableShardsCount /*= 3*/) { + CreateSchemaOlapTablesWithStore(GetTestTableSchema(), tableNames, storeName, storeShardsCount, tableShardsCount); } // Clickbench table diff --git a/ydb/core/testlib/cs_helper.h b/ydb/core/testlib/cs_helper.h index 7a9e3dad1bf3..95c8877b6ba6 100644 --- 
a/ydb/core/testlib/cs_helper.h +++ b/ydb/core/testlib/cs_helper.h @@ -27,11 +27,16 @@ class THelper: public THelperSchemaless { std::shared_ptr GetArrowSchema() const; YDB_FLAG_ACCESSOR(WithJsonDocument, false); YDB_ACCESSOR(TString, OptionalStorageId, "__MEMORY"); +protected: TString ShardingMethod = "HASH_FUNCTION_CONSISTENCY_64"; +private: bool WithSomeNulls_ = false; protected: - void CreateOlapTableWithStore(TString tableName = "olapTable", TString storeName = "olapStore", + void CreateSchemaOlapTablesWithStore(const TString tableSchema, TVector tableName = {"olapTable"}, TString storeName = "olapStore", + ui32 storeShardsCount = 4, ui32 tableShardsCount = 3); + void CreateOlapTablesWithStore(TVector tableName = {"olapTable"}, TString storeName = "olapStore", ui32 storeShardsCount = 4, ui32 tableShardsCount = 3); + public: using TBase::TBase; @@ -43,9 +48,9 @@ class THelper: public THelperSchemaless { static constexpr const char * PROTO_SCHEMA = R"( Columns { Name: "timestamp" Type: "Timestamp" NotNull: true } - Columns { Name: "resource_id" Type: "Utf8" } + Columns { Name: "resource_id" Type: "Utf8" DataAccessorConstructor{ ClassName: "SPARSED" }} Columns { Name: "uid" Type: "Utf8" } - Columns { Name: "level" Type: "Int32" } + Columns { Name: "level" Type: "Int32" DataAccessorConstructor{ ClassName: "SPARSED" }} Columns { Name: "message" Type: "Utf8" } KeyColumnNames: "timestamp" Engine: COLUMN_ENGINE_REPLACING_TIMESERIES diff --git a/ydb/core/testlib/test_client.cpp b/ydb/core/testlib/test_client.cpp index cfb7952b7a73..fbc0ed46e7db 100644 --- a/ydb/core/testlib/test_client.cpp +++ b/ydb/core/testlib/test_client.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -112,6 +113,7 @@ #include #include #include +#include #include #include @@ -250,6 +252,7 @@ namespace Tests { appData.PersQueueMirrorReaderFactory = Settings->PersQueueMirrorReaderFactory.get(); appData.HiveConfig.MergeFrom(Settings->AppConfig->GetHiveConfig()); 
appData.GraphConfig.MergeFrom(Settings->AppConfig->GetGraphConfig()); + appData.SqsConfig.MergeFrom(Settings->AppConfig->GetSqsConfig()); appData.DynamicNameserviceConfig = new TDynamicNameserviceConfig; auto dnConfig = appData.DynamicNameserviceConfig; @@ -469,7 +472,7 @@ namespace Tests { app.AddDomain(domain.Release()); } - TVector TServer::StartPQTablets(ui32 pqTabletsN) { + TVector TServer::StartPQTablets(ui32 pqTabletsN, bool wait) { auto getChannelBind = [](const TString& storagePool) { TChannelBind bind; bind.SetStoragePoolName(storagePool); @@ -504,7 +507,7 @@ namespace Tests { UNIT_ASSERT_EQUAL_C(createTabletReply->Record.GetOwner(), tabletId, createTabletReply->Record.GetOwner() << " != " << tabletId); ui64 id = createTabletReply->Record.GetTabletID(); - while (true) { + while (wait) { auto tabletCreationResult = Runtime->GrabEdgeEventRethrow(handle); UNIT_ASSERT(tabletCreationResult); @@ -559,7 +562,12 @@ namespace Tests { NKikimrBlobStorage::TDefineHostConfig hostConfig; hostConfig.SetHostConfigId(nodeId); - TString path = TStringBuilder() << Runtime->GetTempDir() << "pdisk_1.dat"; + TString path; + if (Settings->UseSectorMap) { + path ="SectorMap:test-client[:2000]"; + } else { + path = TStringBuilder() << Runtime->GetTempDir() << "pdisk_1.dat"; + } hostConfig.AddDrive()->SetPath(path); Cerr << "test_client.cpp: SetPath # " << path << Endl; bsConfigureRequest->Record.MutableRequest()->AddCommand()->MutableDefineHostConfig()->CopyFrom(hostConfig); @@ -760,6 +768,11 @@ namespace Tests { const auto aid = Runtime->Register(actor, nodeIdx, appData.SystemPoolId, TMailboxType::Revolving, 0); Runtime->RegisterService(NCSIndex::MakeServiceId(Runtime->GetNodeId(nodeIdx)), aid, nodeIdx); } + { + auto* actor = NOlap::NGroupedMemoryManager::TScanMemoryLimiterOperator::CreateService(NOlap::NGroupedMemoryManager::TConfig(), new ::NMonitoring::TDynamicCounters()); + const auto aid = Runtime->Register(actor, nodeIdx, appData.UserPoolId, TMailboxType::Revolving, 0); + 
Runtime->RegisterService(NOlap::NGroupedMemoryManager::TScanMemoryLimiterOperator::MakeServiceId(Runtime->GetNodeId(nodeIdx)), aid, nodeIdx); + } { auto* actor = NConveyor::TScanServiceOperator::CreateService(NConveyor::TConfig(), new ::NMonitoring::TDynamicCounters()); const auto aid = Runtime->Register(actor, nodeIdx, appData.UserPoolId, TMailboxType::Revolving, 0); @@ -831,7 +844,7 @@ namespace Tests { auto kqpProxySharedResources = std::make_shared(); IActor* kqpRmService = NKqp::CreateKqpResourceManagerActor( - Settings->AppConfig->GetTableServiceConfig().GetResourceManager(), nullptr, {}, kqpProxySharedResources); + Settings->AppConfig->GetTableServiceConfig().GetResourceManager(), nullptr, {}, kqpProxySharedResources, Runtime->GetNodeId(nodeIdx)); TActorId kqpRmServiceId = Runtime->Register(kqpRmService, nodeIdx); Runtime->RegisterService(NKqp::MakeKqpRmServiceID(Runtime->GetNodeId(nodeIdx)), kqpRmServiceId, nodeIdx); @@ -1133,7 +1146,7 @@ namespace Tests { "TestTenant", nullptr, // MakeIntrusive(), YqSharedResources, - NKikimr::NFolderService::CreateMockFolderServiceAdapterActor, + [](auto& config) { return NKikimr::NFolderService::CreateMockFolderServiceAdapterActor(config, "");}, /*IcPort = */0, {} ); @@ -2692,6 +2705,50 @@ namespace Tests { return Server->DynamicNodes(); } + void TTenants::CreateTenant(Ydb::Cms::CreateDatabaseRequest request, ui32 nodes, TDuration timeout) { + const TString path = request.path(); + const bool serverless = request.has_serverless_resources(); + + // Create new tenant + auto& runtime = *Server->GetRuntime(); + const auto result = NKikimr::NRpcService::DoLocalRpc>( + std::move(request), "", "", runtime.GetActorSystem(0), true + ).ExtractValueSync(); + + if (result.operation().status() != Ydb::StatusIds::SUCCESS) { + NYql::TIssues issues; + NYql::IssuesFromMessage(result.operation().issues(), issues); + ythrow yexception() << "Failed to create tenant " << path << ", " << result.operation().status() << ", reason:\n" << 
issues.ToString(); + } + + // Run new tenant + if (!serverless) { + Run(path, nodes); + } + + // Wait tenant is up + Ydb::Cms::GetDatabaseStatusResult getTenantResult; + const TActorId edgeActor = runtime.AllocateEdgeActor(); + const TInstant start = TInstant::Now(); + while (TInstant::Now() - start <= timeout) { + auto getTenantRequest = std::make_unique(); + getTenantRequest->Record.MutableRequest()->set_path(path); + runtime.SendToPipe(MakeConsoleID(), edgeActor, getTenantRequest.release(), 0, GetPipeConfigWithRetries()); + + auto response = runtime.GrabEdgeEvent(edgeActor, timeout); + if (!response) { + ythrow yexception() << "Waiting CMS get tenant response timeout. Last tenant description:\n" << getTenantResult.DebugString(); + } + response->Get()->Record.GetResponse().operation().result().UnpackTo(&getTenantResult); + if (getTenantResult.state() == Ydb::Cms::GetDatabaseStatusResult::RUNNING) { + return; + } + + Sleep(TDuration::MilliSeconds(100)); + } + ythrow yexception() << "Waiting tenant status RUNNING timeout. Spent time " << TInstant::Now() - start << " exceeds limit " << timeout << ". 
Last tenant description:\n" << getTenantResult.DebugString(); + } + TVector &TTenants::Nodes(const TString &name) { return Tenants[name]; } diff --git a/ydb/core/testlib/test_client.h b/ydb/core/testlib/test_client.h index 795491279c1d..68b878f4de04 100644 --- a/ydb/core/testlib/test_client.h +++ b/ydb/core/testlib/test_client.h @@ -156,6 +156,8 @@ namespace Tests { NYql::IYtGateway::TPtr YtGateway; bool InitializeFederatedQuerySetupFactory = false; TString ServerCertFilePath; + bool Verbose = true; + bool UseSectorMap = false; std::function CreateTicketParser = NKikimr::CreateTicketParser; std::shared_ptr GrpcServiceFactory; @@ -205,6 +207,8 @@ namespace Tests { TServerSettings& SetComputationFactory(NMiniKQL::TComputationNodeFactory computationFactory) { ComputationFactory = std::move(computationFactory); return *this; } TServerSettings& SetYtGateway(NYql::IYtGateway::TPtr ytGateway) { YtGateway = std::move(ytGateway); return *this; } TServerSettings& SetInitializeFederatedQuerySetupFactory(bool value) { InitializeFederatedQuerySetupFactory = value; return *this; } + TServerSettings& SetVerbose(bool value) { Verbose = value; return *this; } + TServerSettings& SetUseSectorMap(bool value) { UseSectorMap = value; return *this; } TServerSettings& SetPersQueueGetReadSessionsInfoWorkerFactory( std::shared_ptr factory ) { @@ -301,7 +305,7 @@ namespace Tests { } } void StartDummyTablets(); - TVector StartPQTablets(ui32 pqTabletsN); + TVector StartPQTablets(ui32 pqTabletsN, bool wait = true); TTestActorRuntime* GetRuntime() const; const TServerSettings& GetSettings() const; const NScheme::TTypeRegistry* GetTypeRegistry(); @@ -630,6 +634,8 @@ namespace Tests { ui32 Availabe() const; ui32 Capacity() const; + void CreateTenant(Ydb::Cms::CreateDatabaseRequest request, ui32 nodes = 1, TDuration timeout = TDuration::Seconds(30)); + private: TVector& Nodes(const TString &name); void StopNode(const TString /*name*/, ui32 nodeIdx); diff --git a/ydb/core/testlib/ya.make 
b/ydb/core/testlib/ya.make index 3bc9f140b1f0..5a63f36a639f 100644 --- a/ydb/core/testlib/ya.make +++ b/ydb/core/testlib/ya.make @@ -100,7 +100,9 @@ PEERDIR( ydb/services/datastreams ydb/services/discovery ydb/services/ext_index/service + ydb/services/ymq ydb/core/tx/conveyor/service + ydb/core/tx/limiter/grouped_memory/usage ydb/services/fq ydb/services/kesus ydb/services/persqueue_cluster_discovery diff --git a/ydb/core/tx/columnshard/background_controller.cpp b/ydb/core/tx/columnshard/background_controller.cpp index fa6a2b32eb85..7449e7d31ff4 100644 --- a/ydb/core/tx/columnshard/background_controller.cpp +++ b/ydb/core/tx/columnshard/background_controller.cpp @@ -20,7 +20,7 @@ void TBackgroundController::CheckDeadlines() { void TBackgroundController::CheckDeadlinesIndexation() { for (auto&& i : ActiveIndexationTasks) { if (TMonotonic::Now() - i.second > NOlap::TCompactionLimits::CompactionTimeout) { - AFL_CRIT(NKikimrServices::TX_COLUMNSHARD)("event", "deadline_compaction")("task_id", i.first); + AFL_CRIT(NKikimrServices::TX_COLUMNSHARD)("event", "deadline_indexation")("task_id", i.first); Y_DEBUG_ABORT_UNLESS(false); } } diff --git a/ydb/core/tx/columnshard/background_controller.h b/ydb/core/tx/columnshard/background_controller.h index bb38f2744061..b57a29d5b072 100644 --- a/ydb/core/tx/columnshard/background_controller.h +++ b/ydb/core/tx/columnshard/background_controller.h @@ -1,6 +1,7 @@ #pragma once #include "engines/changes/abstract/compaction_info.h" #include "engines/portions/meta.h" +#include namespace NKikimr::NOlap { class TColumnEngineChanges; @@ -15,11 +16,16 @@ class TBackgroundController { using TCurrentCompaction = THashMap; TCurrentCompaction ActiveCompactionInfo; + std::shared_ptr Counters; bool ActiveCleanupPortions = false; bool ActiveCleanupTables = false; bool ActiveCleanupInsertTable = false; YDB_READONLY(TMonotonic, LastIndexationInstant, TMonotonic::Zero()); public: + TBackgroundController(std::shared_ptr counters) + : 
Counters(std::move(counters)) { + } + THashSet GetConflictTTLPortions() const; THashSet GetConflictCompactionPortions() const; @@ -29,6 +35,7 @@ class TBackgroundController { bool StartCompaction(const NOlap::TPlanCompactionInfo& info); void FinishCompaction(const NOlap::TPlanCompactionInfo& info) { Y_ABORT_UNLESS(ActiveCompactionInfo.erase(info.GetPathId())); + Counters->OnCompactionFinish(info.GetPathId()); } const TCurrentCompaction& GetActiveCompaction() const { return ActiveCompactionInfo; diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/blob_set.h b/ydb/core/tx/columnshard/blobs_action/abstract/blob_set.h index d14d7b2dee19..ee1cf91bb419 100644 --- a/ydb/core/tx/columnshard/blobs_action/abstract/blob_set.h +++ b/ydb/core/tx/columnshard/blobs_action/abstract/blob_set.h @@ -1,6 +1,7 @@ #pragma once #include #include +#include #include #include @@ -44,6 +45,60 @@ class TTabletByBlob { }; +class TBlobsByGenStep { +private: + struct TGenStepFromLogoBlobIdComparator { + bool operator()(const TLogoBlobID& l, const TLogoBlobID& r) const { + TGenStep gsl(l); + TGenStep gsr(r); + if (gsl == gsr) { + return l < r; + } else { + return gsl < gsr; + } + } + }; + std::set Blobs; +public: + [[nodiscard]] bool Add(const TLogoBlobID& blobId) { + return Blobs.emplace(blobId).second; + } + [[nodiscard]] bool Remove(const TLogoBlobID& blobId) { + return Blobs.erase(blobId); + } + bool IsEmpty() const { + return Blobs.empty(); + } + size_t GetSize() const { + return Blobs.size(); + } + + TGenStep GetMinGenStepVerified() const { + AFL_VERIFY(Blobs.size()); + return TGenStep(*Blobs.begin()); + } + + template + requires std::invocable + bool ExtractTo(const TGenStep& lessOrEqualThan, const ui32 countLimit, const TActor& actor) { + ui32 idx = 0; + for (auto it = Blobs.begin(); it != Blobs.end(); ++it) { + TGenStep gs(*it); + if (lessOrEqualThan < gs) { + Blobs.erase(Blobs.begin(), it); + return true; + } + if (++idx > countLimit) { + Blobs.erase(Blobs.begin(), it); + return 
false; + } + actor(gs, *it); + } + Blobs.clear(); + return true; + } +}; + class TTabletsByBlob { private: THashMap> Data; @@ -518,11 +573,11 @@ class TBlobsCategories { void AddSharing(const TTabletId tabletId, const TUnifiedBlobId& id) { AFL_VERIFY(Sharing.Add(tabletId, id)); } - void RemoveSharing(const TTabletId tabletId, const TUnifiedBlobId& id) { - Y_UNUSED(Sharing.Remove(tabletId, id)); + [[nodiscard]] bool RemoveSharing(const TTabletId tabletId, const TUnifiedBlobId& id) { + return Sharing.Remove(tabletId, id); } - void RemoveBorrowed(const TTabletId tabletId, const TUnifiedBlobId& id) { - Y_UNUSED(Borrowed.Remove(tabletId, id)); + [[nodiscard]] bool RemoveBorrowed(const TTabletId tabletId, const TUnifiedBlobId& id) { + return Borrowed.Remove(tabletId, id); } TBlobsCategories(const TTabletId selfTabletId) : SelfTabletId(selfTabletId) diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/gc.h b/ydb/core/tx/columnshard/blobs_action/abstract/gc.h index 19e2da1b39b6..3c7279e7b3f1 100644 --- a/ydb/core/tx/columnshard/blobs_action/abstract/gc.h +++ b/ydb/core/tx/columnshard/blobs_action/abstract/gc.h @@ -33,7 +33,7 @@ class IBlobsGCAction: public ICommonBlobsAction { virtual bool DoIsEmpty() const = 0; public: void AddSharedBlobToNextIteration(const TUnifiedBlobId& blobId, const TTabletId ownerTabletId) { - BlobsToRemove.RemoveSharing(ownerTabletId, blobId); + AFL_VERIFY(BlobsToRemove.RemoveBorrowed(ownerTabletId, blobId)); } void OnExecuteTxAfterCleaning(NColumnShard::TColumnShard& self, TBlobManagerDb& dbBlobs); diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/storages_manager.cpp b/ydb/core/tx/columnshard/blobs_action/abstract/storages_manager.cpp index 8b80dd9be277..0902a464871a 100644 --- a/ydb/core/tx/columnshard/blobs_action/abstract/storages_manager.cpp +++ b/ydb/core/tx/columnshard/blobs_action/abstract/storages_manager.cpp @@ -51,6 +51,7 @@ void IStoragesManager::OnTieringModified(const 
std::shared_ptrCounters.OnPutResult(blobId.BlobSize()); Y_ABORT_UNLESS(status == NKikimrProto::OK, "The caller must handle unsuccessful status"); Y_ABORT_UNLESS(BatchInfo); Y_ABORT_UNLESS(blobId.Cookie() < BatchInfo->InFlight.size()); @@ -129,6 +128,8 @@ TBlobManager::TBlobManager(TIntrusivePtr tabletInfo, ui32 ge , CurrentGen(gen) , CurrentStep(0) { + BlobsManagerCounters.CurrentGen->Set(CurrentGen); + BlobsManagerCounters.CurrentStep->Set(CurrentStep); } void TBlobManager::RegisterControls(NKikimr::TControlBoard& /*icb*/) { @@ -151,43 +152,19 @@ bool TBlobManager::LoadState(IBlobManagerDb& db, const TTabletId selfTabletId) { return false; } - for (auto it = BlobsToDelete.GetIterator(); it.IsValid(); ++it) { - BlobsManagerCounters.OnDeleteBlobMarker(it.GetBlobId().BlobSize()); - } - BlobsManagerCounters.OnBlobsDelete(BlobsToDelete); + BlobsManagerCounters.OnBlobsToDelete(BlobsToDelete); // Build the list of steps that cannot be garbage collected before Keep flag is set on the blobs - THashSet genStepsWithBlobsToKeep; - std::map> blobsToKeepLocal; + TBlobsByGenStep blobsToKeepLocal; for (const auto& unifiedBlobId : blobsToKeep) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("add_blob_to_keep", unifiedBlobId.ToStringNew()); TLogoBlobID blobId = unifiedBlobId.GetLogoBlobId(); - TGenStep genStep(blobId); - Y_ABORT_UNLESS(LastCollectedGenStep < genStep); - - AFL_VERIFY(blobsToKeepLocal[genStep].emplace(blobId).second)("blob_to_keep_double", unifiedBlobId.ToStringNew()); - BlobsManagerCounters.OnKeepMarker(blobId.BlobSize()); - const ui64 groupId = dsGroupSelector.GetGroup(blobId); - // Keep + DontKeep (probably in different gen:steps) - // GC could go through it to a greater LastCollectedGenStep - if (BlobsToDelete.Contains(SelfTabletId, TUnifiedBlobId(groupId, blobId))) { - continue; - } + Y_ABORT_UNLESS(LastCollectedGenStep < TGenStep(blobId)); - genStepsWithBlobsToKeep.insert(genStep); + AFL_VERIFY(blobsToKeepLocal.Add(blobId))("blob_to_keep_double", 
unifiedBlobId.ToStringNew()); } std::swap(blobsToKeepLocal, BlobsToKeep); - BlobsManagerCounters.OnBlobsKeep(BlobsToKeep); - - AllocatedGenSteps.clear(); - for (const auto& gs : genStepsWithBlobsToKeep) { - AllocatedGenSteps.push_back(new TAllocatedGenStep(gs)); - } - AllocatedGenSteps.push_back(new TAllocatedGenStep({ CurrentGen, 0 })); - - Sort(AllocatedGenSteps.begin(), AllocatedGenSteps.end(), [](const TAllocatedGenStepConstPtr& a, const TAllocatedGenStepConstPtr& b) { - return a->GenStep < b->GenStep; - }); + BlobsManagerCounters.OnBlobsToKeep(BlobsToKeep); return true; } @@ -201,7 +178,7 @@ void TBlobManager::PopGCBarriers(const TGenStep gs) { std::deque TBlobManager::FindNewGCBarriers() { TGenStep newCollectGenStep = LastCollectedGenStep; std::deque result; - if (AllocatedGenSteps.empty()) { + if (AllocatedGenSteps.empty() && LastCollectedGenStep < TGenStep(CurrentGen, CurrentStep)) { result.emplace_back(TGenStep(CurrentGen, CurrentStep)); } for (auto& allocated : AllocatedGenSteps) { @@ -212,9 +189,6 @@ std::deque TBlobManager::FindNewGCBarriers() { result.emplace_back(allocated->GenStep); newCollectGenStep = allocated->GenStep; } - if (result.empty() || LastCollectedGenStep < result.front()) { - result.emplace_front(LastCollectedGenStep); - } return result; } @@ -226,6 +200,22 @@ class TBlobManager::TGCContext { YDB_ACCESSOR_DEF(std::deque, KeepsToErase); YDB_READONLY_DEF(std::shared_ptr, SharedBlobsManager); public: + ui64 GetKeepBytes() const { + ui64 size = 0; + for (auto&& i : KeepsToErase) { + size += i.BlobSize(); + } + return size; + } + + ui64 GetDeleteBytes() const { + ui64 size = 0; + for (TTabletsByBlob::TIterator it(ExtractedToRemoveFromDB); it.IsValid(); ++it) { + size += it.GetBlobId().BlobSize(); + } + return size; + } + TGCContext(const std::shared_ptr& sharedBlobsManager) : SharedBlobsManager(sharedBlobsManager) { @@ -237,7 +227,6 @@ class TBlobManager::TGCContext { // TODO: we need only actual channel history here for (ui32 channelIdx = 
2; channelIdx < tabletInfo->Channels.size(); ++channelIdx) { const auto& channelHistory = tabletInfo->ChannelInfo(channelIdx)->History; - for (auto it = channelHistory.begin(); it != channelHistory.end(); ++it) { PerGroupGCListsInFlight[TBlobAddress(it->GroupID, channelIdx)]; } @@ -281,78 +270,62 @@ void TBlobManager::DrainDeleteTo(const TGenStep& dest, TGCContext& gcContext) { } } -bool TBlobManager::DrainKeepTo(const TGenStep& dest, TGCContext& gcContext, const bool controlCapacity) { - AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("event", "PreparePerGroupGCRequests")("gen_step", dest)("gs_blobs_to_keep_count", BlobsToKeep.size()); - for (; BlobsToKeep.size() && (!controlCapacity || !gcContext.IsFull()); BlobsToKeep.erase(BlobsToKeep.begin())) { - auto gsBlobs = BlobsToKeep.begin(); - TGenStep genStep = gsBlobs->first; +bool TBlobManager::DrainKeepTo(const TGenStep& dest, TGCContext& gcContext) { + AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("event", "PreparePerGroupGCRequests")("gen_step", dest)("gs_blobs_to_keep_count", BlobsToKeep.GetSize()); + + const auto pred = [&](const TGenStep& genStep, const TLogoBlobID& logoBlobId) { AFL_VERIFY(LastCollectedGenStep < genStep)("last", LastCollectedGenStep.ToString())("gen", genStep.ToString()); - if (dest < genStep) { - return true; - } - for (auto&& keepBlobIt : gsBlobs->second) { - const ui32 blobGroup = TabletInfo->GroupFor(keepBlobIt.Channel(), keepBlobIt.Generation()); - TBlobAddress bAddress(blobGroup, keepBlobIt.Channel()); - const TUnifiedBlobId keepUnified(blobGroup, keepBlobIt); - gcContext.MutableKeepsToErase().emplace_back(keepUnified); - if (BlobsToDelete.ExtractBlobTo(keepUnified, gcContext.MutableExtractedToRemoveFromDB())) { - if (keepBlobIt.Generation() == CurrentGen) { - AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("to_not_keep", keepUnified.ToStringNew()); - continue; - } - if (gcContext.GetSharedBlobsManager()->BuildStoreCategories({ keepUnified }).GetDirect().IsEmpty()) { - 
AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("to_not_keep_not_direct", keepUnified.ToStringNew()); - continue; - } - AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("to_not_keep_old", keepUnified.ToStringNew()); - gcContext.MutablePerGroupGCListsInFlight()[bAddress].DontKeepList.insert(keepBlobIt); - } else { - AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("to_keep", keepUnified.ToStringNew()); - gcContext.MutablePerGroupGCListsInFlight()[bAddress].KeepList.insert(keepBlobIt); + const ui32 blobGroup = TabletInfo->GroupFor(logoBlobId.Channel(), logoBlobId.Generation()); + TBlobAddress bAddress(blobGroup, logoBlobId.Channel()); + const TUnifiedBlobId keepUnified(blobGroup, logoBlobId); + gcContext.MutableKeepsToErase().emplace_back(keepUnified); + if (BlobsToDelete.ExtractBlobTo(keepUnified, gcContext.MutableExtractedToRemoveFromDB())) { + if (logoBlobId.Generation() == CurrentGen) { + AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("to_not_keep", keepUnified.ToStringNew()); + return; } + if (gcContext.GetSharedBlobsManager()->BuildStoreCategories({ keepUnified }).GetDirect().IsEmpty()) { + AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("to_not_keep_not_direct", keepUnified.ToStringNew()); + return; + } + AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("to_not_keep_old", keepUnified.ToStringNew()); + gcContext.MutablePerGroupGCListsInFlight()[bAddress].DontKeepList.insert(logoBlobId); + } else { + AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("to_keep", keepUnified.ToStringNew()); + gcContext.MutablePerGroupGCListsInFlight()[bAddress].KeepList.insert(logoBlobId); } - } - return BlobsToKeep.empty(); + }; + + return BlobsToKeep.ExtractTo(dest, gcContext.GetFreeSpace(), pred); } std::shared_ptr TBlobManager::BuildGCTask(const TString& storageId, const std::shared_ptr& manager, const std::shared_ptr& sharedBlobsInfo, const std::shared_ptr& counters) noexcept { AFL_VERIFY(!CollectGenStepInFlight); - if (BlobsToKeep.empty() && 
BlobsToDelete.IsEmpty() && LastCollectedGenStep == TGenStep{ CurrentGen, CurrentStep }) { - ACFL_DEBUG("event", "TBlobManager::BuildGCTask skip")("current_gen", CurrentGen)("current_step", CurrentStep); + if (BlobsToKeep.IsEmpty() && BlobsToDelete.IsEmpty() && LastCollectedGenStep == TGenStep{ CurrentGen, CurrentStep }) { + BlobsManagerCounters.GCCounters.SkipCollectionEmpty->Add(1); + ACFL_DEBUG("event", "TBlobManager::BuildGCTask skip")("current_gen", CurrentGen)("current_step", CurrentStep)("reason", "empty"); return nullptr; } - if (AppData()->TimeProvider->Now() - PreviousGCTime < NYDBTest::TControllers::GetColumnShardController()->GetOverridenGCPeriod(TDuration::Seconds(GC_INTERVAL_SECONDS))) { + if (AppData()->TimeProvider->Now() - PreviousGCTime < NYDBTest::TControllers::GetColumnShardController()->GetOverridenGCPeriod()) { + ACFL_DEBUG("event", "TBlobManager::BuildGCTask skip")("current_gen", CurrentGen)("current_step", CurrentStep)("reason", "too_often"); + BlobsManagerCounters.GCCounters.SkipCollectionThrottling->Add(1); return nullptr; } PreviousGCTime = AppData()->TimeProvider->Now(); TGCContext gcContext(sharedBlobsInfo); - if (FirstGC) { - gcContext.InitializeFirst(TabletInfo); - FirstGC = false; - } - NActors::TLogContextGuard lGuard = NActors::TLogContextBuilder::Build()("action_id", TGUID::CreateTimebased().AsGuidString()); const std::deque newCollectGenSteps = FindNewGCBarriers(); - AFL_VERIFY(newCollectGenSteps.size()); - AFL_VERIFY(newCollectGenSteps.front() == LastCollectedGenStep); if (GCBarrierPreparation != LastCollectedGenStep) { - if (!GCBarrierPreparation.Generation()) { - for (auto&& newCollectGenStep : newCollectGenSteps) { - if (!DrainKeepTo(newCollectGenStep, gcContext)) { - break; - } - CollectGenStepInFlight = std::max(CollectGenStepInFlight.value_or(newCollectGenStep), newCollectGenStep); - } - AFL_VERIFY(LastCollectedGenStep <= CollectGenStepInFlight)("last", LastCollectedGenStep)("collect", CollectGenStepInFlight); - } else { - 
AFL_VERIFY(GCBarrierPreparation.Generation() != CurrentGen); + if (GCBarrierPreparation.Generation()) { + AFL_VERIFY(GCBarrierPreparation.Generation() < CurrentGen); AFL_VERIFY(LastCollectedGenStep <= GCBarrierPreparation); - CollectGenStepInFlight = GCBarrierPreparation; - AFL_VERIFY(DrainKeepTo(*CollectGenStepInFlight, gcContext, false)); + if (DrainKeepTo(GCBarrierPreparation, gcContext)) { + CollectGenStepInFlight = GCBarrierPreparation; + } } } else { DrainDeleteTo(LastCollectedGenStep, gcContext); @@ -361,36 +334,45 @@ std::shared_ptr TBlobManager::BuildGCTas if (!DrainKeepTo(newCollectGenStep, gcContext)) { break; } - CollectGenStepInFlight = std::max(CollectGenStepInFlight.value_or(newCollectGenStep), newCollectGenStep); + if (newCollectGenStep.Generation() == CurrentGen) { + CollectGenStepInFlight = std::max(CollectGenStepInFlight.value_or(newCollectGenStep), newCollectGenStep); + } } - if (!CollectGenStepInFlight) { - CollectGenStepInFlight = LastCollectedGenStep; + if (CollectGenStepInFlight) { + PopGCBarriers(*CollectGenStepInFlight); + if (FirstGC) { + gcContext.InitializeFirst(TabletInfo); + FirstGC = false; + } + if (!BlobsToKeep.IsEmpty()) { + AFL_VERIFY(*CollectGenStepInFlight < BlobsToKeep.GetMinGenStepVerified())("gs", *CollectGenStepInFlight)("first", BlobsToKeep.GetMinGenStepVerified()); + } + AFL_VERIFY(LastCollectedGenStep < *CollectGenStepInFlight); } - PopGCBarriers(*CollectGenStepInFlight); - AFL_VERIFY(LastCollectedGenStep <= *CollectGenStepInFlight); - AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("notice", "collect_gen_step")("value", *CollectGenStepInFlight)("current_gen", CurrentGen); + AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("notice", "collect_gen_step")("value", CollectGenStepInFlight)("current_gen", CurrentGen); - const bool isFull = gcContext.IsFull(); + if (gcContext.IsFull()) { + PreviousGCTime = TInstant::Zero(); + } + BlobsManagerCounters.GCCounters.OnGCTask(gcContext.GetKeepsToErase().size(), 
gcContext.GetKeepBytes(), + gcContext.GetExtractedToRemoveFromDB().GetSize(), gcContext.GetDeleteBytes(), gcContext.IsFull(), !!CollectGenStepInFlight); auto removeCategories = sharedBlobsInfo->BuildRemoveCategories(std::move(gcContext.MutableExtractedToRemoveFromDB())); - - auto result = std::make_shared(storageId, std::move(gcContext.MutablePerGroupGCListsInFlight()), *CollectGenStepInFlight, - std::move(gcContext.MutableKeepsToErase()), manager, std::move(removeCategories), counters, TabletInfo->TabletID, CurrentGen); + auto result = std::make_shared(storageId, std::move(gcContext.MutablePerGroupGCListsInFlight()), + CollectGenStepInFlight, std::move(gcContext.MutableKeepsToErase()), manager, std::move(removeCategories), counters, TabletInfo->TabletID, CurrentGen); if (result->IsEmpty()) { + BlobsManagerCounters.GCCounters.OnEmptyGCTask(); CollectGenStepInFlight = {}; return nullptr; } - if (isFull) { - PreviousGCTime = TInstant::Zero(); - } - return result; } TBlobBatch TBlobManager::StartBlobBatch() { - ++CurrentStep; + AFL_VERIFY(++CurrentStep < Max() - 10); + BlobsManagerCounters.CurrentStep->Set(CurrentStep); AFL_VERIFY(TabletInfo->Channels.size() > 2); const auto& channel = TabletInfo->Channels[(CurrentStep % (TabletInfo->Channels.size() - 2)) + 2]; ++CountersUpdate.BatchesStarted; @@ -418,11 +400,9 @@ void TBlobManager::DoSaveBlobBatchOnComplete(TBlobBatch&& blobBatch) { AFL_VERIFY(genStep > edgeGenStep)("gen_step", genStep)("edge_gen_step", edgeGenStep)("blob_id", blobId.ToStringNew()); AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("to_keep", logoBlobId.ToString()); - BlobsManagerCounters.OnKeepMarker(logoBlobId.BlobSize()); - AFL_VERIFY(BlobsToKeep[genStep].emplace(logoBlobId).second); + AFL_VERIFY(BlobsToKeep.Add(logoBlobId)); + BlobsManagerCounters.OnBlobsToKeep(BlobsToKeep); } - BlobsManagerCounters.OnBlobsKeep(BlobsToKeep); - blobBatch.BatchInfo->GenStepRef.Reset(); } @@ -458,33 +438,42 @@ void TBlobManager::DeleteBlobOnComplete(const TTabletId 
tabletId, const TUnified if (!IsBlobInUsage(blobId)) { LOG_S_DEBUG("BlobManager at tablet " << TabletInfo->TabletID << " Delete Blob " << blobId); AFL_VERIFY(BlobsToDelete.Add(tabletId, blobId)); - BlobsManagerCounters.OnDeleteBlobMarker(blobId.BlobSize()); - BlobsManagerCounters.OnBlobsDelete(BlobsToDelete); + BlobsManagerCounters.OnBlobsToDelete(BlobsToDelete); } else { - BlobsManagerCounters.OnDeleteBlobDelayedMarker(blobId.BlobSize()); LOG_S_DEBUG("BlobManager at tablet " << TabletInfo->TabletID << " Delay Delete Blob " << blobId); - BlobsToDeleteDelayed.Add(tabletId, blobId); + AFL_VERIFY(BlobsToDeleteDelayed.Add(tabletId, blobId)); + BlobsManagerCounters.OnBlobsToDeleteDelayed(BlobsToDeleteDelayed); } } -void TBlobManager::OnGCFinishedOnExecute(const TGenStep& genStep, IBlobManagerDb& db) { - db.SaveLastGcBarrier(genStep); +void TBlobManager::OnGCFinishedOnExecute(const std::optional& genStep, IBlobManagerDb& db) { + if (genStep) { + db.SaveLastGcBarrier(*genStep); + } } -void TBlobManager::OnGCFinishedOnComplete(const TGenStep& genStep) { - LastCollectedGenStep = genStep; - AFL_VERIFY(GCBarrierPreparation == LastCollectedGenStep)("prepare", GCBarrierPreparation)("last", LastCollectedGenStep); - CollectGenStepInFlight.reset(); +void TBlobManager::OnGCFinishedOnComplete(const std::optional& genStep) { + if (genStep) { + LastCollectedGenStep = *genStep; + AFL_VERIFY(GCBarrierPreparation == LastCollectedGenStep)("prepare", GCBarrierPreparation)("last", LastCollectedGenStep); + CollectGenStepInFlight.reset(); + } else { + AFL_VERIFY(!CollectGenStepInFlight); + } } -void TBlobManager::OnGCStartOnExecute(const TGenStep& genStep, IBlobManagerDb& db) { - AFL_VERIFY(LastCollectedGenStep <= genStep)("last", LastCollectedGenStep)("prepared", genStep); - db.SaveGCBarrierPreparation(genStep); +void TBlobManager::OnGCStartOnExecute(const std::optional& genStep, IBlobManagerDb& db) { + if (genStep) { + AFL_VERIFY(LastCollectedGenStep < *genStep)("last", 
LastCollectedGenStep)("prepared", genStep); + db.SaveGCBarrierPreparation(*genStep); + } } -void TBlobManager::OnGCStartOnComplete(const TGenStep& genStep) { - AFL_VERIFY(GCBarrierPreparation <= genStep)("last", GCBarrierPreparation)("prepared", genStep); - GCBarrierPreparation = genStep; +void TBlobManager::OnGCStartOnComplete(const std::optional& genStep) { + if (genStep) { + AFL_VERIFY(GCBarrierPreparation <= *genStep)("last", GCBarrierPreparation)("prepared", genStep); + GCBarrierPreparation = *genStep; + } } void TBlobManager::OnBlobFree(const TUnifiedBlobId& blobId) { @@ -492,8 +481,8 @@ void TBlobManager::OnBlobFree(const TUnifiedBlobId& blobId) { // Check if the blob is marked for delayed deletion if (BlobsToDeleteDelayed.ExtractBlobTo(blobId, BlobsToDelete)) { AFL_INFO(NKikimrServices::TX_COLUMNSHARD_BLOBS_BS)("blob_id", blobId)("event", "blob_delayed_deleted"); - BlobsManagerCounters.OnBlobsDelete(BlobsToDelete); - BlobsManagerCounters.OnDeleteBlobMarker(blobId.GetLogoBlobId().BlobSize()); + BlobsManagerCounters.OnBlobsToDelete(BlobsToDelete); + BlobsManagerCounters.OnBlobsToDeleteDelayed(BlobsToDeleteDelayed); } } diff --git a/ydb/core/tx/columnshard/blobs_action/bs/blob_manager.h b/ydb/core/tx/columnshard/blobs_action/bs/blob_manager.h index c7a507553c24..52e0f573eb60 100644 --- a/ydb/core/tx/columnshard/blobs_action/bs/blob_manager.h +++ b/ydb/core/tx/columnshard/blobs_action/bs/blob_manager.h @@ -133,9 +133,6 @@ struct TBlobManagerCounters { // The implementation of BlobManager that hides all GC-related details class TBlobManager : public IBlobManager, public TCommonBlobsTracker { -private: - static constexpr ui64 GC_INTERVAL_SECONDS = 30; - private: using TBlobAddress = NBlobOperations::NBlobStorage::TBlobAddress; class TGCContext; @@ -145,7 +142,7 @@ class TBlobManager : public IBlobManager, public TCommonBlobsTracker { ui32 CurrentStep; std::optional CollectGenStepInFlight; // Lists of blobs that need Keep flag to be set - std::map> BlobsToKeep; + 
TBlobsByGenStep BlobsToKeep; // Lists of blobs that need DoNotKeep flag to be set TTabletsByBlob BlobsToDelete; @@ -173,7 +170,7 @@ class TBlobManager : public IBlobManager, public TCommonBlobsTracker { virtual void DoSaveBlobBatchOnExecute(const TBlobBatch& blobBatch, IBlobManagerDb& db) override; virtual void DoSaveBlobBatchOnComplete(TBlobBatch&& blobBatch) override; void DrainDeleteTo(const TGenStep& dest, TGCContext& gcContext); - [[nodiscard]] bool DrainKeepTo(const TGenStep& dest, TGCContext& gcContext, const bool controlCapacity = true); + [[nodiscard]] bool DrainKeepTo(const TGenStep& dest, TGCContext& gcContext); public: TBlobManager(TIntrusivePtr tabletInfo, const ui32 gen, const TTabletId selfTabletId); @@ -215,11 +212,11 @@ class TBlobManager : public IBlobManager, public TCommonBlobsTracker { const std::shared_ptr& manager, const std::shared_ptr& sharedBlobsInfo, const std::shared_ptr& counters) noexcept; - void OnGCFinishedOnExecute(const TGenStep& genStep, IBlobManagerDb& db); - void OnGCFinishedOnComplete(const TGenStep& genStep); + void OnGCFinishedOnExecute(const std::optional& genStep, IBlobManagerDb& db); + void OnGCFinishedOnComplete(const std::optional& genStep); - void OnGCStartOnExecute(const TGenStep& genStep, IBlobManagerDb& db); - void OnGCStartOnComplete(const TGenStep& genStep); + void OnGCStartOnExecute(const std::optional& genStep, IBlobManagerDb& db); + void OnGCStartOnComplete(const std::optional& genStep); TBlobManagerCounters GetCountersUpdate() { TBlobManagerCounters res = CountersUpdate; @@ -239,7 +236,7 @@ class TBlobManager : public IBlobManager, public TCommonBlobsTracker { bool ExtractEvicted(TEvictedBlob& evict, TEvictMetadata& meta, bool fromDropped = false); TGenStep EdgeGenStep() const { - return CollectGenStepInFlight ? *CollectGenStepInFlight : LastCollectedGenStep; + return CollectGenStepInFlight ? 
*CollectGenStepInFlight : std::max(GCBarrierPreparation, LastCollectedGenStep); } }; diff --git a/ydb/core/tx/columnshard/blobs_action/bs/gc.cpp b/ydb/core/tx/columnshard/blobs_action/bs/gc.cpp index ca22c12bd968..a72c6fb413de 100644 --- a/ydb/core/tx/columnshard/blobs_action/bs/gc.cpp +++ b/ydb/core/tx/columnshard/blobs_action/bs/gc.cpp @@ -31,7 +31,7 @@ bool TGCTask::DoOnCompleteTxBeforeCleaning(NColumnShard::TColumnShard& /*self*/, return true; } -TGCTask::TGCTask(const TString& storageId, TGCListsByGroup&& listsByGroupId, const TGenStep& collectGenStepInFlight, std::deque&& keepsToErase, +TGCTask::TGCTask(const TString& storageId, TGCListsByGroup&& listsByGroupId, const std::optional& collectGenStepInFlight, std::deque&& keepsToErase, const std::shared_ptr& manager, TBlobsCategories&& blobsToRemove, const std::shared_ptr& counters, const ui64 tabletId, const ui64 currentGen) : TBase(storageId, std::move(blobsToRemove), counters) @@ -65,8 +65,8 @@ std::unique_ptr TGCTask::BuildRequest(const T ("count", it->second.RequestsCount); auto result = std::make_unique( TabletId, CurrentGen, PerGenerationCounter.Val(), - address.GetChannelId(), true, - CollectGenStepInFlight.Generation(), CollectGenStepInFlight.Step(), + address.GetChannelId(), !!CollectGenStepInFlight, + CollectGenStepInFlight ? CollectGenStepInFlight->Generation() : 0, CollectGenStepInFlight ? 
CollectGenStepInFlight->Step() : 0, new TVector(it->second.KeepList.begin(), it->second.KeepList.end()), new TVector(it->second.DontKeepList.begin(), it->second.DontKeepList.end()), TInstant::Max(), true); diff --git a/ydb/core/tx/columnshard/blobs_action/bs/gc.h b/ydb/core/tx/columnshard/blobs_action/bs/gc.h index a8f334780417..5471fc04c0dd 100644 --- a/ydb/core/tx/columnshard/blobs_action/bs/gc.h +++ b/ydb/core/tx/columnshard/blobs_action/bs/gc.h @@ -21,7 +21,7 @@ class TGCTask: public IBlobsGCAction { using TGCListsByGroup = THashMap; private: TGCListsByGroup ListsByGroupId; - const TGenStep CollectGenStepInFlight; + const std::optional CollectGenStepInFlight; const ui64 TabletId; const ui64 CurrentGen; std::deque KeepsToErase; @@ -35,11 +35,11 @@ class TGCTask: public IBlobsGCAction { virtual bool DoOnCompleteTxBeforeCleaning(NColumnShard::TColumnShard& self, const std::shared_ptr& taskAction) override; virtual bool DoIsEmpty() const override { - return false; + return !CollectGenStepInFlight && KeepsToErase.empty(); } public: - TGCTask(const TString& storageId, TGCListsByGroup&& listsByGroupId, const TGenStep& collectGenStepInFlight, std::deque&& keepsToErase, + TGCTask(const TString& storageId, TGCListsByGroup&& listsByGroupId, const std::optional& collectGenStepInFlight, std::deque&& keepsToErase, const std::shared_ptr& manager, TBlobsCategories&& blobsToRemove, const std::shared_ptr& counters, const ui64 tabletId, const ui64 currentGen); const TGCListsByGroup& GetListsByGroupId() const { diff --git a/ydb/core/tx/columnshard/blobs_action/bs/write.cpp b/ydb/core/tx/columnshard/blobs_action/bs/write.cpp index 021abc972a0d..06ebc64eccf3 100644 --- a/ydb/core/tx/columnshard/blobs_action/bs/write.cpp +++ b/ydb/core/tx/columnshard/blobs_action/bs/write.cpp @@ -15,13 +15,10 @@ void TWriteAction::DoOnCompleteTxAfterWrite(NColumnShard::TColumnShard& self, co ui64 blobsWritten = BlobBatch.GetBlobCount(); ui64 bytesWritten = BlobBatch.GetTotalSize(); if 
(blobsWroteSuccessfully) { - self.IncCounter(NColumnShard::COUNTER_UPSERT_BLOBS_WRITTEN, blobsWritten); - self.IncCounter(NColumnShard::COUNTER_UPSERT_BYTES_WRITTEN, bytesWritten); - // self.IncCounter(NColumnShard::COUNTER_RAW_BYTES_UPSERTED, insertedBytes); - self.IncCounter(NColumnShard::COUNTER_WRITE_SUCCESS); + self.Counters.GetTabletCounters()->OnWriteSuccess(blobsWritten, bytesWritten); Manager->SaveBlobBatchOnComplete(std::move(BlobBatch)); } else { - self.IncCounter(NColumnShard::COUNTER_WRITE_FAIL); + self.Counters.GetTabletCounters()->OnWriteFailure(); } } diff --git a/ydb/core/tx/columnshard/blobs_action/common/const.h b/ydb/core/tx/columnshard/blobs_action/common/const.h index 8901620b2dd0..d599b97050ce 100644 --- a/ydb/core/tx/columnshard/blobs_action/common/const.h +++ b/ydb/core/tx/columnshard/blobs_action/common/const.h @@ -7,6 +7,7 @@ class TGlobal { public: static const inline TString DefaultStorageId = "__DEFAULT"; static const inline TString MemoryStorageId = "__MEMORY"; + static const inline TString LocalMetadataStorageId = "__LOCAL_METADATA"; }; } \ No newline at end of file diff --git a/ydb/core/tx/columnshard/blobs_action/local/storage.cpp b/ydb/core/tx/columnshard/blobs_action/local/storage.cpp new file mode 100644 index 000000000000..141ffe3e7e0c --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/local/storage.cpp @@ -0,0 +1,62 @@ +#include "storage.h" + +namespace NKikimr::NOlap::NBlobOperations::NLocal { + +TOperator::TOperator(const TString& storageId, const std::shared_ptr& storageSharedBlobsManager) + : TBase(storageId, storageSharedBlobsManager) +{ +} + +namespace { +class TBlobInUseTracker: public IBlobInUseTracker { +private: + virtual bool DoFreeBlob(const NOlap::TUnifiedBlobId& /*blobId*/) override { + AFL_VERIFY(false); + return true; + } + virtual bool DoUseBlob(const NOlap::TUnifiedBlobId& /*blobId*/) override { + AFL_VERIFY(false); + return true; + } + virtual bool IsBlobInUsage(const NOlap::TUnifiedBlobId& /*blobId*/) 
const override { + AFL_VERIFY(false); + return false; + } + +public: +}; +} + +std::shared_ptr TOperator::GetBlobsTracker() const { + static std::shared_ptr result = std::make_shared(); + return result; +} + +namespace { +class TBlobsDeclareRemovingAction: public IBlobsDeclareRemovingAction { +private: + using TBase = IBlobsDeclareRemovingAction; +protected: + virtual void DoDeclareRemove(const TTabletId /*tabletId*/, const TUnifiedBlobId& /*blobId*/) override { + AFL_VERIFY(false); + } + virtual void DoOnExecuteTxAfterRemoving(TBlobManagerDb& /*dbBlobs*/, const bool /*blobsWroteSuccessfully*/) override { + } + virtual void DoOnCompleteTxAfterRemoving(const bool /*blobsWroteSuccessfully*/) override { + + } + +public: + TBlobsDeclareRemovingAction(const TString& storageId, const TTabletId selfTabletId, const std::shared_ptr& counters) + : TBase(storageId, selfTabletId, counters) { + } +}; +} + +std::shared_ptr TOperator::DoStartDeclareRemovingAction( + const std::shared_ptr& counters) { + static std::shared_ptr result = std::make_shared(GetStorageId(), GetSelfTabletId(), counters); + return result; +} + +} diff --git a/ydb/core/tx/columnshard/blobs_action/local/storage.h b/ydb/core/tx/columnshard/blobs_action/local/storage.h new file mode 100644 index 000000000000..beb5c4286cab --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/local/storage.h @@ -0,0 +1,53 @@ +#pragma once +#include + +namespace NKikimr::NOlap::NBlobOperations::NLocal { + +class TOperator: public IBlobsStorageOperator { +private: + using TBase = IBlobsStorageOperator; + NSplitter::TSplitSettings SplitSettings = Default(); + +protected: + virtual const NSplitter::TSplitSettings& DoGetBlobSplitSettings() const override { + return SplitSettings; + } + virtual std::shared_ptr DoStartDeclareRemovingAction( + const std::shared_ptr& /*counters*/) override; + virtual std::shared_ptr DoStartWritingAction() override { + AFL_VERIFY(false)("problem", "unimplemented method"); + return nullptr; + }; + 
virtual std::shared_ptr DoStartReadingAction() override { + AFL_VERIFY(false)("problem", "unimplemented method"); + return nullptr; + }; + virtual std::shared_ptr DoCreateGCAction(const std::shared_ptr& /*counters*/) const override { + return nullptr; + } + virtual void DoStartGCAction(const std::shared_ptr& /*action*/) const override { + AFL_VERIFY(false)("problem", "unimplemented method"); + }; + virtual bool DoLoad(IBlobManagerDb& /*dbBlobs*/) override { + return true; + }; + virtual void DoOnTieringModified(const std::shared_ptr& /*tiers*/) override { + return; + }; + +public: + TOperator(const TString& storageId, const std::shared_ptr& storageSharedBlobsManager); + + virtual TTabletsByBlob GetBlobsToDelete() const override { + return Default(); + } + + virtual std::shared_ptr GetBlobsTracker() const override; + + virtual bool HasToDelete(const TUnifiedBlobId& /*blobId*/, const TTabletId /*tabletId*/) const override { + return false; + } + +}; + +} diff --git a/ydb/core/tx/columnshard/blobs_action/local/ya.make b/ydb/core/tx/columnshard/blobs_action/local/ya.make new file mode 100644 index 000000000000..ed4199e82243 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/local/ya.make @@ -0,0 +1,11 @@ +LIBRARY() + +SRCS( + storage.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/blobs_action/abstract +) + +END() diff --git a/ydb/core/tx/columnshard/blobs_action/storages_manager/manager.cpp b/ydb/core/tx/columnshard/blobs_action/storages_manager/manager.cpp index 255845c9fb92..bd2a8acec539 100644 --- a/ydb/core/tx/columnshard/blobs_action/storages_manager/manager.cpp +++ b/ydb/core/tx/columnshard/blobs_action/storages_manager/manager.cpp @@ -1,18 +1,22 @@ #include "manager.h" -#include + #include +#include +#include #ifndef KIKIMR_DISABLE_S3_OPS #include #endif -#include #include +#include namespace NKikimr::NOlap { std::shared_ptr TStoragesManager::DoBuildOperator(const TString& storageId) { if (storageId == TBase::DefaultStorageId) { - return 
std::make_shared(storageId, Shard.SelfId(), Shard.Info(), - Shard.Executor()->Generation(), SharedBlobsManager->GetStorageManagerGuarantee(storageId)); + return std::make_shared( + storageId, Shard.SelfId(), Shard.Info(), Shard.Executor()->Generation(), SharedBlobsManager->GetStorageManagerGuarantee(storageId)); + } else if (storageId == TBase::LocalMetadataStorageId) { + return std::make_shared(storageId, SharedBlobsManager->GetStorageManagerGuarantee(storageId)); } else if (storageId == TBase::MemoryStorageId) { #ifndef KIKIMR_DISABLE_S3_OPS { @@ -20,7 +24,7 @@ std::shared_ptr TStoragesManager::DoBuild TGuard g(mutexLocal); Singleton()->SetSecretKey("fakeSecret"); } - return std::make_shared(storageId, Shard.SelfId(), + return std::make_shared(storageId, Shard.SelfId(), std::make_shared("fakeBucket", "fakeSecret"), SharedBlobsManager->GetStorageManagerGuarantee(storageId), Shard.Executor()->Generation()); #else @@ -30,7 +34,8 @@ std::shared_ptr TStoragesManager::DoBuild return nullptr; } else { #ifndef KIKIMR_DISABLE_S3_OPS - return std::make_shared(storageId, Shard, SharedBlobsManager->GetStorageManagerGuarantee(storageId)); + return std::make_shared( + storageId, Shard, SharedBlobsManager->GetStorageManagerGuarantee(storageId)); #else return nullptr; #endif @@ -43,9 +48,7 @@ bool TStoragesManager::DoLoadIdempotency(NTable::TDatabase& database) { TStoragesManager::TStoragesManager(NColumnShard::TColumnShard& shard) : Shard(shard) - , SharedBlobsManager(std::make_shared((TTabletId)Shard.TabletID())) -{ - + , SharedBlobsManager(std::make_shared((TTabletId)Shard.TabletID())) { } -} \ No newline at end of file +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/blobs_action/storages_manager/ya.make b/ydb/core/tx/columnshard/blobs_action/storages_manager/ya.make index b79b6720608b..ad3f37f24574 100644 --- a/ydb/core/tx/columnshard/blobs_action/storages_manager/ya.make +++ b/ydb/core/tx/columnshard/blobs_action/storages_manager/ya.make @@ -7,6 +7,7 @@ 
SRCS( PEERDIR( ydb/core/tx/columnshard/data_sharing/manager ydb/core/tx/columnshard/blobs_action/bs + ydb/core/tx/columnshard/blobs_action/local ) IF (OS_WINDOWS) diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_insert_table.cpp b/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_insert_table.cpp index bda89c9c9daf..15a05e7108a7 100644 --- a/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_insert_table.cpp +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_insert_table.cpp @@ -9,6 +9,8 @@ bool TTxInsertTableCleanup::Execute(TTransactionContext& txc, const TActorContex NOlap::TDbWrapper dbTable(txc.DB, &dsGroupSelector); NIceDb::TNiceDb db(txc.DB); + Self->TryAbortWrites(db, dbTable, std::move(WriteIdsToAbort)); + NOlap::TBlobManagerDb blobManagerDb(txc.DB); auto allAborted = Self->InsertTable->GetAborted(); auto storage = Self->StoragesManager->GetInsertOperator(); diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_insert_table.h b/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_insert_table.h index 6996333a0bd3..96d8f09e5e6d 100644 --- a/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_insert_table.h +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_insert_table.h @@ -5,11 +5,13 @@ namespace NKikimr::NColumnShard { class TTxInsertTableCleanup: public TTransactionBase { private: + THashSet WriteIdsToAbort; std::shared_ptr BlobsAction; public: - TTxInsertTableCleanup(TColumnShard* self) - : TBase(self) { - Y_ABORT_UNLESS(self->InsertTable->GetAborted().size()); + TTxInsertTableCleanup(TColumnShard* self, THashSet&& writeIdsToAbort) + : TBase(self) + , WriteIdsToAbort(std::move(writeIdsToAbort)) { + Y_ABORT_UNLESS(WriteIdsToAbort.size() || self->InsertTable->GetAborted().size()); } ~TTxInsertTableCleanup() { diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_remove_blobs.h b/ydb/core/tx/columnshard/blobs_action/transaction/tx_remove_blobs.h index 937174875fb2..437a16874bce 
100644 --- a/ydb/core/tx/columnshard/blobs_action/transaction/tx_remove_blobs.h +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_remove_blobs.h @@ -36,8 +36,12 @@ class TTxRemoveSharedBlobs: public TTransactionBase { for (auto it = categories.GetDirect().GetIterator(); it.IsValid(); ++it) { RemoveAction->DeclareRemove(it.GetTabletId(), it.GetBlobId()); } + for (auto it = categories.GetBorrowed().GetIterator(); it.IsValid(); ++it) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_BLOBS)("problem", "borrowed_to_remove")("blob_id", it.GetBlobId())("tablet_id", it.GetTabletId()); + } AFL_VERIFY(categories.GetBorrowed().IsEmpty()); - AFL_VERIFY(categories.GetSharing().GetSize() == SharingBlobIds.GetSize()); + AFL_VERIFY(categories.GetSharing().GetSize() == SharingBlobIds.GetSize())("sharing_category", categories.GetSharing().GetSize())( + "sharing", SharingBlobIds.GetSize()); } bool Execute(TTransactionContext& txc, const TActorContext& ctx) override; diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.cpp b/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.cpp index 853bcb7c5463..96a5cf794190 100644 --- a/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.cpp +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.cpp @@ -1,13 +1,17 @@ #include "tx_write.h" +#include +#include + namespace NKikimr::NColumnShard { -bool TTxWrite::InsertOneBlob(TTransactionContext& txc, const NOlap::TWideSerializedBatch& batch, const TWriteId writeId) { +bool TTxWrite::InsertOneBlob(TTransactionContext& txc, const NOlap::TWideSerializedBatch& batch, const TInsertWriteId writeId) { NKikimrTxColumnShard::TLogicalMetadata meta; meta.SetNumRows(batch->GetRowsCount()); meta.SetRawBytes(batch->GetRawBytes()); meta.SetDirtyWriteTimeSeconds(batch.GetStartInstant().Seconds()); - meta.SetSpecialKeysRawData(batch->GetSpecialKeysSafe().SerializeToString()); + meta.SetSpecialKeysRawData(batch->GetSpecialKeysFullSafe()); + 
meta.SetSpecialKeysPayloadData(batch->GetSpecialKeysPayloadSafe()); const auto& blobRange = batch.GetRange(); Y_ABORT_UNLESS(blobRange.GetBlobId().IsValid()); @@ -16,12 +20,14 @@ bool TTxWrite::InsertOneBlob(TTransactionContext& txc, const NOlap::TWideSeriali TBlobGroupSelector dsGroupSelector(Self->Info()); NOlap::TDbWrapper dbTable(txc.DB, &dsGroupSelector); - const auto& writeMeta = batch.GetAggregation().GetWriteData()->GetWriteMeta(); + const auto& writeMeta = batch.GetAggregation().GetWriteMeta(); meta.SetModificationType(TEnumOperator::SerializeToProto(writeMeta.GetModificationType())); - auto schemeVersion = batch.GetAggregation().GetWriteData()->GetData()->GetSchemaVersion(); + *meta.MutableSchemaSubset() = batch.GetAggregation().GetSchemaSubset().SerializeToProto(); + auto schemeVersion = batch.GetAggregation().GetSchemaVersion(); auto tableSchema = Self->TablesManager.GetPrimaryIndex()->GetVersionedIndex().GetSchemaVerified(schemeVersion); - NOlap::TInsertedData insertData((ui64)writeId, writeMeta.GetTableId(), writeMeta.GetDedupId(), blobRange, meta, tableSchema->GetVersion(), batch->GetData()); + auto userData = std::make_shared(writeMeta.GetTableId(), blobRange, meta, tableSchema->GetVersion(), batch->GetData()); + NOlap::TInsertedData insertData(writeId, userData); bool ok = Self->InsertTable->Insert(dbTable, std::move(insertData)); if (ok) { Self->UpdateInsertTableCounters(); @@ -32,42 +38,37 @@ bool TTxWrite::InsertOneBlob(TTransactionContext& txc, const NOlap::TWideSeriali bool TTxWrite::Execute(TTransactionContext& txc, const TActorContext&) { TMemoryProfileGuard mpg("TTxWrite::Execute"); - NActors::TLogContextGuard logGuard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD_BLOBS)("tablet_id", Self->TabletID())("tx_state", "execute"); + NActors::TLogContextGuard logGuard = + NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD_BLOBS)("tablet_id", Self->TabletID())("tx_state", "execute"); ACFL_DEBUG("event", 
"start_execute"); const NOlap::TWritingBuffer& buffer = PutBlobResult->Get()->MutableWritesBuffer(); for (auto&& aggr : buffer.GetAggregations()) { - const auto& writeMeta = aggr->GetWriteData()->GetWriteMeta(); + const auto& writeMeta = aggr->GetWriteMeta(); Y_ABORT_UNLESS(Self->TablesManager.IsReadyForWrite(writeMeta.GetTableId())); txc.DB.NoMoreReadsForTx(); TWriteOperation::TPtr operation; if (writeMeta.HasLongTxId()) { + NIceDb::TNiceDb db(txc.DB); + const TInsertWriteId insertWriteId = + Self->GetLongTxWrite(db, writeMeta.GetLongTxIdUnsafe(), writeMeta.GetWritePartId(), writeMeta.GetGranuleShardingVersion()); + aggr->AddInsertWriteId(insertWriteId); if (writeMeta.IsGuaranteeWriter()) { AFL_VERIFY(aggr->GetSplittedBlobs().size() == 1)("count", aggr->GetSplittedBlobs().size()); } else { AFL_VERIFY(aggr->GetSplittedBlobs().size() <= 1)("count", aggr->GetSplittedBlobs().size()); } + if (aggr->GetSplittedBlobs().size() == 1) { + AFL_VERIFY(InsertOneBlob(txc, aggr->GetSplittedBlobs().front(), insertWriteId))("write_id", writeMeta.GetWriteId())( + "insert_write_id", insertWriteId); + } } else { - operation = Self->OperationsManager->GetOperation((TWriteId)writeMeta.GetWriteId()); - Y_ABORT_UNLESS(operation); + operation = Self->OperationsManager->GetOperationVerified((TOperationWriteId)writeMeta.GetWriteId()); Y_ABORT_UNLESS(operation->GetStatus() == EOperationStatus::Started); - } - - auto writeId = TWriteId(writeMeta.GetWriteId()); - if (!operation) { - NIceDb::TNiceDb db(txc.DB); - writeId = Self->GetLongTxWrite(db, writeMeta.GetLongTxIdUnsafe(), writeMeta.GetWritePartId(), writeMeta.GetGranuleShardingVersion()); - aggr->AddWriteId(writeId); - } - - for (auto&& i : aggr->GetSplittedBlobs()) { - if (operation) { - writeId = Self->BuildNextWriteId(txc); - aggr->AddWriteId(writeId); - } - - if (!InsertOneBlob(txc, i, writeId)) { - LOG_S_DEBUG(TxPrefix() << "duplicate writeId " << (ui64)writeId << TxSuffix()); - Self->IncCounter(COUNTER_WRITE_DUPLICATE); + for 
(auto&& i : aggr->GetSplittedBlobs()) { + const TInsertWriteId insertWriteId = Self->InsertTable->BuildNextWriteId(txc); + aggr->AddInsertWriteId(insertWriteId); + AFL_VERIFY(InsertOneBlob(txc, i, insertWriteId))("write_id", writeMeta.GetWriteId())("insert_write_id", insertWriteId)( + "size", aggr->GetSplittedBlobs().size()); } } } @@ -82,34 +83,43 @@ bool TTxWrite::Execute(TTransactionContext& txc, const TActorContext&) { } Results.clear(); for (auto&& aggr : buffer.GetAggregations()) { - const auto& writeMeta = aggr->GetWriteData()->GetWriteMeta(); + const auto& writeMeta = aggr->GetWriteMeta(); if (!writeMeta.HasLongTxId()) { - auto operation = Self->OperationsManager->GetOperation((TWriteId)writeMeta.GetWriteId()); - Y_ABORT_UNLESS(operation); + auto operation = Self->OperationsManager->GetOperationVerified((TOperationWriteId)writeMeta.GetWriteId()); Y_ABORT_UNLESS(operation->GetStatus() == EOperationStatus::Started); - operation->OnWriteFinish(txc, aggr->GetWriteIds()); - if (operation->GetBehaviour() == EOperationBehaviour::InTxWrite) { + operation->OnWriteFinish(txc, aggr->GetInsertWriteIds(), operation->GetBehaviour() == EOperationBehaviour::NoTxWrite); + Self->OperationsManager->LinkInsertWriteIdToOperationWriteId(aggr->GetInsertWriteIds(), operation->GetWriteId()); + if (operation->GetBehaviour() == EOperationBehaviour::NoTxWrite) { + auto ev = NEvents::TDataEvents::TEvWriteResult::BuildCompleted(Self->TabletID()); + Results.emplace_back(std::move(ev), writeMeta.GetSource(), operation->GetCookie()); + Self->OperationsManager->AddTemporaryTxLink(operation->GetLockId()); + Self->OperationsManager->CommitTransactionOnExecute(*Self, operation->GetLockId(), txc, Self->GetLastTxSnapshot()); + } else if (operation->GetBehaviour() == EOperationBehaviour::InTxWrite) { NKikimrTxColumnShard::TCommitWriteTxBody proto; proto.SetLockId(operation->GetLockId()); TString txBody; Y_ABORT_UNLESS(proto.SerializeToString(&txBody)); auto op = 
Self->GetProgressTxController().StartProposeOnExecute( - TTxController::TTxInfo(NKikimrTxColumnShard::TX_KIND_COMMIT_WRITE, operation->GetLockId(), writeMeta.GetSource(), operation->GetCookie(), {}), txBody, - txc); + TTxController::TTxInfo( + NKikimrTxColumnShard::TX_KIND_COMMIT_WRITE, operation->GetLockId(), writeMeta.GetSource(), operation->GetCookie(), {}), + txBody, txc); AFL_VERIFY(!op->IsFail()); ResultOperators.emplace_back(op); } else { + auto& info = Self->OperationsManager->GetLockVerified(operation->GetLockId()); NKikimrDataEvents::TLock lock; lock.SetLockId(operation->GetLockId()); lock.SetDataShard(Self->TabletID()); - lock.SetGeneration(1); - lock.SetCounter(1); + lock.SetGeneration(info.GetGeneration()); + lock.SetCounter(info.GetInternalGenerationCounter()); + lock.SetPathId(writeMeta.GetTableId()); auto ev = NEvents::TDataEvents::TEvWriteResult::BuildCompleted(Self->TabletID(), operation->GetLockId(), lock); Results.emplace_back(std::move(ev), writeMeta.GetSource(), operation->GetCookie()); } } else { - Y_ABORT_UNLESS(aggr->GetWriteIds().size() == 1); - auto ev = std::make_unique(Self->TabletID(), writeMeta, (ui64)aggr->GetWriteIds().front(), NKikimrTxColumnShard::EResultStatus::SUCCESS); + Y_ABORT_UNLESS(aggr->GetInsertWriteIds().size() == 1); + auto ev = std::make_unique( + Self->TabletID(), writeMeta, (ui64)aggr->GetInsertWriteIds().front(), NKikimrTxColumnShard::EResultStatus::SUCCESS); Results.emplace_back(std::move(ev), writeMeta.GetSource(), 0); } } @@ -118,7 +128,8 @@ bool TTxWrite::Execute(TTransactionContext& txc, const TActorContext&) { void TTxWrite::Complete(const TActorContext& ctx) { TMemoryProfileGuard mpg("TTxWrite::Complete"); - NActors::TLogContextGuard logGuard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD_BLOBS)("tablet_id", Self->TabletID())("tx_state", "complete"); + NActors::TLogContextGuard logGuard = + NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD_BLOBS)("tablet_id", 
Self->TabletID())("tx_state", "complete"); const auto now = TMonotonic::Now(); const NOlap::TWritingBuffer& buffer = PutBlobResult->Get()->MutableWritesBuffer(); for (auto&& i : buffer.GetAddActions()) { @@ -136,11 +147,22 @@ void TTxWrite::Complete(const TActorContext& ctx) { i.DoSendReply(ctx); } for (ui32 i = 0; i < buffer.GetAggregations().size(); ++i) { - const auto& writeMeta = buffer.GetAggregations()[i]->GetWriteData()->GetWriteMeta(); - Self->CSCounters.OnWriteTxComplete(now - writeMeta.GetWriteStartInstant()); - Self->CSCounters.OnSuccessWriteResponse(); + const auto& writeMeta = buffer.GetAggregations()[i]->GetWriteMeta(); + if (!writeMeta.HasLongTxId()) { + auto op = Self->GetOperationsManager().GetOperationVerified((TOperationWriteId)writeMeta.GetWriteId()); + if (op->GetBehaviour() == EOperationBehaviour::WriteWithLock || op->GetBehaviour() == EOperationBehaviour::NoTxWrite) { + auto evWrite = std::make_shared(writeMeta.GetTableId(), + buffer.GetAggregations()[i]->GetRecordBatch(), Self->GetIndexOptional()->GetVersionedIndex().GetPrimaryKey()); + Self->GetOperationsManager().AddEventForLock(*Self, op->GetLockId(), evWrite); + } + if (op->GetBehaviour() == EOperationBehaviour::NoTxWrite) { + Self->OperationsManager->CommitTransactionOnComplete(*Self, op->GetLockId(), Self->GetLastTxSnapshot()); + } + } + Self->Counters.GetCSCounters().OnWriteTxComplete(now - writeMeta.GetWriteStartInstant()); + Self->Counters.GetCSCounters().OnSuccessWriteResponse(); } - + Self->Counters.GetTabletCounters()->IncCounter(COUNTER_IMMEDIATE_TX_COMPLETED); } -} +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.h b/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.h index 98de301e5e16..84ffbe7a9005 100644 --- a/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.h +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.h @@ -43,7 +43,7 @@ class TTxWrite : public 
NTabletFlatExecutor::TTransactionBase { std::vector> ResultOperators; - bool InsertOneBlob(TTransactionContext& txc, const NOlap::TWideSerializedBatch& batch, const TWriteId writeId); + bool InsertOneBlob(TTransactionContext& txc, const NOlap::TWideSerializedBatch& batch, const TInsertWriteId writeId); TStringBuilder TxPrefix() const { return TStringBuilder() << "TxWrite[" << ToString(TabletTxNo) << "] "; diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_write_index.cpp b/ydb/core/tx/columnshard/blobs_action/transaction/tx_write_index.cpp index a04c6fb2abef..57a1eee50146 100644 --- a/ydb/core/tx/columnshard/blobs_action/transaction/tx_write_index.cpp +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_write_index.cpp @@ -43,7 +43,7 @@ bool TTxWriteIndex::Execute(TTransactionContext& txc, const TActorContext& ctx) LOG_S_ERROR(TxPrefix() << " (" << changes->TypeString() << ") cannot write index blobs" << TxSuffix()); } - Self->EnqueueProgressTx(ctx); + Self->EnqueueProgressTx(ctx, std::nullopt); return true; } diff --git a/ydb/core/tx/columnshard/columnshard.cpp b/ydb/core/tx/columnshard/columnshard.cpp index fb4e4a9f53b4..f3a6b9e99db9 100644 --- a/ydb/core/tx/columnshard/columnshard.cpp +++ b/ydb/core/tx/columnshard/columnshard.cpp @@ -1,14 +1,17 @@ #include "columnshard_impl.h" + +#include "bg_tasks/manager/manager.h" #include "blobs_reader/actor.h" +#include "counters/aggregation/table_stats.h" +#include "engines/column_engine_logs.h" +#include "engines/writer/buffer/actor.h" #include "hooks/abstract/abstract.h" #include "resource_subscriber/actor.h" -#include "engines/writer/buffer/actor.h" -#include "engines/column_engine_logs.h" -#include "bg_tasks/manager/manager.h" +#include "transactions/locks/read_finished.h" -#include -#include #include +#include +#include namespace NKikimr { @@ -16,7 +19,7 @@ IActor* CreateColumnShard(const TActorId& tablet, TTabletStorageInfo* info) { return new NColumnShard::TColumnShard(info, tablet); } -} +} // 
namespace NKikimr namespace NKikimr::NColumnShard { @@ -26,6 +29,9 @@ void TColumnShard::CleanupActors(const TActorContext& ctx) { } ctx.Send(ResourceSubscribeActor, new TEvents::TEvPoisonPill); ctx.Send(BufferizationWriteActorId, new TEvents::TEvPoisonPill); + for (auto&& i : ActorsToStop) { + ctx.Send(i, new TEvents::TEvPoisonPill); + } StoragesManager->Stop(); DataLocksManager->Stop(); @@ -43,7 +49,8 @@ void TColumnShard::BecomeBroken(const TActorContext& ctx) { void TColumnShard::SwitchToWork(const TActorContext& ctx) { { - const TLogContextGuard gLogging = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletID())("self_id", SelfId()); + const TLogContextGuard gLogging = + NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletID())("self_id", SelfId()); AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("event", "initialize_shard")("step", "SwitchToWork"); for (auto&& i : TablesManager.GetTables()) { @@ -54,24 +61,30 @@ void TColumnShard::SwitchToWork(const TActorContext& ctx) { SignalTabletActive(ctx); AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("event", "initialize_shard")("step", "SignalTabletActive"); TryRegisterMediatorTimeCast(); - EnqueueProgressTx(ctx); + EnqueueProgressTx(ctx, std::nullopt); } - CSCounters.OnIndexMetadataLimit(NOlap::IColumnEngine::GetMetadataLimit()); + Counters.GetCSCounters().OnIndexMetadataLimit(NOlap::IColumnEngine::GetMetadataLimit()); EnqueueBackgroundActivities(); BackgroundSessionsManager->Start(); + ctx.Send(SelfId(), new NActors::TEvents::TEvWakeup()); ctx.Send(SelfId(), new TEvPrivate::TEvPeriodicWakeup()); + ctx.Send(SelfId(), new TEvPrivate::TEvPingSnapshotsUsage()); NYDBTest::TControllers::GetColumnShardController()->OnSwitchToWork(TabletID()); + AFL_VERIFY(!!StartInstant); + Counters.GetCSCounters().Initialization.OnSwitchToWork(TMonotonic::Now() - *StartInstant, TMonotonic::Now() - CreateInstant); } void TColumnShard::OnActivateExecutor(const TActorContext& ctx) 
{ - const TLogContextGuard gLogging = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletID())("self_id", SelfId()); + StartInstant = TMonotonic::Now(); + Counters.GetCSCounters().Initialization.OnActivateExecutor(TMonotonic::Now() - CreateInstant); + const TLogContextGuard gLogging = + NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletID())("self_id", SelfId()); AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("event", "initialize_shard")("step", "OnActivateExecutor"); - Executor()->RegisterExternalTabletCounters(TabletCountersPtr.release()); + Executor()->RegisterExternalTabletCounters(TabletCountersHolder.release()); const auto selfActorId = SelfId(); StoragesManager->Initialize(Executor()->Generation()); - Tiers = std::make_shared(TabletID(), SelfId(), - [selfActorId](const TActorContext& ctx) { + Tiers = std::make_shared(TabletID(), SelfId(), [selfActorId](const TActorContext& ctx) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "tiering_new_event"); ctx.Send(selfActorId, new TEvPrivate::TEvTieringModified); }); @@ -79,7 +92,8 @@ void TColumnShard::OnActivateExecutor(const TActorContext& ctx) { if (!NMetadata::NProvider::TServiceOperator::IsEnabled()) { Tiers->TakeConfigs(NYDBTest::TControllers::GetColumnShardController()->GetFallbackTiersSnapshot(), nullptr); } - BackgroundSessionsManager = std::make_shared(std::make_shared(selfActorId, (NOlap::TTabletId)TabletID(), *this)); + BackgroundSessionsManager = std::make_shared( + std::make_shared(selfActorId, (NOlap::TTabletId)TabletID(), *this)); AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("event", "initialize_shard")("step", "initialize_tiring_finished"); auto& icb = *AppData(ctx)->Icb; @@ -141,14 +155,14 @@ void TColumnShard::Handle(TEvTabletPipe::TEvServerDisconnected::TPtr& ev, const LOG_S_DEBUG("Server pipe reset at tablet " << TabletID()); } -void TColumnShard::Handle(TEvPrivate::TEvScanStats::TPtr& ev, const TActorContext &ctx) { +void 
TColumnShard::Handle(TEvPrivate::TEvScanStats::TPtr& ev, const TActorContext& ctx) { Y_UNUSED(ctx); - IncCounter(COUNTER_SCANNED_ROWS, ev->Get()->Rows); - IncCounter(COUNTER_SCANNED_BYTES, ev->Get()->Bytes); + Counters.GetTabletCounters()->IncCounter(COUNTER_SCANNED_ROWS, ev->Get()->Rows); + Counters.GetTabletCounters()->IncCounter(COUNTER_SCANNED_BYTES, ev->Get()->Bytes); } -void TColumnShard::Handle(TEvPrivate::TEvReadFinished::TPtr& ev, const TActorContext &ctx) { +void TColumnShard::Handle(TEvPrivate::TEvReadFinished::TPtr& ev, const TActorContext& ctx) { Y_UNUSED(ctx); ui64 readCookie = ev->Get()->RequestCookie; LOG_S_DEBUG("Finished read cookie: " << readCookie << " at tablet " << TabletID()); @@ -156,17 +170,28 @@ void TColumnShard::Handle(TEvPrivate::TEvReadFinished::TPtr& ev, const TActorCon if (HasIndex()) { index = &GetIndexAs().GetVersionedIndex(); } - InFlightReadsTracker.RemoveInFlightRequest(ev->Get()->RequestCookie, index); + + auto readMetaBase = InFlightReadsTracker.ExtractInFlightRequest(ev->Get()->RequestCookie, index, TInstant::Now()); + readMetaBase->OnReadFinished(*this); ui64 txId = ev->Get()->TxId; if (ScanTxInFlight.contains(txId)) { TDuration duration = TAppData::TimeProvider->Now() - ScanTxInFlight[txId]; - IncCounter(COUNTER_SCAN_LATENCY, duration); + Counters.GetTabletCounters()->IncCounter(COUNTER_SCAN_LATENCY, duration); ScanTxInFlight.erase(txId); - SetCounter(COUNTER_SCAN_IN_FLY, ScanTxInFlight.size()); + Counters.GetTabletCounters()->SetCounter(COUNTER_SCAN_IN_FLY, ScanTxInFlight.size()); + Counters.GetTabletCounters()->IncCounter(COUNTER_IMMEDIATE_TX_COMPLETED); } } +void TColumnShard::Handle(TEvPrivate::TEvPingSnapshotsUsage::TPtr& /*ev*/, const TActorContext& ctx) { + if (auto writeTx = + InFlightReadsTracker.Ping(this, NYDBTest::TControllers::GetColumnShardController()->GetPingCheckPeriod(), TInstant::Now())) { + Execute(writeTx.release(), ctx); + } + ctx.Schedule(0.3 * GetMaxReadStaleness(), new 
TEvPrivate::TEvPingSnapshotsUsage()); +} + void TColumnShard::Handle(TEvPrivate::TEvPeriodicWakeup::TPtr& ev, const TActorContext& ctx) { if (ev->Get()->Manual) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "TEvPrivate::TEvPeriodicWakeup::MANUAL")("tablet_id", TabletID()); @@ -176,10 +201,20 @@ void TColumnShard::Handle(TEvPrivate::TEvPeriodicWakeup::TPtr& ev, const TActorC SendWaitPlanStep(GetOutdatedStep()); SendPeriodicStats(); + EnqueueBackgroundActivities(); ctx.Schedule(PeriodicWakeupActivationPeriod, new TEvPrivate::TEvPeriodicWakeup()); } } +void TColumnShard::Handle(NActors::TEvents::TEvWakeup::TPtr& ev, const TActorContext& ctx) { + if (ev->Get()->Tag == 0) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "TEvPrivate::TEvPeriodicWakeup::MANUAL")("tablet_id", TabletID()); + const TMonotonic now = TMonotonic::Now(); + GetProgressTxController().PingTimeouts(now); + ctx.Schedule(TDuration::Seconds(1), new NActors::TEvents::TEvWakeup(0)); + } +} + void TColumnShard::Handle(TEvMediatorTimecast::TEvRegisterTabletResult::TPtr& ev, const TActorContext&) { const auto* msg = ev->Get(); Y_ABORT_UNLESS(msg->TabletId == TabletID()); @@ -213,14 +248,13 @@ void TColumnShard::UpdateInsertTableCounters() { auto& prepared = InsertTable->GetCountersPrepared(); auto& committed = InsertTable->GetCountersCommitted(); - SetCounter(COUNTER_PREPARED_RECORDS, prepared.Rows); - SetCounter(COUNTER_PREPARED_BYTES, prepared.Bytes); - SetCounter(COUNTER_COMMITTED_RECORDS, committed.Rows); - SetCounter(COUNTER_COMMITTED_BYTES, committed.Bytes); + Counters.GetTabletCounters()->SetCounter(COUNTER_PREPARED_RECORDS, prepared.Rows); + Counters.GetTabletCounters()->SetCounter(COUNTER_PREPARED_BYTES, prepared.Bytes); + Counters.GetTabletCounters()->SetCounter(COUNTER_COMMITTED_RECORDS, committed.Rows); + Counters.GetTabletCounters()->SetCounter(COUNTER_COMMITTED_BYTES, committed.Bytes); - LOG_S_INFO("InsertTable. 
Prepared: " << prepared.Bytes << " in " << prepared.Rows - << " records, committed: " << committed.Bytes << " in " << committed.Rows - << " records at tablet " << TabletID()); + LOG_S_TRACE("InsertTable. Prepared: " << prepared.Bytes << " in " << prepared.Rows << " records, committed: " << committed.Bytes << " in " + << committed.Rows << " records at tablet " << TabletID()); } void TColumnShard::UpdateIndexCounters() { @@ -229,70 +263,63 @@ void TColumnShard::UpdateIndexCounters() { } auto& stats = TablesManager.MutablePrimaryIndex().GetTotalStats(); - SetCounter(COUNTER_INDEX_TABLES, stats.Tables); - SetCounter(COUNTER_INDEX_COLUMN_RECORDS, stats.ColumnRecords); - SetCounter(COUNTER_INSERTED_PORTIONS, stats.GetInsertedStats().Portions); - SetCounter(COUNTER_INSERTED_BLOBS, stats.GetInsertedStats().Blobs); - SetCounter(COUNTER_INSERTED_ROWS, stats.GetInsertedStats().Rows); - SetCounter(COUNTER_INSERTED_BYTES, stats.GetInsertedStats().Bytes); - SetCounter(COUNTER_INSERTED_RAW_BYTES, stats.GetInsertedStats().RawBytes); - SetCounter(COUNTER_COMPACTED_PORTIONS, stats.GetCompactedStats().Portions); - SetCounter(COUNTER_COMPACTED_BLOBS, stats.GetCompactedStats().Blobs); - SetCounter(COUNTER_COMPACTED_ROWS, stats.GetCompactedStats().Rows); - SetCounter(COUNTER_COMPACTED_BYTES, stats.GetCompactedStats().Bytes); - SetCounter(COUNTER_COMPACTED_RAW_BYTES, stats.GetCompactedStats().RawBytes); - SetCounter(COUNTER_SPLIT_COMPACTED_PORTIONS, stats.GetSplitCompactedStats().Portions); - SetCounter(COUNTER_SPLIT_COMPACTED_BLOBS, stats.GetSplitCompactedStats().Blobs); - SetCounter(COUNTER_SPLIT_COMPACTED_ROWS, stats.GetSplitCompactedStats().Rows); - SetCounter(COUNTER_SPLIT_COMPACTED_BYTES, stats.GetSplitCompactedStats().Bytes); - SetCounter(COUNTER_SPLIT_COMPACTED_RAW_BYTES, stats.GetSplitCompactedStats().RawBytes); - SetCounter(COUNTER_INACTIVE_PORTIONS, stats.GetInactiveStats().Portions); - SetCounter(COUNTER_INACTIVE_BLOBS, stats.GetInactiveStats().Blobs); - 
SetCounter(COUNTER_INACTIVE_ROWS, stats.GetInactiveStats().Rows); - SetCounter(COUNTER_INACTIVE_BYTES, stats.GetInactiveStats().Bytes); - SetCounter(COUNTER_INACTIVE_RAW_BYTES, stats.GetInactiveStats().RawBytes); - SetCounter(COUNTER_EVICTED_PORTIONS, stats.GetEvictedStats().Portions); - SetCounter(COUNTER_EVICTED_BLOBS, stats.GetEvictedStats().Blobs); - SetCounter(COUNTER_EVICTED_ROWS, stats.GetEvictedStats().Rows); - SetCounter(COUNTER_EVICTED_BYTES, stats.GetEvictedStats().Bytes); - SetCounter(COUNTER_EVICTED_RAW_BYTES, stats.GetEvictedStats().RawBytes); - - LOG_S_DEBUG("Index: tables " << stats.Tables - << " inserted " << stats.GetInsertedStats().DebugString() - << " compacted " << stats.GetCompactedStats().DebugString() - << " s-compacted " << stats.GetSplitCompactedStats().DebugString() - << " inactive " << stats.GetInactiveStats().DebugString() - << " evicted " << stats.GetEvictedStats().DebugString() - << " column records " << stats.ColumnRecords - << " at tablet " << TabletID()); + const std::shared_ptr& counters = Counters.GetTabletCounters(); + counters->SetCounter(COUNTER_INDEX_TABLES, stats.Tables); + counters->SetCounter(COUNTER_INDEX_COLUMN_RECORDS, stats.ColumnRecords); + counters->SetCounter(COUNTER_INSERTED_PORTIONS, stats.GetInsertedStats().Portions); + counters->SetCounter(COUNTER_INSERTED_BLOBS, stats.GetInsertedStats().Blobs); + counters->SetCounter(COUNTER_INSERTED_ROWS, stats.GetInsertedStats().Rows); + counters->SetCounter(COUNTER_INSERTED_BYTES, stats.GetInsertedStats().Bytes); + counters->SetCounter(COUNTER_INSERTED_RAW_BYTES, stats.GetInsertedStats().RawBytes); + counters->SetCounter(COUNTER_COMPACTED_PORTIONS, stats.GetCompactedStats().Portions); + counters->SetCounter(COUNTER_COMPACTED_BLOBS, stats.GetCompactedStats().Blobs); + counters->SetCounter(COUNTER_COMPACTED_ROWS, stats.GetCompactedStats().Rows); + counters->SetCounter(COUNTER_COMPACTED_BYTES, stats.GetCompactedStats().Bytes); + counters->SetCounter(COUNTER_COMPACTED_RAW_BYTES, 
stats.GetCompactedStats().RawBytes); + counters->SetCounter(COUNTER_SPLIT_COMPACTED_PORTIONS, stats.GetSplitCompactedStats().Portions); + counters->SetCounter(COUNTER_SPLIT_COMPACTED_BLOBS, stats.GetSplitCompactedStats().Blobs); + counters->SetCounter(COUNTER_SPLIT_COMPACTED_ROWS, stats.GetSplitCompactedStats().Rows); + counters->SetCounter(COUNTER_SPLIT_COMPACTED_BYTES, stats.GetSplitCompactedStats().Bytes); + counters->SetCounter(COUNTER_SPLIT_COMPACTED_RAW_BYTES, stats.GetSplitCompactedStats().RawBytes); + counters->SetCounter(COUNTER_INACTIVE_PORTIONS, stats.GetInactiveStats().Portions); + counters->SetCounter(COUNTER_INACTIVE_BLOBS, stats.GetInactiveStats().Blobs); + counters->SetCounter(COUNTER_INACTIVE_ROWS, stats.GetInactiveStats().Rows); + counters->SetCounter(COUNTER_INACTIVE_BYTES, stats.GetInactiveStats().Bytes); + counters->SetCounter(COUNTER_INACTIVE_RAW_BYTES, stats.GetInactiveStats().RawBytes); + counters->SetCounter(COUNTER_EVICTED_PORTIONS, stats.GetEvictedStats().Portions); + counters->SetCounter(COUNTER_EVICTED_BLOBS, stats.GetEvictedStats().Blobs); + counters->SetCounter(COUNTER_EVICTED_ROWS, stats.GetEvictedStats().Rows); + counters->SetCounter(COUNTER_EVICTED_BYTES, stats.GetEvictedStats().Bytes); + counters->SetCounter(COUNTER_EVICTED_RAW_BYTES, stats.GetEvictedStats().RawBytes); + + LOG_S_DEBUG("Index: tables " << stats.Tables << " inserted " << stats.GetInsertedStats().DebugString() << " compacted " + << stats.GetCompactedStats().DebugString() << " s-compacted " << stats.GetSplitCompactedStats().DebugString() + << " inactive " << stats.GetInactiveStats().DebugString() << " evicted " + << stats.GetEvictedStats().DebugString() << " column records " << stats.ColumnRecords << " at tablet " + << TabletID()); } ui64 TColumnShard::MemoryUsage() const { - ui64 memory = - ProgressTxController->GetMemoryUsage() + - ScanTxInFlight.size() * (sizeof(ui64) + sizeof(TInstant)) + - LongTxWrites.size() * (sizeof(TWriteId) + sizeof(TLongTxWriteInfo)) + - 
LongTxWritesByUniqueId.size() * (sizeof(TULID) + sizeof(void*)) + - (WaitingScans.size()) * (sizeof(NOlap::TSnapshot) + sizeof(void*)) + - TabletCounters->Simple()[COUNTER_PREPARED_RECORDS].Get() * sizeof(NOlap::TInsertedData) + - TabletCounters->Simple()[COUNTER_COMMITTED_RECORDS].Get() * sizeof(NOlap::TInsertedData); + ui64 memory = ProgressTxController->GetMemoryUsage() + ScanTxInFlight.size() * (sizeof(ui64) + sizeof(TInstant)) + + LongTxWrites.size() * (sizeof(TInsertWriteId) + sizeof(TLongTxWriteInfo)) + + LongTxWritesByUniqueId.size() * (sizeof(TULID) + sizeof(void*)) + + (WaitingScans.size()) * (sizeof(NOlap::TSnapshot) + sizeof(void*)) + + Counters.GetTabletCounters()->GetValue(COUNTER_PREPARED_RECORDS) * sizeof(NOlap::TInsertedData) + + Counters.GetTabletCounters()->GetValue(COUNTER_COMMITTED_RECORDS) * sizeof(NOlap::TInsertedData); memory += TablesManager.GetMemoryUsage(); return memory; } void TColumnShard::UpdateResourceMetrics(const TActorContext& ctx, const TUsage& usage) { - auto * metrics = Executor()->GetResourceMetrics(); + auto* metrics = Executor()->GetResourceMetrics(); if (!metrics) { return; } ui64 storageBytes = - TabletCounters->Simple()[COUNTER_PREPARED_BYTES].Get() + - TabletCounters->Simple()[COUNTER_COMMITTED_BYTES].Get() + - TabletCounters->Simple()[COUNTER_INSERTED_BYTES].Get() + - TabletCounters->Simple()[COUNTER_COMPACTED_BYTES].Get() + - TabletCounters->Simple()[COUNTER_SPLIT_COMPACTED_BYTES].Get() + - TabletCounters->Simple()[COUNTER_INACTIVE_BYTES].Get(); + Counters.GetTabletCounters()->GetValue(COUNTER_PREPARED_BYTES) + Counters.GetTabletCounters()->GetValue(COUNTER_COMMITTED_BYTES) + + Counters.GetTabletCounters()->GetValue(COUNTER_INSERTED_BYTES) + Counters.GetTabletCounters()->GetValue(COUNTER_COMPACTED_BYTES) + + Counters.GetTabletCounters()->GetValue(COUNTER_SPLIT_COMPACTED_BYTES) + Counters.GetTabletCounters()->GetValue(COUNTER_INACTIVE_BYTES); ui64 memory = MemoryUsage(); @@ -308,35 +335,6 @@ void 
TColumnShard::UpdateResourceMetrics(const TActorContext& ctx, const TUsage& metrics->TryUpdate(ctx); } -void TColumnShard::ConfigureStats(const NOlap::TColumnEngineStats& indexStats, - ::NKikimrTableStats::TTableStats* tabletStats) { - NOlap::TSnapshot lastIndexUpdate = TablesManager.GetPrimaryIndexSafe().LastUpdate(); - auto activeIndexStats = indexStats.Active(); // data stats excluding inactive and evicted - - if (activeIndexStats.Rows < 0 || activeIndexStats.Bytes < 0) { - LOG_S_WARN("Negative stats counter. Rows: " << activeIndexStats.Rows << " Bytes: " << activeIndexStats.Bytes - << TabletID()); - - activeIndexStats.Rows = (activeIndexStats.Rows < 0) ? 0 : activeIndexStats.Rows; - activeIndexStats.Bytes = (activeIndexStats.Bytes < 0) ? 0 : activeIndexStats.Bytes; - } - - tabletStats->SetRowCount(activeIndexStats.Rows); - tabletStats->SetDataSize(activeIndexStats.Bytes + TabletCounters->Simple()[COUNTER_COMMITTED_BYTES].Get()); - - // TODO: we need row/dataSize counters for evicted data (managed by tablet but stored outside) - // tabletStats->SetIndexSize(); // TODO: calc size of internal tables - - tabletStats->SetLastAccessTime(LastAccessTime.MilliSeconds()); - tabletStats->SetLastUpdateTime(lastIndexUpdate.GetPlanStep()); -} - -void TColumnShard::FillTxTableStats(::NKikimrTableStats::TTableStats* tableStats) const { - tableStats->SetTxRejectedByOverload(TabletCounters->Cumulative()[COUNTER_WRITE_OVERLOAD].Get()); - tableStats->SetTxRejectedBySpace(TabletCounters->Cumulative()[COUNTER_OUT_OF_SPACE].Get()); - tableStats->SetInFlightTxCount(Executor()->GetStats().TxInFly); -} - void TColumnShard::FillOlapStats(const TActorContext& ctx, std::unique_ptr& ev) { ev->Record.SetShardState(2); // NKikimrTxDataShard.EDatashardState.Ready ev->Record.SetGeneration(Executor()->Generation()); @@ -346,30 +344,24 @@ void TColumnShard::FillOlapStats(const TActorContext& ctx, std::unique_ptrGetResourceMetrics()) { resourceMetrics->Fill(*ev->Record.MutableTabletMetrics()); } - 
auto* tabletStats = ev->Record.MutableTableStats(); - FillTxTableStats(tabletStats); + if (TablesManager.HasPrimaryIndex()) { - const auto& indexStats = TablesManager.MutablePrimaryIndex().GetTotalStats(); - ConfigureStats(indexStats, tabletStats); + TTableStatsBuilder statsBuilder(Counters, Executor(), TablesManager.MutablePrimaryIndex()); + statsBuilder.FillTotalTableStats(*ev->Record.MutableTableStats()); } } -void TColumnShard::FillColumnTableStats(const TActorContext& ctx, - std::unique_ptr& ev) { - if (!TablesManager.HasPrimaryIndex()) { - return; - } - const auto& tablesIndexStats = TablesManager.MutablePrimaryIndex().GetStats(); - LOG_S_DEBUG("There are stats for " << tablesIndexStats.size() << " tables"); - for (const auto& [tableLocalID, columnStats] : tablesIndexStats) { - if (!columnStats) { - LOG_S_ERROR("SendPeriodicStats: empty stats"); - continue; - } +void TColumnShard::FillColumnTableStats(const TActorContext& ctx, std::unique_ptr& ev) { + auto tables = TablesManager.GetTables(); + std::optional tableStatsBuilder = + TablesManager.HasPrimaryIndex() ? 
std::make_optional(Counters, Executor(), TablesManager.MutablePrimaryIndex()) + : std::nullopt; + LOG_S_DEBUG("There are stats for " << tables.size() << " tables"); + for (const auto& [pathId, _] : tables) { auto* periodicTableStats = ev->Record.AddTables(); periodicTableStats->SetDatashardId(TabletID()); - periodicTableStats->SetTableLocalId(tableLocalID); + periodicTableStats->SetTableLocalId(pathId); periodicTableStats->SetShardState(2); // NKikimrTxDataShard.EDatashardState.Ready periodicTableStats->SetGeneration(Executor()->Generation()); @@ -381,11 +373,11 @@ void TColumnShard::FillColumnTableStats(const TActorContext& ctx, resourceMetrics->Fill(*periodicTableStats->MutableTabletMetrics()); } - auto* tableStats = periodicTableStats->MutableTableStats(); - FillTxTableStats(tableStats); - ConfigureStats(*columnStats, tableStats); + if (tableStatsBuilder) { + tableStatsBuilder->FillTableStats(pathId, *(periodicTableStats->MutableTableStats())); + } - LOG_S_TRACE("Add stats for table, tableLocalID=" << tableLocalID); + LOG_S_TRACE("Add stats for table, tableLocalID=" << pathId); } } diff --git a/ydb/core/tx/columnshard/columnshard.h b/ydb/core/tx/columnshard/columnshard.h index 90ae1fd8c6b7..186d665153af 100644 --- a/ydb/core/tx/columnshard/columnshard.h +++ b/ydb/core/tx/columnshard/columnshard.h @@ -98,6 +98,7 @@ struct TEvColumnShard { struct TEvInternalScan: public TEventLocal { private: YDB_READONLY(ui64, PathId, 0); + YDB_READONLY_DEF(std::optional, LockId); YDB_ACCESSOR(bool, Reverse, false); YDB_ACCESSOR(ui32, ItemsLimit, 0); YDB_READONLY_DEF(std::vector, ColumnIds); @@ -116,8 +117,9 @@ struct TEvColumnShard { ColumnNames.emplace_back(columnName); } - TEvInternalScan(const ui64 pathId) + TEvInternalScan(const ui64 pathId, const std::optional lockId) : PathId(pathId) + , LockId(lockId) { } diff --git a/ydb/core/tx/columnshard/columnshard__init.cpp b/ydb/core/tx/columnshard/columnshard__init.cpp index aadee209f6dd..09cf1f4ef71f 100644 --- 
a/ydb/core/tx/columnshard/columnshard__init.cpp +++ b/ydb/core/tx/columnshard/columnshard__init.cpp @@ -19,6 +19,9 @@ namespace NKikimr::NColumnShard { using namespace NTabletFlatExecutor; class TTxInit : public TTransactionBase { +private: + const TMonotonic StartInstant = TMonotonic::Now(); + public: TTxInit(TColumnShard* self) : TBase(self) @@ -38,7 +41,6 @@ void TTxInit::SetDefaults() { Self->CurrentSchemeShardId = 0; Self->LastSchemaSeqNo = { }; Self->ProcessingParams.reset(); - Self->LastWriteId = TWriteId{0}; Self->LastPlannedStep = 0; Self->LastPlannedTxId = 0; Self->LastCompletedTx = NOlap::TSnapshot::Zero(); @@ -70,7 +72,6 @@ bool TTxInit::Precharge(TTransactionContext& txc) { ready = ready && Schema::GetSpecialValueOpt(db, Schema::EValueIds::LastSchemaSeqNoGeneration, Self->LastSchemaSeqNo.Generation); ready = ready && Schema::GetSpecialValueOpt(db, Schema::EValueIds::LastSchemaSeqNoRound, Self->LastSchemaSeqNo.Round); ready = ready && Schema::GetSpecialProtoValue(db, Schema::EValueIds::ProcessingParams, Self->ProcessingParams); - ready = ready && Schema::GetSpecialValueOpt(db, Schema::EValueIds::LastWriteId, Self->LastWriteId); ready = ready && Schema::GetSpecialValueOpt(db, Schema::EValueIds::LastPlannedStep, Self->LastPlannedStep); ready = ready && Schema::GetSpecialValueOpt(db, Schema::EValueIds::LastPlannedTxId, Self->LastPlannedTxId); ready = ready && Schema::GetSpecialValueOpt(db, Schema::EValueIds::LastExportNumber, Self->LastExportNo); @@ -104,7 +105,7 @@ bool TTxInit::ReadEverything(TTransactionContext& txc, const TActorContext& ctx) ACFL_DEBUG("step", "TInsertTable::Load_Start"); TMemoryProfileGuard g("TTxInit/InsertTable"); auto localInsertTable = std::make_unique(); - if (!localInsertTable->Load(dbTable, TAppData::TimeProvider->Now())) { + if (!localInsertTable->Load(db, dbTable, TAppData::TimeProvider->Now())) { ACFL_ERROR("step", "TInsertTable::Load_Fails"); return false; } @@ -165,9 +166,9 @@ bool 
TTxInit::ReadEverything(TTransactionContext& txc, const TActorContext& ctx) } Self->TablesManager = std::move(tManagerLocal); - Self->SetCounter(COUNTER_TABLES, Self->TablesManager.GetTables().size()); - Self->SetCounter(COUNTER_TABLE_PRESETS, Self->TablesManager.GetSchemaPresets().size()); - Self->SetCounter(COUNTER_TABLE_TTLS, Self->TablesManager.GetTtl().PathsCount()); + Self->Counters.GetTabletCounters()->SetCounter(COUNTER_TABLES, Self->TablesManager.GetTables().size()); + Self->Counters.GetTabletCounters()->SetCounter(COUNTER_TABLE_PRESETS, Self->TablesManager.GetSchemaPresets().size()); + Self->Counters.GetTabletCounters()->SetCounter(COUNTER_TABLE_TTLS, Self->TablesManager.GetTtl().PathsCount()); ACFL_DEBUG("step", "TTablesManager::Load_Finish"); } @@ -179,7 +180,7 @@ bool TTxInit::ReadEverything(TTransactionContext& txc, const TActorContext& ctx) } while (!rowset.EndOfSet()) { - const TWriteId writeId = TWriteId{ rowset.GetValue() }; + const TInsertWriteId writeId = (TInsertWriteId)rowset.GetValue(); const ui32 writePartId = rowset.GetValue(); NKikimrLongTxService::TLongTxId proto; Y_ABORT_UNLESS(proto.ParseFromString(rowset.GetValue())); @@ -222,6 +223,14 @@ bool TTxInit::ReadEverything(TTransactionContext& txc, const TActorContext& ctx) } Self->SharingSessionsManager = local; } + { + TMemoryProfileGuard g("TTxInit/TInFlightReadsTracker"); + TInFlightReadsTracker local(Self->StoragesManager, Self->Counters.GetRequestsTracingCounters()); + if (!local.LoadFromDatabase(txc.DB)) { + return false; + } + Self->InFlightReadsTracker = std::move(local); + } Self->UpdateInsertTableCounters(); Self->UpdateIndexCounters(); @@ -250,7 +259,7 @@ bool TTxInit::Execute(TTransactionContext& txc, const TActorContext& ctx) { } void TTxInit::Complete(const TActorContext& ctx) { - Self->ProgressTxController->StartOperators(); + Self->Counters.GetCSCounters().Initialization.OnTxInitFinished(TMonotonic::Now() - StartInstant); Self->ProgressTxController->OnTabletInit(); 
Self->SwitchToWork(ctx); NYDBTest::TControllers::GetColumnShardController()->OnTabletInitCompleted(*Self); @@ -258,6 +267,8 @@ void TTxInit::Complete(const TActorContext& ctx) { class TTxUpdateSchema : public TTransactionBase { std::vector NormalizerTasks; + const TMonotonic StartInstant = TMonotonic::Now(); + public: TTxUpdateSchema(TColumnShard* self) : TBase(self) @@ -296,6 +307,7 @@ bool TTxUpdateSchema::Execute(TTransactionContext& txc, const TActorContext&) { void TTxUpdateSchema::Complete(const TActorContext& ctx) { AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("step", "TTxUpdateSchema.Complete"); + Self->Counters.GetCSCounters().Initialization.OnTxUpdateSchemaFinished(TMonotonic::Now() - StartInstant); if (NormalizerTasks.empty()) { AFL_VERIFY(Self->NormalizerController.IsNormalizationFinished())("details", Self->NormalizerController.DebugString()); Self->Execute(new TTxInit(Self), ctx); @@ -361,6 +373,9 @@ void TTxApplyNormalizer::Complete(const TActorContext& ctx) { /// Create local database on tablet start if none class TTxInitSchema : public TTransactionBase { +private: + const TMonotonic StartInstant = TMonotonic::Now(); + public: TTxInitSchema(TColumnShard* self) : TBase(self) @@ -423,6 +438,7 @@ bool TTxInitSchema::Execute(TTransactionContext& txc, const TActorContext&) { } void TTxInitSchema::Complete(const TActorContext& ctx) { + Self->Counters.GetCSCounters().Initialization.OnTxInitSchemaFinished(TMonotonic::Now() - StartInstant); LOG_S_DEBUG("TxInitSchema.Complete at tablet " << Self->TabletID();); Self->Execute(new TTxUpdateSchema(Self), ctx); } diff --git a/ydb/core/tx/columnshard/columnshard__notify_tx_completion.cpp b/ydb/core/tx/columnshard/columnshard__notify_tx_completion.cpp index cc498125c326..883844cc0357 100644 --- a/ydb/core/tx/columnshard/columnshard__notify_tx_completion.cpp +++ b/ydb/core/tx/columnshard/columnshard__notify_tx_completion.cpp @@ -14,7 +14,7 @@ class TTxNotifyTxCompletion : public TTransactionBase { 
LOG_S_DEBUG("TTxNotifyTxCompletion.Execute at tablet " << Self->TabletID()); const ui64 txId = Ev->Get()->Record.GetTxId(); - auto txOperator = Self->ProgressTxController->GetTxOperator(txId); + auto txOperator = Self->ProgressTxController->GetTxOperatorOptional(txId); if (txOperator) { txOperator->RegisterSubscriber(Ev->Sender); return true; diff --git a/ydb/core/tx/columnshard/columnshard__plan_step.cpp b/ydb/core/tx/columnshard/columnshard__plan_step.cpp index 80dda8112e0b..33420df5905c 100644 --- a/ydb/core/tx/columnshard/columnshard__plan_step.cpp +++ b/ydb/core/tx/columnshard/columnshard__plan_step.cpp @@ -102,10 +102,10 @@ bool TTxPlanStep::Execute(TTransactionContext& txc, const TActorContext& ctx) { Result = std::make_unique(Self->TabletID(), step); - Self->IncCounter(COUNTER_PLAN_STEP_ACCEPTED); + Self->Counters.GetTabletCounters()->IncCounter(COUNTER_PLAN_STEP_ACCEPTED); if (plannedCount > 0 || Self->ProgressTxController->HaveOutdatedTxs()) { - Self->EnqueueProgressTx(ctx); + Self->EnqueueProgressTx(ctx, std::nullopt); } return true; } diff --git a/ydb/core/tx/columnshard/columnshard__progress_tx.cpp b/ydb/core/tx/columnshard/columnshard__progress_tx.cpp index 0b035d591dd5..73a4a0200d97 100644 --- a/ydb/core/tx/columnshard/columnshard__progress_tx.cpp +++ b/ydb/core/tx/columnshard/columnshard__progress_tx.cpp @@ -2,37 +2,62 @@ #include "columnshard_schema.h" #include + #include namespace NKikimr::NColumnShard { -class TColumnShard::TTxProgressTx : public TTransactionBase { +class TColumnShard::TTxProgressTx: public TTransactionBase { +private: + bool AbortedThroughRemoveExpired = false; + TTxController::ITransactionOperator::TPtr TxOperator; + const ui32 TabletTxNo; + std::optional LastCompletedTx; + std::optional PlannedQueueItem; + std::optional StartExecution; + const TMonotonic ConstructionInstant = TMonotonic::Now(); + public: TTxProgressTx(TColumnShard* self) : TTransactionBase(self) - , TabletTxNo(++Self->TabletTxCounter) - {} + , 
TabletTxNo(++Self->TabletTxCounter) { + } - TTxType GetTxType() const override { return TXTYPE_PROGRESS; } + TTxType GetTxType() const override { + return TXTYPE_PROGRESS; + } bool Execute(TTransactionContext& txc, const TActorContext& ctx) override { - NActors::TLogContextGuard logGuard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", Self->TabletID())("tx_state", "execute"); + NActors::TLogContextGuard logGuard = + NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", Self->TabletID())("tx_state", "execute"); Y_ABORT_UNLESS(Self->ProgressTxInFlight); - Self->TabletCounters->Simple()[COUNTER_TX_COMPLETE_LAG].Set(Self->GetTxCompleteLag().MilliSeconds()); + Self->Counters.GetTabletCounters()->SetCounter(COUNTER_TX_COMPLETE_LAG, Self->GetTxCompleteLag().MilliSeconds()); - size_t removedCount = Self->ProgressTxController->CleanExpiredTxs(txc); + const size_t removedCount = Self->ProgressTxController->CleanExpiredTxs(txc); if (removedCount > 0) { // We cannot continue with this transaction, start a new transaction + AbortedThroughRemoveExpired = true; Self->Execute(new TTxProgressTx(Self), ctx); return true; } // Process a single transaction at the front of the queue - auto plannedItem = Self->ProgressTxController->StartPlannedTx(); + const auto plannedItem = Self->ProgressTxController->GetFirstPlannedTx(); if (!!plannedItem) { PlannedQueueItem.emplace(plannedItem->PlanStep, plannedItem->TxId); ui64 step = plannedItem->PlanStep; ui64 txId = plannedItem->TxId; + TxOperator = Self->ProgressTxController->GetTxOperatorVerified(txId); + if (auto txPrepare = TxOperator->BuildTxPrepareForProgress(Self)) { + AbortedThroughRemoveExpired = true; + Self->ProgressTxInFlight = txId; + Self->Execute(txPrepare.release(), ctx); + return true; + } else { + Self->ProgressTxController->PopFirstPlannedTx(); + } + StartExecution = TMonotonic::Now(); + LastCompletedTx = NOlap::TSnapshot(step, txId); if (LastCompletedTx > 
Self->LastCompletedTx) { NIceDb::TNiceDb db(txc.DB); @@ -40,43 +65,51 @@ class TColumnShard::TTxProgressTx : public TTransactionBase { Schema::SaveSpecialValue(db, Schema::EValueIds::LastCompletedTxId, LastCompletedTx->GetTxId()); } - TxOperator = Self->ProgressTxController->GetVerifiedTxOperator(txId); - AFL_VERIFY(TxOperator->ExecuteOnProgress(*Self, NOlap::TSnapshot(step, txId), txc)); - Self->ProgressTxController->FinishPlannedTx(txId, txc); + AFL_VERIFY(TxOperator->ProgressOnExecute(*Self, NOlap::TSnapshot(step, txId), txc)); + Self->ProgressTxController->ProgressOnExecute(txId, txc); + Self->Counters.GetTabletCounters()->IncCounter(COUNTER_PLANNED_TX_COMPLETED); + } + Self->ProgressTxInFlight = std::nullopt; + if (!!Self->ProgressTxController->GetPlannedTx()) { + Self->EnqueueProgressTx(ctx, std::nullopt); } return true; } void Complete(const TActorContext& ctx) override { - NActors::TLogContextGuard logGuard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", Self->TabletID())("tx_state", "complete"); + if (AbortedThroughRemoveExpired) { + return; + } + NActors::TLogContextGuard logGuard = + NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", Self->TabletID())("tx_state", "complete"); if (TxOperator) { - TxOperator->CompleteOnProgress(*Self, ctx); + TxOperator->ProgressOnComplete(*Self, ctx); Self->RescheduleWaitingReads(); } if (PlannedQueueItem) { - Self->GetProgressTxController().CompleteRunningTx(*PlannedQueueItem); + AFL_VERIFY(TxOperator); + Self->GetProgressTxController().GetCounters().OnTxProgressLag( + TxOperator->GetOpType(), TMonotonic::Now() - TMonotonic::MilliSeconds(PlannedQueueItem->Step)); + Self->GetProgressTxController().ProgressOnComplete(*PlannedQueueItem); } if (LastCompletedTx) { Self->LastCompletedTx = std::max(*LastCompletedTx, Self->LastCompletedTx); } - Self->ProgressTxInFlight = false; - if (!!Self->ProgressTxController->GetPlannedTx()) { - Self->EnqueueProgressTx(ctx); + 
if (StartExecution) { + Self->GetProgressTxController().GetCounters().OnTxExecuteDuration(TxOperator->GetOpType(), TMonotonic::Now() - *StartExecution); + Self->GetProgressTxController().GetCounters().OnTxLiveDuration(TxOperator->GetOpType(), TMonotonic::Now() - ConstructionInstant); } Self->SetupIndexation(); } - -private: - TTxController::ITransactionOperator::TPtr TxOperator; - const ui32 TabletTxNo; - std::optional LastCompletedTx; - std::optional PlannedQueueItem; }; -void TColumnShard::EnqueueProgressTx(const TActorContext& ctx) { +void TColumnShard::EnqueueProgressTx(const TActorContext& ctx, const std::optional continueTxId) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "EnqueueProgressTx")("tablet_id", TabletID()); - if (!ProgressTxInFlight) { - ProgressTxInFlight = true; + if (continueTxId) { + AFL_VERIFY(!ProgressTxInFlight || ProgressTxInFlight == continueTxId)("current", ProgressTxInFlight)("expected", continueTxId); + } + if (!ProgressTxInFlight || ProgressTxInFlight == continueTxId) { + ProgressTxInFlight = continueTxId.value_or(0); Execute(new TTxProgressTx(this), ctx); } } @@ -101,4 +134,4 @@ void TColumnShard::Handle(TEvColumnShard::TEvCheckPlannedTransaction::TPtr& ev, // For now do not return result for not finished tx. 
It would be sent in TTxProgressTx::Complete() } -} +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/columnshard__propose_transaction.cpp b/ydb/core/tx/columnshard/columnshard__propose_transaction.cpp index d42ad4fc8d4f..d4ded82be3d8 100644 --- a/ydb/core/tx/columnshard/columnshard__propose_transaction.cpp +++ b/ydb/core/tx/columnshard/columnshard__propose_transaction.cpp @@ -25,13 +25,14 @@ class TTxProposeTransaction: public NTabletFlatExecutor::TTransactionBaseIncCounter(COUNTER_PREPARE_REQUEST); + Self->Counters.GetTabletCounters()->IncCounter(COUNTER_PREPARE_REQUEST); auto& record = Proto(Ev->Get()); const auto txKind = record.GetTxKind(); const ui64 txId = record.GetTxId(); const auto& txBody = record.GetTxBody(); - NActors::TLogContextGuard lGuard = NActors::TLogContextBuilder::Build()("tablet_id", Self->TabletID())("tx_id", txId)("this", (ui64)this); + NActors::TLogContextGuard lGuard = + NActors::TLogContextBuilder::Build()("tablet_id", Self->TabletID())("tx_id", txId)("this", (ui64)this); if (txKind == NKikimrTxColumnShard::TX_KIND_TTL) { auto proposeResult = ProposeTtlDeprecated(txBody); @@ -51,7 +52,7 @@ class TTxProposeTransaction: public NTabletFlatExecutor::TTransactionBaseCurrentSchemeShardId = record.GetSchemeShardId(); Schema::SaveSpecialValue(db, Schema::EValueIds::CurrentSchemeShardId, Self->CurrentSchemeShardId); } else { - Y_ABORT_UNLESS(Self->CurrentSchemeShardId == record.GetSchemeShardId()); + AFL_VERIFY(Self->CurrentSchemeShardId == record.GetSchemeShardId()); } } std::optional msgSeqNo; @@ -79,28 +80,34 @@ class TTxProposeTransaction: public NTabletFlatExecutor::TTransactionBaseTabletID())("request_tx", TxInfo->DebugString())( - "this", (ui64)this)("op_tx", TxOperator->GetTxInfo().DebugString()); + NActors::TLogContextGuard lGuard = NActors::TLogContextBuilder::Build()("tablet_id", Self->TabletID())( + "request_tx", TxInfo->DebugString())("this", (ui64)this)("op_tx", TxOperator->GetTxInfo().DebugString()); + + 
Self->TryRegisterMediatorTimeCast(); if (TxOperator->IsFail()) { TxOperator->SendReply(*Self, ctx); + return; + } + auto internalOp = Self->GetProgressTxController().GetTxOperatorOptional(txId); + if (!internalOp) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "removed tx operator"); + return; + } + NActors::TLogContextGuard lGuardTx = + NActors::TLogContextBuilder::Build()("int_op_tx", internalOp->GetTxInfo().DebugString())("int_this", (ui64)internalOp.get()); + if (!internalOp->CheckTxInfoForReply(*TxInfo)) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "deprecated tx operator"); + return; + } + + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "actual tx operator"); + if (internalOp->IsAsync()) { + Self->GetProgressTxController().StartProposeOnComplete(*internalOp, ctx); } else { - auto internalOp = Self->GetProgressTxController().GetVerifiedTxOperator(TxOperator->GetTxId()); - NActors::TLogContextGuard lGuardTx = NActors::TLogContextBuilder::Build()("int_op_tx", internalOp->GetTxInfo().DebugString()); - if (!TxOperator->CheckTxInfoForReply(*TxInfo)) { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "deprecated tx operator"); - return; - } else { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "actual tx operator"); - } - if (TxOperator->IsAsync()) { - Self->GetProgressTxController().StartProposeOnComplete(txId, ctx); - } else { - Self->GetProgressTxController().FinishProposeOnComplete(txId, ctx); - } + Self->GetProgressTxController().FinishProposeOnComplete(*internalOp, ctx); } - Self->TryRegisterMediatorTimeCast(); } TTxType GetTxType() const override { @@ -138,11 +145,14 @@ class TTxProposeTransaction: public NTabletFlatExecutor::TTransactionBaseTablesManager.GetPrimaryIndexSafe().GetVersionedIndex().GetLastSchema()->GetSchema(); - auto ttlColumn = schema->GetFieldByName(columnName); - if (!ttlColumn) { - return TTxController::TProposeResult(NKikimrTxColumnShard::EResultStatus::SCHEMA_ERROR, "TTL tx wrong TTL column '" + 
columnName + "'"); + auto schemaSnapshot = Self->TablesManager.GetPrimaryIndexSafe().GetVersionedIndex().GetLastSchema(); + auto schema = schemaSnapshot->GetSchema(); + auto index = schemaSnapshot->GetColumnIdOptional(columnName); + if (!index) { + return TTxController::TProposeResult( + NKikimrTxColumnShard::EResultStatus::SCHEMA_ERROR, "TTL tx wrong TTL column '" + columnName + "'"); } + auto ttlColumn = schemaSnapshot->GetFieldByColumnIdVerified(*index); const TInstant now = TlsActivationContext ? AppData()->TimeProvider->Now() : TInstant::Now(); for (ui64 pathId : ttlBody.GetPathIds()) { diff --git a/ydb/core/tx/columnshard/columnshard__scan.cpp b/ydb/core/tx/columnshard/columnshard__scan.cpp index a749e3c4f571..dd60f823139e 100644 --- a/ydb/core/tx/columnshard/columnshard__scan.cpp +++ b/ydb/core/tx/columnshard/columnshard__scan.cpp @@ -5,6 +5,7 @@ #include "engines/reader/transaction/tx_internal_scan.h" #include +#include namespace NKikimr::NColumnShard { @@ -29,9 +30,9 @@ void TColumnShard::Handle(TEvColumnShard::TEvScan::TPtr& ev, const TActorContext return; } - LastAccessTime = TAppData::TimeProvider->Now(); - ScanTxInFlight.insert({txId, LastAccessTime}); - SetCounter(COUNTER_SCAN_IN_FLY, ScanTxInFlight.size()); + Counters.GetColumnTablesCounters()->GetPathIdCounter(record.GetLocalPathId())->OnReadEvent(); + ScanTxInFlight.insert({txId, TAppData::TimeProvider->Now()}); + Counters.GetTabletCounters()->SetCounter(COUNTER_SCAN_IN_FLY, ScanTxInFlight.size()); Execute(new NOlap::NReader::TTxScan(this, ev), ctx); } diff --git a/ydb/core/tx/columnshard/columnshard__write.cpp b/ydb/core/tx/columnshard/columnshard__write.cpp index fb70e400254e..8410fe52ad16 100644 --- a/ydb/core/tx/columnshard/columnshard__write.cpp +++ b/ydb/core/tx/columnshard/columnshard__write.cpp @@ -1,9 +1,16 @@ #include "columnshard_impl.h" -#include "blobs_action/transaction/tx_write.h" + #include "blobs_action/transaction/tx_draft.h" +#include "blobs_action/transaction/tx_write.h" 
+#include "common/limits.h" #include "counters/columnshard.h" +#include "engines/column_engine_logs.h" #include "operations/batch_builder/builder.h" +#include "operations/manager.h" #include "operations/write_data.h" +#include "transactions/operators/ev_write/primary.h" +#include "transactions/operators/ev_write/secondary.h" +#include "transactions/operators/ev_write/sync.h" #include #include @@ -12,41 +19,36 @@ namespace NKikimr::NColumnShard { using namespace NTabletFlatExecutor; -void TColumnShard::OverloadWriteFail(const EOverloadStatus overloadReason, const NEvWrite::TWriteData& writeData, const ui64 cookie, std::unique_ptr&& event, const TActorContext& ctx) { - IncCounter(COUNTER_WRITE_FAIL); +void TColumnShard::OverloadWriteFail(const EOverloadStatus overloadReason, const NEvWrite::TWriteMeta& writeMeta, const ui64 writeSize, const ui64 cookie, + std::unique_ptr&& event, const TActorContext& ctx) { + Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_FAIL); switch (overloadReason) { case EOverloadStatus::Disk: - IncCounter(COUNTER_OUT_OF_SPACE); + Counters.OnWriteOverloadDisk(); break; case EOverloadStatus::InsertTable: - IncCounter(COUNTER_WRITE_OVERLOAD); - CSCounters.OnOverloadInsertTable(writeData.GetSize()); + Counters.OnWriteOverloadInsertTable(writeSize); break; case EOverloadStatus::OverloadMetadata: - IncCounter(COUNTER_WRITE_OVERLOAD); - CSCounters.OnOverloadMetadata(writeData.GetSize()); + Counters.OnWriteOverloadMetadata(writeSize); break; case EOverloadStatus::ShardTxInFly: - IncCounter(COUNTER_WRITE_OVERLOAD); - CSCounters.OnOverloadShardTx(writeData.GetSize()); + Counters.OnWriteOverloadShardTx(writeSize); break; case EOverloadStatus::ShardWritesInFly: - IncCounter(COUNTER_WRITE_OVERLOAD); - CSCounters.OnOverloadShardWrites(writeData.GetSize()); + Counters.OnWriteOverloadShardWrites(writeSize); break; case EOverloadStatus::ShardWritesSizeInFly: - IncCounter(COUNTER_WRITE_OVERLOAD); - 
CSCounters.OnOverloadShardWritesSize(writeData.GetSize()); + Counters.OnWriteOverloadShardWritesSize(writeSize); break; case EOverloadStatus::None: Y_ABORT("invalid function usage"); } - LOG_S_INFO("Write (overload) " << writeData.GetSize() << " bytes into pathId " << writeData.GetWriteMeta().GetTableId() - << " overload reason: [" << overloadReason << "]" - << " at tablet " << TabletID()); + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "write_overload")("size", writeSize)("path_id", writeMeta.GetTableId())( + "reason", overloadReason); - ctx.Send(writeData.GetWriteMeta().GetSource(), event.release(), 0, cookie); + ctx.Send(writeMeta.GetSource(), event.release(), 0, cookie); } TColumnShard::EOverloadStatus TColumnShard::CheckOverloaded(const ui64 tableId) const { @@ -58,7 +60,7 @@ TColumnShard::EOverloadStatus TColumnShard::CheckOverloaded(const ui64 tableId) return EOverloadStatus::InsertTable; } - CSCounters.OnIndexMetadataLimit(NOlap::IColumnEngine::GetMetadataLimit()); + Counters.GetCSCounters().OnIndexMetadataLimit(NOlap::IColumnEngine::GetMetadataLimit()); if (TablesManager.GetPrimaryIndex() && TablesManager.GetPrimaryIndex()->IsOverloadedByMetadata(NOlap::IColumnEngine::GetMetadataLimit())) { return EOverloadStatus::OverloadMetadata; } @@ -67,22 +69,26 @@ TColumnShard::EOverloadStatus TColumnShard::CheckOverloaded(const ui64 tableId) ui64 writesLimit = Settings.OverloadWritesInFlight; ui64 writesSizeLimit = Settings.OverloadWritesSizeInFlight; if (txLimit && Executor()->GetStats().TxInFly > txLimit) { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "shard_overload")("reason", "tx_in_fly")("sum", Executor()->GetStats().TxInFly)("limit", txLimit); + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "shard_overload")("reason", "tx_in_fly")("sum", Executor()->GetStats().TxInFly)( + "limit", txLimit); return EOverloadStatus::ShardTxInFly; } - if (writesLimit && WritesMonitor.GetWritesInFlight() > writesLimit) { - 
AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "shard_overload")("reason", "writes_in_fly")("sum", WritesMonitor.GetWritesInFlight())("limit", writesLimit); + if (writesLimit && Counters.GetWritesMonitor()->GetWritesInFlight() > writesLimit) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "shard_overload")("reason", "writes_in_fly")( + "sum", Counters.GetWritesMonitor()->GetWritesInFlight())("limit", writesLimit); return EOverloadStatus::ShardWritesInFly; } - if (writesSizeLimit && WritesMonitor.GetWritesSizeInFlight() > writesSizeLimit) { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "shard_overload")("reason", "writes_size_in_fly")("sum", WritesMonitor.GetWritesSizeInFlight())("limit", writesSizeLimit); + if (writesSizeLimit && Counters.GetWritesMonitor()->GetWritesSizeInFlight() > writesSizeLimit) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "shard_overload")("reason", "writes_size_in_fly")( + "sum", Counters.GetWritesMonitor()->GetWritesSizeInFlight())("limit", writesSizeLimit); return EOverloadStatus::ShardWritesSizeInFly; } return EOverloadStatus::None; } void TColumnShard::Handle(TEvPrivate::TEvWriteBlobsResult::TPtr& ev, const TActorContext& ctx) { - NActors::TLogContextGuard gLogging = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletID())("event", "TEvWriteBlobsResult"); + NActors::TLogContextGuard gLogging = + NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletID())("event", "TEvWriteBlobsResult"); auto& putResult = ev->Get()->GetPutResult(); OnYellowChannels(putResult); @@ -90,25 +96,25 @@ void TColumnShard::Handle(TEvPrivate::TEvWriteBlobsResult::TPtr& ev, const TActo auto baseAggregations = wBuffer.GetAggregations(); wBuffer.InitReplyReceived(TMonotonic::Now()); - auto wg = WritesMonitor.FinishWrite(wBuffer.GetSumSize(), wBuffer.GetAggregations().size()); + Counters.GetWritesMonitor()->OnFinishWrite(wBuffer.GetSumSize(), 
wBuffer.GetAggregations().size()); for (auto&& aggr : baseAggregations) { - const auto& writeMeta = aggr->GetWriteData()->GetWriteMeta(); + const auto& writeMeta = aggr->GetWriteMeta(); if (!TablesManager.IsReadyForWrite(writeMeta.GetTableId())) { ACFL_ERROR("event", "absent_pathId")("path_id", writeMeta.GetTableId())("has_index", TablesManager.HasPrimaryIndex()); - IncCounter(COUNTER_WRITE_FAIL); + Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_FAIL); auto result = std::make_unique(TabletID(), writeMeta, NKikimrTxColumnShard::EResultStatus::ERROR); ctx.Send(writeMeta.GetSource(), result.release()); - CSCounters.OnFailedWriteResponse(EWriteFailReason::NoTable); + Counters.GetCSCounters().OnFailedWriteResponse(EWriteFailReason::NoTable); wBuffer.RemoveData(aggr, StoragesManager->GetInsertOperator()); continue; } if (putResult.GetPutStatus() != NKikimrProto::OK) { - CSCounters.OnWritePutBlobsFail(TMonotonic::Now() - writeMeta.GetWriteStartInstant()); - IncCounter(COUNTER_WRITE_FAIL); + Counters.GetCSCounters().OnWritePutBlobsFail(TMonotonic::Now() - writeMeta.GetWriteStartInstant()); + Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_FAIL); auto errCode = NKikimrTxColumnShard::EResultStatus::STORAGE_ERROR; if (putResult.GetPutStatus() == NKikimrProto::TIMEOUT || putResult.GetPutStatus() == NKikimrProto::DEADLINE) { @@ -123,26 +129,26 @@ void TColumnShard::Handle(TEvPrivate::TEvWriteBlobsResult::TPtr& ev, const TActo auto result = std::make_unique(TabletID(), writeMeta, errCode); ctx.Send(writeMeta.GetSource(), result.release()); } else { - auto operation = OperationsManager->GetOperation((TWriteId)writeMeta.GetWriteId()); + auto operation = OperationsManager->GetOperation((TOperationWriteId)writeMeta.GetWriteId()); Y_ABORT_UNLESS(operation); - auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), operation->GetLockId(), NKikimrDataEvents::TEvWriteResult::STATUS_INTERNAL_ERROR, - ev->Get()->GetErrorMessage() ? 
ev->Get()->GetErrorMessage() : "put data fails"); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), operation->GetLockId(), + ev->Get()->GetWriteResultStatus(), ev->Get()->GetErrorMessage() ? ev->Get()->GetErrorMessage() : "put data fails"); ctx.Send(writeMeta.GetSource(), result.release(), 0, operation->GetCookie()); } - CSCounters.OnFailedWriteResponse(EWriteFailReason::PutBlob); + Counters.GetCSCounters().OnFailedWriteResponse(EWriteFailReason::PutBlob); wBuffer.RemoveData(aggr, StoragesManager->GetInsertOperator()); } else { const TMonotonic now = TMonotonic::Now(); - CSCounters.OnWritePutBlobsSuccess(now - writeMeta.GetWriteStartInstant()); - CSCounters.OnWriteMiddle1PutBlobsSuccess(now - writeMeta.GetWriteMiddle1StartInstant()); - CSCounters.OnWriteMiddle2PutBlobsSuccess(now - writeMeta.GetWriteMiddle2StartInstant()); - CSCounters.OnWriteMiddle3PutBlobsSuccess(now - writeMeta.GetWriteMiddle3StartInstant()); - CSCounters.OnWriteMiddle4PutBlobsSuccess(now - writeMeta.GetWriteMiddle4StartInstant()); - CSCounters.OnWriteMiddle5PutBlobsSuccess(now - writeMeta.GetWriteMiddle5StartInstant()); - CSCounters.OnWriteMiddle6PutBlobsSuccess(now - writeMeta.GetWriteMiddle6StartInstant()); + Counters.OnWritePutBlobsSuccess(now - writeMeta.GetWriteStartInstant(), aggr->GetRows()); + Counters.GetCSCounters().OnWriteMiddle1PutBlobsSuccess(now - writeMeta.GetWriteMiddle1StartInstant()); + Counters.GetCSCounters().OnWriteMiddle2PutBlobsSuccess(now - writeMeta.GetWriteMiddle2StartInstant()); + Counters.GetCSCounters().OnWriteMiddle3PutBlobsSuccess(now - writeMeta.GetWriteMiddle3StartInstant()); + Counters.GetCSCounters().OnWriteMiddle4PutBlobsSuccess(now - writeMeta.GetWriteMiddle4StartInstant()); + Counters.GetCSCounters().OnWriteMiddle5PutBlobsSuccess(now - writeMeta.GetWriteMiddle5StartInstant()); + Counters.GetCSCounters().OnWriteMiddle6PutBlobsSuccess(now - writeMeta.GetWriteMiddle6StartInstant()); LOG_S_DEBUG("Write (record) into pathId " << 
writeMeta.GetTableId() - << (writeMeta.GetWriteId() ? (" writeId " + ToString(writeMeta.GetWriteId())).c_str() : "") << " at tablet " << TabletID()); - + << (writeMeta.GetWriteId() ? (" writeId " + ToString(writeMeta.GetWriteId())).c_str() : "") + << " at tablet " << TabletID()); } } Execute(new TTxWrite(this, ev), ctx); @@ -153,8 +159,7 @@ void TColumnShard::Handle(TEvPrivate::TEvWriteDraft::TPtr& ev, const TActorConte } void TColumnShard::Handle(TEvColumnShard::TEvWrite::TPtr& ev, const TActorContext& ctx) { - CSCounters.OnStartWriteRequest(); - LastAccessTime = TAppData::TimeProvider->Now(); + Counters.GetCSCounters().OnStartWriteRequest(); const auto& record = Proto(ev->Get()); const ui64 tableId = record.GetTableId(); @@ -163,6 +168,8 @@ void TColumnShard::Handle(TEvColumnShard::TEvWrite::TPtr& ev, const TActorContex const TString dedupId = record.GetDedupId(); const auto source = ev->Sender; + Counters.GetColumnTablesCounters()->GetPathIdCounter(tableId)->OnWriteEvent(); + std::optional granuleShardingVersion; if (record.HasGranuleShardingVersion()) { granuleShardingVersion = record.GetGranuleShardingVersion(); @@ -177,181 +184,336 @@ void TColumnShard::Handle(TEvColumnShard::TEvWrite::TPtr& ev, const TActorContex writeMeta.SetLongTxId(NLongTxService::TLongTxId::FromProto(record.GetLongTxId())); writeMeta.SetWritePartId(record.GetWritePartId()); - const auto returnFail = [&](const NColumnShard::ECumulativeCounters signalIndex) { - IncCounter(signalIndex); + const auto returnFail = [&](const NColumnShard::ECumulativeCounters signalIndex, const EWriteFailReason reason) { + Counters.GetTabletCounters()->IncCounter(signalIndex); ctx.Send(source, std::make_unique(TabletID(), writeMeta, NKikimrTxColumnShard::EResultStatus::ERROR)); + Counters.GetCSCounters().OnFailedWriteResponse(reason); return; }; if (!AppDataVerified().ColumnShardConfig.GetWritingEnabled()) { AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "skip_writing")("reason", "disabled"); - 
CSCounters.OnFailedWriteResponse(EWriteFailReason::Disabled); - return returnFail(COUNTER_WRITE_FAIL); + return returnFail(COUNTER_WRITE_FAIL, EWriteFailReason::Disabled); } if (!TablesManager.IsReadyForWrite(tableId)) { - LOG_S_NOTICE("Write (fail) into pathId:" << writeMeta.GetTableId() << (TablesManager.HasPrimaryIndex()? "": " no index") - << " at tablet " << TabletID()); + LOG_S_NOTICE("Write (fail) into pathId:" << writeMeta.GetTableId() << (TablesManager.HasPrimaryIndex() ? "" : " no index") + << " at tablet " << TabletID()); + + return returnFail(COUNTER_WRITE_FAIL, EWriteFailReason::NoTable); + } + + { + auto& portionsIndex = + TablesManager.GetPrimaryIndexAsVerified().GetGranuleVerified(writeMeta.GetTableId()).GetPortionsIndex(); + { + const ui64 minMemoryRead = portionsIndex.GetMinRawMemoryRead(); + if (NOlap::TGlobalLimits::DefaultReduceMemoryIntervalLimit < minMemoryRead) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "overlimit")("reason", "read_raw_memory")("current", minMemoryRead)( + "limit", NOlap::TGlobalLimits::DefaultReduceMemoryIntervalLimit)("table_id", writeMeta.GetTableId()); + return returnFail(COUNTER_WRITE_FAIL, EWriteFailReason::OverlimitReadRawMemory); + } + } - CSCounters.OnFailedWriteResponse(EWriteFailReason::NoTable); - return returnFail(COUNTER_WRITE_FAIL); + { + const ui64 minMemoryRead = portionsIndex.GetMinBlobMemoryRead(); + if (NOlap::TGlobalLimits::DefaultBlobsMemoryIntervalLimit < minMemoryRead) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "overlimit")("reason", "read_blob_memory")("current", minMemoryRead)( + "limit", NOlap::TGlobalLimits::DefaultBlobsMemoryIntervalLimit)("table_id", writeMeta.GetTableId()); + return returnFail(COUNTER_WRITE_FAIL, EWriteFailReason::OverlimitReadBlobMemory); + } + } } const auto& snapshotSchema = TablesManager.GetPrimaryIndex()->GetVersionedIndex().GetLastSchema(); auto arrowData = std::make_shared(snapshotSchema); if (!arrowData->ParseFromProto(record)) { - 
LOG_S_ERROR("Write (fail) " << record.GetData().size() << " bytes into pathId " << writeMeta.GetTableId() - << " at tablet " << TabletID()); - CSCounters.OnFailedWriteResponse(EWriteFailReason::IncorrectSchema); - return returnFail(COUNTER_WRITE_FAIL); + LOG_S_ERROR( + "Write (fail) " << record.GetData().size() << " bytes into pathId " << writeMeta.GetTableId() << " at tablet " << TabletID()); + return returnFail(COUNTER_WRITE_FAIL, EWriteFailReason::IncorrectSchema); } NEvWrite::TWriteData writeData(writeMeta, arrowData, snapshotSchema->GetIndexInfo().GetReplaceKey(), StoragesManager->GetInsertOperator()->StartWritingAction(NOlap::NBlobOperations::EConsumer::WRITING)); auto overloadStatus = CheckOverloaded(tableId); if (overloadStatus != EOverloadStatus::None) { - std::unique_ptr result = std::make_unique(TabletID(), writeData.GetWriteMeta(), NKikimrTxColumnShard::EResultStatus::OVERLOADED); - OverloadWriteFail(overloadStatus, writeData, cookie, std::move(result), ctx); - CSCounters.OnFailedWriteResponse(EWriteFailReason::Overload); + std::unique_ptr result = std::make_unique( + TabletID(), writeData.GetWriteMeta(), NKikimrTxColumnShard::EResultStatus::OVERLOADED); + OverloadWriteFail(overloadStatus, writeData.GetWriteMeta(), writeData.GetSize(), cookie, std::move(result), ctx); + Counters.GetCSCounters().OnFailedWriteResponse(EWriteFailReason::Overload); } else { if (ui64 writeId = (ui64)HasLongTxWrite(writeMeta.GetLongTxIdUnsafe(), writeMeta.GetWritePartId())) { - LOG_S_DEBUG("Write (duplicate) into pathId " << writeMeta.GetTableId() - << " longTx " << writeMeta.GetLongTxIdUnsafe().ToString() - << " at tablet " << TabletID()); + LOG_S_DEBUG("Write (duplicate) into pathId " << writeMeta.GetTableId() << " longTx " << writeMeta.GetLongTxIdUnsafe().ToString() + << " at tablet " << TabletID()); - IncCounter(COUNTER_WRITE_DUPLICATE); + Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_DUPLICATE); - auto result = std::make_unique( - TabletID(), writeMeta, writeId, 
NKikimrTxColumnShard::EResultStatus::SUCCESS); + auto result = + std::make_unique(TabletID(), writeMeta, writeId, NKikimrTxColumnShard::EResultStatus::SUCCESS); ctx.Send(writeMeta.GetSource(), result.release()); - CSCounters.OnFailedWriteResponse(EWriteFailReason::LongTxDuplication); + Counters.GetCSCounters().OnFailedWriteResponse(EWriteFailReason::LongTxDuplication); return; } - WritesMonitor.RegisterWrite(writeData.GetSize()); + Counters.GetWritesMonitor()->OnStartWrite(writeData.GetSize()); LOG_S_DEBUG("Write (blob) " << writeData.GetSize() << " bytes into pathId " << writeMeta.GetTableId() - << (writeMeta.GetWriteId()? (" writeId " + ToString(writeMeta.GetWriteId())).c_str() : " ") - << WritesMonitor.DebugString() - << " at tablet " << TabletID()); + << (writeMeta.GetWriteId() ? (" writeId " + ToString(writeMeta.GetWriteId())).c_str() : " ") + << Counters.GetWritesMonitor()->DebugString() << " at tablet " << TabletID()); writeData.MutableWriteMeta().SetWriteMiddle1StartInstant(TMonotonic::Now()); - std::shared_ptr task = std::make_shared(TabletID(), SelfId(), BufferizationWriteActorId, std::move(writeData), - snapshotSchema, GetLastTxSnapshot()); + std::shared_ptr task = std::make_shared( + TabletID(), SelfId(), BufferizationWriteActorId, std::move(writeData), snapshotSchema, GetLastTxSnapshot(), Counters.GetCSCounters().WritingCounters); NConveyor::TInsertServiceOperator::AsyncTaskToExecute(task); } } class TCommitOperation { +private: + const ui64 TabletId; + public: using TPtr = std::shared_ptr; - bool Parse(const NEvents::TDataEvents::TEvWrite& evWrite) { - if (evWrite.Record.GetLocks().GetLocks().size() != 1) { - return false; + bool NeedSyncLocks() const { + return SendingShards.size() && ReceivingShards.size(); + } + + bool IsPrimary() const { + AFL_VERIFY(NeedSyncLocks()); + return TabletId == *ReceivingShards.begin(); + } + + TCommitOperation(const ui64 tabletId) + : TabletId(tabletId) { + } + + TConclusionStatus Parse(const 
NEvents::TDataEvents::TEvWrite& evWrite) { + AFL_VERIFY(evWrite.Record.GetLocks().GetLocks().size() >= 1); + auto& locks = evWrite.Record.GetLocks(); + auto& lock = evWrite.Record.GetLocks().GetLocks()[0]; + SendingShards = std::set(locks.GetSendingShards().begin(), locks.GetSendingShards().end()); + if ((ui32)locks.GetSendingShards().size() != SendingShards.size()) { + return TConclusionStatus::Fail("duplications in SendingShards proto field"); + } + ReceivingShards = std::set(locks.GetReceivingShards().begin(), locks.GetReceivingShards().end()); + if ((ui32)locks.GetReceivingShards().size() != ReceivingShards.size()) { + return TConclusionStatus::Fail("duplications in ReceivingShards proto field"); } - LockId = evWrite.Record.GetLocks().GetLocks()[0].GetLockId(); TxId = evWrite.Record.GetTxId(); - KqpLocks = evWrite.Record.GetLocks(); - return !!LockId && !!TxId && KqpLocks.GetOp() == NKikimrDataEvents::TKqpLocks::Commit; + LockId = lock.GetLockId(); + Generation = lock.GetGeneration(); + InternalGenerationCounter = lock.GetCounter(); + if (!GetLockId()) { + return TConclusionStatus::Fail("not initialized lock info in commit message"); + } + if (!TxId) { + return TConclusionStatus::Fail("not initialized TxId for commit event"); + } + if (evWrite.Record.GetLocks().GetOp() != NKikimrDataEvents::TKqpLocks::Commit) { + return TConclusionStatus::Fail("incorrect message type"); + } + if (!ReceivingShards.size() || !SendingShards.size()) { + ReceivingShards.clear(); + SendingShards.clear(); + } else { + if (!ReceivingShards.contains(TabletId) && !SendingShards.contains(TabletId)) { + return TConclusionStatus::Fail("shard is incorrect for sending/receiving lists"); + } + } + return TConclusionStatus::Success(); + } + + std::unique_ptr CreateTxOperator( + const NKikimrTxColumnShard::ETransactionKind kind) const { + AFL_VERIFY(ReceivingShards.size()); + if (IsPrimary()) { + return std::make_unique( + TFullTxInfo::BuildFake(kind), LockId, ReceivingShards, SendingShards); + 
} else { + return std::make_unique( + TFullTxInfo::BuildFake(kind), LockId, *ReceivingShards.begin(), ReceivingShards.contains(TabletId)); + } } private: - NKikimrDataEvents::TKqpLocks KqpLocks; YDB_READONLY(ui64, LockId, 0); + YDB_READONLY(ui64, Generation, 0); + YDB_READONLY(ui64, InternalGenerationCounter, 0); YDB_READONLY(ui64, TxId, 0); + YDB_READONLY_DEF(std::set, SendingShards); + YDB_READONLY_DEF(std::set, ReceivingShards); }; -class TProposeWriteTransaction : public NTabletFlatExecutor::TTransactionBase { + +class TProposeWriteTransaction: public NTabletFlatExecutor::TTransactionBase { private: using TBase = NTabletFlatExecutor::TTransactionBase; + public: TProposeWriteTransaction(TColumnShard* self, TCommitOperation::TPtr op, const TActorId source, const ui64 cookie) : TBase(self) , WriteCommit(op) , Source(source) - , Cookie(cookie) - {} + , Cookie(cookie) { + } + + virtual bool Execute(TTransactionContext& txc, const TActorContext&) override { + NKikimrTxColumnShard::TCommitWriteTxBody proto; + NKikimrTxColumnShard::ETransactionKind kind; + if (WriteCommit->NeedSyncLocks()) { + if (WriteCommit->IsPrimary()) { + kind = NKikimrTxColumnShard::TX_KIND_COMMIT_WRITE_PRIMARY; + } else { + kind = NKikimrTxColumnShard::TX_KIND_COMMIT_WRITE_SECONDARY; + } + proto = WriteCommit->CreateTxOperator(kind)->SerializeToProto(); + } else { + kind = NKikimrTxColumnShard::TX_KIND_COMMIT_WRITE; + } + proto.SetLockId(WriteCommit->GetLockId()); + TxOperator = Self->GetProgressTxController().StartProposeOnExecute( + TTxController::TTxInfo(kind, WriteCommit->GetTxId(), Source, Cookie, {}), proto.SerializeAsString(), txc); + return true; + } - bool Execute(TTransactionContext& txc, const TActorContext& ctx) override; - void Complete(const TActorContext& ctx) override; - TTxType GetTxType() const override { return TXTYPE_PROPOSE; } + virtual void Complete(const TActorContext& ctx) override { + Self->GetProgressTxController().FinishProposeOnComplete(WriteCommit->GetTxId(), ctx); + 
} + TTxType GetTxType() const override { + return TXTYPE_PROPOSE; + } private: TCommitOperation::TPtr WriteCommit; TActorId Source; ui64 Cookie; + std::shared_ptr TxOperator; }; -bool TProposeWriteTransaction::Execute(TTransactionContext& txc, const TActorContext&) { - NKikimrTxColumnShard::TCommitWriteTxBody proto; - proto.SetLockId(WriteCommit->GetLockId()); - TString txBody; - Y_ABORT_UNLESS(proto.SerializeToString(&txBody)); - Y_UNUSED(Self->GetProgressTxController().StartProposeOnExecute( - TTxController::TTxInfo(NKikimrTxColumnShard::TX_KIND_COMMIT_WRITE, WriteCommit->GetTxId(), Source, Cookie, {}), txBody, txc)); - return true; -} +class TAbortWriteTransaction: public NTabletFlatExecutor::TTransactionBase { +private: + using TBase = NTabletFlatExecutor::TTransactionBase; -void TProposeWriteTransaction::Complete(const TActorContext& ctx) { - Self->GetProgressTxController().FinishProposeOnComplete(WriteCommit->GetTxId(), ctx); -} +public: + TAbortWriteTransaction(TColumnShard* self, const ui64 txId, const TActorId source, const ui64 cookie) + : TBase(self) + , TxId(txId) + , Source(source) + , Cookie(cookie) { + } + + virtual bool Execute(TTransactionContext& txc, const TActorContext&) override { + Self->GetOperationsManager().AbortTransactionOnExecute(*Self, TxId, txc); + return true; + } + + virtual void Complete(const TActorContext& ctx) override { + Self->GetOperationsManager().AbortTransactionOnComplete(*Self, TxId); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildCompleted(Self->TabletID(), TxId); + ctx.Send(Source, result.release(), 0, Cookie); + } + TTxType GetTxType() const override { + return TXTYPE_PROPOSE; + } + +private: + ui64 TxId; + TActorId Source; + ui64 Cookie; +}; void TColumnShard::Handle(NEvents::TDataEvents::TEvWrite::TPtr& ev, const TActorContext& ctx) { - NActors::TLogContextGuard gLogging = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletID())("event", "TEvWrite"); + 
NActors::TLogContextGuard gLogging = + NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletID())("event", "TEvWrite"); const auto& record = ev->Get()->Record; const auto source = ev->Sender; const auto cookie = ev->Cookie; - const auto behaviour = TOperationsManager::GetBehaviour(*ev->Get()); - - if (behaviour == EOperationBehaviour::Undefined) { - IncCounter(COUNTER_WRITE_FAIL); - auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "invalid write event"); + const auto behaviourConclusion = TOperationsManager::GetBehaviour(*ev->Get()); + // AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("ev_write", record.DebugString()); + if (behaviourConclusion.IsFail()) { + Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_FAIL); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, + "invalid write event: " + behaviourConclusion.GetErrorMessage()); ctx.Send(source, result.release(), 0, cookie); return; } + auto behaviour = *behaviourConclusion; + + if (behaviour == EOperationBehaviour::AbortWriteLock) { + Execute(new TAbortWriteTransaction(this, record.GetLocks().GetLocks()[0].GetLockId(), source, cookie), ctx); + return; + } if (behaviour == EOperationBehaviour::CommitWriteLock) { - auto commitOperation = std::make_shared(); - if (!commitOperation->Parse(*ev->Get())) { - IncCounter(COUNTER_WRITE_FAIL); - auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "invalid commit event"); + auto commitOperation = std::make_shared(TabletID()); + const auto sendError = [&](const TString& message, const NKikimrDataEvents::TEvWriteResult::EStatus status) { + Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_FAIL); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, status, message); 
ctx.Send(source, result.release(), 0, cookie); + }; + auto conclusionParse = commitOperation->Parse(*ev->Get()); + if (conclusionParse.IsFail()) { + sendError(conclusionParse.GetErrorMessage(), NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST); + } else { + if (commitOperation->NeedSyncLocks()) { + auto* lockInfo = OperationsManager->GetLockOptional(commitOperation->GetLockId()); + if (!lockInfo) { + sendError("haven't lock for commit: " + ::ToString(commitOperation->GetLockId()), + NKikimrDataEvents::TEvWriteResult::STATUS_ABORTED); + } else { + if (lockInfo->GetGeneration() != commitOperation->GetGeneration()) { + sendError("tablet lock have another generation: " + ::ToString(lockInfo->GetGeneration()) + + " != " + ::ToString(commitOperation->GetGeneration()), + NKikimrDataEvents::TEvWriteResult::STATUS_LOCKS_BROKEN); + } else if (lockInfo->GetInternalGenerationCounter() != commitOperation->GetInternalGenerationCounter()) { + sendError( + "tablet lock have another internal generation counter: " + ::ToString(lockInfo->GetInternalGenerationCounter()) + + " != " + ::ToString(commitOperation->GetInternalGenerationCounter()), + NKikimrDataEvents::TEvWriteResult::STATUS_LOCKS_BROKEN); + } else { + Execute(new TProposeWriteTransaction(this, commitOperation, source, cookie), ctx); + } + } + } else { + Execute(new TProposeWriteTransaction(this, commitOperation, source, cookie), ctx); + } } - Execute(new TProposeWriteTransaction(this, commitOperation, source, cookie), ctx); return; } - const ui64 lockId = (behaviour == EOperationBehaviour::InTxWrite) ? 
record.GetTxId() : record.GetLockTxId(); - if (record.GetOperations().size() != 1) { - IncCounter(COUNTER_WRITE_FAIL); - auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "only single operation is supported"); + Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_FAIL); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildError( + TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "only single operation is supported"); ctx.Send(source, result.release(), 0, cookie); return; } const auto& operation = record.GetOperations()[0]; - const std::optional mType = TEnumOperator::DeserializeFromProto(operation.GetType()); + const std::optional mType = + TEnumOperator::DeserializeFromProto(operation.GetType()); if (!mType) { - IncCounter(COUNTER_WRITE_FAIL); - auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, + Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_FAIL); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "operation " + NKikimrDataEvents::TEvWrite::TOperation::EOperationType_Name(operation.GetType()) + " is not supported"); ctx.Send(source, result.release(), 0, cookie); return; } if (!operation.GetTableId().HasSchemaVersion()) { - IncCounter(COUNTER_WRITE_FAIL); - auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "schema version not set"); + Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_FAIL); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildError( + TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "schema version not set"); ctx.Send(source, result.release(), 0, cookie); return; } auto schema = 
TablesManager.GetPrimaryIndex()->GetVersionedIndex().GetSchema(operation.GetTableId().GetSchemaVersion()); if (!schema) { - IncCounter(COUNTER_WRITE_FAIL); - auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "unknown schema version"); + Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_FAIL); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildError( + TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "unknown schema version"); ctx.Send(source, result.release(), 0, cookie); return; } @@ -359,38 +521,50 @@ void TColumnShard::Handle(NEvents::TDataEvents::TEvWrite::TPtr& ev, const TActor const auto tableId = operation.GetTableId().GetTableId(); if (!TablesManager.IsReadyForWrite(tableId)) { - IncCounter(COUNTER_WRITE_FAIL); - auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_INTERNAL_ERROR, "table not writable"); + Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_FAIL); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildError( + TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_INTERNAL_ERROR, "table not writable"); ctx.Send(source, result.release(), 0, cookie); return; } auto arrowData = std::make_shared(schema); if (!arrowData->Parse(operation, NEvWrite::TPayloadReader(*ev->Get()))) { - IncCounter(COUNTER_WRITE_FAIL); - auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "parsing data error"); + Counters.GetTabletCounters()->IncCounter(COUNTER_WRITE_FAIL); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildError( + TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "parsing data error"); ctx.Send(source, result.release(), 0, cookie); } auto overloadStatus = CheckOverloaded(tableId); if (overloadStatus != EOverloadStatus::None) { - NEvWrite::TWriteData 
writeData(NEvWrite::TWriteMeta(0, tableId, source, {}), arrowData, nullptr, nullptr); - std::unique_ptr result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_OVERLOADED, "overload data error"); - OverloadWriteFail(overloadStatus, writeData, cookie, std::move(result), ctx); + std::unique_ptr result = NEvents::TDataEvents::TEvWriteResult::BuildError( + TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_OVERLOADED, "overload data error"); + OverloadWriteFail(overloadStatus, NEvWrite::TWriteMeta(0, tableId, source, {}), arrowData->GetSize(), cookie, std::move(result), ctx); return; } - auto wg = WritesMonitor.RegisterWrite(arrowData->GetSize()); + Counters.GetWritesMonitor()->OnStartWrite(arrowData->GetSize()); std::optional granuleShardingVersionId; if (record.HasGranuleShardingVersionId()) { granuleShardingVersionId = record.GetGranuleShardingVersionId(); } + ui64 lockId = 0; + if (behaviour == EOperationBehaviour::NoTxWrite) { + lockId = BuildEphemeralTxId(); + } else if (behaviour == EOperationBehaviour::InTxWrite) { + lockId = record.GetTxId(); + } else { + lockId = record.GetLockTxId(); + } + + OperationsManager->RegisterLock(lockId, Generation()); auto writeOperation = OperationsManager->RegisterOperation(lockId, cookie, granuleShardingVersionId, *mType); Y_ABORT_UNLESS(writeOperation); writeOperation->SetBehaviour(behaviour); writeOperation->Start(*this, tableId, arrowData, source, schema, ctx); } -} +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/columnshard__write_index.cpp b/ydb/core/tx/columnshard/columnshard__write_index.cpp index 69d54a68d1bd..27497b4e8fc6 100644 --- a/ydb/core/tx/columnshard/columnshard__write_index.cpp +++ b/ydb/core/tx/columnshard/columnshard__write_index.cpp @@ -41,7 +41,7 @@ void TColumnShard::Handle(TEvPrivate::TEvWriteIndex::TPtr& ev, const TActorConte if (IsAnyChannelYellowStop()) { ACFL_ERROR("event", "TEvWriteIndex failed")("reason", 
"channel yellow stop"); - IncCounter(COUNTER_OUT_OF_SPACE); + Counters.GetTabletCounters()->IncCounter(COUNTER_OUT_OF_SPACE); ev->Get()->SetPutStatus(NKikimrProto::TRYLATER); NOlap::TChangesFinishContext context("out of disk space"); ev->Get()->IndexChanges->Abort(*this, context); diff --git a/ydb/core/tx/columnshard/columnshard_common.cpp b/ydb/core/tx/columnshard/columnshard_common.cpp deleted file mode 100644 index d9f0dcc5e79b..000000000000 --- a/ydb/core/tx/columnshard/columnshard_common.cpp +++ /dev/null @@ -1,18 +0,0 @@ -#include "columnshard_common.h" -#include - -namespace NKikimr::NColumnShard { - -namespace { - -using EOperation = NArrow::EOperation; -using EAggregate = NArrow::EAggregate; -using TAssign = NSsa::TAssign; -using TAggregateAssign = NSsa::TAggregateAssign; - -} - -using EOperation = NArrow::EOperation; -using TPredicate = NOlap::TPredicate; - -} diff --git a/ydb/core/tx/columnshard/columnshard_common.h b/ydb/core/tx/columnshard/columnshard_common.h deleted file mode 100644 index 455f39a512cc..000000000000 --- a/ydb/core/tx/columnshard/columnshard_common.h +++ /dev/null @@ -1,94 +0,0 @@ -#pragma once -#include "engines/reader/common/description.h" -#include "engines/predicate/predicate.h" - -#include - -namespace NKikimr::NOlap { - struct TIndexInfo; -} - -namespace NKikimr::NColumnShard { - -using TReadDescription = NOlap::NReader::TReadDescription; -using IColumnResolver = NOlap::IColumnResolver; -using NOlap::TWriteId; - -class TBatchCache { -public: - using TUnifiedBlobId = NOlap::TUnifiedBlobId; - using TInsertedBatch = std::pair>; - - static constexpr ui32 MAX_COMMITTED_COUNT = 2 * TLimits::MIN_SMALL_BLOBS_TO_INSERT; - static constexpr ui32 MAX_INSERTED_COUNT = 2 * TLimits::MIN_SMALL_BLOBS_TO_INSERT; - static constexpr ui64 MAX_TOTAL_SIZE = 2 * TLimits::MIN_BYTES_TO_INSERT; - - TBatchCache() - : Inserted(MAX_INSERTED_COUNT) - , Committed(MAX_COMMITTED_COUNT) - {} - - void Insert(TWriteId writeId, const TUnifiedBlobId& blobId, const 
std::shared_ptr& batch) { - if (Bytes() + blobId.BlobSize() > MAX_TOTAL_SIZE) { - return; - } - InsertedBytes += blobId.BlobSize(); - Inserted.Insert(writeId, {blobId, batch}); - } - - void Commit(TWriteId writeId) { - auto it = Inserted.FindWithoutPromote(writeId); - if (it != Inserted.End()) { - auto& blobId = it->first; - InsertedBytes -= blobId.BlobSize(); - CommittedBytes += blobId.BlobSize(); - - Committed.Insert(blobId, it->second); - Inserted.Erase(it); - } - } - - void EraseInserted(TWriteId writeId) { - auto it = Inserted.FindWithoutPromote(writeId); - if (it != Inserted.End()) { - InsertedBytes -= (*it).first.BlobSize(); - Inserted.Erase(it); - } - } - - void EraseCommitted(const TUnifiedBlobId& blobId) { - auto it = Committed.FindWithoutPromote(blobId); - if (it != Committed.End()) { - CommittedBytes -= blobId.BlobSize(); - Committed.Erase(it); - } - } - - TInsertedBatch GetInserted(TWriteId writeId) const { - auto it = Inserted.Find(writeId); - if (it != Inserted.End()) { - return *it; - } - return {}; - } - - std::shared_ptr Get(const TUnifiedBlobId& blobId) const { - auto it = Committed.Find(blobId); - if (it != Committed.End()) { - return *it; - } - return {}; - } - - ui64 Bytes() const { - return InsertedBytes + CommittedBytes; - } - -private: - mutable TLRUCache Inserted; - mutable TLRUCache> Committed; - ui64 InsertedBytes{0}; - ui64 CommittedBytes{0}; -}; - -} diff --git a/ydb/core/tx/columnshard/columnshard_impl.cpp b/ydb/core/tx/columnshard/columnshard_impl.cpp index 79deb83616a9..9f1c7a10859d 100644 --- a/ydb/core/tx/columnshard/columnshard_impl.cpp +++ b/ydb/core/tx/columnshard/columnshard_impl.cpp @@ -28,6 +28,7 @@ #include "engines/changes/ttl.h" #include "resource_subscriber/counters.h" +#include "transactions/operators/ev_write/sync.h" #include "bg_tasks/adapter/adapter.h" #include "bg_tasks/manager/manager.h" @@ -41,6 +42,7 @@ #include #include #include +#include namespace NKikimr::NColumnShard { @@ -64,33 +66,26 @@ 
NTabletPipe::TClientConfig GetPipeClientConfig() { TColumnShard::TColumnShard(TTabletStorageInfo* info, const TActorId& tablet) : TActor(&TThis::StateInit) - , TTabletExecutedFlat(info, tablet, nullptr) + , TTabletExecutedFlat(info, tablet, new NMiniKQL::TMiniKQLFactory) + , TabletCountersHolder(new TProtobufTabletCounters()) + , Counters(*TabletCountersHolder) , ProgressTxController(std::make_unique(*this)) , StoragesManager(std::make_shared(*this)) , DataLocksManager(std::make_shared()) - , PeriodicWakeupActivationPeriod(NYDBTest::TControllers::GetColumnShardController()->GetPeriodicWakeupActivationPeriod(TSettings::DefaultPeriodicWakeupActivationPeriod)) - , StatsReportInterval(NYDBTest::TControllers::GetColumnShardController()->GetStatsReportInterval(TSettings::DefaultStatsReportInterval)) - , InFlightReadsTracker(StoragesManager) + , PeriodicWakeupActivationPeriod(NYDBTest::TControllers::GetColumnShardController()->GetPeriodicWakeupActivationPeriod()) + , StatsReportInterval(NYDBTest::TControllers::GetColumnShardController()->GetStatsReportInterval()) + , InFlightReadsTracker(StoragesManager, Counters.GetRequestsTracingCounters()) , TablesManager(StoragesManager, info->TabletID) , Subscribers(std::make_shared(*this)) , PipeClientCache(NTabletPipe::CreateBoundedClientCache(new NTabletPipe::TBoundedClientCacheConfig(), GetPipeClientConfig())) , InsertTable(std::make_unique()) - , SubscribeCounters(std::make_shared()) - , InsertTaskSubscription(NOlap::TInsertColumnEngineChanges::StaticTypeName(), SubscribeCounters) - , CompactTaskSubscription(NOlap::TCompactColumnEngineChanges::StaticTypeName(), SubscribeCounters) - , TTLTaskSubscription(NOlap::TTTLColumnEngineChanges::StaticTypeName(), SubscribeCounters) - , ScanCounters("Scan") - , WritesMonitor(*this) - , NormalizerController(StoragesManager, SubscribeCounters) - , SysLocks(this) -{ - TabletCountersPtr.reset(new TProtobufTabletCounters< - ESimpleCounters_descriptor, - ECumulativeCounters_descriptor, - 
EPercentileCounters_descriptor, - ETxTypes_descriptor - >()); - TabletCounters = TabletCountersPtr.get(); + , InsertTaskSubscription(NOlap::TInsertColumnEngineChanges::StaticTypeName(), Counters.GetSubscribeCounters()) + , CompactTaskSubscription(NOlap::TCompactColumnEngineChanges::StaticTypeName(), Counters.GetSubscribeCounters()) + , TTLTaskSubscription(NOlap::TTTLColumnEngineChanges::StaticTypeName(), Counters.GetSubscribeCounters()) + , BackgroundController(Counters.GetBackgroundControllerCounters()) + , NormalizerController(StoragesManager, Counters.GetSubscribeCounters()) + , SysLocks(this) { } void TColumnShard::OnDetach(const TActorContext& ctx) { @@ -191,86 +186,109 @@ ui64 TColumnShard::GetOutdatedStep() const { return step; } -ui64 TColumnShard::GetMinReadStep() const { - const TDuration maxReadStaleness = NYDBTest::TControllers::GetColumnShardController()->GetReadTimeoutClean(TDuration::Minutes(5)); - ui64 delayMillisec = maxReadStaleness.MilliSeconds(); +NOlap::TSnapshot TColumnShard::GetMinReadSnapshot() const { + ui64 delayMillisec = GetMaxReadStaleness().MilliSeconds(); ui64 passedStep = GetOutdatedStep(); ui64 minReadStep = (passedStep > delayMillisec ? 
passedStep - delayMillisec : 0); - return minReadStep; + + if (auto ssClean = InFlightReadsTracker.GetSnapshotToClean()) { + if (ssClean->GetPlanStep() < minReadStep) { + Counters.GetRequestsTracingCounters()->OnDefaultMinSnapshotInstant(TInstant::MilliSeconds(ssClean->GetPlanStep())); + return *ssClean; + } + } + Counters.GetRequestsTracingCounters()->OnDefaultMinSnapshotInstant(TInstant::MilliSeconds(minReadStep)); + return NOlap::TSnapshot::MaxForPlanStep(minReadStep); } -TWriteId TColumnShard::HasLongTxWrite(const NLongTxService::TLongTxId& longTxId, const ui32 partId) const { +TInsertWriteId TColumnShard::HasLongTxWrite(const NLongTxService::TLongTxId& longTxId, const ui32 partId) const { auto it = LongTxWritesByUniqueId.find(longTxId.UniqueId); if (it != LongTxWritesByUniqueId.end()) { auto itPart = it->second.find(partId); if (itPart != it->second.end()) { - return (TWriteId)itPart->second->WriteId; + return itPart->second->InsertWriteId; } } - return (TWriteId)0; + return (TInsertWriteId)0; } -TWriteId TColumnShard::GetLongTxWrite(NIceDb::TNiceDb& db, const NLongTxService::TLongTxId& longTxId, const ui32 partId, const std::optional granuleShardingVersionId) { +TInsertWriteId TColumnShard::GetLongTxWrite(NIceDb::TNiceDb& db, const NLongTxService::TLongTxId& longTxId, const ui32 partId, const std::optional granuleShardingVersionId) { auto it = LongTxWritesByUniqueId.find(longTxId.UniqueId); if (it != LongTxWritesByUniqueId.end()) { auto itPart = it->second.find(partId); if (itPart != it->second.end()) { - return (TWriteId)itPart->second->WriteId; + return itPart->second->InsertWriteId; } } else { it = LongTxWritesByUniqueId.emplace(longTxId.UniqueId, TPartsForLTXShard()).first; } - TWriteId writeId = BuildNextWriteId(db); - auto& lw = LongTxWrites[writeId]; - lw.WriteId = (ui64)writeId; + TInsertWriteId insertWriteId = InsertTable->BuildNextWriteId(db); + auto& lw = LongTxWrites[insertWriteId]; + lw.InsertWriteId = insertWriteId; lw.WritePartId = partId; 
lw.LongTxId = longTxId; lw.GranuleShardingVersionId = granuleShardingVersionId; it->second[partId] = &lw; - Schema::SaveLongTxWrite(db, writeId, partId, longTxId, granuleShardingVersionId); - return writeId; -} - -TWriteId TColumnShard::BuildNextWriteId(NTabletFlatExecutor::TTransactionContext& txc) { - NIceDb::TNiceDb db(txc.DB); - return BuildNextWriteId(db); -} - -TWriteId TColumnShard::BuildNextWriteId(NIceDb::TNiceDb& db) { - TWriteId writeId = ++LastWriteId; - Schema::SaveSpecialValue(db, Schema::EValueIds::LastWriteId, (ui64)writeId); - return writeId; + Schema::SaveLongTxWrite(db, insertWriteId, partId, longTxId, granuleShardingVersionId); + return insertWriteId; } -void TColumnShard::AddLongTxWrite(TWriteId writeId, ui64 txId) { - auto& lw = LongTxWrites.at(writeId); - lw.PreparedTxId = txId; +void TColumnShard::AddLongTxWrite(const TInsertWriteId writeId, ui64 txId) { + auto it = LongTxWrites.find(writeId); + AFL_VERIFY(it != LongTxWrites.end()); + it->second.PreparedTxId = txId; } -void TColumnShard::LoadLongTxWrite(TWriteId writeId, const ui32 writePartId, const NLongTxService::TLongTxId& longTxId, const std::optional granuleShardingVersion) { +void TColumnShard::LoadLongTxWrite(const TInsertWriteId writeId, const ui32 writePartId, const NLongTxService::TLongTxId& longTxId, const std::optional granuleShardingVersion) { auto& lw = LongTxWrites[writeId]; lw.WritePartId = writePartId; - lw.WriteId = (ui64)writeId; + lw.InsertWriteId = writeId; lw.LongTxId = longTxId; lw.GranuleShardingVersionId = granuleShardingVersion; LongTxWritesByUniqueId[longTxId.UniqueId][writePartId] = &lw; } -bool TColumnShard::RemoveLongTxWrite(NIceDb::TNiceDb& db, const TWriteId writeId, const ui64 txId) { - auto* lw = LongTxWrites.FindPtr(writeId); - AFL_VERIFY(lw)("write_id", (ui64)writeId)("tx_id", txId); - const ui64 prepared = lw->PreparedTxId; - AFL_VERIFY(!prepared || txId == prepared)("tx", txId)("prepared", prepared); - Schema::EraseLongTxWrite(db, writeId); - auto& 
ltxParts = LongTxWritesByUniqueId[lw->LongTxId.UniqueId]; - ltxParts.erase(lw->WritePartId); - if (ltxParts.empty()) { - AFL_VERIFY(LongTxWritesByUniqueId.erase(lw->LongTxId.UniqueId)); - } - LongTxWrites.erase(writeId); - return true; +bool TColumnShard::RemoveLongTxWrite(NIceDb::TNiceDb& db, const TInsertWriteId writeId, const ui64 txId) { + if (auto* lw = LongTxWrites.FindPtr(writeId)) { + ui64 prepared = lw->PreparedTxId; + if (!prepared || txId == prepared) { + Schema::EraseLongTxWrite(db, writeId); + auto& ltxParts = LongTxWritesByUniqueId[lw->LongTxId.UniqueId]; + ltxParts.erase(lw->WritePartId); + if (ltxParts.empty()) { + LongTxWritesByUniqueId.erase(lw->LongTxId.UniqueId); + } + LongTxWrites.erase(writeId); + return true; + } else { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "cannot_remove_prepared_tx_insertion")("write_id", (ui64)writeId)("tx_id", txId); + return false; + } + } else { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "cannot_remove_removed_tx_insertion")("write_id", (ui64)writeId)("tx_id", txId); + return true; + } +} + +void TColumnShard::TryAbortWrites(NIceDb::TNiceDb& db, NOlap::TDbWrapper& dbTable, THashSet&& writesToAbort) { + std::vector failedAborts; + for (auto& writeId : writesToAbort) { + if (!RemoveLongTxWrite(db, writeId, 0)) { + failedAborts.push_back(writeId); + } + } + if (failedAborts.size()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "failed_aborts")("count", failedAborts.size())("writes_count", writesToAbort.size()); + } + for (auto& writeId : failedAborts) { + InsertTable->MarkAsNotAbortable(writeId); + writesToAbort.erase(writeId); + } + if (!writesToAbort.empty()) { + InsertTable->Abort(dbTable, writesToAbort); + } } void TColumnShard::UpdateSchemaSeqNo(const TMessageSeqNo& seqNo, NTabletFlatExecutor::TTransactionContext& txc) { @@ -401,9 +419,9 @@ void TColumnShard::RunEnsureTable(const NKikimrTxColumnShard::TCreateTable& tabl TablesManager.AddTableVersion(pathId, version, tableVerProto, 
db, Tiers); - SetCounter(COUNTER_TABLES, TablesManager.GetTables().size()); - SetCounter(COUNTER_TABLE_PRESETS, TablesManager.GetSchemaPresets().size()); - SetCounter(COUNTER_TABLE_TTLS, TablesManager.GetTtl().PathsCount()); + Counters.GetTabletCounters()->SetCounter(COUNTER_TABLES, TablesManager.GetTables().size()); + Counters.GetTabletCounters()->SetCounter(COUNTER_TABLE_PRESETS, TablesManager.GetSchemaPresets().size()); + Counters.GetTabletCounters()->SetCounter(COUNTER_TABLE_TTLS, TablesManager.GetTtl().PathsCount()); } void TColumnShard::RunAlterTable(const NKikimrTxColumnShard::TAlterTable& alterProto, const NOlap::TSnapshot& version, @@ -451,11 +469,6 @@ void TColumnShard::RunDropTable(const NKikimrTxColumnShard::TDropTable& dropProt LOG_S_DEBUG("DropTable for pathId: " << pathId << " at tablet " << TabletID()); TablesManager.DropTable(pathId, version, db); - - // TODO: Allow to read old snapshots after DROP - TBlobGroupSelector dsGroupSelector(Info()); - NOlap::TDbWrapper dbTable(txc.DB, &dsGroupSelector); - InsertTable->DropPath(dbTable, pathId); } void TColumnShard::RunAlterStore(const NKikimrTxColumnShard::TAlterStore& proto, const NOlap::TSnapshot& version, @@ -487,8 +500,7 @@ void TColumnShard::EnqueueBackgroundActivities(const bool periodic) { ACFL_DEBUG("event", "EnqueueBackgroundActivities")("periodic", periodic); StoragesManager->GetOperatorVerified(NOlap::IStoragesManager::DefaultStorageId); StoragesManager->GetSharedBlobsManager()->GetStorageManagerVerified(NOlap::IStoragesManager::DefaultStorageId); - CSCounters.OnStartBackground(); - SendPeriodicStats(); + Counters.GetCSCounters().OnStartBackground(); if (!TablesManager.HasPrimaryIndex()) { AFL_NOTICE(NKikimrServices::TX_COLUMNSHARD)("problem", "Background activities cannot be started: no index at tablet"); @@ -515,7 +527,7 @@ class TChangesTask: public NConveyor::ITask { TString ClassId; NOlap::TSnapshot LastCompletedTx; protected: - virtual bool DoExecute() override { + virtual 
TConclusionStatus DoExecute(const std::shared_ptr& /*taskPtr*/) override { NActors::TLogContextGuard g(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletId)("parent_id", ParentActorId)); { NOlap::TConstructionContext context(*TxEvent->IndexInfo, Counters, LastCompletedTx); @@ -525,7 +537,7 @@ class TChangesTask: public NConveyor::ITask { } } TActorContext::AsActorContext().Send(ParentActorId, std::move(TxEvent)); - return true; + return TConclusionStatus::Success(); } public: virtual TString GetTaskClassIdentifier() const override { @@ -609,13 +621,16 @@ class TTTLChangesReadTask: public TChangesReadTask, public TMonitoringObjectsCou using TBase::TBase; }; -void TColumnShard::StartIndexTask(std::vector&& dataToIndex, const i64 bytesToIndex) { - CSCounters.IndexationInput(bytesToIndex); +void TColumnShard::StartIndexTask(std::vector&& dataToIndex, const i64 bytesToIndex) { + Counters.GetCSCounters().IndexationInput(bytesToIndex); - std::vector data; + std::vector data; data.reserve(dataToIndex.size()); for (auto& ptr : dataToIndex) { data.push_back(*ptr); + if (!TablesManager.HasTable(data.back().GetPathId())) { + data.back().SetRemove(); + } } Y_ABORT_UNLESS(data.size()); @@ -632,7 +647,8 @@ void TColumnShard::StartIndexTask(std::vector&& dat NOlap::NResourceBroker::NSubscribe::ITask::StartResourceSubscription( ResourceSubscribeActor, std::make_shared( - std::make_shared(std::move(ev), SelfId(), TabletID(), IndexationCounters, GetLastCompletedTx()), 0, indexChanges->CalcMemoryForUsage(), externalTaskId, InsertTaskSubscription)); + std::make_shared(std::move(ev), SelfId(), TabletID(), Counters.GetIndexationCounters(), GetLastCompletedTx()), + 0, indexChanges->CalcMemoryForUsage(), externalTaskId, InsertTaskSubscription)); } void TColumnShard::SetupIndexation() { @@ -652,8 +668,8 @@ void TColumnShard::SetupIndexation() { if (InsertTable->GetPathPriorities().size() && InsertTable->GetPathPriorities().rbegin()->first.GetCategory() 
== NOlap::TPathInfoIndexPriority::EIndexationPriority::PreventOverload) { force = true; } - const ui64 bytesLimit = NYDBTest::TControllers::GetColumnShardController()->GetGuaranteeIndexationStartBytesLimit(TSettings::GuaranteeIndexationStartBytesLimit); - const TDuration durationLimit = NYDBTest::TControllers::GetColumnShardController()->GetGuaranteeIndexationInterval(TSettings::GuaranteeIndexationInterval); + const ui64 bytesLimit = NYDBTest::TControllers::GetColumnShardController()->GetGuaranteeIndexationStartBytesLimit(); + const TDuration durationLimit = NYDBTest::TControllers::GetColumnShardController()->GetGuaranteeIndexationInterval(); if (!force && InsertTable->GetCountersCommitted().Bytes < bytesLimit && TMonotonic::Now() < BackgroundController.GetLastIndexationInstant() + durationLimit) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "skip_indexation")("reason", "not_enough_data_and_too_frequency") @@ -662,10 +678,10 @@ void TColumnShard::SetupIndexation() { } AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "start_indexation_tasks")("insert_overload_size", InsertTable->GetCountersCommitted().Bytes); - CSCounters.OnSetupIndexation(); + Counters.GetCSCounters().OnSetupIndexation(); ui64 bytesToIndex = 0; ui64 txBytesWrite = 0; - std::vector dataToIndex; + std::vector dataToIndex; dataToIndex.reserve(TLimits::MIN_SMALL_BLOBS_TO_INSERT); for (auto it = InsertTable->GetPathPriorities().rbegin(); it != InsertTable->GetPathPriorities().rend(); ++it) { for (auto* pathInfo : it->second) { @@ -693,7 +709,7 @@ void TColumnShard::SetupCompaction() { AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "skip_compaction")("reason", "disabled"); return; } - CSCounters.OnSetupCompaction(); + Counters.GetCSCounters().OnSetupCompaction(); BackgroundController.CheckDeadlines(); while (BackgroundController.GetCompactionsCount() < TSettings::MAX_ACTIVE_COMPACTIONS) { @@ -712,7 +728,7 @@ void TColumnShard::SetupCompaction() { 
NOlap::NResourceBroker::NSubscribe::ITask::StartResourceSubscription( ResourceSubscribeActor, std::make_shared( - std::make_shared(std::move(ev), SelfId(), TabletID(), CompactionCounters, GetLastCompletedTx()), 0, indexChanges->CalcMemoryForUsage(), externalTaskId, CompactTaskSubscription)); + std::make_shared(std::move(ev), SelfId(), TabletID(), Counters.GetCompactionCounters(), GetLastCompletedTx()), 0, indexChanges->CalcMemoryForUsage(), externalTaskId, CompactTaskSubscription)); } LOG_S_DEBUG("ActiveCompactions: " << BackgroundController.GetCompactionsCount() << " at tablet " << TabletID()); @@ -723,7 +739,7 @@ bool TColumnShard::SetupTtl(const THashMap& pathTtls) { AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "skip_ttl")("reason", "disabled"); return false; } - CSCounters.OnSetupTtl(); + Counters.GetCSCounters().OnSetupTtl(); THashMap eviction = pathTtls; for (auto&& i : eviction) { ACFL_DEBUG("background", "ttl")("path", i.first)("info", i.second.GetDebugString()); @@ -747,7 +763,8 @@ bool TColumnShard::SetupTtl(const THashMap& pathTtls) { if (needWrites) { NOlap::NResourceBroker::NSubscribe::ITask::StartResourceSubscription( ResourceSubscribeActor, std::make_shared( - std::make_shared(std::move(ev), SelfId(), TabletID(), CompactionCounters, GetLastCompletedTx()), 0, i->CalcMemoryForUsage(), externalTaskId, TTLTaskSubscription)); + std::make_shared(std::move(ev), SelfId(), TabletID(), Counters.GetCompactionCounters(), GetLastCompletedTx()), + 0, i->CalcMemoryForUsage(), externalTaskId, TTLTaskSubscription)); } else { ev->SetPutStatus(NKikimrProto::OK); ActorContext().Send(SelfId(), std::move(ev)); @@ -757,7 +774,7 @@ bool TColumnShard::SetupTtl(const THashMap& pathTtls) { } void TColumnShard::SetupCleanupPortions() { - CSCounters.OnSetupCleanup(); + Counters.GetCSCounters().OnSetupCleanup(); if (!AppDataVerified().ColumnShardConfig.GetCleanupEnabled() || 
!NYDBTest::TControllers::GetColumnShardController()->IsBackgroundEnabled(NYDBTest::ICSController::EBackground::Cleanup)) { AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "skip_cleanup")("reason", "disabled"); return; @@ -767,9 +784,8 @@ void TColumnShard::SetupCleanupPortions() { return; } - NOlap::TSnapshot cleanupSnapshot{GetMinReadStep(), 0}; - - auto changes = TablesManager.MutablePrimaryIndex().StartCleanupPortions(cleanupSnapshot, TablesManager.GetPathsToDrop(), DataLocksManager); + auto changes = + TablesManager.MutablePrimaryIndex().StartCleanupPortions(GetMinReadSnapshot(), TablesManager.GetPathsToDrop(), DataLocksManager); if (!changes) { ACFL_DEBUG("background", "cleanup")("skip_reason", "no_changes"); return; @@ -786,13 +802,21 @@ void TColumnShard::SetupCleanupPortions() { } void TColumnShard::SetupCleanupTables() { - CSCounters.OnSetupCleanup(); + Counters.GetCSCounters().OnSetupCleanup(); if (BackgroundController.IsCleanupTablesActive()) { ACFL_DEBUG("background", "cleanup")("skip_reason", "in_progress"); return; } - auto changes = TablesManager.MutablePrimaryIndex().StartCleanupTables(TablesManager.MutablePathsToDrop()); + THashSet pathIdsEmptyInInsertTable; + for (auto&& i : TablesManager.GetPathsToDrop()) { + if (InsertTable->HasPathIdData(i)) { + continue; + } + pathIdsEmptyInInsertTable.emplace(i); + } + + auto changes = TablesManager.MutablePrimaryIndex().StartCleanupTables(pathIdsEmptyInInsertTable); if (!changes) { ACFL_DEBUG("background", "cleanup")("skip_reason", "no_changes"); return; @@ -827,21 +851,22 @@ void TColumnShard::Handle(TEvPrivate::TEvGarbageCollectionFinished::TPtr& ev, co } void TColumnShard::SetupCleanupInsertTable() { + auto writeIdsToCleanup = InsertTable->OldWritesToAbort(AppData()->TimeProvider->Now()); + if (BackgroundController.IsCleanupInsertTableActive()) { ACFL_DEBUG("background", "cleanup_insert_table")("skip_reason", "in_progress"); return; } - if (!InsertTable->GetAborted().size()) { + if 
(!InsertTable->GetAborted().size() && !writeIdsToCleanup.size()) { return; } - AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("event", "cleanup_started")("aborted", InsertTable->GetAborted().size()); + AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("event", "cleanup_started")("aborted", InsertTable->GetAborted().size())("to_cleanup", writeIdsToCleanup.size()); BackgroundController.StartCleanupInsertTable(); - Execute(new TTxInsertTableCleanup(this), TActorContext::AsActorContext()); + Execute(new TTxInsertTableCleanup(this, std::move(writeIdsToCleanup)), TActorContext::AsActorContext()); } void TColumnShard::Die(const TActorContext& ctx) { - // TODO CleanupActors(ctx); NTabletPipe::CloseAndForgetClient(SelfId(), StatsReportPipe); UnregisterMediatorTimeCast(); @@ -863,6 +888,39 @@ void TColumnShard::Handle(NActors::TEvents::TEvUndelivered::TPtr& ev, const TAct } } +void TColumnShard::Handle(TEvTxProcessing::TEvReadSet::TPtr& ev, const TActorContext& ctx) { + const ui64 txId = ev->Get()->Record.GetTxId(); + if (!GetProgressTxController().GetTxOperatorOptional(txId)) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "read_set_ignored")("proto", ev->Get()->Record.DebugString()); + Send(MakePipePerNodeCacheID(false), + new TEvPipeCache::TEvForward( + new TEvTxProcessing::TEvReadSetAck(0, txId, TabletID(), ev->Get()->Record.GetTabletProducer(), TabletID(), 0), + ev->Get()->Record.GetTabletProducer(), true), + IEventHandle::FlagTrackDelivery, txId); + return; + } + auto op = GetProgressTxController().GetTxOperatorVerifiedAs(txId); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "read_set")("proto", ev->Get()->Record.DebugString())("lock_id", op->GetLockId()); + NKikimrTx::TReadSetData data; + AFL_VERIFY(data.ParseFromArray(ev->Get()->Record.GetReadSet().data(), ev->Get()->Record.GetReadSet().size())); + auto tx = op->CreateReceiveBrokenFlagTx( + *this, ev->Get()->Record.GetTabletProducer(), data.GetDecision() != NKikimrTx::TReadSetData::DECISION_COMMIT); + 
Execute(tx.release(), ctx); +} + +void TColumnShard::Handle(TEvTxProcessing::TEvReadSetAck::TPtr& ev, const TActorContext& ctx) { + auto opPtr = GetProgressTxController().GetTxOperatorOptional(ev->Get()->Record.GetTxId()); + if (!opPtr) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "missed_read_set_ack")("proto", ev->Get()->Record.DebugString())( + "tx_id", ev->Get()->Record.GetTxId()); + return; + } + auto op = TValidator::CheckNotNull(dynamic_pointer_cast(opPtr)); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "read_set_ack")("proto", ev->Get()->Record.DebugString())("lock_id", op->GetLockId()); + auto tx = op->CreateReceiveResultAckTx(*this, ev->Get()->Record.GetTabletConsumer()); + Execute(tx.release(), ctx); +} + void TColumnShard::Handle(NOlap::NDataSharing::NEvents::TEvProposeFromInitiator::TPtr& ev, const TActorContext& ctx) { AFL_NOTICE(NKikimrServices::TX_COLUMNSHARD)("process", "BlobsSharing")("event", "TEvProposeFromInitiator"); auto reqSession = std::make_shared(); @@ -1050,9 +1108,13 @@ void TColumnShard::Handle(TAutoPtrIsSharingInProgress()) { ctx.Send(NActors::ActorIdFromProto(ev->Get()->Record.GetSourceActorId()), - new NOlap::NBlobOperations::NEvents::TEvDeleteSharedBlobsFinished((NOlap::TTabletId)TabletID(), - NKikimrColumnShardBlobOperationsProto::TEvDeleteSharedBlobsFinished::DestinationCurrenlyLocked)); - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "sharing_in_progress"); + new NOlap::NBlobOperations::NEvents::TEvDeleteSharedBlobsFinished( + (NOlap::TTabletId)TabletID(), NKikimrColumnShardBlobOperationsProto::TEvDeleteSharedBlobsFinished::DestinationCurrenlyLocked)); + for (auto&& i : ev->Get()->Record.GetBlobIds()) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_BLOBS)("event", "sharing_in_progress")("blob_id", i)( + "from_tablet", ev->Get()->Record.GetSourceTabletId()); + } + return; } @@ -1111,4 +1173,8 @@ const NKikimr::NColumnShard::NTiers::TManager* TColumnShard::GetTierManagerPoint return 
Tiers->GetManagerOptional(tierId); } +TDuration TColumnShard::GetMaxReadStaleness() { + return NYDBTest::TControllers::GetColumnShardController()->GetReadTimeoutClean(); +} + } diff --git a/ydb/core/tx/columnshard/columnshard_impl.h b/ydb/core/tx/columnshard/columnshard_impl.h index ea6ca9b3164c..74e8e335f3ed 100644 --- a/ydb/core/tx/columnshard/columnshard_impl.h +++ b/ydb/core/tx/columnshard/columnshard_impl.h @@ -3,7 +3,6 @@ #include "background_controller.h" #include "counters.h" #include "columnshard.h" -#include "columnshard_common.h" #include "columnshard_ttl.h" #include "columnshard_private_events.h" #include "tables_manager.h" @@ -13,9 +12,11 @@ #include "transactions/tx_controller.h" #include "inflight_request_tracker.h" #include "counters/columnshard.h" +#include "counters/counters_manager.h" #include "resource_subscriber/counters.h" #include "resource_subscriber/task.h" #include "normalizer/abstract/abstract.h" +#include "operations/manager.h" #include "export/events/events.h" @@ -202,6 +203,8 @@ class TColumnShard void Handle(TEvTabletPipe::TEvClientDestroyed::TPtr& ev, const TActorContext& ctx); void Handle(TEvTabletPipe::TEvServerConnected::TPtr& ev, const TActorContext& ctx); void Handle(TEvTabletPipe::TEvServerDisconnected::TPtr& ev, const TActorContext& ctx); + void Handle(TEvTxProcessing::TEvReadSet::TPtr& ev, const TActorContext& ctx); + void Handle(TEvTxProcessing::TEvReadSetAck::TPtr& ev, const TActorContext& ctx); void Handle(TEvColumnShard::TEvProposeTransaction::TPtr& ev, const TActorContext& ctx); void Handle(TEvColumnShard::TEvCheckPlannedTransaction::TPtr& ev, const TActorContext& ctx); void Handle(TEvColumnShard::TEvCancelTransactionProposal::TPtr& ev, const TActorContext& ctx); @@ -216,6 +219,9 @@ class TColumnShard void Handle(TEvPrivate::TEvScanStats::TPtr &ev, const TActorContext &ctx); void Handle(TEvPrivate::TEvReadFinished::TPtr &ev, const TActorContext &ctx); void Handle(TEvPrivate::TEvPeriodicWakeup::TPtr& ev, const 
TActorContext& ctx); + void Handle(NActors::TEvents::TEvWakeup::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPrivate::TEvPingSnapshotsUsage::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPrivate::TEvWriteIndex::TPtr& ev, const TActorContext& ctx); void Handle(NMetadata::NProvider::TEvRefreshSubscriberData::TPtr& ev); void Handle(NEvents::TDataEvents::TEvWrite::TPtr& ev, const TActorContext& ctx); @@ -275,17 +281,16 @@ class TColumnShard putStatus.OnYellowChannels(Executor()); } - void SetCounter(NColumnShard::ESimpleCounters counter, ui64 num) const { - TabletCounters->Simple()[counter].Set(num); - } - - void IncCounter(NColumnShard::ECumulativeCounters counter, ui64 num = 1) const { - TabletCounters->Cumulative()[counter].Increment(num); - } - void ActivateTiering(const ui64 pathId, const TString& useTiering); void OnTieringModified(const std::optional pathId = {}); + public: + ui64 BuildEphemeralTxId() { + static TAtomicCounter Counter = 0; + static constexpr ui64 shift = (ui64)1 << 47; + return shift | Counter.Inc(); + } + enum class EOverloadStatus { ShardTxInFly /* "shard_tx" */, ShardWritesInFly /* "shard_writes" */, @@ -296,25 +301,17 @@ class TColumnShard None /* "none" */ }; - void IncCounter(NColumnShard::EPercentileCounters counter, const TDuration& latency) const { - TabletCounters->Percentile()[counter].IncrementFor(latency.MicroSeconds()); - } - - void IncCounter(NDataShard::ESimpleCounters counter, ui64 num = 1) const { - TabletCounters->Simple()[counter].Add(num); - } - // For syslocks void IncCounter(NDataShard::ECumulativeCounters counter, ui64 num = 1) const { - TabletCounters->Cumulative()[counter].Increment(num); + Counters.GetTabletCounters()->IncCounter(counter, num); } void IncCounter(NDataShard::EPercentileCounters counter, ui64 num) const { - TabletCounters->Percentile()[counter].IncrementFor(num); + Counters.GetTabletCounters()->IncCounter(counter, num); } void IncCounter(NDataShard::EPercentileCounters counter, const 
TDuration& latency) const { - TabletCounters->Percentile()[counter].IncrementFor(latency.MilliSeconds()); + Counters.GetTabletCounters()->IncCounter(counter, latency); } inline TRowVersion LastCompleteTxVersion() const { @@ -328,7 +325,7 @@ class TColumnShard } private: - void OverloadWriteFail(const EOverloadStatus overloadReason, const NEvWrite::TWriteData& writeData, const ui64 cookie, std::unique_ptr&& event, const TActorContext& ctx); + void OverloadWriteFail(const EOverloadStatus overloadReason, const NEvWrite::TWriteMeta& writeMeta, const ui64 writeSize, const ui64 cookie, std::unique_ptr&& event, const TActorContext& ctx); EOverloadStatus CheckOverloaded(const ui64 tableId) const; protected: @@ -356,6 +353,9 @@ class TColumnShard switch (ev->GetTypeRewrite()) { hFunc(NMetadata::NProvider::TEvRefreshSubscriberData, Handle); + HFunc(TEvTxProcessing::TEvReadSet, Handle); + HFunc(TEvTxProcessing::TEvReadSetAck, Handle); + HFunc(TEvTabletPipe::TEvClientConnected, Handle); HFunc(TEvTabletPipe::TEvClientDestroyed, Handle); HFunc(TEvTabletPipe::TEvServerConnected, Handle); @@ -375,6 +375,9 @@ class TColumnShard HFunc(TEvPrivate::TEvScanStats, Handle); HFunc(TEvPrivate::TEvReadFinished, Handle); HFunc(TEvPrivate::TEvPeriodicWakeup, Handle); + HFunc(NActors::TEvents::TEvWakeup, Handle); + HFunc(TEvPrivate::TEvPingSnapshotsUsage, Handle); + HFunc(NEvents::TDataEvents::TEvWrite, Handle); HFunc(TEvPrivate::TEvWriteDraft, Handle); HFunc(TEvPrivate::TEvGarbageCollectionFinished, Handle); @@ -408,6 +411,9 @@ class TColumnShard } private: + std::unique_ptr TabletCountersHolder; + TCountersManager Counters; + std::unique_ptr ProgressTxController; std::unique_ptr OperationsManager; std::shared_ptr SharingSessionsManager; @@ -418,68 +424,20 @@ class TColumnShard using TSchemaPreset = TSchemaPreset; using TTableInfo = TTableInfo; + const TMonotonic CreateInstant = TMonotonic::Now(); + std::optional StartInstant; + struct TLongTxWriteInfo { - ui64 WriteId; + TInsertWriteId 
InsertWriteId; ui32 WritePartId; NLongTxService::TLongTxId LongTxId; ui64 PreparedTxId = 0; std::optional GranuleShardingVersionId; }; - class TWritesMonitor { - private: - TColumnShard& Owner; - YDB_READONLY(ui64, WritesInFlight, 0); - YDB_READONLY(ui64, WritesSizeInFlight, 0); - - public: - class TGuard: public TNonCopyable { - friend class TWritesMonitor; - private: - TWritesMonitor& Owner; - - explicit TGuard(TWritesMonitor& owner) - : Owner(owner) - {} - - public: - ~TGuard() { - Owner.UpdateCounters(); - } - }; - - TWritesMonitor(TColumnShard& owner) - : Owner(owner) - {} - - TGuard RegisterWrite(const ui64 dataSize) { - ++WritesInFlight; - WritesSizeInFlight += dataSize; - return TGuard(*this); - } - - TGuard FinishWrite(const ui64 dataSize, const ui32 writesCount = 1) { - Y_ABORT_UNLESS(WritesInFlight > 0); - Y_ABORT_UNLESS(WritesSizeInFlight >= dataSize); - WritesInFlight -= writesCount; - WritesSizeInFlight -= dataSize; - return TGuard(*this); - } - - TString DebugString() const { - return TStringBuilder() << "{object=write_monitor;count=" << WritesInFlight << ";size=" << WritesSizeInFlight << "}"; - } - - private: - void UpdateCounters() { - Owner.SetCounter(COUNTER_WRITES_IN_FLY, WritesInFlight); - } - }; - ui64 CurrentSchemeShardId = 0; TMessageSeqNo LastSchemaSeqNo; std::optional ProcessingParams; - TWriteId LastWriteId = TWriteId{0}; ui64 LastPlannedStep = 0; ui64 LastPlannedTxId = 0; NOlap::TSnapshot LastCompletedTx = NOlap::TSnapshot::Zero(); @@ -495,35 +453,26 @@ class TColumnShard const TDuration PeriodicWakeupActivationPeriod; TDuration FailActivationDelay = TDuration::Seconds(1); const TDuration StatsReportInterval; - TInstant LastAccessTime; TInstant LastStatsReport; TActorId ResourceSubscribeActor; TActorId BufferizationWriteActorId; TActorId StatsReportPipe; + std::vector ActorsToStop; TInFlightReadsTracker InFlightReadsTracker; TTablesManager TablesManager; std::shared_ptr Subscribers; std::shared_ptr Tiers; - std::unique_ptr 
TabletCountersPtr; - TTabletCountersBase* TabletCounters; std::unique_ptr PipeClientCache; std::unique_ptr InsertTable; - std::shared_ptr SubscribeCounters; NOlap::NResourceBroker::NSubscribe::TTaskContext InsertTaskSubscription; NOlap::NResourceBroker::NSubscribe::TTaskContext CompactTaskSubscription; NOlap::NResourceBroker::NSubscribe::TTaskContext TTLTaskSubscription; - const TScanCounters ScanCounters; - const TIndexationCounters CompactionCounters = TIndexationCounters("GeneralCompaction"); - const TIndexationCounters IndexationCounters = TIndexationCounters("Indexation"); - const TIndexationCounters EvictionCounters = TIndexationCounters("Eviction"); - - const TCSCounters CSCounters; - TWritesMonitor WritesMonitor; - bool ProgressTxInFlight = false; + + std::optional ProgressTxInFlight; THashMap ScanTxInFlight; - THashMap LongTxWrites; + THashMap LongTxWrites; using TPartsForLTXShard = THashMap; THashMap LongTxWritesByUniqueId; TMultiMap WaitingScans; @@ -532,31 +481,29 @@ class TColumnShard TLimits Limits; NOlap::TNormalizationController NormalizerController; NDataShard::TSysLocks SysLocks; + static TDuration GetMaxReadStaleness(); void TryRegisterMediatorTimeCast(); void UnregisterMediatorTimeCast(); + void TryAbortWrites(NIceDb::TNiceDb& db, NOlap::TDbWrapper& dbTable, THashSet&& writesToAbort); bool WaitPlanStep(ui64 step); void SendWaitPlanStep(ui64 step); void RescheduleWaitingReads(); NOlap::TSnapshot GetMaxReadVersion() const; - ui64 GetMinReadStep() const; + NOlap::TSnapshot GetMinReadSnapshot() const; ui64 GetOutdatedStep() const; TDuration GetTxCompleteLag() const { ui64 mediatorTime = MediatorTimeCastEntry ? 
MediatorTimeCastEntry->Get(TabletID()) : 0; return ProgressTxController->GetTxCompleteLag(mediatorTime); } - TWriteId HasLongTxWrite(const NLongTxService::TLongTxId& longTxId, const ui32 partId) const; - TWriteId GetLongTxWrite(NIceDb::TNiceDb& db, const NLongTxService::TLongTxId& longTxId, const ui32 partId, const std::optional granuleShardingVersionId); - void AddLongTxWrite(TWriteId writeId, ui64 txId); - void LoadLongTxWrite(TWriteId writeId, const ui32 writePartId, const NLongTxService::TLongTxId& longTxId, const std::optional granuleShardingVersion); - bool RemoveLongTxWrite(NIceDb::TNiceDb& db, const TWriteId writeId, const ui64 txId); + TInsertWriteId HasLongTxWrite(const NLongTxService::TLongTxId& longTxId, const ui32 partId) const; + TInsertWriteId GetLongTxWrite(NIceDb::TNiceDb& db, const NLongTxService::TLongTxId& longTxId, const ui32 partId, const std::optional granuleShardingVersionId); + void AddLongTxWrite(const TInsertWriteId writeId, ui64 txId); + void LoadLongTxWrite(const TInsertWriteId writeId, const ui32 writePartId, const NLongTxService::TLongTxId& longTxId, const std::optional granuleShardingVersion); + bool RemoveLongTxWrite(NIceDb::TNiceDb& db, const TInsertWriteId writeId, const ui64 txId); - TWriteId BuildNextWriteId(NTabletFlatExecutor::TTransactionContext& txc); - TWriteId BuildNextWriteId(NIceDb::TNiceDb& db); - - void EnqueueProgressTx(const TActorContext& ctx); void EnqueueBackgroundActivities(const bool periodic = false); virtual void Enqueue(STFUNC_SIG) override; @@ -570,7 +517,7 @@ class TColumnShard void RunDropTable(const NKikimrTxColumnShard::TDropTable& body, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc); void RunAlterStore(const NKikimrTxColumnShard::TAlterStore& body, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc); - void StartIndexTask(std::vector&& dataToIndex, const i64 bytesToIndex); + void StartIndexTask(std::vector&& dataToIndex, const i64 
bytesToIndex); void SetupIndexation(); void SetupCompaction(); bool SetupTtl(const THashMap& pathTtls = {}); @@ -587,12 +534,14 @@ class TColumnShard void SendPeriodicStats(); void FillOlapStats(const TActorContext& ctx, std::unique_ptr& ev); void FillColumnTableStats(const TActorContext& ctx, std::unique_ptr& ev); - void ConfigureStats(const NOlap::TColumnEngineStats& indexStats, ::NKikimrTableStats::TTableStats* tabletStats); - void FillTxTableStats(::NKikimrTableStats::TTableStats* tableStats) const; public: ui64 TabletTxCounter = 0; + bool HasLongTxWrites(const TInsertWriteId insertWriteId) const { + return LongTxWrites.contains(insertWriteId); + } + void EnqueueProgressTx(const TActorContext& ctx, const std::optional continueTxId); NOlap::TSnapshot GetLastTxSnapshot() const { return NOlap::TSnapshot(LastPlannedStep, LastPlannedTxId); } @@ -620,6 +569,11 @@ class TColumnShard return *ProgressTxController; } + TOperationsManager& GetOperationsManager() const { + AFL_VERIFY(OperationsManager); + return *OperationsManager; + } + bool HasIndex() const { return !!TablesManager.GetPrimaryIndex(); } diff --git a/ydb/core/tx/columnshard/columnshard_private_events.h b/ydb/core/tx/columnshard/columnshard_private_events.h index f46c8e8b7191..cb0e8cd97150 100644 --- a/ydb/core/tx/columnshard/columnshard_private_events.h +++ b/ydb/core/tx/columnshard/columnshard_private_events.h @@ -11,6 +11,10 @@ #include #include +namespace NKikimr::NOlap::NReader { +class IApplyAction; +} + namespace NKikimr::NColumnShard { struct TEvPrivate { @@ -41,11 +45,28 @@ struct TEvPrivate { EvExportCursorSaved, EvExportSaveCursor, + EvTaskProcessedResult, + EvPingSnapshotsUsage, + EvEnd }; static_assert(EvEnd < EventSpaceEnd(TEvents::ES_PRIVATE), "expect EvEnd < EventSpaceEnd(TEvents::ES_PRIVATE)"); + class TEvTaskProcessedResult: public NActors::TEventLocal { + private: + TConclusion> Result; + + public: + TConclusion> ExtractResult() { + return std::move(Result); + } + + 
TEvTaskProcessedResult(const TConclusion>& result) + : Result(result) { + } + }; + struct TEvTieringModified: public TEventLocal { }; @@ -122,8 +143,9 @@ struct TEvPrivate { struct TEvReadFinished : public TEventLocal { explicit TEvReadFinished(ui64 requestCookie, ui64 txId = 0) - : RequestCookie(requestCookie), TxId(txId) - {} + : RequestCookie(requestCookie) + , TxId(txId) { + } ui64 RequestCookie; ui64 TxId; @@ -137,17 +159,39 @@ struct TEvPrivate { bool Manual; }; - class TEvWriteBlobsResult : public TEventLocal { + struct TEvPingSnapshotsUsage: public TEventLocal { + TEvPingSnapshotsUsage() = default; + }; + + class TEvWriteBlobsResult: public TEventLocal { + public: + enum EErrorClass { + Internal, + Request + }; private: NColumnShard::TBlobPutResult::TPtr PutResult; NOlap::TWritingBuffer WritesBuffer; YDB_READONLY_DEF(TString, ErrorMessage); + YDB_ACCESSOR(EErrorClass, ErrorClass, EErrorClass::Internal); + public: + + NKikimrDataEvents::TEvWriteResult::EStatus GetWriteResultStatus() const { + switch (ErrorClass) { + case EErrorClass::Internal: + return NKikimrDataEvents::TEvWriteResult::STATUS_INTERNAL_ERROR; + case EErrorClass::Request: + return NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST; + } + } - static std::unique_ptr Error(const NKikimrProto::EReplyStatus status, NOlap::TWritingBuffer&& writesBuffer, const TString& error) { - std::unique_ptr result = std::make_unique(std::make_shared(status), - std::move(writesBuffer)); + static std::unique_ptr Error( + const NKikimrProto::EReplyStatus status, NOlap::TWritingBuffer&& writesBuffer, const TString& error, const EErrorClass errorClass) { + std::unique_ptr result = + std::make_unique(std::make_shared(status), std::move(writesBuffer)); result->ErrorMessage = error; + result->ErrorClass = errorClass; return result; } diff --git a/ydb/core/tx/columnshard/columnshard_schema.cpp b/ydb/core/tx/columnshard/columnshard_schema.cpp index 3d9dc8e7a9b0..c9a60029a892 100644 --- 
a/ydb/core/tx/columnshard/columnshard_schema.cpp +++ b/ydb/core/tx/columnshard/columnshard_schema.cpp @@ -4,51 +4,24 @@ namespace NKikimr::NColumnShard { bool Schema::InsertTable_Load(NIceDb::TNiceDb& db, const IBlobGroupSelector* dsGroupSelector, NOlap::TInsertTableAccessor& insertTable, const TInstant& /*loadTime*/) { - auto rowset = db.Table().GreaterOrEqual(0, 0, 0, 0, "").Select(); + auto rowset = db.Table().Select(); if (!rowset.IsReady()) { return false; } while (!rowset.EndOfSet()) { - EInsertTableIds recType = (EInsertTableIds)rowset.GetValue(); - ui64 planStep = rowset.GetValue(); - ui64 writeTxId = rowset.GetValueOrDefault(); - ui64 pathId = rowset.GetValue(); - TString dedupId = rowset.GetValue(); - TString strBlobId = rowset.GetValue(); - TString metaStr = rowset.GetValue(); - ui64 schemaVersion = rowset.HaveValue() ? rowset.GetValue() : 0; + NOlap::TInsertTableRecordLoadContext constructor; + constructor.ParseFromDatabase(rowset); - TString error; - NOlap::TUnifiedBlobId blobId = NOlap::TUnifiedBlobId::ParseFromString(strBlobId, dsGroupSelector, error); - Y_ABORT_UNLESS(blobId.IsValid(), "Failied to parse blob id: %s", error.c_str()); - - NKikimrTxColumnShard::TLogicalMetadata meta; - if (metaStr) { - Y_ABORT_UNLESS(meta.ParseFromString(metaStr)); - } - - std::optional rangeOffset; - if (rowset.HaveValue()) { - rangeOffset = rowset.GetValue(); - } - std::optional rangeSize; - if (rowset.HaveValue()) { - rangeSize = rowset.GetValue(); - } - - AFL_VERIFY(!!rangeOffset == !!rangeSize); - TInsertedData data(planStep, writeTxId, pathId, dedupId, NOlap::TBlobRange(blobId, rangeOffset.value_or(0), rangeSize.value_or(blobId.BlobSize())), meta, schemaVersion, {}); - - switch (recType) { - case EInsertTableIds::Inserted: - insertTable.AddInserted(std::move(data), true); + switch (constructor.GetRecType()) { + case Schema::EInsertTableIds::Inserted: + insertTable.AddInserted(constructor.BuildInsertedOrAborted(dsGroupSelector), true); break; - case 
EInsertTableIds::Committed: - insertTable.AddCommitted(std::move(data), true); + case Schema::EInsertTableIds::Committed: + insertTable.AddCommitted(constructor.BuildCommitted(dsGroupSelector), true); break; - case EInsertTableIds::Aborted: - insertTable.AddAborted(std::move(data), true); + case Schema::EInsertTableIds::Aborted: + insertTable.AddAborted(constructor.BuildInsertedOrAborted(dsGroupSelector), true); break; } if (!rowset.Next()) { @@ -59,6 +32,7 @@ bool Schema::InsertTable_Load(NIceDb::TNiceDb& db, const IBlobGroupSelector* dsG } void Schema::SaveTxInfo(NIceDb::TNiceDb& db, const TFullTxInfo& txInfo, const TString& txBody) { + AFL_VERIFY(txInfo.TxKind != NKikimrTxColumnShard::TX_KIND_NONE); db.Table().Key(txInfo.TxId).Update( NIceDb::TUpdate(txInfo.TxKind), NIceDb::TUpdate(txBody), @@ -70,11 +44,14 @@ void Schema::SaveTxInfo(NIceDb::TNiceDb& db, const TFullTxInfo& txInfo, const TS } void Schema::UpdateTxInfoSource(NIceDb::TNiceDb& db, const TFullTxInfo& txInfo) { - db.Table().Key(txInfo.GetTxId()).Update( - NIceDb::TUpdate(txInfo.Source), - NIceDb::TUpdate(txInfo.Cookie), - NIceDb::TUpdate(txInfo.SerializeSeqNoAsString()) - ); + db.Table() + .Key(txInfo.GetTxId()) + .Update(NIceDb::TUpdate(txInfo.Source), NIceDb::TUpdate(txInfo.Cookie), + NIceDb::TUpdate(txInfo.SerializeSeqNoAsString())); } +void Schema::UpdateTxInfoBody(NIceDb::TNiceDb& db, const ui64 txId, const TString& txBody) { + db.Table().Key(txId).Update(NIceDb::TUpdate(txBody)); } + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/columnshard_schema.h b/ydb/core/tx/columnshard/columnshard_schema.h index 50c67db625b1..8b104b9dcd58 100644 --- a/ydb/core/tx/columnshard/columnshard_schema.h +++ b/ydb/core/tx/columnshard/columnshard_schema.h @@ -17,7 +17,7 @@ class TColumnChunkLoadContext; namespace NKikimr::NColumnShard { -using NOlap::TWriteId; +using NOlap::TInsertWriteId; using NOlap::IBlobGroupSelector; struct TFullTxInfo; @@ -31,6 +31,7 @@ struct Schema : 
NIceDb::Schema { using TSettings = SchemaSettings; using TInsertedData = NOlap::TInsertedData; + using TCommittedData = NOlap::TCommittedData; using TColumnRecord = NOlap::TColumnRecord; enum EIndexTables : ui32 { @@ -108,7 +109,11 @@ struct Schema : NIceDb::Schema { TableVersionInfo = 11, SmallBlobs = 12, OneToOneEvictedBlobs = 13, - BlobsToDeleteWT = 14 + BlobsToDeleteWT = 14, + InFlightSnapshots = 15, + TxDependencies = 16, + TxStates = 17, + TxEvents = 18 }; // Tablet tables @@ -250,6 +255,40 @@ struct Schema : NIceDb::Schema { using TColumns = TableColumns; }; + struct InFlightSnapshots: Table<(ui32)ECommonTables::InFlightSnapshots> { + struct PlanStep: Column<1, NScheme::NTypeIds::Uint64> {}; + struct TxId: Column<2, NScheme::NTypeIds::Uint64> {}; + + using TKey = TableKey; + using TColumns = TableColumns; + }; + + struct TxDependencies: Table<(ui32)ECommonTables::TxDependencies> { + struct CommitTxId: Column<1, NScheme::NTypeIds::Uint64> {}; + struct BrokenTxId: Column<2, NScheme::NTypeIds::Uint64> {}; + + using TKey = TableKey; + using TColumns = TableColumns; + }; + + struct TxStates: Table<(ui32)ECommonTables::TxStates> { + struct TxId: Column<1, NScheme::NTypeIds::Uint64> {}; + struct Broken: Column<2, NScheme::NTypeIds::Bool> {}; + + using TKey = TableKey; + using TColumns = TableColumns; + }; + + struct TxEvents: Table<(ui32)ECommonTables::TxEvents> { + struct TxId: Column<1, NScheme::NTypeIds::Uint64> {}; + struct GenerationId: Column<2, NScheme::NTypeIds::Uint64> {}; + struct GenerationInternalId: Column<3, NScheme::NTypeIds::Uint64> {}; + struct Data: Column<4, NScheme::NTypeIds::String> {}; + + using TKey = TableKey; + using TColumns = TableColumns; + }; + // Index tables // InsertTable - common for all indices @@ -372,9 +411,10 @@ struct Schema : NIceDb::Schema { struct Size: Column<7, NScheme::NTypeIds::Uint32> {}; struct RecordsCount: Column<8, NScheme::NTypeIds::Uint32> {}; struct RawBytes: Column<9, NScheme::NTypeIds::Uint64> {}; + struct 
BlobData: Column<10, NScheme::NTypeIds::String> {}; using TKey = TableKey; - using TColumns = TableColumns; + using TColumns = TableColumns; }; struct SharedBlobIds: NIceDb::Schema::Table { @@ -544,7 +584,11 @@ struct Schema : NIceDb::Schema { BackgroundSessions, ShardingInfo, Normalizers, - NormalizerEvents + NormalizerEvents, + InFlightSnapshots, + TxDependencies, + TxStates, + TxEvents >; // @@ -660,8 +704,8 @@ struct Schema : NIceDb::Schema { static void SaveTxInfo(NIceDb::TNiceDb& db, const TFullTxInfo& txInfo, const TString& txBody); + static void UpdateTxInfoBody(NIceDb::TNiceDb& db, const ui64 txId, const TString& txBody); static void UpdateTxInfoSource(NIceDb::TNiceDb& db, const TFullTxInfo& txInfo); - static void UpdateTxInfoSource(NIceDb::TNiceDb& db, ui64 txId, const TActorId& source, ui64 cookie) { db.Table().Key(txId).Update( NIceDb::TUpdate(source), @@ -741,7 +785,7 @@ struct Schema : NIceDb::Schema { db.Table().Key(pathId).Delete(); } - static void SaveLongTxWrite(NIceDb::TNiceDb& db, TWriteId writeId, const ui32 writePartId, const NLongTxService::TLongTxId& longTxId, const std::optional granuleShardingVersion) { + static void SaveLongTxWrite(NIceDb::TNiceDb& db, const TInsertWriteId writeId, const ui32 writePartId, const NLongTxService::TLongTxId& longTxId, const std::optional granuleShardingVersion) { NKikimrLongTxService::TLongTxId proto; longTxId.ToProto(&proto); TString serialized; @@ -753,32 +797,49 @@ struct Schema : NIceDb::Schema { ); } - static void EraseLongTxWrite(NIceDb::TNiceDb& db, TWriteId writeId) { + static void EraseLongTxWrite(NIceDb::TNiceDb& db, const TInsertWriteId writeId) { db.Table().Key((ui64)writeId).Delete(); } // InsertTable activities - static void InsertTable_Upsert(NIceDb::TNiceDb& db, EInsertTableIds recType, const TInsertedData& data) { - db.Table().Key((ui8)recType, data.PlanStep, data.WriteTxId, data.PathId, data.DedupId).Update( - NIceDb::TUpdate(data.GetBlobRange().GetBlobId().ToStringLegacy()), - 
NIceDb::TUpdate(data.GetBlobRange().Offset), - NIceDb::TUpdate(data.GetBlobRange().Size), - NIceDb::TUpdate(data.GetMeta().SerializeToProto().SerializeAsString()), - NIceDb::TUpdate(data.GetSchemaVersion()) - ); + static void InsertTable_Upsert(NIceDb::TNiceDb& db, const EInsertTableIds recType, const TInsertedData& data) { + db.Table() + .Key((ui8)recType, 0, (ui64)data.GetInsertWriteId(), data.GetPathId(), "") + .Update(NIceDb::TUpdate(data.GetBlobRange().GetBlobId().ToStringLegacy()), + NIceDb::TUpdate(data.GetBlobRange().Offset), + NIceDb::TUpdate(data.GetBlobRange().Size), + NIceDb::TUpdate(data.GetMeta().SerializeToProto().SerializeAsString()), + NIceDb::TUpdate(data.GetSchemaVersion())); + } + + static void InsertTable_Upsert(NIceDb::TNiceDb& db, const TCommittedData& data) { + db.Table() + .Key((ui8)EInsertTableIds::Committed, data.GetSnapshot().GetPlanStep(), data.GetSnapshot().GetTxId(), data.GetPathId(), + data.GetDedupId()) + .Update(NIceDb::TUpdate(data.GetBlobRange().GetBlobId().ToStringLegacy()), + NIceDb::TUpdate(data.GetBlobRange().Offset), + NIceDb::TUpdate(data.GetBlobRange().Size), + NIceDb::TUpdate(data.GetMeta().SerializeToProto().SerializeAsString()), + NIceDb::TUpdate(data.GetSchemaVersion())); } static void InsertTable_Erase(NIceDb::TNiceDb& db, EInsertTableIds recType, const TInsertedData& data) { - db.Table().Key((ui8)recType, data.PlanStep, data.WriteTxId, data.PathId, data.DedupId).Delete(); + db.Table().Key((ui8)recType, 0, (ui64)data.GetInsertWriteId(), data.GetPathId(), "").Delete(); + } + + static void InsertTable_Erase(NIceDb::TNiceDb& db, const TCommittedData& data) { + db.Table() + .Key((ui8)EInsertTableIds::Committed, data.GetSnapshot().GetPlanStep(), data.GetSnapshot().GetTxId(), data.GetPathId(), data.GetDedupId()) + .Delete(); } static void InsertTable_Insert(NIceDb::TNiceDb& db, const TInsertedData& data) { InsertTable_Upsert(db, EInsertTableIds::Inserted, data); } - static void InsertTable_Commit(NIceDb::TNiceDb& db, const 
TInsertedData& data) { - InsertTable_Upsert(db, EInsertTableIds::Committed, data); + static void InsertTable_Commit(NIceDb::TNiceDb& db, const TCommittedData& data) { + InsertTable_Upsert(db, data); } static void InsertTable_Abort(NIceDb::TNiceDb& db, const TInsertedData& data) { @@ -789,8 +850,8 @@ struct Schema : NIceDb::Schema { InsertTable_Erase(db, EInsertTableIds::Inserted, data); } - static void InsertTable_EraseCommitted(NIceDb::TNiceDb& db, const TInsertedData& data) { - InsertTable_Erase(db, EInsertTableIds::Committed, data); + static void InsertTable_EraseCommitted(NIceDb::TNiceDb& db, const TCommittedData& data) { + InsertTable_Erase(db, data); } static void InsertTable_EraseAborted(NIceDb::TNiceDb& db, const TInsertedData& data) { @@ -876,13 +937,20 @@ class TColumnChunkLoadContext { class TIndexChunkLoadContext { private: - YDB_READONLY_DEF(TBlobRange, BlobRange); + YDB_READONLY_DEF(std::optional, BlobRange); + YDB_READONLY_DEF(std::optional, BlobData); TChunkAddress Address; const ui32 RecordsCount; const ui32 RawBytes; public: TIndexChunk BuildIndexChunk(const TBlobRangeLink16::TLinkId blobLinkId) const { - return TIndexChunk(Address.GetColumnId(), Address.GetChunkIdx(), RecordsCount, RawBytes, BlobRange.BuildLink(blobLinkId)); + AFL_VERIFY(BlobRange); + return TIndexChunk(Address.GetColumnId(), Address.GetChunkIdx(), RecordsCount, RawBytes, BlobRange->BuildLink(blobLinkId)); + } + + TIndexChunk BuildIndexChunk() const { + AFL_VERIFY(BlobData); + return TIndexChunk(Address.GetColumnId(), Address.GetChunkIdx(), RecordsCount, RawBytes, *BlobData); } template @@ -892,14 +960,141 @@ class TIndexChunkLoadContext { , RawBytes(rowset.template GetValue()) { AFL_VERIFY(Address.GetColumnId())("event", "incorrect address")("address", Address.DebugString()); - TString strBlobId = rowset.template GetValue(); - Y_ABORT_UNLESS(strBlobId.size() == sizeof(TLogoBlobID), "Size %" PRISZT " doesn't match TLogoBlobID", strBlobId.size()); - TLogoBlobID logoBlobId((const 
ui64*)strBlobId.data()); - BlobRange.BlobId = NOlap::TUnifiedBlobId(dsGroupSelector->GetGroup(logoBlobId), logoBlobId); - BlobRange.Offset = rowset.template GetValue(); - BlobRange.Size = rowset.template GetValue(); - AFL_VERIFY(BlobRange.BlobId.IsValid() && BlobRange.Size)("event", "incorrect blob")("blob", BlobRange.ToString()); + if (rowset.template HaveValue()) { + TBlobRange& bRange = BlobRange.emplace(); + TString strBlobId = rowset.template GetValue(); + Y_ABORT_UNLESS(strBlobId.size() == sizeof(TLogoBlobID), "Size %" PRISZT " doesn't match TLogoBlobID", strBlobId.size()); + TLogoBlobID logoBlobId((const ui64*)strBlobId.data()); + bRange.BlobId = NOlap::TUnifiedBlobId(dsGroupSelector->GetGroup(logoBlobId), logoBlobId); + bRange.Offset = rowset.template GetValue(); + bRange.Size = rowset.template GetValue(); + AFL_VERIFY(bRange.BlobId.IsValid() && bRange.Size)("event", "incorrect blob")("blob", bRange.ToString()); + } else if (rowset.template HaveValue()) { + BlobData = rowset.template GetValue(); + } else { + AFL_VERIFY(false); + } } }; -} +class TInsertTableRecordLoadContext { +private: + NColumnShard::Schema::EInsertTableIds RecType; + ui64 PlanStep; + ui64 WriteTxId; + ui64 PathId; + YDB_ACCESSOR_DEF(TString, DedupId); + ui64 SchemaVersion; + TString BlobIdString; + std::optional BlobId; + TString MetadataString; + std::optional Metadata; + std::optional RangeOffset; + std::optional RangeSize; + + void Prepare(const IBlobGroupSelector* dsGroupSelector) { + AFL_VERIFY(!PreparedFlag); + PreparedFlag = true; + TString error; + NOlap::TUnifiedBlobId blobId = NOlap::TUnifiedBlobId::ParseFromString(BlobIdString, dsGroupSelector, error); + Y_ABORT_UNLESS(blobId.IsValid(), "Failied to parse blob id: %s", error.c_str()); + BlobId = blobId; + + NKikimrTxColumnShard::TLogicalMetadata meta; + AFL_VERIFY(MetadataString); + Y_ABORT_UNLESS(meta.ParseFromString(MetadataString)); + Metadata = std::move(meta); + AFL_VERIFY(!!RangeOffset == !!RangeSize); + } + + bool 
PreparedFlag = false; + bool ParsedFlag = false; + +public: + TInsertWriteId GetInsertWriteId() const { + AFL_VERIFY(ParsedFlag); + AFL_VERIFY(RecType != NColumnShard::Schema::EInsertTableIds::Committed); + return (TInsertWriteId)WriteTxId; + } + + NColumnShard::Schema::EInsertTableIds GetRecType() const { + AFL_VERIFY(ParsedFlag); + return RecType; + } + + ui64 GetPlanStep() const { + AFL_VERIFY(ParsedFlag); + return PlanStep; + } + + void Remove(NIceDb::TNiceDb& db) const { + AFL_VERIFY(ParsedFlag); + db.Table().Key((ui8)RecType, PlanStep, WriteTxId, PathId, DedupId).Delete(); + } + + void Upsert(NIceDb::TNiceDb& db) const { + AFL_VERIFY(ParsedFlag); + using namespace NColumnShard; + if (RangeOffset) { + db.Table() + .Key((ui8)RecType, PlanStep, WriteTxId, PathId, DedupId) + .Update(NIceDb::TUpdate(BlobIdString), + NIceDb::TUpdate(*RangeOffset), + NIceDb::TUpdate(*RangeSize), NIceDb::TUpdate(MetadataString), + NIceDb::TUpdate(SchemaVersion)); + } else { + db.Table() + .Key((ui8)RecType, PlanStep, WriteTxId, PathId, DedupId) + .Update(NIceDb::TUpdate(BlobIdString), NIceDb::TUpdate(MetadataString), + NIceDb::TUpdate(SchemaVersion)); + } + } + + template + void ParseFromDatabase(TRowset& rowset) { + AFL_VERIFY(!ParsedFlag)("problem", "duplication parsing"); + ParsedFlag = true; + using namespace NColumnShard; + RecType = (Schema::EInsertTableIds)rowset.template GetValue(); + PlanStep = rowset.template GetValue(); + WriteTxId = rowset.template GetValueOrDefault(); + AFL_VERIFY(WriteTxId); + + PathId = rowset.template GetValue(); + DedupId = rowset.template GetValue(); + SchemaVersion = + rowset.template HaveValue() ? 
rowset.template GetValue() : 0; + BlobIdString = rowset.template GetValue(); + MetadataString = rowset.template GetValue(); + if (rowset.template HaveValue()) { + RangeOffset = rowset.template GetValue(); + } + if (rowset.template HaveValue()) { + RangeSize = rowset.template GetValue(); + } + } + + NOlap::TCommittedData BuildCommitted(const IBlobGroupSelector* dsGroupSelector) { + Prepare(dsGroupSelector); + using namespace NColumnShard; + AFL_VERIFY(RecType == Schema::EInsertTableIds::Committed); + auto userData = std::make_shared(PathId, + NOlap::TBlobRange(*BlobId, RangeOffset.value_or(0), RangeSize.value_or(BlobId->BlobSize())), *Metadata, SchemaVersion, std::nullopt); + AFL_VERIFY(!!DedupId); + AFL_VERIFY(PlanStep); + return NOlap::TCommittedData(userData, PlanStep, WriteTxId, DedupId); + } + + NOlap::TInsertedData BuildInsertedOrAborted(const IBlobGroupSelector* dsGroupSelector) { + Prepare(dsGroupSelector); + using namespace NColumnShard; + AFL_VERIFY(RecType != Schema::EInsertTableIds::Committed); + auto userData = std::make_shared(PathId, + NOlap::TBlobRange(*BlobId, RangeOffset.value_or(0), RangeSize.value_or(BlobId->BlobSize())), *Metadata, SchemaVersion, std::nullopt); + AFL_VERIFY(!DedupId); + AFL_VERIFY(!PlanStep); + return NOlap::TInsertedData((TInsertWriteId)WriteTxId, userData); + } +}; + +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/common/limits.h b/ydb/core/tx/columnshard/common/limits.h index 04a5cb55128e..b30432dfb2fd 100644 --- a/ydb/core/tx/columnshard/common/limits.h +++ b/ydb/core/tx/columnshard/common/limits.h @@ -4,6 +4,15 @@ namespace NKikimr::NOlap { class TGlobalLimits { public: - static const inline ui64 TxWriteLimitBytes = 256 * 1024 * 1024; + static constexpr inline ui64 TxWriteLimitBytes = 256 * 1024 * 1024; + static constexpr inline ui64 TTLCompactionMemoryLimit = 1ULL << 30; + static constexpr inline ui64 InsertCompactionMemoryLimit = 1ULL << 30; + static constexpr inline ui64 GeneralCompactionMemoryLimit = 
3ULL << 30; + static constexpr inline ui64 ScanMemoryLimit = 3ULL << 30; + + static constexpr inline ui64 DefaultBlobsMemoryIntervalLimit = ScanMemoryLimit; + static constexpr inline ui64 DefaultRejectMemoryIntervalLimit = ScanMemoryLimit; + static constexpr inline ui64 DefaultReduceMemoryIntervalLimit = 0.8 * ScanMemoryLimit; + static constexpr inline ui64 DefaultReadSequentiallyBufferSize = ((ui64)8) << 20; }; } \ No newline at end of file diff --git a/ydb/core/tx/columnshard/common/scalars.cpp b/ydb/core/tx/columnshard/common/scalars.cpp index d614253e9ec1..d85622edeeec 100644 --- a/ydb/core/tx/columnshard/common/scalars.cpp +++ b/ydb/core/tx/columnshard/common/scalars.cpp @@ -1,6 +1,6 @@ #include "scalars.h" -#include +#include #include #include diff --git a/ydb/core/tx/columnshard/common/scalars.h b/ydb/core/tx/columnshard/common/scalars.h index 328296048620..7635caa19e11 100644 --- a/ydb/core/tx/columnshard/common/scalars.h +++ b/ydb/core/tx/columnshard/common/scalars.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include #include #include diff --git a/ydb/core/tx/columnshard/common/snapshot.cpp b/ydb/core/tx/columnshard/common/snapshot.cpp index 6ca80d818986..eb6e62ccac0c 100644 --- a/ydb/core/tx/columnshard/common/snapshot.cpp +++ b/ydb/core/tx/columnshard/common/snapshot.cpp @@ -35,4 +35,12 @@ TString TSnapshot::SerializeToString() const { return SerializeToProto().SerializeAsString(); } +NKikimr::NOlap::TSnapshot TSnapshot::MaxForPlanStep(const ui64 planStep) noexcept { + return TSnapshot(planStep, ::Max()); +} + +NKikimr::NOlap::TSnapshot TSnapshot::MaxForPlanInstant(const TInstant planInstant) noexcept { + return TSnapshot(planInstant.MilliSeconds(), ::Max()); +} + }; diff --git a/ydb/core/tx/columnshard/common/snapshot.h b/ydb/core/tx/columnshard/common/snapshot.h index ffa48670371e..4bc99d268420 100644 --- a/ydb/core/tx/columnshard/common/snapshot.h +++ b/ydb/core/tx/columnshard/common/snapshot.h @@ -54,6 +54,10 @@ class TSnapshot { return 
TSnapshot(-1ll, -1ll); } + static TSnapshot MaxForPlanInstant(const TInstant planInstant) noexcept; + + static TSnapshot MaxForPlanStep(const ui64 planStep) noexcept; + constexpr bool operator==(const TSnapshot&) const noexcept = default; constexpr auto operator<=>(const TSnapshot&) const noexcept = default; diff --git a/ydb/core/tx/columnshard/common/tests/shard_reader.cpp b/ydb/core/tx/columnshard/common/tests/shard_reader.cpp deleted file mode 100644 index 2789a63e38a8..000000000000 --- a/ydb/core/tx/columnshard/common/tests/shard_reader.cpp +++ /dev/null @@ -1,4 +0,0 @@ -#include "shard_reader.h" - -namespace NKikimr::NOlap::NTests { -} diff --git a/ydb/core/tx/columnshard/common/tests/ya.make b/ydb/core/tx/columnshard/common/tests/ya.make deleted file mode 100644 index 8b3c3809006b..000000000000 --- a/ydb/core/tx/columnshard/common/tests/ya.make +++ /dev/null @@ -1,14 +0,0 @@ -LIBRARY() - -SRCS( - shard_reader.cpp -) - -PEERDIR( - ydb/core/formats/arrow/protos - contrib/libs/apache/arrow - ydb/core/formats/arrow - ydb/core/kqp/compute_actor -) - -END() diff --git a/ydb/core/tx/columnshard/common/ya.make b/ydb/core/tx/columnshard/common/ya.make index 87bd2c16b26b..c7d8a27bf3ee 100644 --- a/ydb/core/tx/columnshard/common/ya.make +++ b/ydb/core/tx/columnshard/common/ya.make @@ -1,7 +1,7 @@ LIBRARY() SRCS( - limits.h + limits.cpp reverse_accessor.cpp scalars.cpp snapshot.cpp @@ -11,7 +11,7 @@ SRCS( ) PEERDIR( - ydb/core/formats/arrow/protos + ydb/library/formats/arrow/protos contrib/libs/apache/arrow ydb/core/formats/arrow ydb/core/tx/columnshard/common/protos diff --git a/ydb/core/tx/columnshard/counters/aggregation/table_stats.h b/ydb/core/tx/columnshard/counters/aggregation/table_stats.h new file mode 100644 index 000000000000..68f39a4191de --- /dev/null +++ b/ydb/core/tx/columnshard/counters/aggregation/table_stats.h @@ -0,0 +1,47 @@ +#pragma once + +#include +#include +#include +#include + +namespace NKikimr::NColumnShard { + +class TTableStatsBuilder { 
+private: + TCountersManager& Counters; + const NTabletFlatExecutor::NFlatExecutorSetup::IExecutor& Executor; + NOlap::IColumnEngine& ColumnEngine; + +public: + TTableStatsBuilder( + TCountersManager& counters, const NTabletFlatExecutor::NFlatExecutorSetup::IExecutor* executor, NOlap::IColumnEngine& columnEngine) + : Counters(counters) + , Executor(*executor) + , ColumnEngine(columnEngine) { + } + + void FillTableStats(ui64 pathId, ::NKikimrTableStats::TTableStats& tableStats) { + Counters.FillTableStats(pathId, tableStats); + + auto columnEngineStats = ColumnEngine.GetStats().FindPtr(pathId); + if (columnEngineStats && *columnEngineStats) { + auto activeStats = (*columnEngineStats)->Active(); + tableStats.SetRowCount(activeStats.Rows); + tableStats.SetDataSize(activeStats.Bytes); + } + } + + void FillTotalTableStats(::NKikimrTableStats::TTableStats& tableStats) { + Counters.FillTotalTableStats(tableStats); + + tableStats.SetInFlightTxCount(Executor.GetStats().TxInFly); + tableStats.SetHasLoanedParts(Executor.HasLoanedParts()); + + auto activeStats = ColumnEngine.GetTotalStats().Active(); + tableStats.SetRowCount(activeStats.Rows); + tableStats.SetDataSize(activeStats.Bytes); + } +}; + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/counters/aggregation/ya.make b/ydb/core/tx/columnshard/counters/aggregation/ya.make new file mode 100644 index 000000000000..95687733d093 --- /dev/null +++ b/ydb/core/tx/columnshard/counters/aggregation/ya.make @@ -0,0 +1,10 @@ +LIBRARY() + +SRCS() + +PEERDIR( + ydb/core/protos + ydb/core/base +) + +END() diff --git a/ydb/core/tx/columnshard/counters/background_controller.cpp b/ydb/core/tx/columnshard/counters/background_controller.cpp new file mode 100644 index 000000000000..fcc89f6ee940 --- /dev/null +++ b/ydb/core/tx/columnshard/counters/background_controller.cpp @@ -0,0 +1,18 @@ +#include "background_controller.h" + +#include +#include + +namespace NKikimr::NColumnShard { + +void 
TBackgroundControllerCounters::OnCompactionFinish(ui64 pathId) { + TInstant now = TAppData::TimeProvider->Now(); + TInstant& lastFinish = LastCompactionFinishByPathId[pathId]; + lastFinish = std::max(lastFinish, now); + + if (LastCompactionFinish < now) { + LastCompactionFinish = now; + } +} + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/counters/background_controller.h b/ydb/core/tx/columnshard/counters/background_controller.h new file mode 100644 index 000000000000..d46d7cdacf91 --- /dev/null +++ b/ydb/core/tx/columnshard/counters/background_controller.h @@ -0,0 +1,35 @@ +#pragma once + +#include +#include +#include + +namespace NKikimr::NColumnShard { + +class TBackgroundControllerCounters { +private: + THashMap LastCompactionFinishByPathId; + TInstant LastCompactionFinish; + +public: + void OnCompactionFinish(ui64 pathId); + + void FillStats(ui64 pathId, ::NKikimrTableStats::TTableStats& output) const { + output.SetLastFullCompactionTs(GetLastCompactionFinishInstant(pathId).value_or(TInstant::Zero()).Seconds()); + } + + void FillTotalStats(::NKikimrTableStats::TTableStats& output) const { + output.SetLastFullCompactionTs(LastCompactionFinish.Seconds()); + } + +private: + std::optional GetLastCompactionFinishInstant(const ui64 pathId) const { + auto findInstant = LastCompactionFinishByPathId.FindPtr(pathId); + if (!findInstant) { + return std::nullopt; + } + return *findInstant; + } +}; + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/counters/blobs_manager.cpp b/ydb/core/tx/columnshard/counters/blobs_manager.cpp index 1da1ac7ff86b..edd9603d0d03 100644 --- a/ydb/core/tx/columnshard/counters/blobs_manager.cpp +++ b/ydb/core/tx/columnshard/counters/blobs_manager.cpp @@ -7,58 +7,51 @@ namespace NKikimr::NColumnShard { TBlobsManagerCounters::TBlobsManagerCounters(const TString& module) : TCommonCountersOwner(module) -{ - SkipCollection = TBase::GetDeriviative("GC/Skip/Count"); - StartCollection = 
TBase::GetDeriviative("GC/Start/Count"); - CollectDropExplicitBytes = TBase::GetDeriviative("GC/Drop/Explicit/Bytes"); - CollectDropExplicitCount = TBase::GetDeriviative("GC/Drop/Explicit/Count"); - CollectDropImplicitBytes = TBase::GetDeriviative("GC/Drop/Implicit/Bytes"); - CollectDropImplicitCount = TBase::GetDeriviative("GC/Drop/Implicit/Count"); - CollectKeepBytes = TBase::GetDeriviative("GC/Keep/Bytes"); - CollectKeepCount = TBase::GetDeriviative("GC/Keep/Count"); - PutBlobBytes = TBase::GetDeriviative("GC/PutBlob/Bytes"); - PutBlobCount = TBase::GetDeriviative("GC/PutBlob/Count"); - CollectGen = TBase::GetValue("GC/Gen"); - CollectStep = TBase::GetValue("GC/Step"); - - DeleteBlobMarkerBytes = TBase::GetDeriviative("GC/MarkerDeleteBlob/Bytes"); - DeleteBlobMarkerCount = TBase::GetDeriviative("GC/MarkerDeleteBlob/Count"); - DeleteBlobDelayedMarkerBytes = TBase::GetDeriviative("GC/MarkerDelayedDeleteBlob/Bytes"); - DeleteBlobDelayedMarkerCount = TBase::GetDeriviative("GC/MarkerDelayedDeleteBlob/Count"); - AddSmallBlobBytes = TBase::GetDeriviative("GC/AddSmallBlob/Bytes"); - AddSmallBlobCount = TBase::GetDeriviative("GC/AddSmallBlob/Count"); - DeleteSmallBlobBytes = TBase::GetDeriviative("GC/DeleteSmallBlob/Bytes"); - DeleteSmallBlobCount = TBase::GetDeriviative("GC/DeleteSmallBlob/Count"); - - BlobsKeepCount = TBase::GetValue("GC/BlobsKeep/Count"); - BlobsKeepBytes = TBase::GetValue("GC/BlobsKeep/Bytes"); - BlobsDeleteCount = TBase::GetValue("GC/BlobsDelete/Count"); - BlobsDeleteBytes = TBase::GetValue("GC/BlobsDelete/Bytes"); + , BlobsToDeleteCount(TBase::GetValue("BlobsToDelete/Count")) + , BlobsToDeleteDelayedCount(TBase::GetValue("BlobsToDeleteDelayed/Count")) + , BlobsToKeepCount(TBase::GetValue("BlobsToKeep/Count")) + , CurrentGen(TBase::GetValue("CurrentGen")) + , CurrentStep(TBase::GetValue("CurrentStep")) + , GCCounters(*this, "GC") - BrokenKeepCount = TBase::GetDeriviative("GC/BrokenKeep/Count"); - BrokenKeepBytes = 
TBase::GetDeriviative("GC/BrokenKeep/Bytes"); +{ - KeepMarkerCount = TBase::GetDeriviative("GC/KeepMarker/Count"); - KeepMarkerBytes = TBase::GetDeriviative("GC/KeepMarker/Bytes"); } -void TBlobsManagerCounters::OnBlobsKeep(const std::map<::NKikimr::TGenStep, std::set>& blobs) const { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "OnBlobsKeep")("count", blobs.size()); -// BlobsKeepCount->Set(blobs.size()); -// ui64 size = 0; -// for (auto&& i : blobs) { -// size += i.BlobSize(); -// } -// BlobsKeepBytes->Set(size); +TBlobsManagerGCCounters::TBlobsManagerGCCounters(const TCommonCountersOwner& sameAs, const TString& componentName) + : TBase(sameAs, componentName) + , SkipCollectionEmpty(TBase::GetDeriviative("Skip/Empty/Count")) + , SkipCollectionThrottling(TBase::GetDeriviative("Skip/Throttling/Count")) +{ + KeepsCountTasks = TBase::GetHistogram("Tasks/Keeps/Count", NMonitoring::ExponentialHistogram(16, 2, 100)); + KeepsCountBlobs = TBase::GetHistogram("Tasks/Keeps/Blobs", NMonitoring::ExponentialHistogram(16, 2, 100)); + KeepsCountBytes = TBase::GetHistogram("Tasks/Keeps/Bytes", NMonitoring::ExponentialHistogram(16, 2, 1024)); + DeletesCountTasks = TBase::GetHistogram("Tasks/Deletes/Count", NMonitoring::ExponentialHistogram(16, 2, 100)); // same ".../Count" suffix as KeepsCountTasks + DeletesCountBlobs = TBase::GetHistogram("Tasks/Deletes/Blobs", NMonitoring::ExponentialHistogram(16, 2, 100)); // same ".../Blobs" suffix as KeepsCountBlobs + DeletesCountBytes = TBase::GetHistogram("Tasks/Deletes/Bytes", NMonitoring::ExponentialHistogram(16, 2, 1024)); + FullGCTasks = TBase::GetDeriviative("Tasks/Full/Count"); + MoveBarriers = TBase::GetDeriviative("Tasks/Barrier/Move"); + DontMoveBarriers = TBase::GetDeriviative("Tasks/Barrier/DontMove"); + GCTasks = TBase::GetDeriviative("Tasks/All/Count"); + EmptyGCTasks = TBase::GetDeriviative("Tasks/Empty/Count"); } -void TBlobsManagerCounters::OnBlobsDelete(const NOlap::TTabletsByBlob& /*blobs*/) const { - // BlobsDeleteCount->Set(blobs.size()); - // ui64 size = 0; - // for (auto&& i : blobs) { - // size += 
i.BlobSize(); - // } - // BlobsDeleteBytes->Set(size); +void TBlobsManagerGCCounters::OnGCTask(const ui32 keepsCount, const ui32 keepBytes, const ui32 deleteCount, const ui32 deleteBytes, const bool isFull, const bool moveBarrier) const { + GCTasks->Add(1); + if (isFull) { + FullGCTasks->Add(1); + } + KeepsCountTasks->Collect(keepsCount); + KeepsCountBlobs->Collect((i64)keepsCount, keepsCount); + KeepsCountBytes->Collect((i64)keepsCount, keepBytes); + DeletesCountTasks->Collect(deleteCount); + DeletesCountBlobs->Collect((i64)deleteCount, deleteCount); + DeletesCountBytes->Collect((i64)deleteCount, deleteBytes); + if (moveBarrier) { + MoveBarriers->Add(1); + } else { + DontMoveBarriers->Add(1); + } } } diff --git a/ydb/core/tx/columnshard/counters/blobs_manager.h b/ydb/core/tx/columnshard/counters/blobs_manager.h index 2c555eaac9f4..8490ff38814e 100644 --- a/ydb/core/tx/columnshard/counters/blobs_manager.h +++ b/ydb/core/tx/columnshard/counters/blobs_manager.h @@ -2,6 +2,7 @@ #include "common/owner.h" #include +#include #include #include @@ -13,99 +14,53 @@ class TTabletsByBlob; namespace NKikimr::NColumnShard { -class TBlobsManagerCounters: public TCommonCountersOwner { +class TBlobsManagerGCCounters: public TCommonCountersOwner { private: using TBase = TCommonCountersOwner; - NMonitoring::TDynamicCounters::TCounterPtr CollectDropExplicitBytes; - NMonitoring::TDynamicCounters::TCounterPtr CollectDropExplicitCount; - NMonitoring::TDynamicCounters::TCounterPtr CollectDropImplicitBytes; - NMonitoring::TDynamicCounters::TCounterPtr CollectDropImplicitCount; - NMonitoring::TDynamicCounters::TCounterPtr CollectKeepBytes; - NMonitoring::TDynamicCounters::TCounterPtr CollectKeepCount; - NMonitoring::TDynamicCounters::TCounterPtr PutBlobBytes; - NMonitoring::TDynamicCounters::TCounterPtr PutBlobCount; - NMonitoring::TDynamicCounters::TCounterPtr CollectGen; - NMonitoring::TDynamicCounters::TCounterPtr CollectStep; - NMonitoring::TDynamicCounters::TCounterPtr 
DeleteBlobMarkerBytes; - NMonitoring::TDynamicCounters::TCounterPtr DeleteBlobMarkerCount; - NMonitoring::TDynamicCounters::TCounterPtr DeleteBlobDelayedMarkerBytes; - NMonitoring::TDynamicCounters::TCounterPtr DeleteBlobDelayedMarkerCount; - NMonitoring::TDynamicCounters::TCounterPtr AddSmallBlobBytes; - NMonitoring::TDynamicCounters::TCounterPtr AddSmallBlobCount; - NMonitoring::TDynamicCounters::TCounterPtr DeleteSmallBlobBytes; - NMonitoring::TDynamicCounters::TCounterPtr DeleteSmallBlobCount; - NMonitoring::TDynamicCounters::TCounterPtr BrokenKeepCount; - NMonitoring::TDynamicCounters::TCounterPtr BrokenKeepBytes; - NMonitoring::TDynamicCounters::TCounterPtr BlobsKeepCount; - NMonitoring::TDynamicCounters::TCounterPtr BlobsKeepBytes; - NMonitoring::TDynamicCounters::TCounterPtr BlobsDeleteCount; - NMonitoring::TDynamicCounters::TCounterPtr BlobsDeleteBytes; - NMonitoring::TDynamicCounters::TCounterPtr KeepMarkerCount; - NMonitoring::TDynamicCounters::TCounterPtr KeepMarkerBytes; - + NMonitoring::THistogramPtr KeepsCountBytes; + NMonitoring::THistogramPtr KeepsCountBlobs; + NMonitoring::THistogramPtr KeepsCountTasks; + NMonitoring::THistogramPtr DeletesCountBytes; + NMonitoring::THistogramPtr DeletesCountBlobs; + NMonitoring::THistogramPtr DeletesCountTasks; + NMonitoring::TDynamicCounters::TCounterPtr FullGCTasks; + NMonitoring::TDynamicCounters::TCounterPtr MoveBarriers; + NMonitoring::TDynamicCounters::TCounterPtr DontMoveBarriers; + NMonitoring::TDynamicCounters::TCounterPtr GCTasks; + NMonitoring::TDynamicCounters::TCounterPtr EmptyGCTasks; public: - NMonitoring::TDynamicCounters::TCounterPtr SkipCollection; - NMonitoring::TDynamicCounters::TCounterPtr StartCollection; - - TBlobsManagerCounters(const TString& module); - - void OnKeepMarker(const ui64 size) const { - KeepMarkerCount->Add(1); - KeepMarkerBytes->Add(size); - } - - void OnBlobsKeep(const std::map<::NKikimr::TGenStep, std::set>& blobs) const; - - void OnBlobsDelete(const NOlap::TTabletsByBlob& 
blobs) const; - - void OnAddSmallBlob(const ui32 bSize) const { - AddSmallBlobBytes->Add(bSize); - AddSmallBlobCount->Add(1); - } - - void OnDeleteBlobDelayedMarker(const ui32 bSize) const { - DeleteBlobDelayedMarkerBytes->Add(bSize); - DeleteBlobDelayedMarkerCount->Add(1); - } - - void OnDeleteBlobMarker(const ui32 bSize) const { - DeleteBlobMarkerBytes->Add(bSize); - DeleteBlobMarkerCount->Add(1); - } + const NMonitoring::TDynamicCounters::TCounterPtr SkipCollectionEmpty; + const NMonitoring::TDynamicCounters::TCounterPtr SkipCollectionThrottling; - void OnNewCollectStep(const ui32 gen, const ui32 step) const { - CollectGen->Set(gen); - CollectStep->Set(step); - } + TBlobsManagerGCCounters(const TCommonCountersOwner& sameAs, const TString& componentName); - void OnDeleteSmallBlob(const ui32 bSize) const { - DeleteSmallBlobBytes->Add(bSize); - DeleteSmallBlobCount->Add(1); - } + void OnGCTask(const ui32 keepsCount, const ui32 keepBytes, const ui32 deleteCount, const ui32 deleteBytes, + const bool isFull, const bool moveBarrier) const; - void OnPutResult(const ui32 bSize) const { - PutBlobBytes->Add(bSize); - PutBlobCount->Add(1); - } - - void OnCollectKeep(const ui32 bSize) const { - CollectKeepBytes->Add(bSize); - CollectKeepCount->Add(1); + void OnEmptyGCTask() const { + EmptyGCTasks->Add(1); } +}; - void OnBrokenKeep(const ui32 bSize) const { - BrokenKeepBytes->Add(bSize); - BrokenKeepCount->Add(1); +class TBlobsManagerCounters: public TCommonCountersOwner { +private: + using TBase = TCommonCountersOwner; + const NMonitoring::TDynamicCounters::TCounterPtr BlobsToDeleteCount; + const NMonitoring::TDynamicCounters::TCounterPtr BlobsToDeleteDelayedCount; + const NMonitoring::TDynamicCounters::TCounterPtr BlobsToKeepCount; +public: + const NMonitoring::TDynamicCounters::TCounterPtr CurrentGen; + const NMonitoring::TDynamicCounters::TCounterPtr CurrentStep; + const TBlobsManagerGCCounters GCCounters; + TBlobsManagerCounters(const TString& module); + void 
OnBlobsToDelete(const NOlap::TTabletsByBlob& blobs) const { + BlobsToDeleteCount->Set(blobs.GetSize()); } - - void OnCollectDropExplicit(const ui32 bSize) const { - CollectDropExplicitBytes->Add(bSize); - CollectDropExplicitCount->Add(1); + void OnBlobsToKeep(const NOlap::TBlobsByGenStep& blobs) const { + BlobsToKeepCount->Set(blobs.GetSize()); } - - void OnCollectDropImplicit(const ui32 bSize) const { - CollectDropImplicitBytes->Add(bSize); - CollectDropImplicitCount->Add(1); + void OnBlobsToDeleteDelayed(const NOlap::TTabletsByBlob& blobs) const { + BlobsToDeleteDelayedCount->Set(blobs.GetSize()); } }; diff --git a/ydb/core/tx/columnshard/counters/column_tables.cpp b/ydb/core/tx/columnshard/counters/column_tables.cpp new file mode 100644 index 000000000000..51b9ecf6283e --- /dev/null +++ b/ydb/core/tx/columnshard/counters/column_tables.cpp @@ -0,0 +1,13 @@ +#include "column_tables.h" + +namespace NKikimr::NColumnShard { + +std::shared_ptr TColumnTablesCounters::GetPathIdCounter(ui64 pathId) { + auto findCounter = PathIdCounters.FindPtr(pathId); + if (findCounter) { + return *findCounter; + } + return PathIdCounters.emplace(pathId, std::make_shared(*this)).first->second; +} + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/counters/column_tables.h b/ydb/core/tx/columnshard/counters/column_tables.h new file mode 100644 index 000000000000..db00069218fb --- /dev/null +++ b/ydb/core/tx/columnshard/counters/column_tables.h @@ -0,0 +1,85 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace NKikimr::NColumnShard { + +class TSingleColumnTableCounters; + +class TColumnTablesCounters { +private: + YDB_READONLY_CONST(std::shared_ptr, LastAccessTime); + YDB_READONLY_CONST(std::shared_ptr, LastUpdateTime); + + THashMap> PathIdCounters; + + friend class TSingleColumnTableCounters; + +public: + TColumnTablesCounters() + : LastAccessTime(std::make_shared()) + , LastUpdateTime(std::make_shared()) { + } + + void 
FillStats(::NKikimrTableStats::TTableStats& output) const { + output.SetLastAccessTime(LastAccessTime->MilliSeconds()); + output.SetLastUpdateTime(LastUpdateTime->MilliSeconds()); + } + + std::shared_ptr GetPathIdCounter(ui64 pathId); +}; + +class TSingleColumnTableCounters { +private: + YDB_READONLY(TInstant, PathIdLastAccessTime, TInstant::Zero()); + YDB_READONLY(TInstant, PathIdLastUpdateTime, TInstant::Zero()); + + const std::shared_ptr TotalLastAccessTime; + const std::shared_ptr TotalLastUpdateTime; + +public: + TSingleColumnTableCounters(TColumnTablesCounters& owner) + : TotalLastAccessTime(owner.LastAccessTime) + , TotalLastUpdateTime(owner.LastUpdateTime) { + } + + void OnReadEvent() { + UpdateLastAccessTime(TAppData::TimeProvider->Now()); + } + + void OnWriteEvent() { + TInstant now = TAppData::TimeProvider->Now(); + UpdateLastUpdateTime(now); + UpdateLastAccessTime(now); + } + + void FillStats(::NKikimrTableStats::TTableStats& output) const { + output.SetLastAccessTime(PathIdLastAccessTime.MilliSeconds()); + output.SetLastUpdateTime(PathIdLastUpdateTime.MilliSeconds()); + } + +private: + void UpdateLastAccessTime(TInstant value) { + if (PathIdLastAccessTime < value) { + PathIdLastAccessTime = value; + } + if (*TotalLastAccessTime < value) { + *TotalLastAccessTime = value; + } + } + + void UpdateLastUpdateTime(TInstant value) { + if (PathIdLastUpdateTime < value) { + PathIdLastUpdateTime = value; + } + if (*TotalLastUpdateTime < value) { + *TotalLastUpdateTime = value; + } + } +}; + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/counters/columnshard.cpp b/ydb/core/tx/columnshard/counters/columnshard.cpp index 64c7e2931c5f..67c569e1313b 100644 --- a/ydb/core/tx/columnshard/counters/columnshard.cpp +++ b/ydb/core/tx/columnshard/counters/columnshard.cpp @@ -8,7 +8,9 @@ namespace NKikimr::NColumnShard { TCSCounters::TCSCounters() : TBase("CS") -{ + , WritingCounters(std::make_shared(*this)) + , Initialization(*this) + , 
TxProgress(*this) { StartBackgroundCount = TBase::GetDeriviative("StartBackground/Count"); TooEarlyBackgroundCount = TBase::GetDeriviative("TooEarlyBackground/Count"); SetupCompactionCount = TBase::GetDeriviative("SetupCompaction/Count"); @@ -52,6 +54,7 @@ TCSCounters::TCSCounters() HistogramSuccessWriteMiddle6PutBlobsDurationMs = TBase::GetHistogram("SuccessWriteMiddle6PutBlobsDurationMs", NMonitoring::ExponentialHistogram(18, 2, 5)); HistogramFailedWritePutBlobsDurationMs = TBase::GetHistogram("FailedWritePutBlobsDurationMs", NMonitoring::ExponentialHistogram(18, 2, 5)); HistogramWriteTxCompleteDurationMs = TBase::GetHistogram("WriteTxCompleteDurationMs", NMonitoring::ExponentialHistogram(18, 2, 5)); + WritePutBlobsCount = TBase::GetValue("WritePutBlobs"); WriteRequests = TBase::GetValue("WriteRequests"); diff --git a/ydb/core/tx/columnshard/counters/columnshard.h b/ydb/core/tx/columnshard/counters/columnshard.h index 6bada377df17..81df8b300eb8 100644 --- a/ydb/core/tx/columnshard/counters/columnshard.h +++ b/ydb/core/tx/columnshard/counters/columnshard.h @@ -1,8 +1,11 @@ #pragma once #include "common/owner.h" +#include "initialization.h" +#include "tx_progress.h" -#include +#include +#include #include namespace NKikimr::NColumnShard { @@ -13,7 +16,32 @@ enum class EWriteFailReason { LongTxDuplication /* "long_tx_duplication" */, NoTable /* "no_table" */, IncorrectSchema /* "incorrect_schema" */, - Overload /* "overload" */ + Overload /* "overload" */, + OverlimitReadRawMemory /* "overlimit_read_raw_memory" */, + OverlimitReadBlobMemory /* "overlimit_read_blob_memory" */ +}; + +class TWriteCounters: public TCommonCountersOwner { +private: + using TBase = TCommonCountersOwner; + NMonitoring::TDynamicCounters::TCounterPtr VolumeWriteData; + NMonitoring::THistogramPtr HistogramBytesWriteDataCount; + NMonitoring::THistogramPtr HistogramBytesWriteDataBytes; + +public: + TWriteCounters(TCommonCountersOwner& owner) + : TBase(owner, "activity", "writing") + { + 
VolumeWriteData = TBase::GetDeriviative("Write/Incoming/Bytes"); + HistogramBytesWriteDataCount = TBase::GetHistogram("Write/Incoming/ByBytes/Count", NMonitoring::ExponentialHistogram(18, 2, 100)); + HistogramBytesWriteDataBytes = TBase::GetHistogram("Write/Incoming/ByBytes/Bytes", NMonitoring::ExponentialHistogram(18, 2, 100)); + } + + void OnIncomingData(const ui64 dataSize) const { + VolumeWriteData->Add(dataSize); + HistogramBytesWriteDataCount->Collect((i64)dataSize, 1); + HistogramBytesWriteDataBytes->Collect((i64)dataSize, dataSize); + } }; class TCSCounters: public TCommonCountersOwner { @@ -62,11 +90,17 @@ class TCSCounters: public TCommonCountersOwner { NMonitoring::THistogramPtr HistogramSuccessWriteMiddle6PutBlobsDurationMs; NMonitoring::THistogramPtr HistogramFailedWritePutBlobsDurationMs; NMonitoring::THistogramPtr HistogramWriteTxCompleteDurationMs; + NMonitoring::TDynamicCounters::TCounterPtr WritePutBlobsCount; NMonitoring::TDynamicCounters::TCounterPtr WriteRequests; THashMap FailedWriteRequests; NMonitoring::TDynamicCounters::TCounterPtr SuccessWriteRequests; + public: + const std::shared_ptr WritingCounters; + const TCSInitialization Initialization; + TTxProgressCounters TxProgress; + void OnStartWriteRequest() const { WriteRequests->Add(1); } @@ -80,7 +114,6 @@ class TCSCounters: public TCommonCountersOwner { void OnWritePutBlobsSuccess(const TDuration d) const { HistogramSuccessWritePutBlobsDurationMs->Collect(d.MilliSeconds()); - WritePutBlobsCount->Sub(1); } void OnWriteMiddle1PutBlobsSuccess(const TDuration d) const { @@ -109,11 +142,6 @@ class TCSCounters: public TCommonCountersOwner { void OnWritePutBlobsFail(const TDuration d) const { HistogramFailedWritePutBlobsDurationMs->Collect(d.MilliSeconds()); - WritePutBlobsCount->Sub(1); - } - - void OnWritePutBlobsStart() const { - WritePutBlobsCount->Add(1); } void OnWriteTxComplete(const TDuration d) const { @@ -130,27 +158,27 @@ class TCSCounters: public TCommonCountersOwner { 
SplitCompactionGranulePortionsCount->SetValue(portionsCount); } - void OnOverloadInsertTable(const ui64 size) const { + void OnWriteOverloadInsertTable(const ui64 size) const { OverloadInsertTableBytes->Add(size); OverloadInsertTableCount->Add(1); } - void OnOverloadMetadata(const ui64 size) const { + void OnWriteOverloadMetadata(const ui64 size) const { OverloadMetadataBytes->Add(size); OverloadMetadataCount->Add(1); } - void OnOverloadShardTx(const ui64 size) const { + void OnWriteOverloadShardTx(const ui64 size) const { OverloadShardTxBytes->Add(size); OverloadShardTxCount->Add(1); } - void OnOverloadShardWrites(const ui64 size) const { + void OnWriteOverloadShardWrites(const ui64 size) const { OverloadShardWritesBytes->Add(size); OverloadShardWritesCount->Add(1); } - void OnOverloadShardWritesSize(const ui64 size) const { + void OnWriteOverloadShardWritesSize(const ui64 size) const { OverloadShardWritesSizeBytes->Add(size); OverloadShardWritesSizeCount->Add(1); } diff --git a/ydb/core/tx/columnshard/counters/counters_manager.cpp b/ydb/core/tx/columnshard/counters/counters_manager.cpp new file mode 100644 index 000000000000..11b18888bc31 --- /dev/null +++ b/ydb/core/tx/columnshard/counters/counters_manager.cpp @@ -0,0 +1,5 @@ +#include "counters_manager.h" + +namespace NKikimr::NColumnShard { + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/counters/counters_manager.h b/ydb/core/tx/columnshard/counters/counters_manager.h new file mode 100644 index 000000000000..17336ca3410d --- /dev/null +++ b/ydb/core/tx/columnshard/counters/counters_manager.h @@ -0,0 +1,97 @@ +#pragma once + +#include "background_controller.h" +#include "column_tables.h" +#include "columnshard.h" +#include "indexation.h" +#include "req_tracer.h" +#include "scan.h" +#include "tablet_counters.h" +#include "writes_monitor.h" + +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace NKikimr::NColumnShard { + +class TCountersManager 
{ +private: + YDB_READONLY_DEF(std::shared_ptr, TabletCounters); + YDB_READONLY_DEF(std::shared_ptr, WritesMonitor); + + YDB_READONLY_DEF(std::shared_ptr, BackgroundControllerCounters); + YDB_READONLY_DEF(std::shared_ptr, ColumnTablesCounters); + + YDB_READONLY(TCSCounters, CSCounters, TCSCounters()); + YDB_READONLY(TIndexationCounters, EvictionCounters, TIndexationCounters("Eviction")); + YDB_READONLY(TIndexationCounters, IndexationCounters, TIndexationCounters("Indexation")); + YDB_READONLY(TIndexationCounters, CompactionCounters, TIndexationCounters("GeneralCompaction")); + YDB_READONLY(TScanCounters, ScanCounters, TScanCounters("Scan")); + YDB_READONLY_DEF(std::shared_ptr, RequestsTracingCounters); + YDB_READONLY_DEF(std::shared_ptr, SubscribeCounters); + +public: + TCountersManager(TTabletCountersBase& tabletCounters) + : TabletCounters(std::make_shared(tabletCounters)) + , WritesMonitor(std::make_shared(tabletCounters)) + , BackgroundControllerCounters(std::make_shared()) + , ColumnTablesCounters(std::make_shared()) + , RequestsTracingCounters(std::make_shared()) + , SubscribeCounters(std::make_shared()) { + } + + void OnWriteOverloadDisk() const { + TabletCounters->IncCounter(COUNTER_OUT_OF_SPACE); + } + + void OnWriteOverloadInsertTable(const ui64 size) const { + TabletCounters->IncCounter(COUNTER_WRITE_OVERLOAD); + CSCounters.OnWriteOverloadInsertTable(size); + } + + void OnWriteOverloadMetadata(const ui64 size) const { + TabletCounters->IncCounter(COUNTER_WRITE_OVERLOAD); + CSCounters.OnWriteOverloadMetadata(size); + } + + void OnWriteOverloadShardTx(const ui64 size) const { + TabletCounters->IncCounter(COUNTER_WRITE_OVERLOAD); + CSCounters.OnWriteOverloadShardTx(size); + } + + void OnWriteOverloadShardWrites(const ui64 size) const { + TabletCounters->IncCounter(COUNTER_WRITE_OVERLOAD); + CSCounters.OnWriteOverloadShardWrites(size); + } + + void OnWriteOverloadShardWritesSize(const ui64 size) const { + TabletCounters->IncCounter(COUNTER_WRITE_OVERLOAD); + 
CSCounters.OnWriteOverloadShardWritesSize(size); + } + + void FillTableStats(ui64 pathId, ::NKikimrTableStats::TTableStats& tableStats) { + ColumnTablesCounters->GetPathIdCounter(pathId)->FillStats(tableStats); + BackgroundControllerCounters->FillStats(pathId, tableStats); + } + + void FillTotalTableStats(::NKikimrTableStats::TTableStats& tableStats) { + ColumnTablesCounters->FillStats(tableStats); + TabletCounters->FillStats(tableStats); + BackgroundControllerCounters->FillTotalStats(tableStats); + ScanCounters.FillStats(tableStats); + } + + void OnWritePutBlobsSuccess(const TDuration d, const ui64 rowsWritten) const { + TabletCounters->OnWritePutBlobsSuccess(rowsWritten); + CSCounters.OnWritePutBlobsSuccess(d); + } +}; + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/counters/engine_logs.cpp b/ydb/core/tx/columnshard/counters/engine_logs.cpp index 368eebc40ef1..7a38e052c5ed 100644 --- a/ydb/core/tx/columnshard/counters/engine_logs.cpp +++ b/ydb/core/tx/columnshard/counters/engine_logs.cpp @@ -91,9 +91,11 @@ void TEngineLogsCounters::TPortionsInfoGuard::OnNewPortion(const std::shared_ptr } } for (auto&& i : portion->GetIndexes()) { - const auto blobId = portion->GetBlobId(i.GetBlobRange().GetBlobIdxVerified()); - if (blobIds.emplace(blobId).second) { - BlobGuards[producedId]->Add(blobId.BlobSize(), blobId.BlobSize()); + if (i.HasBlobRange()) { + const auto blobId = portion->GetBlobId(i.GetBlobRangeVerified().GetBlobIdxVerified()); + if (blobIds.emplace(blobId).second) { + BlobGuards[producedId]->Add(blobId.BlobSize(), blobId.BlobSize()); + } } } PortionRecordCountGuards[producedId]->Add(portion->GetRecordsCount(), 1); @@ -111,9 +113,11 @@ void TEngineLogsCounters::TPortionsInfoGuard::OnDropPortion(const std::shared_pt } } for (auto&& i : portion->GetIndexes()) { - const auto blobId = portion->GetBlobId(i.GetBlobRange().GetBlobIdxVerified()); - if (blobIds.emplace(blobId).second) { - BlobGuards[producedId]->Sub(blobId.BlobSize(), 
blobId.BlobSize()); + if (i.HasBlobRange()) { + const auto blobId = portion->GetBlobId(i.GetBlobRangeVerified().GetBlobIdxVerified()); + if (blobIds.emplace(blobId).second) { + BlobGuards[producedId]->Sub(blobId.BlobSize(), blobId.BlobSize()); + } } } PortionRecordCountGuards[producedId]->Sub(portion->GetRecordsCount(), 1); diff --git a/ydb/core/tx/columnshard/counters/engine_logs.h b/ydb/core/tx/columnshard/counters/engine_logs.h index 97a4716652c3..2cbaf7fa234e 100644 --- a/ydb/core/tx/columnshard/counters/engine_logs.h +++ b/ydb/core/tx/columnshard/counters/engine_logs.h @@ -85,17 +85,44 @@ class TAgentDataClassCounters: public TCommonCountersOwner { } }; +class TIntervalMemoryCounters { +public: + const std::shared_ptr MinReadBytes; + TIntervalMemoryCounters(const std::shared_ptr& minReadBytes) + : MinReadBytes(minReadBytes) + { + + } +}; + +class TPortionsIndexCounters { +public: + const TIntervalMemoryCounters RawBytes; + const TIntervalMemoryCounters BlobBytes; + TPortionsIndexCounters(TIntervalMemoryCounters&& rawBytes, TIntervalMemoryCounters&& blobBytes) + : RawBytes(std::move(rawBytes)) + , BlobBytes(std::move(blobBytes)) { + } +}; + class TGranuleDataCounters { private: const TDataClassCounters InsertedData; const TDataClassCounters CompactedData; const TDataClassCounters FullData; + const TPortionsIndexCounters PortionsIndexCounters; + public: - TGranuleDataCounters(const TDataClassCounters& insertedData, const TDataClassCounters& compactedData, const TDataClassCounters& fullData) + const TPortionsIndexCounters& GetPortionsIndexCounters() const { + return PortionsIndexCounters; + } + + TGranuleDataCounters(const TDataClassCounters& insertedData, const TDataClassCounters& compactedData, const TDataClassCounters& fullData, + TPortionsIndexCounters&& portionsIndexCounters) : InsertedData(insertedData) , CompactedData(compactedData) , FullData(fullData) - { + , PortionsIndexCounters(std::move(portionsIndexCounters)) { } void OnPortionsDataRefresh(const 
TBaseGranuleDataClassSummary& inserted, const TBaseGranuleDataClassSummary& compacted) const { @@ -105,20 +132,60 @@ class TGranuleDataCounters { } }; +class TIntervalMemoryAgentCounters: public TCommonCountersOwner { +private: + using TBase = TCommonCountersOwner; + const std::shared_ptr ReadBytes; +public: + TIntervalMemoryAgentCounters(const TCommonCountersOwner& base, const TString& memoryType) + : TBase(base, "memory", memoryType) + , ReadBytes(TBase::GetValueAutoAggregations("Bytes")) { + } + + TIntervalMemoryCounters GetClient() const { + return TIntervalMemoryCounters(ReadBytes->GetClient()); + } +}; + +class TPortionsIndexAgentsCounters: public TCommonCountersOwner { +private: + using TBase = TCommonCountersOwner; + TIntervalMemoryAgentCounters ReadRawBytes; + TIntervalMemoryAgentCounters ReadBlobBytes; + +public: + + TPortionsIndexAgentsCounters(const TString& baseName) + : TBase(baseName) + , ReadRawBytes(TBase::CreateSubGroup("control", "read_memory"), "raw") + , ReadBlobBytes(TBase::CreateSubGroup("control", "read_memory"), "blob") + { + } + + TPortionsIndexCounters BuildCounters() const { + return TPortionsIndexCounters(ReadRawBytes.GetClient(), ReadBlobBytes.GetClient()); + } +}; + class TAgentGranuleDataCounters { private: TAgentDataClassCounters InsertedData; TAgentDataClassCounters CompactedData; TAgentDataClassCounters FullData; + TPortionsIndexAgentsCounters PortionsIndex; + public: TAgentGranuleDataCounters(const TString& ownerId) : InsertedData(ownerId, "ByGranule/Inserted") , CompactedData(ownerId, "ByGranule/Compacted") - , FullData(ownerId, "ByGranule/Full") { + , FullData(ownerId, "ByGranule/Full") + , PortionsIndex("ByGranule/PortionsIndex") + { } TGranuleDataCounters RegisterClient() const { - return TGranuleDataCounters(InsertedData.RegisterClient(), CompactedData.RegisterClient(), FullData.RegisterClient()); + return TGranuleDataCounters( + InsertedData.RegisterClient(), CompactedData.RegisterClient(), FullData.RegisterClient(), 
PortionsIndex.BuildCounters()); } }; diff --git a/ydb/core/tx/columnshard/counters/initialization.h b/ydb/core/tx/columnshard/counters/initialization.h new file mode 100644 index 000000000000..2a6b432d6135 --- /dev/null +++ b/ydb/core/tx/columnshard/counters/initialization.h @@ -0,0 +1,56 @@ +#pragma once +#include "common/owner.h" + +#include + +namespace NKikimr::NColumnShard { + +class TCSInitialization: public TCommonCountersOwner { +private: + using TBase = TCommonCountersOwner; + + const NMonitoring::THistogramPtr HistogramTabletInitializationMs; + const NMonitoring::THistogramPtr HistogramTxInitDurationMs; + const NMonitoring::THistogramPtr HistogramTxUpdateSchemaDurationMs; + const NMonitoring::THistogramPtr HistogramTxInitSchemaDurationMs; + const NMonitoring::THistogramPtr HistogramActivateExecutorFromActivationDurationMs; + const NMonitoring::THistogramPtr HistogramSwitchToWorkFromActivationDurationMs; + const NMonitoring::THistogramPtr HistogramSwitchToWorkFromCreateDurationMs; + +public: + void OnTxInitFinished(const TDuration d) const { + HistogramTxInitDurationMs->Collect(d.MilliSeconds()); + } + + void OnTxUpdateSchemaFinished(const TDuration d) const { + HistogramTxUpdateSchemaDurationMs->Collect(d.MilliSeconds()); + } + + void OnTxInitSchemaFinished(const TDuration d) const { + HistogramTxInitSchemaDurationMs->Collect(d.MilliSeconds()); + } + + void OnActivateExecutor(const TDuration fromCreate) const { + HistogramActivateExecutorFromActivationDurationMs->Collect(fromCreate.MilliSeconds()); + } + void OnSwitchToWork(const TDuration fromStart, const TDuration fromCreate) const { + HistogramSwitchToWorkFromActivationDurationMs->Collect(fromStart.MilliSeconds()); + HistogramSwitchToWorkFromCreateDurationMs->Collect(fromCreate.MilliSeconds()); + } + + TCSInitialization(TCommonCountersOwner& owner) + : TBase(owner, "stage", "initialization") + , HistogramTabletInitializationMs(TBase::GetHistogram("TabletInitializationMs", 
NMonitoring::ExponentialHistogram(15, 2, 32))) + , HistogramTxInitDurationMs(TBase::GetHistogram("TxInitDurationMs", NMonitoring::ExponentialHistogram(15, 2, 32))) + , HistogramTxUpdateSchemaDurationMs(TBase::GetHistogram("TxUpdateSchemaDurationMs", NMonitoring::ExponentialHistogram(15, 2, 32))) // own sensor name, distinct from TxInitDurationMs + , HistogramTxInitSchemaDurationMs(TBase::GetHistogram("TxInitSchemaDurationMs", NMonitoring::ExponentialHistogram(15, 2, 32))) + , HistogramActivateExecutorFromActivationDurationMs( + TBase::GetHistogram("ActivateExecutorFromActivationDurationMs", NMonitoring::ExponentialHistogram(15, 2, 32))) + , HistogramSwitchToWorkFromActivationDurationMs( + TBase::GetHistogram("SwitchToWorkFromActivationDurationMs", NMonitoring::ExponentialHistogram(15, 2, 32))) + , HistogramSwitchToWorkFromCreateDurationMs( + TBase::GetHistogram("SwitchToWorkFromCreateDurationMs", NMonitoring::ExponentialHistogram(15, 2, 32))) { + } +}; + +} diff --git a/ydb/core/tx/columnshard/counters/req_tracer.cpp b/ydb/core/tx/columnshard/counters/req_tracer.cpp new file mode 100644 index 000000000000..e40342f21db1 --- /dev/null +++ b/ydb/core/tx/columnshard/counters/req_tracer.cpp @@ -0,0 +1,5 @@ +#include "req_tracer.h" + +namespace NKikimr::NColumnShard { + +} diff --git a/ydb/core/tx/columnshard/counters/req_tracer.h b/ydb/core/tx/columnshard/counters/req_tracer.h new file mode 100644 index 000000000000..f70cd02e4840 --- /dev/null +++ b/ydb/core/tx/columnshard/counters/req_tracer.h @@ -0,0 +1,51 @@ +#pragma once +#include "common/owner.h" +#include + +namespace NKikimr::NColumnShard { + +class TRequestsTracerCounters: public TCommonCountersOwner { +private: + using TBase = TCommonCountersOwner; + NMonitoring::TDynamicCounters::TCounterPtr RequestedMinSnapshotAge; + NMonitoring::TDynamicCounters::TCounterPtr DefaultMinSnapshotAge; + NMonitoring::TDynamicCounters::TCounterPtr SnapshotsCount; + NMonitoring::TDynamicCounters::TCounterPtr SnapshotLock; + NMonitoring::TDynamicCounters::TCounterPtr SnapshotUnlock; + 
+public: + + TRequestsTracerCounters() + : TBase("cs_requests_tracing") + , RequestedMinSnapshotAge(TBase::GetValue("Snapshots/RequestedAge/Seconds")) + , DefaultMinSnapshotAge(TBase::GetValue("Snapshots/DefaultAge/Seconds")) + , SnapshotsCount(TBase::GetValue("Snapshots/Count")) + , SnapshotLock(TBase::GetDeriviative("Snapshots/Lock")) + , SnapshotUnlock(TBase::GetDeriviative("Snapshots/Unlock")) + { + + } + + void OnDefaultMinSnapshotInstant(const TInstant instant) const { + DefaultMinSnapshotAge->Set((TInstant::Now() - instant).Seconds()); + } + + void OnSnapshotsInfo(const ui32 count, const std::optional snapshotPlanStep) const { + if (snapshotPlanStep) { + RequestedMinSnapshotAge->Set((TInstant::Now() - snapshotPlanStep->GetPlanInstant()).Seconds()); + } else { + RequestedMinSnapshotAge->Set(0); + } + SnapshotsCount->Set(count); + + } + + void OnSnapshotLocked() const { + SnapshotLock->Add(1); + } + void OnSnapshotUnlocked() const { + SnapshotUnlock->Add(1); + } +}; + +} diff --git a/ydb/core/tx/columnshard/counters/scan.cpp b/ydb/core/tx/columnshard/counters/scan.cpp index 075aa0e880ec..cdfd42aa9bc4 100644 --- a/ydb/core/tx/columnshard/counters/scan.cpp +++ b/ydb/core/tx/columnshard/counters/scan.cpp @@ -88,12 +88,14 @@ TScanCounters::TScanCounters(const TString& module) ScanIntervalState = std::make_shared(*this); ResourcesSubscriberCounters = std::make_shared(); ScanDurationByStatus.resize((ui32)EStatusFinish::COUNT); + ScansFinishedByStatus.resize((ui32)EStatusFinish::COUNT); ui32 idx = 0; for (auto&& i : GetEnumAllValues()) { if (i == EStatusFinish::COUNT) { continue; } ScanDurationByStatus[(ui32)i] = TBase::GetHistogram("ScanDuration/" + ::ToString(i) + "/Milliseconds", NMonitoring::ExponentialHistogram(18, 2, 1)); + ScansFinishedByStatus[(ui32)i] = TBase::GetDeriviative("ScansFinished/" + ::ToString(i)); AFL_VERIFY(idx == (ui32)i); ++idx; } @@ -103,4 +105,8 @@ NKikimr::NColumnShard::TScanAggregations TScanCounters::BuildAggregations() { return 
TScanAggregations(GetModuleId()); } +void TScanCounters::FillStats(::NKikimrTableStats::TTableStats& output) const { + output.SetRangeReads(ScansFinishedByStatus[(ui32)EStatusFinish::Success]->Val()); +} + } diff --git a/ydb/core/tx/columnshard/counters/scan.h b/ydb/core/tx/columnshard/counters/scan.h index d9bbd6b898ce..6d1202288514 100644 --- a/ydb/core/tx/columnshard/counters/scan.h +++ b/ydb/core/tx/columnshard/counters/scan.h @@ -1,8 +1,10 @@ #pragma once #include "common/owner.h" #include "common/histogram.h" +#include #include #include +#include #include namespace NKikimr::NColumnShard { @@ -10,31 +12,30 @@ namespace NKikimr::NColumnShard { class TScanAggregations: public TCommonCountersOwner { private: using TBase = TCommonCountersOwner; - std::shared_ptr ReadBlobs; - std::shared_ptr GranulesProcessing; - std::shared_ptr GranulesReady; std::shared_ptr ResultsReady; + std::shared_ptr RequestedResourcesMemory; std::shared_ptr ScanDuration; std::shared_ptr BlobsWaitingDuration; public: TScanAggregations(const TString& moduleId) : TBase(moduleId) - , GranulesProcessing(std::make_shared(moduleId, "InFlight/Granules/Processing")) , ResultsReady(std::make_shared(moduleId, "InFlight/Results/Ready")) + , RequestedResourcesMemory(std::make_shared(moduleId, "InFlight/Resources/Requested")) , ScanDuration(TBase::GetValueAutoAggregationsClient("ScanDuration")) , BlobsWaitingDuration(TBase::GetValueAutoAggregationsClient("BlobsWaitingDuration")) { } + std::shared_ptr GetRequestedResourcesMemory() const { + return RequestedResourcesMemory; + } + void OnBlobWaitingDuration(const TDuration d, const TDuration fullScanDuration) const { BlobsWaitingDuration->Add(d.MicroSeconds()); ScanDuration->SetValue(fullScanDuration.MicroSeconds()); } - const std::shared_ptr& GetGranulesProcessing() const { - return GranulesProcessing; - } const std::shared_ptr& GetResultsReady() const { return ResultsReady; } @@ -127,6 +128,7 @@ class TScanCounters: public TCommonCountersOwner { 
NMonitoring::TDynamicCounters::TCounterPtr AckWaitingDuration; std::vector ScanDurationByStatus; + std::vector ScansFinishedByStatus; NMonitoring::TDynamicCounters::TCounterPtr NoScanRecords; NMonitoring::TDynamicCounters::TCounterPtr NoScanIntervals; @@ -212,9 +214,10 @@ class TScanCounters: public TCommonCountersOwner { LogScanIntervals->Add(1); } - void OnScanDuration(const EStatusFinish status, const TDuration d) const { + void OnScanFinished(const EStatusFinish status, const TDuration d) const { AFL_VERIFY((ui32)status < ScanDurationByStatus.size()); ScanDurationByStatus[(ui32)status]->Collect(d.MilliSeconds()); + ScansFinishedByStatus[(ui32)status]->Add(1); } void AckWaitingInfo(const TDuration d) const { @@ -257,6 +260,8 @@ class TScanCounters: public TCommonCountersOwner { } TScanAggregations BuildAggregations(); + + void FillStats(::NKikimrTableStats::TTableStats& output) const; }; class TCounterGuard: TNonCopyable { diff --git a/ydb/core/tx/columnshard/counters/tablet_counters.h b/ydb/core/tx/columnshard/counters/tablet_counters.h new file mode 100644 index 000000000000..99292c4f73b8 --- /dev/null +++ b/ydb/core/tx/columnshard/counters/tablet_counters.h @@ -0,0 +1,132 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace NKikimr::NColumnShard { + +class TTabletCountersHandle { +private: + TTabletCountersBase& TabletCounters; + +public: + TTabletCountersHandle(TTabletCountersBase& stats) + : TabletCounters(stats) { + } + + void SetCounter(NColumnShard::ESimpleCounters counter, ui64 num) const { + TabletCounters.Simple()[counter].Set(num); + } + + void IncCounter(NColumnShard::ECumulativeCounters counter, ui64 num = 1) const { + TabletCounters.Cumulative()[counter].Increment(num); + } + + void IncCounter(NColumnShard::EPercentileCounters counter, const TDuration& latency) const { + TabletCounters.Percentile()[counter].IncrementFor(latency.MicroSeconds()); + } + + void IncCounter(NDataShard::ESimpleCounters counter, ui64 
num = 1) const { + TabletCounters.Simple()[counter].Add(num); + } + + void IncCounter(NDataShard::ECumulativeCounters counter, ui64 num = 1) const { + TabletCounters.Cumulative()[counter].Increment(num); + } + + void IncCounter(NDataShard::EPercentileCounters counter, ui64 num) const { + TabletCounters.Percentile()[counter].IncrementFor(num); + } + + void IncCounter(NDataShard::EPercentileCounters counter, const TDuration& latency) const { + TabletCounters.Percentile()[counter].IncrementFor(latency.MilliSeconds()); + } + + ui64 GetValue(NColumnShard::ESimpleCounters counter) const { + return TabletCounters.Simple()[counter].Get(); + } + + ui64 GetValue(NColumnShard::ECumulativeCounters counter) const { + return TabletCounters.Cumulative()[counter].Get(); + } + + const TTabletPercentileCounter& GetValue(NColumnShard::EPercentileCounters counter) const { + return TabletCounters.Percentile()[counter]; + } + + ui64 GetValue(NDataShard::ESimpleCounters counter) const { + return TabletCounters.Simple()[counter].Get(); + } + + ui64 GetValue(NDataShard::ECumulativeCounters counter) const { + return TabletCounters.Cumulative()[counter].Get(); + } + + const TTabletPercentileCounter& GetCounter(NDataShard::EPercentileCounters counter) const { + return TabletCounters.Percentile()[counter]; + } + + void OnWriteSuccess(const ui64 blobsWritten, const ui64 bytesWritten) const { + IncCounter(NColumnShard::COUNTER_OPERATIONS_BLOBS_WRITTEN, blobsWritten); + IncCounter(NColumnShard::COUNTER_OPERATIONS_BYTES_WRITTEN, bytesWritten); + IncCounter(NColumnShard::COUNTER_WRITE_SUCCESS); + } + + void OnWriteFailure() const { + IncCounter(NColumnShard::COUNTER_WRITE_FAIL); + } + + void OnScanStarted(const NOlap::TSelectInfo::TStats& countersDelta) const { + IncCounter(NColumnShard::COUNTER_READ_INDEX_PORTIONS, countersDelta.Portions); + IncCounter(NColumnShard::COUNTER_READ_INDEX_BLOBS, countersDelta.Blobs); + IncCounter(NColumnShard::COUNTER_READ_INDEX_ROWS, countersDelta.Rows); + 
IncCounter(NColumnShard::COUNTER_READ_INDEX_BYTES, countersDelta.Bytes); + } + + void OnWriteCommitted(const NOlap::TInsertionSummary::TCounters& countersDelta) const { + IncCounter(COUNTER_BLOBS_COMMITTED, countersDelta.Rows); + IncCounter(COUNTER_BYTES_COMMITTED, countersDelta.Bytes); + IncCounter(COUNTER_RAW_BYTES_COMMITTED, countersDelta.RawBytes); + } + + void OnCompactionWriteIndexCompleted(bool success, const ui64 blobsWritten, const ui64 bytesWritten) const { + IncCounter(success ? NColumnShard::COUNTER_SPLIT_COMPACTION_SUCCESS : NColumnShard::COUNTER_SPLIT_COMPACTION_FAIL); + IncCounter(NColumnShard::COUNTER_SPLIT_COMPACTION_BLOBS_WRITTEN, blobsWritten); + IncCounter(NColumnShard::COUNTER_SPLIT_COMPACTION_BYTES_WRITTEN, bytesWritten); + } + + void OnInsertionWriteIndexCompleted(const ui64 blobsWritten, const ui64 bytesWritten, const TDuration duration) const { + IncCounter(NColumnShard::COUNTER_INDEXING_BLOBS_WRITTEN, blobsWritten); + IncCounter(NColumnShard::COUNTER_INDEXING_BYTES_WRITTEN, bytesWritten); + IncCounter(NColumnShard::COUNTER_INDEXING_TIME, duration.MilliSeconds()); + } + + void OnWritePutBlobsSuccess(const ui64 rowsWritten) const { + IncCounter(NColumnShard::COUNTER_OPERATIONS_ROWS_WRITTEN, rowsWritten); + } + + void OnDropPortionEvent(const ui64 rawBytes, const ui64 blobBytes, const ui64 rows) const { + IncCounter(NColumnShard::COUNTER_RAW_BYTES_ERASED, rawBytes); + IncCounter(NColumnShard::COUNTER_BYTES_ERASED, blobBytes); + IncCounter(NColumnShard::COUNTER_ROWS_ERASED, rows); + } + + void FillStats(::NKikimrTableStats::TTableStats& output) const { + output.SetRowUpdates(GetValue(COUNTER_OPERATIONS_ROWS_WRITTEN)); + output.SetRowDeletes(GetValue(COUNTER_ROWS_ERASED)); + output.SetRowReads(0); // all reads are range reads + output.SetRangeReadRows(GetValue(COUNTER_READ_INDEX_ROWS)); + + output.SetImmediateTxCompleted(GetValue(COUNTER_IMMEDIATE_TX_COMPLETED)); + output.SetTxRejectedByOverload(GetValue(COUNTER_WRITE_OVERLOAD)); + 
output.SetTxRejectedBySpace(GetValue(COUNTER_OUT_OF_SPACE)); + output.SetPlannedTxCompleted(GetValue(COUNTER_PLANNED_TX_COMPLETED)); + output.SetTxCompleteLagMsec(GetValue(COUNTER_TX_COMPLETE_LAG)); + } +}; + +} diff --git a/ydb/core/tx/columnshard/counters/tx_progress.h b/ydb/core/tx/columnshard/counters/tx_progress.h new file mode 100644 index 000000000000..24319a3ab748 --- /dev/null +++ b/ydb/core/tx/columnshard/counters/tx_progress.h @@ -0,0 +1,110 @@ +#pragma once +#include "common/owner.h" + +#include +#include + +namespace NKikimr::NColumnShard { + +class TTxProgressCounters: public TCommonCountersOwner { +private: + using TBase = TCommonCountersOwner; + using TOpType = TString; + + class TProgressCounters: public TCommonCountersOwner { + private: + using TBase = TCommonCountersOwner; + + public: + NMonitoring::TDynamicCounters::TCounterPtr RegisterTx; + NMonitoring::TDynamicCounters::TCounterPtr RegisterTxWithDeadline; + NMonitoring::TDynamicCounters::TCounterPtr StartProposeOnExecute; + NMonitoring::TDynamicCounters::TCounterPtr StartProposeOnComplete; + NMonitoring::TDynamicCounters::TCounterPtr FinishProposeOnExecute; + NMonitoring::TDynamicCounters::TCounterPtr FinishProposeOnComplete; + NMonitoring::TDynamicCounters::TCounterPtr FinishPlannedTx; + NMonitoring::TDynamicCounters::TCounterPtr AbortTx; + NMonitoring::THistogramPtr HistogramTxExecuteDuration; + NMonitoring::THistogramPtr HistogramTxLiveDuration; + NMonitoring::THistogramPtr HistogramTxProgressLag; + + TProgressCounters(const TCommonCountersOwner& owner) + : TBase(owner) + , RegisterTx(TBase::GetDeriviative("RegisterTx")) + , RegisterTxWithDeadline(TBase::GetDeriviative("RegisterTxWithDeadline")) + , StartProposeOnExecute(TBase::GetDeriviative("StartProposeOnExecute")) + , StartProposeOnComplete(TBase::GetDeriviative("StartProposeOnComplete")) + , FinishProposeOnExecute(TBase::GetDeriviative("FinishProposeOnExecute")) + , 
FinishProposeOnComplete(TBase::GetDeriviative("FinishProposeOnComplete")) + , FinishPlannedTx(TBase::GetDeriviative("FinishPlannedTx")) + , AbortTx(TBase::GetDeriviative("AbortTx")) + , HistogramTxExecuteDuration(TBase::GetHistogram("TxProgress/Execution/DurationMs", NMonitoring::ExponentialHistogram(18, 2, 5))) + , HistogramTxLiveDuration(TBase::GetHistogram("TxProgress/Live/DurationMs", NMonitoring::ExponentialHistogram(18, 2, 5))) + , HistogramTxProgressLag(TBase::GetHistogram("TxProgress/LagOnComplete/DurationMs", NMonitoring::ExponentialHistogram(18, 2, 5))) { + } + }; + + THashMap CountersByOpType; + +public: + void OnTxExecuteDuration(const TString& opType, const TDuration d) { + GetSubGroup(opType).HistogramTxExecuteDuration->Collect(d.MilliSeconds()); + } + + void OnTxLiveDuration(const TString& opType, const TDuration d) { + GetSubGroup(opType).HistogramTxLiveDuration->Collect(d.MilliSeconds()); + } + + void OnTxProgressLag(const TString& opType, const TDuration d) { + GetSubGroup(opType).HistogramTxProgressLag->Collect(d.MilliSeconds()); + } + + void OnRegisterTx(const TOpType& opType) { + GetSubGroup(opType).RegisterTx->Add(1); + } + + void OnRegisterTxWithDeadline(const TOpType& opType) { + GetSubGroup(opType).RegisterTxWithDeadline->Add(1); + } + + void OnStartProposeOnExecute(const TOpType& opType) { + GetSubGroup(opType).StartProposeOnExecute->Add(1); + } + + void OnStartProposeOnComplete(const TOpType& opType) { + GetSubGroup(opType).StartProposeOnComplete->Add(1); + } + + void OnFinishProposeOnExecute(const TOpType& opType) { + GetSubGroup(opType).FinishProposeOnExecute->Add(1); + } + + void OnFinishProposeOnComplete(const TOpType& opType) { + GetSubGroup(opType).FinishProposeOnComplete->Add(1); + } + + void OnFinishPlannedTx(const TOpType& opType) { + GetSubGroup(opType).FinishPlannedTx->Add(1); + } + + void OnAbortTx(const TOpType& opType) { + GetSubGroup(opType).AbortTx->Add(1); + } + + TTxProgressCounters(TCommonCountersOwner& owner) + : 
TBase(owner, "TxProgress") { + } + +private: + TProgressCounters& GetSubGroup(const TOpType& opType) { + auto findSubGroup = CountersByOpType.FindPtr(opType); + if (findSubGroup) { + return *findSubGroup; + } + + auto subGroup = TBase::CreateSubGroup("operation", opType); + return CountersByOpType.emplace(opType, subGroup).first->second; + } +}; + +} diff --git a/ydb/core/tx/columnshard/counters/writes_monitor.h b/ydb/core/tx/columnshard/counters/writes_monitor.h new file mode 100644 index 000000000000..ad8ad6e474cc --- /dev/null +++ b/ydb/core/tx/columnshard/counters/writes_monitor.h @@ -0,0 +1,46 @@ +#pragma once + +#include +#include +#include + +namespace NKikimr::NColumnShard { + +class TWritesMonitor { +private: + TTabletCountersBase& Stats; + + YDB_READONLY(ui64, WritesInFlight, 0); + YDB_READONLY(ui64, WritesSizeInFlight, 0); + +public: + TWritesMonitor(TTabletCountersBase& stats) + : Stats(stats) { + } + + void OnStartWrite(const ui64 dataSize) { + ++WritesInFlight; + WritesSizeInFlight += dataSize; + UpdateTabletCounters(); + } + + void OnFinishWrite(const ui64 dataSize, const ui32 writesCount = 1) { + Y_ABORT_UNLESS(WritesInFlight >= writesCount); /* WritesInFlight is ui64: finishing more writes than are in flight would wrap the counter */ + Y_ABORT_UNLESS(WritesSizeInFlight >= dataSize); + WritesInFlight -= writesCount; + WritesSizeInFlight -= dataSize; + UpdateTabletCounters(); + } + + TString DebugString() const { + return TStringBuilder() << "{object=write_monitor;count=" << WritesInFlight << ";size=" << WritesSizeInFlight + << "}"; + } + +private: + void UpdateTabletCounters() { + Stats.Simple()[COUNTER_WRITES_IN_FLY].Set(WritesInFlight); + } +}; + +} diff --git a/ydb/core/tx/columnshard/counters/ya.make b/ydb/core/tx/columnshard/counters/ya.make index 65797cb34752..8707d6080e30 100644 --- a/ydb/core/tx/columnshard/counters/ya.make +++ b/ydb/core/tx/columnshard/counters/ya.make @@ -1,18 +1,23 @@ LIBRARY() SRCS( - indexation.cpp - scan.cpp - engine_logs.cpp + background_controller.cpp + counters_manager.cpp blobs_manager.cpp + column_tables.cpp
columnshard.cpp - insert_table.cpp common_data.cpp + engine_logs.cpp + indexation.cpp + insert_table.cpp + req_tracer.cpp + scan.cpp splitter.cpp ) PEERDIR( library/cpp/monlib/dynamic_counters + ydb/core/tx/columnshard/counters/aggregation ydb/core/tx/columnshard/counters/common ydb/core/base ) diff --git a/ydb/core/tx/columnshard/data_reader/actor.cpp b/ydb/core/tx/columnshard/data_reader/actor.cpp index c66cfc42cacc..4fd69af8a7ab 100644 --- a/ydb/core/tx/columnshard/data_reader/actor.cpp +++ b/ydb/core/tx/columnshard/data_reader/actor.cpp @@ -35,7 +35,10 @@ void TActor::HandleExecute(NKqp::TEvKqpCompute::TEvScanInitActor::TPtr& ev) { } void TActor::HandleExecute(NKqp::TEvKqpCompute::TEvScanError::TPtr& ev) { - AFL_VERIFY(false)("error", NYql::IssuesFromMessageAsString(ev->Get()->Record.GetIssues())); + SwitchStage(EStage::WaitData, EStage::Finished); + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "problem_on_restore_data")( + "reason", NYql::IssuesFromMessageAsString(ev->Get()->Record.GetIssues())); + RestoreTask->OnError(NYql::IssuesFromMessageAsString(ev->Get()->Record.GetIssues())); } void TActor::Bootstrap(const TActorContext& /*ctx*/) { diff --git a/ydb/core/tx/columnshard/data_reader/actor.h b/ydb/core/tx/columnshard/data_reader/actor.h index b18af6cf9920..2eca911a87e2 100644 --- a/ydb/core/tx/columnshard/data_reader/actor.h +++ b/ydb/core/tx/columnshard/data_reader/actor.h @@ -12,6 +12,7 @@ class IRestoreTask { YDB_READONLY_DEF(NActors::TActorId, TabletActorId); virtual TConclusionStatus DoOnDataChunk(const std::shared_ptr& data) = 0; virtual TConclusionStatus DoOnFinished() = 0; + virtual void DoOnError(const TString& errorMessage) = 0; virtual std::unique_ptr DoBuildRequestInitiator() const = 0; public: @@ -24,6 +25,10 @@ class IRestoreTask { return DoOnFinished(); } + void OnError(const TString& errorMessage) { + DoOnError(errorMessage); + } + std::unique_ptr BuildRequestInitiator() const { return DoBuildRequestInitiator(); } diff --git 
a/ydb/core/tx/columnshard/data_sharing/common/transactions/tx_extension.h b/ydb/core/tx/columnshard/data_sharing/common/transactions/tx_extension.h index 055081e37173..ea9c99c7fd9d 100644 --- a/ydb/core/tx/columnshard/data_sharing/common/transactions/tx_extension.h +++ b/ydb/core/tx/columnshard/data_sharing/common/transactions/tx_extension.h @@ -15,11 +15,11 @@ class TExtendedTransactionBase: public NTabletFlatExecutor::TTransactionBaseTabletID())("tx_no", TabletTxNo)("tx_info", TxInfo); + NActors::TLogContextGuard logGuard = NActors::TLogContextBuilder::Build()("tablet_id", TBase::Self->TabletID())("local_tx_no", TabletTxNo)("tx_info", TxInfo); return DoExecute(txc, ctx); } virtual void Complete(const NActors::TActorContext& ctx) override final { - NActors::TLogContextGuard logGuard = NActors::TLogContextBuilder::Build()("tablet_id", TBase::Self->TabletID())("tx_no", TabletTxNo)("tx_info", TxInfo); + NActors::TLogContextGuard logGuard = NActors::TLogContextBuilder::Build()("tablet_id", TBase::Self->TabletID())("local_tx_no", TabletTxNo)("tx_info", TxInfo); return DoComplete(ctx); } diff --git a/ydb/core/tx/columnshard/data_sharing/protos/data.proto b/ydb/core/tx/columnshard/data_sharing/protos/data.proto index e602bc4dbba4..8b376e919946 100644 --- a/ydb/core/tx/columnshard/data_sharing/protos/data.proto +++ b/ydb/core/tx/columnshard/data_sharing/protos/data.proto @@ -14,7 +14,10 @@ message TColumnRecord { message TIndexChunk { optional uint32 IndexId = 1; optional uint32 ChunkIdx = 2; - optional NKikimrColumnShardProto.TBlobRangeLink16 BlobRange = 3; + oneof DataImplementation { + NKikimrColumnShardProto.TBlobRangeLink16 BlobRange = 3; + string BlobData = 5; + } message TMeta { optional uint32 RecordsCount = 1; diff --git a/ydb/core/tx/columnshard/data_sharing/source/session/cursor.cpp b/ydb/core/tx/columnshard/data_sharing/source/session/cursor.cpp index 1072d6ff1cb6..5bc37cd29122 100644 --- a/ydb/core/tx/columnshard/data_sharing/source/session/cursor.cpp +++ 
b/ydb/core/tx/columnshard/data_sharing/source/session/cursor.cpp @@ -1,7 +1,7 @@ #include "source.h" #include #include -#include +#include namespace NKikimr::NOlap::NDataSharing { diff --git a/ydb/core/tx/columnshard/engines/changes/abstract/abstract.cpp b/ydb/core/tx/columnshard/engines/changes/abstract/abstract.cpp index 7dfe36689d48..7d37981a9039 100644 --- a/ydb/core/tx/columnshard/engines/changes/abstract/abstract.cpp +++ b/ydb/core/tx/columnshard/engines/changes/abstract/abstract.cpp @@ -46,7 +46,7 @@ void TColumnEngineChanges::WriteIndexOnComplete(NColumnShard::TColumnShard* self DoWriteIndexOnComplete(self, context); if (self) { OnFinish(*self, context); - self->IncCounter(GetCounterIndex(context.FinishedSuccessfully)); + self->Counters.GetTabletCounters()->IncCounter(GetCounterIndex(context.FinishedSuccessfully)); } } diff --git a/ydb/core/tx/columnshard/engines/changes/abstract/abstract.h b/ydb/core/tx/columnshard/engines/changes/abstract/abstract.h index 7234e4710f53..721270ea63f3 100644 --- a/ydb/core/tx/columnshard/engines/changes/abstract/abstract.h +++ b/ydb/core/tx/columnshard/engines/changes/abstract/abstract.h @@ -275,7 +275,7 @@ class TColumnEngineChanges { void Start(NColumnShard::TColumnShard& self); virtual ui32 GetWritePortionsCount() const = 0; - virtual TWritePortionInfoWithBlobs* GetWritePortionInfo(const ui32 index) = 0; + virtual TWritePortionInfoWithBlobsResult* GetWritePortionInfo(const ui32 index) = 0; virtual bool NeedWritePortion(const ui32 index) const = 0; void WriteIndexOnExecute(NColumnShard::TColumnShard* self, TWriteIndexContext& context); diff --git a/ydb/core/tx/columnshard/engines/changes/cleanup_portions.cpp b/ydb/core/tx/columnshard/engines/changes/cleanup_portions.cpp index 3aa29ed01a13..7917b77682b9 100644 --- a/ydb/core/tx/columnshard/engines/changes/cleanup_portions.cpp +++ b/ydb/core/tx/columnshard/engines/changes/cleanup_portions.cpp @@ -17,19 +17,19 @@ void 
TCleanupPortionsColumnEngineChanges::DoDebugString(TStringOutput& out) cons void TCleanupPortionsColumnEngineChanges::DoWriteIndexOnExecute(NColumnShard::TColumnShard* self, TWriteIndexContext& context) { THashSet pathIds; - if (self) { - THashMap> blobIdsByStorage; - for (auto&& p : PortionsToDrop) { - p.RemoveFromDatabase(context.DBWrapper); - - p.FillBlobIdsByStorage(blobIdsByStorage, context.EngineLogs.GetVersionedIndex()); - pathIds.emplace(p.GetPathId()); - } - for (auto&& i : blobIdsByStorage) { - auto action = BlobsAction.GetRemoving(i.first); - for (auto&& b : i.second) { - action->DeclareRemove((TTabletId)self->TabletID(), b); - } + if (!self) { + return; + } + THashMap> blobIdsByStorage; + for (auto&& p : PortionsToDrop) { + p.RemoveFromDatabase(context.DBWrapper); + p.FillBlobIdsByStorage(blobIdsByStorage, context.EngineLogs.GetVersionedIndex()); + pathIds.emplace(p.GetPathId()); + } + for (auto&& i : blobIdsByStorage) { + auto action = BlobsAction.GetRemoving(i.first); + for (auto&& b : i.second) { + action->DeclareRemove((TTabletId)self->TabletID(), b); } } } @@ -41,9 +41,9 @@ void TCleanupPortionsColumnEngineChanges::DoWriteIndexOnComplete(NColumnShard::T } } if (self) { - self->IncCounter(NColumnShard::COUNTER_PORTIONS_ERASED, PortionsToDrop.size()); + self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_PORTIONS_ERASED, PortionsToDrop.size()); for (auto&& p : PortionsToDrop) { - self->IncCounter(NColumnShard::COUNTER_RAW_BYTES_ERASED, p.GetTotalRawBytes()); + self->Counters.GetTabletCounters()->OnDropPortionEvent(p.GetTotalRawBytes(), p.GetTotalBlobBytes(), p.NumRows()); } } } diff --git a/ydb/core/tx/columnshard/engines/changes/cleanup_portions.h b/ydb/core/tx/columnshard/engines/changes/cleanup_portions.h index 71a48e78be6e..a77d172be9e9 100644 --- a/ydb/core/tx/columnshard/engines/changes/cleanup_portions.h +++ b/ydb/core/tx/columnshard/engines/changes/cleanup_portions.h @@ -42,7 +42,7 @@ class 
TCleanupPortionsColumnEngineChanges: public TColumnEngineChanges { virtual ui32 GetWritePortionsCount() const override { return 0; } - virtual TWritePortionInfoWithBlobs* GetWritePortionInfo(const ui32 /*index*/) override { + virtual TWritePortionInfoWithBlobsResult* GetWritePortionInfo(const ui32 /*index*/) override { return nullptr; } virtual bool NeedWritePortion(const ui32 /*index*/) const override { diff --git a/ydb/core/tx/columnshard/engines/changes/cleanup_tables.cpp b/ydb/core/tx/columnshard/engines/changes/cleanup_tables.cpp index 3bd8cb6bc333..34d7354b5124 100644 --- a/ydb/core/tx/columnshard/engines/changes/cleanup_tables.cpp +++ b/ydb/core/tx/columnshard/engines/changes/cleanup_tables.cpp @@ -17,13 +17,15 @@ void TCleanupTablesColumnEngineChanges::DoDebugString(TStringOutput& out) const void TCleanupTablesColumnEngineChanges::DoWriteIndexOnExecute(NColumnShard::TColumnShard* self, TWriteIndexContext& context) { if (self && context.DB) { for (auto&& t : TablesToDrop) { - self->TablesManager.TryFinalizeDropPathOnExecute(*context.DB, t); + AFL_VERIFY(!self->InsertTable->HasDataInPathId(t)); + AFL_VERIFY(self->TablesManager.TryFinalizeDropPathOnExecute(*context.DB, t)); } } } void TCleanupTablesColumnEngineChanges::DoWriteIndexOnComplete(NColumnShard::TColumnShard* self, TWriteIndexCompleteContext& /*context*/) { for (auto&& t : TablesToDrop) { + self->InsertTable->ErasePath(t); self->TablesManager.TryFinalizeDropPathOnComplete(t); } self->Subscribers->OnEvent(std::make_shared(TablesToDrop)); diff --git a/ydb/core/tx/columnshard/engines/changes/cleanup_tables.h b/ydb/core/tx/columnshard/engines/changes/cleanup_tables.h index f39d33f5871c..33c7fe34cb1d 100644 --- a/ydb/core/tx/columnshard/engines/changes/cleanup_tables.h +++ b/ydb/core/tx/columnshard/engines/changes/cleanup_tables.h @@ -40,7 +40,7 @@ class TCleanupTablesColumnEngineChanges: public TColumnEngineChanges { virtual ui32 GetWritePortionsCount() const override { return 0; } - virtual 
TWritePortionInfoWithBlobs* GetWritePortionInfo(const ui32 /*index*/) override { + virtual TWritePortionInfoWithBlobsResult* GetWritePortionInfo(const ui32 /*index*/) override { return nullptr; } virtual bool NeedWritePortion(const ui32 /*index*/) const override { diff --git a/ydb/core/tx/columnshard/engines/changes/compaction.cpp b/ydb/core/tx/columnshard/engines/changes/compaction.cpp index a94d160158e6..2441ce4248b8 100644 --- a/ydb/core/tx/columnshard/engines/changes/compaction.cpp +++ b/ydb/core/tx/columnshard/engines/changes/compaction.cpp @@ -53,7 +53,7 @@ void TCompactColumnEngineChanges::DoStart(NColumnShard::TColumnShard& self) { void TCompactColumnEngineChanges::DoWriteIndexOnComplete(NColumnShard::TColumnShard* self, TWriteIndexCompleteContext& context) { TBase::DoWriteIndexOnComplete(self, context); if (self) { - self->IncCounter(NColumnShard::COUNTER_COMPACTION_TIME, context.Duration.MilliSeconds()); + self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_COMPACTION_TIME, context.Duration.MilliSeconds()); } } diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/abstract/merger.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/abstract/merger.cpp new file mode 100644 index 000000000000..9977ef51219c --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/abstract/merger.cpp @@ -0,0 +1,31 @@ +#include "merger.h" + +namespace NKikimr::NOlap::NCompaction { + +void IColumnMerger::Start(const std::vector>& input, TMergingContext& mergeContext) { + AFL_VERIFY(!Started); + Started = true; + for (auto&& i : input) { + if (!i) { + continue; + } + AFL_VERIFY(i->GetDataType()->Equals(*Context.GetResultField()->type()))("input", i->GetDataType()->ToString())( + "result", Context.GetResultField()->ToString()); + } + return DoStart(input, mergeContext); +} + + TMergingChunkContext::TMergingChunkContext(const std::shared_ptr& pkAndAddresses) { + auto columnPortionIdx = 
pkAndAddresses->GetColumnByName(IColumnMerger::PortionIdFieldName); + auto columnPortionRecordIdx = pkAndAddresses->GetColumnByName(IColumnMerger::PortionRecordIndexFieldName); + Y_ABORT_UNLESS(columnPortionIdx && columnPortionRecordIdx); + Y_ABORT_UNLESS(columnPortionIdx->type_id() == arrow::UInt16Type::type_id); + Y_ABORT_UNLESS(columnPortionRecordIdx->type_id() == arrow::UInt32Type::type_id); + IdxArray = static_pointer_cast(columnPortionIdx); + RecordIdxArray = static_pointer_cast(columnPortionRecordIdx); + + AFL_VERIFY(pkAndAddresses->num_rows() == IdxArray->length()); + AFL_VERIFY(pkAndAddresses->num_rows() == RecordIdxArray->length()); +} + +} // namespace NKikimr::NOlap::NCompaction diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/abstract/merger.h b/ydb/core/tx/columnshard/engines/changes/compaction/abstract/merger.h new file mode 100644 index 000000000000..bf8da708f36b --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/abstract/merger.h @@ -0,0 +1,143 @@ +#pragma once +#include +#include + +namespace NKikimr::NOlap::NCompaction { + +class TMergingChunkContext { +private: + std::shared_ptr IdxArray; + std::shared_ptr RecordIdxArray; + +public: + const arrow::UInt16Array& GetIdxArray() const { + return *IdxArray; + } + const arrow::UInt32Array& GetRecordIdxArray() const { + return *RecordIdxArray; + } + + TMergingChunkContext(const std::shared_ptr& pkAndAddresses); +}; + +class TMergingContext { +public: + class TAddress { + private: + YDB_ACCESSOR(i32, ChunkIdx, -1); + YDB_ACCESSOR(i32, GlobalPosition, -1); + + public: + TAddress() = default; + bool operator<(const TAddress& item) const { + if (ChunkIdx < item.ChunkIdx) { + return true; + } else if (item.ChunkIdx < ChunkIdx) { + return false; + } else { + return GlobalPosition < item.GlobalPosition; + } + } + + bool IsValid() const { + return ChunkIdx >= 0 && GlobalPosition >= 0; + } + }; + +private: + YDB_READONLY_DEF(std::vector, Chunks); + std::vector> InputContainers; 
+ + std::optional>> RemapPortionIndexToResultIndex; + +public: + const TMergingChunkContext& GetChunk(const ui32 idx) const { + AFL_VERIFY(idx < Chunks.size()); + return Chunks[idx]; + } + + bool HasRemapInfo(const ui32 idx) { + return GetRemapPortionIndexToResultIndex(idx).size(); + } + + const std::vector>& GetRemapPortionIndexToResultIndex() { + if (!RemapPortionIndexToResultIndex) { + std::vector> result; + result.resize(InputContainers.size()); + { + ui32 idx = 0; + for (auto&& p : InputContainers) { + if (p) { + result[idx].resize(p->GetRecordsCount()); + } + ++idx; + } + } + ui32 chunkIdx = 0; + for (auto&& i : Chunks) { + auto& pIdxArray = i.GetIdxArray(); + auto& pRecordIdxArray = i.GetRecordIdxArray(); + for (ui32 recordIdx = 0; recordIdx < i.GetIdxArray().length(); ++recordIdx) { + auto& sourceRemap = result[pIdxArray.Value(recordIdx)]; + if (sourceRemap.size()) { + sourceRemap[pRecordIdxArray.Value(recordIdx)].SetChunkIdx(chunkIdx); + sourceRemap[pRecordIdxArray.Value(recordIdx)].SetGlobalPosition(recordIdx); + } + } + ++chunkIdx; + } + RemapPortionIndexToResultIndex = std::move(result); + } + return *RemapPortionIndexToResultIndex; + } + + const std::vector& GetRemapPortionIndexToResultIndex(const ui32 idx) { + auto& result = GetRemapPortionIndexToResultIndex(); + AFL_VERIFY(idx < result.size()); + return result[idx]; + } + + TMergingContext(const std::vector>& pkAndAddresses, + const std::vector>& inputContainers) + : InputContainers(inputContainers) + { + for (auto&& i : pkAndAddresses) { + Chunks.emplace_back(i); + } + } +}; + +class IColumnMerger { +public: + using TFactory = NObjectFactory::TParametrizedObjectFactory; + +private: + bool Started = false; + + virtual std::vector DoExecute(const TChunkMergeContext& context, TMergingContext& mergeContext) = 0; + virtual void DoStart(const std::vector>& input, TMergingContext& mergeContext) = 0; + +protected: + const TColumnMergeContext& Context; + +public: + static inline const TString 
PortionIdFieldName = "$$__portion_id"; + static inline const TString PortionRecordIndexFieldName = "$$__portion_record_idx"; + static inline const std::shared_ptr PortionIdField = + std::make_shared(PortionIdFieldName, std::make_shared()); + static inline const std::shared_ptr PortionRecordIndexField = + std::make_shared(PortionRecordIndexFieldName, std::make_shared()); + + IColumnMerger(const TColumnMergeContext& context) + : Context(context) { + } + virtual ~IColumnMerger() = default; + + void Start(const std::vector>& input, TMergingContext& mergeContext); + + std::vector Execute(const TChunkMergeContext& context, TMergingContext& mergeContext) { + return DoExecute(context, mergeContext); + } +}; + +} // namespace NKikimr::NOlap::NCompaction diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/abstract/ya.make b/ydb/core/tx/columnshard/engines/changes/compaction/abstract/ya.make new file mode 100644 index 000000000000..07be3f70eb68 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/abstract/ya.make @@ -0,0 +1,11 @@ +LIBRARY() + +SRCS( + merger.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/engines/changes/compaction/common +) + +END() diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/column_cursor.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/column_cursor.cpp deleted file mode 100644 index cdb81296cf73..000000000000 --- a/ydb/core/tx/columnshard/engines/changes/compaction/column_cursor.cpp +++ /dev/null @@ -1,83 +0,0 @@ -#include "column_cursor.h" -#include - -namespace NKikimr::NOlap::NCompaction { - -bool TPortionColumnCursor::Fetch(TMergedColumn& column) { - Y_ABORT_UNLESS(ChunkIdx < ColumnChunks.size()); - Y_ABORT_UNLESS(RecordIndexStart); - ui32 currentStartPortionIdx = *RecordIndexStart; - ui32 currentFinishPortionIdx = RecordIndexFinish; -// NActors::TLogContextGuard lg(NActors::TLogContextBuilder::Build()("portion_id", PortionId)); - while (currentStartPortionIdx - ChunkRecordIndexStartPosition >= 
CurrentChunkRecordsCount) { - if (!NextChunk()) { - return false; - } - } - - ui32 currentStart = currentStartPortionIdx - ChunkRecordIndexStartPosition; - while (currentFinishPortionIdx - ChunkRecordIndexStartPosition >= CurrentChunkRecordsCount) { - const ui32 currentFinish = CurrentChunkRecordsCount; -// if (currentStart == 0 && CurrentColumnChunk) { -// column.AppendBlob(CurrentBlobChunk->GetData(), *CurrentColumnChunk); -// } else { - column.AppendSlice(GetCurrentArray(), currentStart, currentFinish - currentStart); -// } - currentStart = 0; - if (!NextChunk()) { - return false; - } - } - - const ui32 currentFinish = currentFinishPortionIdx - ChunkRecordIndexStartPosition; - if (currentStart < currentFinish) { - Y_ABORT_UNLESS(currentFinish < CurrentChunkRecordsCount); - column.AppendSlice(GetCurrentArray(), currentStart, currentFinish - currentStart); - } - - RecordIndexStart.reset(); - RecordIndexFinish = 0; - return true; -} - -bool TPortionColumnCursor::Next(const ui32 portionRecordIdx, TMergedColumn& column) { - Y_ABORT_UNLESS(ChunkRecordIndexStartPosition <= portionRecordIdx); - if (!RecordIndexStart) { - RecordIndexStart = portionRecordIdx; - RecordIndexFinish = portionRecordIdx + 1; - } else if (RecordIndexFinish == portionRecordIdx) { - RecordIndexFinish = portionRecordIdx + 1; - } else { - Fetch(column); - RecordIndexStart = portionRecordIdx; - RecordIndexFinish = portionRecordIdx + 1; - } - return true; -} - -bool TPortionColumnCursor::NextChunk() { - CurrentArray = nullptr; - if (++ChunkIdx == ColumnChunks.size()) { - return false; - } else { - ChunkRecordIndexStartPosition += CurrentChunkRecordsCount; - CurrentBlobChunk = BlobChunks[ChunkIdx]; - CurrentColumnChunk = ColumnChunks[ChunkIdx]; - CurrentChunkRecordsCount = CurrentBlobChunk->GetRecordsCountVerified(); - return true; - } -} - -const std::shared_ptr& TPortionColumnCursor::GetCurrentArray() { - Y_ABORT_UNLESS(ChunkIdx < ColumnChunks.size()); - Y_ABORT_UNLESS(CurrentBlobChunk); - - if 
(!CurrentArray) { - auto res = NArrow::TStatusValidator::GetValid(ColumnLoader->Apply(CurrentBlobChunk->GetData())); - AFL_VERIFY(res->num_columns() == 1); - CurrentArray = res->column(0); - } - return CurrentArray; -} - -} diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/column_cursor.h b/ydb/core/tx/columnshard/engines/changes/compaction/column_cursor.h deleted file mode 100644 index 493cd6268f6a..000000000000 --- a/ydb/core/tx/columnshard/engines/changes/compaction/column_cursor.h +++ /dev/null @@ -1,55 +0,0 @@ -#pragma once -#include "merged_column.h" -#include -#include -#include -#include - -namespace NKikimr::NOlap::NCompaction { - -class TPortionColumnCursor { -private: - std::vector> BlobChunks; - std::vector ColumnChunks; - std::optional RecordIndexStart; - YDB_READONLY(ui32, RecordIndexFinish, 0); - ui32 ChunkRecordIndexStartPosition = 0; - ui32 ChunkIdx = 0; - std::shared_ptr CurrentBlobChunk; - const TColumnRecord* CurrentColumnChunk = nullptr; - ui32 CurrentChunkRecordsCount = 0; - std::shared_ptr CurrentArray; - std::shared_ptr ColumnLoader; - const ui64 PortionId; - - const std::shared_ptr& GetCurrentArray(); - - bool NextChunk(); - -public: - ~TPortionColumnCursor() { - AFL_VERIFY(!RecordIndexStart || ChunkIdx == ColumnChunks.size())("chunk", ChunkIdx) - ("size", ColumnChunks.size())("start", RecordIndexStart)("finish", RecordIndexFinish) - ("max", CurrentBlobChunk->GetRecordsCount())("current_start_position", ChunkRecordIndexStartPosition); - } - - bool Next(const ui32 portionRecordIdx, TMergedColumn& column); - - bool Fetch(TMergedColumn& column); - - TPortionColumnCursor(const std::vector>& columnChunks, const std::vector& records, const std::shared_ptr& loader, const ui64 portionId) - : BlobChunks(columnChunks) - , ColumnChunks(records) - , ColumnLoader(loader) - , PortionId(portionId) { - AFL_VERIFY(ColumnLoader); - Y_UNUSED(PortionId); - Y_ABORT_UNLESS(BlobChunks.size()); - Y_ABORT_UNLESS(ColumnChunks.size() == 
BlobChunks.size()); - CurrentBlobChunk = BlobChunks.front(); - CurrentColumnChunk = ColumnChunks.front(); - CurrentChunkRecordsCount = CurrentBlobChunk->GetRecordsCountVerified(); - } -}; - -} diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/column_portion_chunk.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/column_portion_chunk.cpp deleted file mode 100644 index 09eed586ac20..000000000000 --- a/ydb/core/tx/columnshard/engines/changes/compaction/column_portion_chunk.cpp +++ /dev/null @@ -1,74 +0,0 @@ -#include "column_portion_chunk.h" -#include -#include -#include -#include - -namespace NKikimr::NOlap::NCompaction { - -std::shared_ptr TColumnPortion::AppendBlob(const TString& data, const TColumnRecord& columnChunk, ui32& remained) { -// if (CurrentPortionRecords + columnChunk.GetMeta().GetNumRows() <= Context.GetPortionRowsCountLimit() && -// columnChunk.GetMeta().GetRawBytes() < Context.GetChunkRawBytesLimit() && -// data.size() < Context.GetChunkPackedBytesLimit() && -// columnChunk.GetMeta().GetRawBytes() > Context.GetStorePackedChunkSizeLimit() && Context.GetSaver().IsHardPacker() && -// Context.GetUseWholeChunksOptimization()) -// { -// NChanges::TGeneralCompactionCounters::OnFullBlobAppend(columnChunk.BlobRange.GetBlobSize()); -// FlushBuffer(); -// Chunks.emplace_back(std::make_shared(data, columnChunk, Context.GetSchemaInfo())); -// PackedSize += Chunks.back()->GetPackedSize(); -// CurrentPortionRecords += columnChunk.GetMeta().GetNumRows(); -// return nullptr; -// } else { - NChanges::TGeneralCompactionCounters::OnSplittedBlobAppend(columnChunk.BlobRange.GetSize()); - auto batch = NArrow::TStatusValidator::GetValid(Context.GetLoader()->Apply(data)); - AFL_VERIFY(batch->num_columns() == 1); - auto batchArray = batch->column(0); - remained = AppendSlice(batchArray, 0, batch->num_rows()); - if (remained) { - return batchArray; - } else { - return nullptr; - } -// } -} - -ui32 TColumnPortion::AppendSlice(const std::shared_ptr& a, const 
ui32 startIndex, const ui32 length) { - Y_ABORT_UNLESS(a); - Y_ABORT_UNLESS(length); - Y_ABORT_UNLESS(CurrentPortionRecords < Context.GetPortionRowsCountLimit()); - Y_ABORT_UNLESS(startIndex + length <= a->length()); - ui32 i = startIndex; - const ui32 packedRecordSize = Context.GetColumnStat() ? Context.GetColumnStat()->GetPackedRecordSize() : 0; - for (; i < startIndex + length; ++i) { - ui64 recordSize = 0; - AFL_VERIFY(NArrow::Append(*Builder, *a, i, &recordSize))("a", a->ToString())("a_type", a->type()->ToString())("builder_type", Builder->type()->ToString()); - CurrentChunkRawSize += recordSize; - PredictedPackedBytes += packedRecordSize ? packedRecordSize : (recordSize / 2); - if (++CurrentPortionRecords == Context.GetPortionRowsCountLimit()) { - FlushBuffer(); - ++i; - break; - } - if (CurrentChunkRawSize >= Context.GetChunkRawBytesLimit() || PredictedPackedBytes >= Context.GetExpectedBlobPackedBytes()) { - FlushBuffer(); - } - } - return startIndex + length - i; -} - -bool TColumnPortion::FlushBuffer() { - if (Builder->length()) { - auto newArrayChunk = NArrow::TStatusValidator::GetValid(Builder->Finish()); - Chunks.emplace_back(std::make_shared(Context.GetSaver().Apply(newArrayChunk, Context.GetResultField()), newArrayChunk, TChunkAddress(Context.GetColumnId(), 0), ColumnInfo)); - Builder = Context.MakeBuilder(); - CurrentChunkRawSize = 0; - PredictedPackedBytes = 0; - PackedSize += Chunks.back()->GetPackedSize(); - return true; - } else { - return false; - } -} - -} diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/column_portion_chunk.h b/ydb/core/tx/columnshard/engines/changes/compaction/column_portion_chunk.h deleted file mode 100644 index f1d4cbadd6cf..000000000000 --- a/ydb/core/tx/columnshard/engines/changes/compaction/column_portion_chunk.h +++ /dev/null @@ -1,71 +0,0 @@ -#pragma once -#include "merge_context.h" -#include -#include -#include -#include -#include -#include -#include - -namespace NKikimr::NOlap::NCompaction { - -class 
TColumnPortionResult { -protected: - std::vector> Chunks; - ui64 CurrentPortionRecords = 0; - const ui32 ColumnId; - ui64 PackedSize = 0; -public: - ui64 GetPackedSize() const { - return PackedSize; - } - - TColumnPortionResult(const ui32 columnId) - : ColumnId(columnId) { - - } - - const std::vector>& GetChunks() const { - return Chunks; - } - - ui64 GetCurrentPortionRecords() const { - return CurrentPortionRecords; - } - - TString DebugString() const { - return TStringBuilder() << "chunks=" << Chunks.size() << ";records=" << CurrentPortionRecords << ";"; - } - -}; - -class TColumnPortion: public TColumnPortionResult { -private: - using TBase = TColumnPortionResult; - std::unique_ptr Builder; - const TColumnMergeContext& Context; - YDB_READONLY(ui64, CurrentChunkRawSize, 0); - double PredictedPackedBytes = 0; - const TSimpleColumnInfo ColumnInfo; -public: - TColumnPortion(const TColumnMergeContext& context) - : TBase(context.GetColumnId()) - , Context(context) - , ColumnInfo(Context.GetIndexInfo().GetColumnFeaturesVerified(context.GetColumnId())) - { - Builder = Context.MakeBuilder(); - } - - bool IsFullPortion() const { - Y_ABORT_UNLESS(CurrentPortionRecords <= Context.GetPortionRowsCountLimit()); - return CurrentPortionRecords == Context.GetPortionRowsCountLimit(); - } - - bool FlushBuffer(); - - std::shared_ptr AppendBlob(const TString& data, const TColumnRecord& columnChunk, ui32& remained); - ui32 AppendSlice(const std::shared_ptr& a, const ui32 startIndex, const ui32 length); -}; - -} diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/common/context.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/common/context.cpp new file mode 100644 index 000000000000..35fbf111c993 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/common/context.cpp @@ -0,0 +1,5 @@ +#include "context.h" + +namespace NKikimr::NOlap::NCompaction { + +} diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/common/context.h 
b/ydb/core/tx/columnshard/engines/changes/compaction/common/context.h new file mode 100644 index 000000000000..73117725614d --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/common/context.h @@ -0,0 +1,80 @@ +#pragma once +#include +#include +#include +#include + +#include + +namespace NKikimr::NOlap::NCompaction { + +class TColumnMergeContext { +private: + YDB_READONLY(ui32, ColumnId, 0); + ISnapshotSchema::TPtr SchemaInfo; + YDB_ACCESSOR_DEF(TColumnSaver, Saver); + YDB_READONLY_DEF(std::shared_ptr, Loader); + YDB_READONLY_DEF(std::shared_ptr, ResultField); + YDB_READONLY(ui64, ChunkPackedBytesLimit, 7 * 1024 * 1024); + YDB_READONLY(ui64, ExpectedBlobPackedBytes, 4 * 1024 * 1024); + YDB_READONLY(ui64, ChunkRawBytesLimit, 50 * 1024 * 1024); + YDB_READONLY(ui64, StorePackedChunkSizeLimit, 512 * 1024); + YDB_READONLY(bool, UseWholeChunksOptimization, true); + + std::optional ColumnStat; + + const TIndexInfo& IndexInfo; + +public: + std::shared_ptr GetDefaultValue() const { + return Loader->GetDefaultValue(); + } + + ISnapshotSchema::TPtr GetSchemaInfo() const { + return SchemaInfo; + } + + const std::optional& GetColumnStat() const { + return ColumnStat; + } + + std::unique_ptr MakeBuilder() const { + return NArrow::MakeBuilder(ResultField); + } + + const TIndexInfo& GetIndexInfo() const { + return IndexInfo; + } + + TColumnMergeContext(const ui32 columnId, const ISnapshotSchema::TPtr& schema, const ui32 chunkRawBytesLimit, + const std::optional& columnStat) + : ColumnId(columnId) + , SchemaInfo(schema) + , Saver(schema->GetColumnSaver(columnId)) + , Loader(schema->GetColumnLoaderOptional(columnId)) + , ResultField(schema->GetIndexInfo().GetColumnFieldVerified(columnId)) + , ChunkRawBytesLimit(chunkRawBytesLimit) + , UseWholeChunksOptimization(!schema->GetIndexInfo().GetReplaceKey()->GetFieldByName(ResultField->name())) + , ColumnStat(columnStat) + , IndexInfo(schema->GetIndexInfo()) { + Y_ABORT_UNLESS(ChunkRawBytesLimit); + } +}; + +class 
TChunkMergeContext { +private: + YDB_READONLY(ui32, PortionRowsCountLimit, 10000); + YDB_READONLY(ui32, BatchIdx, 0); + YDB_READONLY(ui32, RecordsCount, 0); + +public: + TChunkMergeContext(const ui32 portionRowsCountLimit, const ui32 batchIdx, const ui32 recordsCount) + : PortionRowsCountLimit(portionRowsCountLimit) + , BatchIdx(batchIdx) + , RecordsCount(recordsCount) + { + AFL_VERIFY(RecordsCount); + AFL_VERIFY(PortionRowsCountLimit); + } +}; +} // namespace NKikimr::NOlap::NCompaction diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/common/result.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/common/result.cpp new file mode 100644 index 000000000000..6482ee301543 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/common/result.cpp @@ -0,0 +1,10 @@ +#include "result.h" +#include + +namespace NKikimr::NOlap::NCompaction { + +TString TColumnPortionResult::DebugString() const { + return TStringBuilder() << "chunks=" << Chunks.size() << ";"; +} + +} diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/common/result.h b/ydb/core/tx/columnshard/engines/changes/compaction/common/result.h new file mode 100644 index 000000000000..850e1f6eebe0 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/common/result.h @@ -0,0 +1,25 @@ +#pragma once +#include + +namespace NKikimr::NOlap::NCompaction { + +class TColumnPortionResult { +protected: + std::vector> Chunks; + const ui32 ColumnId; +public: + + TColumnPortionResult(const ui32 columnId) + : ColumnId(columnId) { + + } + + const std::vector>& GetChunks() const { + return Chunks; + } + + TString DebugString() const; + +}; + +} diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/common/ya.make b/ydb/core/tx/columnshard/engines/changes/compaction/common/ya.make new file mode 100644 index 000000000000..30667909c931 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/common/ya.make @@ -0,0 +1,12 @@ +LIBRARY() + +SRCS( + 
context.cpp + result.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/engines/scheme +) + +END() diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/merge_context.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/merge_context.cpp deleted file mode 100644 index 8280e58eec95..000000000000 --- a/ydb/core/tx/columnshard/engines/changes/compaction/merge_context.cpp +++ /dev/null @@ -1,5 +0,0 @@ -#include "merge_context.h" - -namespace NKikimr::NOlap::NCompaction { - -} diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/merge_context.h b/ydb/core/tx/columnshard/engines/changes/compaction/merge_context.h deleted file mode 100644 index a5da857c2aff..000000000000 --- a/ydb/core/tx/columnshard/engines/changes/compaction/merge_context.h +++ /dev/null @@ -1,62 +0,0 @@ -#pragma once -#include -#include -#include -#include - -#include - -namespace NKikimr::NOlap::NCompaction { - -class TColumnMergeContext { -private: - YDB_READONLY(ui32, ColumnId, 0); - ISnapshotSchema::TPtr SchemaInfo; - YDB_READONLY_DEF(TColumnSaver, Saver); - YDB_READONLY_DEF(std::shared_ptr, Loader); - YDB_READONLY_DEF(std::shared_ptr, ResultField); - YDB_READONLY(ui32, PortionRowsCountLimit, 10000); - YDB_READONLY(ui64, ChunkPackedBytesLimit, 7 * 1024 * 1024); - YDB_READONLY(ui64, ExpectedBlobPackedBytes, 4 * 1024 * 1024); - YDB_READONLY(ui64, ChunkRawBytesLimit, 50 * 1024 * 1024); - YDB_READONLY(ui64, StorePackedChunkSizeLimit, 512 * 1024); - YDB_READONLY(bool, UseWholeChunksOptimization, true); - - std::optional ColumnStat; - const TIndexInfo& IndexInfo; -public: - ISnapshotSchema::TPtr GetSchemaInfo() const { - return SchemaInfo; - } - - const std::optional& GetColumnStat() const { - return ColumnStat; - } - - std::unique_ptr MakeBuilder() const { - return NArrow::MakeBuilder(ResultField); - } - - const TIndexInfo& GetIndexInfo() const { - return IndexInfo; - } - - TColumnMergeContext(const ui32 columnId, const ISnapshotSchema::TPtr& schema, const ui32 portionRowsCountLimit, 
const ui32 chunkRawBytesLimit, - const std::optional& columnStat) - : ColumnId(columnId) - , SchemaInfo(schema) - , Saver(schema->GetColumnSaver(columnId)) - , Loader(schema->GetColumnLoaderOptional(columnId)) - , ResultField(schema->GetIndexInfo().GetColumnFieldVerified(columnId)) - , PortionRowsCountLimit(portionRowsCountLimit) - , ChunkRawBytesLimit(chunkRawBytesLimit) - , UseWholeChunksOptimization(!schema->GetIndexInfo().GetReplaceKey()->GetFieldByName(ResultField->name())) - , ColumnStat(columnStat) - , IndexInfo(schema->GetIndexInfo()) - { - Y_ABORT_UNLESS(PortionRowsCountLimit); - Y_ABORT_UNLESS(ChunkRawBytesLimit); - } -}; - -} diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/merged_column.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/merged_column.cpp deleted file mode 100644 index 5f638a30f155..000000000000 --- a/ydb/core/tx/columnshard/engines/changes/compaction/merged_column.cpp +++ /dev/null @@ -1,50 +0,0 @@ -#include "merged_column.h" - -namespace NKikimr::NOlap::NCompaction { - -void TMergedColumn::AppendBlob(const TString& data, const TColumnRecord& columnChunk) { - RecordsCount += columnChunk.GetMeta().GetNumRows(); - ui32 remained; - std::shared_ptr dataArray = Portions.back().AppendBlob(data, columnChunk, remained); - while (remained) { - Y_ABORT_UNLESS(Portions.back().IsFullPortion()); - NewPortion(); - remained = Portions.back().AppendSlice(dataArray, dataArray->length() - remained, remained); - } - if (Portions.back().IsFullPortion()) { - NewPortion(); - } -} - -void TMergedColumn::AppendSlice(const std::shared_ptr& data, const ui32 startIndex, const ui32 length) { - RecordsCount += length; - Y_ABORT_UNLESS(data); - ui32 remained = length; - while (remained = Portions.back().AppendSlice(data, startIndex + length - remained, remained)) { - Y_ABORT_UNLESS(Portions.back().IsFullPortion()); - NewPortion(); - } - if (Portions.back().IsFullPortion()) { - NewPortion(); - } -} - -std::vector TMergedColumn::BuildResult() { - 
std::vector result; - if (Portions.size()) { - Portions.back().FlushBuffer(); - } - for (auto&& i : Portions) { - result.emplace_back(i); - } - return result; -} - -void TMergedColumn::NewPortion() { - if (Portions.size()) { - Portions.back().FlushBuffer(); - } - Portions.emplace_back(TColumnPortion(Context)); -} - -} diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/merged_column.h b/ydb/core/tx/columnshard/engines/changes/compaction/merged_column.h deleted file mode 100644 index f0a90bdd9b44..000000000000 --- a/ydb/core/tx/columnshard/engines/changes/compaction/merged_column.h +++ /dev/null @@ -1,28 +0,0 @@ -#pragma once -#include "column_portion_chunk.h" -#include "merge_context.h" -#include - -namespace NKikimr::NOlap::NCompaction { - -class TMergedColumn { -private: - TColumnMergeContext Context; - YDB_READONLY_DEF(std::vector, Portions); - YDB_READONLY(ui32, RecordsCount, 0); - - void NewPortion(); - -public: - TMergedColumn(const TColumnMergeContext& context) - : Context(context) { - NewPortion(); - } - - void AppendBlob(const TString& data, const TColumnRecord& columnChunk); - void AppendSlice(const std::shared_ptr& data, const ui32 startIndex, const ui32 length); - - std::vector BuildResult(); -}; - -} diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/merger.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/merger.cpp new file mode 100644 index 000000000000..439426439867 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/merger.cpp @@ -0,0 +1,176 @@ +#include "merger.h" + +#include "abstract/merger.h" +#include "plain/logic.h" +#include "sparsed/logic.h" + +#include +#include +#include +#include +#include + +namespace NKikimr::NOlap::NCompaction { + +std::vector TMerger::Execute(const std::shared_ptr& stats, + const NArrow::NMerger::TIntervalPositions& checkPoints, const std::shared_ptr& resultFiltered, const ui64 pathId, + const std::optional shardingActualVersion) { + AFL_VERIFY(Batches.size() == 
Filters.size()); + std::vector> batchResults; + { + arrow::FieldVector indexFields; + indexFields.emplace_back(IColumnMerger::PortionIdField); + indexFields.emplace_back(IColumnMerger::PortionRecordIndexField); + if (resultFiltered->HasColumnId((ui32)IIndexInfo::ESpecialColumn::DELETE_FLAG)) { + IIndexInfo::AddDeleteFields(indexFields); + } + IIndexInfo::AddSnapshotFields(indexFields); + auto dataSchema = std::make_shared(indexFields); + NArrow::NMerger::TMergePartialStream mergeStream( + resultFiltered->GetIndexInfo().GetReplaceKey(), dataSchema, false, IIndexInfo::GetSnapshotColumnNames()); + + ui32 idx = 0; + for (auto&& batch : Batches) { + { + NArrow::NConstruction::IArrayBuilder::TPtr column = + std::make_shared>>( + IColumnMerger::PortionIdFieldName, idx); + batch->AddField(IColumnMerger::PortionIdField, column->BuildArray(batch->num_rows())).Validate(); + } + { + NArrow::NConstruction::IArrayBuilder::TPtr column = + std::make_shared>>( + IColumnMerger::PortionRecordIndexFieldName); + batch->AddField(IColumnMerger::PortionRecordIndexField, column->BuildArray(batch->num_rows())).Validate(); + } + mergeStream.AddSource(batch, Filters[idx]); + ++idx; + } + batchResults = mergeStream.DrainAllParts(checkPoints, indexFields); + } + + std::vector>> chunkGroups; + chunkGroups.resize(batchResults.size()); + + using TColumnData = std::vector>; + THashMap columnsData; + { + ui32 batchIdx = 0; + for (auto&& p : Batches) { + ui32 columnIdx = 0; + for (auto&& i : p->GetSchema()->GetFields()) { + const std::optional columnId = resultFiltered->GetIndexInfo().GetColumnIdOptional(i->name()); + if (columnId) { + auto it = columnsData.find(*columnId); + if (it == columnsData.end()) { + it = columnsData.emplace(*columnId, TColumnData(Batches.size())).first; + } + it->second[batchIdx] = p->GetColumnVerified(columnIdx); + } + ++columnIdx; + } + ++batchIdx; + } + } + + TMergingContext mergingContext(batchResults, Batches); + + for (auto&& [columnId, columnData] : columnsData) { + 
const TString& columnName = resultFiltered->GetIndexInfo().GetColumnName(columnId); + NActors::TLogContextGuard logGuard(NActors::TLogContextBuilder::Build()("field_name", columnName)); + auto columnInfo = stats->GetColumnInfo(columnId); + + TColumnMergeContext commonContext( + columnId, resultFiltered, NSplitter::TSplitSettings().GetExpectedUnpackColumnChunkRawSize(), columnInfo); + if (OptimizationWritingPackMode) { + commonContext.MutableSaver().AddSerializerWithBorder( + 100, std::make_shared(arrow::Compression::type::UNCOMPRESSED)); + commonContext.MutableSaver().AddSerializerWithBorder( + Max(), std::make_shared(arrow::Compression::type::LZ4_FRAME)); + } + + THolder merger = + IColumnMerger::TFactory::MakeHolder(commonContext.GetLoader()->GetAccessorConstructor().GetClassName(), commonContext); + AFL_VERIFY(!!merger)("problem", "cannot create merger")( + "class_name", commonContext.GetLoader()->GetAccessorConstructor().GetClassName()); + merger->Start(columnData, mergingContext); + + ui32 batchIdx = 0; + for (auto&& batchResult : batchResults) { + const ui32 portionRecordsCountLimit = + batchResult->num_rows() / (batchResult->num_rows() / NSplitter::TSplitSettings().GetExpectedRecordsCountOnPage() + 1) + 1; + + TChunkMergeContext context(portionRecordsCountLimit, batchIdx, batchResult->num_rows()); + chunkGroups[batchIdx][columnId] = merger->Execute(context, mergingContext); + ++batchIdx; + } + } + ui32 batchIdx = 0; + + const auto groups = + resultFiltered->GetIndexInfo().GetEntityGroupsByStorageId(IStoragesManager::DefaultStorageId, *SaverContext.GetStoragesManager()); + std::vector result; + for (auto&& columnChunks : chunkGroups) { + auto batchResult = batchResults[batchIdx]; + ++batchIdx; + Y_ABORT_UNLESS(columnChunks.size()); + + for (auto&& i : columnChunks) { + if (i.second.size() != columnChunks.begin()->second.size()) { + for (ui32 p = 0; p < std::min(columnChunks.begin()->second.size(), i.second.size()); ++p) { + 
AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("p_first", columnChunks.begin()->second[p].DebugString())( + "p", i.second[p].DebugString()); + } + } + AFL_VERIFY(i.second.size() == columnChunks.begin()->second.size())("first", columnChunks.begin()->second.size())( + "current", i.second.size())("first_name", columnChunks.begin()->first)("current_name", i.first); + } + auto columnSnapshotPlanStepIdx = batchResult->GetColumnByName(TIndexInfo::SPEC_COL_PLAN_STEP); + auto columnSnapshotTxIdx = batchResult->GetColumnByName(TIndexInfo::SPEC_COL_TX_ID); + Y_ABORT_UNLESS(columnSnapshotPlanStepIdx); + Y_ABORT_UNLESS(columnSnapshotTxIdx); + Y_ABORT_UNLESS(columnSnapshotPlanStepIdx->type_id() == arrow::UInt64Type::type_id); + Y_ABORT_UNLESS(columnSnapshotTxIdx->type_id() == arrow::UInt64Type::type_id); + + std::vector batchSlices; + std::shared_ptr schemaDetails(new TDefaultSchemaDetails(resultFiltered, stats)); + + for (ui32 i = 0; i < columnChunks.begin()->second.size(); ++i) { + THashMap>> portionColumns; + for (auto&& p : columnChunks) { + portionColumns.emplace(p.first, p.second[i].GetChunks()); + } + batchSlices.emplace_back(portionColumns, schemaDetails, Context.Counters.SplitterCounters); + } + NArrow::NSplitter::TSimilarPacker slicer(NSplitter::TSplitSettings().GetExpectedPortionSize()); + auto packs = slicer.Split(batchSlices); + + ui32 recordIdx = 0; + for (auto&& i : packs) { + TGeneralSerializedSlice slicePrimary(std::move(i)); + auto dataWithSecondary = resultFiltered->GetIndexInfo() + .AppendIndexes(slicePrimary.GetPortionChunksToHash(), SaverContext.GetStoragesManager()) + .DetachResult(); + TGeneralSerializedSlice slice(dataWithSecondary.GetExternalData(), schemaDetails, Context.Counters.SplitterCounters); + + auto b = batchResult->Slice(recordIdx, slice.GetRecordsCount()); + const ui32 deletionsCount = IIndexInfo::CalcDeletions(b, false); + auto constructor = TWritePortionInfoWithBlobsConstructor::BuildByBlobs(slice.GroupChunksByBlobs(groups), + 
dataWithSecondary.GetSecondaryInplaceData(), pathId, resultFiltered->GetVersion(), resultFiltered->GetSnapshot(), + SaverContext.GetStoragesManager()); + + NArrow::TFirstLastSpecialKeys primaryKeys(slice.GetFirstLastPKBatch(resultFiltered->GetIndexInfo().GetReplaceKey())); + NArrow::TMinMaxSpecialKeys snapshotKeys(b, TIndexInfo::ArrowSchemaSnapshot()); + constructor.GetPortionConstructor().AddMetadata(*resultFiltered, deletionsCount, primaryKeys, snapshotKeys); + constructor.GetPortionConstructor().MutableMeta().SetTierName(IStoragesManager::DefaultStorageId); + if (shardingActualVersion) { + constructor.GetPortionConstructor().SetShardingVersion(*shardingActualVersion); + } + result.emplace_back(std::move(constructor)); + recordIdx += slice.GetRecordsCount(); + } + } + return result; +} + +} // namespace NKikimr::NOlap::NCompaction diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/merger.h b/ydb/core/tx/columnshard/engines/changes/compaction/merger.h new file mode 100644 index 000000000000..9c84799fe8ad --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/merger.h @@ -0,0 +1,44 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include + +namespace NKikimr::NOlap::NCompaction { +class TMerger { +private: + YDB_ACCESSOR(bool, OptimizationWritingPackMode, false); + std::vector> Batches; + std::vector> Filters; + const TConstructionContext& Context; + const TSaverContext& SaverContext; + +public: + void AddBatch(const std::shared_ptr& batch, const std::shared_ptr& filter) { + AFL_VERIFY(batch); + Batches.emplace_back(batch); + Filters.emplace_back(filter); + } + + TMerger(const TConstructionContext& context, const TSaverContext& saverContext) + : Context(context) + , SaverContext(saverContext) { + } + + TMerger(const TConstructionContext& context, const TSaverContext& saverContext, + std::vector>&& batches, std::vector>&& filters) + : Batches(std::move(batches)) + , Filters(std::move(filters)) + , 
Context(context) + , SaverContext(saverContext) { + AFL_VERIFY(Batches.size() == Filters.size()); + } + + std::vector Execute(const std::shared_ptr& stats, + const NArrow::NMerger::TIntervalPositions& checkPoints, const std::shared_ptr& resultFiltered, + const ui64 pathId, const std::optional shardingActualVersion); +}; +} // namespace NKikimr::NOlap::NCompaction diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/plain/column_cursor.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/plain/column_cursor.cpp new file mode 100644 index 000000000000..9fd0c4d301e1 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/plain/column_cursor.cpp @@ -0,0 +1,59 @@ +#include "column_cursor.h" +#include + +namespace NKikimr::NOlap::NCompaction { + +bool TPortionColumnCursor::Fetch(TMergedColumn& column) { + Y_ABORT_UNLESS(RecordIndexStart); + if (!BlobChunks) { + if (!DefaultArray || DefaultArray->length() < RecordIndexFinish - *RecordIndexStart) { + DefaultArray = NArrow::TThreadSimpleArraysCache::Get(DataType, DefaultValue, RecordIndexFinish - *RecordIndexStart); + } + column.AppendSlice(DefaultArray, 0, RecordIndexFinish - *RecordIndexStart); + } else { + if (CurrentChunk && CurrentChunk->GetAddress().Contains(*RecordIndexStart)) { + } else { + CurrentChunk = BlobChunks->GetChunk(CurrentChunk, *RecordIndexStart); + } + + ui32 currentStart = *RecordIndexStart; + while (CurrentChunk->GetAddress().GetGlobalFinishPosition() <= RecordIndexFinish) { + column.AppendSlice(CurrentChunk->GetArray(), CurrentChunk->GetAddress().GetLocalIndex(currentStart), + CurrentChunk->GetAddress().GetGlobalFinishPosition() - currentStart); + currentStart = CurrentChunk->GetAddress().GetGlobalFinishPosition(); + if (currentStart < BlobChunks->GetRecordsCount()) { + CurrentChunk = BlobChunks->GetChunk(CurrentChunk, currentStart); + } else { + CurrentChunk.reset(); + break; + } + } + + if (currentStart < RecordIndexFinish) { + AFL_VERIFY(CurrentChunk); + 
Y_ABORT_UNLESS(RecordIndexFinish < CurrentChunk->GetAddress().GetGlobalFinishPosition()); + column.AppendSlice( + CurrentChunk->GetArray(), CurrentChunk->GetAddress().GetLocalIndex(currentStart), RecordIndexFinish - currentStart); + } + } + RecordIndexStart.reset(); + RecordIndexFinish = 0; + + return true; +} + +bool TPortionColumnCursor::Next(const ui32 portionRecordIdx, TMergedColumn& column) { + if (!RecordIndexStart) { + RecordIndexStart = portionRecordIdx; + RecordIndexFinish = portionRecordIdx + 1; + } else if (RecordIndexFinish == portionRecordIdx) { + RecordIndexFinish = portionRecordIdx + 1; + } else { + Fetch(column); + RecordIndexStart = portionRecordIdx; + RecordIndexFinish = portionRecordIdx + 1; + } + return true; +} + +} diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/plain/column_cursor.h b/ydb/core/tx/columnshard/engines/changes/compaction/plain/column_cursor.h new file mode 100644 index 000000000000..4180444c9487 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/plain/column_cursor.h @@ -0,0 +1,43 @@ +#pragma once +#include "merged_column.h" + +#include +#include +#include + +#include + +namespace NKikimr::NOlap::NCompaction { + +class TPortionColumnCursor { +private: + std::optional CurrentChunk; + std::shared_ptr BlobChunks; + std::shared_ptr DefaultArray; + std::optional RecordIndexStart; + std::shared_ptr DataType; + std::shared_ptr DefaultValue; + YDB_READONLY(ui32, RecordIndexFinish, 0); + +public: + ~TPortionColumnCursor() { + AFL_VERIFY(!RecordIndexStart)("start", RecordIndexStart)("finish", RecordIndexFinish); + } + + bool Next(const ui32 portionRecordIdx, TMergedColumn& column); + + bool Fetch(TMergedColumn& column); + + TPortionColumnCursor(const std::shared_ptr& columnChunks) + : BlobChunks(columnChunks) { + AFL_VERIFY(BlobChunks); + } + + TPortionColumnCursor(const std::shared_ptr& dataType, const std::shared_ptr& defaultValue) + : DataType(dataType) + , DefaultValue(defaultValue) { + 
AFL_VERIFY(DataType); + } +}; + +} // namespace NKikimr::NOlap::NCompaction diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/plain/column_portion_chunk.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/plain/column_portion_chunk.cpp new file mode 100644 index 000000000000..3db4127653b8 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/plain/column_portion_chunk.cpp @@ -0,0 +1,50 @@ +#include "column_portion_chunk.h" + +#include +#include +#include +#include + +namespace NKikimr::NOlap::NCompaction { + +ui32 TColumnPortion::AppendSlice(const std::shared_ptr& a, const ui32 startIndex, const ui32 length) { + Y_ABORT_UNLESS(a); + Y_ABORT_UNLESS(length); + Y_ABORT_UNLESS(CurrentPortionRecords < ChunkContext.GetPortionRowsCountLimit()); + Y_ABORT_UNLESS(startIndex + length <= a->length()); + AFL_VERIFY(Type->id() == a->type_id())("own", Type->ToString())("a", a->type()->ToString()); + ui32 i = startIndex; + const ui32 packedRecordSize = Context.GetColumnStat() ? Context.GetColumnStat()->GetPackedRecordSize() : 0; + for (; i < startIndex + length; ++i) { + ui64 recordSize = 0; + AFL_VERIFY(NArrow::Append(*Builder, *a, i, &recordSize))("a", a->ToString())("a_type", a->type()->ToString())( + "builder_type", Builder->type()->ToString()); + CurrentChunkRawSize += recordSize; + PredictedPackedBytes += packedRecordSize ? 
packedRecordSize : (recordSize / 2); + if (++CurrentPortionRecords == ChunkContext.GetPortionRowsCountLimit()) { + FlushBuffer(); + ++i; + break; + } + if (CurrentChunkRawSize >= Context.GetChunkRawBytesLimit() || PredictedPackedBytes >= Context.GetExpectedBlobPackedBytes()) { + FlushBuffer(); + } + } + return startIndex + length - i; +} + +bool TColumnPortion::FlushBuffer() { + if (!Builder->length()) { + return false; + } + auto newArrayChunk = NArrow::TStatusValidator::GetValid(Builder->Finish()); + Chunks.emplace_back(std::make_shared(Context.GetSaver().Apply(newArrayChunk, Context.GetResultField()), + std::make_shared(newArrayChunk), TChunkAddress(Context.GetColumnId(), 0), ColumnInfo)); + Builder = Context.MakeBuilder(); + CurrentChunkRawSize = 0; + PredictedPackedBytes = 0; + PackedSize += Chunks.back()->GetPackedSize(); + return true; +} + +} // namespace NKikimr::NOlap::NCompaction diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/plain/column_portion_chunk.h b/ydb/core/tx/columnshard/engines/changes/compaction/plain/column_portion_chunk.h new file mode 100644 index 000000000000..59c6a3b460da --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/plain/column_portion_chunk.h @@ -0,0 +1,47 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace NKikimr::NOlap::NCompaction { + +class TColumnPortion: public TColumnPortionResult { +private: + using TBase = TColumnPortionResult; + std::unique_ptr Builder; + std::shared_ptr Type; + const TColumnMergeContext& Context; + const TChunkMergeContext& ChunkContext; + YDB_READONLY(ui64, CurrentChunkRawSize, 0); + double PredictedPackedBytes = 0; + const TSimpleColumnInfo ColumnInfo; + ui64 PackedSize = 0; + ui64 CurrentPortionRecords = 0; + +public: + TColumnPortion(const TColumnMergeContext& context, const TChunkMergeContext& chunkContext) + : TBase(context.GetColumnId()) + , Context(context) + , ChunkContext(chunkContext) + , 
ColumnInfo(Context.GetIndexInfo().GetColumnFeaturesVerified(context.GetColumnId())) { + Builder = Context.MakeBuilder(); + Type = Builder->type(); + } + + bool IsFullPortion() const { + Y_ABORT_UNLESS(CurrentPortionRecords <= ChunkContext.GetPortionRowsCountLimit()); + return CurrentPortionRecords == ChunkContext.GetPortionRowsCountLimit(); + } + + bool FlushBuffer(); + + ui32 AppendSlice(const std::shared_ptr& a, const ui32 startIndex, const ui32 length); +}; + +} // namespace NKikimr::NOlap::NCompaction diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/plain/logic.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/plain/logic.cpp new file mode 100644 index 000000000000..0081d33d01b5 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/plain/logic.cpp @@ -0,0 +1,39 @@ +#include "logic.h" + +namespace NKikimr::NOlap::NCompaction { + +void TPlainMerger::DoStart(const std::vector>& input, TMergingContext& /*mContext*/) { + for (auto&& p : input) { + if (p) { + Cursors.emplace_back(NCompaction::TPortionColumnCursor(p)); + } else { + Cursors.emplace_back( + NCompaction::TPortionColumnCursor(Context.GetLoader()->GetResultField()->type(), Context.GetLoader()->GetDefaultValue())); + } + + } +} + +std::vector TPlainMerger::DoExecute( + const TChunkMergeContext& chunkContext, TMergingContext& mContext) { + NCompaction::TMergedColumn mColumn(Context, chunkContext); + auto& chunkInfo = mContext.GetChunk(chunkContext.GetBatchIdx()); + std::optional predPortionIdx; + for (ui32 idx = 0; idx < chunkInfo.GetIdxArray().length(); ++idx) { + const ui16 portionIdx = chunkInfo.GetIdxArray().Value(idx); + const ui32 portionRecordIdx = chunkInfo.GetRecordIdxArray().Value(idx); + auto& cursor = Cursors[portionIdx]; + cursor.Next(portionRecordIdx, mColumn); + if (predPortionIdx && portionIdx != *predPortionIdx) { + Cursors[*predPortionIdx].Fetch(mColumn); + } + if (idx + 1 == chunkInfo.GetIdxArray().length()) { + cursor.Fetch(mColumn); + } + 
predPortionIdx = portionIdx; + } + AFL_VERIFY(chunkInfo.GetIdxArray().length() == mColumn.GetRecordsCount()); + return mColumn.BuildResult(); +} + +} // namespace NKikimr::NOlap::NCompaction diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/plain/logic.h b/ydb/core/tx/columnshard/engines/changes/compaction/plain/logic.h new file mode 100644 index 000000000000..5b3c53f2eec9 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/plain/logic.h @@ -0,0 +1,22 @@ +#pragma once +#include "column_cursor.h" + +#include +#include +#include + +namespace NKikimr::NOlap::NCompaction { +class TPlainMerger: public IColumnMerger { +private: + static inline auto Registrator = TFactory::TRegistrator(NArrow::NAccessor::TGlobalConst::PlainDataAccessorName); + using TBase = IColumnMerger; + std::vector Cursors; + virtual void DoStart(const std::vector>& input, TMergingContext& mergeContext) override; + + virtual std::vector DoExecute(const TChunkMergeContext& context, TMergingContext& mergeContext) override; + +public: + using TBase::TBase; +}; + +} // namespace NKikimr::NOlap::NCompaction diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/plain/merged_column.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/plain/merged_column.cpp new file mode 100644 index 000000000000..84dd8608ffc4 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/plain/merged_column.cpp @@ -0,0 +1,36 @@ +#include "merged_column.h" + +namespace NKikimr::NOlap::NCompaction { + +void TMergedColumn::AppendSlice(const std::shared_ptr& data, const ui32 startIndex, const ui32 length) { + RecordsCount += length; + Y_ABORT_UNLESS(data); + ui32 remained = length; + while (remained = Portions.back().AppendSlice(data, startIndex + length - remained, remained)) { + Y_ABORT_UNLESS(Portions.back().IsFullPortion()); + NewPortion(); + } + if (Portions.back().IsFullPortion()) { + NewPortion(); + } +} + +std::vector TMergedColumn::BuildResult() { + std::vector 
result; + if (Portions.size()) { + Portions.back().FlushBuffer(); + } + for (auto&& i : Portions) { + result.emplace_back(i); + } + return result; +} + +void TMergedColumn::NewPortion() { + if (Portions.size()) { + Portions.back().FlushBuffer(); + } + Portions.emplace_back(TColumnPortion(Context, ChunkContext)); +} + +} diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/plain/merged_column.h b/ydb/core/tx/columnshard/engines/changes/compaction/plain/merged_column.h new file mode 100644 index 000000000000..2433bb8f4862 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/plain/merged_column.h @@ -0,0 +1,31 @@ +#pragma once +#include "column_portion_chunk.h" + +#include +#include + +namespace NKikimr::NOlap::NCompaction { + +class TMergedColumn { +private: + TColumnMergeContext Context; + TChunkMergeContext ChunkContext; + YDB_READONLY_DEF(std::vector, Portions); + YDB_READONLY(ui32, RecordsCount, 0); + + void NewPortion(); + +public: + TMergedColumn(const TColumnMergeContext& context, const TChunkMergeContext& chunkContext) + : Context(context) + , ChunkContext(chunkContext) + { + NewPortion(); + } + + void AppendSlice(const std::shared_ptr& data, const ui32 startIndex, const ui32 length); + + std::vector BuildResult(); +}; + +} // namespace NKikimr::NOlap::NCompaction diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/plain/ya.make b/ydb/core/tx/columnshard/engines/changes/compaction/plain/ya.make new file mode 100644 index 000000000000..91991ea51097 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/plain/ya.make @@ -0,0 +1,14 @@ +LIBRARY() + +SRCS( + column_cursor.cpp + column_portion_chunk.cpp + merged_column.cpp + GLOBAL logic.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/engines/changes/compaction/common +) + +END() diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/sparsed/logic.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/sparsed/logic.cpp new file mode 100644 index 
000000000000..d2c4e14f0664 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/sparsed/logic.cpp @@ -0,0 +1,171 @@ +#include "logic.h" + +#include +#include + +namespace NKikimr::NOlap::NCompaction { + +void TSparsedMerger::DoStart(const std::vector>& input, TMergingContext& mergingContext) { + ui32 idx = 0; + for (auto&& p : input) { + if (p) { + Cursors.emplace_back(p, Context); + if (mergingContext.HasRemapInfo(idx)) { + CursorPositions.emplace_back(TCursorPosition(&Cursors.back(), mergingContext.GetRemapPortionIndexToResultIndex(idx))); + if (CursorPositions.back().IsFinished()) { + CursorPositions.pop_back(); + } + } + } + ++idx; + } +} + +std::vector TSparsedMerger::DoExecute(const TChunkMergeContext& chunkContext, TMergingContext& /*mergeContext*/) { + std::vector result; + std::shared_ptr writer = std::make_shared(Context); + const auto addSkipsToWriter = [&](i64 delta) { + if (!delta) { + return; + } + AFL_VERIFY(delta >= 0); + if (chunkContext.GetPortionRowsCountLimit() <= writer->GetCurrentSize() + delta) { + const i64 diff = chunkContext.GetPortionRowsCountLimit() - writer->GetCurrentSize(); + writer->AddPositions(diff); + result.emplace_back(writer->Flush()); + writer = std::make_shared(Context); + delta -= diff; + } + while (chunkContext.GetPortionRowsCountLimit() <= delta) { + writer->AddPositions(chunkContext.GetPortionRowsCountLimit()); + result.emplace_back(writer->Flush()); + writer = std::make_shared(Context); + delta -= chunkContext.GetPortionRowsCountLimit(); + } + if (delta) { + writer->AddPositions(delta); + } + }; + + std::vector heap; + for (auto it = CursorPositions.begin(); it != CursorPositions.end();) { + AFL_VERIFY(chunkContext.GetBatchIdx() <= it->GetCurrentGlobalChunkIdx()); + if (it->GetCurrentGlobalChunkIdx() == chunkContext.GetBatchIdx()) { + heap.emplace_back(std::move(*it)); + it = CursorPositions.erase(it); + } else { + ++it; + } + } + std::make_heap(heap.begin(), heap.end()); + ui32 nextGlobalPosition = 0; 
+ while (heap.size()) { + std::pop_heap(heap.begin(), heap.end()); + while (heap.size() == 1 || (heap.size() > 1 && heap.front() < heap.back())) { + { + auto& address = heap.back().GetCurrentAddress(); + AFL_VERIFY(nextGlobalPosition <= (ui32)address.GetGlobalPosition()); + addSkipsToWriter(address.GetGlobalPosition() - nextGlobalPosition); + + heap.back().AddIndexTo(*writer); + if (chunkContext.GetPortionRowsCountLimit() == writer->GetCurrentSize()) { + result.emplace_back(writer->Flush()); + writer = std::make_shared(Context); + } + nextGlobalPosition = address.GetGlobalPosition() + 1; + } + if (!heap.back().Next()) { + heap.pop_back(); + break; + } else if (heap.back().GetCurrentGlobalChunkIdx() != chunkContext.GetBatchIdx()) { + CursorPositions.emplace_back(std::move(heap.back())); + heap.pop_back(); + break; + } + } + std::push_heap(heap.begin(), heap.end()); + } + AFL_VERIFY(nextGlobalPosition <= chunkContext.GetRecordsCount()); + addSkipsToWriter(chunkContext.GetRecordsCount() - nextGlobalPosition); + if (writer->HasData()) { + result.emplace_back(writer->Flush()); + } + return result; +} + +void TSparsedMerger::TWriter::AddRealData(const std::shared_ptr& arr, const ui32 index) { + AFL_VERIFY(arr); + AFL_VERIFY(NArrow::Append(*ValueBuilder, *arr, index)); + NArrow::TStatusValidator::Validate(IndexBuilderImpl->Append(CurrentRecordIdx)); + ++UsefulRecordsCount; + ++CurrentRecordIdx; +} + +TColumnPortionResult TSparsedMerger::TWriter::Flush() { + std::vector> fields = { std::make_shared("index", arrow::uint32()), + std::make_shared("value", DataType) }; + auto schema = std::make_shared(fields); + std::vector> columns = { NArrow::TStatusValidator::GetValid(IndexBuilder->Finish()), + NArrow::TStatusValidator::GetValid(ValueBuilder->Finish()) }; + + auto recordBatch = arrow::RecordBatch::Make(schema, UsefulRecordsCount, columns); + NArrow::NAccessor::TSparsedArray::TBuilder builder( + 
Context.GetIndexInfo().GetColumnFeaturesVerified(Context.GetColumnId()).GetDefaultValue().GetValue(), Context.GetResultField()->type()); + builder.AddChunk(CurrentRecordIdx, recordBatch); + Chunks.emplace_back(std::make_shared(Context.GetSaver().Apply(recordBatch), builder.Finish(), + TChunkAddress(ColumnId, 0), Context.GetIndexInfo().GetColumnFeaturesVerified(ColumnId))); + return *this; +} + +TSparsedMerger::TWriter::TWriter(const TColumnMergeContext& context) + : TBase(context.GetColumnId()) + , DataType(context.GetResultField()->type()) + , Context(context) { + IndexBuilder = NArrow::MakeBuilder(arrow::uint32()); + ValueBuilder = NArrow::MakeBuilder(DataType); + IndexBuilderImpl = (arrow::UInt32Builder*)(IndexBuilder.get()); +} + +bool TSparsedMerger::TPlainChunkCursor::AddIndexTo(const ui32 index, TWriter& writer) { + AFL_VERIFY(ChunkStartPosition <= index); + writer.AddRealData(ChunkAddress->GetArray(), index - ChunkStartPosition); + return true; +} + +bool TSparsedMerger::TSparsedChunkCursor::AddIndexTo(const ui32 index, TWriter& writer) { + AFL_VERIFY(ChunkStartGlobalPosition <= index); + AFL_VERIFY(index == NextGlobalPosition)("index", index)("next", NextGlobalPosition); + writer.AddRealData(Chunk->GetColValue(), NextLocalPosition); + return true; +} + +bool TSparsedMerger::TCursor::AddIndexTo(const ui32 index, TWriter& writer) { + if (FinishGlobalPosition <= index) { + InitArrays(index); + } + if (SparsedCursor) { + return SparsedCursor->AddIndexTo(index, writer); + } else if (PlainCursor) { + return PlainCursor->AddIndexTo(index, writer); + } else { + return false; + } +} + +void TSparsedMerger::TCursor::InitArrays(const ui32 position) { + AFL_VERIFY(!CurrentOwnedArray || !CurrentOwnedArray->GetAddress().Contains(position)); + CurrentOwnedArray = Array->GetArray(CurrentOwnedArray, position, Array); + if (CurrentOwnedArray->GetArray()->GetType() == NArrow::NAccessor::IChunkedArray::EType::SparsedArray) { + auto sparsedArray = 
static_pointer_cast(CurrentOwnedArray->GetArray()); + SparsedCursor = std::make_shared(sparsedArray, &*CurrentOwnedArray); + PlainCursor = nullptr; + } else { + PlainCursor = make_shared(CurrentOwnedArray->GetArray(), &*CurrentOwnedArray); + SparsedCursor = nullptr; + } + AFL_VERIFY(CurrentOwnedArray->GetAddress().GetGlobalStartPosition() <= position); + FinishGlobalPosition = CurrentOwnedArray->GetAddress().GetGlobalStartPosition() + CurrentOwnedArray->GetArray()->GetRecordsCount(); + AFL_VERIFY(position < FinishGlobalPosition); +} + +} // namespace NKikimr::NOlap::NCompaction diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/sparsed/logic.h b/ydb/core/tx/columnshard/engines/changes/compaction/sparsed/logic.h new file mode 100644 index 000000000000..9fc64606a092 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/sparsed/logic.h @@ -0,0 +1,286 @@ +#pragma once +#include +#include +#include +#include + +namespace NKikimr::NOlap::NCompaction { + +class TSparsedMerger: public IColumnMerger { +private: + static inline auto Registrator = TFactory::TRegistrator(NArrow::NAccessor::TGlobalConst::SparsedDataAccessorName); + + using TBase = IColumnMerger; + class TWriter: public TColumnPortionResult { + private: + using TBase = TColumnPortionResult; + const std::shared_ptr DataType; + const TColumnMergeContext& Context; + std::unique_ptr IndexBuilder; + std::unique_ptr ValueBuilder; + arrow::UInt32Builder* IndexBuilderImpl = nullptr; + ui32 CurrentRecordIdx = 0; + ui32 UsefulRecordsCount = 0; + + public: + TWriter(const TColumnMergeContext& context); + + bool HasData() const { + return CurrentRecordIdx; + } + + ui32 GetCurrentSize() const { + return CurrentRecordIdx; + } + + bool HasUsefulData() const { + return UsefulRecordsCount; + } + + ui32 AddPositions(const i32 delta) { + AFL_VERIFY(delta > 0); + CurrentRecordIdx += delta; + return CurrentRecordIdx; + } + + void AddRealData(const std::shared_ptr& arr, const ui32 index); + + 
TColumnPortionResult Flush(); + }; + + class TPlainChunkCursor { + private: + std::shared_ptr CurrentChunkedArray; + std::optional ChunkAddress; + const NArrow::NAccessor::IChunkedArray::TFullChunkedArrayAddress* CurrentOwnedArray; + ui32 ChunkStartPosition = 0; + ui32 ChunkFinishPosition = 0; + + void InitArrays(const ui32 position) { + AFL_VERIFY(!ChunkAddress || ChunkFinishPosition <= position); + AFL_VERIFY(CurrentOwnedArray->GetAddress().GetGlobalStartPosition() <= position)("pos", position)( + "global", CurrentOwnedArray->GetAddress().GetGlobalStartPosition()); + ChunkAddress = CurrentChunkedArray->GetChunk(ChunkAddress, position - CurrentOwnedArray->GetAddress().GetGlobalStartPosition()); + AFL_VERIFY(ChunkAddress); + ChunkStartPosition = CurrentOwnedArray->GetAddress().GetGlobalStartPosition() + ChunkAddress->GetAddress().GetGlobalStartPosition(); + ChunkFinishPosition = + CurrentOwnedArray->GetAddress().GetGlobalStartPosition() + ChunkAddress->GetAddress().GetGlobalFinishPosition(); + AFL_VERIFY(position < ChunkFinishPosition)("finish", ChunkFinishPosition)("pos", position); + AFL_VERIFY(ChunkStartPosition <= position)("start", ChunkStartPosition)("pos", position); + } + + public: + TPlainChunkCursor(const std::shared_ptr& chunked, + const NArrow::NAccessor::IChunkedArray::TFullChunkedArrayAddress* currentOwnedArray) + : CurrentChunkedArray(chunked) + , CurrentOwnedArray(currentOwnedArray) { + AFL_VERIFY(CurrentChunkedArray); + AFL_VERIFY(CurrentOwnedArray); + InitArrays(CurrentOwnedArray->GetAddress().GetGlobalStartPosition()); + } + bool AddIndexTo(const ui32 index, TWriter& writer); + std::optional MoveToSignificant(const ui32 currentGlobalPosition, const TColumnMergeContext& context) { + AFL_VERIFY(ChunkStartPosition <= currentGlobalPosition)("start", ChunkStartPosition)("pos", currentGlobalPosition)( + "global_start", CurrentOwnedArray->GetAddress().GetGlobalStartPosition()); + ui32 currentIndex = currentGlobalPosition; + while (true) { + if 
(CurrentOwnedArray->GetAddress().GetGlobalFinishPosition() <= currentIndex) { + return {}; + } + if (ChunkFinishPosition <= currentIndex) { + InitArrays(currentIndex); + continue; + } + for (; currentIndex < ChunkFinishPosition; ++currentIndex) { + if (!NArrow::ColumnEqualsScalar( + ChunkAddress->GetArray(), currentIndex - ChunkStartPosition, context.GetLoader()->GetDefaultValue())) { + return currentIndex; + } + } + } + } + }; + + class TSparsedChunkCursor { + private: + std::shared_ptr CurrentSparsedArray; + const NArrow::NAccessor::TSparsedArrayChunk* Chunk = nullptr; + const NArrow::NAccessor::IChunkedArray::TFullChunkedArrayAddress* CurrentOwnedArray; + ui32 ChunkStartGlobalPosition = 0; + ui32 NextGlobalPosition = 0; + ui32 NextLocalPosition = 0; + ui32 FinishGlobalPosition = 0; + void InitArrays(const ui32 position) { + AFL_VERIFY(!Chunk || CurrentOwnedArray->GetAddress().GetGlobalStartPosition() + Chunk->GetFinishPosition() <= position); + Chunk = &CurrentSparsedArray->GetSparsedChunk(CurrentOwnedArray->GetAddress().GetLocalIndex(position)); + AFL_VERIFY(Chunk->GetRecordsCount()); + AFL_VERIFY(CurrentOwnedArray->GetAddress().GetGlobalStartPosition() + Chunk->GetStartPosition() <= position && + position < CurrentOwnedArray->GetAddress().GetGlobalStartPosition() + Chunk->GetFinishPosition()) + ("pos", position)("start", Chunk->GetStartPosition())("finish", Chunk->GetFinishPosition())( + "shift", CurrentOwnedArray->GetAddress().GetGlobalStartPosition()); + ChunkStartGlobalPosition = CurrentOwnedArray->GetAddress().GetGlobalStartPosition() + Chunk->GetStartPosition(); + NextGlobalPosition = CurrentOwnedArray->GetAddress().GetGlobalStartPosition() + Chunk->GetFirstIndexNotDefault(); + NextLocalPosition = 0; + FinishGlobalPosition = CurrentOwnedArray->GetAddress().GetGlobalStartPosition() + Chunk->GetFinishPosition(); + } + + public: + std::optional MoveToSignificant(const ui32 currentGlobalPosition, const TColumnMergeContext& /*context*/) { + while (true) { + if 
(NextGlobalPosition == CurrentOwnedArray->GetAddress().GetGlobalFinishPosition()) { + return {}; + } + if (NextGlobalPosition == FinishGlobalPosition) { + InitArrays(NextGlobalPosition); + continue; + } + if (currentGlobalPosition == NextGlobalPosition) { + return NextGlobalPosition; + } + for (; NextLocalPosition < Chunk->GetNotDefaultRecordsCount(); ++NextLocalPosition) { + NextGlobalPosition = ChunkStartGlobalPosition + Chunk->GetIndexUnsafeFast(NextLocalPosition); + if (currentGlobalPosition <= NextGlobalPosition) { + return NextGlobalPosition; + } + } + NextGlobalPosition = FinishGlobalPosition; + } + } + bool AddIndexTo(const ui32 index, TWriter& writer); + TSparsedChunkCursor(const std::shared_ptr& sparsed, + const NArrow::NAccessor::IChunkedArray::TFullChunkedArrayAddress* currentOwnedArray) + : CurrentSparsedArray(sparsed) + , CurrentOwnedArray(currentOwnedArray) { + AFL_VERIFY(sparsed); + AFL_VERIFY(currentOwnedArray); + InitArrays(CurrentOwnedArray->GetAddress().GetGlobalStartPosition()); + } + }; + + class TCursor { + private: + std::shared_ptr Array; + std::optional CurrentOwnedArray; + std::shared_ptr SparsedCursor; + std::shared_ptr PlainCursor; + ui32 FinishGlobalPosition = 0; + const TColumnMergeContext& Context; + void InitArrays(const ui32 position); + + public: + TCursor(const std::shared_ptr& array, const TColumnMergeContext& context) + : Array(array) + , Context(context) { + AFL_VERIFY(Array); + AFL_VERIFY(Array->GetRecordsCount()); + InitArrays(0); + } + + ui32 GetRecordsCount() const { + return Array->GetRecordsCount(); + } + + ui32 MoveToSignificant(const ui32 start) { + ui32 currentPosition = start; + while (true) { + std::optional significantIndex; + if (SparsedCursor) { + significantIndex = SparsedCursor->MoveToSignificant(currentPosition, Context); + } else if (PlainCursor) { + significantIndex = PlainCursor->MoveToSignificant(currentPosition, Context); + } + if (significantIndex) { + return *significantIndex; + } + if 
(FinishGlobalPosition == Array->GetRecordsCount()) { + return FinishGlobalPosition; + } else { + currentPosition = FinishGlobalPosition; + InitArrays(FinishGlobalPosition); + } + } + } + + bool AddIndexTo(const ui32 index, TWriter& writer); + }; + + class TCursorPosition: TMoveOnly { + private: + TCursor* Cursor; + ui32 CurrentIndex = 0; + const std::vector* GlobalSequence = nullptr; + TMergingContext::TAddress CurrentAddress; + + bool InitPosition(const ui32 start) { + CurrentIndex = start; + while (true) { + CurrentIndex = Cursor->MoveToSignificant(CurrentIndex); + if (CurrentIndex == GlobalSequence->size()) { + return false; + } + auto& addr = (*GlobalSequence)[CurrentIndex]; + if (addr.GetGlobalPosition() != -1) { + CurrentAddress = addr; + return true; + } + if (++CurrentIndex == GlobalSequence->size()) { + return false; + } + } + } + + public: + TCursor* operator->() { + return Cursor; + } + + void AddIndexTo(TWriter& writer) const { + AFL_VERIFY(Cursor->AddIndexTo(CurrentIndex, writer)); + } + + TCursorPosition(TCursor* cursor, const std::vector& globalSequence) + : Cursor(cursor) + , GlobalSequence(&globalSequence) { + AFL_VERIFY(GlobalSequence->size() == cursor->GetRecordsCount()); + InitPosition(0); + } + + bool IsFinished() const { + AFL_VERIFY(CurrentIndex <= GlobalSequence->size()); + return CurrentIndex == GlobalSequence->size(); + } + + ui32 GetCurrentGlobalPosition() const { + return CurrentAddress.GetGlobalPosition(); + } + + ui32 GetCurrentGlobalChunkIdx() const { + return CurrentAddress.GetChunkIdx(); + } + + const TMergingContext::TAddress& GetCurrentAddress() const { + return CurrentAddress; + } + + bool operator<(const TCursorPosition& item) const { + return item.GetCurrentAddress() < GetCurrentAddress(); + } + + [[nodiscard]] bool Next() { + return InitPosition(++CurrentIndex); + } + }; + + std::deque Cursors; + std::list CursorPositions; + + virtual void DoStart( + const std::vector>& input, TMergingContext& mergeContext) override; + + 
virtual std::vector DoExecute(const TChunkMergeContext& context, TMergingContext& mergeContext) override; + +public: + using TBase::TBase; +}; + +} // namespace NKikimr::NOlap::NCompaction diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/sparsed/ya.make b/ydb/core/tx/columnshard/engines/changes/compaction/sparsed/ya.make new file mode 100644 index 000000000000..e24e8341aa7d --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/compaction/sparsed/ya.make @@ -0,0 +1,11 @@ +LIBRARY() + +SRCS( + GLOBAL logic.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/engines/changes/compaction/common +) + +END() diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/ya.make b/ydb/core/tx/columnshard/engines/changes/compaction/ya.make index aa52c0f9d6a0..5e76aa0d8971 100644 --- a/ydb/core/tx/columnshard/engines/changes/compaction/ya.make +++ b/ydb/core/tx/columnshard/engines/changes/compaction/ya.make @@ -1,14 +1,15 @@ LIBRARY() SRCS( - merge_context.cpp - column_cursor.cpp - column_portion_chunk.cpp - merged_column.cpp + merger.cpp ) PEERDIR( ydb/core/tx/tiering + ydb/core/tx/columnshard/engines/changes/compaction/abstract + ydb/core/tx/columnshard/engines/changes/compaction/common + ydb/core/tx/columnshard/engines/changes/compaction/plain + ydb/core/tx/columnshard/engines/changes/compaction/sparsed ) END() diff --git a/ydb/core/tx/columnshard/engines/changes/general_compaction.cpp b/ydb/core/tx/columnshard/engines/changes/general_compaction.cpp index 87b66aa9473e..ea7b6ddc2eb4 100644 --- a/ydb/core/tx/columnshard/engines/changes/general_compaction.cpp +++ b/ydb/core/tx/columnshard/engines/changes/general_compaction.cpp @@ -1,75 +1,39 @@ #include "general_compaction.h" -#include "compaction/column_cursor.h" -#include "compaction/column_portion_chunk.h" -#include "compaction/merge_context.h" -#include "compaction/merged_column.h" #include "counters/general.h" +#include "compaction/merger.h" -#include -#include -#include +#include #include -#include 
-#include -#include -#include -#include namespace NKikimr::NOlap::NCompaction { -void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByFullBatches(TConstructionContext& context, std::vector&& portions) noexcept { - std::vector> batchResults; - auto resultSchema = context.SchemaVersions.GetLastSchema(); - auto shardingActual = context.SchemaVersions.GetShardingInfoActual(GranuleMeta->GetPathId()); - { - auto resultDataSchema = resultSchema->GetIndexInfo().ArrowSchemaWithSpecials(); - NArrow::NMerger::TMergePartialStream mergeStream(resultSchema->GetIndexInfo().GetReplaceKey(), resultDataSchema, false, IIndexInfo::GetSnapshotColumnNames()); - - THashSet portionsInUsage; - for (auto&& i : portions) { - AFL_VERIFY(portionsInUsage.emplace(i.GetPortionInfo().GetPortionId()).second); - } - - for (auto&& i : portions) { - auto dataSchema = i.GetPortionInfo().GetSchema(context.SchemaVersions); - auto batch = i.GetBatch(dataSchema, *resultSchema); - batch = resultSchema->NormalizeBatch(*dataSchema, batch).DetachResult(); - batch = IIndexInfo::NormalizeDeletionColumn(batch); - Y_DEBUG_ABORT_UNLESS(NArrow::IsSortedAndUnique(batch, resultSchema->GetIndexInfo().GetReplaceKey())); - auto filter = BuildPortionFilter(shardingActual, batch, i.GetPortionInfo(), portionsInUsage, resultSchema); - mergeStream.AddSource(batch, filter); - } - batchResults = mergeStream.DrainAllParts(CheckPoints, resultDataSchema->fields()); - } - Y_ABORT_UNLESS(batchResults.size()); - for (auto&& b : batchResults) { - auto portions = MakeAppendedPortions(b, GranuleMeta->GetPathId(), resultSchema->GetSnapshot(), GranuleMeta.get(), context, {}); - Y_ABORT_UNLESS(portions.size()); - for (auto& portion : portions) { - if (shardingActual) { - portion.GetPortionConstructor().SetShardingVersion(shardingActual->GetSnapshotVersion()); - } - AppendedPortions.emplace_back(std::move(portion)); - } - } -} - -std::shared_ptr TGeneralCompactColumnEngineChanges::BuildPortionFilter(const std::optional& 
shardingActual, - const std::shared_ptr& batch, const TPortionInfo& pInfo, const THashSet& portionsInUsage, const ISnapshotSchema::TPtr& resultSchema) const { +std::shared_ptr TGeneralCompactColumnEngineChanges::BuildPortionFilter( + const std::optional& shardingActual, const std::shared_ptr& batch, + const TPortionInfo& pInfo, const THashSet& portionsInUsage, const ISnapshotSchema::TPtr& resultSchema) const { std::shared_ptr filter; if (shardingActual && pInfo.NeedShardingFilter(*shardingActual)) { - filter = shardingActual->GetShardingInfo()->GetFilter(batch); + std::set fieldNames; + for (auto&& i : shardingActual->GetShardingInfo()->GetColumnNames()) { + fieldNames.emplace(i); + } + auto table = batch->BuildTableVerified(fieldNames); + AFL_VERIFY(table); + filter = shardingActual->GetShardingInfo()->GetFilter(table); } NArrow::TColumnFilter filterDeleted = NArrow::TColumnFilter::BuildAllowFilter(); if (pInfo.GetMeta().GetDeletionsCount()) { - auto col = batch->GetColumnByName(TIndexInfo::SPEC_COL_DELETE_FLAG); + auto table = batch->BuildTableVerified(std::set({ TIndexInfo::SPEC_COL_DELETE_FLAG })); + AFL_VERIFY(table); + auto col = table->GetColumnByName(TIndexInfo::SPEC_COL_DELETE_FLAG); AFL_VERIFY(col); AFL_VERIFY(col->type()->id() == arrow::Type::BOOL); - auto bCol = static_pointer_cast(col); - for (ui32 i = 0; i < bCol->length(); ++i) { - filterDeleted.Add(!bCol->GetView(i)); + for (auto&& c : col->chunks()) { + auto bCol = static_pointer_cast(c); + for (ui32 i = 0; i < bCol->length(); ++i) { + filterDeleted.Add(!bCol->GetView(i)); + } } NArrow::TColumnFilter filterCorrection = NArrow::TColumnFilter::BuildDenyFilter(); auto pkSchema = resultSchema->GetIndexInfo().GetReplaceKey(); @@ -79,14 +43,17 @@ std::shared_ptr TGeneralCompactColumnEngineChanges::Build for (auto&& i : excludedIntervalsInfo.GetExcludedIntervals()) { NArrow::NMerger::TSortableBatchPosition startForFound(i.GetStart().ToBatch(pkSchema), 0, pkSchema->field_names(), {}, false); 
NArrow::NMerger::TSortableBatchPosition finishForFound(i.GetFinish().ToBatch(pkSchema), 0, pkSchema->field_names(), {}, false); - auto foundStart = NArrow::NMerger::TSortableBatchPosition::FindPosition(pos, pos.GetPosition(), batch->num_rows() - 1, startForFound, true); + auto foundStart = + NArrow::NMerger::TSortableBatchPosition::FindPosition(pos, pos.GetPosition(), batch->num_rows() - 1, startForFound, true); AFL_VERIFY(foundStart); AFL_VERIFY(!foundStart->IsLess())("pos", pos.DebugJson())("start", startForFound.DebugJson())("found", foundStart->DebugString()); - auto foundFinish = NArrow::NMerger::TSortableBatchPosition::FindPosition(pos, pos.GetPosition(), batch->num_rows() - 1, finishForFound, false); + auto foundFinish = + NArrow::NMerger::TSortableBatchPosition::FindPosition(pos, pos.GetPosition(), batch->num_rows() - 1, finishForFound, false); AFL_VERIFY(foundFinish); AFL_VERIFY(foundFinish->GetPosition() >= foundStart->GetPosition()); if (foundFinish->GetPosition() > foundStart->GetPosition()) { - AFL_VERIFY(!foundFinish->IsGreater())("pos", pos.DebugJson())("finish", finishForFound.DebugJson())("found", foundFinish->DebugString()); + AFL_VERIFY(!foundFinish->IsGreater())("pos", pos.DebugJson())("finish", finishForFound.DebugJson())( + "found", foundFinish->DebugString()); } filterCorrection.Add(foundStart->GetPosition() - posCurrent, false); if (foundFinish->IsGreater()) { @@ -109,180 +76,74 @@ std::shared_ptr TGeneralCompactColumnEngineChanges::Build return filter; } -void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByChunks(TConstructionContext& context, std::vector&& portions) noexcept { - static const TString portionIdFieldName = "$$__portion_id"; - static const TString portionRecordIndexFieldName = "$$__portion_record_idx"; - static const std::shared_ptr portionIdField = std::make_shared(portionIdFieldName, std::make_shared()); - static const std::shared_ptr portionRecordIndexField = std::make_shared(portionRecordIndexFieldName, 
std::make_shared()); - +void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByChunks( + TConstructionContext& context, std::vector&& portions) noexcept { auto resultSchema = context.SchemaVersions.GetLastSchema(); auto shardingActual = context.SchemaVersions.GetShardingInfoActual(GranuleMeta->GetPathId()); - std::vector pkFieldNames = resultSchema->GetIndexInfo().GetReplaceKey()->field_names(); - std::set pkFieldNamesSet(pkFieldNames.begin(), pkFieldNames.end()); - for (auto&& i : TIndexInfo::GetSnapshotColumnNames()) { - pkFieldNamesSet.emplace(i); - } - pkFieldNamesSet.emplace(TIndexInfo::SPEC_COL_DELETE_FLAG); - - std::vector> batchResults; + std::shared_ptr stats = std::make_shared(); + std::shared_ptr resultFiltered; + NCompaction::TMerger merger(context, SaverContext); { - arrow::FieldVector indexFields; - indexFields.emplace_back(portionIdField); - indexFields.emplace_back(portionRecordIndexField); - IIndexInfo::AddSpecialFields(indexFields); - auto dataSchema = std::make_shared(indexFields); - NArrow::NMerger::TMergePartialStream mergeStream(resultSchema->GetIndexInfo().GetReplaceKey(), dataSchema, false, IIndexInfo::GetSnapshotColumnNames()); - THashSet usedPortionIds; - for (auto&& i : portions) { - AFL_VERIFY(usedPortionIds.emplace(i.GetPortionInfo().GetPortionId()).second); + std::set pkColumnIds; + { + auto pkColumnIdsVector = IIndexInfo::AddSnapshotFieldIds(resultSchema->GetIndexInfo().GetPKColumnIds()); + pkColumnIds = std::set(pkColumnIdsVector.begin(), pkColumnIdsVector.end()); } - - ui32 idx = 0; - for (auto&& i : portions) { - auto dataSchema = i.GetPortionInfo().GetSchema(context.SchemaVersions); - auto batch = i.GetBatch(dataSchema, *resultSchema, pkFieldNamesSet); + std::set dataColumnIds; + { { - NArrow::NConstruction::IArrayBuilder::TPtr column = std::make_shared>>(portionIdFieldName, idx++); - batch = NArrow::TStatusValidator::GetValid(batch->AddColumn(batch->num_columns(), portionIdField, column->BuildArray(batch->num_rows()))); 
- } - { - NArrow::NConstruction::IArrayBuilder::TPtr column = std::make_shared>>(portionRecordIndexFieldName); - batch = NArrow::TStatusValidator::GetValid(batch->AddColumn(batch->num_columns(), portionRecordIndexField, column->BuildArray(batch->num_rows()))); - } - batch = IIndexInfo::NormalizeDeletionColumn(batch); - Y_DEBUG_ABORT_UNLESS(NArrow::IsSortedAndUnique(batch, resultSchema->GetIndexInfo().GetReplaceKey())); - std::shared_ptr filter = BuildPortionFilter(shardingActual, batch, i.GetPortionInfo(), usedPortionIds, resultSchema); - mergeStream.AddSource(batch, filter); - } - batchResults = mergeStream.DrainAllParts(CheckPoints, indexFields); - } - Y_ABORT_UNLESS(batchResults.size()); - - std::shared_ptr stats = std::make_shared(); - for (auto&& i : SwitchedPortions) { - stats->Merge(i.GetSerializationStat(*resultSchema)); - } - - std::vector>> chunkGroups; - chunkGroups.resize(batchResults.size()); - for (auto&& columnId : resultSchema->GetIndexInfo().GetColumnIds()) { - NActors::TLogContextGuard logGuard(NActors::TLogContextBuilder::Build()("field_name", resultSchema->GetIndexInfo().GetColumnName(columnId))); - auto columnInfo = stats->GetColumnInfo(columnId); - auto resultField = resultSchema->GetIndexInfo().GetColumnFieldVerified(columnId); - - std::vector cursors; - for (auto&& p : portions) { - auto dataSchema = p.GetPortionInfo().GetSchema(context.SchemaVersions); - auto loader = dataSchema->GetColumnLoaderOptional(columnId); - std::vector records; - std::vector> chunks; - if (!p.ExtractColumnChunks(columnId, records, chunks)) { - if (!loader) { - loader = resultSchema->GetColumnLoaderVerified(columnId); - } else { - AFL_VERIFY(dataSchema->IsSpecialColumnId(columnId)); + THashMap schemas; + for (auto& portion : SwitchedPortions) { + auto dataSchema = portion.GetSchema(context.SchemaVersions); + schemas.emplace(dataSchema->GetVersion(), dataSchema); } - chunks.emplace_back(std::make_shared(columnId, p.GetPortionInfo().GetRecordsCount(), resultField, 
resultSchema->GetDefaultValueVerified(columnId), resultSchema->GetColumnSaver(columnId))); - records = { nullptr }; + dataColumnIds = ISnapshotSchema::GetColumnsWithDifferentDefaults(schemas, resultSchema); } - AFL_VERIFY(!!loader); - cursors.emplace_back(TPortionColumnCursor(chunks, records, loader, p.GetPortionInfo().GetPortionId())); - } - - ui32 batchesRecordsCount = 0; - ui32 columnRecordsCount = 0; - std::map> columnChunks; - ui32 batchIdx = 0; - for (auto&& batchResult : batchResults) { - const ui32 portionRecordsCountLimit = batchResult->num_rows() / (batchResult->num_rows() / NSplitter::TSplitSettings().GetExpectedRecordsCountOnPage() + 1) + 1; - TColumnMergeContext context(columnId, resultSchema, portionRecordsCountLimit, NSplitter::TSplitSettings().GetExpectedUnpackColumnChunkRawSize(), columnInfo); - TMergedColumn mColumn(context); - - auto columnPortionIdx = batchResult->GetColumnByName(portionIdFieldName); - auto columnPortionRecordIdx = batchResult->GetColumnByName(portionRecordIndexFieldName); - auto columnSnapshotPlanStepIdx = batchResult->GetColumnByName(TIndexInfo::SPEC_COL_PLAN_STEP); - auto columnSnapshotTxIdx = batchResult->GetColumnByName(TIndexInfo::SPEC_COL_TX_ID); - Y_ABORT_UNLESS(columnPortionIdx && columnPortionRecordIdx && columnSnapshotPlanStepIdx && columnSnapshotTxIdx); - Y_ABORT_UNLESS(columnPortionIdx->type_id() == arrow::UInt16Type::type_id); - Y_ABORT_UNLESS(columnPortionRecordIdx->type_id() == arrow::UInt32Type::type_id); - Y_ABORT_UNLESS(columnSnapshotPlanStepIdx->type_id() == arrow::UInt64Type::type_id); - Y_ABORT_UNLESS(columnSnapshotTxIdx->type_id() == arrow::UInt64Type::type_id); - const arrow::UInt16Array& pIdxArray = static_cast(*columnPortionIdx); - const arrow::UInt32Array& pRecordIdxArray = static_cast(*columnPortionRecordIdx); - - AFL_VERIFY(batchResult->num_rows() == pIdxArray.length()); - std::optional predPortionIdx; - for (ui32 idx = 0; idx < pIdxArray.length(); ++idx) { - const ui16 portionIdx = 
pIdxArray.Value(idx); - const ui32 portionRecordIdx = pRecordIdxArray.Value(idx); - auto& cursor = cursors[portionIdx]; - cursor.Next(portionRecordIdx, mColumn); - if (predPortionIdx && portionIdx != *predPortionIdx) { - cursors[*predPortionIdx].Fetch(mColumn); - } - if (idx + 1 == pIdxArray.length()) { - cursor.Fetch(mColumn); + for (auto&& i : SwitchedPortions) { + stats->Merge(i.GetSerializationStat(*resultSchema)); + if (dataColumnIds.size() != resultSchema->GetColumnsCount()) { + for (auto id : i.GetColumnIds()) { + if (resultSchema->HasColumnId(id)) { + dataColumnIds.emplace(id); + } + } } - predPortionIdx = portionIdx; } - chunkGroups[batchIdx][columnId] = mColumn.BuildResult(); - batchesRecordsCount += batchResult->num_rows(); - columnRecordsCount += mColumn.GetRecordsCount(); - AFL_VERIFY(batchResult->num_rows() == mColumn.GetRecordsCount()); - ++batchIdx; - } - AFL_VERIFY(columnRecordsCount == batchesRecordsCount)("mCount", columnRecordsCount)("bCount", batchesRecordsCount); - } - ui32 batchIdx = 0; - - const auto groups = resultSchema->GetIndexInfo().GetEntityGroupsByStorageId(IStoragesManager::DefaultStorageId, *SaverContext.GetStoragesManager()); - for (auto&& columnChunks : chunkGroups) { - auto batchResult = batchResults[batchIdx]; - ++batchIdx; - Y_ABORT_UNLESS(columnChunks.size()); - - for (auto&& i : columnChunks) { - if (i.second.size() != columnChunks.begin()->second.size()) { - for (ui32 p = 0; p < std::min(columnChunks.begin()->second.size(), i.second.size()); ++p) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("p_first", columnChunks.begin()->second[p].DebugString())("p", i.second[p].DebugString()); - } + AFL_VERIFY(dataColumnIds.size() <= resultSchema->GetColumnsCount()); + if (dataColumnIds.contains((ui32)IIndexInfo::ESpecialColumn::DELETE_FLAG)) { + pkColumnIds.emplace((ui32)IIndexInfo::ESpecialColumn::DELETE_FLAG); } - AFL_VERIFY(i.second.size() == columnChunks.begin()->second.size())("first", 
columnChunks.begin()->second.size())("current", i.second.size())("first_name", columnChunks.begin()->first)("current_name", i.first); } - - std::vector batchSlices; - std::shared_ptr schemaDetails(new TDefaultSchemaDetails(resultSchema, stats)); - - for (ui32 i = 0; i < columnChunks.begin()->second.size(); ++i) { - THashMap>> portionColumns; - for (auto&& p : columnChunks) { - portionColumns.emplace(p.first, p.second[i].GetChunks()); + resultFiltered = std::make_shared(resultSchema, dataColumnIds); + { + auto seqDataColumnIds = dataColumnIds; + for (auto&& i : pkColumnIds) { + AFL_VERIFY(seqDataColumnIds.erase(i))("id", i); + } + THashSet usedPortionIds; + for (auto&& i : portions) { + AFL_VERIFY(usedPortionIds.emplace(i.GetPortionInfo().GetPortionId()).second); } - resultSchema->GetIndexInfo().AppendIndexes(portionColumns); - batchSlices.emplace_back(portionColumns, schemaDetails, context.Counters.SplitterCounters); - } - TSimilarPacker slicer(NSplitter::TSplitSettings().GetExpectedPortionSize()); - auto packs = slicer.Split(batchSlices); - ui32 recordIdx = 0; - for (auto&& i : packs) { - TGeneralSerializedSlice slice(std::move(i)); - auto b = batchResult->Slice(recordIdx, slice.GetRecordsCount()); - const ui32 deletionsCount = IIndexInfo::CalcDeletions(b, true); - AppendedPortions.emplace_back(TWritePortionInfoWithBlobs::BuildByBlobs(slice.GroupChunksByBlobs(groups), GranuleMeta->GetPathId(), - resultSchema->GetVersion(), resultSchema->GetSnapshot(), SaverContext.GetStoragesManager())); - AppendedPortions.back().FillStatistics(resultSchema->GetIndexInfo()); - NArrow::TFirstLastSpecialKeys primaryKeys(slice.GetFirstLastPKBatch(resultSchema->GetIndexInfo().GetReplaceKey())); - NArrow::TMinMaxSpecialKeys snapshotKeys(b, TIndexInfo::ArrowSchemaSnapshot()); - AppendedPortions.back().GetPortionConstructor().AddMetadata(*resultSchema, deletionsCount, primaryKeys, snapshotKeys); - 
AppendedPortions.back().GetPortionConstructor().MutableMeta().SetTierName(IStoragesManager::DefaultStorageId); - if (shardingActual) { - AppendedPortions.back().GetPortionConstructor().SetShardingVersion(shardingActual->GetSnapshotVersion()); + for (auto&& i : portions) { + auto blobsSchema = i.GetPortionInfo().GetSchema(context.SchemaVersions); + auto batch = i.RestoreBatch(*blobsSchema, *resultFiltered, seqDataColumnIds); + std::shared_ptr filter = + BuildPortionFilter(shardingActual, batch, i.GetPortionInfo(), usedPortionIds, resultFiltered); + merger.AddBatch(batch, filter); } - recordIdx += slice.GetRecordsCount(); } } + + std::optional shardingActualVersion; + if (shardingActual) { + shardingActualVersion = shardingActual->GetSnapshotVersion(); + } + AppendedPortions = merger.Execute(stats, CheckPoints, resultFiltered, GranuleMeta->GetPathId(), shardingActualVersion); + for (auto&& p : AppendedPortions) { + p.GetPortionConstructor().MutableMeta().UpdateRecordsMeta(NPortion::EProduced::SPLIT_COMPACTED); + } } TConclusionStatus TGeneralCompactColumnEngineChanges::DoConstructBlobs(TConstructionContext& context) noexcept { @@ -306,12 +167,9 @@ TConclusionStatus TGeneralCompactColumnEngineChanges::DoConstructBlobs(TConstruc NChanges::TGeneralCompactionCounters::OnRepackPortions(portionsCount, portionsSize); { - std::vector portions = TReadPortionInfoWithBlobs::RestorePortions(SwitchedPortions, Blobs, context.SchemaVersions); - if (!HasAppData() || AppDataVerified().ColumnShardConfig.GetUseChunkedMergeOnCompaction()) { - BuildAppendedPortionsByChunks(context, std::move(portions)); - } else { - BuildAppendedPortionsByFullBatches(context, std::move(portions)); - } + std::vector portions = + TReadPortionInfoWithBlobs::RestorePortions(SwitchedPortions, Blobs, context.SchemaVersions); + BuildAppendedPortionsByChunks(context, std::move(portions)); } if (IS_DEBUG_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD)) { @@ -328,7 +186,8 @@ TConclusionStatus 
TGeneralCompactColumnEngineChanges::DoConstructBlobs(TConstruc } AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "blobs_created_diff")("appended", sbAppended)("switched", sbSwitched); } - AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("event", "blobs_created")("appended", AppendedPortions.size())("switched", SwitchedPortions.size()); + AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("event", "blobs_created")("appended", AppendedPortions.size())( + "switched", SwitchedPortions.size()); return TConclusionStatus::Success(); } @@ -336,32 +195,29 @@ TConclusionStatus TGeneralCompactColumnEngineChanges::DoConstructBlobs(TConstruc void TGeneralCompactColumnEngineChanges::DoWriteIndexOnComplete(NColumnShard::TColumnShard* self, TWriteIndexCompleteContext& context) { TBase::DoWriteIndexOnComplete(self, context); if (self) { - self->IncCounter(context.FinishedSuccessfully ? NColumnShard::COUNTER_SPLIT_COMPACTION_SUCCESS : NColumnShard::COUNTER_SPLIT_COMPACTION_FAIL); - self->IncCounter(NColumnShard::COUNTER_SPLIT_COMPACTION_BLOBS_WRITTEN, context.BlobsWritten); - self->IncCounter(NColumnShard::COUNTER_SPLIT_COMPACTION_BYTES_WRITTEN, context.BytesWritten); + self->Counters.GetTabletCounters()->OnCompactionWriteIndexCompleted( + context.FinishedSuccessfully, context.BlobsWritten, context.BytesWritten); } } void TGeneralCompactColumnEngineChanges::DoStart(NColumnShard::TColumnShard& self) { TBase::DoStart(self); auto& g = *GranuleMeta; - self.CSCounters.OnSplitCompactionInfo(g.GetAdditiveSummary().GetCompacted().GetTotalPortionsSize(), g.GetAdditiveSummary().GetCompacted().GetPortionsCount()); + self.Counters.GetCSCounters().OnSplitCompactionInfo( + g.GetAdditiveSummary().GetCompacted().GetTotalPortionsSize(), g.GetAdditiveSummary().GetCompacted().GetPortionsCount()); } NColumnShard::ECumulativeCounters TGeneralCompactColumnEngineChanges::GetCounterIndex(const bool isSuccess) const { return isSuccess ? 
NColumnShard::COUNTER_COMPACTION_SUCCESS : NColumnShard::COUNTER_COMPACTION_FAIL; } -void TGeneralCompactColumnEngineChanges::AddCheckPoint(const NArrow::NMerger::TSortableBatchPosition& position, const bool include, const bool validationDuplications) { - AFL_VERIFY(CheckPoints.emplace(position, include).second || !validationDuplications); +void TGeneralCompactColumnEngineChanges::AddCheckPoint( + const NArrow::NMerger::TSortableBatchPosition& position, const bool include) { + CheckPoints.InsertPosition(position, include); } std::shared_ptr TGeneralCompactColumnEngineChanges::BuildMemoryPredictor() { - if (!HasAppData() || AppDataVerified().ColumnShardConfig.GetUseChunkedMergeOnCompaction()) { - return std::make_shared(); - } else { - return std::make_shared(); - } + return std::make_shared(); } ui64 TGeneralCompactColumnEngineChanges::TMemoryPredictorChunkedPolicy::AddPortion(const TPortionInfo& portionInfo) { @@ -386,7 +242,8 @@ ui64 TGeneralCompactColumnEngineChanges::TMemoryPredictorChunkedPolicy::AddPorti SumMemoryDelta = std::max(SumMemoryDelta, MaxMemoryByColumnChunk[i.first]); } - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("memory_prediction_after", SumMemoryFix + SumMemoryDelta)("portion_info", portionInfo.DebugString()); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("memory_prediction_after", SumMemoryFix + SumMemoryDelta)( + "portion_info", portionInfo.DebugString()); return SumMemoryFix + SumMemoryDelta; } diff --git a/ydb/core/tx/columnshard/engines/changes/general_compaction.h b/ydb/core/tx/columnshard/engines/changes/general_compaction.h index 75dc35317630..ab6f1e18684e 100644 --- a/ydb/core/tx/columnshard/engines/changes/general_compaction.h +++ b/ydb/core/tx/columnshard/engines/changes/general_compaction.h @@ -9,12 +9,11 @@ class TGeneralCompactColumnEngineChanges: public TCompactColumnEngineChanges { private: using TBase = TCompactColumnEngineChanges; virtual void DoWriteIndexOnComplete(NColumnShard::TColumnShard* self, TWriteIndexCompleteContext& 
context) override; - std::map CheckPoints; - void BuildAppendedPortionsByFullBatches(TConstructionContext& context, std::vector&& portions) noexcept; + NArrow::NMerger::TIntervalPositions CheckPoints; void BuildAppendedPortionsByChunks(TConstructionContext& context, std::vector&& portions) noexcept; std::shared_ptr BuildPortionFilter(const std::optional& shardingActual, - const std::shared_ptr& batch, const TPortionInfo& pInfo, const THashSet& portionsInUsage, + const std::shared_ptr& batch, const TPortionInfo& pInfo, const THashSet& portionsInUsage, const ISnapshotSchema::TPtr& resultSchema) const; protected: virtual TConclusionStatus DoConstructBlobs(TConstructionContext& context) noexcept override; @@ -64,7 +63,7 @@ class TGeneralCompactColumnEngineChanges: public TCompactColumnEngineChanges { static std::shared_ptr BuildMemoryPredictor(); - void AddCheckPoint(const NArrow::NMerger::TSortableBatchPosition& position, const bool include = true, const bool validationDuplications = true); + void AddCheckPoint(const NArrow::NMerger::TSortableBatchPosition& position, const bool include); virtual TString TypeString() const override { return StaticTypeName(); diff --git a/ydb/core/tx/columnshard/engines/changes/indexation.cpp b/ydb/core/tx/columnshard/engines/changes/indexation.cpp index e34fa411a347..22ca7fd2c738 100644 --- a/ydb/core/tx/columnshard/engines/changes/indexation.cpp +++ b/ydb/core/tx/columnshard/engines/changes/indexation.cpp @@ -1,12 +1,8 @@ #include "indexation.h" -#include -#include + +#include "compaction/merger.h" + #include -#include -#include -#include -#include -#include namespace NKikimr::NOlap { @@ -40,9 +36,7 @@ void TInsertColumnEngineChanges::DoWriteIndexOnComplete(NColumnShard::TColumnSha if (!DataToIndex.empty()) { self->UpdateInsertTableCounters(); } - self->IncCounter(NColumnShard::COUNTER_INDEXING_BLOBS_WRITTEN, context.BlobsWritten); - self->IncCounter(NColumnShard::COUNTER_INDEXING_BYTES_WRITTEN, context.BytesWritten); - 
self->IncCounter(NColumnShard::COUNTER_INDEXING_TIME, context.Duration.MilliSeconds()); + self->Counters.GetTabletCounters()->OnInsertionWriteIndexCompleted(context.BlobsWritten, context.BytesWritten, context.Duration); } } @@ -54,18 +48,70 @@ namespace { class TBatchInfo { private: - YDB_READONLY_DEF(std::shared_ptr, Batch); - const NEvWrite::EModificationType ModificationType; + YDB_READONLY_DEF(std::shared_ptr, Batch); + +public: + TBatchInfo(const std::shared_ptr& batch, const NEvWrite::EModificationType /*modificationType*/) + : Batch(batch) { + } +}; + +class TPathFieldsInfo { +private: + std::set UsageColumnIds; + const ISnapshotSchema::TPtr ResultSchema; + THashMap Schemas; + bool Finished = false; + const ui32 FullColumnsCount; + public: - TBatchInfo(const std::shared_ptr& batch, const NEvWrite::EModificationType modificationType) - : Batch(batch) - , ModificationType(modificationType) + TPathFieldsInfo(const ISnapshotSchema::TPtr& resultSchema) + : UsageColumnIds(IIndexInfo::GetNecessarySystemColumnIdsSet()) + , ResultSchema(resultSchema) + , FullColumnsCount(ResultSchema->GetIndexInfo().GetColumnIds(true).size()) { + AFL_VERIFY(FullColumnsCount); + } + + bool IsFinished() const { + return Finished; + } + bool HasDeletion() const { + AFL_VERIFY(Finished); + return UsageColumnIds.contains((ui32)IIndexInfo::ESpecialColumn::DELETE_FLAG); } - bool GetIsDeletion() const { - return ModificationType == NEvWrite::EModificationType::Delete; + void Finish() { + AFL_VERIFY(UsageColumnIds.size()); + AFL_VERIFY(!Finished); + Finished = true; + if (UsageColumnIds.size() == FullColumnsCount) { + return; + } + auto defaultDiffs = ISnapshotSchema::GetColumnsWithDifferentDefaults(Schemas, ResultSchema); + UsageColumnIds.insert(defaultDiffs.begin(), defaultDiffs.end()); + } + + const std::set& GetUsageColumnIds() const { + AFL_VERIFY(Finished); + return UsageColumnIds; + } + + void AddChunkInfo(const TCommittedData& data, const TConstructionContext& context) { + 
AFL_VERIFY(!Finished); + if (UsageColumnIds.size() == FullColumnsCount) { + return; + } + auto blobSchema = context.SchemaVersions.GetSchemaVerified(data.GetSchemaVersion()); + if (!Schemas.contains(data.GetSchemaVersion())) { + Schemas.emplace(data.GetSchemaVersion(), blobSchema); + } + std::vector filteredIds = data.GetMeta().GetSchemaSubset().Apply(blobSchema->GetIndexInfo().GetColumnIds(false)); + if (data.GetMeta().GetModificationType() == NEvWrite::EModificationType::Delete) { + filteredIds.emplace_back((ui32)IIndexInfo::ESpecialColumn::DELETE_FLAG); + } + UsageColumnIds.insert(filteredIds.begin(), filteredIds.end()); } }; @@ -73,22 +119,40 @@ class TPathData { private: std::vector Batches; YDB_READONLY_DEF(std::optional, ShardingInfo); - bool HasDeletionFlag = false; + TPathFieldsInfo ColumnsInfo; + public: - TPathData(const std::optional& shardingInfo) + TPathData(const std::optional& shardingInfo, const ISnapshotSchema::TPtr& resultSchema) : ShardingInfo(shardingInfo) - { - + , ColumnsInfo(resultSchema) { } - bool HasDeletion() { - return HasDeletionFlag; + const TPathFieldsInfo& GetColumnsInfo() const { + return ColumnsInfo; } - void AddBatch(const NOlap::TInsertedData& data, const std::shared_ptr& batch) { - if (data.GetMeta().GetModificationType() == NEvWrite::EModificationType::Delete) { - HasDeletionFlag = true; + void FinishChunksInfo() { + ColumnsInfo.Finish(); + } + + std::vector> GetGeneralContainers() const { + std::vector> result; + for (auto&& i : Batches) { + result.emplace_back(i.GetBatch()); } + return result; + } + + void AddChunkInfo(const NOlap::TCommittedData& data, const TConstructionContext& context) { + ColumnsInfo.AddChunkInfo(data, context); + } + + bool HasDeletion() { + return ColumnsInfo.HasDeletion(); + } + + void AddBatch(const NOlap::TCommittedData& data, const std::shared_ptr& batch) { + AFL_VERIFY(ColumnsInfo.IsFinished()); AFL_VERIFY(batch); Batches.emplace_back(batch, data.GetMeta().GetModificationType()); } @@ -100,121 
+164,117 @@ class TPathData { ShardingInfo = info; } } - - std::shared_ptr Merge(const TIndexInfo& indexInfo) const { - auto fullSchema = indexInfo.ArrowSchemaWithSpecials(); - NArrow::NMerger::TMergePartialStream stream(indexInfo.GetReplaceKey(), fullSchema, false, IIndexInfo::GetSnapshotColumnNames()); - THashMap fieldSizes; - ui64 rowsCount = 0; - for (auto&& batch : Batches) { - auto& forMerge = batch.GetBatch(); - stream.AddSource(forMerge, nullptr); - for (ui32 cIdx = 0; cIdx < (ui32)forMerge->num_columns(); ++cIdx) { - fieldSizes[forMerge->column_name(cIdx)] += NArrow::GetArrayDataSize(forMerge->column(cIdx)); - } - rowsCount += forMerge->num_rows(); - } - - NArrow::NMerger::TRecordBatchBuilder builder(fullSchema->fields(), rowsCount, fieldSizes); - stream.SetPossibleSameVersion(true); - stream.DrainAll(builder); - return builder.Finalize(); - } }; class TPathesData { private: THashMap Data; + const ISnapshotSchema::TPtr ResultSchema; public: + TPathesData(const ISnapshotSchema::TPtr& resultSchema) + : ResultSchema(resultSchema) { + } + + void FinishChunksInfo() { + for (auto&& i : Data) { + i.second.FinishChunksInfo(); + } + } + const THashMap& GetData() const { return Data; } - void Add(const NOlap::TInsertedData& inserted, const std::optional& info, const std::shared_ptr& batch) { - auto it = Data.find(inserted.PathId); + void AddChunkInfo(const NOlap::TCommittedData& inserted, const TConstructionContext& context) { + auto shardingFilterCommit = context.SchemaVersions.GetShardingInfoOptional(inserted.GetPathId(), inserted.GetSnapshot()); + auto it = Data.find(inserted.GetPathId()); if (it == Data.end()) { - it = Data.emplace(inserted.PathId, info).first; + it = Data.emplace(inserted.GetPathId(), TPathData(shardingFilterCommit, ResultSchema)).first; } - it->second.AddShardingInfo(info); + it->second.AddChunkInfo(inserted, context); + it->second.AddShardingInfo(shardingFilterCommit); + } + + void AddBatch(const NOlap::TCommittedData& inserted, const 
std::shared_ptr& batch) { + auto it = Data.find(inserted.GetPathId()); + AFL_VERIFY(it != Data.end()); it->second.AddBatch(inserted, batch); } + + const TPathFieldsInfo& GetPathInfo(const ui64 pathId) const { + auto it = Data.find(pathId); + AFL_VERIFY(it != Data.end()); + return it->second.GetColumnsInfo(); + } }; -} + +} // namespace TConclusionStatus TInsertColumnEngineChanges::DoConstructBlobs(TConstructionContext& context) noexcept { Y_ABORT_UNLESS(!DataToIndex.empty()); Y_ABORT_UNLESS(AppendedPortions.empty()); - auto maxSnapshot = TSnapshot::Zero(); + auto resultSchema = context.SchemaVersions.GetLastSchema(); + Y_ABORT_UNLESS(resultSchema->GetIndexInfo().IsSorted()); + + TPathesData pathBatches(resultSchema); for (auto& inserted : DataToIndex) { - TSnapshot insertSnap = inserted.GetSnapshot(); - Y_ABORT_UNLESS(insertSnap.Valid()); - if (insertSnap > maxSnapshot) { - maxSnapshot = insertSnap; + if (inserted.GetRemove()) { + continue; } + pathBatches.AddChunkInfo(inserted, context); } - Y_ABORT_UNLESS(maxSnapshot.Valid()); - auto resultSchema = context.SchemaVersions.GetSchema(maxSnapshot); - Y_ABORT_UNLESS(resultSchema->GetIndexInfo().IsSorted()); + pathBatches.FinishChunksInfo(); - TPathesData pathBatches; for (auto& inserted : DataToIndex) { const TBlobRange& blobRange = inserted.GetBlobRange(); - - auto shardingFilterCommit = context.SchemaVersions.GetShardingInfoOptional(inserted.PathId, inserted.GetSnapshot()); - + if (inserted.GetRemove()) { + Blobs.Extract(IStoragesManager::DefaultStorageId, blobRange); + continue; + } auto blobSchema = context.SchemaVersions.GetSchemaVerified(inserted.GetSchemaVersion()); - auto& indexInfo = blobSchema->GetIndexInfo(); - Y_ABORT_UNLESS(indexInfo.IsSorted()); - std::shared_ptr batch; + std::shared_ptr batch; { const auto blobData = Blobs.Extract(IStoragesManager::DefaultStorageId, blobRange); - Y_ABORT_UNLESS(blobData.size(), "Blob data not present"); - // Prepare batch - batch = NArrow::DeserializeBatch(blobData, 
indexInfo.ArrowSchema()); - AFL_VERIFY(batch)("event", "cannot_parse") - ("data_snapshot", TStringBuilder() << inserted.GetSnapshot()) - ("index_snapshot", TStringBuilder() << blobSchema->GetSnapshot()); - ; + auto batchSchema = + std::make_shared(inserted.GetMeta().GetSchemaSubset().Apply(blobSchema->GetIndexInfo().ArrowSchema()->fields())); + batch = std::make_shared(NArrow::DeserializeBatch(blobData, batchSchema)); + blobSchema->AdaptBatchToSchema(*batch, resultSchema); } + IIndexInfo::AddSnapshotColumns(*batch, inserted.GetSnapshot()); - batch = AddSpecials(batch, indexInfo, inserted); - batch = resultSchema->NormalizeBatch(*blobSchema, batch).DetachResult(); - pathBatches.Add(inserted, shardingFilterCommit, batch); - Y_DEBUG_ABORT_UNLESS(NArrow::IsSorted(batch, resultSchema->GetIndexInfo().GetReplaceKey())); + auto& pathInfo = pathBatches.GetPathInfo(inserted.GetPathId()); + + if (pathInfo.HasDeletion()) { + IIndexInfo::AddDeleteFlagsColumn(*batch, inserted.GetMeta().GetModificationType() == NEvWrite::EModificationType::Delete); + } + + pathBatches.AddBatch(inserted, batch); } Y_ABORT_UNLESS(Blobs.IsEmpty()); - const std::vector comparableColumns = resultSchema->GetIndexInfo().GetReplaceKey()->field_names(); + auto stats = std::make_shared(); + std::vector> filters; for (auto& [pathId, pathInfo] : pathBatches.GetData()) { - auto shardingFilter = context.SchemaVersions.GetShardingInfoActual(pathId); - auto mergedBatch = pathInfo.Merge(resultSchema->GetIndexInfo()); + auto filteredSnapshot = std::make_shared(resultSchema, pathInfo.GetColumnsInfo().GetUsageColumnIds()); + std::optional shardingVersion; + if (pathInfo.GetShardingInfo()) { + shardingVersion = pathInfo.GetShardingInfo()->GetSnapshotVersion(); + } + auto batches = pathInfo.GetGeneralContainers(); + filters.resize(batches.size()); auto itGranule = PathToGranule.find(pathId); - AFL_VERIFY(itGranule != PathToGranule.end()); - std::vector> result = NArrow::NMerger::TRWSortableBatchPosition:: - 
SplitByBordersInSequentialContainer(mergedBatch, comparableColumns, itGranule->second); - for (auto&& b : result) { - if (!b) { - continue; - } - std::optional externalSaver; - if (b->num_rows() < 100) { - externalSaver = NArrow::NSerialization::TSerializerContainer(std::make_shared(arrow::Compression::type::UNCOMPRESSED)); - } else { - externalSaver = NArrow::NSerialization::TSerializerContainer(std::make_shared(arrow::Compression::type::LZ4_FRAME)); - } - auto portions = MakeAppendedPortions(b, pathId, maxSnapshot, nullptr, context, externalSaver); - Y_ABORT_UNLESS(portions.size()); - for (auto& portion : portions) { - if (pathInfo.GetShardingInfo()) { - portion.GetPortionConstructor().SetShardingVersion(pathInfo.GetShardingInfo()->GetSnapshotVersion()); - } - AppendedPortions.emplace_back(std::move(portion)); - } + AFL_VERIFY(itGranule != PathToGranule.end())("path_id", pathId); + NCompaction::TMerger merger(context, SaverContext, std::move(batches), std::move(filters)); + merger.SetOptimizationWritingPackMode(true); + auto localAppended = merger.Execute(stats, itGranule->second, filteredSnapshot, pathId, shardingVersion); + for (auto&& i : localAppended) { + i.GetPortionConstructor().MutableMeta().UpdateRecordsMeta(NPortion::EProduced::INSERTED); + AppendedPortions.emplace_back(std::move(i)); } } @@ -222,15 +282,8 @@ TConclusionStatus TInsertColumnEngineChanges::DoConstructBlobs(TConstructionCont return TConclusionStatus::Success(); } -std::shared_ptr TInsertColumnEngineChanges::AddSpecials(const std::shared_ptr& srcBatch, - const TIndexInfo& indexInfo, const TInsertedData& inserted) const { - auto batch = IIndexInfo::AddSnapshotColumns(srcBatch, inserted.GetSnapshot()); - batch = IIndexInfo::AddDeleteFlagsColumn(batch, inserted.GetMeta().GetModificationType() == NEvWrite::EModificationType::Delete); - return NArrow::TColumnOperator().Adapt(batch, indexInfo.ArrowSchemaWithSpecials()).DetachResult(); -} - NColumnShard::ECumulativeCounters 
TInsertColumnEngineChanges::GetCounterIndex(const bool isSuccess) const { return isSuccess ? NColumnShard::COUNTER_INDEXING_SUCCESS : NColumnShard::COUNTER_INDEXING_FAIL; } -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/changes/indexation.h b/ydb/core/tx/columnshard/engines/changes/indexation.h index 95befd334c23..4c7f8602a6f5 100644 --- a/ydb/core/tx/columnshard/engines/changes/indexation.h +++ b/ydb/core/tx/columnshard/engines/changes/indexation.h @@ -1,18 +1,21 @@ #pragma once -#include "abstract/abstract.h" #include "with_appended.h" -#include -#include + +#include "abstract/abstract.h" + #include +#include +#include + +#include namespace NKikimr::NOlap { class TInsertColumnEngineChanges: public TChangesWithAppend { private: using TBase = TChangesWithAppend; - std::shared_ptr AddSpecials(const std::shared_ptr& srcBatch, - const TIndexInfo& indexInfo, const TInsertedData& inserted) const; - std::vector DataToIndex; + std::vector DataToIndex; + protected: virtual void DoWriteIndexOnComplete(NColumnShard::TColumnShard* self, TWriteIndexCompleteContext& context) override; virtual void DoWriteIndexOnExecute(NColumnShard::TColumnShard* self, TWriteIndexContext& context) override; @@ -34,15 +37,14 @@ class TInsertColumnEngineChanges: public TChangesWithAppend { } public: - THashMap> PathToGranule; // pathId -> positions (sorted by pk) + THashMap PathToGranule; // pathId -> positions (sorted by pk) public: - TInsertColumnEngineChanges(std::vector&& dataToIndex, const TSaverContext& saverContext) + TInsertColumnEngineChanges(std::vector&& dataToIndex, const TSaverContext& saverContext) : TBase(saverContext, NBlobOperations::EConsumer::INDEXATION) - , DataToIndex(std::move(dataToIndex)) - { + , DataToIndex(std::move(dataToIndex)) { } - const std::vector& GetDataToIndex() const { + const std::vector& GetDataToIndex() const { return DataToIndex; } @@ -54,7 +56,6 @@ class TInsertColumnEngineChanges: public TChangesWithAppend { return 
StaticTypeName(); } std::optional AddPathIfNotExists(ui64 pathId); - }; -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/changes/ttl.cpp b/ydb/core/tx/columnshard/engines/changes/ttl.cpp index f23c799e3319..fc74dbea0454 100644 --- a/ydb/core/tx/columnshard/engines/changes/ttl.cpp +++ b/ydb/core/tx/columnshard/engines/changes/ttl.cpp @@ -46,7 +46,7 @@ void TTTLColumnEngineChanges::DoOnFinish(NColumnShard::TColumnShard& self, TChan } } -std::optional TTTLColumnEngineChanges::UpdateEvictedPortion(TPortionForEviction& info, NBlobOperations::NRead::TCompositeReadBlobs& srcBlobs, +std::optional TTTLColumnEngineChanges::UpdateEvictedPortion(TPortionForEviction& info, NBlobOperations::NRead::TCompositeReadBlobs& srcBlobs, TConstructionContext& context) const { const TPortionInfo& portionInfo = info.GetPortionInfo(); @@ -55,7 +55,7 @@ std::optional TTTLColumnEngineChanges::UpdateEvicted Y_ABORT_UNLESS(portionInfo.GetMeta().GetTierName() != evictFeatures.GetTargetTierName() || blobSchema->GetVersion() < evictFeatures.GetTargetScheme()->GetVersion()); auto portionWithBlobs = TReadPortionInfoWithBlobs::RestorePortion(portionInfo, srcBlobs, blobSchema->GetIndexInfo()); - std::optional result = TReadPortionInfoWithBlobs::SyncPortion( + std::optional result = TReadPortionInfoWithBlobs::SyncPortion( std::move(portionWithBlobs), blobSchema, evictFeatures.GetTargetScheme(), evictFeatures.GetTargetTierName(), SaverContext.GetStoragesManager(), context.Counters.SplitterCounters); return std::move(result); } diff --git a/ydb/core/tx/columnshard/engines/changes/ttl.h b/ydb/core/tx/columnshard/engines/changes/ttl.h index 92eb0ffa9b3c..b75795e16fe4 100644 --- a/ydb/core/tx/columnshard/engines/changes/ttl.h +++ b/ydb/core/tx/columnshard/engines/changes/ttl.h @@ -40,7 +40,7 @@ class TTTLColumnEngineChanges: public TChangesWithAppend { } }; - std::optional UpdateEvictedPortion(TPortionForEviction& info, NBlobOperations::NRead::TCompositeReadBlobs& srcBlobs, + 
std::optional UpdateEvictedPortion(TPortionForEviction& info, NBlobOperations::NRead::TCompositeReadBlobs& srcBlobs, TConstructionContext& context) const; std::vector PortionsToEvict; diff --git a/ydb/core/tx/columnshard/engines/changes/with_appended.cpp b/ydb/core/tx/columnshard/engines/changes/with_appended.cpp index edc8f8ca86de..24d44eb34587 100644 --- a/ydb/core/tx/columnshard/engines/changes/with_appended.cpp +++ b/ydb/core/tx/columnshard/engines/changes/with_appended.cpp @@ -17,9 +17,9 @@ void TChangesWithAppend::DoWriteIndexOnExecute(NColumnShard::TColumnShard* self, AFL_VERIFY(usedPortionIds.emplace(portionInfo.GetPortionId()).second)("portion_info", portionInfo.DebugString(true)); portionInfo.SaveToDatabase(context.DBWrapper, schemaPtr->GetIndexInfo().GetPKFirstColumnId(), false); } - const auto predRemoveDroppedTable = [self](const TWritePortionInfoWithBlobs& item) { + const auto predRemoveDroppedTable = [self](const TWritePortionInfoWithBlobsResult& item) { auto& portionInfo = item.GetPortionResult(); - if (!!self && (!self->TablesManager.HasTable(portionInfo.GetPathId()) || self->TablesManager.GetTable(portionInfo.GetPathId()).IsDropped())) { + if (!!self && !self->TablesManager.HasTable(portionInfo.GetPathId(), false)) { AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "skip_inserted_data")("reason", "table_removed")("path_id", portionInfo.GetPathId()); return true; } else { @@ -42,13 +42,13 @@ void TChangesWithAppend::DoWriteIndexOnComplete(NColumnShard::TColumnShard* self case NOlap::TPortionMeta::EProduced::UNSPECIFIED: Y_ABORT_UNLESS(false); // unexpected case NOlap::TPortionMeta::EProduced::INSERTED: - self->IncCounter(NColumnShard::COUNTER_INDEXING_PORTIONS_WRITTEN); + self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_INDEXING_PORTIONS_WRITTEN); break; case NOlap::TPortionMeta::EProduced::COMPACTED: - self->IncCounter(NColumnShard::COUNTER_COMPACTION_PORTIONS_WRITTEN); + 
self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_COMPACTION_PORTIONS_WRITTEN); break; case NOlap::TPortionMeta::EProduced::SPLIT_COMPACTED: - self->IncCounter(NColumnShard::COUNTER_SPLIT_COMPACTION_PORTIONS_WRITTEN); + self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_SPLIT_COMPACTION_PORTIONS_WRITTEN); break; case NOlap::TPortionMeta::EProduced::EVICTED: Y_ABORT("Unexpected evicted case"); @@ -58,19 +58,19 @@ void TChangesWithAppend::DoWriteIndexOnComplete(NColumnShard::TColumnShard* self break; } } - self->IncCounter(NColumnShard::COUNTER_PORTIONS_DEACTIVATED, PortionsToRemove.size()); + self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_PORTIONS_DEACTIVATED, PortionsToRemove.size()); THashSet blobsDeactivated; for (auto& [_, portionInfo] : PortionsToRemove) { for (auto& rec : portionInfo.Records) { blobsDeactivated.emplace(portionInfo.GetBlobId(rec.BlobRange.GetBlobIdxVerified())); } - self->IncCounter(NColumnShard::COUNTER_RAW_BYTES_DEACTIVATED, portionInfo.GetTotalRawBytes()); + self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_RAW_BYTES_DEACTIVATED, portionInfo.GetTotalRawBytes()); } - self->IncCounter(NColumnShard::COUNTER_BLOBS_DEACTIVATED, blobsDeactivated.size()); + self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_BLOBS_DEACTIVATED, blobsDeactivated.size()); for (auto& blobId : blobsDeactivated) { - self->IncCounter(NColumnShard::COUNTER_BYTES_DEACTIVATED, blobId.BlobSize()); + self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_BYTES_DEACTIVATED, blobId.BlobSize()); } } { @@ -89,7 +89,6 @@ void TChangesWithAppend::DoWriteIndexOnComplete(NColumnShard::TColumnShard* self void TChangesWithAppend::DoCompile(TFinalizationContext& context) { for (auto&& i : AppendedPortions) { i.GetPortionConstructor().SetPortionId(context.NextPortionId()); - i.GetPortionConstructor().MutableMeta().UpdateRecordsMeta(TPortionMeta::EProduced::INSERTED); } for (auto& 
[_, portionInfo] : PortionsToRemove) { portionInfo.SetRemoveSnapshot(context.GetSnapshot()); @@ -102,49 +101,6 @@ void TChangesWithAppend::DoOnAfterCompile() { } } -std::vector TChangesWithAppend::MakeAppendedPortions(const std::shared_ptr batch, - const ui64 pathId, const TSnapshot& snapshot, const TGranuleMeta* granuleMeta, TConstructionContext& context, const std::optional& overrideSaver) const { - Y_ABORT_UNLESS(batch->num_rows()); - - auto resultSchema = context.SchemaVersions.GetSchema(snapshot); - - std::shared_ptr stats = std::make_shared(); - if (granuleMeta) { - stats = granuleMeta->BuildSerializationStats(resultSchema); - } - auto schema = std::make_shared(resultSchema, stats); - if (overrideSaver) { - schema->SetOverrideSerializer(*overrideSaver); - } - std::vector out; - { - std::vector pages = TBatchSerializedSlice::BuildSimpleSlices(batch, NSplitter::TSplitSettings(), context.Counters.SplitterCounters, schema); - std::vector generalPages; - for (auto&& i : pages) { - auto portionColumns = i.GetPortionChunksToHash(); - resultSchema->GetIndexInfo().AppendIndexes(portionColumns); - generalPages.emplace_back(portionColumns, schema, context.Counters.SplitterCounters); - } - - const NSplitter::TEntityGroups groups = resultSchema->GetIndexInfo().GetEntityGroupsByStorageId(IStoragesManager::DefaultStorageId, *SaverContext.GetStoragesManager()); - TSimilarPacker slicer(NSplitter::TSplitSettings().GetExpectedPortionSize()); - auto packs = slicer.Split(generalPages); - - ui32 recordIdx = 0; - for (auto&& i : packs) { - TGeneralSerializedSlice slice(std::move(i)); - auto b = batch->Slice(recordIdx, slice.GetRecordsCount()); - out.emplace_back(TWritePortionInfoWithBlobs::BuildByBlobs(slice.GroupChunksByBlobs(groups), pathId, resultSchema->GetVersion(), snapshot, SaverContext.GetStoragesManager())); - out.back().FillStatistics(resultSchema->GetIndexInfo()); - out.back().GetPortionConstructor().AddMetadata(*resultSchema, b); - 
out.back().GetPortionConstructor().MutableMeta().SetTierName(IStoragesManager::DefaultStorageId); - recordIdx += slice.GetRecordsCount(); - } - } - - return out; -} - void TChangesWithAppend::DoStart(NColumnShard::TColumnShard& /*self*/) { } diff --git a/ydb/core/tx/columnshard/engines/changes/with_appended.h b/ydb/core/tx/columnshard/engines/changes/with_appended.h index 4c5fbc2189c6..e35dfbbe4acc 100644 --- a/ydb/core/tx/columnshard/engines/changes/with_appended.h +++ b/ydb/core/tx/columnshard/engines/changes/with_appended.h @@ -17,8 +17,6 @@ class TChangesWithAppend: public TColumnEngineChanges { virtual void DoWriteIndexOnExecute(NColumnShard::TColumnShard* self, TWriteIndexContext& context) override; virtual void DoWriteIndexOnComplete(NColumnShard::TColumnShard* self, TWriteIndexCompleteContext& context) override; virtual void DoStart(NColumnShard::TColumnShard& self) override; - std::vector MakeAppendedPortions(const std::shared_ptr batch, const ui64 granule, - const TSnapshot& snapshot, const TGranuleMeta* granuleMeta, TConstructionContext& context, const std::optional& overrideSaver) const; virtual void DoDebugString(TStringOutput& out) const override { out << "remove=" << PortionsToRemove.size() << ";append=" << AppendedPortions.size() << ";"; @@ -61,11 +59,11 @@ class TChangesWithAppend: public TColumnEngineChanges { AFL_VERIFY(PortionsToRemove.emplace(info.GetAddress(), info).second); } - std::vector AppendedPortions; + std::vector AppendedPortions; virtual ui32 GetWritePortionsCount() const override { return AppendedPortions.size(); } - virtual TWritePortionInfoWithBlobs* GetWritePortionInfo(const ui32 index) override { + virtual TWritePortionInfoWithBlobsResult* GetWritePortionInfo(const ui32 index) override { Y_ABORT_UNLESS(index < AppendedPortions.size()); return &AppendedPortions[index]; } diff --git a/ydb/core/tx/columnshard/engines/column_engine.h b/ydb/core/tx/columnshard/engines/column_engine.h index 912223f8cf1b..2c616c06e32d 100644 --- 
a/ydb/core/tx/columnshard/engines/column_engine.h +++ b/ydb/core/tx/columnshard/engines/column_engine.h @@ -46,10 +46,6 @@ struct TSelectInfo { std::vector> PortionsOrderedPK; - NColumnShard::TContainerAccessorWithDirection>> GetPortionsOrdered(const bool reverse) const { - return NColumnShard::TContainerAccessorWithDirection>>(PortionsOrderedPK, reverse); - } - size_t NumChunks() const; TStats Stats() const; @@ -284,7 +280,7 @@ class IColumnEngine { } virtual bool IsOverloadedByMetadata(const ui64 limit) const = 0; virtual std::shared_ptr Select(ui64 pathId, TSnapshot snapshot, const TPKRangesFilter& pkRangesFilter) const = 0; - virtual std::shared_ptr StartInsert(std::vector&& dataToIndex) noexcept = 0; + virtual std::shared_ptr StartInsert(std::vector&& dataToIndex) noexcept = 0; virtual std::shared_ptr StartCompaction(const std::shared_ptr& dataLocksManager) noexcept = 0; virtual std::shared_ptr StartCleanupPortions(const TSnapshot& snapshot, const THashSet& pathsToDrop, const std::shared_ptr& dataLocksManager) noexcept = 0; virtual std::shared_ptr StartCleanupTables(const THashSet& pathsToDrop) noexcept = 0; diff --git a/ydb/core/tx/columnshard/engines/column_engine_logs.cpp b/ydb/core/tx/columnshard/engines/column_engine_logs.cpp index 5c2a2d6ad62b..95f9a41aa050 100644 --- a/ydb/core/tx/columnshard/engines/column_engine_logs.cpp +++ b/ydb/core/tx/columnshard/engines/column_engine_logs.cpp @@ -60,8 +60,14 @@ const TColumnEngineStats& TColumnEngineForLogs::GetTotalStats() { void TColumnEngineForLogs::UpdatePortionStats(const TPortionInfo& portionInfo, EStatsUpdateType updateType, const TPortionInfo* exPortionInfo) { - UpdatePortionStats(Counters, portionInfo, updateType, exPortionInfo); - + if (IS_LOG_PRIORITY_ENABLED(NActors::NLog::PRI_DEBUG, NKikimrServices::TX_COLUMNSHARD)) { + auto before = Counters.Active(); + UpdatePortionStats(Counters, portionInfo, updateType, exPortionInfo); + auto after = Counters.Active(); + 
AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "portion_stats_updated")("type", updateType)("path_id", portionInfo.GetPathId())("portion", portionInfo.GetPortionId())("before_size", before.Bytes)("after_size", after.Bytes)("before_rows", before.Rows)("after_rows", after.Rows); + } else { + UpdatePortionStats(Counters, portionInfo, updateType, exPortionInfo); + } const ui64 pathId = portionInfo.GetPathId(); Y_ABORT_UNLESS(pathId); if (!PathStats.contains(pathId)) { @@ -157,10 +163,9 @@ void TColumnEngineForLogs::RegisterSchemaVersion(const TSnapshot& snapshot, TInd } void TColumnEngineForLogs::RegisterSchemaVersion(const TSnapshot& snapshot, const NKikimrSchemeOp::TColumnTableSchema& schema) { - std::optional indexInfoOptional = NOlap::TIndexInfo::BuildFromProto(schema, StoragesManager); + std::optional indexInfoOptional = NOlap::TIndexInfo::BuildFromProto(schema, StoragesManager, SchemaObjectsCache); AFL_VERIFY(indexInfoOptional); NOlap::TIndexInfo indexInfo = std::move(*indexInfoOptional); - indexInfo.SetAllKeys(StoragesManager); RegisterSchemaVersion(snapshot, std::move(indexInfo)); } @@ -272,7 +277,7 @@ bool TColumnEngineForLogs::LoadCounters(IDbWrapper& db) { return db.LoadCounters(callback); } -std::shared_ptr TColumnEngineForLogs::StartInsert(std::vector&& dataToIndex) noexcept { +std::shared_ptr TColumnEngineForLogs::StartInsert(std::vector&& dataToIndex) noexcept { Y_ABORT_UNLESS(dataToIndex.size()); TSaverContext saverContext(StoragesManager); @@ -280,12 +285,15 @@ std::shared_ptr TColumnEngineForLogs::StartInsert(st auto pkSchema = VersionedIndex.GetLastSchema()->GetIndexInfo().GetReplaceKey(); for (const auto& data : changes->GetDataToIndex()) { - const ui64 pathId = data.PathId; + const ui64 pathId = data.GetPathId(); if (changes->PathToGranule.contains(pathId)) { continue; } - changes->PathToGranule[pathId] = GetGranulePtrVerified(pathId)->GetBucketPositions(); + if (!data.GetRemove()) { + AFL_VERIFY(changes->PathToGranule.emplace(pathId, 
GetGranulePtrVerified(pathId)->GetBucketPositions()).second); + } + } return changes; @@ -341,7 +349,6 @@ std::shared_ptr TColumnEngineForLogs::Start ui32 skipLocked = 0; ui32 portionsFromDrop = 0; bool limitExceeded = false; - THashSet uniquePortions; for (ui64 pathId : pathsToDrop) { auto g = GranulesStorage->GetGranuleOptional(pathId); if (!g) { @@ -349,6 +356,9 @@ std::shared_ptr TColumnEngineForLogs::Start } for (auto& [portion, info] : g->GetPortions()) { + if (info->CheckForCleanup()) { + continue; + } if (dataLocksManager->IsLocked(*info)) { ++skipLocked; continue; @@ -359,8 +369,6 @@ std::shared_ptr TColumnEngineForLogs::Start limitExceeded = true; break; } - const auto inserted = uniquePortions.emplace(info->GetAddress()).second; - Y_ABORT_UNLESS(inserted); changes->PortionsToDrop.push_back(*info); ++portionsFromDrop; } @@ -368,8 +376,9 @@ std::shared_ptr TColumnEngineForLogs::Start const TInstant snapshotInstant = snapshot.GetPlanInstant(); for (auto it = CleanupPortions.begin(); !limitExceeded && it != CleanupPortions.end();) { - if (it->first >= snapshotInstant) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "StartCleanupStop")("snapshot", snapshot.DebugString())("current_snapshot_ts", it->first); + if (it->first > snapshotInstant) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "StartCleanupStop")("snapshot", snapshot.DebugString())( + "current_snapshot_ts", it->first.MilliSeconds()); break; } for (ui32 i = 0; i < it->second.size();) { @@ -378,17 +387,14 @@ std::shared_ptr TColumnEngineForLogs::Start ++i; continue; } - const auto inserted = uniquePortions.emplace(it->second[i].GetAddress()).second; - if (inserted) { - Y_ABORT_UNLESS(it->second[i].CheckForCleanup(snapshot)); - if (txSize + it->second[i].GetTxVolume() < txSizeLimit || changes->PortionsToDrop.empty()) { - txSize += it->second[i].GetTxVolume(); - } else { - limitExceeded = true; - break; - } - changes->PortionsToDrop.push_back(std::move(it->second[i])); + 
AFL_VERIFY(it->second[i].CheckForCleanup(snapshot))("p_snapshot", it->second[i].GetRemoveSnapshotOptional())("snapshot", snapshot); + if (txSize + it->second[i].GetTxVolume() < txSizeLimit || changes->PortionsToDrop.empty()) { + txSize += it->second[i].GetTxVolume(); + } else { + limitExceeded = true; + break; } + changes->PortionsToDrop.push_back(std::move(it->second[i])); if (i + 1 < it->second.size()) { it->second[i] = std::move(it->second.back()); } @@ -420,7 +426,7 @@ std::vector> TColumnEngineForLogs::Star TSaverContext saverContext(StoragesManager); NActualizer::TTieringProcessContext context(memoryUsageLimit, saverContext, dataLocksManager, SignalCounters, ActualizationController); - const TDuration actualizationLag = NYDBTest::TControllers::GetColumnShardController()->GetActualizationTasksLag(TDuration::Seconds(1)); + const TDuration actualizationLag = NYDBTest::TControllers::GetColumnShardController()->GetActualizationTasksLag(); for (auto&& i : pathEviction) { auto g = GetGranuleOptional(i.first); if (g) { @@ -485,9 +491,8 @@ void TColumnEngineForLogs::UpsertPortion(const TPortionInfo& portionInfo, const bool TColumnEngineForLogs::ErasePortion(const TPortionInfo& portionInfo, bool updateStats) { Y_ABORT_UNLESS(!portionInfo.Empty()); const ui64 portion = portionInfo.GetPortion(); - auto spg = GetGranulePtrVerified(portionInfo.GetPathId()); - Y_ABORT_UNLESS(spg); - auto p = spg->GetPortionOptional(portion); + auto& spg = MutableGranuleVerified(portionInfo.GetPathId()); + auto p = spg.GetPortionOptional(portion); if (!p) { LOG_S_WARN("Portion erased already " << portionInfo << " at tablet " << TabletId); @@ -496,7 +501,7 @@ bool TColumnEngineForLogs::ErasePortion(const TPortionInfo& portionInfo, bool up if (updateStats) { UpdatePortionStats(*p, EStatsUpdateType::ERASE); } - Y_ABORT_UNLESS(spg->ErasePortion(portion)); + Y_ABORT_UNLESS(spg.ErasePortion(portion)); return true; } } @@ -509,20 +514,18 @@ std::shared_ptr TColumnEngineForLogs::Select(ui64 pathId, 
TSnapshot return out; } - for (const auto& [indexKey, keyPortions] : spg->GetPortionsIndex().GetPoints()) { - for (auto&& [_, portionInfo] : keyPortions.GetStart()) { - if (!portionInfo->IsVisible(snapshot)) { - continue; - } - Y_ABORT_UNLESS(portionInfo->Produced()); - const bool skipPortion = !pkRangesFilter.IsPortionInUsage(*portionInfo, VersionedIndex.GetLastSchema()->GetIndexInfo()); - AFL_TRACE(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", skipPortion ? "portion_skipped" : "portion_selected") - ("pathId", pathId)("portion", portionInfo->DebugString()); - if (skipPortion) { - continue; - } - out->PortionsOrderedPK.emplace_back(portionInfo); + for (const auto& [_, portionInfo] : spg->GetPortions()) { + if (!portionInfo->IsVisible(snapshot)) { + continue; } + Y_ABORT_UNLESS(portionInfo->Produced()); + const bool skipPortion = !pkRangesFilter.IsPortionInUsage(*portionInfo); + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", skipPortion ? "portion_skipped" : "portion_selected")("pathId", pathId)( + "portion", portionInfo->DebugString()); + if (skipPortion) { + continue; + } + out->PortionsOrderedPK.emplace_back(portionInfo); } return out; @@ -574,12 +577,4 @@ void TColumnEngineForLogs::DoRegisterTable(const ui64 pathId) { } } -TDuration TColumnEngineForLogs::GetRemovedPortionLivetime() { - TDuration result = TDuration::Minutes(10); - if (HasAppData() && AppDataVerified().ColumnShardConfig.HasRemovedPortionLivetimeSeconds()) { - result = TDuration::Seconds(AppDataVerified().ColumnShardConfig.GetRemovedPortionLivetimeSeconds()); - } - return NYDBTest::TControllers::GetColumnShardController()->GetRemovedPortionLivetime(result); -} - } // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/column_engine_logs.h b/ydb/core/tx/columnshard/engines/column_engine_logs.h index a48fb9cda9e0..7b515c26f40c 100644 --- a/ydb/core/tx/columnshard/engines/column_engine_logs.h +++ b/ydb/core/tx/columnshard/engines/column_engine_logs.h @@ -53,10 +53,7 @@ 
class TColumnEngineForLogs : public IColumnEngine { std::shared_ptr StoragesManager; std::shared_ptr ActualizationController; - - static TDuration GetRemovedPortionLivetime(); - - const TDuration RemovedPortionLivetime = GetRemovedPortionLivetime(); + std::shared_ptr SchemaObjectsCache = std::make_shared(); public: const std::shared_ptr& GetActualizationController() const { @@ -112,7 +109,7 @@ class TColumnEngineForLogs : public IColumnEngine { return limit < TGranulesStat::GetSumMetadataMemoryPortionsSize(); } - std::shared_ptr StartInsert(std::vector&& dataToIndex) noexcept override; + std::shared_ptr StartInsert(std::vector&& dataToIndex) noexcept override; std::shared_ptr StartCompaction(const std::shared_ptr& dataLocksManager) noexcept override; std::shared_ptr StartCleanupPortions(const TSnapshot& snapshot, const THashSet& pathsToDrop, const std::shared_ptr& dataLocksManager) noexcept override; std::shared_ptr StartCleanupTables(const THashSet& pathsToDrop) noexcept override; @@ -154,6 +151,10 @@ class TColumnEngineForLogs : public IColumnEngine { return *GetGranulePtrVerified(pathId); } + TGranuleMeta& MutableGranuleVerified(const ui64 pathId) const { + return *GetGranulePtrVerified(pathId); + } + std::shared_ptr GetGranulePtrVerified(const ui64 pathId) const { auto result = GetGranuleOptional(pathId); AFL_VERIFY(result)("path_id", pathId); @@ -173,7 +174,7 @@ class TColumnEngineForLogs : public IColumnEngine { } void AddCleanupPortion(const TPortionInfo& info) { - CleanupPortions[info.GetRemoveSnapshotVerified().GetPlanInstant() + RemovedPortionLivetime].emplace_back(info); + CleanupPortions[info.GetRemoveSnapshotVerified().GetPlanInstant()].emplace_back(info); } void AddShardingInfo(const TGranuleShardingInfo& shardingInfo) { VersionedIndex.AddShardingInfo(shardingInfo); diff --git a/ydb/core/tx/columnshard/engines/db_wrapper.cpp b/ydb/core/tx/columnshard/engines/db_wrapper.cpp index b605475552cc..b5c8e5e4ea58 100644 --- 
a/ydb/core/tx/columnshard/engines/db_wrapper.cpp +++ b/ydb/core/tx/columnshard/engines/db_wrapper.cpp @@ -12,7 +12,7 @@ void TDbWrapper::Insert(const TInsertedData& data) { NColumnShard::Schema::InsertTable_Insert(db, data); } -void TDbWrapper::Commit(const TInsertedData& data) { +void TDbWrapper::Commit(const TCommittedData& data) { NIceDb::TNiceDb db(Database); NColumnShard::Schema::InsertTable_Commit(db, data); } @@ -27,7 +27,7 @@ void TDbWrapper::EraseInserted(const TInsertedData& data) { NColumnShard::Schema::InsertTable_EraseInserted(db, data); } -void TDbWrapper::EraseCommitted(const TInsertedData& data) { +void TDbWrapper::EraseCommitted(const TCommittedData& data) { NIceDb::TNiceDb db(Database); NColumnShard::Schema::InsertTable_EraseCommitted(db, data); } @@ -144,16 +144,23 @@ bool TDbWrapper::LoadPortions(const std::function().Key(portion.GetPathId(), portion.GetPortionId(), row.GetIndexId(), row.GetChunkIdx()).Update( - NIceDb::TUpdate(portion.GetBlobId(row.GetBlobRange().GetBlobIdxVerified()).SerializeBinary()), - NIceDb::TUpdate(row.GetBlobRange().Offset), - NIceDb::TUpdate(row.GetBlobRange().Size), - NIceDb::TUpdate(row.GetRecordsCount()), - NIceDb::TUpdate(row.GetRawBytes()) - ); + if (auto bRange = row.GetBlobRangeOptional()) { + AFL_VERIFY(bRange->IsValid()); + db.Table() + .Key(portion.GetPathId(), portion.GetPortionId(), row.GetIndexId(), row.GetChunkIdx()) + .Update(NIceDb::TUpdate(portion.GetBlobId(bRange->GetBlobIdxVerified()).SerializeBinary()), + NIceDb::TUpdate(bRange->Offset), NIceDb::TUpdate(row.GetDataSize()), + NIceDb::TUpdate(row.GetRecordsCount()), NIceDb::TUpdate(row.GetRawBytes())); + } else if (auto bData = row.GetBlobDataOptional()) { + db.Table() + .Key(portion.GetPathId(), portion.GetPortionId(), row.GetIndexId(), row.GetChunkIdx()) + .Update(NIceDb::TUpdate(*bData), + NIceDb::TUpdate(row.GetRecordsCount()), NIceDb::TUpdate(row.GetRawBytes())); + } else { + AFL_VERIFY(false); + } } void TDbWrapper::EraseIndex(const 
TPortionInfo& portion, const TIndexChunk& row) { diff --git a/ydb/core/tx/columnshard/engines/db_wrapper.h b/ydb/core/tx/columnshard/engines/db_wrapper.h index 39536cb9c987..50958b6fca29 100644 --- a/ydb/core/tx/columnshard/engines/db_wrapper.h +++ b/ydb/core/tx/columnshard/engines/db_wrapper.h @@ -16,7 +16,8 @@ namespace NKikimr::NOlap { class TColumnChunkLoadContext; class TIndexChunkLoadContext; -struct TInsertedData; +class TInsertedData; +class TCommittedData; class TInsertTableAccessor; class TColumnRecord; class TIndexChunk; @@ -30,10 +31,10 @@ class IDbWrapper { virtual ~IDbWrapper() = default; virtual void Insert(const TInsertedData& data) = 0; - virtual void Commit(const TInsertedData& data) = 0; + virtual void Commit(const TCommittedData& data) = 0; virtual void Abort(const TInsertedData& data) = 0; virtual void EraseInserted(const TInsertedData& data) = 0; - virtual void EraseCommitted(const TInsertedData& data) = 0; + virtual void EraseCommitted(const TCommittedData& data) = 0; virtual void EraseAborted(const TInsertedData& data) = 0; virtual bool Load(TInsertTableAccessor& insertTable, const TInstant& loadTime) = 0; @@ -63,10 +64,10 @@ class TDbWrapper : public IDbWrapper { {} void Insert(const TInsertedData& data) override; - void Commit(const TInsertedData& data) override; + void Commit(const TCommittedData& data) override; void Abort(const TInsertedData& data) override; void EraseInserted(const TInsertedData& data) override; - void EraseCommitted(const TInsertedData& data) override; + void EraseCommitted(const TCommittedData& data) override; void EraseAborted(const TInsertedData& data) override; bool Load(TInsertTableAccessor& insertTable, const TInstant& loadTime) override; diff --git a/ydb/core/tx/columnshard/engines/defs.cpp b/ydb/core/tx/columnshard/engines/defs.cpp new file mode 100644 index 000000000000..2c50c99d1d3c --- /dev/null +++ b/ydb/core/tx/columnshard/engines/defs.cpp @@ -0,0 +1,11 @@ +#include "defs.h" + +template <> +void 
Out(IOutputStream& os, TTypeTraits::TFuncParam val) { + os << (ui64)val; +} + +template <> +void Out(IOutputStream& os, TTypeTraits::TFuncParam val) { + os << (ui64)val; +} diff --git a/ydb/core/tx/columnshard/engines/defs.h b/ydb/core/tx/columnshard/engines/defs.h index a01edc7ef767..efe1f1c744a8 100644 --- a/ydb/core/tx/columnshard/engines/defs.h +++ b/ydb/core/tx/columnshard/engines/defs.h @@ -8,18 +8,33 @@ namespace NKikimr::NOlap { using TLogThis = TCtorLogger; -enum class TWriteId : ui64 {}; +enum class TOperationWriteId : ui64 { +}; +enum class TInsertWriteId : ui64 { +}; + +inline TOperationWriteId operator++(TOperationWriteId& w) noexcept { + w = TOperationWriteId{ ui64(w) + 1 }; + return w; +} -inline TWriteId operator++(TWriteId& w) noexcept { - w = TWriteId{ui64(w) + 1}; +inline TInsertWriteId operator++(TInsertWriteId& w) noexcept { + w = TInsertWriteId{ ui64(w) + 1 }; return w; } -} // namespace NKikimr::NOlap +} // namespace NKikimr::NOlap + +template <> +struct THash { + inline size_t operator()(const NKikimr::NOlap::TInsertWriteId x) const noexcept { + return THash()(ui64(x)); + } +}; template <> -struct THash { - inline size_t operator()(const NKikimr::NOlap::TWriteId x) const noexcept { +struct THash { + inline size_t operator()(const NKikimr::NOlap::TOperationWriteId x) const noexcept { return THash()(ui64(x)); } }; diff --git a/ydb/core/tx/columnshard/engines/filter.h b/ydb/core/tx/columnshard/engines/filter.h index 7670b0eab1d3..39167306b993 100644 --- a/ydb/core/tx/columnshard/engines/filter.h +++ b/ydb/core/tx/columnshard/engines/filter.h @@ -2,7 +2,7 @@ #include "defs.h" #include -#include +#include #include namespace NKikimr::NOlap { diff --git a/ydb/core/tx/columnshard/engines/insert_table/committed.cpp b/ydb/core/tx/columnshard/engines/insert_table/committed.cpp new file mode 100644 index 000000000000..bd4bb9ff6c06 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/insert_table/committed.cpp @@ -0,0 +1,5 @@ +#include "committed.h" + 
+namespace NKikimr::NOlap { + +} diff --git a/ydb/core/tx/columnshard/engines/insert_table/committed.h b/ydb/core/tx/columnshard/engines/insert_table/committed.h new file mode 100644 index 000000000000..bd633647b5ec --- /dev/null +++ b/ydb/core/tx/columnshard/engines/insert_table/committed.h @@ -0,0 +1,161 @@ +#pragma once +#include "user_data.h" + +#include + +namespace NKikimr::NOlap { + +class TCommittedData: public TUserDataContainer { +private: + using TBase = TUserDataContainer; + YDB_READONLY(TSnapshot, Snapshot, NOlap::TSnapshot::Zero()); + YDB_READONLY_DEF(TString, DedupId); + YDB_READONLY(bool, Remove, false); + +public: + TCommittedData(const std::shared_ptr& userData, const ui64 planStep, const ui64 txId, const TInsertWriteId insertWriteId) + : TBase(userData) + , Snapshot(planStep, txId) + , DedupId(ToString(planStep) + ":" + ToString((ui64)insertWriteId)) { + } + + TCommittedData(const std::shared_ptr& userData, const ui64 planStep, const ui64 txId, const TString& dedupId) + : TBase(userData) + , Snapshot(planStep, txId) + , DedupId(dedupId) { + } + + TCommittedData(const std::shared_ptr& userData, const TSnapshot& ss, const TInsertWriteId insertWriteId) + : TBase(userData) + , Snapshot(ss) + , DedupId(ToString(ss.GetPlanStep()) + ":" + ToString((ui64)insertWriteId)) { + } + + void SetRemove() { + AFL_VERIFY(!Remove); + Remove = true; + } + + bool operator<(const TCommittedData& key) const { + if (Snapshot == key.Snapshot) { + if (UserData->GetPathId() == key.UserData->GetPathId()) { + return DedupId < key.DedupId; + } else { + return UserData->GetPathId() < key.UserData->GetPathId(); + } + } else { + return Snapshot < key.Snapshot; + } + } +}; + +class TCommittedBlob { +private: + TBlobRange BlobRange; + std::variant WriteInfo; + YDB_READONLY(ui64, SchemaVersion, 0); + YDB_READONLY(ui64, RecordsCount, 0); + YDB_READONLY(bool, IsDelete, false); + NArrow::TReplaceKey First; + NArrow::TReplaceKey Last; + YDB_READONLY_DEF(NArrow::TSchemaSubset, 
SchemaSubset); + +public: + const NArrow::TReplaceKey& GetFirst() const { + return First; + } + const NArrow::TReplaceKey& GetLast() const { + return Last; + } + + ui64 GetSize() const { + return BlobRange.Size; + } + + TCommittedBlob(const TBlobRange& blobRange, const TSnapshot& snapshot, const ui64 schemaVersion, const ui64 recordsCount, + const NArrow::TReplaceKey& first, const NArrow::TReplaceKey& last, const bool isDelete, + const NArrow::TSchemaSubset& subset) + : BlobRange(blobRange) + , WriteInfo(snapshot) + , SchemaVersion(schemaVersion) + , RecordsCount(recordsCount) + , IsDelete(isDelete) + , First(first) + , Last(last) + , SchemaSubset(subset) { + } + + TCommittedBlob(const TBlobRange& blobRange, const TInsertWriteId writeId, const ui64 schemaVersion, const ui64 recordsCount, + const NArrow::TReplaceKey& first, const NArrow::TReplaceKey& last, const bool isDelete, + const NArrow::TSchemaSubset& subset) + : BlobRange(blobRange) + , WriteInfo(writeId) + , SchemaVersion(schemaVersion) + , RecordsCount(recordsCount) + , IsDelete(isDelete) + , First(first) + , Last(last) + , SchemaSubset(subset) { + } + + /// It uses trick then we place key with planStep:txId in container and find them later by BlobId only. + /// So hash() and equality should depend on BlobId only. 
+ bool operator==(const TCommittedBlob& key) const { + return BlobRange == key.BlobRange; + } + ui64 Hash() const noexcept { + return BlobRange.Hash(); + } + TString DebugString() const { + if (auto* ss = GetSnapshotOptional()) { + return TStringBuilder() << BlobRange << ";snapshot=" << ss->DebugString(); + } else { + return TStringBuilder() << BlobRange << ";write_id=" << (ui64)GetWriteIdVerified(); + } + } + + bool HasSnapshot() const { + return GetSnapshotOptional(); + } + + const TSnapshot& GetSnapshotDef(const TSnapshot& def) const { + if (auto* snapshot = GetSnapshotOptional()) { + return *snapshot; + } else { + return def; + } + } + + const TSnapshot* GetSnapshotOptional() const { + return std::get_if(&WriteInfo); + } + + const TSnapshot& GetSnapshotVerified() const { + auto* result = GetSnapshotOptional(); + AFL_VERIFY(result); + return *result; + } + + const TInsertWriteId* GetWriteIdOptional() const { + return std::get_if(&WriteInfo); + } + + TInsertWriteId GetWriteIdVerified() const { + auto* result = GetWriteIdOptional(); + AFL_VERIFY(result); + return *result; + } + + const TBlobRange& GetBlobRange() const { + return BlobRange; + } +}; + +} // namespace NKikimr::NOlap + +template <> +struct THash { + inline size_t operator()(const NKikimr::NOlap::TCommittedBlob& key) const { + return key.Hash(); + } +}; diff --git a/ydb/core/tx/columnshard/engines/insert_table/data.cpp b/ydb/core/tx/columnshard/engines/insert_table/data.cpp deleted file mode 100644 index 3f27a5ad5e7d..000000000000 --- a/ydb/core/tx/columnshard/engines/insert_table/data.cpp +++ /dev/null @@ -1,56 +0,0 @@ -#include "data.h" -#include - -namespace NKikimr::NOlap { - -namespace { - -class TInsertTableCacheController { -private: - TAtomicCounter BlobsCacheSize = 0; - const i64 BlobsCacheLimit = (i64)1 << 30; -public: - void Return(const ui64 size) { - const i64 val = BlobsCacheSize.Sub(size); - AFL_VERIFY(val >= 0)("size", size)("val", val); - } - - bool Take(const ui64 size) { - if 
(BlobsCacheSize.Add(size) <= BlobsCacheLimit) { - return true; - } - const i64 val = BlobsCacheSize.Sub(size); - AFL_VERIFY(val >= 0)("size", size)("val", val); - return false; - } -}; - -} - -TInsertedData::TBlobStorageGuard::~TBlobStorageGuard() { - Singleton()->Return(Data.size()); -} - -TInsertedData::~TInsertedData() { -} - -TInsertedData::TInsertedData(ui64 planStep, ui64 writeTxId, ui64 pathId, TString dedupId, const TBlobRange& blobRange, - const NKikimrTxColumnShard::TLogicalMetadata& proto, const ui64 schemaVersion, - const std::optional& blobData /*= {}*/) - : Meta(proto) - , BlobRange(blobRange) - , PlanStep(planStep) - , WriteTxId(writeTxId) - , PathId(pathId) - , DedupId(dedupId) - , SchemaVersion(schemaVersion) -{ - if (blobData) { - AFL_VERIFY(blobData->size() == BlobRange.Size); - if (Singleton()->Take(blobData->size())) { - BlobDataGuard = std::make_shared(*blobData); - } - } -} - -} diff --git a/ydb/core/tx/columnshard/engines/insert_table/data.h b/ydb/core/tx/columnshard/engines/insert_table/data.h deleted file mode 100644 index 0317b818073c..000000000000 --- a/ydb/core/tx/columnshard/engines/insert_table/data.h +++ /dev/null @@ -1,190 +0,0 @@ -#pragma once -#include "meta.h" -#include -#include -#include -#include - -namespace NKikimr::NOlap { - -struct TInsertedData { -private: - TInsertedDataMeta Meta; - YDB_READONLY_DEF(TBlobRange, BlobRange); - class TBlobStorageGuard { - private: - YDB_READONLY_DEF(TString, Data); - public: - TBlobStorageGuard(const TString& data) - : Data(data) - { - - } - ~TBlobStorageGuard(); - }; - - std::shared_ptr BlobDataGuard; - -public: - ui64 PlanStep = 0; - ui64 WriteTxId = 0; - ui64 PathId = 0; - TString DedupId; - -private: - YDB_READONLY(ui64, SchemaVersion, 0); -public: - std::optional GetBlobData() const { - if (BlobDataGuard) { - return BlobDataGuard->GetData(); - } else { - return {}; - } - } - - ui64 GetTxVolume() const { - return Meta.GetTxVolume() + sizeof(TBlobRange); - } - - const TInsertedDataMeta& 
GetMeta() const { - return Meta; - } - - TInsertedData() = delete; // avoid invalid TInsertedData anywhere - - TInsertedData(ui64 planStep, ui64 writeTxId, ui64 pathId, TString dedupId, const TBlobRange& blobRange, - const NKikimrTxColumnShard::TLogicalMetadata& proto, const ui64 schemaVersion, const std::optional& blobData); - - TInsertedData(ui64 writeTxId, ui64 pathId, TString dedupId, const TBlobRange& blobRange, - const NKikimrTxColumnShard::TLogicalMetadata& proto, const ui64 schemaVersion, const std::optional& blobData) - : TInsertedData(0, writeTxId, pathId, dedupId, blobRange, proto, schemaVersion, blobData) - {} - - TInsertedData(ui64 writeTxId, ui64 pathId, TString dedupId, const TUnifiedBlobId& blobId, - const NKikimrTxColumnShard::TLogicalMetadata& proto, const ui64 schemaVersion, const std::optional& blobData) - : TInsertedData(0, writeTxId, pathId, dedupId, TBlobRange(blobId, 0, blobId.BlobSize()), proto, schemaVersion, blobData) - { - } - - ~TInsertedData(); - - bool operator < (const TInsertedData& key) const { - if (PlanStep < key.PlanStep) { - return true; - } else if (PlanStep > key.PlanStep) { - return false; - } - - // PlanStep == key.PlanStep - if (WriteTxId < key.WriteTxId) { - return true; - } else if (WriteTxId > key.WriteTxId) { - return false; - } - - // PlanStep == key.PlanStep && WriteTxId == key.WriteTxId - if (PathId < key.PathId) { - return true; - } else if (PathId > key.PathId) { - return false; - } - - return DedupId < key.DedupId; - } - - bool operator == (const TInsertedData& key) const { - return (PlanStep == key.PlanStep) && - (WriteTxId == key.WriteTxId) && - (PathId == key.PathId) && - (DedupId == key.DedupId); - } - - /// We commit many writeIds in one txId. There could be several blobs with same WriteId and different DedupId. - /// One of them wins and becomes committed. Original DedupId would be lost then. - /// After commit we use original Initiator:WriteId as DedupId of inserted blob inside {PlanStep, TxId}. 
- /// pathId, initiator, {writeId}, {dedupId} -> pathId, planStep, txId, {dedupId} - void Commit(ui64 planStep, ui64 txId) { - DedupId = ToString(PlanStep) + ":" + ToString((ui64)WriteTxId); - PlanStep = planStep; - WriteTxId = txId; - } - - /// Undo Commit() operation. Restore Initiator:WriteId from DedupId. - void Undo() { - TVector tokens; - size_t numTokens = Split(DedupId, ":", tokens); - Y_ABORT_UNLESS(numTokens == 2); - - PlanStep = FromString(tokens[0]); - WriteTxId = FromString(tokens[1]); - DedupId.clear(); - } - - TSnapshot GetSnapshot() const { - return TSnapshot(PlanStep, WriteTxId); - } - - ui32 BlobSize() const { return BlobRange.GetBlobSize(); } - -}; - -class TCommittedBlob { -private: - TBlobRange BlobRange; - TSnapshot CommitSnapshot; - YDB_READONLY(ui64, SchemaVersion, 0); - YDB_READONLY(ui64, RecordsCount, 0); - YDB_READONLY(bool, IsDelete, false); - YDB_READONLY_DEF(std::optional, First); - YDB_READONLY_DEF(std::optional, Last); -public: - ui64 GetSize() const { - return BlobRange.Size; - } - - const NArrow::TReplaceKey& GetFirstVerified() const { - Y_ABORT_UNLESS(First); - return *First; - } - - const NArrow::TReplaceKey& GetLastVerified() const { - Y_ABORT_UNLESS(Last); - return *Last; - } - - TCommittedBlob(const TBlobRange& blobRange, const TSnapshot& snapshot, const ui64 schemaVersion, const ui64 recordsCount, const std::optional& first, - const std::optional& last, const bool isDelete) - : BlobRange(blobRange) - , CommitSnapshot(snapshot) - , SchemaVersion(schemaVersion) - , RecordsCount(recordsCount) - , IsDelete(isDelete) - , First(first) - , Last(last) - {} - - /// It uses trick then we place key with planStep:txId in container and find them later by BlobId only. - /// So hash() and equality should depend on BlobId only. 
- bool operator == (const TCommittedBlob& key) const { return BlobRange == key.BlobRange; } - ui64 Hash() const noexcept { return BlobRange.Hash(); } - TString DebugString() const { - return TStringBuilder() << BlobRange << ";ps=" << CommitSnapshot.GetPlanStep() << ";ti=" << CommitSnapshot.GetTxId(); - } - - const TSnapshot& GetSnapshot() const { - return CommitSnapshot; - } - - const TBlobRange& GetBlobRange() const { - return BlobRange; - } -}; - -} - -template <> -struct THash { - inline size_t operator() (const NKikimr::NOlap::TCommittedBlob& key) const { - return key.Hash(); - } -}; diff --git a/ydb/core/tx/columnshard/engines/insert_table/insert_table.cpp b/ydb/core/tx/columnshard/engines/insert_table/insert_table.cpp index 5147fbd02bc8..980882ad8eca 100644 --- a/ydb/core/tx/columnshard/engines/insert_table/insert_table.cpp +++ b/ydb/core/tx/columnshard/engines/insert_table/insert_table.cpp @@ -1,7 +1,9 @@ #include "insert_table.h" + #include -#include +#include #include +#include namespace NKikimr::NOlap { @@ -11,12 +13,13 @@ bool TInsertTable::Insert(IDbWrapper& dbTable, TInsertedData&& data) { dbTable.Insert(*dataPtr); return true; } else { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "skip_insertion"); return false; } } -TInsertionSummary::TCounters TInsertTable::Commit(IDbWrapper& dbTable, ui64 planStep, ui64 txId, - const THashSet& writeIds, std::function pathExists) { +TInsertionSummary::TCounters TInsertTable::Commit( + IDbWrapper& dbTable, ui64 planStep, ui64 txId, const THashSet& writeIds, std::function pathExists) { Y_ABORT_UNLESS(!writeIds.empty()); TInsertionSummary::TCounters counters; @@ -33,15 +36,19 @@ TInsertionSummary::TCounters TInsertTable::Commit(IDbWrapper& dbTable, ui64 plan dbTable.EraseInserted(*data); - const ui64 pathId = data->PathId; + const ui64 pathId = data->GetPathId(); auto* pathInfo = Summary.GetPathInfoOptional(pathId); // There could be commit after drop: propose, drop, plan if (pathInfo && pathExists(pathId)) { - 
data->Commit(planStep, txId); - dbTable.Commit(*data); + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "commit_insertion")("path_id", data->GetPathId())( + "blob_range", data->GetBlobRange().ToString()); + auto committed = data->Commit(planStep, txId); + dbTable.Commit(committed); - pathInfo->AddCommitted(std::move(*data)); + pathInfo->AddCommitted(std::move(committed)); } else { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "abort_insertion")("path_id", data->GetPathId())( + "blob_range", data->GetBlobRange().ToString()); dbTable.Abort(*data); Summary.AddAborted(std::move(*data)); } @@ -50,12 +57,14 @@ TInsertionSummary::TCounters TInsertTable::Commit(IDbWrapper& dbTable, ui64 plan return counters; } -void TInsertTable::Abort(IDbWrapper& dbTable, const THashSet& writeIds) { +void TInsertTable::Abort(IDbWrapper& dbTable, const THashSet& writeIds) { Y_ABORT_UNLESS(!writeIds.empty()); for (auto writeId : writeIds) { // There could be inconsistency with txs and writes in case of bugs. So we could find no record for writeId. 
if (std::optional data = Summary.ExtractInserted(writeId)) { + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "abort_insertion")("path_id", data->GetPathId())( + "blob_range", data->GetBlobRange().ToString())("write_id", writeId); dbTable.EraseInserted(*data); dbTable.Abort(*data); Summary.AddAborted(std::move(*data)); @@ -63,88 +72,100 @@ void TInsertTable::Abort(IDbWrapper& dbTable, const THashSet& writeIds } } -THashSet TInsertTable::OldWritesToAbort(const TInstant& now) const { +THashSet TInsertTable::OldWritesToAbort(const TInstant& now) const { return Summary.GetExpiredInsertions(now - WaitCommitDelay, CleanupPackageSize); } -THashSet TInsertTable::DropPath(IDbWrapper& dbTable, ui64 pathId) { - auto pathInfo = Summary.ExtractPathInfo(pathId); - if (!!pathInfo) { - for (auto& data : pathInfo->GetCommitted()) { - dbTable.EraseCommitted(data); - TInsertedData copy = data; - copy.Undo(); - dbTable.Abort(copy); - Summary.AddAborted(std::move(copy)); - } - } - - return Summary.GetInsertedByPathId(pathId); -} - -void TInsertTable::EraseCommittedOnExecute(IDbWrapper& dbTable, const TInsertedData& data, const std::shared_ptr& blobsAction) { +void TInsertTable::EraseCommittedOnExecute( + IDbWrapper& dbTable, const TCommittedData& data, const std::shared_ptr& blobsAction) { if (Summary.HasCommitted(data)) { dbTable.EraseCommitted(data); RemoveBlobLinkOnExecute(data.GetBlobRange().BlobId, blobsAction); } } -void TInsertTable::EraseCommittedOnComplete(const TInsertedData& data) { +void TInsertTable::EraseCommittedOnComplete(const TCommittedData& data) { if (Summary.EraseCommitted(data)) { RemoveBlobLinkOnComplete(data.GetBlobRange().BlobId); } } -void TInsertTable::EraseAbortedOnExecute(IDbWrapper& dbTable, const TInsertedData& data, const std::shared_ptr& blobsAction) { - if (Summary.HasAborted((TWriteId)data.WriteTxId)) { +void TInsertTable::EraseAbortedOnExecute( + IDbWrapper& dbTable, const TInsertedData& data, const std::shared_ptr& blobsAction) { + if 
(Summary.HasAborted(data.GetInsertWriteId())) { dbTable.EraseAborted(data); RemoveBlobLinkOnExecute(data.GetBlobRange().BlobId, blobsAction); } } void TInsertTable::EraseAbortedOnComplete(const TInsertedData& data) { - if (Summary.EraseAborted((TWriteId)data.WriteTxId)) { + if (Summary.EraseAborted(data.GetInsertWriteId())) { RemoveBlobLinkOnComplete(data.GetBlobRange().BlobId); } } -bool TInsertTable::Load(IDbWrapper& dbTable, const TInstant loadTime) { +bool TInsertTable::Load(NIceDb::TNiceDb& db, IDbWrapper& dbTable, const TInstant loadTime) { Y_ABORT_UNLESS(!Loaded); Loaded = true; + LastWriteId = (TInsertWriteId)0; + if (!NColumnShard::Schema::GetSpecialValueOpt(db, NColumnShard::Schema::EValueIds::LastWriteId, LastWriteId)) { + return false; + } + return dbTable.Load(*this, loadTime); } -std::vector TInsertTable::Read(ui64 pathId, const TSnapshot& snapshot, const std::shared_ptr& pkSchema) const { +std::vector TInsertTable::Read(ui64 pathId, const std::optional lockId, const TSnapshot& reqSnapshot, + const std::shared_ptr& pkSchema, const TPKRangesFilter* pkRangesFilter) const { const TPathInfo* pInfo = Summary.GetPathInfoOptional(pathId); if (!pInfo) { return {}; } - std::vector ret; - ret.reserve(pInfo->GetCommitted().size()); + std::vector result; + result.reserve(pInfo->GetCommitted().size() + Summary.GetInserted().size()); for (const auto& data : pInfo->GetCommitted()) { - if (std::less_equal()(data.GetSnapshot(), snapshot)) { - ret.emplace_back(&data); + if (lockId || data.GetSnapshot() <= reqSnapshot) { + auto start = data.GetMeta().GetFirstPK(pkSchema); + auto finish = data.GetMeta().GetLastPK(pkSchema); + if (pkRangesFilter && pkRangesFilter->IsPortionInPartialUsage(start, finish) == TPKRangeFilter::EUsageClass::DontUsage) { + continue; + } + result.emplace_back(TCommittedBlob(data.GetBlobRange(), data.GetSnapshot(), data.GetSchemaVersion(), data.GetMeta().GetNumRows(), + start, finish, data.GetMeta().GetModificationType() == 
NEvWrite::EModificationType::Delete, data.GetMeta().GetSchemaSubset())); } } - const auto pred = [pkSchema](const TInsertedData* l, const TInsertedData* r) { - return l->GetMeta().GetFirstPK(pkSchema) < r->GetMeta().GetFirstPK(pkSchema); - }; - std::sort(ret.begin(), ret.end(), pred); - - std::vector result; - result.reserve(ret.size()); - for (auto&& i : ret) { - result.emplace_back(TCommittedBlob( - i->GetBlobRange(), i->GetSnapshot(), i->GetSchemaVersion(), i->GetMeta().GetNumRows(), i->GetMeta().GetFirstPK(pkSchema), i->GetMeta().GetLastPK(pkSchema) - , i->GetMeta().GetModificationType() == NEvWrite::EModificationType::Delete)); + if (lockId) { + for (const auto& [writeId, data] : Summary.GetInserted()) { + if (data.GetPathId() != pathId) { + continue; + } + auto start = data.GetMeta().GetFirstPK(pkSchema); + auto finish = data.GetMeta().GetLastPK(pkSchema); + if (pkRangesFilter && pkRangesFilter->IsPortionInPartialUsage(start, finish) == TPKRangeFilter::EUsageClass::DontUsage) { + continue; + } + result.emplace_back(TCommittedBlob(data.GetBlobRange(), writeId, data.GetSchemaVersion(), data.GetMeta().GetNumRows(), start, finish, + data.GetMeta().GetModificationType() == NEvWrite::EModificationType::Delete, data.GetMeta().GetSchemaSubset())); + } } - return result; } -bool TInsertTableAccessor::RemoveBlobLinkOnExecute(const TUnifiedBlobId& blobId, const std::shared_ptr& blobsAction) { +TInsertWriteId TInsertTable::BuildNextWriteId(NTabletFlatExecutor::TTransactionContext& txc) { + NIceDb::TNiceDb db(txc.DB); + return BuildNextWriteId(db); +} + +TInsertWriteId TInsertTable::BuildNextWriteId(NIceDb::TNiceDb& db) { + TInsertWriteId writeId = ++LastWriteId; + NColumnShard::Schema::SaveSpecialValue(db, NColumnShard::Schema::EValueIds::LastWriteId, (ui64)writeId); + return writeId; +} + +bool TInsertTableAccessor::RemoveBlobLinkOnExecute( + const TUnifiedBlobId& blobId, const std::shared_ptr& blobsAction) { AFL_VERIFY(blobsAction); auto itBlob = 
BlobLinks.find(blobId); AFL_VERIFY(itBlob != BlobLinks.end()); @@ -170,4 +191,4 @@ bool TInsertTableAccessor::RemoveBlobLinkOnComplete(const TUnifiedBlobId& blobId } } -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/insert_table/insert_table.h b/ydb/core/tx/columnshard/engines/insert_table/insert_table.h index 373964818370..4f7544e10184 100644 --- a/ydb/core/tx/columnshard/engines/insert_table/insert_table.h +++ b/ydb/core/tx/columnshard/engines/insert_table/insert_table.h @@ -1,17 +1,17 @@ #pragma once -#include "data.h" -#include "rt_insertion.h" +#include "committed.h" +#include "inserted.h" #include "path_info.h" +#include "rt_insertion.h" + +#include +#include #include namespace NKikimr::NOlap { - +class TPKRangesFilter; class IDbWrapper; -/// Use one table for inserted and committed blobs: -/// !Commited => {PlanStep, WriteTxId} are {0, WriteId} -/// Commited => {PlanStep, WriteTxId} are {PlanStep, TxId} - class TInsertTableAccessor { protected: TInsertionSummary Summary; @@ -23,7 +23,14 @@ class TInsertTableAccessor { bool RemoveBlobLinkOnExecute(const TUnifiedBlobId& blobId, const std::shared_ptr& blobsAction); bool RemoveBlobLinkOnComplete(const TUnifiedBlobId& blobId); + public: + void ErasePath(const ui64 pathId) { + Summary.ErasePath(pathId); + } + bool HasDataInPathId(const ui64 pathId) const { + return Summary.HasPathIdData(pathId); + } const std::map>& GetPathPriorities() const { return Summary.GetPathPriorities(); } @@ -46,20 +53,28 @@ class TInsertTableAccessor { return Summary.AddInserted(std::move(data), load); } bool AddAborted(TInsertedData&& data, const bool load) { + AFL_VERIFY_DEBUG(!Summary.ExtractInserted(data.GetInsertWriteId())); if (load) { AddBlobLink(data.GetBlobRange().BlobId); } return Summary.AddAborted(std::move(data), load); } - bool AddCommitted(TInsertedData&& data, const bool load) { + bool AddCommitted(TCommittedData&& data, const bool load) { if (load) { AddBlobLink(data.GetBlobRange().BlobId); 
} - const ui64 pathId = data.PathId; + const ui64 pathId = data.GetPathId(); return Summary.GetPathInfo(pathId).AddCommitted(std::move(data), load); } - const THashMap& GetAborted() const { return Summary.GetAborted(); } - const THashMap& GetInserted() const { return Summary.GetInserted(); } + bool HasPathIdData(const ui64 pathId) const { + return Summary.HasPathIdData(pathId); + } + const THashMap& GetAborted() const { + return Summary.GetAborted(); + } + const TInsertedContainer& GetInserted() const { + return Summary.GetInserted(); + } const TInsertionSummary::TCounters& GetCountersPrepared() const { return Summary.GetCountersPrepared(); } @@ -74,25 +89,34 @@ class TInsertTableAccessor { class TInsertTable: public TInsertTableAccessor { private: bool Loaded = false; + TInsertWriteId LastWriteId = TInsertWriteId{ 0 }; + public: static constexpr const TDuration WaitCommitDelay = TDuration::Minutes(10); static constexpr ui64 CleanupPackageSize = 10000; bool Insert(IDbWrapper& dbTable, TInsertedData&& data); - TInsertionSummary::TCounters Commit(IDbWrapper& dbTable, ui64 planStep, ui64 txId, - const THashSet& writeIds, std::function pathExists); - void Abort(IDbWrapper& dbTable, const THashSet& writeIds); - THashSet OldWritesToAbort(const TInstant& now) const; - THashSet DropPath(IDbWrapper& dbTable, ui64 pathId); + TInsertionSummary::TCounters Commit( + IDbWrapper& dbTable, ui64 planStep, ui64 txId, const THashSet& writeIds, std::function pathExists); + void Abort(IDbWrapper& dbTable, const THashSet& writeIds); + void MarkAsNotAbortable(const TInsertWriteId writeId) { + Summary.MarkAsNotAbortable(writeId); + } + THashSet OldWritesToAbort(const TInstant& now) const; - void EraseCommittedOnExecute(IDbWrapper& dbTable, const TInsertedData& key, const std::shared_ptr& blobsAction); - void EraseCommittedOnComplete(const TInsertedData& key); + void EraseCommittedOnExecute( + IDbWrapper& dbTable, const TCommittedData& key, const std::shared_ptr& blobsAction); + void 
EraseCommittedOnComplete(const TCommittedData& key); void EraseAbortedOnExecute(IDbWrapper& dbTable, const TInsertedData& key, const std::shared_ptr& blobsAction); void EraseAbortedOnComplete(const TInsertedData& key); - std::vector Read(ui64 pathId, const TSnapshot& snapshot, const std::shared_ptr& pkSchema) const; - bool Load(IDbWrapper& dbTable, const TInstant loadTime); + std::vector Read(ui64 pathId, const std::optional lockId, const TSnapshot& reqSnapshot, + const std::shared_ptr& pkSchema, const TPKRangesFilter* pkRangesFilter) const; + bool Load(NIceDb::TNiceDb& db, IDbWrapper& dbTable, const TInstant loadTime); + + TInsertWriteId BuildNextWriteId(NTabletFlatExecutor::TTransactionContext& txc); + TInsertWriteId BuildNextWriteId(NIceDb::TNiceDb& db); }; -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/insert_table/inserted.cpp b/ydb/core/tx/columnshard/engines/insert_table/inserted.cpp new file mode 100644 index 000000000000..2986fc0b4c35 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/insert_table/inserted.cpp @@ -0,0 +1,12 @@ +#include "committed.h" +#include "inserted.h" + +#include + +namespace NKikimr::NOlap { + +TCommittedData TInsertedData::Commit(const ui64 planStep, const ui64 txId) { + return TCommittedData(UserData, planStep, txId, InsertWriteId); +} + +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/insert_table/inserted.h b/ydb/core/tx/columnshard/engines/insert_table/inserted.h new file mode 100644 index 000000000000..e124edeb57e5 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/insert_table/inserted.h @@ -0,0 +1,35 @@ +#pragma once +#include "user_data.h" + +#include + +namespace NKikimr::NOlap { + +class TCommittedData; + +class TInsertedData: public TUserDataContainer { +private: + using TBase = TUserDataContainer; + YDB_READONLY(TInsertWriteId, InsertWriteId, TInsertWriteId(0)); + YDB_READONLY_FLAG(NotAbortable, false); + +public: + void MarkAsNotAbortable() { + 
NotAbortableFlag = true; + } + + TInsertedData() = delete; // avoid invalid TInsertedData anywhere + + TInsertedData(const TInsertWriteId writeId, const std::shared_ptr& userData) + : TBase(userData) + , InsertWriteId(writeId) { + } + + /// We commit many writeIds in one txId. There could be several blobs with same WriteId and different DedupId. + /// One of them wins and becomes committed. Original DedupId would be lost then. + /// After commit we use original Initiator:WriteId as DedupId of inserted blob inside {PlanStep, TxId}. + /// pathId, initiator, {writeId}, {dedupId} -> pathId, planStep, txId, {dedupId} + [[nodiscard]] TCommittedData Commit(const ui64 planStep, const ui64 txId); +}; + +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/insert_table/meta.cpp b/ydb/core/tx/columnshard/engines/insert_table/meta.cpp index caed6b91ac0d..eee7b2444b2e 100644 --- a/ydb/core/tx/columnshard/engines/insert_table/meta.cpp +++ b/ydb/core/tx/columnshard/engines/insert_table/meta.cpp @@ -6,14 +6,23 @@ NKikimrTxColumnShard::TLogicalMetadata TInsertedDataMeta::SerializeToProto() con return OriginalProto; } -const std::optional& TInsertedDataMeta::GetSpecialKeys() const { - if (!KeysParsed) { - if (OriginalProto.HasSpecialKeysRawData()) { - SpecialKeysParsed = NArrow::TFirstLastSpecialKeys(OriginalProto.GetSpecialKeysRawData()); - } - KeysParsed = true; +std::shared_ptr TInsertedDataMeta::GetSpecialKeys(const std::shared_ptr& schema) const { + if (KeyInitialized.Val()) { + return SpecialKeysParsed; } - return SpecialKeysParsed; + std::shared_ptr result; + if (OriginalProto.HasSpecialKeysPayloadData()) { + result = std::make_shared(OriginalProto.GetSpecialKeysPayloadData(), schema); + } else if (OriginalProto.HasSpecialKeysRawData()) { + result = std::make_shared(OriginalProto.GetSpecialKeysRawData()); + } else { + AFL_VERIFY(false); + } + if (AtomicCas(&KeyInitialization, 1, 0)) { + SpecialKeysParsed = result; + KeyInitialized = 1; + } + return 
result; } } diff --git a/ydb/core/tx/columnshard/engines/insert_table/meta.h b/ydb/core/tx/columnshard/engines/insert_table/meta.h index 196e4cb667de..a913e88c973a 100644 --- a/ydb/core/tx/columnshard/engines/insert_table/meta.h +++ b/ydb/core/tx/columnshard/engines/insert_table/meta.h @@ -1,5 +1,6 @@ #pragma once #include +#include #include #include #include @@ -14,13 +15,14 @@ class TInsertedDataMeta { YDB_READONLY(ui32, NumRows, 0); YDB_READONLY(ui64, RawBytes, 0); YDB_READONLY(NEvWrite::EModificationType, ModificationType, NEvWrite::EModificationType::Upsert); + YDB_READONLY_DEF(NArrow::TSchemaSubset, SchemaSubset); - mutable bool KeysParsed = false; - mutable std::optional SpecialKeysParsed; - + mutable TAtomicCounter KeyInitialized = 0; + mutable TAtomic KeyInitialization = 0; + mutable std::shared_ptr SpecialKeysParsed; NKikimrTxColumnShard::TLogicalMetadata OriginalProto; + std::shared_ptr GetSpecialKeys(const std::shared_ptr& schema) const; - const std::optional& GetSpecialKeys() const; public: ui64 GetTxVolume() const { return 2 * sizeof(ui64) + sizeof(ui32) + sizeof(OriginalProto) + (SpecialKeysParsed ? 
SpecialKeysParsed->GetMemoryBytes() : 0); @@ -36,21 +38,18 @@ class TInsertedDataMeta { if (proto.HasModificationType()) { ModificationType = TEnumOperator::DeserializeFromProto(proto.GetModificationType()); } + if (proto.HasSchemaSubset()) { + SchemaSubset.DeserializeFromProto(proto.GetSchemaSubset()).Validate(); + } } - std::optional GetFirstPK(const std::shared_ptr& schema) const { - if (GetSpecialKeys()) { - return GetSpecialKeys()->GetFirst(schema); - } else { - return {}; - } + NArrow::TReplaceKey GetFirstPK(const std::shared_ptr& schema) const { + AFL_VERIFY(schema); + return GetSpecialKeys(schema)->GetFirst(); } - std::optional GetLastPK(const std::shared_ptr& schema) const { - if (GetSpecialKeys()) { - return GetSpecialKeys()->GetLast(schema); - } else { - return {}; - } + NArrow::TReplaceKey GetLastPK(const std::shared_ptr& schema) const { + AFL_VERIFY(schema); + return GetSpecialKeys(schema)->GetLast(); } NKikimrTxColumnShard::TLogicalMetadata SerializeToProto() const; diff --git a/ydb/core/tx/columnshard/engines/insert_table/path_info.cpp b/ydb/core/tx/columnshard/engines/insert_table/path_info.cpp index 31be2b8a8fa3..d4530b55c481 100644 --- a/ydb/core/tx/columnshard/engines/insert_table/path_info.cpp +++ b/ydb/core/tx/columnshard/engines/insert_table/path_info.cpp @@ -29,7 +29,7 @@ void TPathInfo::AddInsertedSize(const i64 size, const ui64 overloadLimit) { SetInsertedOverload((ui64)InsertedSize > overloadLimit); } -bool TPathInfo::EraseCommitted(const TInsertedData& data) { +bool TPathInfo::EraseCommitted(const TCommittedData& data) { Summary->RemovePriority(*this); const bool result = Committed.erase(data); AddCommittedSize(-1 * (i64)data.BlobSize(), TCompactionLimits::OVERLOAD_INSERT_TABLE_SIZE_BY_PATH_ID); @@ -39,11 +39,11 @@ bool TPathInfo::EraseCommitted(const TInsertedData& data) { return result; } -bool TPathInfo::HasCommitted(const TInsertedData& data) { +bool TPathInfo::HasCommitted(const TCommittedData& data) { return 
Committed.contains(data); } -bool TPathInfo::AddCommitted(TInsertedData&& data, const bool load) { +bool TPathInfo::AddCommitted(TCommittedData&& data, const bool load) { const ui64 dataSize = data.BlobSize(); Summary->RemovePriority(*this); AddCommittedSize(data.BlobSize(), TCompactionLimits::OVERLOAD_INSERT_TABLE_SIZE_BY_PATH_ID); diff --git a/ydb/core/tx/columnshard/engines/insert_table/path_info.h b/ydb/core/tx/columnshard/engines/insert_table/path_info.h index 5e44929307c4..8675a05a7d1b 100644 --- a/ydb/core/tx/columnshard/engines/insert_table/path_info.h +++ b/ydb/core/tx/columnshard/engines/insert_table/path_info.h @@ -1,7 +1,10 @@ #pragma once +#include "committed.h" +#include "inserted.h" + #include + #include -#include "data.h" namespace NKikimr::NOlap { class TInsertionSummary; @@ -17,12 +20,11 @@ class TPathInfoIndexPriority { private: YDB_READONLY(EIndexationPriority, Category, EIndexationPriority::NoPriority); const ui32 Weight; + public: TPathInfoIndexPriority(const EIndexationPriority category, const ui32 weight) : Category(category) - , Weight(weight) - { - + , Weight(weight) { } bool operator!() const { @@ -37,7 +39,7 @@ class TPathInfoIndexPriority { class TPathInfo: public TMoveOnly { private: const ui64 PathId = 0; - TSet Committed; + TSet Committed; YDB_READONLY(i64, CommittedSize, 0); YDB_READONLY(i64, InsertedSize, 0); bool CommittedOverload = false; @@ -51,6 +53,10 @@ class TPathInfo: public TMoveOnly { void AddCommittedSize(const i64 size, const ui64 overloadLimit); public: + bool IsEmpty() const { + return Committed.empty() && !InsertedSize; + } + void AddInsertedSize(const i64 size, const ui64 overloadLimit); explicit TPathInfo(TInsertionSummary& summary, const ui64 pathId); @@ -61,18 +67,18 @@ class TPathInfo: public TMoveOnly { TPathInfoIndexPriority GetIndexationPriority() const; - bool EraseCommitted(const TInsertedData& data); - bool HasCommitted(const TInsertedData& data); + bool EraseCommitted(const TCommittedData& data); + bool 
HasCommitted(const TCommittedData& data); - const TSet& GetCommitted() const { + const TSet& GetCommitted() const { return Committed; } - bool AddCommitted(TInsertedData&& data, const bool load = false); + bool AddCommitted(TCommittedData&& data, const bool load = false); bool IsOverloaded() const { return CommittedOverload || InsertedOverload; } }; -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/insert_table/rt_insertion.cpp b/ydb/core/tx/columnshard/engines/insert_table/rt_insertion.cpp index 4723ac8da5c6..6cc6e4872da3 100644 --- a/ydb/core/tx/columnshard/engines/insert_table/rt_insertion.cpp +++ b/ydb/core/tx/columnshard/engines/insert_table/rt_insertion.cpp @@ -23,9 +23,10 @@ void TInsertionSummary::RemovePriority(const TPathInfo& pathInfo) noexcept { const auto priority = pathInfo.GetIndexationPriority(); auto it = Priorities.find(priority); if (it == Priorities.end()) { - Y_ABORT_UNLESS(!priority); + AFL_VERIFY(!priority); return; } + AFL_VERIFY(!!priority); Y_ABORT_UNLESS(it->second.erase(&pathInfo) || !priority); if (it->second.empty()) { Priorities.erase(it); @@ -33,7 +34,9 @@ void TInsertionSummary::RemovePriority(const TPathInfo& pathInfo) noexcept { } void TInsertionSummary::AddPriority(const TPathInfo& pathInfo) noexcept { - Y_ABORT_UNLESS(Priorities[pathInfo.GetIndexationPriority()].emplace(&pathInfo).second); + if (!!pathInfo.GetIndexationPriority()) { + Y_ABORT_UNLESS(Priorities[pathInfo.GetIndexationPriority()].emplace(&pathInfo).second); + } } NKikimr::NOlap::TPathInfo& TInsertionSummary::GetPathInfo(const ui64 pathId) { @@ -44,20 +47,6 @@ NKikimr::NOlap::TPathInfo& TInsertionSummary::GetPathInfo(const ui64 pathId) { return it->second; } -std::optional TInsertionSummary::ExtractPathInfo(const ui64 pathId) { - auto it = PathInfo.find(pathId); - if (it == PathInfo.end()) { - return {}; - } - RemovePriority(it->second); - std::optional result = std::move(it->second); - PathInfo.erase(it); - for (auto&& i : 
result->GetCommitted()) { - OnEraseCommitted(*result, i.BlobSize()); - } - return result; -} - NKikimr::NOlap::TPathInfo* TInsertionSummary::GetPathInfoOptional(const ui64 pathId) { auto it = PathInfo.find(pathId); if (it == PathInfo.end()) { @@ -100,37 +89,11 @@ void TInsertionSummary::OnEraseInserted(TPathInfo& pathInfo, const ui64 dataSize AFL_VERIFY(Counters.Inserted.GetDataSize() == (i64)StatsPrepared.Bytes); } -THashSet TInsertionSummary::GetInsertedByPathId(const ui64 pathId) const { - THashSet result; - for (auto& [writeId, data] : Inserted) { - if (data.PathId == pathId) { - result.insert(writeId); - } - } - - return result; +THashSet TInsertionSummary::GetExpiredInsertions(const TInstant timeBorder, const ui64 limit) const { + return Inserted.GetExpired(timeBorder, limit); } -THashSet TInsertionSummary::GetExpiredInsertions(const TInstant timeBorder, const ui64 limit) const { - if (timeBorder < MinInsertedTs) { - return {}; - } - - THashSet toAbort; - TInstant newMin = TInstant::Max(); - for (auto& [writeId, data] : Inserted) { - const TInstant dataInsertTs = data.GetMeta().GetDirtyWriteTime(); - if (dataInsertTs < timeBorder && toAbort.size() < limit) { - toAbort.insert(writeId); - } else { - newMin = Min(newMin, dataInsertTs); - } - } - MinInsertedTs = (toAbort.size() == Inserted.size()) ? 
TInstant::Zero() : newMin; - return toAbort; -} - -bool TInsertionSummary::EraseAborted(const TWriteId writeId) { +bool TInsertionSummary::EraseAborted(const TInsertWriteId writeId) { auto it = Aborted.find(writeId); if (it == Aborted.end()) { return false; @@ -140,7 +103,7 @@ bool TInsertionSummary::EraseAborted(const TWriteId writeId) { return true; } -bool TInsertionSummary::HasAborted(const TWriteId writeId) { +bool TInsertionSummary::HasAborted(const TInsertWriteId writeId) { auto it = Aborted.find(writeId); if (it == Aborted.end()) { return false; @@ -148,8 +111,8 @@ bool TInsertionSummary::HasAborted(const TWriteId writeId) { return true; } -bool TInsertionSummary::EraseCommitted(const TInsertedData& data) { - TPathInfo* pathInfo = GetPathInfoOptional(data.PathId); +bool TInsertionSummary::EraseCommitted(const TCommittedData& data) { + TPathInfo* pathInfo = GetPathInfoOptional(data.GetPathId()); if (!pathInfo) { Counters.Committed.SkipErase(data.BlobSize()); return false; @@ -163,8 +126,8 @@ bool TInsertionSummary::EraseCommitted(const TInsertedData& data) { } } -bool TInsertionSummary::HasCommitted(const TInsertedData& data) { - TPathInfo* pathInfo = GetPathInfoOptional(data.PathId); +bool TInsertionSummary::HasCommitted(const TCommittedData& data) { + TPathInfo* pathInfo = GetPathInfoOptional(data.GetPathId()); if (!pathInfo) { return false; } @@ -172,40 +135,30 @@ bool TInsertionSummary::HasCommitted(const TInsertedData& data) { } const NKikimr::NOlap::TInsertedData* TInsertionSummary::AddAborted(TInsertedData&& data, const bool load /*= false*/) { - const TWriteId writeId((TWriteId)data.WriteTxId); + const TInsertWriteId writeId = data.GetInsertWriteId(); Counters.Aborted.Add(data.BlobSize(), load); + AFL_VERIFY_DEBUG(!Inserted.contains(writeId)); auto insertInfo = Aborted.emplace(writeId, std::move(data)); - Y_ABORT_UNLESS(insertInfo.second); + AFL_VERIFY(insertInfo.second)("write_id", writeId); return &insertInfo.first->second; } -std::optional 
TInsertionSummary::ExtractInserted(const TWriteId id) { - auto it = Inserted.find(id); - if (it == Inserted.end()) { - return {}; - } else { - auto pathInfo = GetPathInfoOptional(it->second.PathId); +std::optional TInsertionSummary::ExtractInserted(const TInsertWriteId id) { + auto result = Inserted.ExtractOptional(id); + if (result) { + auto pathInfo = GetPathInfoOptional(result->GetPathId()); if (pathInfo) { - OnEraseInserted(*pathInfo, it->second.BlobSize()); + OnEraseInserted(*pathInfo, result->BlobSize()); } - std::optional result = std::move(it->second); - Inserted.erase(it); - return result; } + return result; } const NKikimr::NOlap::TInsertedData* TInsertionSummary::AddInserted(TInsertedData&& data, const bool load /*= false*/) { - TWriteId writeId{ data.WriteTxId }; - const ui32 dataSize = data.BlobSize(); - const ui64 pathId = data.PathId; - auto insertInfo = Inserted.emplace(writeId, std::move(data)); - if (insertInfo.second) { - OnNewInserted(GetPathInfo(pathId), dataSize, load); - return &insertInfo.first->second; - } else { - Counters.Inserted.SkipAdd(dataSize); - return nullptr; - } + auto* insertInfo = Inserted.AddVerified(std::move(data)); + AFL_VERIFY_DEBUG(!Aborted.contains(insertInfo->GetInsertWriteId())); + OnNewInserted(GetPathInfo(insertInfo->GetPathId()), insertInfo->BlobSize(), load); + return insertInfo; } } diff --git a/ydb/core/tx/columnshard/engines/insert_table/rt_insertion.h b/ydb/core/tx/columnshard/engines/insert_table/rt_insertion.h index 291886bd81bd..67e8034628c8 100644 --- a/ydb/core/tx/columnshard/engines/insert_table/rt_insertion.h +++ b/ydb/core/tx/columnshard/engines/insert_table/rt_insertion.h @@ -1,10 +1,121 @@ #pragma once +#include "inserted.h" +#include "path_info.h" + #include + #include -#include "path_info.h" namespace NKikimr::NOlap { class IBlobsDeclareRemovingAction; + +class TInsertedDataInstant { +private: + const TInsertedData* Data; + const TInstant WriteTime; + +public: + TInsertedDataInstant(const 
TInsertedData& data) + : Data(&data) + , WriteTime(Data->GetMeta().GetDirtyWriteTime()) + { + + } + + const TInsertedData& GetData() const { + return *Data; + } + TInstant GetWriteTime() const { + return WriteTime; + } + + bool operator<(const TInsertedDataInstant& item) const { + if (WriteTime == item.WriteTime) { + return Data->GetInsertWriteId() < item.Data->GetInsertWriteId(); + } else { + return WriteTime < item.WriteTime; + } + } +}; + +class TInsertedContainer { +private: + THashMap Inserted; + std::set InsertedByWriteTime; + +public: + size_t size() const { + return Inserted.size(); + } + + bool contains(const TInsertWriteId id) const { + return Inserted.contains(id); + } + + THashMap::const_iterator begin() const { + return Inserted.begin(); + } + + THashMap::const_iterator end() const { + return Inserted.end(); + } + + THashSet GetExpired(const TInstant timeBorder, const ui64 limit) const { + THashSet result; + for (auto& data : InsertedByWriteTime) { + if (timeBorder < data.GetWriteTime()) { + break; + } + if (data.GetData().IsNotAbortable()) { + continue; + } + result.emplace(data.GetData().GetInsertWriteId()); + if (limit <= result.size()) { + break; + } + } + return result; + } + + TInsertedData* AddVerified(TInsertedData&& data) { + const TInsertWriteId writeId = data.GetInsertWriteId(); + auto itInsertion = Inserted.emplace(writeId, std::move(data)); + AFL_VERIFY(itInsertion.second); + auto* dataPtr = &itInsertion.first->second; + InsertedByWriteTime.emplace(TInsertedDataInstant(*dataPtr)); + return dataPtr; + } + + const TInsertedData* GetOptional(const TInsertWriteId id) const { + auto it = Inserted.find(id); + if (it == Inserted.end()) { + return nullptr; + } else { + return &it->second; + } + } + + TInsertedData* MutableOptional(const TInsertWriteId id) { + auto it = Inserted.find(id); + if (it == Inserted.end()) { + return nullptr; + } else { + return &it->second; + } + } + + std::optional ExtractOptional(const TInsertWriteId id) { + auto it = 
Inserted.find(id); + if (it == Inserted.end()) { + return std::nullopt; + } + AFL_VERIFY(InsertedByWriteTime.erase(TInsertedDataInstant(it->second))); + TInsertedData result = std::move(it->second); + Inserted.erase(it); + return result; + } +}; + class TInsertionSummary { public: struct TCounters { @@ -19,9 +130,8 @@ class TInsertionSummary { TCounters StatsCommitted; const NColumnShard::TInsertTableCounters Counters; - THashMap Inserted; - THashMap Aborted; - mutable TInstant MinInsertedTs = TInstant::Zero(); + TInsertedContainer Inserted; + THashMap Aborted; std::map> Priorities; THashMap PathInfo; @@ -33,35 +143,63 @@ class TInsertionSummary { void OnNewInserted(TPathInfo& pathInfo, const ui64 dataSize, const bool load) noexcept; void OnEraseInserted(TPathInfo& pathInfo, const ui64 dataSize) noexcept; static TAtomicCounter CriticalInserted; + public: - THashSet GetInsertedByPathId(const ui64 pathId) const; + bool HasPathIdData(const ui64 pathId) const { + auto it = PathInfo.find(pathId); + if (it == PathInfo.end()) { + return false; + } + return !it->second.IsEmpty(); + } - THashSet GetExpiredInsertions(const TInstant timeBorder, const ui64 limit) const; + void ErasePath(const ui64 pathId) { + auto it = PathInfo.find(pathId); + if (it == PathInfo.end()) { + return; + } + RemovePriority(it->second); + AFL_VERIFY(it->second.IsEmpty()); + PathInfo.erase(it); + } - const THashMap& GetInserted() const { + void MarkAsNotAbortable(const TInsertWriteId writeId) { + auto* data = Inserted.MutableOptional(writeId); + if (!data) { + return; + } + data->MarkAsNotAbortable(); + } + + THashSet GetExpiredInsertions(const TInstant timeBorder, const ui64 limit) const; + + const TInsertedContainer& GetInserted() const { return Inserted; } - const THashMap& GetAborted() const { + const THashMap& GetAborted() const { return Aborted; } const TInsertedData* AddAborted(TInsertedData&& data, const bool load = false); - bool EraseAborted(const TWriteId writeId); - bool HasAborted(const 
TWriteId writeId); + bool EraseAborted(const TInsertWriteId writeId); + bool HasAborted(const TInsertWriteId writeId); - bool EraseCommitted(const TInsertedData& data); - bool HasCommitted(const TInsertedData& data); + bool EraseCommitted(const TCommittedData& data); + bool HasCommitted(const TCommittedData& data); const TInsertedData* AddInserted(TInsertedData&& data, const bool load = false); - std::optional ExtractInserted(const TWriteId id); + std::optional ExtractInserted(const TInsertWriteId id); - const TCounters& GetCountersPrepared() const { return StatsPrepared; } - const TCounters& GetCountersCommitted() const { return StatsCommitted; } + const TCounters& GetCountersPrepared() const { + return StatsPrepared; + } + const TCounters& GetCountersCommitted() const { + return StatsCommitted; + } const NColumnShard::TInsertTableCounters& GetCounters() const { return Counters; } NKikimr::NOlap::TPathInfo& GetPathInfo(const ui64 pathId); - std::optional ExtractPathInfo(const ui64 pathId); TPathInfo* GetPathInfoOptional(const ui64 pathId); const TPathInfo* GetPathInfoOptional(const ui64 pathId) const; @@ -76,4 +214,4 @@ class TInsertionSummary { } }; -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/insert_table/user_data.cpp b/ydb/core/tx/columnshard/engines/insert_table/user_data.cpp new file mode 100644 index 000000000000..f5d3db9d71d7 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/insert_table/user_data.cpp @@ -0,0 +1,45 @@ +#include "user_data.h" +#include + +namespace NKikimr::NOlap { + +namespace { + +class TInsertTableCacheController { +private: + TAtomicCounter BlobsCacheSize = 0; + const i64 BlobsCacheLimit = (i64)1 << 30; +public: + void Return(const ui64 size) { + const i64 val = BlobsCacheSize.Sub(size); + AFL_VERIFY(val >= 0)("size", size)("val", val); + } + + bool Take(const ui64 size) { + if (BlobsCacheSize.Add(size) <= BlobsCacheLimit) { + return true; + } + const i64 val = BlobsCacheSize.Sub(size); + 
AFL_VERIFY(val >= 0)("size", size)("val", val); + return false; + } +}; + +} + +TUserData::TBlobStorageGuard::~TBlobStorageGuard() { + Singleton()->Return(Data.size()); +} + + TUserData::TUserData(const ui64 pathId, const TBlobRange& blobRange, const NKikimrTxColumnShard::TLogicalMetadata& proto, + const ui64 schemaVersion, const std::optional& blobData) + : Meta(proto) + , BlobRange(blobRange) + , PathId(pathId) + , SchemaVersion(schemaVersion) { + if (blobData && Singleton()->Take(blobData->size())) { + BlobDataGuard = std::make_shared(*blobData); + } +} + +} diff --git a/ydb/core/tx/columnshard/engines/insert_table/user_data.h b/ydb/core/tx/columnshard/engines/insert_table/user_data.h new file mode 100644 index 000000000000..d734d90524eb --- /dev/null +++ b/ydb/core/tx/columnshard/engines/insert_table/user_data.h @@ -0,0 +1,100 @@ +#pragma once +#include "meta.h" + +#include + +#include + +namespace NKikimr::NOlap { + +class TUserData { +private: + TInsertedDataMeta Meta; + YDB_READONLY_DEF(TBlobRange, BlobRange); + class TBlobStorageGuard { + private: + YDB_READONLY_DEF(TString, Data); + + public: + TBlobStorageGuard(const TString& data) + : Data(data) { + } + ~TBlobStorageGuard(); + }; + + std::shared_ptr BlobDataGuard; + YDB_READONLY(ui64, PathId, 0); + YDB_READONLY(ui64, SchemaVersion, 0); + +public: + TUserData() = delete; + TUserData(const ui64 pathId, const TBlobRange& blobRange, const NKikimrTxColumnShard::TLogicalMetadata& proto, const ui64 schemaVersion, + const std::optional& blobData); + + static std::shared_ptr Build(const ui64 pathId, const TBlobRange& blobRange, const NKikimrTxColumnShard::TLogicalMetadata& proto, const ui64 schemaVersion, + const std::optional& blobData) { + return std::make_shared(pathId, blobRange, proto, schemaVersion, blobData); + } + + static std::shared_ptr Build(const ui64 pathId, const TUnifiedBlobId& blobId, const NKikimrTxColumnShard::TLogicalMetadata& proto, const ui64 schemaVersion, + const std::optional& blobData) { 
+ return std::make_shared(pathId, TBlobRange(blobId), proto, schemaVersion, blobData); + } + + std::optional GetBlobData() const { + if (BlobDataGuard) { + return BlobDataGuard->GetData(); + } else { + return std::nullopt; + } + } + + ui64 GetTxVolume() const { + return Meta.GetTxVolume() + sizeof(TBlobRange); + } + + const TInsertedDataMeta& GetMeta() const { + return Meta; + } +}; + +class TUserDataContainer { +protected: + std::shared_ptr UserData; + +public: + TUserDataContainer(const std::shared_ptr& userData) + : UserData(userData) { + AFL_VERIFY(UserData); + } + + ui64 GetSchemaVersion() const { + return UserData->GetSchemaVersion(); + } + + ui32 BlobSize() const { + return GetBlobRange().Size; + } + + ui32 GetTxVolume() const { + return UserData->GetTxVolume(); + } + + ui64 GetPathId() const { + return UserData->GetPathId(); + } + + const TBlobRange& GetBlobRange() const { + return UserData->GetBlobRange(); + } + + std::optional GetBlobData() const { + return UserData->GetBlobData(); + } + + const TInsertedDataMeta& GetMeta() const { + return UserData->GetMeta(); + } +}; + +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/insert_table/ya.make b/ydb/core/tx/columnshard/engines/insert_table/ya.make index 5f1d92bfb0ee..e6fde75077d5 100644 --- a/ydb/core/tx/columnshard/engines/insert_table/ya.make +++ b/ydb/core/tx/columnshard/engines/insert_table/ya.make @@ -3,13 +3,16 @@ LIBRARY() SRCS( insert_table.cpp rt_insertion.cpp - data.cpp + user_data.cpp + inserted.cpp + committed.cpp path_info.cpp meta.cpp ) PEERDIR( contrib/libs/apache/arrow + ydb/library/formats/arrow/modifier ydb/core/protos ydb/core/formats/arrow ydb/core/tablet_flat diff --git a/ydb/core/tx/columnshard/engines/portions/column_record.cpp b/ydb/core/tx/columnshard/engines/portions/column_record.cpp index e4fbef70c5fc..6127ad439326 100644 --- a/ydb/core/tx/columnshard/engines/portions/column_record.cpp +++ b/ydb/core/tx/columnshard/engines/portions/column_record.cpp @@ 
-28,7 +28,7 @@ TChunkMeta::TChunkMeta(const TColumnChunkLoadContext& context, const TSimpleColu DeserializeFromProto(context.GetAddress(), context.GetMetaProto(), columnInfo).Validate(); } -TChunkMeta::TChunkMeta(const std::shared_ptr& column, const TSimpleColumnInfo& columnInfo) +TChunkMeta::TChunkMeta(const std::shared_ptr& column, const TSimpleColumnInfo& columnInfo) : TBase(column, columnInfo.GetNeedMinMax(), columnInfo.GetIsSorted()) { } @@ -52,7 +52,8 @@ TColumnRecord::TColumnRecord(const TBlobRangeLink16::TLinkId blobLinkId, const T { } -TColumnRecord::TColumnRecord(const TChunkAddress& address, const std::shared_ptr& column, const TSimpleColumnInfo& columnInfo) +TColumnRecord::TColumnRecord( + const TChunkAddress& address, const std::shared_ptr& column, const TSimpleColumnInfo& columnInfo) : Meta(column, columnInfo) , ColumnId(address.GetColumnId()) , Chunk(address.GetChunk()) diff --git a/ydb/core/tx/columnshard/engines/portions/column_record.h b/ydb/core/tx/columnshard/engines/portions/column_record.h index ff34086f38d7..18fd0984d61b 100644 --- a/ydb/core/tx/columnshard/engines/portions/column_record.h +++ b/ydb/core/tx/columnshard/engines/portions/column_record.h @@ -2,20 +2,19 @@ #include "common.h" -#include - +#include +#include #include #include -#include -#include +#include #include +#include #include -#include #include #include - +#include #include namespace NKikimrColumnShardDataSharingProto { @@ -31,16 +30,17 @@ struct TChunkMeta: public TSimpleChunkMeta { private: using TBase = TSimpleChunkMeta; TChunkMeta() = default; - [[nodiscard]] TConclusionStatus DeserializeFromProto(const TChunkAddress& address, const NKikimrTxColumnShard::TIndexColumnMeta& proto, const TSimpleColumnInfo& columnInfo); + [[nodiscard]] TConclusionStatus DeserializeFromProto( + const TChunkAddress& address, const NKikimrTxColumnShard::TIndexColumnMeta& proto, const TSimpleColumnInfo& columnInfo); friend class TColumnRecord; + public: TChunkMeta(TSimpleChunkMeta&& baseMeta) - 
: TBase(baseMeta) - { - + : TBase(baseMeta) { } - [[nodiscard]] static TConclusion BuildFromProto(const TChunkAddress& address, const NKikimrTxColumnShard::TIndexColumnMeta& proto, const TSimpleColumnInfo& columnInfo) { + [[nodiscard]] static TConclusion BuildFromProto( + const TChunkAddress& address, const NKikimrTxColumnShard::TIndexColumnMeta& proto, const TSimpleColumnInfo& columnInfo) { TChunkMeta result; auto parse = result.DeserializeFromProto(address, proto, columnInfo); if (!parse) { @@ -63,20 +63,19 @@ struct TChunkMeta: public TSimpleChunkMeta { TChunkMeta(const TColumnChunkLoadContext& context, const TSimpleColumnInfo& columnInfo); - TChunkMeta(const std::shared_ptr& column, const TSimpleColumnInfo& columnInfo); + TChunkMeta(const std::shared_ptr& column, const TSimpleColumnInfo& columnInfo); }; class TColumnRecord { private: TChunkMeta Meta; TColumnRecord(TChunkMeta&& meta) - : Meta(std::move(meta)) - { - + : Meta(std::move(meta)) { } TColumnRecord() = default; TConclusionStatus DeserializeFromProto(const NKikimrColumnShardDataSharingProto::TColumnRecord& proto, const TSimpleColumnInfo& columnInfo); + public: ui32 ColumnId = 0; ui16 Chunk = 0; @@ -99,9 +98,7 @@ class TColumnRecord { : Meta(std::move(meta)) , ColumnId(address.GetColumnId()) , Chunk(address.GetChunk()) - , BlobRange(range) - { - + , BlobRange(range) { } class TTestInstanceBuilder { @@ -116,7 +113,7 @@ class TColumnRecord { } }; - ui32 GetColumnId() const { + ui32 GetColumnId() const { return ColumnId; } ui16 GetChunkIdx() const { @@ -127,7 +124,8 @@ class TColumnRecord { } NKikimrColumnShardDataSharingProto::TColumnRecord SerializeToProto() const; - static TConclusion BuildFromProto(const NKikimrColumnShardDataSharingProto::TColumnRecord& proto, const TSimpleColumnInfo& columnInfo) { + static TConclusion BuildFromProto( + const NKikimrColumnShardDataSharingProto::TColumnRecord& proto, const TSimpleColumnInfo& columnInfo) { TColumnRecord result; auto parse = 
result.DeserializeFromProto(proto, columnInfo); if (!parse) { @@ -136,14 +134,14 @@ class TColumnRecord { return result; } - TColumnSerializationStat GetSerializationStat(const std::string& columnName) const { - TColumnSerializationStat result(ColumnId, columnName); + NArrow::NSplitter::TColumnSerializationStat GetSerializationStat(const std::string& columnName) const { + NArrow::NSplitter::TColumnSerializationStat result(ColumnId, columnName); result.Merge(GetSerializationStat()); return result; } - TSimpleSerializationStat GetSerializationStat() const { - return TSimpleSerializationStat(BlobRange.Size, Meta.GetNumRows(), Meta.GetRawBytes()); + NArrow::NSplitter::TSimpleSerializationStat GetSerializationStat() const { + return NArrow::NSplitter::TSimpleSerializationStat(BlobRange.Size, Meta.GetNumRows(), Meta.GetRawBytes()); } const TChunkMeta& GetMeta() const { @@ -163,18 +161,17 @@ class TColumnRecord { } TString DebugString() const { - return TStringBuilder() - << "column_id:" << ColumnId << ";" - << "chunk_idx:" << Chunk << ";" - << "blob_range:" << BlobRange.ToString() << ";" - ; + return TStringBuilder() << "column_id:" << ColumnId << ";" + << "chunk_idx:" << Chunk << ";" + << "blob_range:" << BlobRange.ToString() << ";"; } - TColumnRecord(const TChunkAddress& address, const std::shared_ptr& column, const TSimpleColumnInfo& columnInfo); + TColumnRecord( + const TChunkAddress& address, const std::shared_ptr& column, const TSimpleColumnInfo& columnInfo); TColumnRecord(const TBlobRangeLink16::TLinkId blobLinkId, const TColumnChunkLoadContext& loadContext, const TSimpleColumnInfo& columnInfo); - friend IOutputStream& operator << (IOutputStream& out, const TColumnRecord& rec) { + friend IOutputStream& operator<<(IOutputStream& out, const TColumnRecord& rec) { out << '{'; if (rec.Chunk) { out << 'n' << rec.Chunk; @@ -186,49 +183,4 @@ class TColumnRecord { } }; -class TSimpleOrderedColumnChunk: public IPortionColumnChunk { -private: - using TBase = 
IPortionColumnChunk; - const TColumnRecord ColumnRecord; - YDB_READONLY_DEF(TString, Data); -protected: - virtual TString DoDebugString() const override { - TStringBuilder sb; - sb << "column_id=" << GetColumnId() << ";data_size=" << Data.size() << ";"; - if (GetChunkIdxOptional()) { - sb << "chunk=" << GetChunkIdxVerified() << ";"; - } else { - sb << "chunk=NO_INITIALIZED;"; - } - return sb; - } - - virtual const TString& DoGetData() const override { - return Data; - } - virtual ui32 DoGetRecordsCountImpl() const override { - return ColumnRecord.GetMeta().GetNumRows(); - } - virtual std::vector> DoInternalSplitImpl(const TColumnSaver& /*saver*/, const std::shared_ptr& /*counters*/, - const std::vector& /*splitSizes*/) const override { - Y_ABORT_UNLESS(false); - return {}; - } - virtual TSimpleChunkMeta DoBuildSimpleChunkMeta() const override { - return ColumnRecord.GetMeta(); - } - virtual std::shared_ptr DoGetFirstScalar() const override { - return nullptr; - } - virtual std::shared_ptr DoGetLastScalar() const override { - return nullptr; - } -public: - TSimpleOrderedColumnChunk(const TColumnRecord& cRecord, const TString& data) - : TBase(cRecord.ColumnId, cRecord.Chunk) - , ColumnRecord(cRecord) - , Data(data) { - } -}; - -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/portions/common.h b/ydb/core/tx/columnshard/engines/portions/common.h index 1231a1e9f5f3..3702887ccc81 100644 --- a/ydb/core/tx/columnshard/engines/portions/common.h +++ b/ydb/core/tx/columnshard/engines/portions/common.h @@ -1,7 +1,9 @@ #pragma once #include +#include namespace NKikimr::NOlap { +using TColumnSaver = NArrow::NAccessor::TColumnSaver; class TChunkAddress { private: diff --git a/ydb/core/tx/columnshard/engines/portions/constructor.cpp b/ydb/core/tx/columnshard/engines/portions/constructor.cpp index 4b8cfdf97a01..39cd0fe983dc 100644 --- a/ydb/core/tx/columnshard/engines/portions/constructor.cpp +++ 
b/ydb/core/tx/columnshard/engines/portions/constructor.cpp @@ -57,28 +57,16 @@ void TPortionInfoConstructor::LoadRecord(const TIndexInfo& indexInfo, const TCol } void TPortionInfoConstructor::LoadIndex(const TIndexChunkLoadContext& loadContext) { - const auto linkBlobId = RegisterBlobId(loadContext.GetBlobRange().GetBlobId()); - AddIndex(loadContext.BuildIndexChunk(linkBlobId)); + if (loadContext.GetBlobRange()) { + const TBlobRangeLink16::TLinkId linkBlobId = RegisterBlobId(loadContext.GetBlobRange()->GetBlobId()); + AddIndex(loadContext.BuildIndexChunk(linkBlobId)); + } else { + AddIndex(loadContext.BuildIndexChunk()); + } } const NKikimr::NOlap::TColumnRecord& TPortionInfoConstructor::AppendOneChunkColumn(TColumnRecord&& record) { Y_ABORT_UNLESS(record.ColumnId); - std::optional maxChunk; - for (auto&& i : Records) { - if (i.ColumnId == record.ColumnId) { - if (!maxChunk) { - maxChunk = i.Chunk; - } else { - Y_ABORT_UNLESS(*maxChunk + 1 == i.Chunk); - maxChunk = i.Chunk; - } - } - } - if (maxChunk) { - AFL_VERIFY(*maxChunk + 1 == record.Chunk)("max", *maxChunk)("record", record.Chunk); - } else { - AFL_VERIFY(0 == record.Chunk)("record", record.Chunk); - } Records.emplace_back(std::move(record)); return Records.back(); } diff --git a/ydb/core/tx/columnshard/engines/portions/constructor.h b/ydb/core/tx/columnshard/engines/portions/constructor.h index c29a7afc2c7e..4146c80fbc19 100644 --- a/ydb/core/tx/columnshard/engines/portions/constructor.h +++ b/ydb/core/tx/columnshard/engines/portions/constructor.h @@ -276,7 +276,9 @@ class TPortionInfoConstructor { blobIdxs.emplace(i.GetBlobRange().GetBlobIdxVerified()); } for (auto&& i : Indexes) { - blobIdxs.emplace(i.GetBlobRange().GetBlobIdxVerified()); + if (i.HasBlobRange()) { + blobIdxs.emplace(i.GetBlobRangeVerified().GetBlobIdxVerified()); + } } if (BlobIds.size()) { AFL_VERIFY(BlobIds.size() == blobIdxs.size()); diff --git a/ydb/core/tx/columnshard/engines/portions/constructor_meta.cpp 
b/ydb/core/tx/columnshard/engines/portions/constructor_meta.cpp index 378871ccf2aa..fa21c6feeeb6 100644 --- a/ydb/core/tx/columnshard/engines/portions/constructor_meta.cpp +++ b/ydb/core/tx/columnshard/engines/portions/constructor_meta.cpp @@ -29,9 +29,6 @@ TPortionMetaConstructor::TPortionMetaConstructor(const TPortionMeta& meta) { RecordSnapshotMax = meta.RecordSnapshotMax; DeletionsCount = meta.GetDeletionsCount(); TierName = meta.GetTierNameOptional(); - if (!meta.StatisticsStorage.IsEmpty()) { - StatisticsStorage = meta.StatisticsStorage; - } if (meta.Produced != NPortion::EProduced::UNSPECIFIED) { Produced = meta.Produced; } @@ -49,28 +46,14 @@ TPortionMeta TPortionMetaConstructor::Build() { result.DeletionsCount = *DeletionsCount; AFL_VERIFY(Produced); result.Produced = *Produced; - if (StatisticsStorage) { - result.StatisticsStorage = *StatisticsStorage; - } return result; } bool TPortionMetaConstructor::LoadMetadata(const NKikimrTxColumnShard::TIndexPortionMeta& portionMeta, const TIndexInfo& indexInfo) { if (!!Produced) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "DeserializeFromProto")("error", "parsing duplication"); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "DeserializeFromProto")("error", "parsing duplication"); return true; } - if (portionMeta.HasStatisticsStorage()) { - auto parsed = NStatistics::TPortionStorage::BuildFromProto(portionMeta.GetStatisticsStorage()); - if (!parsed) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "DeserializeFromProto")("error", parsed.GetErrorMessage()); - return false; - } - StatisticsStorage = parsed.DetachResult(); - if (StatisticsStorage->IsEmpty()) { - StatisticsStorage.reset(); - } - } if (portionMeta.GetTierName()) { TierName = portionMeta.GetTierName(); } diff --git a/ydb/core/tx/columnshard/engines/portions/constructor_meta.h b/ydb/core/tx/columnshard/engines/portions/constructor_meta.h index ccc854886ec7..87b808a282fd 100644 --- 
a/ydb/core/tx/columnshard/engines/portions/constructor_meta.h +++ b/ydb/core/tx/columnshard/engines/portions/constructor_meta.h @@ -3,7 +3,6 @@ #include #include #include -#include namespace NKikimr::NOlap { class TPortionInfoConstructor; @@ -13,7 +12,6 @@ class TPortionMetaConstructor { private: std::optional FirstAndLastPK; std::optional TierName; - std::optional StatisticsStorage; std::optional RecordSnapshotMin; std::optional RecordSnapshotMax; std::optional Produced; @@ -31,15 +29,6 @@ class TPortionMetaConstructor { SetTierName(tierName); } - void SetStatisticsStorage(NStatistics::TPortionStorage&& storage) { - AFL_VERIFY(!StatisticsStorage); - StatisticsStorage = std::move(storage); - } - - void ResetStatisticsStorage(NStatistics::TPortionStorage&& storage) { - StatisticsStorage = std::move(storage); - } - void UpdateRecordsMeta(const NPortion::EProduced prod) { Produced = prod; } diff --git a/ydb/core/tx/columnshard/engines/portions/index_chunk.cpp b/ydb/core/tx/columnshard/engines/portions/index_chunk.cpp index 4aeaa20dd20e..990de1b65d78 100644 --- a/ydb/core/tx/columnshard/engines/portions/index_chunk.cpp +++ b/ydb/core/tx/columnshard/engines/portions/index_chunk.cpp @@ -1,4 +1,5 @@ #include "index_chunk.h" + #include namespace NKikimr::NOlap { @@ -13,16 +14,39 @@ NKikimr::TConclusionStatus TIndexChunk::DeserializeFromProto(const NKikimrColumn RecordsCount = proto.GetMeta().GetRecordsCount(); RawBytes = proto.GetMeta().GetRawBytes(); } - { + if (proto.HasBlobRange()) { auto parsed = TBlobRangeLink16::BuildFromProto(proto.GetBlobRange()); if (!parsed) { return parsed; } - BlobRange = parsed.DetachResult(); + Data = parsed.DetachResult(); + } else if (proto.HasBlobData()) { + Data = proto.GetBlobData(); + } else { + return TConclusionStatus::Fail("incorrect blob info - neither BlobData nor BlobRange"); } return TConclusionStatus::Success(); } +namespace { +class TBlobInfoSerializer { +private: + NKikimrColumnShardDataSharingProto::TIndexChunk& Proto; + 
+public: + TBlobInfoSerializer(NKikimrColumnShardDataSharingProto::TIndexChunk& proto) + : Proto(proto) { + } + + void operator()(const TBlobRangeLink16& link) { + *Proto.MutableBlobRange() = link.SerializeToProto(); + } + void operator()(const TString& data) { + *Proto.MutableBlobData() = data; + } +}; +} // namespace + NKikimrColumnShardDataSharingProto::TIndexChunk TIndexChunk::SerializeToProto() const { NKikimrColumnShardDataSharingProto::TIndexChunk result; result.SetIndexId(IndexId); @@ -32,8 +56,26 @@ NKikimrColumnShardDataSharingProto::TIndexChunk TIndexChunk::SerializeToProto() meta->SetRecordsCount(RecordsCount); meta->SetRawBytes(RawBytes); } - *result.MutableBlobRange() = BlobRange.SerializeToProto(); + std::visit(TBlobInfoSerializer(result), Data); return result; } +namespace { +class TDataSizeExtractor { +public: + TDataSizeExtractor() = default; + + ui64 operator()(const TBlobRangeLink16& link) { + return link.GetSize(); + } + ui64 operator()(const TString& data) { + return data.size(); + } +}; +} // namespace + +ui64 TIndexChunk::GetDataSize() const { + return std::visit(TDataSizeExtractor(), Data); } + +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/portions/index_chunk.h b/ydb/core/tx/columnshard/engines/portions/index_chunk.h index 6a71704318ca..65e0855d5eef 100644 --- a/ydb/core/tx/columnshard/engines/portions/index_chunk.h +++ b/ydb/core/tx/columnshard/engines/portions/index_chunk.h @@ -6,7 +6,6 @@ #include #include -#include #include #include @@ -30,11 +29,41 @@ class TIndexChunk { YDB_READONLY(ui32, ChunkIdx, 0); YDB_READONLY(ui32, RecordsCount, 0); YDB_READONLY(ui32, RawBytes, 0); - YDB_READONLY_DEF(TBlobRangeLink16, BlobRange); + std::variant Data; TIndexChunk() = default; TConclusionStatus DeserializeFromProto(const NKikimrColumnShardDataSharingProto::TIndexChunk& proto); public: + ui64 GetDataSize() const; + + bool HasBlobRange() const { + return std::holds_alternative(Data); + } + + const TBlobRangeLink16* 
GetBlobRangeOptional() const { + return std::get_if(&Data); + } + + const TBlobRangeLink16& GetBlobRangeVerified() const { + const auto* result = std::get_if(&Data); + AFL_VERIFY(result); + return *result; + } + + bool HasBlobData() const { + return std::holds_alternative(Data); + } + + const TString* GetBlobDataOptional() const { + return std::get_if(&Data); + } + + const TString& GetBlobDataVerified() const { + const auto* result = std::get_if(&Data); + AFL_VERIFY(result); + return *result; + } + TChunkAddress GetAddress() const { return TChunkAddress(IndexId, ChunkIdx); } @@ -48,13 +77,22 @@ class TIndexChunk { , ChunkIdx(chunkIdx) , RecordsCount(recordsCount) , RawBytes(rawBytes) - , BlobRange(blobRange) { + , Data(blobRange) { + + } + TIndexChunk(const ui32 indexId, const ui32 chunkIdx, const ui32 recordsCount, const ui64 rawBytes, const TString& blobData) + : IndexId(indexId) + , ChunkIdx(chunkIdx) + , RecordsCount(recordsCount) + , RawBytes(rawBytes) + , Data(blobData) { } void RegisterBlobIdx(const TBlobRangeLink16::TLinkId blobLinkId) { -// AFL_VERIFY(!BlobRange.BlobId.GetTabletId())("original", BlobRange.BlobId.ToStringNew())("new", blobId.ToStringNew()); - BlobRange.BlobIdx = blobLinkId; + auto* result = std::get_if(&Data); + AFL_VERIFY(result); + result->BlobIdx = blobLinkId; } static TConclusion BuildFromProto(const NKikimrColumnShardDataSharingProto::TIndexChunk& proto) { diff --git a/ydb/core/tx/columnshard/engines/portions/meta.cpp b/ydb/core/tx/columnshard/engines/portions/meta.cpp index b3b2a92ef9e2..9d7e374ec8f1 100644 --- a/ydb/core/tx/columnshard/engines/portions/meta.cpp +++ b/ydb/core/tx/columnshard/engines/portions/meta.cpp @@ -11,9 +11,6 @@ namespace NKikimr::NOlap { NKikimrTxColumnShard::TIndexPortionMeta TPortionMeta::SerializeToProto() const { NKikimrTxColumnShard::TIndexPortionMeta portionMeta; portionMeta.SetTierName(TierName); - if (!StatisticsStorage.IsEmpty()) { - *portionMeta.MutableStatisticsStorage() = 
StatisticsStorage.SerializeToProto(); - } portionMeta.SetDeletionsCount(DeletionsCount); switch (Produced) { case TPortionMeta::EProduced::UNSPECIFIED: @@ -36,7 +33,7 @@ NKikimrTxColumnShard::TIndexPortionMeta TPortionMeta::SerializeToProto() const { break; } - portionMeta.SetPrimaryKeyBorders(ReplaceKeyEdges.SerializeToStringDataOnlyNoCompression()); + portionMeta.SetPrimaryKeyBorders(ReplaceKeyEdges.SerializePayloadToString()); RecordSnapshotMin.SerializeToProto(*portionMeta.MutableRecordSnapshotMin()); RecordSnapshotMax.SerializeToProto(*portionMeta.MutableRecordSnapshotMax()); diff --git a/ydb/core/tx/columnshard/engines/portions/meta.h b/ydb/core/tx/columnshard/engines/portions/meta.h index 5facbe8d5ae6..ad57ef1325c3 100644 --- a/ydb/core/tx/columnshard/engines/portions/meta.h +++ b/ydb/core/tx/columnshard/engines/portions/meta.h @@ -1,9 +1,8 @@ #pragma once #include #include -#include #include -#include +#include #include #include #include @@ -16,7 +15,6 @@ struct TPortionMeta { private: NArrow::TFirstLastSpecialKeys ReplaceKeyEdges; // first and last PK rows YDB_READONLY_DEF(TString, TierName); - YDB_READONLY_DEF(NStatistics::TPortionStorage, StatisticsStorage); YDB_READONLY(ui32, DeletionsCount, 0); friend class TPortionMetaConstructor; TPortionMeta(NArrow::TFirstLastSpecialKeys& pk, const TSnapshot& min, const TSnapshot& max) diff --git a/ydb/core/tx/columnshard/engines/portions/portion_info.cpp b/ydb/core/tx/columnshard/engines/portions/portion_info.cpp index 0bbefe1c5550..6652bf9c4c5f 100644 --- a/ydb/core/tx/columnshard/engines/portions/portion_info.cpp +++ b/ydb/core/tx/columnshard/engines/portions/portion_info.cpp @@ -7,9 +7,11 @@ #include #include #include +#include +#include #include #include -#include +#include #include @@ -30,38 +32,57 @@ std::shared_ptr TPortionInfo::MaxValue(ui32 columnId) const { return result; } -ui64 TPortionInfo::GetColumnRawBytes(const std::vector& columnIds, const bool validation) const { - return 
GetColumnRawBytes(std::set(columnIds.begin(), columnIds.end()), validation); +ui64 TPortionInfo::GetColumnRawBytes(const std::set& entityIds, const bool validation) const { + ui64 sum = 0; + const auto aggr = [&](const TColumnRecord& r) { + sum += r.GetMeta().GetRawBytes(); + }; + AggregateIndexChunksData(aggr, Records, &entityIds, validation); + return sum; } -ui64 TPortionInfo::GetColumnRawBytes(const std::optional>& entityIds, const bool validation) const { +ui64 TPortionInfo::GetColumnBlobBytes(const std::set& entityIds, const bool validation) const { + ui64 sum = 0; + const auto aggr = [&](const TColumnRecord& r) { + sum += r.GetBlobRange().GetSize(); + }; + AggregateIndexChunksData(aggr, Records, &entityIds, validation); + return sum; +} + +ui64 TPortionInfo::GetColumnRawBytes(const bool validation) const { ui64 sum = 0; const auto aggr = [&](const TColumnRecord& r) { sum += r.GetMeta().GetRawBytes(); }; - AggregateIndexChunksData(aggr, Records, entityIds, validation); + AggregateIndexChunksData(aggr, Records, nullptr, validation); return sum; } -ui64 TPortionInfo::GetColumnBlobBytes(const std::optional>& entityIds, const bool validation) const { +ui64 TPortionInfo::GetColumnBlobBytes(const bool validation) const { ui64 sum = 0; const auto aggr = [&](const TColumnRecord& r) { sum += r.GetBlobRange().GetSize(); }; - AggregateIndexChunksData(aggr, Records, entityIds, validation); + AggregateIndexChunksData(aggr, Records, nullptr, validation); return sum; } -ui64 TPortionInfo::GetColumnBlobBytes(const std::vector& columnIds, const bool validation) const { - return GetColumnBlobBytes(std::set(columnIds.begin(), columnIds.end()), validation); +ui64 TPortionInfo::GetIndexRawBytes(const std::set& entityIds, const bool validation) const { + ui64 sum = 0; + const auto aggr = [&](const TIndexChunk& r) { + sum += r.GetRawBytes(); + }; + AggregateIndexChunksData(aggr, Indexes, &entityIds, validation); + return sum; } -ui64 TPortionInfo::GetIndexRawBytes(const 
std::optional>& entityIds, const bool validation) const { +ui64 TPortionInfo::GetIndexRawBytes(const bool validation) const { ui64 sum = 0; const auto aggr = [&](const TIndexChunk& r) { sum += r.GetRawBytes(); }; - AggregateIndexChunksData(aggr, Indexes, entityIds, validation); + AggregateIndexChunksData(aggr, Indexes, nullptr, validation); return sum; } @@ -312,7 +333,10 @@ THashMap TPortionInfo::DecodeBlobAddress continue; } for (auto&& record : Indexes) { - if (RestoreBlobRange(record.GetBlobRange()) == b.first) { + if (!record.HasBlobRange()) { + continue; + } + if (RestoreBlobRange(record.GetBlobRangeVerified()) == b.first) { if (columnId != record.GetIndexId()) { columnStorageId = indexInfo.GetIndexStorageId(record.GetIndexId()); } @@ -355,7 +379,9 @@ void TPortionInfo::FillBlobRangesByStorage(THashMapemplace(i.GetBlobRange().GetBlobIdxVerified()).second) { - auto blobId = GetBlobId(i.GetBlobRange().GetBlobIdxVerified()); - AFL_VERIFY(currentHashResult); - AFL_VERIFY(currentHashResult->emplace(blobId).second)("blob_id", blobId.ToStringNew()); - lastBlobIdx = i.GetBlobRange().GetBlobIdxVerified(); + if (auto bRange = i.GetBlobRangeOptional()) { + if (lastBlobIdx != bRange->GetBlobIdxVerified() && currentHashLocal->emplace(bRange->GetBlobIdxVerified()).second) { + auto blobId = GetBlobId(bRange->GetBlobIdxVerified()); + AFL_VERIFY(currentHashResult); + AFL_VERIFY(currentHashResult->emplace(blobId).second)("blob_id", blobId.ToStringNew()); + lastBlobIdx = bRange->GetBlobIdxVerified(); + } } } } @@ -414,38 +442,30 @@ void TPortionInfo::FillBlobIdsByStorage(THashMapGetIndexInfo()); } -THashMap>>> TPortionInfo::RestoreEntityChunks(NBlobOperations::NRead::TCompositeReadBlobs& blobs, const TIndexInfo& indexInfo) const { - THashMap>>> result; +THashMap>> TPortionInfo::RestoreEntityChunks(NBlobOperations::NRead::TCompositeReadBlobs& blobs, const TIndexInfo& indexInfo) const { + THashMap>> result; for (auto&& c : GetRecords()) { const TString& storageId = 
GetColumnStorageId(c.GetColumnId(), indexInfo); - auto& storageRecords = result[storageId]; - auto& blobRecords = storageRecords[GetBlobId(c.GetBlobRange().GetBlobIdxVerified())]; - blobRecords.emplace_back(std::make_shared(blobs.Extract(storageId, RestoreBlobRange(c.GetBlobRange())), c, indexInfo.GetColumnFeaturesVerified(c.GetColumnId()))); - blobRecords.back()->SetChunkIdx(c.GetChunkIdx()); + auto chunk = std::make_shared(blobs.Extract(storageId, RestoreBlobRange(c.GetBlobRange())), c, indexInfo.GetColumnFeaturesVerified(c.GetColumnId())); + chunk->SetChunkIdx(c.GetChunkIdx()); + AFL_VERIFY(result[storageId].emplace(c.GetAddress(), chunk).second); } for (auto&& c : GetIndexes()) { const TString& storageId = indexInfo.GetIndexStorageId(c.GetIndexId()); - auto& storageRecords = result[storageId]; - auto& blobRecords = storageRecords[GetBlobId(c.GetBlobRange().GetBlobIdxVerified())]; - blobRecords.emplace_back(std::make_shared(c.GetAddress(), c.GetRecordsCount(), c.GetRawBytes(), blobs.Extract(storageId, RestoreBlobRange(c.GetBlobRange())))); - blobRecords.back()->SetChunkIdx(c.GetChunkIdx()); - } - return result; -} + const TString blobData = [&]() -> TString { + if (auto bRange = c.GetBlobRangeOptional()) { + return blobs.Extract(storageId, RestoreBlobRange(*bRange)); + } else if (auto data = c.GetBlobDataOptional()) { + return *data; + } else { + AFL_VERIFY(false); + Y_UNREACHABLE(); + } + }(); + auto chunk = std::make_shared(c.GetAddress(), c.GetRecordsCount(), c.GetRawBytes(), blobData); + chunk->SetChunkIdx(c.GetChunkIdx()); -THashMap>> TPortionInfo::GetEntityChunks(const TIndexInfo& indexInfo) const { - THashMap>> result; - for (auto&& c : GetRecords()) { - const TString& storageId = GetColumnStorageId(c.GetColumnId(), indexInfo); - auto& storageRecords = result[storageId]; - auto& blobRecords = storageRecords[GetBlobId(c.GetBlobRange().GetBlobIdxVerified())]; - blobRecords.emplace_back(TEntityChunk(c.GetAddress(), c.GetMeta().GetNumRows(), 
c.GetMeta().GetRawBytes(), c.GetBlobRange())); - } - for (auto&& c : GetIndexes()) { - const TString& storageId = indexInfo.GetIndexStorageId(c.GetIndexId()); - auto& storageRecords = result[storageId]; - auto& blobRecords = storageRecords[GetBlobId(c.GetBlobRange().GetBlobIdxVerified())]; - blobRecords.emplace_back(TEntityChunk(c.GetAddress(), c.GetRecordsCount(), c.GetRawBytes(), c.GetBlobRange())); + AFL_VERIFY(result[storageId].emplace(c.GetAddress(), chunk).second); } return result; } @@ -496,7 +516,9 @@ void TPortionInfo::FullValidation() const { blobIdxs.emplace(i.GetBlobRange().GetBlobIdxVerified()); } for (auto&& i : Indexes) { - blobIdxs.emplace(i.GetBlobRange().GetBlobIdxVerified()); + if (auto bRange = i.GetBlobRangeOptional()) { + blobIdxs.emplace(bRange->GetBlobIdxVerified()); + } } if (BlobIds.size()) { AFL_VERIFY(BlobIds.size() == blobIdxs.size()); @@ -589,11 +611,10 @@ template TPortionInfo::TPreparedBatchData PrepareForAssembleImpl(const TPortionInfo& portion, const ISnapshotSchema& dataSchema, const ISnapshotSchema& resultSchema, THashMap& blobsData) { std::vector columns; - auto arrowResultSchema = resultSchema.GetSchema(); - columns.reserve(arrowResultSchema->num_fields()); + columns.reserve(resultSchema.GetColumnIds().size()); const ui32 rowsCount = portion.GetRecordsCount(); - for (auto&& i : arrowResultSchema->fields()) { - columns.emplace_back(rowsCount, dataSchema.GetColumnLoaderOptional(i->name()), resultSchema.GetColumnLoaderOptional(i->name())); + for (auto&& i : resultSchema.GetColumnIds()) { + columns.emplace_back(rowsCount, dataSchema.GetColumnLoaderOptional(i), resultSchema.GetColumnLoaderVerified(i)); } { int skipColumnId = -1; @@ -625,35 +646,11 @@ TPortionInfo::TPreparedBatchData PrepareForAssembleImpl(const TPortionInfo& port preparedColumns.emplace_back(c.Compile()); } - return TPortionInfo::TPreparedBatchData(std::move(preparedColumns), arrowResultSchema, rowsCount); + return 
TPortionInfo::TPreparedBatchData(std::move(preparedColumns), rowsCount); } } -namespace { -class TChunkAccessor { -private: - const std::vector& Chunks; - const std::shared_ptr Loader; -public: - TChunkAccessor(const std::vector& chunks, const std::shared_ptr& loader) - : Chunks(chunks) - , Loader(loader) - { - - } - ui64 GetChunksCount() const { - return Chunks.size(); - } - ui64 GetChunkLength(const ui32 idx) const { - return Chunks[idx].GetRecordsCount(); - } - std::shared_ptr GetArray(const ui32 idx) const { - return Chunks[idx].GetArrayVerified(Loader); - } -}; -} - ISnapshotSchema::TPtr TPortionInfo::TSchemaCursor::GetSchema(const TPortionInfoConstructor& portion) { if (!CurrentSchema || portion.GetMinSnapshotDeprecatedVerified() != LastSnapshot) { CurrentSchema = portion.GetSchema(VersionedIndex); @@ -663,12 +660,8 @@ ISnapshotSchema::TPtr TPortionInfo::TSchemaCursor::GetSchema(const TPortionInfoC return CurrentSchema; } -NArrow::NAccessor::IChunkedArray::TCurrentChunkAddress TDeserializeChunkedArray::DoGetChunk(const std::optional& chunkCurrent, const ui64 position) const { - TChunkAccessor accessor(Chunks, Loader); - return SelectChunk(chunkCurrent, position, accessor); -} - -TPortionInfo::TPreparedBatchData TPortionInfo::PrepareForAssemble(const ISnapshotSchema& dataSchema, const ISnapshotSchema& resultSchema, THashMap& blobsData) const { +TPortionInfo::TPreparedBatchData TPortionInfo::PrepareForAssemble( + const ISnapshotSchema& dataSchema, const ISnapshotSchema& resultSchema, THashMap& blobsData) const { return PrepareForAssembleImpl(*this, dataSchema, resultSchema, blobsData); } @@ -683,100 +676,74 @@ bool TPortionInfo::NeedShardingFilter(const TGranuleShardingInfo& shardingInfo) return true; } -std::shared_ptr TPortionInfo::TPreparedColumn::AssembleForSeqAccess() const { +std::shared_ptr TPortionInfo::TPreparedColumn::AssembleAccessor() const { Y_ABORT_UNLESS(!Blobs.empty()); - std::vector chunks; - chunks.reserve(Blobs.size()); - ui64 recordsCount = 
0; + NArrow::NAccessor::TCompositeChunkedArray::TBuilder builder(GetField()->type()); for (auto& blob : Blobs) { - chunks.push_back(blob.BuildDeserializeChunk(Loader)); - recordsCount += blob.GetExpectedRowsCountVerified(); + auto chunkedArray = blob.BuildRecordBatch(*Loader); + builder.AddChunk(chunkedArray); } - - return std::make_shared(recordsCount, Loader, std::move(chunks)); + return builder.Finish(); } -std::shared_ptr TPortionInfo::TPreparedColumn::Assemble() const { +std::shared_ptr TPortionInfo::TPreparedColumn::AssembleForSeqAccess() const { Y_ABORT_UNLESS(!Blobs.empty()); - std::vector> batches; - batches.reserve(Blobs.size()); + std::vector chunks; + chunks.reserve(Blobs.size()); + ui64 recordsCount = 0; for (auto& blob : Blobs) { - batches.push_back(blob.BuildRecordBatch(*Loader)); - Y_ABORT_UNLESS(batches.back()); + chunks.push_back(blob.BuildDeserializeChunk(Loader)); + if (!!blob.GetData()) { + recordsCount += blob.GetExpectedRowsCountVerified(); + } else { + recordsCount += blob.GetDefaultRowsCount(); + } } - auto res = arrow::Table::FromRecordBatches(batches); - Y_VERIFY_S(res.ok(), res.status().message()); - return (*res)->column(0); + return std::make_shared(recordsCount, Loader, std::move(chunks)); } -TDeserializeChunkedArray::TChunk TPortionInfo::TAssembleBlobInfo::BuildDeserializeChunk(const std::shared_ptr& loader) const { +NArrow::NAccessor::TDeserializeChunkedArray::TChunk TPortionInfo::TAssembleBlobInfo::BuildDeserializeChunk( + const std::shared_ptr& loader) const { if (DefaultRowsCount) { Y_ABORT_UNLESS(!Data); - AFL_VERIFY(loader->GetExpectedSchema()->num_fields() == 1); - auto col = NArrow::TThreadSimpleArraysCache::Get(loader->GetExpectedSchema()->field(0)->type(), DefaultValue, DefaultRowsCount); - return TDeserializeChunkedArray::TChunk(col); + auto col = std::make_shared( + NArrow::TThreadSimpleArraysCache::Get(loader->GetField()->type(), DefaultValue, DefaultRowsCount)); + return 
NArrow::NAccessor::TDeserializeChunkedArray::TChunk(col); } else { AFL_VERIFY(ExpectedRowsCount); - return TDeserializeChunkedArray::TChunk(*ExpectedRowsCount, Data); + return NArrow::NAccessor::TDeserializeChunkedArray::TChunk(*ExpectedRowsCount, Data); } } -std::shared_ptr TPortionInfo::TAssembleBlobInfo::BuildRecordBatch(const TColumnLoader& loader) const { +std::shared_ptr TPortionInfo::TAssembleBlobInfo::BuildRecordBatch(const TColumnLoader& loader) const { if (DefaultRowsCount) { Y_ABORT_UNLESS(!Data); - AFL_VERIFY(loader.GetExpectedSchema()->num_fields() == 1); - return arrow::RecordBatch::Make(loader.GetExpectedSchema(), DefaultRowsCount, - { NArrow::TThreadSimpleArraysCache::Get(loader.GetExpectedSchema()->field(0)->type(), DefaultValue, DefaultRowsCount) }); + return std::make_shared( + NArrow::TThreadSimpleArraysCache::Get(loader.GetField()->type(), DefaultValue, DefaultRowsCount)); } else { - auto result = loader.Apply(Data); - if (!result.ok()) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "cannot unpack batch")("error", result.status().ToString())("loader", loader.DebugString()); - return nullptr; - } - if (ExpectedRowsCount) { - AFL_VERIFY((*result)->num_rows() == ExpectedRowsCount)("real", (*result)->num_rows())("expected", ExpectedRowsCount); - } - return *result; + AFL_VERIFY(ExpectedRowsCount); + return loader.ApplyVerified(Data, *ExpectedRowsCount); } } -std::shared_ptr TPortionInfo::TPreparedBatchData::AssembleForSeqAccess() const { +std::shared_ptr TPortionInfo::TPreparedBatchData::AssembleToGeneralContainer( + const std::set& sequentialColumnIds) const { std::vector> columns; std::vector> fields; for (auto&& i : Columns) { - columns.emplace_back(i.AssembleForSeqAccess()); - fields.emplace_back(i.GetField()); - } - - return std::make_shared(std::make_shared(fields), std::move(columns)); -} - -std::shared_ptr TPortionInfo::TPreparedBatchData::AssembleTable(const TAssembleOptions& options) const { - std::vector> columns; - 
std::vector> fields; - for (auto&& i : Columns) { - if (!options.IsAcceptedColumn(i.GetColumnId())) { - continue; - } - std::shared_ptr scalar; - if (options.IsConstantColumn(i.GetColumnId(), scalar)) { - auto type = i.GetField()->type(); - std::shared_ptr arr = NArrow::TThreadSimpleArraysCache::Get(type, scalar, RowsCount); - columns.emplace_back(std::make_shared(arr)); + NActors::TLogContextGuard lGuard = NActors::TLogContextBuilder::Build()("column", i.GetField()->ToString())("id", i.GetColumnId()); + if (sequentialColumnIds.contains(i.GetColumnId())) { + columns.emplace_back(i.AssembleForSeqAccess()); } else { - columns.emplace_back(i.Assemble()); + columns.emplace_back(i.AssembleAccessor()); } fields.emplace_back(i.GetField()); } - return arrow::Table::Make(std::make_shared(fields), columns); -} - -std::shared_ptr TPortionInfo::TPreparedBatchData::Assemble(const TAssembleOptions& options) const { - return NArrow::ToBatch(AssembleTable(options), true); + return std::make_shared(fields, std::move(columns)); } } diff --git a/ydb/core/tx/columnshard/engines/portions/portion_info.h b/ydb/core/tx/columnshard/engines/portions/portion_info.h index 25c32c264684..6fa105745b05 100644 --- a/ydb/core/tx/columnshard/engines/portions/portion_info.h +++ b/ydb/core/tx/columnshard/engines/portions/portion_info.h @@ -3,9 +3,11 @@ #include "index_chunk.h" #include "meta.h" +#include #include -#include +#include #include +#include #include #include #include @@ -28,57 +30,6 @@ struct TIndexInfo; class TVersionedIndex; class IDbWrapper; -class TDeserializeChunkedArray: public NArrow::NAccessor::IChunkedArray { -private: - using TBase = NArrow::NAccessor::IChunkedArray; -public: - class TChunk { - private: - YDB_READONLY(ui32, RecordsCount, 0); - std::shared_ptr PredefinedArray; - const TString Data; - public: - TChunk(const std::shared_ptr& predefinedArray) - : PredefinedArray(predefinedArray) { - AFL_VERIFY(PredefinedArray); - RecordsCount = PredefinedArray->length(); - } - - 
TChunk(const ui32 recordsCount, const TString& data) - : RecordsCount(recordsCount) - , Data(data) { - - } - - std::shared_ptr GetArrayVerified(const std::shared_ptr& loader) const { - if (PredefinedArray) { - return PredefinedArray; - } - auto result = loader->ApplyVerified(Data); - AFL_VERIFY(result); - AFL_VERIFY(result->num_columns() == 1); - AFL_VERIFY(result->num_rows() == RecordsCount)("length", result->num_rows())("records_count", RecordsCount); - return result->column(0); - } - }; - - std::shared_ptr Loader; - std::vector Chunks; -protected: - virtual TCurrentChunkAddress DoGetChunk(const std::optional& chunkCurrent, const ui64 position) const override; - virtual std::shared_ptr DoGetChunkedArray() const override { - AFL_VERIFY(false); - return nullptr; - } -public: - TDeserializeChunkedArray(const ui64 recordsCount, const std::shared_ptr& loader, std::vector&& chunks) - : TBase(recordsCount, NArrow::NAccessor::IChunkedArray::EType::SerializedChunkedArray, loader->GetField()->type()) - , Loader(loader) - , Chunks(std::move(chunks)) { - AFL_VERIFY(Loader); - } -}; - class TEntityChunk { private: TChunkAddress Address; @@ -147,7 +98,7 @@ class TPortionInfo { } template - static void AggregateIndexChunksData(const TAggregator& aggr, const std::vector& chunks, const std::optional>& columnIds, const bool validation) { + static void AggregateIndexChunksData(const TAggregator& aggr, const std::vector& chunks, const std::set* columnIds, const bool validation) { if (columnIds) { auto itColumn = columnIds->begin(); auto itRecord = chunks.begin(); @@ -197,6 +148,16 @@ class TPortionInfo { SetRemoveSnapshot(TSnapshot(planStep, txId)); } + std::vector GetIndexInplaceDataVerified(const ui32 indexId) const { + std::vector result; + for (auto&& i : Indexes) { + if (i.GetEntityId() == indexId) { + result.emplace_back(i.GetBlobDataVerified()); + } + } + return result; + } + void InitRuntimeFeature(const ERuntimeFeature feature, const bool activity) { if (activity) { 
AddRuntimeFeature(feature); @@ -239,8 +200,7 @@ class TPortionInfo { void ReorderChunks(); - THashMap>>> RestoreEntityChunks(NBlobOperations::NRead::TCompositeReadBlobs& blobs, const TIndexInfo& indexInfo) const; - THashMap>> GetEntityChunks(const TIndexInfo & info) const; + THashMap>> RestoreEntityChunks(NBlobOperations::NRead::TCompositeReadBlobs& blobs, const TIndexInfo& indexInfo) const; const TBlobRange RestoreBlobRange(const TBlobRangeLink16& linkRange) const { return linkRange.RestoreRange(GetBlobId(linkRange.GetBlobIdxVerified())); @@ -359,8 +319,8 @@ class TPortionInfo { return result; } - TSerializationStats GetSerializationStat(const ISnapshotSchema& schema) const { - TSerializationStats result; + NArrow::NSplitter::TSerializationStats GetSerializationStat(const ISnapshotSchema& schema) const { + NArrow::NSplitter::TSerializationStats result; for (auto&& i : Records) { if (schema.GetFieldByColumnIdOptional(i.ColumnId)) { result.AddStat(i.GetSerializationStat(schema.GetFieldByColumnIdVerified(i.ColumnId)->name())); @@ -378,36 +338,29 @@ class TPortionInfo { } const TColumnRecord* GetRecordPointer(const TChunkAddress& address) const { - for (auto&& i : Records) { - if (i.GetAddress() == address) { - return &i; - } + auto it = std::lower_bound(Records.begin(), Records.end(), address, [](const TColumnRecord& item, const TChunkAddress& address) { + return item.GetAddress() < address; + }); + if (it != Records.end() && it->GetAddress() == address) { + return &*it; } return nullptr; } - std::optional GetEntityRecord(const TChunkAddress& address) const { - for (auto&& c : GetRecords()) { - if (c.GetAddress() == address) { - return TEntityChunk(c.GetAddress(), c.GetMeta().GetNumRows(), c.GetMeta().GetRawBytes(), c.GetBlobRange()); - } - } - for (auto&& c : GetIndexes()) { - if (c.GetAddress() == address) { - return TEntityChunk(c.GetAddress(), c.GetRecordsCount(), c.GetRawBytes(), c.GetBlobRange()); - } - } - return {}; - } - bool HasEntityAddress(const 
TChunkAddress& address) const { - for (auto&& c : GetRecords()) { - if (c.GetAddress() == address) { + { + auto it = std::lower_bound(Records.begin(), Records.end(), address, [](const TColumnRecord& item, const TChunkAddress& address) { + return item.GetAddress() < address; + }); + if (it != Records.end() && it->GetAddress() == address) { return true; } } - for (auto&& c : GetIndexes()) { - if (c.GetAddress() == address) { + { + auto it = std::lower_bound(Indexes.begin(), Indexes.end(), address, [](const TIndexChunk& item, const TChunkAddress& address) { + return item.GetAddress() < address; + }); + if (it != Indexes.end() && it->GetAddress() == address) { return true; } } @@ -498,10 +451,7 @@ class TPortionInfo { return false; } - bool visible = (Meta.RecordSnapshotMin <= snapshot); - if (visible && RemoveSnapshot.Valid()) { - visible = snapshot < RemoveSnapshot; - } + const bool visible = (Meta.RecordSnapshotMin <= snapshot) && (!RemoveSnapshot.Valid() || snapshot < RemoveSnapshot); AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "IsVisible")("analyze_portion", DebugString())("visible", visible)("snapshot", snapshot.DebugString()); return visible; @@ -587,20 +537,21 @@ class TPortionInfo { return result; } - ui64 GetIndexRawBytes(const std::optional>& columnIds = {}, const bool validation = true) const; + ui64 GetIndexRawBytes(const std::set& columnIds, const bool validation = true) const; + ui64 GetIndexRawBytes(const bool validation = true) const; ui64 GetIndexBlobBytes() const noexcept { ui64 sum = 0; for (const auto& rec : Indexes) { - sum += rec.GetBlobRange().Size; + sum += rec.GetDataSize(); } return sum; } - ui64 GetColumnRawBytes(const std::vector& columnIds, const bool validation = true) const; - ui64 GetColumnRawBytes(const std::optional>& columnIds = {}, const bool validation = true) const; + ui64 GetColumnRawBytes(const std::set& columnIds, const bool validation = true) const; + ui64 GetColumnRawBytes(const bool validation = true) const; - ui64 
GetColumnBlobBytes(const std::vector& columnIds, const bool validation = true) const; - ui64 GetColumnBlobBytes(const std::optional>& columnIds = {}, const bool validation = true) const; + ui64 GetColumnBlobBytes(const std::set& columnIds, const bool validation = true) const; + ui64 GetColumnBlobBytes(const bool validation = true) const; ui64 GetTotalBlobBytes() const noexcept { return GetIndexBlobBytes() + GetColumnBlobBytes(); @@ -658,8 +609,8 @@ class TPortionInfo { return DefaultRowsCount && !Data; } - std::shared_ptr BuildRecordBatch(const TColumnLoader& loader) const; - TDeserializeChunkedArray::TChunk BuildDeserializeChunk(const std::shared_ptr& loader) const; + std::shared_ptr BuildRecordBatch(const TColumnLoader& loader) const; + NArrow::NAccessor::TDeserializeChunkedArray::TChunk BuildDeserializeChunk(const std::shared_ptr& loader) const; }; class TPreparedColumn { @@ -672,28 +623,26 @@ class TPortionInfo { } const std::string& GetName() const { - return Loader->GetExpectedSchema()->field(0)->name(); + return Loader->GetField()->name(); } std::shared_ptr GetField() const { - return Loader->GetExpectedSchema()->field(0); + return Loader->GetField(); } TPreparedColumn(std::vector&& blobs, const std::shared_ptr& loader) : Loader(loader) , Blobs(std::move(blobs)) { - Y_ABORT_UNLESS(Loader); - Y_ABORT_UNLESS(Loader->GetExpectedSchema()->num_fields() == 1); + AFL_VERIFY(Loader); } - std::shared_ptr Assemble() const; - std::shared_ptr AssembleForSeqAccess() const; + std::shared_ptr AssembleForSeqAccess() const; + std::shared_ptr AssembleAccessor() const; }; class TPreparedBatchData { private: std::vector Columns; - std::shared_ptr Schema; size_t RowsCount = 0; public: struct TAssembleOptions { @@ -734,10 +683,6 @@ class TPortionInfo { return nullptr; } - std::vector GetSchemaColumnNames() const { - return Schema->field_names(); - } - size_t GetColumnsCount() const { return Columns.size(); } @@ -746,15 +691,12 @@ class TPortionInfo { return RowsCount; } - 
TPreparedBatchData(std::vector&& columns, std::shared_ptr schema, const size_t rowsCount) + TPreparedBatchData(std::vector&& columns, const size_t rowsCount) : Columns(std::move(columns)) - , Schema(schema) , RowsCount(rowsCount) { } - std::shared_ptr Assemble(const TAssembleOptions& options = {}) const; - std::shared_ptr AssembleTable(const TAssembleOptions& options = {}) const; - std::shared_ptr AssembleForSeqAccess() const; + std::shared_ptr AssembleToGeneralContainer(const std::set& sequentialColumnIds) const; }; class TColumnAssemblingInfo { @@ -791,7 +733,7 @@ class TPortionInfo { TPreparedColumn Compile() { if (BlobsInfo.empty()) { - BlobsInfo.emplace_back(TAssembleBlobInfo(NumRows, DataLoader->GetDefaultValue())); + BlobsInfo.emplace_back(TAssembleBlobInfo(NumRows, DataLoader ? DataLoader->GetDefaultValue() : ResultLoader->GetDefaultValue())); return TPreparedColumn(std::move(BlobsInfo), ResultLoader); } else { AFL_VERIFY(NumRowsByChunks == NumRows)("by_chunks", NumRowsByChunks)("expected", NumRows); @@ -804,13 +746,6 @@ class TPortionInfo { TPreparedBatchData PrepareForAssemble(const ISnapshotSchema& dataSchema, const ISnapshotSchema& resultSchema, THashMap& blobsData) const; TPreparedBatchData PrepareForAssemble(const ISnapshotSchema& dataSchema, const ISnapshotSchema& resultSchema, THashMap& blobsData) const; - std::shared_ptr AssembleInBatch(const ISnapshotSchema& dataSchema, const ISnapshotSchema& resultSchema, - THashMap& data) const { - auto batch = PrepareForAssemble(dataSchema, resultSchema, data).Assemble(); - Y_ABORT_UNLESS(batch->Validate().ok()); - return batch; - } - friend IOutputStream& operator << (IOutputStream& out, const TPortionInfo& info) { out << info.DebugString(); return out; diff --git a/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp b/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp index ca5a1505d620..ae85ef59842c 100644 --- a/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp +++ 
b/ydb/core/tx/columnshard/engines/portions/read_with_blobs.cpp @@ -13,44 +13,22 @@ void TReadPortionInfoWithBlobs::RestoreChunk(const std::shared_ptr TReadPortionInfoWithBlobs::GetBatch(const ISnapshotSchema::TPtr& data, const ISnapshotSchema& result, const std::set& columnNames) const { - Y_ABORT_UNLESS(data); - if (columnNames.empty()) { - if (!CachedBatch) { - THashMap blobs; - for (auto&& i : PortionInfo.Records) { - blobs[i.GetAddress()] = GetBlobByAddressVerified(i.ColumnId, i.Chunk); - Y_ABORT_UNLESS(blobs[i.GetAddress()].size() == i.BlobRange.Size); - } - CachedBatch = PortionInfo.AssembleInBatch(*data, result, blobs); - Y_DEBUG_ABORT_UNLESS(NArrow::IsSortedAndUnique(*CachedBatch, result.GetIndexInfo().GetReplaceKey())); - } - return *CachedBatch; - } else if (CachedBatch) { - std::vector columnNamesString; - for (auto&& i : columnNames) { - columnNamesString.emplace_back(i.data(), i.size()); - } - return NArrow::TColumnOperator().VerifyIfAbsent().Extract(*CachedBatch, columnNamesString); - } else { - auto filteredSchema = std::make_shared(data, columnNames); - THashMap blobs; - for (auto&& i : PortionInfo.Records) { - blobs[i.GetAddress()] = GetBlobByAddressVerified(i.ColumnId, i.Chunk); - Y_ABORT_UNLESS(blobs[i.GetAddress()].size() == i.BlobRange.Size); - } - return PortionInfo.AssembleInBatch(*data, *filteredSchema, blobs); +std::shared_ptr TReadPortionInfoWithBlobs::RestoreBatch( + const ISnapshotSchema& data, const ISnapshotSchema& resultSchema, const std::set& seqColumns) const { + THashMap blobs; + for (auto&& i : PortionInfo.Records) { + blobs[i.GetAddress()] = GetBlobByAddressVerified(i.ColumnId, i.Chunk); + Y_ABORT_UNLESS(blobs[i.GetAddress()].size() == i.BlobRange.Size); } + return PortionInfo.PrepareForAssemble(data, resultSchema, blobs).AssembleToGeneralContainer(seqColumns); } NKikimr::NOlap::TReadPortionInfoWithBlobs TReadPortionInfoWithBlobs::RestorePortion(const TPortionInfo& portion, NBlobOperations::NRead::TCompositeReadBlobs& blobs, 
const TIndexInfo& indexInfo) { TReadPortionInfoWithBlobs result(portion); - THashMap>>> records = result.PortionInfo.RestoreEntityChunks(blobs, indexInfo); - for (auto&& [storageId, recordsByBlob] : records) { - for (auto&& i : recordsByBlob) { - for (auto&& d : i.second) { - result.RestoreChunk(d); - } + THashMap>> records = result.PortionInfo.RestoreEntityChunks(blobs, indexInfo); + for (auto&& [storageId, chunksByAddress] : records) { + for (auto&& [_, chunk] : chunksByAddress) { + result.RestoreChunk(chunk); } } return result; @@ -86,26 +64,21 @@ bool TReadPortionInfoWithBlobs::ExtractColumnChunks(const ui32 entityId, std::ve if (records.empty()) { return false; } - std::map> chunksMap; + std::vector> chunksLocal; for (auto it = Chunks.begin(); it != Chunks.end();) { if (it->first.GetEntityId() == entityId) { - chunksMap.emplace(it->first, std::move(it->second)); + AFL_VERIFY(chunksLocal.empty() || chunksLocal.back()->GetChunkAddressVerified() < it->second->GetChunkAddressVerified()); + chunksLocal.emplace_back(std::move(it->second)); it = Chunks.erase(it); } else { ++it; } } - std::vector> chunksLocal; - for (auto&& i : chunksMap) { - Y_ABORT_UNLESS(i.first.GetColumnId() == entityId); - Y_ABORT_UNLESS(i.first.GetChunk() == chunksLocal.size()); - chunksLocal.emplace_back(i.second); - } std::swap(chunksLocal, chunks); return true; } -std::optional TReadPortionInfoWithBlobs::SyncPortion(TReadPortionInfoWithBlobs&& source, +std::optional TReadPortionInfoWithBlobs::SyncPortion(TReadPortionInfoWithBlobs&& source, const ISnapshotSchema::TPtr& from, const ISnapshotSchema::TPtr& to, const TString& targetTier, const std::shared_ptr& storages, std::shared_ptr counters) { if (from->GetVersion() == to->GetVersion() && targetTier == source.GetPortionInfo().GetTierNameDef(IStoragesManager::DefaultStorageId)) { @@ -131,40 +104,27 @@ std::optional TReadPortionInfoWithBlobs::SyncPortion std::vector> newChunks; if (it != columnChunks.end()) { newChunks = 
to->GetIndexInfo().ActualizeColumnData(it->second, from->GetIndexInfo(), i); - } else { - newChunks = to->GetIndexInfo().MakeEmptyChunks(i, pageSizes, to->GetIndexInfo().GetColumnFeaturesVerified(i)); - } - AFL_VERIFY(entityChunksNew.emplace(i, std::move(newChunks)).second); - } - - for (auto&& i : to->GetIndexInfo().GetIndexes()) { - if (from->GetIndexInfo().HasIndexId(i.first)) { - continue; + AFL_VERIFY(entityChunksNew.emplace(i, std::move(newChunks)).second); } - to->GetIndexInfo().AppendIndex(entityChunksNew, i.first); } - auto schemaTo = std::make_shared(to, std::make_shared()); - TGeneralSerializedSlice slice(entityChunksNew, schemaTo, counters); - const NSplitter::TEntityGroups groups = to->GetIndexInfo().GetEntityGroupsByStorageId(targetTier, *storages); TPortionInfoConstructor constructor(source.PortionInfo, false, true); constructor.SetMinSnapshotDeprecated(to->GetSnapshot()); constructor.SetSchemaVersion(to->GetVersion()); constructor.MutableMeta().ResetTierName(targetTier); - NStatistics::TPortionStorage storage; - for (auto&& i : to->GetIndexInfo().GetStatisticsByName()) { - auto it = from->GetIndexInfo().GetStatisticsByName().find(i.first); - if (it != from->GetIndexInfo().GetStatisticsByName().end()) { - i.second->CopyData(it->second.GetCursorVerified(), source.PortionInfo.GetMeta().GetStatisticsStorage(), storage); - } else { - i.second->FillStatisticsData(entityChunksNew, storage, to->GetIndexInfo()); - } + TIndexInfo::TSecondaryData secondaryData; + secondaryData.MutableExternalData() = entityChunksNew; + for (auto&& i : to->GetIndexInfo().GetIndexes()) { + to->GetIndexInfo().AppendIndex(entityChunksNew, i.first, storages, secondaryData).Validate(); } - constructor.MutableMeta().ResetStatisticsStorage(std::move(storage)); - TWritePortionInfoWithBlobs result = TWritePortionInfoWithBlobs::BuildByBlobs(slice.GroupChunksByBlobs(groups), std::move(constructor), storages); - return result; + const NSplitter::TEntityGroups groups = 
to->GetIndexInfo().GetEntityGroupsByStorageId(targetTier, *storages); + auto schemaTo = std::make_shared(to, std::make_shared()); + TGeneralSerializedSlice slice(secondaryData.GetExternalData(), schemaTo, counters); + + return TWritePortionInfoWithBlobsConstructor::BuildByBlobs( + slice.GroupChunksByBlobs(groups), secondaryData.GetSecondaryInplaceData(), std::move(constructor), storages); } const TString& TReadPortionInfoWithBlobs::GetBlobByAddressVerified(const ui32 columnId, const ui32 chunkId) const { diff --git a/ydb/core/tx/columnshard/engines/portions/read_with_blobs.h b/ydb/core/tx/columnshard/engines/portions/read_with_blobs.h index 6d688db66074..a9e24eb3c165 100644 --- a/ydb/core/tx/columnshard/engines/portions/read_with_blobs.h +++ b/ydb/core/tx/columnshard/engines/portions/read_with_blobs.h @@ -12,7 +12,7 @@ namespace NKikimr::NOlap { class TVersionedIndex; -class TWritePortionInfoWithBlobs; +class TWritePortionInfoWithBlobsResult; class TReadPortionInfoWithBlobs: public TBasePortionInfoWithBlobs { private: @@ -21,7 +21,6 @@ class TReadPortionInfoWithBlobs: public TBasePortionInfoWithBlobs { void RestoreChunk(const std::shared_ptr& chunk); TPortionInfo PortionInfo; - mutable std::optional> CachedBatch; explicit TReadPortionInfoWithBlobs(TPortionInfo&& portionInfo) : PortionInfo(std::move(portionInfo)) { @@ -39,8 +38,8 @@ class TReadPortionInfoWithBlobs: public TBasePortionInfoWithBlobs { static TReadPortionInfoWithBlobs RestorePortion(const TPortionInfo& portion, NBlobOperations::NRead::TCompositeReadBlobs& blobs, const TIndexInfo& indexInfo); - std::shared_ptr GetBatch(const ISnapshotSchema::TPtr& data, const ISnapshotSchema& result, const std::set& columnNames = {}) const; - static std::optional SyncPortion(TReadPortionInfoWithBlobs&& source, + std::shared_ptr RestoreBatch(const ISnapshotSchema& data, const ISnapshotSchema& resultSchema, const std::set& seqColumns) const; + static std::optional SyncPortion(TReadPortionInfoWithBlobs&& source, const 
ISnapshotSchema::TPtr& from, const ISnapshotSchema::TPtr& to, const TString& targetTier, const std::shared_ptr& storages, std::shared_ptr counters); diff --git a/ydb/core/tx/columnshard/engines/portions/write_with_blobs.cpp b/ydb/core/tx/columnshard/engines/portions/write_with_blobs.cpp index 4fbc7cb3305c..3f580531b749 100644 --- a/ydb/core/tx/columnshard/engines/portions/write_with_blobs.cpp +++ b/ydb/core/tx/columnshard/engines/portions/write_with_blobs.cpp @@ -3,9 +3,9 @@ namespace NKikimr::NOlap { -void TWritePortionInfoWithBlobs::TBlobInfo::AddChunk(TWritePortionInfoWithBlobs& owner, const std::shared_ptr& chunk) { +void TWritePortionInfoWithBlobsConstructor::TBlobInfo::AddChunk(TWritePortionInfoWithBlobsConstructor& owner, const std::shared_ptr& chunk) { AFL_VERIFY(chunk); - Y_ABORT_UNLESS(!ResultBlob); + Y_ABORT_UNLESS(!Finished); const TString& data = chunk->GetData(); TBlobRangeLink16 bRange(Size, data.size()); @@ -17,24 +17,26 @@ void TWritePortionInfoWithBlobs::TBlobInfo::AddChunk(TWritePortionInfoWithBlobs& chunk->AddIntoPortionBeforeBlob(bRange, owner.GetPortionConstructor()); } -void TWritePortionInfoWithBlobs::TBlobInfo::RegisterBlobId(TWritePortionInfoWithBlobs& owner, const TUnifiedBlobId& blobId) { +void TWritePortionInfoWithBlobsResult::TBlobInfo::RegisterBlobId(TWritePortionInfoWithBlobsResult& owner, const TUnifiedBlobId& blobId) const { const TBlobRangeLink16::TLinkId idx = owner.GetPortionConstructor().RegisterBlobId(blobId); for (auto&& i : Chunks) { - owner.GetPortionConstructor().RegisterBlobIdx(i.first, idx); + owner.GetPortionConstructor().RegisterBlobIdx(i, idx); } } -TWritePortionInfoWithBlobs TWritePortionInfoWithBlobs::BuildByBlobs(std::vector&& chunks, +TWritePortionInfoWithBlobsConstructor TWritePortionInfoWithBlobsConstructor::BuildByBlobs(std::vector&& chunks, + const THashMap>& inplaceChunks, const ui64 granule, const ui64 schemaVersion, const TSnapshot& snapshot, const std::shared_ptr& operators) { TPortionInfoConstructor 
constructor(granule); constructor.SetMinSnapshotDeprecated(snapshot); constructor.SetSchemaVersion(schemaVersion); - return BuildByBlobs(std::move(chunks), std::move(constructor), operators); + return BuildByBlobs(std::move(chunks), inplaceChunks, std::move(constructor), operators); } -TWritePortionInfoWithBlobs TWritePortionInfoWithBlobs::BuildByBlobs(std::vector&& chunks, TPortionInfoConstructor&& constructor, const std::shared_ptr& operators) { - TWritePortionInfoWithBlobs result(std::move(constructor)); +TWritePortionInfoWithBlobsConstructor TWritePortionInfoWithBlobsConstructor::BuildByBlobs( + std::vector&& chunks, const THashMap>& inplaceChunks, TPortionInfoConstructor&& constructor, const std::shared_ptr& operators) { + TWritePortionInfoWithBlobsConstructor result(std::move(constructor)); for (auto&& blob : chunks) { auto storage = operators->GetOperatorVerified(blob.GetGroupName()); auto blobInfo = result.StartBlob(storage); @@ -42,10 +44,15 @@ TWritePortionInfoWithBlobs TWritePortionInfoWithBlobs::BuildByBlobs(std::vector< blobInfo.AddChunk(chunk); } } + for (auto&& [_, i] : inplaceChunks) { + result.GetPortionConstructor().AddIndex( + TIndexChunk(i->GetEntityId(), i->GetChunkIdxVerified(), i->GetRecordsCountVerified(), i->GetRawBytesVerified(), i->GetData())); + } + return result; } -std::vector> TWritePortionInfoWithBlobs::GetEntityChunks(const ui32 entityId) const { +std::vector> TWritePortionInfoWithBlobsConstructor::GetEntityChunks(const ui32 entityId) const { std::map> sortedChunks; for (auto&& b : GetBlobs()) { for (auto&& i : b.GetChunks()) { @@ -62,16 +69,23 @@ std::vector> TWritePortionInfoWithBlobs::GetE return result; } -void TWritePortionInfoWithBlobs::FillStatistics(const TIndexInfo& index) { - NStatistics::TPortionStorage storage; - for (auto&& i : index.GetStatisticsByName()) { - THashMap>> data; - for (auto&& entityId : i.second->GetEntityIds()) { - data.emplace(entityId, GetEntityChunks(entityId)); +TString 
TWritePortionInfoWithBlobsResult::GetBlobByRangeVerified(const ui32 entityId, const ui32 chunkIdx) const { + AFL_VERIFY(!!PortionConstructor); + for (auto&& rec : PortionConstructor->GetRecords()) { + if (rec.GetEntityId() != entityId || rec.GetChunkIdx() != chunkIdx) { + continue; + } + for (auto&& i : Blobs) { + for (auto&& c : i.GetChunks()) { + if (c == TChunkAddress(entityId, chunkIdx)) { + return i.GetResultBlob().substr(rec.BlobRange.Offset, rec.BlobRange.Size); + } + } } - i.second->FillStatisticsData(data, storage, index); + AFL_VERIFY(false); } - GetPortionConstructor().MutableMeta().SetStatisticsStorage(std::move(storage)); + AFL_VERIFY(false); + return ""; } } diff --git a/ydb/core/tx/columnshard/engines/portions/write_with_blobs.h b/ydb/core/tx/columnshard/engines/portions/write_with_blobs.h index 39c3bb885fe4..dde424fd63b8 100644 --- a/ydb/core/tx/columnshard/engines/portions/write_with_blobs.h +++ b/ydb/core/tx/columnshard/engines/portions/write_with_blobs.h @@ -9,7 +9,9 @@ namespace NKikimr::NOlap { -class TWritePortionInfoWithBlobs: public TBasePortionInfoWithBlobs { +class TWritePortionInfoWithBlobsResult; + +class TWritePortionInfoWithBlobsConstructor: public TBasePortionInfoWithBlobs { public: class TBlobInfo { private: @@ -18,9 +20,8 @@ class TWritePortionInfoWithBlobs: public TBasePortionInfoWithBlobs { YDB_READONLY_DEF(TBlobChunks, Chunks); YDB_READONLY_DEF(std::shared_ptr, Operator); std::vector> ChunksOrdered; - mutable std::optional ResultBlob; - void AddChunk(TWritePortionInfoWithBlobs& owner, const std::shared_ptr& chunk); - + bool Finished = false; + void AddChunk(TWritePortionInfoWithBlobsConstructor& owner, const std::shared_ptr& chunk); public: TBlobInfo(const std::shared_ptr& bOperator) : Operator(bOperator) @@ -31,9 +32,9 @@ class TWritePortionInfoWithBlobs: public TBasePortionInfoWithBlobs { class TBuilder { private: TBlobInfo* OwnerBlob; - TWritePortionInfoWithBlobs* OwnerPortion; + TWritePortionInfoWithBlobsConstructor* 
OwnerPortion; public: - TBuilder(TBlobInfo& blob, TWritePortionInfoWithBlobs& portion) + TBuilder(TBlobInfo& blob, TWritePortionInfoWithBlobsConstructor& portion) : OwnerBlob(&blob) , OwnerPortion(&portion) { } @@ -46,26 +47,32 @@ class TWritePortionInfoWithBlobs: public TBasePortionInfoWithBlobs { } }; - const TString& GetBlob() const { - if (!ResultBlob) { - TString result; - result.reserve(Size); - for (auto&& i : ChunksOrdered) { - result.append(i->GetData()); - } - ResultBlob = std::move(result); + std::vector ExtractChunks() { + std::vector result; + result.reserve(Chunks.size()); + for (auto&& i : Chunks) { + result.emplace_back(i.first); } - return *ResultBlob; + return result; } - void RegisterBlobId(TWritePortionInfoWithBlobs& owner, const TUnifiedBlobId& blobId); + TString ExtractBlob() { + AFL_VERIFY(!Finished); + Finished = true; + TString result; + result.reserve(Size); + for (auto&& i : ChunksOrdered) { + result.append(i->GetData()); + } + ChunksOrdered.clear(); + return result; + } }; private: std::optional PortionConstructor; - std::optional PortionResult; YDB_READONLY_DEF(std::vector, Blobs); - explicit TWritePortionInfoWithBlobs(TPortionInfoConstructor&& portionConstructor) + explicit TWritePortionInfoWithBlobsConstructor(TPortionInfoConstructor&& portionConstructor) : PortionConstructor(std::move(portionConstructor)) { } @@ -73,46 +80,75 @@ class TWritePortionInfoWithBlobs: public TBasePortionInfoWithBlobs { Blobs.emplace_back(TBlobInfo(bOperator)); return TBlobInfo::TBuilder(Blobs.back(), *this); } - + friend class TWritePortionInfoWithBlobsResult; public: std::vector> GetEntityChunks(const ui32 entityId) const; - void FillStatistics(const TIndexInfo& index); - - static TWritePortionInfoWithBlobs BuildByBlobs(std::vector&& chunks, + static TWritePortionInfoWithBlobsConstructor BuildByBlobs(std::vector&& chunks, + const THashMap>& inplaceChunks, const ui64 granule, const ui64 schemaVersion, const TSnapshot& snapshot, const std::shared_ptr& 
operators); - static TWritePortionInfoWithBlobs BuildByBlobs(std::vector&& chunks, + static TWritePortionInfoWithBlobsConstructor BuildByBlobs(std::vector&& chunks, + const THashMap>& inplaceChunks, TPortionInfoConstructor&& constructor, const std::shared_ptr& operators); - const TString& GetBlobByRangeVerified(const ui32 columnId, const ui32 chunkId) const { - for (auto&& b : Blobs) { - auto it = b.GetChunks().find(TChunkAddress(columnId, chunkId)); - if (it == b.GetChunks().end()) { - continue; - } else { - return it->second->GetData(); - } - } - Y_ABORT_UNLESS(false); + std::vector& GetBlobs() { + return Blobs; } - ui64 GetBlobFullSizeVerified(const ui32 columnId, const ui32 chunkId) const { - for (auto&& b : Blobs) { - auto it = b.GetChunks().find(TChunkAddress(columnId, chunkId)); - if (it == b.GetChunks().end()) { - continue; - } else { - return b.GetSize(); - } - } - Y_ABORT_UNLESS(false); + TString DebugString() const { + return TStringBuilder() << "blobs_count=" << Blobs.size() << ";"; } - std::vector& GetBlobs() { - return Blobs; + TPortionInfoConstructor& GetPortionConstructor() { + AFL_VERIFY(!!PortionConstructor); + return *PortionConstructor; + } + +}; + +class TWritePortionInfoWithBlobsResult { +public: + class TBlobInfo { + private: + using TBlobChunks = std::vector; + YDB_READONLY_DEF(TBlobChunks, Chunks); + const TString ResultBlob; + YDB_READONLY_DEF(std::shared_ptr, Operator); + + public: + ui64 GetSize() const { + return ResultBlob.size(); + } + + TBlobInfo(const TString& blobData, TBlobChunks&& chunks, const std::shared_ptr& stOperator) + : Chunks(std::move(chunks)) + , ResultBlob(blobData) + , Operator(stOperator) + { + + } + + const TString& GetResultBlob() const { + return ResultBlob; + } + + void RegisterBlobId(TWritePortionInfoWithBlobsResult& owner, const TUnifiedBlobId& blobId) const; + }; +private: + std::optional PortionConstructor; + std::optional PortionResult; + YDB_READONLY_DEF(std::vector, Blobs); +public: + 
TWritePortionInfoWithBlobsResult(TWritePortionInfoWithBlobsConstructor&& constructor) + : PortionConstructor(std::move(constructor.PortionConstructor)) { + for (auto&& i : constructor.Blobs) { + Blobs.emplace_back(i.ExtractBlob(), i.ExtractChunks(), i.GetOperator()); + } } + TString GetBlobByRangeVerified(const ui32 entityId, const ui32 chunkIdx) const; + TString DebugString() const { return TStringBuilder() << "blobs_count=" << Blobs.size() << ";"; } @@ -135,7 +171,6 @@ class TWritePortionInfoWithBlobs: public TBasePortionInfoWithBlobs { AFL_VERIFY(!PortionResult); return *PortionConstructor; } - }; } // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/predicate/container.cpp b/ydb/core/tx/columnshard/engines/predicate/container.cpp index 0f27c3538293..8afcc1895e4d 100644 --- a/ydb/core/tx/columnshard/engines/predicate/container.cpp +++ b/ydb/core/tx/columnshard/engines/predicate/container.cpp @@ -115,51 +115,53 @@ bool TPredicateContainer::CrossRanges(const TPredicateContainer& ext) { } } -std::optional TPredicateContainer::BuildPredicateFrom(std::shared_ptr object, const TIndexInfo* indexInfo) { +TConclusion TPredicateContainer::BuildPredicateFrom( + std::shared_ptr object, const std::shared_ptr& pkSchema) { if (!object || object->Empty()) { return TPredicateContainer(NArrow::ECompareType::GREATER_OR_EQUAL); } else { if (!object->Good()) { AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "add_range_filter")("problem", "not good 'from' predicate"); - return {}; + return TConclusionStatus::Fail("not good 'from' predicate"); } if (!object->IsFrom()) { AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "add_range_filter")("problem", "'from' predicate not is from"); - return {}; + return TConclusionStatus::Fail("'from' predicate not is from"); } - if (indexInfo) { + if (pkSchema) { auto cNames = object->ColumnNames(); i32 countSortingFields = 0; - for (i32 i = 0; i < indexInfo->GetReplaceKey()->num_fields(); ++i) { - if (i < 
(int)cNames.size() && cNames[i] == indexInfo->GetReplaceKey()->field(i)->name()) { + for (i32 i = 0; i < pkSchema->num_fields(); ++i) { + if (i < (int)cNames.size() && cNames[i] == pkSchema->field(i)->name()) { ++countSortingFields; } else { break; } } - Y_ABORT_UNLESS(countSortingFields == object->Batch->num_columns()); + AFL_VERIFY(countSortingFields == object->Batch->num_columns())("count", countSortingFields)("object", object->Batch->num_columns()); } - return TPredicateContainer(object); + return TPredicateContainer(object, pkSchema ? ExtractKey(*object, pkSchema) : nullptr); } } -std::optional TPredicateContainer::BuildPredicateTo(std::shared_ptr object, const TIndexInfo* indexInfo) { +TConclusion TPredicateContainer::BuildPredicateTo( + std::shared_ptr object, const std::shared_ptr& pkSchema) { if (!object || object->Empty()) { return TPredicateContainer(NArrow::ECompareType::LESS_OR_EQUAL); } else { if (!object->Good()) { AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "add_range_filter")("problem", "not good 'to' predicate"); - return {}; + return TConclusionStatus::Fail("not good 'to' predicate"); } if (!object->IsTo()) { AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "add_range_filter")("problem", "'to' predicate not is to"); - return {}; + return TConclusionStatus::Fail("'to' predicate not is to"); } - if (indexInfo) { + if (pkSchema) { auto cNames = object->ColumnNames(); i32 countSortingFields = 0; - for (i32 i = 0; i < indexInfo->GetReplaceKey()->num_fields(); ++i) { - if (i < (int)cNames.size() && cNames[i] == indexInfo->GetReplaceKey()->field(i)->name()) { + for (i32 i = 0; i < pkSchema->num_fields(); ++i) { + if (i < (int)cNames.size() && cNames[i] == pkSchema->field(i)->name()) { ++countSortingFields; } else { break; @@ -167,7 +169,7 @@ std::optional TPredicateContainer::BuildPre } Y_ABORT_UNLESS(countSortingFields == object->Batch->num_columns()); } - return TPredicateContainer(object); + return TPredicateContainer(object, 
pkSchema ? TPredicateContainer::ExtractKey(*object, pkSchema) : nullptr); } } diff --git a/ydb/core/tx/columnshard/engines/predicate/container.h b/ydb/core/tx/columnshard/engines/predicate/container.h index 5cff2cbefea5..7d969cf9a759 100644 --- a/ydb/core/tx/columnshard/engines/predicate/container.h +++ b/ydb/core/tx/columnshard/engines/predicate/container.h @@ -1,9 +1,13 @@ #pragma once #include "predicate.h" + #include -#include +#include + #include + #include + #include namespace NKikimr::NOlap { @@ -15,10 +19,12 @@ class TPredicateContainer { std::shared_ptr Object; NArrow::ECompareType CompareType; mutable std::optional> ColumnNames; + std::shared_ptr ReplaceKey; - TPredicateContainer(std::shared_ptr object) + TPredicateContainer(std::shared_ptr object, const std::shared_ptr& replaceKey) : Object(object) - , CompareType(Object->GetCompareType()) { + , CompareType(Object->GetCompareType()) + , ReplaceKey(replaceKey) { } TPredicateContainer(const NArrow::ECompareType compareType) @@ -27,15 +33,37 @@ class TPredicateContainer { static std::partial_ordering ComparePredicatesSamePrefix(const NOlap::TPredicate& l, const NOlap::TPredicate& r); + static std::shared_ptr ExtractKey(const NOlap::TPredicate& predicate, const std::shared_ptr& key) { + AFL_VERIFY(predicate.Batch); + const auto& batchFields = predicate.Batch->schema()->fields(); + const auto& keyFields = key->fields(); + size_t minSize = std::min(batchFields.size(), keyFields.size()); + for (size_t i = 0; i < minSize; ++i) { + Y_DEBUG_ABORT_UNLESS(batchFields[i]->type()->Equals(*keyFields[i]->type())); + } + if (batchFields.size() <= keyFields.size()) { + return std::make_shared(NArrow::TReplaceKey::FromBatch(predicate.Batch, predicate.Batch->schema(), 0)); + } else { + return std::make_shared(NArrow::TReplaceKey::FromBatch(predicate.Batch, key, 0)); + } + } + public: + NArrow::ECompareType GetCompareType() const { + return CompareType; + } + + const std::shared_ptr& GetReplaceKey() const { + return 
ReplaceKey; + } bool IsEmpty() const { return !Object; } template - std::optional Get(const ui32 colIndex, const ui32 rowIndex, - const std::optional defaultValue = {}) const { + std::optional Get( + const ui32 colIndex, const ui32 rowIndex, const std::optional defaultValue = {}) const { if (!Object) { return defaultValue; } else { @@ -59,13 +87,15 @@ class TPredicateContainer { return TPredicateContainer(NArrow::ECompareType::GREATER_OR_EQUAL); } - static std::optional BuildPredicateFrom(std::shared_ptr object, const TIndexInfo* indexInfo); + static TConclusion BuildPredicateFrom( + std::shared_ptr object, const std::shared_ptr& pkSchema); static TPredicateContainer BuildNullPredicateTo() { return TPredicateContainer(NArrow::ECompareType::LESS_OR_EQUAL); } - static std::optional BuildPredicateTo(std::shared_ptr object, const TIndexInfo* indexInfo); + static TConclusion BuildPredicateTo( + std::shared_ptr object, const std::shared_ptr& pkSchema); NKikimr::NArrow::TColumnFilter BuildFilter(const arrow::Datum& data) const { if (!Object) { @@ -73,23 +103,6 @@ class TPredicateContainer { } return NArrow::TColumnFilter::MakePredicateFilter(data, Object->Batch, CompareType); } - - std::optional ExtractKey(const std::shared_ptr& key) const { - if (Object) { - const auto& batchFields = Object->Batch->schema()->fields(); - const auto& keyFields = key->fields(); - size_t minSize = std::min(batchFields.size(), keyFields.size()); - for (size_t i = 0; i < minSize; ++i) { - Y_DEBUG_ABORT_UNLESS(batchFields[i]->type()->Equals(*keyFields[i]->type())); - } - if (batchFields.size() <= keyFields.size()) { - return NArrow::TReplaceKey::FromBatch(Object->Batch, Object->Batch->schema(), 0); - } else { - return NArrow::TReplaceKey::FromBatch(Object->Batch, key, 0); - } - } - return {}; - } }; -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/predicate/filter.cpp b/ydb/core/tx/columnshard/engines/predicate/filter.cpp index 10d66a832c1a..085eaa798f07 100644 --- 
a/ydb/core/tx/columnshard/engines/predicate/filter.cpp +++ b/ydb/core/tx/columnshard/engines/predicate/filter.cpp @@ -1,4 +1,7 @@ #include "filter.h" + +#include + #include namespace NKikimr::NOlap { @@ -14,43 +17,50 @@ NKikimr::NArrow::TColumnFilter TPKRangesFilter::BuildFilter(const arrow::Datum& return result; } -bool TPKRangesFilter::Add(std::shared_ptr f, std::shared_ptr t, const TIndexInfo* indexInfo) { +TConclusionStatus TPKRangesFilter::Add( + std::shared_ptr f, std::shared_ptr t, const std::shared_ptr& pkSchema) { if ((!f || f->Empty()) && (!t || t->Empty())) { - return true; + return TConclusionStatus::Success(); + } + auto fromContainerConclusion = TPredicateContainer::BuildPredicateFrom(f, pkSchema); + if (fromContainerConclusion.IsFail()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "add_range_filter")("problem", "incorrect from container")( + "from", fromContainerConclusion.GetErrorMessage()); + return fromContainerConclusion; } - auto fromContainer = TPredicateContainer::BuildPredicateFrom(f, indexInfo); - auto toContainer = TPredicateContainer::BuildPredicateTo(t, indexInfo); - if (!fromContainer || !toContainer) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "add_range_filter")("problem", "incorrect from/to containers")("from", !!fromContainer)("to", !!toContainer); - return false; + auto toContainerConclusion = TPredicateContainer::BuildPredicateTo(t, pkSchema); + if (toContainerConclusion.IsFail()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "add_range_filter")("problem", "incorrect to container")( + "from", toContainerConclusion.GetErrorMessage()); + return toContainerConclusion; } if (SortedRanges.size() && !FakeRanges) { if (ReverseFlag) { - if (fromContainer->CrossRanges(SortedRanges.front().GetPredicateTo())) { + if (fromContainerConclusion->CrossRanges(SortedRanges.front().GetPredicateTo())) { AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "add_range_filter")("problem", "not sorted 
sequence"); - return false; + return TConclusionStatus::Fail("not sorted sequence"); } } else { - if (fromContainer->CrossRanges(SortedRanges.back().GetPredicateTo())) { + if (fromContainerConclusion->CrossRanges(SortedRanges.back().GetPredicateTo())) { AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "add_range_filter")("problem", "not sorted sequence"); - return false; + return TConclusionStatus::Fail("not sorted sequence"); } } } - auto pkRangeFilter = TPKRangeFilter::Build(std::move(*fromContainer), std::move(*toContainer)); - if (!pkRangeFilter) { - return false; + auto pkRangeFilterConclusion = TPKRangeFilter::Build(fromContainerConclusion.DetachResult(), toContainerConclusion.DetachResult()); + if (pkRangeFilterConclusion.IsFail()) { + return pkRangeFilterConclusion; } if (FakeRanges) { FakeRanges = false; SortedRanges.clear(); } if (ReverseFlag) { - SortedRanges.emplace_front(std::move(*pkRangeFilter)); + SortedRanges.emplace_front(pkRangeFilterConclusion.DetachResult()); } else { - SortedRanges.emplace_back(std::move(*pkRangeFilter)); + SortedRanges.emplace_back(pkRangeFilterConclusion.DetachResult()); } - return true; + return TConclusionStatus::Success(); } TString TPKRangesFilter::DebugString() const { @@ -75,30 +85,134 @@ std::set TPKRangesFilter::GetColumnIds(const TIndexInfo& indexInfo) const return result; } -bool TPKRangesFilter::IsPortionInUsage(const TPortionInfo& info, const TIndexInfo& indexInfo) const { +bool TPKRangesFilter::IsPortionInUsage(const TPortionInfo& info) const { for (auto&& i : SortedRanges) { - if (i.IsPortionInUsage(info, indexInfo)) { + if (i.IsPortionInUsage(info)) { return true; } } return SortedRanges.empty(); } -bool TPKRangesFilter::IsPortionInPartialUsage(const NArrow::TReplaceKey& start, const NArrow::TReplaceKey& end, const TIndexInfo& indexInfo) const { +bool TPKRangesFilter::CheckPoint(const NArrow::TReplaceKey& point) const { for (auto&& i : SortedRanges) { - if (i.IsPortionInPartialUsage(start, end, 
indexInfo)) { + if (i.CheckPoint(point)) { return true; } } - return false; + return SortedRanges.empty(); +} + +TPKRangeFilter::EUsageClass TPKRangesFilter::IsPortionInPartialUsage(const NArrow::TReplaceKey& start, const NArrow::TReplaceKey& end) const { + for (auto&& i : SortedRanges) { + switch (i.IsPortionInPartialUsage(start, end)) { + case TPKRangeFilter::EUsageClass::FullUsage: + return TPKRangeFilter::EUsageClass::FullUsage; + case TPKRangeFilter::EUsageClass::PartialUsage: + return TPKRangeFilter::EUsageClass::PartialUsage; + case TPKRangeFilter::EUsageClass::DontUsage: + break; + } + } + return TPKRangeFilter::EUsageClass::DontUsage; } TPKRangesFilter::TPKRangesFilter(const bool reverse) - : ReverseFlag(reverse) -{ + : ReverseFlag(reverse) { auto range = TPKRangeFilter::Build(TPredicateContainer::BuildNullPredicateFrom(), TPredicateContainer::BuildNullPredicateTo()); Y_ABORT_UNLESS(range); SortedRanges.emplace_back(*range); } +std::shared_ptr TPKRangesFilter::SerializeToRecordBatch(const std::shared_ptr& pkSchema) const { + auto fullSchema = NArrow::TStatusValidator::GetValid( + pkSchema->AddField(pkSchema->num_fields(), std::make_shared(".ydb_operation_type", arrow::uint32()))); + auto builders = NArrow::MakeBuilders(fullSchema, SortedRanges.size() * 2); + for (auto&& i : SortedRanges) { + for (ui32 idx = 0; idx < (ui32)pkSchema->num_fields(); ++idx) { + if (idx < i.GetPredicateFrom().GetReplaceKey()->Size()) { + AFL_VERIFY(NArrow::Append( + *builders[idx], i.GetPredicateFrom().GetReplaceKey()->Column(idx), i.GetPredicateFrom().GetReplaceKey()->GetPosition())); + } else { + NArrow::TStatusValidator::Validate(builders[idx]->AppendNull()); + } + } + NArrow::Append(*builders[pkSchema->num_fields()], (ui32)i.GetPredicateFrom().GetCompareType()); + + for (ui32 idx = 0; idx < (ui32)pkSchema->num_fields(); ++idx) { + if (idx < i.GetPredicateTo().GetReplaceKey()->Size()) { + AFL_VERIFY(NArrow::Append( + *builders[idx], 
i.GetPredicateTo().GetReplaceKey()->Column(idx), i.GetPredicateTo().GetReplaceKey()->GetPosition())); + } else { + NArrow::TStatusValidator::Validate(builders[idx]->AppendNull()); + } + } + NArrow::Append(*builders[pkSchema->num_fields()], (ui32)i.GetPredicateTo().GetCompareType()); + } + return arrow::RecordBatch::Make(fullSchema, SortedRanges.size() * 2, NArrow::Finish(std::move(builders))); +} + +std::shared_ptr TPKRangesFilter::BuildFromRecordBatchLines( + const std::shared_ptr& batch, const bool reverse) { + std::shared_ptr result = std::make_shared(reverse); + for (ui32 i = 0; i < batch->num_rows(); ++i) { + auto batchRow = batch->Slice(i, 1); + auto pFrom = std::make_shared(NKernels::EOperation::GreaterEqual, batchRow); + auto pTo = std::make_shared(NKernels::EOperation::LessEqual, batchRow); + result->Add(pFrom, pTo, batch->schema()).Validate(); + } + return result; } + +std::shared_ptr TPKRangesFilter::BuildFromRecordBatchFull( + const std::shared_ptr& batch, const std::shared_ptr& pkSchema, const bool reverse) { + std::shared_ptr result = std::make_shared(reverse); + auto pkBatch = NArrow::TColumnOperator().Adapt(batch, pkSchema).DetachResult(); + auto c = batch->GetColumnByName(".ydb_operation_type"); + AFL_VERIFY(c); + AFL_VERIFY(c->type_id() == arrow::Type::UINT32); + auto cUi32 = static_pointer_cast(c); + for (ui32 i = 0; i < batch->num_rows();) { + std::shared_ptr pFrom; + std::shared_ptr pTo; + { + auto batchRow = TPredicate::CutNulls(batch->Slice(i, 1)); + NKernels::EOperation op = (NKernels::EOperation)cUi32->Value(i); + if (op == NKernels::EOperation::GreaterEqual || op == NKernels::EOperation::Greater) { + pFrom = std::make_shared(op, batchRow); + } else if (op == NKernels::EOperation::Equal) { + pFrom = std::make_shared(NKernels::EOperation::GreaterEqual, batchRow); + } else { + AFL_VERIFY(false); + } + if (op != NKernels::EOperation::Equal) { + ++i; + } + } + { + auto batchRow = TPredicate::CutNulls(batch->Slice(i, 1)); + NKernels::EOperation 
op = (NKernels::EOperation)cUi32->Value(i); + if (op == NKernels::EOperation::LessEqual || op == NKernels::EOperation::Less) { + pTo = std::make_shared(op, batchRow); + } else if (op == NKernels::EOperation::Equal) { + pTo = std::make_shared(NKernels::EOperation::LessEqual, batchRow); + } else { + AFL_VERIFY(false); + } + } + result->Add(pFrom, pTo, pkSchema).Validate(); + } + return result; +} + +std::shared_ptr TPKRangesFilter::BuildFromString( + const TString& data, const std::shared_ptr& pkSchema, const bool reverse) { + auto batch = NArrow::TStatusValidator::GetValid(NArrow::NSerialization::TNativeSerializer().Deserialize(data)); + return BuildFromRecordBatchFull(batch, pkSchema, reverse); +} + +TString TPKRangesFilter::SerializeToString(const std::shared_ptr& pkSchema) const { + return NArrow::NSerialization::TNativeSerializer().SerializeFull(SerializeToRecordBatch(pkSchema)); +} + +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/predicate/filter.h b/ydb/core/tx/columnshard/engines/predicate/filter.h index 20e7a039b273..bbc70b5ff584 100644 --- a/ydb/core/tx/columnshard/engines/predicate/filter.h +++ b/ydb/core/tx/columnshard/engines/predicate/filter.h @@ -9,9 +9,15 @@ class TPKRangesFilter { bool FakeRanges = true; std::deque SortedRanges; bool ReverseFlag = false; + public: TPKRangesFilter(const bool reverse); + [[nodiscard]] TConclusionStatus Add( + std::shared_ptr f, std::shared_ptr t, const std::shared_ptr& pkSchema); + std::shared_ptr SerializeToRecordBatch(const std::shared_ptr& pkSchema) const; + TString SerializeToString(const std::shared_ptr& pkSchema) const; + bool IsEmpty() const { return SortedRanges.empty() || FakeRanges; } @@ -37,13 +43,12 @@ class TPKRangesFilter { return SortedRanges.end(); } - bool IsPortionInUsage(const TPortionInfo& info, const TIndexInfo& indexInfo) const; - bool IsPortionInPartialUsage(const NArrow::TReplaceKey& start, const NArrow::TReplaceKey& end, const TIndexInfo& indexInfo) const; + bool 
IsPortionInUsage(const TPortionInfo& info) const; + TPKRangeFilter::EUsageClass IsPortionInPartialUsage(const NArrow::TReplaceKey& start, const NArrow::TReplaceKey& end) const; + bool CheckPoint(const NArrow::TReplaceKey& point) const; NArrow::TColumnFilter BuildFilter(const arrow::Datum& data) const; - [[nodiscard]] bool Add(std::shared_ptr f, std::shared_ptr t, const TIndexInfo* indexInfo); - std::set GetColumnNames() const { std::set result; for (auto&& i : SortedRanges) { @@ -57,6 +62,30 @@ class TPKRangesFilter { TString DebugString() const; std::set GetColumnIds(const TIndexInfo& indexInfo) const; + + static std::shared_ptr BuildFromRecordBatchLines(const std::shared_ptr& batch, const bool reverse); + + static std::shared_ptr BuildFromRecordBatchFull( + const std::shared_ptr& batch, const std::shared_ptr& pkSchema, const bool reverse); + static std::shared_ptr BuildFromString( + const TString& data, const std::shared_ptr& pkSchema, const bool reverse); + + template + static TConclusion BuildFromProto(const TProto& proto, const bool reverse, const std::vector& ydbPk) { + TPKRangesFilter result(reverse); + for (auto& protoRange : proto.GetRanges()) { + TSerializedTableRange range(protoRange); + auto fromPredicate = std::make_shared(); + auto toPredicate = std::make_shared(); + TSerializedTableRange serializedRange(protoRange); + std::tie(*fromPredicate, *toPredicate) = TPredicate::DeserializePredicatesRange(serializedRange, ydbPk); + auto status = result.Add(fromPredicate, toPredicate, NArrow::TStatusValidator::GetValid(NArrow::MakeArrowSchema(ydbPk))); + if (status.IsFail()) { + return status; + } + } + return result; + } }; } diff --git a/ydb/core/tx/columnshard/engines/predicate/predicate.cpp b/ydb/core/tx/columnshard/engines/predicate/predicate.cpp index 7a4ee282f0cc..a6831ca2ad50 100644 --- a/ydb/core/tx/columnshard/engines/predicate/predicate.cpp +++ b/ydb/core/tx/columnshard/engines/predicate/predicate.cpp @@ -1,20 +1,21 @@ #include "predicate.h" 
+#include #include -#include +#include + +#include namespace NKikimr::NOlap { TPredicate::TPredicate(EOperation op, std::shared_ptr batch) noexcept : Operation(op) - , Batch(std::move(batch)) -{ + , Batch(std::move(batch)) { Y_ABORT_UNLESS(IsFrom() || IsTo()); } TPredicate::TPredicate(EOperation op, const TString& serializedBatch, const std::shared_ptr& schema) - : Operation(op) -{ + : Operation(op) { Y_ABORT_UNLESS(IsFrom() || IsTo()); if (!serializedBatch.empty()) { Batch = NArrow::DeserializeBatch(serializedBatch, schema); @@ -31,7 +32,112 @@ std::vector TPredicate::ColumnNames() const { return out; } -IOutputStream& operator << (IOutputStream& out, const TPredicate& pred) { +std::vector ExtractTypes(const std::vector>& columns) { + std::vector types; + types.reserve(columns.size()); + for (auto& [name, type] : columns) { + types.push_back(type); + } + return types; +} + +TString FromCells(const TConstArrayRef& cells, const std::vector>& columns) { + Y_ABORT_UNLESS(cells.size() == columns.size()); + if (cells.empty()) { + return {}; + } + + std::vector types = ExtractTypes(columns); + + NArrow::TArrowBatchBuilder batchBuilder; + batchBuilder.Reserve(1); + auto startStatus = batchBuilder.Start(columns); + Y_ABORT_UNLESS(startStatus.ok(), "%s", startStatus.ToString().c_str()); + + batchBuilder.AddRow(NKikimr::TDbTupleRef(), NKikimr::TDbTupleRef(types.data(), cells.data(), cells.size())); + + auto batch = batchBuilder.FlushBatch(false); + Y_ABORT_UNLESS(batch); + Y_ABORT_UNLESS(batch->num_columns() == (int)cells.size()); + Y_ABORT_UNLESS(batch->num_rows() == 1); + return NArrow::SerializeBatchNoCompression(batch); +} + +std::pair TPredicate::DeserializePredicatesRange( + const TSerializedTableRange& range, const std::vector>& columns) { + std::vector leftCells; + std::vector> leftColumns; + bool leftTrailingNull = false; + { + TConstArrayRef cells = range.From.GetCells(); + const size_t size = cells.size(); + Y_ASSERT(size <= columns.size()); + 
leftCells.reserve(size); + leftColumns.reserve(size); + for (size_t i = 0; i < size; ++i) { + if (!cells[i].IsNull()) { + leftCells.push_back(cells[i]); + leftColumns.push_back(columns[i]); + leftTrailingNull = false; + } else { + leftTrailingNull = true; + } + } + } + + std::vector rightCells; + std::vector> rightColumns; + bool rightTrailingNull = false; + { + TConstArrayRef cells = range.To.GetCells(); + const size_t size = cells.size(); + Y_ASSERT(size <= columns.size()); + rightCells.reserve(size); + rightColumns.reserve(size); + for (size_t i = 0; i < size; ++i) { + if (!cells[i].IsNull()) { + rightCells.push_back(cells[i]); + rightColumns.push_back(columns[i]); + rightTrailingNull = false; + } else { + rightTrailingNull = true; + } + } + } + + const bool fromInclusive = range.FromInclusive || leftTrailingNull; + const bool toInclusive = range.ToInclusive && !rightTrailingNull; + + TString leftBorder = FromCells(leftCells, leftColumns); + TString rightBorder = FromCells(rightCells, rightColumns); + auto leftSchema = NArrow::MakeArrowSchema(leftColumns); + Y_ASSERT(leftSchema.ok()); + auto rightSchema = NArrow::MakeArrowSchema(rightColumns); + Y_ASSERT(rightSchema.ok()); + return std::make_pair( + TPredicate(fromInclusive ? NKernels::EOperation::GreaterEqual : NKernels::EOperation::Greater, leftBorder, leftSchema.ValueUnsafe()), + TPredicate(toInclusive ? 
NKernels::EOperation::LessEqual : NKernels::EOperation::Less, rightBorder, rightSchema.ValueUnsafe())); +} + +std::shared_ptr TPredicate::CutNulls(const std::shared_ptr& batch) { + AFL_VERIFY(batch->num_rows() == 1)("count", batch->num_rows()); + AFL_VERIFY(batch->num_columns()); + std::vector> colsNotNull; + std::vector> fieldsNotNull; + ui32 idx = 0; + for (auto&& i : batch->columns()) { + if (i->IsNull(0)) { + break; + } + colsNotNull.emplace_back(i); + fieldsNotNull.emplace_back(batch->schema()->field(idx)); + ++idx; + } + AFL_VERIFY(colsNotNull.size()); + return arrow::RecordBatch::Make(std::make_shared(fieldsNotNull), 1, colsNotNull); +} + +IOutputStream& operator<<(IOutputStream& out, const TPredicate& pred) { out << NSsa::GetFunctionName(pred.Operation); for (i32 i = 0; i < pred.Batch->num_columns(); ++i) { @@ -61,4 +167,4 @@ IOutputStream& operator << (IOutputStream& out, const TPredicate& pred) { return out; } -} // namespace NKikimr::NOlap +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/predicate/predicate.h b/ydb/core/tx/columnshard/engines/predicate/predicate.h index 0b856894e65f..8365971ea29e 100644 --- a/ydb/core/tx/columnshard/engines/predicate/predicate.h +++ b/ydb/core/tx/columnshard/engines/predicate/predicate.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include @@ -10,7 +11,9 @@ struct TPredicate { private: using EOperation = NArrow::EOperation; EOperation Operation{ EOperation::Unspecified }; + public: + static std::shared_ptr CutNulls(const std::shared_ptr& batch); std::shared_ptr Batch; @@ -29,8 +32,8 @@ struct TPredicate { } template - std::optional Get(const ui32 colIndex, const ui32 rowIndex, - const std::optional defaultValue = {}) const { + std::optional Get( + const ui32 colIndex, const ui32 rowIndex, const std::optional defaultValue = {}) const { auto column = Batch->column(colIndex); if (!column) { return defaultValue; @@ -42,10 +45,18 @@ struct TPredicate { } } - bool Empty() const noexcept { return 
Batch.get() == nullptr; } - bool Good() const { return !Empty() && Batch->num_columns() && Batch->num_rows() == 1; } - bool IsFrom() const noexcept { return Operation == EOperation::Greater || Operation == EOperation::GreaterEqual; } - bool IsTo() const noexcept { return Operation == EOperation::Less || Operation == EOperation::LessEqual; } + bool Empty() const noexcept { + return Batch.get() == nullptr; + } + bool Good() const { + return !Empty() && Batch->num_columns() && Batch->num_rows() == 1; + } + bool IsFrom() const noexcept { + return Operation == EOperation::Greater || Operation == EOperation::GreaterEqual; + } + bool IsTo() const noexcept { + return Operation == EOperation::Less || Operation == EOperation::LessEqual; + } bool IsInclusive() const { return Operation == EOperation::GreaterEqual || Operation == EOperation::LessEqual; } @@ -56,13 +67,16 @@ struct TPredicate { return Empty() ? "()" : Batch->schema()->ToString(); } + static std::pair DeserializePredicatesRange( + const TSerializedTableRange& range, const std::vector>& columns); + constexpr TPredicate() noexcept = default; TPredicate(EOperation op, std::shared_ptr batch) noexcept; TPredicate(EOperation op, const TString& serializedBatch, const std::shared_ptr& schema); - friend IOutputStream& operator << (IOutputStream& out, const TPredicate& pred); + friend IOutputStream& operator<<(IOutputStream& out, const TPredicate& pred); }; -} // namespace NKikimr::NOlap +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/predicate/range.cpp b/ydb/core/tx/columnshard/engines/predicate/range.cpp index 3a073fcb8fec..83c6602d242d 100644 --- a/ydb/core/tx/columnshard/engines/predicate/range.cpp +++ b/ydb/core/tx/columnshard/engines/predicate/range.cpp @@ -6,11 +6,11 @@ namespace NKikimr::NOlap { std::set TPKRangeFilter::GetColumnIds(const TIndexInfo& indexInfo) const { std::set result; for (auto&& i : PredicateFrom.GetColumnNames()) { - result.emplace(indexInfo.GetColumnId(i)); + 
result.emplace(indexInfo.GetColumnIdVerified(i)); AFL_TRACE(NKikimrServices::TX_COLUMNSHARD_SCAN)("predicate_column", i); } for (auto&& i : PredicateTo.GetColumnNames()) { - result.emplace(indexInfo.GetColumnId(i)); + result.emplace(indexInfo.GetColumnIdVerified(i)); AFL_TRACE(NKikimrServices::TX_COLUMNSHARD_SCAN)("predicate_column", i); } return result; @@ -39,63 +39,84 @@ NKikimr::NArrow::TColumnFilter TPKRangeFilter::BuildFilter(const arrow::Datum& d return result.And(PredicateFrom.BuildFilter(data)); } -bool TPKRangeFilter::IsPortionInUsage(const TPortionInfo& info, const TIndexInfo& indexInfo) const { - if (auto from = PredicateFrom.ExtractKey(indexInfo.GetPrimaryKey())) { - const auto& portionEnd = info.IndexKeyEnd(); - const int commonSize = std::min(from->Size(), portionEnd.Size()); - if (std::is_gt(from->ComparePartNotNull(portionEnd, commonSize))) { - return false; - } - } +bool TPKRangeFilter::IsPortionInUsage(const TPortionInfo& info) const { + return IsPortionInPartialUsage(info.IndexKeyStart(), info.IndexKeyEnd()) != TPKRangeFilter::EUsageClass::DontUsage; +} - if (auto to = PredicateTo.ExtractKey(indexInfo.GetPrimaryKey())) { - const auto& portionStart = info.IndexKeyStart(); - const int commonSize = std::min(to->Size(), portionStart.Size()); - if (std::is_lt(to->ComparePartNotNull(portionStart, commonSize))) { - return false; +TPKRangeFilter::EUsageClass TPKRangeFilter::IsPortionInPartialUsage(const NArrow::TReplaceKey& start, const NArrow::TReplaceKey& end) const { + { + std::partial_ordering equalityStartWithFrom = std::partial_ordering::greater; + if (const auto& from = PredicateFrom.GetReplaceKey()) { + equalityStartWithFrom = start.ComparePartNotNull(*from, from->Size()); + } + std::partial_ordering equalityEndWithTo = std::partial_ordering::less; + if (const auto& to = PredicateTo.GetReplaceKey()) { + equalityEndWithTo = end.ComparePartNotNull(*to, to->Size()); + } + const bool startInternal = (equalityStartWithFrom == 
std::partial_ordering::equivalent && PredicateFrom.IsInclude()) || + (equalityStartWithFrom == std::partial_ordering::greater); + const bool endInternal = (equalityEndWithTo == std::partial_ordering::equivalent && PredicateTo.IsInclude()) || + (equalityEndWithTo == std::partial_ordering::less); + if (startInternal && endInternal) { + return EUsageClass::FullUsage; } } + - return true; -} - -bool TPKRangeFilter::IsPortionInPartialUsage(const NArrow::TReplaceKey& start, const NArrow::TReplaceKey& end, const TIndexInfo& indexInfo) const { - bool startUsage = false; - bool endUsage = false; - if (auto from = PredicateFrom.ExtractKey(indexInfo.GetPrimaryKey())) { - AFL_VERIFY(from->Size() <= start.Size()); - if (PredicateFrom.IsInclude()) { - startUsage = std::is_lt(start.ComparePartNotNull(*from, from->Size())); - } else { - startUsage = std::is_lteq(start.ComparePartNotNull(*from, from->Size())); + if (const auto& from = PredicateFrom.GetReplaceKey()) { + const std::partial_ordering equalityEndWithFrom = end.ComparePartNotNull(*from, from->Size()); + if (equalityEndWithFrom == std::partial_ordering::less) { + return EUsageClass::DontUsage; + } else if (equalityEndWithFrom == std::partial_ordering::equivalent) { + if (PredicateFrom.IsInclude()) { + return EUsageClass::PartialUsage; + } else { + return EUsageClass::DontUsage; + } } - } else { - startUsage = true; } - if (auto to = PredicateTo.ExtractKey(indexInfo.GetPrimaryKey())) { - AFL_VERIFY(to->Size() <= end.Size()); - if (PredicateTo.IsInclude()) { - endUsage = std::is_gt(end.ComparePartNotNull(*to, to->Size())); - } else { - endUsage = std::is_gteq(end.ComparePartNotNull(*to, to->Size())); + if (const auto& to = PredicateTo.GetReplaceKey()) { + const std::partial_ordering equalityStartWithTo = start.ComparePartNotNull(*to, to->Size()); + if (equalityStartWithTo == std::partial_ordering::greater) { + return EUsageClass::DontUsage; + } else if (equalityStartWithTo == std::partial_ordering::equivalent) { + if 
(PredicateTo.IsInclude()) { + return EUsageClass::PartialUsage; + } else { + return EUsageClass::DontUsage; + } } - } else { - endUsage = true; } -// AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("start", start.DebugString())("end", end.DebugString())("from", PredicateFrom.DebugString())("to", PredicateTo.DebugString()) -// ("start_usage", startUsage)("end_usage", endUsage); +// AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("start", start.DebugString())("end", end.DebugString())("from", PredicateFrom.DebugString())( +// "to", PredicateTo.DebugString()); - return endUsage || startUsage; + return EUsageClass::PartialUsage; } -std::optional TPKRangeFilter::Build(TPredicateContainer&& from, TPredicateContainer&& to) { +TConclusion TPKRangeFilter::Build(TPredicateContainer&& from, TPredicateContainer&& to) { if (!from.CrossRanges(to)) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "cannot_build_predicate_range")("error", "predicates from/to not intersected"); - return {}; + return TConclusionStatus::Fail("predicates from/to not intersected"); } return TPKRangeFilter(std::move(from), std::move(to)); } +bool TPKRangeFilter::CheckPoint(const NArrow::TReplaceKey& point) const { + std::partial_ordering equalityWithFrom = std::partial_ordering::greater; + if (const auto& from = PredicateFrom.GetReplaceKey()) { + equalityWithFrom = point.ComparePartNotNull(*from, from->Size()); + } + std::partial_ordering equalityWithTo = std::partial_ordering::less; + if (const auto& to = PredicateTo.GetReplaceKey()) { + equalityWithTo = point.ComparePartNotNull(*to, to->Size()); + } + const bool startInternal = (equalityWithFrom == std::partial_ordering::equivalent && PredicateFrom.IsInclude()) || + (equalityWithFrom == std::partial_ordering::greater); + const bool endInternal = (equalityWithTo == std::partial_ordering::equivalent && PredicateTo.IsInclude()) || + (equalityWithTo == std::partial_ordering::less); + return startInternal && endInternal; +} + } diff --git 
a/ydb/core/tx/columnshard/engines/predicate/range.h b/ydb/core/tx/columnshard/engines/predicate/range.h index ff84f35408a3..6f9f264b7d70 100644 --- a/ydb/core/tx/columnshard/engines/predicate/range.h +++ b/ydb/core/tx/columnshard/engines/predicate/range.h @@ -28,20 +28,20 @@ class TPKRangeFilter { return PredicateTo; } - std::optional KeyFrom(const std::shared_ptr& key) const { - return PredicateFrom.ExtractKey(key); - } + static TConclusion Build(TPredicateContainer&& from, TPredicateContainer&& to); - std::optional KeyTo(const std::shared_ptr& key) const { - return PredicateTo.ExtractKey(key); - } + NArrow::TColumnFilter BuildFilter(const arrow::Datum& data) const; - static std::optional Build(TPredicateContainer&& from, TPredicateContainer&& to); + bool IsPortionInUsage(const TPortionInfo& info) const; + bool CheckPoint(const NArrow::TReplaceKey& point) const; - NArrow::TColumnFilter BuildFilter(const arrow::Datum& data) const; + enum class EUsageClass { + DontUsage, + PartialUsage, + FullUsage + }; - bool IsPortionInUsage(const TPortionInfo& info, const TIndexInfo& indexInfo) const; - bool IsPortionInPartialUsage(const NArrow::TReplaceKey& start, const NArrow::TReplaceKey& end, const TIndexInfo& indexInfo) const; + EUsageClass IsPortionInPartialUsage(const NArrow::TReplaceKey& start, const NArrow::TReplaceKey& end) const; std::set GetColumnIds(const TIndexInfo& indexInfo) const; TString DebugString() const; diff --git a/ydb/core/tx/columnshard/engines/protos/portion_info.proto b/ydb/core/tx/columnshard/engines/protos/portion_info.proto index 8e058f49d8e2..f7f38bb96ed7 100644 --- a/ydb/core/tx/columnshard/engines/protos/portion_info.proto +++ b/ydb/core/tx/columnshard/engines/protos/portion_info.proto @@ -1,5 +1,4 @@ -import "ydb/core/tx/columnshard/engines/scheme/statistics/protos/data.proto"; -import "ydb/core/formats/arrow/protos/ssa.proto"; +import "ydb/library/formats/arrow/protos/ssa.proto"; package NKikimrTxColumnShard; @@ -19,7 +18,6 @@ message 
TIndexPortionMeta { optional bytes PrimaryKeyBorders = 6; // arrow::RecordBatch with first and last ReplaceKey rows optional TSnapshot RecordSnapshotMin = 7; optional TSnapshot RecordSnapshotMax = 8; - optional NKikimrColumnShardStatisticsProto.TPortionStorage StatisticsStorage = 9; optional uint32 DeletionsCount = 10; } diff --git a/ydb/core/tx/columnshard/engines/protos/ya.make b/ydb/core/tx/columnshard/engines/protos/ya.make index 67c3e138a8ac..5719eb76af10 100644 --- a/ydb/core/tx/columnshard/engines/protos/ya.make +++ b/ydb/core/tx/columnshard/engines/protos/ya.make @@ -5,8 +5,7 @@ SRCS( ) PEERDIR( - ydb/core/tx/columnshard/engines/scheme/statistics/protos - ydb/core/formats/arrow/protos + ydb/library/formats/arrow/protos ) diff --git a/ydb/core/tx/columnshard/engines/reader/abstract/abstract.h b/ydb/core/tx/columnshard/engines/reader/abstract/abstract.h index 2681626b6d4f..37ba57b89985 100644 --- a/ydb/core/tx/columnshard/engines/reader/abstract/abstract.h +++ b/ydb/core/tx/columnshard/engines/reader/abstract/abstract.h @@ -13,7 +13,7 @@ class TScanIteratorBase { public: virtual ~TScanIteratorBase() = default; - virtual void Apply(IDataTasksProcessor::ITask::TPtr /*processor*/) { + virtual void Apply(const std::shared_ptr& /*task*/) { } @@ -31,7 +31,7 @@ class TScanIteratorBase { return {}; } virtual bool Finished() const = 0; - virtual TConclusion> GetBatch() = 0; + virtual TConclusion> GetBatch() = 0; virtual void PrepareResults() { } diff --git a/ydb/core/tx/columnshard/engines/reader/abstract/read_context.h b/ydb/core/tx/columnshard/engines/reader/abstract/read_context.h index fbcdab4d8622..3b1d545094ac 100644 --- a/ydb/core/tx/columnshard/engines/reader/abstract/read_context.h +++ b/ydb/core/tx/columnshard/engines/reader/abstract/read_context.h @@ -1,10 +1,12 @@ #pragma once #include "read_metadata.h" + #include -#include -#include #include +#include #include +#include + #include namespace NKikimr::NOlap::NReader { @@ -13,6 +15,7 @@ class 
TComputeShardingPolicy { private: YDB_READONLY(ui32, ShardsCount, 0); YDB_READONLY_DEF(std::vector, ColumnNames); + public: TString DebugString() const { return TStringBuilder() << "shards_count:" << ShardsCount << ";columns=" << JoinSeq(",", ColumnNames) << ";"; @@ -42,10 +45,12 @@ class TReadContext { const NColumnShard::TConcreteScanCounters Counters; TReadMetadataBase::TConstPtr ReadMetadata; NResourceBroker::NSubscribe::TTaskContext ResourcesTaskContext; + const ui64 ScanId; const TActorId ScanActorId; const TActorId ResourceSubscribeActorId; const TActorId ReadCoordinatorActorId; const TComputeShardingPolicy ComputeShardingPolicy; + public: template std::shared_ptr GetReadMetadataPtrVerifiedAs() const { @@ -74,6 +79,14 @@ class TReadContext { return ScanActorId; } + ui64 GetScanId() const { + return ScanId; + } + + bool HasLock() const { + return !!ReadMetadata->GetLockId(); + } + const TReadMetadataBase::TConstPtr& GetReadMetadata() const { return ReadMetadata; } @@ -86,17 +99,18 @@ class TReadContext { return ResourcesTaskContext; } - TReadContext(const std::shared_ptr& storagesManager, const NColumnShard::TConcreteScanCounters& counters, const TReadMetadataBase::TConstPtr& readMetadata, - const TActorId& scanActorId, const TActorId& resourceSubscribeActorId, const TActorId& readCoordinatorActorId, const TComputeShardingPolicy& computeShardingPolicy) + TReadContext(const std::shared_ptr& storagesManager, const NColumnShard::TConcreteScanCounters& counters, + const TReadMetadataBase::TConstPtr& readMetadata, const TActorId& scanActorId, const TActorId& resourceSubscribeActorId, + const TActorId& readCoordinatorActorId, const TComputeShardingPolicy& computeShardingPolicy, const ui64 scanId) : StoragesManager(storagesManager) , Counters(counters) , ReadMetadata(readMetadata) , ResourcesTaskContext("CS::SCAN_READ", counters.ResourcesSubscriberCounters) + , ScanId(scanId) , ScanActorId(scanActorId) , ResourceSubscribeActorId(resourceSubscribeActorId) , 
ReadCoordinatorActorId(readCoordinatorActorId) - , ComputeShardingPolicy(computeShardingPolicy) - { + , ComputeShardingPolicy(computeShardingPolicy) { Y_ABORT_UNLESS(ReadMetadata); } }; @@ -109,8 +123,9 @@ class IDataReader { virtual TString DoDebugString(const bool verbose) const = 0; virtual void DoAbort() = 0; virtual bool DoIsFinished() const = 0; - virtual std::vector DoExtractReadyResults(const int64_t maxRowsInBatch) = 0; + virtual std::vector> DoExtractReadyResults(const int64_t maxRowsInBatch) = 0; virtual TConclusion DoReadNextInterval() = 0; + public: IDataReader(const std::shared_ptr& context); virtual ~IDataReader() = default; @@ -153,7 +168,7 @@ class IDataReader { return *result; } - std::vector ExtractReadyResults(const int64_t maxRowsInBatch) { + std::vector> ExtractReadyResults(const int64_t maxRowsInBatch) { return DoExtractReadyResults(maxRowsInBatch); } @@ -171,4 +186,4 @@ class IDataReader { } }; -} +} // namespace NKikimr::NOlap::NReader diff --git a/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.cpp b/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.cpp index e6fc29578f1c..88416a4d214f 100644 --- a/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.cpp +++ b/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.cpp @@ -1,21 +1,20 @@ #include "read_metadata.h" + #include namespace NKikimr::NOlap::NReader { -TDataStorageAccessor::TDataStorageAccessor(const std::unique_ptr& insertTable, - const std::unique_ptr& index) +TDataStorageAccessor::TDataStorageAccessor(const std::unique_ptr& insertTable, const std::unique_ptr& index) : InsertTable(insertTable) - , Index(index) -{} + , Index(index) { +} std::shared_ptr TDataStorageAccessor::Select(const TReadDescription& readDescription) const { if (readDescription.ReadNothing) { return std::make_shared(); } - return Index->Select(readDescription.PathId, - readDescription.GetSnapshot(), - readDescription.PKRangesFilter); + 
AFL_VERIFY(readDescription.PKRangesFilter); + return Index->Select(readDescription.PathId, readDescription.GetSnapshot(), *readDescription.PKRangesFilter); } ISnapshotSchema::TPtr TReadMetadataBase::GetLoadSchemaVerified(const TPortionInfo& portion) const { @@ -24,8 +23,10 @@ ISnapshotSchema::TPtr TReadMetadataBase::GetLoadSchemaVerified(const TPortionInf return schema; } -std::vector TDataStorageAccessor::GetCommitedBlobs(const TReadDescription& readDescription, const std::shared_ptr& pkSchema) const { - return std::move(InsertTable->Read(readDescription.PathId, readDescription.GetSnapshot(), pkSchema)); +std::vector TDataStorageAccessor::GetCommitedBlobs(const TReadDescription& readDescription, + const std::shared_ptr& pkSchema, const std::optional lockId, const TSnapshot& reqSnapshot) const { + AFL_VERIFY(readDescription.PKRangesFilter); + return std::move(InsertTable->Read(readDescription.PathId, lockId, reqSnapshot, pkSchema, &*readDescription.PKRangesFilter)); } -} +} // namespace NKikimr::NOlap::NReader diff --git a/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.h b/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.h index 7770674caa45..d87fcf02868e 100644 --- a/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.h +++ b/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.h @@ -1,12 +1,17 @@ #pragma once +#include +#include #include #include -#include -#include namespace NKikimr::NOlap { - class TPortionInfo; +class TPortionInfo; } + +namespace NKikimr::NKqp::NInternalImplementation { +struct TEvScanData; +} + namespace NKikimr::NOlap::NReader { class TScanIteratorBase; @@ -18,10 +23,10 @@ class TDataStorageAccessor { const std::unique_ptr& Index; public: - TDataStorageAccessor(const std::unique_ptr& insertTable, - const std::unique_ptr& index); + TDataStorageAccessor(const std::unique_ptr& insertTable, const std::unique_ptr& index); std::shared_ptr Select(const TReadDescription& readDescription) const; - 
std::vector GetCommitedBlobs(const TReadDescription& readDescription, const std::shared_ptr& pkSchema) const; + std::vector GetCommitedBlobs(const TReadDescription& readDescription, const std::shared_ptr& pkSchema, + const std::optional lockId, const TSnapshot& reqSnapshot) const; }; // Holds all metadata that is needed to perform read/scan @@ -32,29 +37,61 @@ struct TReadMetadataBase { ASC /* "ascending" */, DESC /* "descending" */, }; + private: - const ESorting Sorting = ESorting::ASC; // Sorting inside returned batches - std::optional PKRangesFilter; + const ESorting Sorting = ESorting::ASC; // Sorting inside returned batches + std::shared_ptr PKRangesFilter; TProgramContainer Program; std::shared_ptr IndexVersionsPointer; TSnapshot RequestSnapshot; std::optional RequestShardingInfo; + virtual void DoOnReadFinished(NColumnShard::TColumnShard& /*owner*/) const { + } + virtual void DoOnBeforeStartReading(NColumnShard::TColumnShard& /*owner*/) const { + } + virtual void DoOnReplyConstruction(const ui64 /*tabletId*/, NKqp::NInternalImplementation::TEvScanData& /*scanData*/) const { + } protected: std::shared_ptr ResultIndexSchema; + ui64 TxId = 0; + std::optional LockId; + +public: + using TConstPtr = std::shared_ptr; + + void OnReplyConstruction(const ui64 tabletId, NKqp::NInternalImplementation::TEvScanData& scanData) const { + DoOnReplyConstruction(tabletId, scanData); + } + + ui64 GetTxId() const { + return TxId; + } + + std::optional GetLockId() const { + return LockId; + } + + void OnReadFinished(NColumnShard::TColumnShard& owner) const { + DoOnReadFinished(owner); + } + + void OnBeforeStartReading(NColumnShard::TColumnShard& owner) const { + DoOnBeforeStartReading(owner); + } + const TVersionedIndex& GetIndexVersions() const { AFL_VERIFY(IndexVersionsPointer); return *IndexVersionsPointer; } -public: - using TConstPtr = std::shared_ptr; const std::optional& GetRequestShardingInfo() const { return RequestShardingInfo; } - void SetPKRangesFilter(const 
TPKRangesFilter& value) { - Y_ABORT_UNLESS(IsSorted() && value.IsReverse() == IsDescSorted()); + void SetPKRangesFilter(const std::shared_ptr& value) { + AFL_VERIFY(value); + Y_ABORT_UNLESS(IsSorted() && value->IsReverse() == IsDescSorted()); Y_ABORT_UNLESS(!PKRangesFilter); PKRangesFilter = value; } @@ -64,6 +101,11 @@ struct TReadMetadataBase { return *PKRangesFilter; } + const std::shared_ptr& GetPKRangesFilterPtr() const { + Y_ABORT_UNLESS(!!PKRangesFilter); + return PKRangesFilter; + } + ISnapshotSchema::TPtr GetResultSchema() const { return ResultIndexSchema; } @@ -74,7 +116,7 @@ struct TReadMetadataBase { ISnapshotSchema::TPtr GetLoadSchemaVerified(const TPortionInfo& porition) const; - std::shared_ptr GetBlobSchema(const ui64 version) const { + const std::shared_ptr& GetBlobSchema(const ui64 version) const { return GetIndexVersions().GetSchema(version)->GetIndexInfo().ArrowSchema(); } @@ -90,13 +132,13 @@ struct TReadMetadataBase { RequestShardingInfo = IndexVersionsPointer->GetShardingInfoOptional(pathId, RequestSnapshot); } - TReadMetadataBase(const std::shared_ptr index, const ESorting sorting, const TProgramContainer& ssaProgram, const std::shared_ptr& schema, const TSnapshot& requestSnapshot) + TReadMetadataBase(const std::shared_ptr index, const ESorting sorting, const TProgramContainer& ssaProgram, + const std::shared_ptr& schema, const TSnapshot& requestSnapshot) : Sorting(sorting) , Program(ssaProgram) , IndexVersionsPointer(index) , RequestSnapshot(requestSnapshot) - , ResultIndexSchema(schema) - { + , ResultIndexSchema(schema) { } virtual ~TReadMetadataBase() = default; @@ -110,19 +152,25 @@ struct TReadMetadataBase { std::set GetProcessingColumnIds() const { std::set result; for (auto&& i : GetProgram().GetProcessingColumns()) { - result.emplace(ResultIndexSchema->GetIndexInfo().GetColumnId(i)); + result.emplace(ResultIndexSchema->GetIndexInfo().GetColumnIdVerified(i)); } return result; } - bool IsAscSorted() const { return Sorting == 
ESorting::ASC; } - bool IsDescSorted() const { return Sorting == ESorting::DESC; } - bool IsSorted() const { return IsAscSorted() || IsDescSorted(); } + bool IsAscSorted() const { + return Sorting == ESorting::ASC; + } + bool IsDescSorted() const { + return Sorting == ESorting::DESC; + } + bool IsSorted() const { + return IsAscSorted() || IsDescSorted(); + } virtual std::unique_ptr StartScan(const std::shared_ptr& readContext) const = 0; virtual std::vector GetKeyYqlSchema() const = 0; // TODO: can this only be done for base class? - friend IOutputStream& operator << (IOutputStream& out, const TReadMetadataBase& meta) { + friend IOutputStream& operator<<(IOutputStream& out, const TReadMetadataBase& meta) { meta.Dump(out); return out; } @@ -160,7 +208,6 @@ struct TReadMetadataBase { } return ResultIndexSchema->GetIndexInfo().GetIndexNameOptional(entityId); } - }; -} +} // namespace NKikimr::NOlap::NReader diff --git a/ydb/core/tx/columnshard/engines/reader/actor/actor.cpp b/ydb/core/tx/columnshard/engines/reader/actor/actor.cpp index 8b655d899a55..00ab74fa9921 100644 --- a/ydb/core/tx/columnshard/engines/reader/actor/actor.cpp +++ b/ydb/core/tx/columnshard/engines/reader/actor/actor.cpp @@ -61,8 +61,7 @@ TColumnShardScan::TColumnShardScan(const TActorId& columnShardActorId, const TAc , Deadline(TInstant::Now() + (timeout ? 
timeout + SCAN_HARD_TIMEOUT_GAP : SCAN_HARD_TIMEOUT)) , ScanCountersPool(scanCountersPool) , Stats(NTracing::TTraceClient::GetLocalClient("SHARD", ::ToString(TabletId)/*, "SCAN_TXID:" + ::ToString(TxId)*/)) - , ComputeShardingPolicy(computeShardingPolicy) -{ + , ComputeShardingPolicy(computeShardingPolicy) { AFL_VERIFY(ReadMetadataRange); KeyYqlSchema = ReadMetadataRange->GetKeyYqlSchema(); } @@ -80,7 +79,7 @@ void TColumnShardScan::Bootstrap(const TActorContext& ctx) { ReadCoordinatorActorId = ctx.Register(new NBlobOperations::NRead::TReadCoordinatorActor(TabletId, SelfId())); std::shared_ptr context = std::make_shared(StoragesManager, ScanCountersPool, - ReadMetadataRange, SelfId(), ResourceSubscribeActorId, ReadCoordinatorActorId, ComputeShardingPolicy); + ReadMetadataRange, SelfId(), ResourceSubscribeActorId, ReadCoordinatorActorId, ComputeShardingPolicy, ScanId); ScanIterator = ReadMetadataRange->StartScan(context); auto startResult = ScanIterator->Start(); StartInstant = TMonotonic::Now(); @@ -98,17 +97,18 @@ void TColumnShardScan::Bootstrap(const TActorContext& ctx) { } } -void TColumnShardScan::HandleScan(NConveyor::TEvExecution::TEvTaskProcessedResult::TPtr& ev) { +void TColumnShardScan::HandleScan(NColumnShard::TEvPrivate::TEvTaskProcessedResult::TPtr& ev) { --InFlightReads; auto g = Stats->MakeGuard("task_result"); - if (ev->Get()->GetErrorMessage()) { - ACFL_ERROR("event", "TEvTaskProcessedResult")("error", ev->Get()->GetErrorMessage()); - SendScanError("task_error:" + ev->Get()->GetErrorMessage()); + auto result = ev->Get()->ExtractResult(); + if (result.IsFail()) { + ACFL_ERROR("event", "TEvTaskProcessedResult")("error", result.GetErrorMessage()); + SendScanError("task_error:" + result.GetErrorMessage()); Finish(NColumnShard::TScanCounters::EStatusFinish::ConveyorInternalError); } else { ACFL_DEBUG("event", "TEvTaskProcessedResult"); - auto t = static_pointer_cast(ev->Get()->GetResult()); - 
Y_DEBUG_ABORT_UNLESS(dynamic_pointer_cast(ev->Get()->GetResult())); + auto t = static_pointer_cast(result.GetResult()); + Y_DEBUG_ABORT_UNLESS(dynamic_pointer_cast(result.GetResult())); if (!ScanIterator->Finished()) { ScanIterator->Apply(t); } @@ -208,7 +208,7 @@ bool TColumnShardScan::ProduceResults() noexcept { return false; } - std::optional resultOpt = resultConclusion.DetachResult(); + std::shared_ptr resultOpt = resultConclusion.DetachResult(); if (!resultOpt) { ACFL_DEBUG("stage", "no data is ready yet")("iterator", ScanIterator->DebugString()); return false; @@ -242,6 +242,7 @@ bool TColumnShardScan::ProduceResults() noexcept { Result->ArrowBatch = shardedBatch.GetRecordBatch(); Rows += batch->num_rows(); Bytes += NArrow::GetTableDataSize(Result->ArrowBatch); + ACFL_DEBUG("stage", "data_format")("batch_size", NArrow::GetTableDataSize(Result->ArrowBatch))("num_rows", numRows)("batch_columns", JoinSeq(",", batch->schema()->field_names())); } if (CurrentLastReadKey) { @@ -273,9 +274,9 @@ void TColumnShardScan::ContinueProcessing() { if (ChunksLimiter.HasMore()) { auto g = Stats->MakeGuard("Finish"); MakeResult(); + Finish(NColumnShard::TScanCounters::EStatusFinish::Success); SendResult(false, true); ScanIterator.reset(); - Finish(NColumnShard::TScanCounters::EStatusFinish::Success); } } else { while (true) { @@ -374,6 +375,7 @@ bool TColumnShardScan::SendResult(bool pageFault, bool lastBatch) { Y_ABORT_UNLESS(AckReceivedInstant); ScanCountersPool.AckWaitingInfo(TMonotonic::Now() - *AckReceivedInstant); } + ReadMetadataRange->OnReplyConstruction(TabletId, *Result); AckReceivedInstant.reset(); Send(ScanComputeActorId, Result.Release(), IEventHandle::FlagTrackDelivery); // TODO: FlagSubscribeOnSession ? 
@@ -401,7 +403,7 @@ void TColumnShardScan::Finish(const NColumnShard::TScanCounters::EStatusFinish s Send(ColumnShardActorId, new NColumnShard::TEvPrivate::TEvReadFinished(RequestCookie, TxId)); AFL_VERIFY(StartInstant); - ScanCountersPool.OnScanDuration(status, TMonotonic::Now() - *StartInstant); + ScanCountersPool.OnScanFinished(status, TMonotonic::Now() - *StartInstant); ReportStats(); PassAway(); } diff --git a/ydb/core/tx/columnshard/engines/reader/actor/actor.h b/ydb/core/tx/columnshard/engines/reader/actor/actor.h index 33134f85bcab..a37074f70a5d 100644 --- a/ydb/core/tx/columnshard/engines/reader/actor/actor.h +++ b/ydb/core/tx/columnshard/engines/reader/actor/actor.h @@ -1,17 +1,17 @@ #pragma once +#include +#include #include +#include +#include #include -#include #include -#include +#include #include #include -#include - -#include -#include #include +#include #include namespace NKikimr::NOlap::NReader { @@ -22,6 +22,7 @@ class TColumnShardScan: public TActorBootstrapped, NArrow::IRo TActorId ReadCoordinatorActorId; const std::shared_ptr StoragesManager; std::optional StartInstant; + public: static constexpr auto ActorActivityType() { return NKikimrServices::TActivity::KQP_OLAP_SCAN; @@ -31,9 +32,8 @@ class TColumnShardScan: public TActorBootstrapped, NArrow::IRo virtual void PassAway() override; TColumnShardScan(const TActorId& columnShardActorId, const TActorId& scanComputeActorId, - const std::shared_ptr& storagesManager, const TComputeShardingPolicy& computeShardingPolicy, - ui32 scanId, ui64 txId, ui32 scanGen, ui64 requestCookie, - ui64 tabletId, TDuration timeout, const TReadMetadataBase::TConstPtr& readMetadataRange, + const std::shared_ptr& storagesManager, const TComputeShardingPolicy& computeShardingPolicy, ui32 scanId, ui64 txId, + ui32 scanGen, ui64 requestCookie, ui64 tabletId, TDuration timeout, const TReadMetadataBase::TConstPtr& readMetadataRange, NKikimrDataEvents::EDataFormat dataFormat, const NColumnShard::TScanCounters& 
scanCountersPool); void Bootstrap(const TActorContext& ctx); @@ -41,21 +41,20 @@ class TColumnShardScan: public TActorBootstrapped, NArrow::IRo private: STATEFN(StateScan) { auto g = Stats->MakeGuard("processing"); - TLogContextGuard gLogging(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD_SCAN) - ("SelfId", SelfId())("TabletId", TabletId)("ScanId", ScanId)("TxId", TxId)("ScanGen", ScanGen) - ); + TLogContextGuard gLogging(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD_SCAN) ("SelfId", SelfId())( + "TabletId", TabletId)("ScanId", ScanId)("TxId", TxId)("ScanGen", ScanGen)); switch (ev->GetTypeRewrite()) { hFunc(NKqp::TEvKqpCompute::TEvScanDataAck, HandleScan); hFunc(NKqp::TEvKqp::TEvAbortExecution, HandleScan); hFunc(TEvents::TEvUndelivered, HandleScan); hFunc(TEvents::TEvWakeup, HandleScan); - hFunc(NConveyor::TEvExecution::TEvTaskProcessedResult, HandleScan); + hFunc(NColumnShard::TEvPrivate::TEvTaskProcessedResult, HandleScan); default: AFL_VERIFY(false)("unexpected_event", ev->GetTypeName()); } } - void HandleScan(NConveyor::TEvExecution::TEvTaskProcessedResult::TPtr& ev); + void HandleScan(NColumnShard::TEvPrivate::TEvTaskProcessedResult::TPtr& ev); void HandleScan(NKqp::TEvKqpCompute::TEvScanDataAck::TPtr& ev); @@ -80,10 +79,10 @@ class TColumnShardScan: public TActorBootstrapped, NArrow::IRo class TScanStatsOwner: public NKqp::TEvKqpCompute::IShardScanStats { private: YDB_READONLY_DEF(TReadStats, Stats); + public: TScanStatsOwner(const TReadStats& stats) : Stats(stats) { - } virtual THashMap GetMetrics() const override { @@ -142,11 +141,11 @@ class TColumnShardScan: public TActorBootstrapped, NArrow::IRo TDuration ReadingDurationMax; NMonitoring::THistogramPtr BlobDurationsCounter; NMonitoring::THistogramPtr ByteDurationsCounter; + public: TBlobStats(const NMonitoring::THistogramPtr blobDurationsCounter, const NMonitoring::THistogramPtr byteDurationsCounter) : BlobDurationsCounter(blobDurationsCounter) , 
ByteDurationsCounter(byteDurationsCounter) { - } void Received(const TBlobRange& br, const TDuration d) { ReadingDurationSum += d; @@ -181,4 +180,4 @@ class TColumnShardScan: public TActorBootstrapped, NArrow::IRo TDuration LastReportedElapsedTime; }; -} \ No newline at end of file +} // namespace NKikimr::NOlap::NReader diff --git a/ydb/core/tx/columnshard/engines/reader/common/conveyor_task.cpp b/ydb/core/tx/columnshard/engines/reader/common/conveyor_task.cpp index 26f14784a032..ac778b00a6c7 100644 --- a/ydb/core/tx/columnshard/engines/reader/common/conveyor_task.cpp +++ b/ydb/core/tx/columnshard/engines/reader/common/conveyor_task.cpp @@ -1,9 +1,23 @@ #include "conveyor_task.h" +#include +#include namespace NKikimr::NOlap::NReader { -bool IDataTasksProcessor::ITask::Apply(IDataReader& indexedDataRead) const { - return DoApply(indexedDataRead); +NKikimr::TConclusionStatus IDataTasksProcessor::ITask::DoExecute(const std::shared_ptr& taskPtr) { + auto result = DoExecuteImpl(); + if (result.IsFail()) { + NActors::TActivationContext::AsActorContext().Send(OwnerId, new NColumnShard::TEvPrivate::TEvTaskProcessedResult(result)); + } else { + NActors::TActivationContext::AsActorContext().Send( + OwnerId, new NColumnShard::TEvPrivate::TEvTaskProcessedResult(static_pointer_cast(taskPtr))); + } + return result; +} + +void IDataTasksProcessor::ITask::DoOnCannotExecute(const TString& reason) { + NActors::TActivationContext::AsActorContext().Send( + OwnerId, new NColumnShard::TEvPrivate::TEvTaskProcessedResult(TConclusionStatus::Fail(reason))); } } diff --git a/ydb/core/tx/columnshard/engines/reader/common/conveyor_task.h b/ydb/core/tx/columnshard/engines/reader/common/conveyor_task.h index f29b19ecee59..0342577c2554 100644 --- a/ydb/core/tx/columnshard/engines/reader/common/conveyor_task.h +++ b/ydb/core/tx/columnshard/engines/reader/common/conveyor_task.h @@ -1,28 +1,46 @@ #pragma once + #include + #include +#include namespace NKikimr::NOlap::NReader { class IDataReader; 
+class IApplyAction { +protected: + virtual bool DoApply(IDataReader& indexedDataRead) const = 0; + +public: + bool Apply(IDataReader& indexedDataRead) const { + return DoApply(indexedDataRead); + } +}; + class IDataTasksProcessor { public: - class ITask: public NConveyor::ITask { + class ITask: public NConveyor::ITask, public IApplyAction { private: using TBase = NConveyor::ITask; - protected: - virtual bool DoApply(IDataReader& indexedDataRead) const = 0; - public: - ITask(const std::optional ownerId = {}) - : TBase(ownerId) { + const NActors::TActorId OwnerId; + virtual TConclusionStatus DoExecuteImpl() = 0; - } + protected: + virtual TConclusionStatus DoExecute(const std::shared_ptr& taskPtr) override final; + virtual void DoOnCannotExecute(const TString& reason) override; + public: using TPtr = std::shared_ptr; virtual ~ITask() = default; - bool Apply(IDataReader& indexedDataRead) const; + + ITask(const NActors::TActorId& ownerId) + : OwnerId(ownerId) + { + + } }; }; -} +} // namespace NKikimr::NOlap::NReader diff --git a/ydb/core/tx/columnshard/engines/reader/common/description.h b/ydb/core/tx/columnshard/engines/reader/common/description.h index 704b4bd101a9..c180dcc8d067 100644 --- a/ydb/core/tx/columnshard/engines/reader/common/description.h +++ b/ydb/core/tx/columnshard/engines/reader/common/description.h @@ -13,13 +13,15 @@ struct TReadDescription { TProgramContainer Program; public: // Table + ui64 TxId = 0; + std::optional LockId; ui64 PathId = 0; TString TableName; bool ReadNothing = false; // Less[OrEqual], Greater[OrEqual] or both // There's complex logic in NKikimr::TTableRange comparison that could be emulated only with separated compare // operations with potentially different columns. We have to remove columns to support -Inf (Null) and +Inf. 
- NOlap::TPKRangesFilter PKRangesFilter; + std::shared_ptr PKRangesFilter; NYql::NDqProto::EDqStatsMode StatsMode = NYql::NDqProto::EDqStatsMode::DQ_STATS_MODE_NONE; // List of columns @@ -28,7 +30,7 @@ struct TReadDescription { TReadDescription(const TSnapshot& snapshot, const bool isReverse) : Snapshot(snapshot) - , PKRangesFilter(isReverse) { + , PKRangesFilter(std::make_shared(isReverse)) { } void SetProgram(TProgramContainer&& value) { diff --git a/ydb/core/tx/columnshard/engines/reader/common/result.cpp b/ydb/core/tx/columnshard/engines/reader/common/result.cpp index 484165c67b54..e81e86bfc9d0 100644 --- a/ydb/core/tx/columnshard/engines/reader/common/result.cpp +++ b/ydb/core/tx/columnshard/engines/reader/common/result.cpp @@ -4,19 +4,19 @@ namespace NKikimr::NOlap::NReader { class TCurrentBatch { private: - std::vector Results; + std::vector> Results; ui64 RecordsCount = 0; public: ui64 GetRecordsCount() const { return RecordsCount; } - void AddChunk(TPartialReadResult&& res) { - RecordsCount += res.GetRecordsCount(); + void AddChunk(std::shared_ptr&& res) { + RecordsCount += res->GetRecordsCount(); Results.emplace_back(std::move(res)); } - void FillResult(std::vector& result) const { + void FillResult(std::vector>& result) const { if (Results.empty()) { return; } @@ -26,11 +26,12 @@ class TCurrentBatch { } }; -std::vector TPartialReadResult::SplitResults(std::vector&& resultsExt, const ui32 maxRecordsInResult) { +std::vector> TPartialReadResult::SplitResults( + std::vector>&& resultsExt, const ui32 maxRecordsInResult) { std::vector resultBatches; TCurrentBatch currentBatch; for (auto&& i : resultsExt) { - AFL_VERIFY(i.GetRecordsCount()); + AFL_VERIFY(i->GetRecordsCount()); currentBatch.AddChunk(std::move(i)); if (currentBatch.GetRecordsCount() >= maxRecordsInResult) { resultBatches.emplace_back(std::move(currentBatch)); @@ -41,7 +42,7 @@ std::vector TPartialReadResult::SplitResults(std::vector result; + std::vector> result; for (auto&& i : resultBatches) { 
i.FillResult(result); } diff --git a/ydb/core/tx/columnshard/engines/reader/common/result.h b/ydb/core/tx/columnshard/engines/reader/common/result.h index 2c3f698bf7d7..e3028b01b5ad 100644 --- a/ydb/core/tx/columnshard/engines/reader/common/result.h +++ b/ydb/core/tx/columnshard/engines/reader/common/result.h @@ -1,16 +1,19 @@ #pragma once #include +#include #include #include +#include #include #include namespace NKikimr::NOlap::NReader { // Represents a batch of rows produced by ASC or DESC scan with applied filters and partial aggregation -class TPartialReadResult { +class TPartialReadResult: public TNonCopyable { private: - YDB_READONLY_DEF(std::vector>, ResourcesGuards); + YDB_READONLY_DEF(std::shared_ptr, ResourcesGuard); + YDB_READONLY_DEF(std::shared_ptr, GroupGuard); NArrow::TShardedRecordBatch ResultBatch; // This 1-row batch contains the last key that was read while producing the ResultBatch. @@ -32,12 +35,6 @@ class TPartialReadResult { return ResultBatch.GetRecordBatch(); } - const std::shared_ptr& GetResourcesGuardOnly() const { - AFL_VERIFY(ResourcesGuards.size() == 1); - AFL_VERIFY(!!ResourcesGuards.front()); - return ResourcesGuards.front(); - } - ui64 GetMemorySize() const { return ResultBatch.GetMemorySize(); } @@ -46,7 +43,8 @@ class TPartialReadResult { return ResultBatch.GetRecordsCount(); } - static std::vector SplitResults(std::vector&& resultsExt, const ui32 maxRecordsInResult); + static std::vector> SplitResults( + std::vector>&& resultsExt, const ui32 maxRecordsInResult); const NArrow::TShardedRecordBatch& GetShardedBatch() const { return ResultBatch; @@ -56,32 +54,23 @@ class TPartialReadResult { return LastReadKey; } - explicit TPartialReadResult( - const std::vector>& resourcesGuards, - const NArrow::TShardedRecordBatch& batch, std::shared_ptr lastKey, const std::optional notFinishedIntervalIdx) - : ResourcesGuards(resourcesGuards) + explicit TPartialReadResult(std::shared_ptr&& resourcesGuard, + std::shared_ptr&& gGuard, const 
NArrow::TShardedRecordBatch& batch, + std::shared_ptr lastKey, const std::optional notFinishedIntervalIdx) + : ResourcesGuard(std::move(resourcesGuard)) + , GroupGuard(std::move(gGuard)) , ResultBatch(batch) , LastReadKey(lastKey) - , NotFinishedIntervalIdx(notFinishedIntervalIdx) - { - for (auto&& i : ResourcesGuards) { - AFL_VERIFY(i); - } + , NotFinishedIntervalIdx(notFinishedIntervalIdx) { Y_ABORT_UNLESS(ResultBatch.GetRecordsCount()); Y_ABORT_UNLESS(LastReadKey); Y_ABORT_UNLESS(LastReadKey->num_rows() == 1); } explicit TPartialReadResult( - const std::shared_ptr& resourcesGuards, const NArrow::TShardedRecordBatch& batch, std::shared_ptr lastKey, const std::optional notFinishedIntervalIdx) - : TPartialReadResult(std::vector>({resourcesGuards}), batch, lastKey, notFinishedIntervalIdx) { - AFL_VERIFY(resourcesGuards); - } - - explicit TPartialReadResult(const NArrow::TShardedRecordBatch& batch, std::shared_ptr lastKey, const std::optional notFinishedIntervalIdx) - : TPartialReadResult(std::vector>(), batch, lastKey, notFinishedIntervalIdx) { + : TPartialReadResult(nullptr, nullptr, batch, lastKey, notFinishedIntervalIdx) { } }; -} +} // namespace NKikimr::NOlap::NReader diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/constructor.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/constructor.cpp index 87315949329a..ae28340c9932 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/constructor.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/constructor.cpp @@ -1,11 +1,13 @@ #include "constructor.h" -#include "resolver.h" #include "read_metadata.h" +#include "resolver.h" + #include namespace NKikimr::NOlap::NReader::NPlain { -NKikimr::TConclusionStatus TIndexScannerConstructor::ParseProgram(const TVersionedIndex* vIndex, const NKikimrTxDataShard::TEvKqpScan& proto, TReadDescription& read) const { +NKikimr::TConclusionStatus TIndexScannerConstructor::ParseProgram( + const 
TVersionedIndex* vIndex, const NKikimrTxDataShard::TEvKqpScan& proto, TReadDescription& read) const { AFL_VERIFY(vIndex); auto& indexInfo = vIndex->GetSchema(Snapshot)->GetIndexInfo(); TIndexColumnResolver columnResolver(indexInfo); @@ -17,26 +19,29 @@ std::vector TIndexScannerConstructor::GetPrimaryKeyScheme(const N return indexInfo.GetPrimaryKeyColumns(); } -NKikimr::TConclusion> TIndexScannerConstructor::DoBuildReadMetadata(const NColumnShard::TColumnShard* self, const TReadDescription& read) const { +NKikimr::TConclusion> TIndexScannerConstructor::DoBuildReadMetadata( + const NColumnShard::TColumnShard* self, const TReadDescription& read) const { auto& insertTable = self->InsertTable; auto& index = self->TablesManager.GetPrimaryIndex(); if (!insertTable || !index) { return std::shared_ptr(); } - if (read.GetSnapshot().GetPlanStep() < self->GetMinReadStep()) { - return TConclusionStatus::Fail(TStringBuilder() << "Snapshot too old: " << read.GetSnapshot()); + if (read.GetSnapshot().GetPlanInstant() < self->GetMinReadSnapshot().GetPlanInstant()) { + return TConclusionStatus::Fail(TStringBuilder() << "Snapshot too old: " << read.GetSnapshot() << ". CS min read snapshot: " + << self->GetMinReadSnapshot() << ". now: " << TInstant::Now()); } TDataStorageAccessor dataAccessor(insertTable, index); - auto readMetadata = std::make_shared(index->CopyVersionedIndexPtr(), read.GetSnapshot(), + AFL_VERIFY(read.PathId); + auto readMetadata = std::make_shared(read.PathId, index->CopyVersionedIndexPtr(), read.GetSnapshot(), IsReverse ? 
TReadMetadataBase::ESorting::DESC : TReadMetadataBase::ESorting::ASC, read.GetProgram()); - auto initResult = readMetadata->Init(read, dataAccessor); + auto initResult = readMetadata->Init(self, read, dataAccessor); if (!initResult) { return initResult; } - return dynamic_pointer_cast(readMetadata); + return static_pointer_cast(readMetadata); } -} \ No newline at end of file +} // namespace NKikimr::NOlap::NReader::NPlain diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.cpp index 076b69c7f49a..c24fbe0577a7 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.cpp @@ -1,6 +1,11 @@ #include "read_metadata.h" + +#include +#include #include #include +#include +#include namespace NKikimr::NOlap::NReader::NPlain { @@ -8,16 +13,36 @@ std::unique_ptr TReadMetadata::StartScan(const std::shared_pt return std::make_unique(readContext, readContext->GetReadMetadataPtrVerifiedAs()); } -TConclusionStatus TReadMetadata::Init(const TReadDescription& readDescription, const TDataStorageAccessor& dataAccessor) { +TConclusionStatus TReadMetadata::Init( + const NColumnShard::TColumnShard* owner, const TReadDescription& readDescription, const TDataStorageAccessor& dataAccessor) { SetPKRangesFilter(readDescription.PKRangesFilter); InitShardingInfo(readDescription.PathId); + TxId = readDescription.TxId; + LockId = readDescription.LockId; + if (LockId) { + owner->GetOperationsManager().RegisterLock(*LockId, owner->Generation()); + LockSharingInfo = owner->GetOperationsManager().GetLockVerified(*LockId).GetSharingInfo(); + } /// @note We could have column name changes between schema versions: /// Add '1:foo', Drop '1:foo', Add '2:foo'. Drop should hide '1:foo' from reads. 
/// It's expected that we have only one version on 'foo' in blob and could split them by schema {planStep:txId}. /// So '1:foo' would be omitted in blob records for the column in new snapshots. And '2:foo' - in old ones. /// It's not possible for blobs with several columns. There should be a special logic for them. - CommittedBlobs = dataAccessor.GetCommitedBlobs(readDescription, ResultIndexSchema->GetIndexInfo().GetReplaceKey()); + CommittedBlobs = + dataAccessor.GetCommitedBlobs(readDescription, ResultIndexSchema->GetIndexInfo().GetReplaceKey(), LockId, GetRequestSnapshot()); + + if (LockId) { + for (auto&& i : CommittedBlobs) { + if (auto writeId = i.GetWriteIdOptional()) { + if (owner->HasLongTxWrites(*writeId)) { + } else { + auto op = owner->GetOperationsManager().GetOperationByInsertWriteIdVerified(*writeId); + AddWriteIdToCheck(*writeId, op->GetLockId()); + } + } + } + } SelectInfo = dataAccessor.Select(readDescription); StatsMode = readDescription.StatsMode; @@ -41,7 +66,7 @@ std::set TReadMetadata::GetPKColumnIds() const { std::set result; auto& indexInfo = ResultIndexSchema->GetIndexInfo(); for (auto&& i : indexInfo.GetPrimaryKeyColumns()) { - Y_ABORT_UNLESS(result.emplace(indexInfo.GetColumnId(i.first)).second); + Y_ABORT_UNLESS(result.emplace(indexInfo.GetColumnIdVerified(i.first)).second); } return result; } @@ -51,8 +76,57 @@ std::shared_ptr TReadMetadata::BuildReader(const std::shared_ptr
field_names(), {}, IsDescSorted()); + return NArrow::NMerger::TSortableBatchPosition(key.ToBatch(GetReplaceKey()), 0, GetReplaceKey()->field_names(), {}, IsDescSorted()); +} + +void TReadMetadata::DoOnReadFinished(NColumnShard::TColumnShard& owner) const { + if (!GetLockId()) { + return; + } + const ui64 lock = *GetLockId(); + if (GetBrokenWithCommitted()) { + owner.GetOperationsManager().GetLockVerified(lock).SetBroken(); + } else { + NOlap::NTxInteractions::TTxConflicts conflicts; + for (auto&& i : GetConflictableLockIds()) { + conflicts.Add(i, lock); + } + auto writer = std::make_shared(PathId, conflicts); + owner.GetOperationsManager().AddEventForLock(owner, lock, writer); + } } +void TReadMetadata::DoOnBeforeStartReading(NColumnShard::TColumnShard& owner) const { + if (!LockId) { + return; + } + auto evWriter = std::make_shared( + PathId, GetResultSchema()->GetIndexInfo().GetPrimaryKey(), GetPKRangesFilterPtr(), GetConflictableLockIds()); + owner.GetOperationsManager().AddEventForLock(owner, *LockId, evWriter); } + +void TReadMetadata::DoOnReplyConstruction(const ui64 tabletId, NKqp::NInternalImplementation::TEvScanData& scanData) const { + if (LockSharingInfo) { + NKikimrDataEvents::TLock lockInfo; + lockInfo.SetLockId(LockSharingInfo->GetLockId()); + lockInfo.SetGeneration(LockSharingInfo->GetGeneration()); + lockInfo.SetDataShard(tabletId); + lockInfo.SetCounter(LockSharingInfo->GetCounter()); + lockInfo.SetPathId(PathId); + lockInfo.SetHasWrites(LockSharingInfo->HasWrites()); + if (LockSharingInfo->IsBroken()) { + scanData.LocksInfo.BrokenLocks.emplace_back(std::move(lockInfo)); + } else { + scanData.LocksInfo.Locks.emplace_back(std::move(lockInfo)); + } + } +} + +bool TReadMetadata::IsMyUncommitted(const TInsertWriteId writeId) const { + AFL_VERIFY(LockSharingInfo); + auto it = ConflictedWriteIds.find(writeId); + AFL_VERIFY(it != ConflictedWriteIds.end()); + return it->second.GetLockId() == LockSharingInfo->GetLockId(); +} + +} // namespace 
NKikimr::NOlap::NReader::NPlain diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.h index 371a09d73102..5f5ad70db296 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.h @@ -1,18 +1,104 @@ #pragma once #include #include -#include +#include #include #include +namespace NKikimr::NColumnShard { +class TLockSharingInfo; +} + namespace NKikimr::NOlap::NReader::NPlain { // Holds all metadata that is needed to perform read/scan struct TReadMetadata : public TReadMetadataBase { using TBase = TReadMetadataBase; + +private: + const ui64 PathId; + std::shared_ptr BrokenWithCommitted = std::make_shared(); + std::shared_ptr LockSharingInfo; + + class TWriteIdInfo { + private: + const ui64 LockId; + std::shared_ptr Conflicts; + + public: + TWriteIdInfo(const ui64 lockId, const std::shared_ptr& counter) + : LockId(lockId) + , Conflicts(counter) { + } + + ui64 GetLockId() const { + return LockId; + } + + void MarkAsConflictable() const { + Conflicts->Inc(); + } + + bool IsConflictable() const { + return Conflicts->Val(); + } + }; + + THashMap> LockConflictCounters; + THashMap ConflictedWriteIds; + + virtual void DoOnReadFinished(NColumnShard::TColumnShard& owner) const override; + virtual void DoOnBeforeStartReading(NColumnShard::TColumnShard& owner) const override; + virtual void DoOnReplyConstruction(const ui64 tabletId, NKqp::NInternalImplementation::TEvScanData& scanData) const override; + public: using TConstPtr = std::shared_ptr; + bool GetBrokenWithCommitted() const { + return BrokenWithCommitted->Val(); + } + THashSet GetConflictableLockIds() const { + THashSet result; + for (auto&& i : ConflictedWriteIds) { + if (i.second.IsConflictable()) { + result.emplace(i.second.GetLockId()); + } + } + return result; + } + + bool 
IsLockConflictable(const ui64 lockId) const { + auto it = LockConflictCounters.find(lockId); + AFL_VERIFY(it != LockConflictCounters.end()); + return it->second->Val(); + } + + bool IsWriteConflictable(const TInsertWriteId writeId) const { + auto it = ConflictedWriteIds.find(writeId); + AFL_VERIFY(it != ConflictedWriteIds.end()); + return it->second.IsConflictable(); + } + + void AddWriteIdToCheck(const TInsertWriteId writeId, const ui64 lockId) { + auto it = LockConflictCounters.find(lockId); + if (it == LockConflictCounters.end()) { + it = LockConflictCounters.emplace(lockId, std::make_shared()).first; + } + AFL_VERIFY(ConflictedWriteIds.emplace(writeId, TWriteIdInfo(lockId, it->second)).second); + } + + [[nodiscard]] bool IsMyUncommitted(const TInsertWriteId writeId) const; + + void SetConflictedWriteId(const TInsertWriteId writeId) const { + auto it = ConflictedWriteIds.find(writeId); + AFL_VERIFY(it != ConflictedWriteIds.end()); + it->second.MarkAsConflictable(); + } + + void SetBrokenWithCommitted() const { + BrokenWithCommitted->Inc(); + } + NArrow::NMerger::TSortableBatchPosition BuildSortedPosition(const NArrow::TReplaceKey& key) const; std::shared_ptr BuildReader(const std::shared_ptr& context) const; @@ -20,13 +106,18 @@ struct TReadMetadata : public TReadMetadataBase { return GetProgram().HasProcessingColumnIds(); } + ui64 GetPathId() const { + return PathId; + } + std::shared_ptr SelectInfo; NYql::NDqProto::EDqStatsMode StatsMode = NYql::NDqProto::EDqStatsMode::DQ_STATS_MODE_NONE; std::vector CommittedBlobs; std::shared_ptr ReadStats; - TReadMetadata(const std::shared_ptr info, const TSnapshot& snapshot, const ESorting sorting, const TProgramContainer& ssaProgram) + TReadMetadata(const ui64 pathId, const std::shared_ptr info, const TSnapshot& snapshot, const ESorting sorting, const TProgramContainer& ssaProgram) : TBase(info, sorting, ssaProgram, info->GetSchema(snapshot), snapshot) + , PathId(pathId) , ReadStats(std::make_shared()) { } @@ -35,7 +126,7 
@@ struct TReadMetadata : public TReadMetadataBase { return GetResultSchema()->GetIndexInfo().GetPrimaryKeyColumns(); } - TConclusionStatus Init(const TReadDescription& readDescription, const TDataStorageAccessor& dataAccessor); + TConclusionStatus Init(const NColumnShard::TColumnShard* owner, const TReadDescription& readDescription, const TDataStorageAccessor& dataAccessor); std::vector GetColumnsOrder() const { auto schema = GetResultSchema(); diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/resolver.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/resolver.h index c5a2998a54c1..3890edc6c361 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/resolver.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/resolver.h @@ -20,10 +20,6 @@ class TIndexColumnResolver: public IColumnResolver { return IndexInfo.GetColumnName(id, required); } - const NTable::TScheme::TTableSchema& GetSchema() const override { - return IndexInfo; - } - NSsa::TColumnInfo GetDefaultColumn() const override { return NSsa::TColumnInfo::Original((ui32)NOlap::TIndexInfo::ESpecialColumn::PLAN_STEP, NOlap::TIndexInfo::SPEC_COL_PLAN_STEP); } diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/ya.make b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/ya.make index b91efa4346d8..1ab826414813 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/ya.make +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/ya.make @@ -8,6 +8,7 @@ SRCS( PEERDIR( ydb/core/tx/columnshard/engines/reader/abstract + ydb/core/kqp/compute_actor ) END() diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/columns_set.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/columns_set.cpp index f100c8f89041..24ef9a452e4c 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/columns_set.cpp +++ 
b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/columns_set.cpp @@ -11,7 +11,7 @@ TString TColumnsSet::DebugString() const { << ");"; } -NKikimr::NOlap::NReader::NPlain::TColumnsSet TColumnsSet::operator-(const TColumnsSet& external) const { +TColumnsSet TColumnsSet::operator-(const TColumnsSet& external) const { if (external.IsEmpty() || IsEmpty()) { return *this; } @@ -30,7 +30,7 @@ NKikimr::NOlap::NReader::NPlain::TColumnsSet TColumnsSet::operator-(const TColum return result; } -NKikimr::NOlap::NReader::NPlain::TColumnsSet TColumnsSet::operator+(const TColumnsSet& external) const { +TColumnsSet TColumnsSet::operator+(const TColumnsSet& external) const { if (external.IsEmpty()) { return *this; } diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/columns_set.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/columns_set.h index 08d7ac103d80..98e77f4971e9 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/columns_set.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/columns_set.h @@ -1,15 +1,24 @@ #pragma once -#include -#include #include +#include + +#include + #include namespace NKikimr::NOlap::NReader::NPlain { +enum class EStageFeaturesIndexes { + Filter = 0, + Fetching = 1, + Merge = 2 +}; + class TIndexesSet { private: YDB_READONLY_DEF(std::vector, IndexIds); YDB_READONLY_DEF(std::set, IndexIdsSet); + public: TIndexesSet(const std::set& indexIds) : IndexIds(indexIds.begin(), indexIds.end()) @@ -18,8 +27,8 @@ class TIndexesSet { } TIndexesSet(const ui32& indexId) - : IndexIds({indexId}) - , IndexIdsSet({indexId}) { + : IndexIds({ indexId }) + , IndexIdsSet({ indexId }) { } ui32 GetIndexesCount() const { @@ -31,78 +40,70 @@ class TIndexesSet { } }; -class TColumnsSet { -private: - YDB_READONLY_DEF(std::set, ColumnIds); - YDB_READONLY_DEF(std::set, ColumnNames); - std::vector ColumnNamesVector; - YDB_READONLY_DEF(std::shared_ptr, Schema); - ISnapshotSchema::TPtr 
FullReadSchema; - YDB_READONLY_DEF(ISnapshotSchema::TPtr, FilteredSchema); - - void Rebuild(); +class TColumnsSetIds { +protected: + std::set ColumnIds; public: - TColumnsSet() = default; - bool IsEmpty() const { - return ColumnIds.empty(); + const std::set& GetColumnIds() const { + return ColumnIds; } - bool operator!() const { - return IsEmpty(); + TString DebugString() const { + return JoinSeq(",", ColumnIds); } - const std::vector& GetColumnNamesVector() const { - return ColumnNamesVector; + TColumnsSetIds(const std::set& ids) + : ColumnIds(ids) { } - - ui32 GetColumnsCount() const { - return ColumnIds.size(); + TColumnsSetIds() = default; + TColumnsSetIds(std::set&& ids) + : ColumnIds(std::move(ids)) { } - bool ColumnsOnly(const std::vector& fieldNames) const; - - TColumnsSet(const std::set& columnIds, const TIndexInfo& indexInfo, const ISnapshotSchema::TPtr& fullReadSchema) - : ColumnIds(columnIds) - , FullReadSchema(fullReadSchema) - { - Schema = indexInfo.GetColumnsSchema(ColumnIds); - Rebuild(); + TColumnsSetIds(const std::vector& ids) + : ColumnIds(ids.begin(), ids.end()) { } - TColumnsSet(const std::vector& columnIds, const TIndexInfo& indexInfo, const ISnapshotSchema::TPtr& fullReadSchema) - : ColumnIds(columnIds.begin(), columnIds.end()) - , FullReadSchema(fullReadSchema) - { - Schema = indexInfo.GetColumnsSchema(ColumnIds); - Rebuild(); + TColumnsSetIds operator+(const TColumnsSetIds& external) const { + TColumnsSetIds result = *this; + result.ColumnIds.insert(external.ColumnIds.begin(), external.ColumnIds.end()); + return result; } - const ISnapshotSchema& GetFilteredSchemaVerified() const { - AFL_VERIFY(FilteredSchema); - return *FilteredSchema; + TColumnsSetIds operator-(const TColumnsSetIds& external) const { + TColumnsSetIds result = *this; + for (auto&& i : external.ColumnIds) { + result.ColumnIds.erase(i); + } + return result; + } + bool IsEmpty() const { + return ColumnIds.empty(); } - const std::shared_ptr& GetFilteredSchemaPtrVerified() 
const { - AFL_VERIFY(FilteredSchema); - return FilteredSchema; + bool operator!() const { + return IsEmpty(); + } + ui32 GetColumnsCount() const { + return ColumnIds.size(); } - bool Contains(const std::shared_ptr& columnsSet) const { + bool Contains(const std::shared_ptr& columnsSet) const { if (!columnsSet) { return true; } return Contains(*columnsSet); } - bool IsEqual(const std::shared_ptr& columnsSet) const { + bool IsEqual(const std::shared_ptr& columnsSet) const { if (!columnsSet) { return false; } return IsEqual(*columnsSet); } - bool Contains(const TColumnsSet& columnsSet) const { + bool Contains(const TColumnsSetIds& columnsSet) const { for (auto&& i : columnsSet.ColumnIds) { if (!ColumnIds.contains(i)) { return false; @@ -111,7 +112,7 @@ class TColumnsSet { return true; } - bool Cross(const TColumnsSet& columnsSet) const { + bool Cross(const TColumnsSetIds& columnsSet) const { for (auto&& i : columnsSet.ColumnIds) { if (ColumnIds.contains(i)) { return true; @@ -120,7 +121,7 @@ class TColumnsSet { return false; } - std::set Intersect(const TColumnsSet& columnsSet) const { + std::set Intersect(const TColumnsSetIds& columnsSet) const { std::set result; for (auto&& i : columnsSet.ColumnIds) { if (ColumnIds.contains(i)) { @@ -130,7 +131,7 @@ class TColumnsSet { return result; } - bool IsEqual(const TColumnsSet& columnsSet) const { + bool IsEqual(const TColumnsSetIds& columnsSet) const { if (columnsSet.GetColumnIds().size() != ColumnIds.size()) { return false; } @@ -145,6 +146,56 @@ class TColumnsSet { } return true; } +}; + +class TColumnsSet: public TColumnsSetIds { +private: + using TBase = TColumnsSetIds; + YDB_READONLY_DEF(std::set, ColumnNames); + std::vector ColumnNamesVector; + YDB_READONLY_DEF(std::shared_ptr, Schema); + ISnapshotSchema::TPtr FullReadSchema; + YDB_READONLY_DEF(ISnapshotSchema::TPtr, FilteredSchema); + + void Rebuild(); + +public: + TColumnsSet() = default; + const std::vector& GetColumnNamesVector() const { + return ColumnNamesVector; 
+ } + + bool ColumnsOnly(const std::vector& fieldNames) const; + + std::shared_ptr BuildSamePtr(const std::set& columnIds) const { + return std::make_shared(columnIds, FullReadSchema); + } + + TColumnsSet(const std::set& columnIds, const ISnapshotSchema::TPtr& fullReadSchema) + : TBase(columnIds) + , FullReadSchema(fullReadSchema) { + AFL_VERIFY(!!FullReadSchema); + Schema = FullReadSchema->GetIndexInfo().GetColumnsSchema(ColumnIds); + Rebuild(); + } + + TColumnsSet(const std::vector& columnIds, const ISnapshotSchema::TPtr& fullReadSchema) + : TBase(columnIds) + , FullReadSchema(fullReadSchema) { + AFL_VERIFY(!!FullReadSchema); + Schema = FullReadSchema->GetIndexInfo().GetColumnsSchema(ColumnIds); + Rebuild(); + } + + const ISnapshotSchema& GetFilteredSchemaVerified() const { + AFL_VERIFY(FilteredSchema); + return *FilteredSchema; + } + + const std::shared_ptr& GetFilteredSchemaPtrVerified() const { + AFL_VERIFY(FilteredSchema); + return FilteredSchema; + } TString DebugString() const; @@ -153,4 +204,4 @@ class TColumnsSet { TColumnsSet operator-(const TColumnsSet& external) const; }; -} +} // namespace NKikimr::NOlap::NReader::NPlain diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/constructor.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/constructor.cpp index 55810391516d..654315a1ab0b 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/constructor.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/constructor.cpp @@ -1,5 +1,6 @@ #include "constructor.h" #include +#include namespace NKikimr::NOlap::NReader::NPlain { @@ -14,7 +15,7 @@ bool TBlobsFetcherTask::DoOnError(const TString& storageId, const TBlobRange& ra AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("error_on_blob_reading", range.ToString())("scan_actor_id", Context->GetCommonContext()->GetScanActorId()) ("status", status.GetErrorMessage())("status_code", status.GetStatus())("storage_id", storageId); 
NActors::TActorContext::AsActorContext().Send(Context->GetCommonContext()->GetScanActorId(), - std::make_unique(TConclusionStatus::Fail("cannot read blob range " + range.ToString()))); + std::make_unique(TConclusionStatus::Fail("cannot read blob range " + range.ToString()))); return false; } diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/context.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/context.cpp index ee6bc2eac7e8..0efd8bfbb9d2 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/context.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/context.cpp @@ -1,20 +1,22 @@ #include "context.h" #include "source.h" +#include + namespace NKikimr::NOlap::NReader::NPlain { std::unique_ptr TSpecialReadContext::BuildMerger() const { - return std::make_unique(ReadMetadata->GetReplaceKey(), ProgramInputColumns->GetSchema(), CommonContext->IsReverse(), - IIndexInfo::GetSnapshotColumnNames()); + return std::make_unique( + ReadMetadata->GetReplaceKey(), ProgramInputColumns->GetSchema(), CommonContext->IsReverse(), IIndexInfo::GetSnapshotColumnNames()); } -ui64 TSpecialReadContext::GetMemoryForSources(const THashMap>& sources, const bool isExclusive) { +ui64 TSpecialReadContext::GetMemoryForSources(const THashMap>& sources) { ui64 result = 0; bool hasSequentialReadSources = false; for (auto&& i : sources) { auto fetchingPlan = GetColumnsFetchingPlan(i.second); AFL_VERIFY(i.second->GetIntervalsCount()); - const ui64 sourceMemory = std::max(1, fetchingPlan->PredictRawBytes(i.second) / i.second->GetIntervalsCount()); + const ui64 sourceMemory = std::max(1, i.second->GetResourceGuardsMemory() / i.second->GetIntervalsCount()); if (!i.second->IsSourceInMemory()) { hasSequentialReadSources = true; } @@ -23,17 +25,23 @@ ui64 TSpecialReadContext::GetMemoryForSources(const THashMapIsReverse()) { - result = 2 * result; // due to in time we will have data in original portion + data in merged(or 
reversed) interval - } } return result; } -std::shared_ptr TSpecialReadContext::GetColumnsFetchingPlan(const std::shared_ptr& source) const { - const bool needSnapshots = !source->GetExclusiveIntervalOnly() || ReadMetadata->GetRequestSnapshot() < source->GetRecordSnapshotMax() || !source->IsSourceInMemory(); - const bool partialUsageByPK = ReadMetadata->GetPKRangesFilter().IsPortionInPartialUsage(source->GetStartReplaceKey(), source->GetFinishReplaceKey(), ReadMetadata->GetIndexInfo()); +std::shared_ptr TSpecialReadContext::GetColumnsFetchingPlan(const std::shared_ptr& source) { + const bool needSnapshots = !source->GetExclusiveIntervalOnly() || ReadMetadata->GetRequestSnapshot() < source->GetRecordSnapshotMax() || + !source->IsSourceInMemory(); + const bool partialUsageByPK = [&]() { + switch (source->GetUsageClass()) { + case TPKRangeFilter::EUsageClass::PartialUsage: + return true; + case TPKRangeFilter::EUsageClass::DontUsage: + return true; + case TPKRangeFilter::EUsageClass::FullUsage: + return false; + } + }(); const bool useIndexes = (IndexChecker ? source->HasIndexes(IndexChecker->GetIndexIds()) : false); const bool isWholeExclusiveSource = source->GetExclusiveIntervalOnly() && source->IsSourceInMemory(); const bool hasDeletions = source->GetHasDeletions(); @@ -44,16 +52,18 @@ std::shared_ptr TSpecialReadContext::GetColumnsFetchingPlan(con needShardingFilter = true; } } - if (auto result = CacheFetchingScripts - [needSnapshots ? 1 : 0][isWholeExclusiveSource ? 1 : 0] - [partialUsageByPK ? 1 : 0][useIndexes ? 1 : 0] - [needShardingFilter ? 1 : 0][hasDeletions ? 1 : 0]) { -// AFL_WARN(NKikimrServices::TX_COLUMNSHARD_SCAN)("SS", needSnapshots)("PK", partialUsageByPK)("IDX", useIndexes)("SHARDING", needShardingFilter) -// ("EXCL", source->GetExclusiveIntervalOnly())("MEM", source->IsSourceInMemory())("result", result->DebugString()); - return result; + auto result = CacheFetchingScripts[needSnapshots ? 1 : 0][isWholeExclusiveSource ? 1 : 0][partialUsageByPK ? 
1 : 0][useIndexes ? 1 : 0] + [needShardingFilter ? 1 : 0][hasDeletions ? 1 : 0]; + if (!result) { + result = BuildColumnsFetchingPlan(needSnapshots, isWholeExclusiveSource, partialUsageByPK, useIndexes, needShardingFilter, hasDeletions); + CacheFetchingScripts[needSnapshots ? 1 : 0][isWholeExclusiveSource ? 1 : 0][partialUsageByPK ? 1 : 0][useIndexes ? 1 : 0] + [needShardingFilter ? 1 : 0][hasDeletions ? 1 : 0] = result; } - { - std::shared_ptr result = std::make_shared(); + AFL_VERIFY(result); + if (*result) { + return *result; + } else { + std::shared_ptr result = std::make_shared(*this); result->SetBranchName("FAKE"); result->AddStep(std::make_shared(source->GetRecordsCount())); return result; @@ -62,27 +72,42 @@ std::shared_ptr TSpecialReadContext::GetColumnsFetchingPlan(con class TColumnsAccumulator { private: - TColumnsSet FetchingReadyColumns; - TColumnsSet AssemblerReadyColumns; + TColumnsSetIds FetchingReadyColumns; + TColumnsSetIds AssemblerReadyColumns; + ISnapshotSchema::TPtr FullSchema; + std::shared_ptr GuaranteeNotOptional; public: - bool AddFetchingStep(TFetchingScript& script, const TColumnsSet& columns) { - auto actualColumns = columns - FetchingReadyColumns; - FetchingReadyColumns = FetchingReadyColumns + columns; + TColumnsAccumulator(const std::shared_ptr& guaranteeNotOptional, const ISnapshotSchema::TPtr& fullSchema) + : FullSchema(fullSchema) + , GuaranteeNotOptional(guaranteeNotOptional) { + } + + bool AddFetchingStep(TFetchingScript& script, const TColumnsSetIds& columns, const EStageFeaturesIndexes& stage) { + auto actualColumns = (TColumnsSetIds)columns - FetchingReadyColumns; + FetchingReadyColumns = FetchingReadyColumns + (TColumnsSetIds)columns; if (!actualColumns.IsEmpty()) { - auto actualSet = std::make_shared(actualColumns); - script.AddStep(std::make_shared(actualSet)); + script.AddStep(std::make_shared(actualColumns, stage)); + script.AddStep(std::make_shared(actualColumns)); return true; } return false; } - bool 
AddAssembleStep(TFetchingScript& script, const TColumnsSet& columns, const TString& purposeId, const bool optional) { - auto actualColumns = columns - AssemblerReadyColumns; + bool AddAssembleStep(TFetchingScript& script, const TColumnsSetIds& columns, const TString& purposeId, const bool optional) { + auto actualColumns = (TColumnsSetIds)columns - AssemblerReadyColumns; AssemblerReadyColumns = AssemblerReadyColumns + columns; if (!actualColumns.IsEmpty()) { - auto actualSet = std::make_shared(actualColumns); + auto actualSet = std::make_shared(actualColumns.GetColumnIds(), FullSchema); if (optional) { - script.AddStep(std::make_shared(actualSet, purposeId)); + const auto notOptionalColumnIds = GuaranteeNotOptional->Intersect(*actualSet); + if (notOptionalColumnIds.size()) { + std::shared_ptr cross = actualSet->BuildSamePtr(notOptionalColumnIds); + script.AddStep(std::make_shared(cross, purposeId)); + *actualSet = *actualSet - *cross; + } + if (!actualSet->IsEmpty()) { + script.AddStep(std::make_shared(actualSet, purposeId)); + } } else { script.AddStep(std::make_shared(actualSet, purposeId)); } @@ -92,25 +117,29 @@ class TColumnsAccumulator { } }; -std::shared_ptr TSpecialReadContext::BuildColumnsFetchingPlan(const bool needSnapshots, const bool exclusiveSource, const bool partialUsageByPredicateExt, const bool useIndexes, - const bool needFilterSharding, const bool needFilterDeletion) const { - std::shared_ptr result = std::make_shared(); +std::shared_ptr TSpecialReadContext::BuildColumnsFetchingPlan(const bool needSnapshots, const bool exclusiveSource, + const bool partialUsageByPredicateExt, const bool useIndexes, const bool needFilterSharding, const bool needFilterDeletion) const { + std::shared_ptr result = std::make_shared(*this); const bool partialUsageByPredicate = partialUsageByPredicateExt && PredicateColumns->GetColumnsCount(); if (!!IndexChecker && useIndexes && exclusiveSource) { 
result->AddStep(std::make_shared(std::make_shared(IndexChecker->GetIndexIds()))); result->AddStep(std::make_shared(IndexChecker)); } bool hasFilterSharding = false; - TColumnsAccumulator acc; + TColumnsAccumulator acc(MergeColumns, ReadMetadata->GetResultSchema()); if (needFilterSharding && !ShardingColumns->IsEmpty()) { hasFilterSharding = true; - acc.AddFetchingStep(*result, *ShardingColumns); - acc.AddAssembleStep(*result, *ShardingColumns, "SPEC_SHARDING", false); + TColumnsSetIds columnsFetch = *ShardingColumns; + if (!exclusiveSource) { + columnsFetch = columnsFetch + *PKColumns + *SpecColumns; + } + acc.AddFetchingStep(*result, columnsFetch, EStageFeaturesIndexes::Filter); + acc.AddAssembleStep(*result, columnsFetch, "SPEC_SHARDING", false); result->AddStep(std::make_shared()); } if (!EFColumns->GetColumnsCount() && !partialUsageByPredicate) { result->SetBranchName("simple"); - TColumnsSet columnsFetch = *FFColumns; + TColumnsSetIds columnsFetch = *FFColumns; if (needFilterDeletion) { columnsFetch = columnsFetch + *DeletionColumns; } @@ -118,21 +147,25 @@ std::shared_ptr TSpecialReadContext::BuildColumnsFetchingPlan(c columnsFetch = columnsFetch + *SpecColumns; } if (!exclusiveSource) { - columnsFetch = columnsFetch + *PKColumns + *SpecColumns; + columnsFetch = columnsFetch + *MergeColumns; } else { if (columnsFetch.GetColumnsCount() == 1 && SpecColumns->Contains(columnsFetch) && !hasFilterSharding) { return nullptr; } } if (columnsFetch.GetColumnsCount() || hasFilterSharding || needFilterDeletion) { - acc.AddFetchingStep(*result, columnsFetch); + acc.AddFetchingStep(*result, columnsFetch, EStageFeaturesIndexes::Fetching); + if (needSnapshots) { + acc.AddAssembleStep(*result, *SpecColumns, "SPEC", false); + result->AddStep(std::make_shared()); + } + if (!exclusiveSource) { + acc.AddAssembleStep(*result, *MergeColumns, "LAST_PK", false); + } if (needFilterDeletion) { acc.AddAssembleStep(*result, *DeletionColumns, "SPEC_DELETION", false); 
result->AddStep(std::make_shared()); } - if (!exclusiveSource) { - acc.AddAssembleStep(*result, *PKColumns + *SpecColumns, "LAST_PK", false); - } acc.AddAssembleStep(*result, columnsFetch, "LAST", true); } else { return nullptr; @@ -151,29 +184,35 @@ std::shared_ptr TSpecialReadContext::BuildColumnsFetchingPlan(c } AFL_VERIFY(columnsFetch.GetColumnsCount()); - acc.AddFetchingStep(*result, columnsFetch); + acc.AddFetchingStep(*result, columnsFetch, EStageFeaturesIndexes::Filter); if (needFilterDeletion) { acc.AddAssembleStep(*result, *DeletionColumns, "SPEC_DELETION", false); result->AddStep(std::make_shared()); } - if (needSnapshots || FFColumns->Cross(*SpecColumns)) { - acc.AddAssembleStep(*result, *SpecColumns, "SPEC", false); - result->AddStep(std::make_shared()); - } if (partialUsageByPredicate) { acc.AddAssembleStep(*result, *PredicateColumns, "PREDICATE", false); result->AddStep(std::make_shared()); } + if (needSnapshots || FFColumns->Cross(*SpecColumns)) { + acc.AddAssembleStep(*result, *SpecColumns, "SPEC", false); + result->AddStep(std::make_shared()); + } for (auto&& i : ReadMetadata->GetProgram().GetSteps()) { - if (!i->IsFilterOnly()) { + if (i->GetFilterOriginalColumnIds().empty()) { break; } - TColumnsSet stepColumnIds(i->GetFilterOriginalColumnIds(), ReadMetadata->GetIndexInfo(), ReadMetadata->GetResultSchema()); + TColumnsSet stepColumnIds(i->GetFilterOriginalColumnIds(), ReadMetadata->GetResultSchema()); acc.AddAssembleStep(*result, stepColumnIds, "EF", true); result->AddStep(std::make_shared(i)); + if (!i->IsFilterOnly()) { + break; + } + } + if (GetReadMetadata()->Limit) { + result->AddStep(std::make_shared(GetReadMetadata()->Limit, GetReadMetadata()->IsDescSorted())); } - acc.AddFetchingStep(*result, *FFColumns); + acc.AddFetchingStep(*result, *FFColumns, EStageFeaturesIndexes::Fetching); acc.AddAssembleStep(*result, *FFColumns, "LAST", true); } else { result->SetBranchName("merge"); @@ -182,29 +221,32 @@ std::shared_ptr 
TSpecialReadContext::BuildColumnsFetchingPlan(c columnsFetch = columnsFetch + *DeletionColumns; } AFL_VERIFY(columnsFetch.GetColumnsCount()); - acc.AddFetchingStep(*result, columnsFetch); + acc.AddFetchingStep(*result, columnsFetch, EStageFeaturesIndexes::Filter); - if (needFilterDeletion) { - acc.AddAssembleStep(*result, *DeletionColumns, "SPEC_DELETION", false); - result->AddStep(std::make_shared()); - } acc.AddAssembleStep(*result, *SpecColumns, "SPEC", false); + acc.AddAssembleStep(*result, *PKColumns, "PK", false); if (needSnapshots) { result->AddStep(std::make_shared()); } - acc.AddAssembleStep(*result, *PKColumns, "PK", false); + if (needFilterDeletion) { + acc.AddAssembleStep(*result, *DeletionColumns, "SPEC_DELETION", false); + result->AddStep(std::make_shared()); + } if (partialUsageByPredicate) { result->AddStep(std::make_shared()); } for (auto&& i : ReadMetadata->GetProgram().GetSteps()) { - if (!i->IsFilterOnly()) { + if (i->GetFilterOriginalColumnIds().empty()) { break; } - TColumnsSet stepColumnIds(i->GetFilterOriginalColumnIds(), ReadMetadata->GetIndexInfo(), ReadMetadata->GetResultSchema()); + TColumnsSet stepColumnIds(i->GetFilterOriginalColumnIds(), ReadMetadata->GetResultSchema()); acc.AddAssembleStep(*result, stepColumnIds, "EF", true); result->AddStep(std::make_shared(i)); + if (!i->IsFilterOnly()) { + break; + } } - acc.AddFetchingStep(*result, *FFColumns); + acc.AddFetchingStep(*result, *FFColumns, EStageFeaturesIndexes::Fetching); acc.AddAssembleStep(*result, *FFColumns, "LAST", true); } return result; @@ -212,42 +254,72 @@ std::shared_ptr TSpecialReadContext::BuildColumnsFetchingPlan(c TSpecialReadContext::TSpecialReadContext(const std::shared_ptr& commonContext) : CommonContext(commonContext) { + ReadMetadata = dynamic_pointer_cast(CommonContext->GetReadMetadata()); Y_ABORT_UNLESS(ReadMetadata); Y_ABORT_UNLESS(ReadMetadata->SelectInfo); + double kffFilter = 0.45; + double kffFetching = 0.45; + double kffMerge = 0.10; + TString 
stagePrefix; + if (ReadMetadata->GetEarlyFilterColumnIds().size()) { + stagePrefix = "EF"; + kffFilter = 0.7; + kffFetching = 0.15; + kffMerge = 0.15; + } else { + stagePrefix = "FO"; + kffFilter = 0.1; + kffFetching = 0.75; + kffMerge = 0.15; + } + + std::vector> stages = { + NGroupedMemoryManager::TScanMemoryLimiterOperator::BuildStageFeatures( + stagePrefix + "::FILTER", kffFilter * TGlobalLimits::ScanMemoryLimit), + NGroupedMemoryManager::TScanMemoryLimiterOperator::BuildStageFeatures( + stagePrefix + "::FETCHING", kffFetching * TGlobalLimits::ScanMemoryLimit), + NGroupedMemoryManager::TScanMemoryLimiterOperator::BuildStageFeatures(stagePrefix + "::MERGE", kffMerge * TGlobalLimits::ScanMemoryLimit) + }; + ProcessMemoryGuard = + NGroupedMemoryManager::TScanMemoryLimiterOperator::BuildProcessGuard(CommonContext->GetReadMetadata()->GetTxId(), stages); + ProcessScopeGuard = + NGroupedMemoryManager::TScanMemoryLimiterOperator::BuildScopeGuard(CommonContext->GetReadMetadata()->GetTxId(), GetCommonContext()->GetScanId()); + auto readSchema = ReadMetadata->GetResultSchema(); - SpecColumns = std::make_shared(TIndexInfo::GetSnapshotColumnIdsSet(), ReadMetadata->GetIndexInfo(), readSchema); + SpecColumns = std::make_shared(TIndexInfo::GetSnapshotColumnIdsSet(), readSchema); IndexChecker = ReadMetadata->GetProgram().GetIndexChecker(); { auto predicateColumns = ReadMetadata->GetPKRangesFilter().GetColumnIds(ReadMetadata->GetIndexInfo()); if (predicateColumns.size()) { - PredicateColumns = std::make_shared(predicateColumns, ReadMetadata->GetIndexInfo(), readSchema); + PredicateColumns = std::make_shared(predicateColumns, readSchema); } else { PredicateColumns = std::make_shared(); } } { - std::set columnIds = {NPortion::TSpecialColumns::SPEC_COL_DELETE_FLAG_INDEX}; - DeletionColumns = std::make_shared(columnIds, ReadMetadata->GetIndexInfo(), ReadMetadata->GetResultSchema()); + std::set columnIds = { NPortion::TSpecialColumns::SPEC_COL_DELETE_FLAG_INDEX }; + DeletionColumns = 
std::make_shared(columnIds, ReadMetadata->GetResultSchema()); } if (!!ReadMetadata->GetRequestShardingInfo()) { - auto shardingColumnIds = ReadMetadata->GetIndexInfo().GetColumnIdsVerified(ReadMetadata->GetRequestShardingInfo()->GetShardingInfo()->GetColumnNames()); - ShardingColumns = std::make_shared(shardingColumnIds, ReadMetadata->GetIndexInfo(), ReadMetadata->GetResultSchema()); + auto shardingColumnIds = + ReadMetadata->GetIndexInfo().GetColumnIdsVerified(ReadMetadata->GetRequestShardingInfo()->GetShardingInfo()->GetColumnNames()); + ShardingColumns = std::make_shared(shardingColumnIds, ReadMetadata->GetResultSchema()); } else { ShardingColumns = std::make_shared(); } { auto efColumns = ReadMetadata->GetEarlyFilterColumnIds(); if (efColumns.size()) { - EFColumns = std::make_shared(efColumns, ReadMetadata->GetIndexInfo(), readSchema); + EFColumns = std::make_shared(efColumns, readSchema); } else { EFColumns = std::make_shared(); } } if (ReadMetadata->HasProcessingColumnIds()) { - FFColumns = std::make_shared(ReadMetadata->GetProcessingColumnIds(), ReadMetadata->GetIndexInfo(), readSchema); + FFColumns = std::make_shared(ReadMetadata->GetProcessingColumnIds(), readSchema); if (SpecColumns->Contains(*FFColumns) && !EFColumns->IsEmpty()) { FFColumns = std::make_shared(*EFColumns + *SpecColumns); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("ff_modified", FFColumns->DebugString()); @@ -264,18 +336,35 @@ TSpecialReadContext::TSpecialReadContext(const std::shared_ptr& co ProgramInputColumns = FFColumns; } - PKColumns = std::make_shared(ReadMetadata->GetPKColumnIds(), ReadMetadata->GetIndexInfo(), readSchema); + PKColumns = std::make_shared(ReadMetadata->GetPKColumnIds(), readSchema); MergeColumns = std::make_shared(*PKColumns + *SpecColumns); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("columns_context_info", DebugString()); +} + +TString TSpecialReadContext::DebugString() const { + TStringBuilder sb; + sb << "ef=" << EFColumns->DebugString() << ";" + << 
"sharding=" << ShardingColumns->DebugString() << ";" + << "pk=" << PKColumns->DebugString() << ";" + << "ff=" << FFColumns->DebugString() << ";" + << "program_input=" << ProgramInputColumns->DebugString() << ";"; + return sb; +} + +TString TSpecialReadContext::ProfileDebugString() const { + TStringBuilder sb; const auto GetBit = [](const ui32 val, const ui32 pos) -> ui32 { return (val & (1 << pos)) ? 1 : 0; }; - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("columns_context_info", DebugString()); - for (ui32 i = 0; i < (1 << 7); ++i) { - CacheFetchingScripts[GetBit(i, 0)][GetBit(i, 1)][GetBit(i, 2)][GetBit(i, 3)][GetBit(i, 4)][GetBit(i, 5)] - = BuildColumnsFetchingPlan(GetBit(i, 0), GetBit(i, 1), GetBit(i, 2), GetBit(i, 3), GetBit(i, 4), GetBit(i, 5)); + for (ui32 i = 0; i < (1 << 6); ++i) { + auto script = CacheFetchingScripts[GetBit(i, 0)][GetBit(i, 1)][GetBit(i, 2)][GetBit(i, 3)][GetBit(i, 4)][GetBit(i, 5)]; + if (script && *script) { + sb << (*script)->DebugString() << ";"; + } } + return sb; } } // namespace NKikimr::NOlap::NReader::NPlain diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/context.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/context.h index 5a869c5fc78e..1ae41c039808 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/context.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/context.h @@ -1,6 +1,7 @@ #pragma once #include "columns_set.h" #include "fetching.h" +#include #include #include #include @@ -13,6 +14,8 @@ class IDataSource; class TSpecialReadContext { private: YDB_READONLY_DEF(std::shared_ptr, CommonContext); + YDB_READONLY_DEF(std::shared_ptr, ProcessMemoryGuard); + YDB_READONLY_DEF(std::shared_ptr, ProcessScopeGuard); YDB_READONLY_DEF(std::shared_ptr, SpecColumns); YDB_READONLY_DEF(std::shared_ptr, MergeColumns); @@ -24,41 +27,58 @@ class TSpecialReadContext { YDB_READONLY_DEF(std::shared_ptr, FFColumns); YDB_READONLY_DEF(std::shared_ptr, 
ProgramInputColumns); + YDB_READONLY_DEF(std::shared_ptr, MergeStageMemory); + YDB_READONLY_DEF(std::shared_ptr, FilterStageMemory); + YDB_READONLY_DEF(std::shared_ptr, FetchingStageMemory); + + TAtomic AbortFlag = 0; NIndexes::TIndexCheckerContainer IndexChecker; TReadMetadata::TConstPtr ReadMetadata; std::shared_ptr EmptyColumns = std::make_shared(); std::shared_ptr BuildColumnsFetchingPlan(const bool needSnapshotsFilter, const bool exclusiveSource, const bool partialUsageByPredicate, const bool useIndexes, const bool needFilterSharding, const bool needFilterDeletion) const; - std::array, 2>, 2>, 2>, 2>, 2>, 2> CacheFetchingScripts; + std::array>, 2>, 2>, 2>, 2>, 2>, 2> + CacheFetchingScripts; public: - static const inline ui64 DefaultRejectMemoryIntervalLimit = ((ui64)3) << 30; - static const inline ui64 DefaultReduceMemoryIntervalLimit = DefaultRejectMemoryIntervalLimit; - static const inline ui64 DefaultReadSequentiallyBufferSize = ((ui64)8) << 20; - - const ui64 ReduceMemoryIntervalLimit = NYDBTest::TControllers::GetColumnShardController()->GetReduceMemoryIntervalLimit(DefaultReduceMemoryIntervalLimit); - const ui64 RejectMemoryIntervalLimit = NYDBTest::TControllers::GetColumnShardController()->GetRejectMemoryIntervalLimit(DefaultRejectMemoryIntervalLimit); - const ui64 ReadSequentiallyBufferSize = DefaultReadSequentiallyBufferSize; + const ui64 ReduceMemoryIntervalLimit = NYDBTest::TControllers::GetColumnShardController()->GetReduceMemoryIntervalLimit(); + const ui64 RejectMemoryIntervalLimit = NYDBTest::TControllers::GetColumnShardController()->GetRejectMemoryIntervalLimit(); + const ui64 ReadSequentiallyBufferSize = TGlobalLimits::DefaultReadSequentiallyBufferSize; - ui64 GetMemoryForSources(const THashMap>& sources, const bool isExclusive); + ui64 GetProcessMemoryControlId() const { + AFL_VERIFY(ProcessMemoryGuard); + return ProcessMemoryGuard->GetProcessId(); + } + ui64 GetMemoryForSources(const THashMap>& sources); + ui64 GetRequestedMemoryBytes() const 
{ + return MergeStageMemory->GetFullMemory() + FilterStageMemory->GetFullMemory() + FetchingStageMemory->GetFullMemory(); + } const TReadMetadata::TConstPtr& GetReadMetadata() const { return ReadMetadata; } - std::unique_ptr BuildMerger() const; + bool IsAborted() const { + return AtomicGet(AbortFlag); + } - TString DebugString() const { - return TStringBuilder() << "ef=" << EFColumns->DebugString() << ";" - << "sharding=" << ShardingColumns->DebugString() << ";" - << "pk=" << PKColumns->DebugString() << ";" - << "ff=" << FFColumns->DebugString() << ";" - << "program_input=" << ProgramInputColumns->DebugString(); + void Abort() { + AtomicSet(AbortFlag, 1); } + ~TSpecialReadContext() { + AFL_INFO(NKikimrServices::TX_COLUMNSHARD_SCAN)("profile", ProfileDebugString()); + AFL_INFO(NKikimrServices::TX_COLUMNSHARD_SCAN)("fetching", DebugString()); + } + + std::unique_ptr BuildMerger() const; + + TString DebugString() const; + TString ProfileDebugString() const; + TSpecialReadContext(const std::shared_ptr& commonContext); - std::shared_ptr GetColumnsFetchingPlan(const std::shared_ptr& source) const; + std::shared_ptr GetColumnsFetchingPlan(const std::shared_ptr& source); }; } diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetched_data.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetched_data.cpp index 869269fd9d3a..bf38c466b75b 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetched_data.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetched_data.cpp @@ -1,18 +1,21 @@ #include "fetched_data.h" -#include -#include -#include + +#include +#include +#include namespace NKikimr::NOlap { -void TFetchedData::SyncTableColumns(const std::vector>& fields) { +void TFetchedData::SyncTableColumns(const std::vector>& fields, const ISnapshotSchema& schema) { for (auto&& i : fields) { if (Table->GetSchema()->GetFieldByName(i->name())) { continue; } - Table->AddField(i, std::make_shared( - 
NArrow::TThreadSimpleArraysCache::GetNull(i->type(), Table->num_rows()))).Validate(); + Table + ->AddField(i, std::make_shared(NArrow::TThreadSimpleArraysCache::Get( + i->type(), schema.GetExternalDefaultValueVerified(i->name()), Table->num_rows()))) + .Validate(); } } -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetched_data.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetched_data.h index 1bd31f77dc0e..b535c2bc4673 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetched_data.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetched_data.h @@ -1,14 +1,17 @@ #pragma once -#include -#include #include #include +#include #include #include #include + #include #include +#include +#include + namespace NKikimr::NOlap { class TFetchedData { @@ -18,14 +21,20 @@ class TFetchedData { YDB_READONLY_DEF(std::shared_ptr, Table); YDB_READONLY_DEF(std::shared_ptr, Filter); YDB_READONLY(bool, UseFilter, false); + public: TFetchedData(const bool useFilter) - : UseFilter(useFilter) - { + : UseFilter(useFilter) { + } + ui32 GetFilteredCount(const ui32 recordsCount, const ui32 defLimit) const { + if (!Filter) { + return std::min(defLimit, recordsCount); + } + return Filter->GetFilteredCount().value_or(recordsCount); } - void SyncTableColumns(const std::vector>& fields); + void SyncTableColumns(const std::vector>& fields, const ISnapshotSchema& schema); std::shared_ptr GetAppliedFilter() const { return UseFilter ? 
Filter : nullptr; @@ -60,6 +69,11 @@ class TFetchedData { return (Filter && Filter->IsTotalDenyFilter()) || (Table && !Table->num_rows()); } + void Clear() { + Filter = std::make_shared(NArrow::TColumnFilter::BuildDenyFilter()); + Table = nullptr; + } + void AddFilter(const std::shared_ptr& filter) { if (!filter) { return; @@ -67,6 +81,31 @@ class TFetchedData { return AddFilter(*filter); } + void CutFilter(const ui32 recordsCount, const ui32 limit, const bool reverse) { + auto filter = std::make_shared(NArrow::TColumnFilter::BuildAllowFilter()); + ui32 recordsCountImpl = Filter ? Filter->GetFilteredCount().value_or(recordsCount) : recordsCount; + if (recordsCountImpl < limit) { + return; + } + if (reverse) { + filter->Add(false, recordsCountImpl - limit); + filter->Add(true, limit); + } else { + filter->Add(true, limit); + filter->Add(false, recordsCountImpl - limit); + } + if (Filter) { + if (UseFilter) { + AddFilter(*filter); + } else { + AddFilter(Filter->CombineSequentialAnd(*filter)); + } + } else { + AddFilter(*filter); + } + + } + void AddFilter(const NArrow::TColumnFilter& filter) { if (UseFilter && Table) { AFL_VERIFY(filter.Apply(Table)); @@ -80,14 +119,10 @@ class TFetchedData { } } - void AddBatch(const std::shared_ptr& batch) { - return AddBatch(arrow::Table::Make(batch->schema(), batch->columns(), batch->num_rows())); - } - void AddBatch(const std::shared_ptr& table) { AFL_VERIFY(table); if (UseFilter) { - AddBatch(table->BuildTable()); + AddBatch(table->BuildTableVerified()); } else { if (!Table) { Table = table; @@ -110,13 +145,13 @@ class TFetchedData { AFL_VERIFY(mergeResult.IsSuccess())("error", mergeResult.GetErrorMessage()); } } - }; class TFetchedResult { private: YDB_READONLY_DEF(std::shared_ptr, Batch); YDB_READONLY_DEF(std::shared_ptr, NotAppliedFilter); + public: TFetchedResult(std::unique_ptr&& data) : Batch(data->GetTable()) @@ -128,4 +163,4 @@ class TFetchedResult { } }; -} +} // namespace NKikimr::NOlap diff --git 
a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetching.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetching.cpp index 995958f48449..2d0ec349aa6a 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetching.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetching.cpp @@ -1,7 +1,10 @@ #include "fetching.h" #include "source.h" + +#include #include -#include +#include +#include #include @@ -15,36 +18,46 @@ bool TStepAction::DoApply(IDataReader& /*owner*/) const { return true; } -bool TStepAction::DoExecute() { - if (Source->IsAborted()) { - return true; +TConclusionStatus TStepAction::DoExecuteImpl() { + if (Source->GetContext()->IsAborted()) { + return TConclusionStatus::Success(); } auto executeResult = Cursor.Execute(Source); if (!executeResult) { - SetErrorMessage(executeResult.GetErrorMessage()); - return false; + return executeResult; } if (*executeResult) { Source->Finalize(); FinishedFlag = true; } - return true; + return TConclusionStatus::Success(); } -TConclusion TColumnBlobsFetchingStep::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const { +TConclusion TColumnBlobsFetchingStep::DoExecuteInplace( + const std::shared_ptr& source, const TFetchingScriptCursor& step) const { return !source->StartFetchingColumns(source, step, Columns); } ui64 TColumnBlobsFetchingStep::DoPredictRawBytes(const std::shared_ptr& source) const { - const ui64 result = source->GetColumnRawBytes(Columns->GetColumnIds()); + ui64 result = source->GetColumnRawBytes(Columns.GetColumnIds()); + if (source->GetContext()->GetReadMetadata()->Limit && source->GetExclusiveIntervalOnly()) { + result = std::max(result * 1.0 * source->GetContext()->GetReadMetadata()->Limit / source->GetRecordsCount(), + source->GetColumnBlobBytes(Columns.GetColumnIds())); + } if (!result) { - return Columns->GetColumnIds().size() * source->GetRecordsCount() * sizeof(ui32); // null for all records 
for all columns in future will be + return Columns.GetColumnIds().size() * source->GetRecordsCount() * + sizeof(ui32); // null for all records for all columns in future will be } else { return result; } } -TConclusion TIndexBlobsFetchingStep::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const { +ui64 TColumnBlobsFetchingStep::GetProcessingDataSize(const std::shared_ptr& source) const { + return source->GetColumnBlobBytes(Columns.GetColumnIds()); +} + +TConclusion TIndexBlobsFetchingStep::DoExecuteInplace( + const std::shared_ptr& source, const TFetchingScriptCursor& step) const { return !source->StartFetchingIndexes(source, step, Indexes); } @@ -57,7 +70,12 @@ TConclusion TAssemblerStep::DoExecuteInplace(const std::shared_ptr TOptionalAssemblerStep::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& /*step*/) const { +ui64 TAssemblerStep::GetProcessingDataSize(const std::shared_ptr& source) const { + return source->GetColumnRawBytes(Columns->GetColumnIds()); +} + +TConclusion TOptionalAssemblerStep::DoExecuteInplace( + const std::shared_ptr& source, const TFetchingScriptCursor& /*step*/) const { source->AssembleColumns(Columns); return true; } @@ -71,6 +89,10 @@ bool TOptionalAssemblerStep::DoInitSourceSeqColumnIds(const std::shared_ptr& source) const { + return source->GetColumnRawBytes(Columns->GetColumnIds()); +} + TConclusion TFilterProgramStep::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& /*step*/) const { AFL_VERIFY(source); AFL_VERIFY(Step); @@ -90,13 +112,20 @@ ui64 TFilterProgramStep::DoPredictRawBytes(const std::shared_ptr& s } TConclusion TPredicateFilter::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& /*step*/) const { - auto filter = source->GetContext()->GetReadMetadata()->GetPKRangesFilter().BuildFilter(source->GetStageData().GetTable()->BuildTable()); + auto filter = + 
source->GetContext()->GetReadMetadata()->GetPKRangesFilter().BuildFilter(source->GetStageData().GetTable()->BuildTableVerified()); source->MutableStageData().AddFilter(filter); return true; } TConclusion TSnapshotFilter::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& /*step*/) const { - auto filter = MakeSnapshotFilter(source->GetStageData().GetTable()->BuildTable(), source->GetContext()->GetReadMetadata()->GetRequestSnapshot()); + auto filter = MakeSnapshotFilter( + source->GetStageData().GetTable()->BuildTableVerified(), source->GetContext()->GetReadMetadata()->GetRequestSnapshot()); + if (filter.GetFilteredCount().value_or(source->GetRecordsCount()) != source->GetRecordsCount()) { + if (source->AddTxConflict()) { + return true; + } + } source->MutableStageData().AddFilter(filter); return true; } @@ -120,7 +149,8 @@ TConclusion TDeletionFilter::DoExecuteInplace(const std::shared_ptr TShardingFilter::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& /*step*/) const { NYDBTest::TControllers::GetColumnShardController()->OnSelectShardingFilter(); - auto filter = source->GetContext()->GetReadMetadata()->GetRequestShardingInfo()->GetShardingInfo()->GetFilter(source->GetStageData().GetTable()->BuildTable()); + const auto& shardingInfo = source->GetContext()->GetReadMetadata()->GetRequestShardingInfo()->GetShardingInfo(); + auto filter = shardingInfo->GetFilter(source->GetStageData().GetTable()->BuildTableVerified()); source->MutableStageData().AddFilter(filter); return true; } @@ -130,7 +160,8 @@ TConclusion TBuildFakeSpec::DoExecuteInplace(const std::shared_ptrfields()) { columns.emplace_back(NArrow::TThreadSimpleArraysCache::GetConst(f->type(), NArrow::DefaultScalar(f->type()), Count)); } - source->MutableStageData().AddBatch(arrow::RecordBatch::Make(TIndexInfo::ArrowSchemaSnapshot(), Count, columns)); + source->MutableStageData().AddBatch( + 
std::make_shared(arrow::RecordBatch::Make(TIndexInfo::ArrowSchemaSnapshot(), Count, columns))); return true; } @@ -142,14 +173,21 @@ TConclusion TApplyIndexStep::DoExecuteInplace(const std::shared_ptr TFetchingScriptCursor::Execute(const std::shared_ptr& source) { AFL_VERIFY(source); NMiniKQL::TThrowingBindTerminator bind; + Script->OnExecute(); AFL_VERIFY(!Script->IsFinished(CurrentStepIdx)); while (!Script->IsFinished(CurrentStepIdx)) { if (source->GetStageData().IsEmpty()) { + source->OnEmptyStageData(); break; } auto step = Script->GetStep(CurrentStepIdx); - TMemoryProfileGuard mGuard("SCAN_PROFILE::FETCHING::" + step->GetName() + "::" + Script->GetBranchName(), IS_DEBUG_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD_SCAN_MEMORY)); + TMemoryProfileGuard mGuard("SCAN_PROFILE::FETCHING::" + step->GetName() + "::" + Script->GetBranchName(), + IS_DEBUG_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD_SCAN_MEMORY)); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("scan_step", step->DebugString())("scan_step_idx", CurrentStepIdx); + AFL_VERIFY(!CurrentStartInstant); + CurrentStartInstant = TMonotonic::Now(); + AFL_VERIFY(!CurrentStartDataSize); + CurrentStartDataSize = step->GetProcessingDataSize(source); const TConclusion resultStep = step->ExecuteInplace(source, *this); if (!resultStep) { return resultStep; @@ -157,9 +195,83 @@ TConclusion TFetchingScriptCursor::Execute(const std::shared_ptr&& guard, + const std::shared_ptr& /*allocation*/) { + auto data = Source.lock(); + if (!data || data->GetContext()->IsAborted()) { + guard->Release(); + return false; + } + data->RegisterAllocationGuard(std::move(guard)); + Step.Next(); + auto task = std::make_shared(data, std::move(Step), data->GetContext()->GetCommonContext()->GetScanActorId()); + NConveyor::TScanServiceOperator::SendTaskToExecute(task); + return true; +} + +TAllocateMemoryStep::TFetchingStepAllocation::TFetchingStepAllocation( + const std::shared_ptr& source, const ui64 mem, const TFetchingScriptCursor& step) + : 
TBase(mem) + , Source(source) + , Step(step) + , TasksGuard(source->GetContext()->GetCommonContext()->GetCounters().GetResourcesAllocationTasksGuard()) { } + +TConclusion TAllocateMemoryStep::DoExecuteInplace( + const std::shared_ptr& source, const TFetchingScriptCursor& step) const { + + auto allocation = std::make_shared(source, GetProcessingDataSize(source), step); + NGroupedMemoryManager::TScanMemoryLimiterOperator::SendToAllocation(source->GetContext()->GetProcessMemoryControlId(), + source->GetContext()->GetCommonContext()->GetScanId(), source->GetFirstIntervalId(), { allocation }, (ui32)StageIndex); + return false; +} + +ui64 TAllocateMemoryStep::GetProcessingDataSize(const std::shared_ptr& source) const { + ui64 size = source->GetColumnRawBytes(Columns.GetColumnIds()); + + if (source->GetStageData().GetUseFilter() && source->GetContext()->GetReadMetadata()->Limit) { + const ui32 filtered = source->GetStageData().GetFilteredCount(source->GetRecordsCount(), source->GetContext()->GetReadMetadata()->Limit); + if (filtered < source->GetRecordsCount()) { + size = std::max(size * 1.0 * filtered / source->GetRecordsCount(), source->GetColumnBlobBytes(Columns.GetColumnIds())); + } + } + return size; +} + +TString TFetchingScript::DebugString() const { + TStringBuilder sb; + TStringBuilder sbBranch; + for (auto&& i : Steps) { + if (i->GetSumDuration() > TDuration::MilliSeconds(10)) { + sbBranch << "{" << i->DebugString() << "};"; + } + } + if (!sbBranch) { + return ""; + } + sb << "{branch:" << BranchName << ";limit:" << Limit << ";"; + if (FinishInstant && StartInstant) { + sb << "duration:" << *FinishInstant - *StartInstant << ";"; + } + + sb << "steps_10Ms:[" << sbBranch << "]}"; + return sb; +} + +TFetchingScript::TFetchingScript(const TSpecialReadContext& context) + : Limit(context.GetReadMetadata()->Limit) { +} + +NKikimr::TConclusion TFilterCutLimit::DoExecuteInplace( + const std::shared_ptr& source, const TFetchingScriptCursor& /*step*/) const { + 
source->MutableStageData().CutFilter(source->GetRecordsCount(), Limit, Reverse); + return true; +} + +} // namespace NKikimr::NOlap::NReader::NPlain diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetching.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetching.h index 5cddf4955c71..133aa4db3669 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetching.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetching.h @@ -1,23 +1,38 @@ #pragma once #include "columns_set.h" -#include -#include -#include + +#include #include #include +#include +#include +#include + +#include namespace NKikimr::NOlap::NReader::NPlain { class IDataSource; class TFetchingScriptCursor; +class TSpecialReadContext; class IFetchingStep { private: YDB_READONLY_DEF(TString, Name); + YDB_READONLY(TDuration, SumDuration, TDuration::Zero()); + YDB_READONLY(ui64, SumSize, 0); + protected: virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const = 0; virtual TString DoDebugString() const { return ""; } + public: + void AddDuration(const TDuration d) { + SumDuration += d; + } + void AddDataSize(const ui64 size) { + SumSize += size; + } virtual ui64 DoPredictRawBytes(const std::shared_ptr& /*source*/) const { return 0; } @@ -31,15 +46,18 @@ class IFetchingStep { return DoExecuteInplace(source, step); } - IFetchingStep(const TString& name) - : Name(name) - { + virtual ui64 GetProcessingDataSize(const std::shared_ptr& /*source*/) const { + return 0; + } + IFetchingStep(const TString& name) + : Name(name) { } TString DebugString() const { TStringBuilder sb; - sb << "name=" << Name << ";details={" << DoDebugString() << "};"; + sb << "name=" << Name << ";duration=" << SumDuration << ";" + << "size=" << 1e-9 * SumSize << ";details={" << DoDebugString() << "};"; return sb; } }; @@ -48,19 +66,30 @@ class TFetchingScript { private: YDB_ACCESSOR(TString, BranchName, 
"UNDEFINED"); std::vector> Steps; + std::optional StartInstant; + std::optional FinishInstant; + const ui32 Limit; + public: - TFetchingScript() = default; + TFetchingScript(const TSpecialReadContext& context); - TString DebugString() const { - TStringBuilder sb; - sb << "["; - for (auto&& i : Steps) { - sb << "{" << i->DebugString() << "};"; + void AddStepDataSize(const ui32 index, const ui64 size) { + GetStep(index)->AddDataSize(size); + } + + void AddStepDuration(const ui32 index, const TDuration d) { + FinishInstant = TMonotonic::Now(); + GetStep(index)->AddDuration(d); + } + + void OnExecute() { + if (!StartInstant) { + StartInstant = TMonotonic::Now(); } - sb << "]"; - return sb; } + TString DebugString() const; + const std::shared_ptr& GetStep(const ui32 index) const { AFL_VERIFY(index < Steps.size()); return Steps[index]; @@ -68,7 +97,7 @@ class TFetchingScript { ui64 PredictRawBytes(const std::shared_ptr& source) const { ui64 result = 0; - for (auto&& current: Steps) { + for (auto&& current : Steps) { result += current->DoPredictRawBytes(source); } return result; @@ -98,14 +127,23 @@ class TFetchingScript { class TFetchingScriptCursor { private: + std::optional CurrentStartInstant; + std::optional CurrentStartDataSize; ui32 CurrentStepIdx = 0; std::shared_ptr Script; + void FlushDuration() { + AFL_VERIFY(CurrentStartInstant); + AFL_VERIFY(CurrentStartDataSize); + Script->AddStepDuration(CurrentStepIdx, TMonotonic::Now() - *CurrentStartInstant); + Script->AddStepDataSize(CurrentStepIdx, *CurrentStartDataSize); + CurrentStartInstant.reset(); + CurrentStartDataSize.reset(); + } + public: TFetchingScriptCursor(const std::shared_ptr& script, const ui32 index) : CurrentStepIdx(index) - , Script(script) - { - + , Script(script) { } const TString& GetName() const { @@ -117,6 +155,7 @@ class TFetchingScriptCursor { } bool Next() { + FlushDuration(); return !Script->IsFinished(++CurrentStepIdx); } @@ -129,9 +168,11 @@ class TStepAction: public 
IDataTasksProcessor::ITask { std::shared_ptr Source; TFetchingScriptCursor Cursor; bool FinishedFlag = false; + protected: virtual bool DoApply(IDataReader& owner) const override; - virtual bool DoExecute() override; + virtual TConclusionStatus DoExecuteImpl() override; + public: virtual TString GetTaskClassIdentifier() const override { return "STEP_ACTION"; @@ -140,9 +181,7 @@ class TStepAction: public IDataTasksProcessor::ITask { TStepAction(const std::shared_ptr& source, TFetchingScriptCursor&& cursor, const NActors::TActorId& ownerActorId) : TBase(ownerActorId) , Source(source) - , Cursor(std::move(cursor)) - { - + , Cursor(std::move(cursor)) { } }; @@ -150,16 +189,17 @@ class TBuildFakeSpec: public IFetchingStep { private: using TBase = IFetchingStep; const ui32 Count = 0; + protected: virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; virtual ui64 DoPredictRawBytes(const std::shared_ptr& /*source*/) const override { return TIndexInfo::GetSpecialColumnsRecordSize() * Count; } + public: TBuildFakeSpec(const ui32 count) : TBase("FAKE_SPEC") - , Count(count) - { + , Count(count) { AFL_VERIFY(Count); } }; @@ -168,33 +208,73 @@ class TApplyIndexStep: public IFetchingStep { private: using TBase = IFetchingStep; const NIndexes::TIndexCheckerContainer IndexChecker; + protected: virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; + public: TApplyIndexStep(const NIndexes::TIndexCheckerContainer& indexChecker) : TBase("APPLY_INDEX") - , IndexChecker(indexChecker) - { + , IndexChecker(indexChecker) { + } +}; + +class TAllocateMemoryStep: public IFetchingStep { +private: + using TBase = IFetchingStep; + TColumnsSetIds Columns; + const EStageFeaturesIndexes StageIndex; + +protected: + class TFetchingStepAllocation: public NGroupedMemoryManager::IAllocation { + private: + using TBase = NGroupedMemoryManager::IAllocation; + std::weak_ptr 
Source; + TFetchingScriptCursor Step; + NColumnShard::TCounterGuard TasksGuard; + virtual bool DoOnAllocated(std::shared_ptr&& guard, + const std::shared_ptr& allocation) override; + + public: + TFetchingStepAllocation(const std::shared_ptr& source, const ui64 mem, const TFetchingScriptCursor& step); + }; + virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; + virtual ui64 GetProcessingDataSize(const std::shared_ptr& source) const override; + virtual ui64 DoPredictRawBytes(const std::shared_ptr& /*source*/) const override { + return 0; + } + virtual TString DoDebugString() const override { + return TStringBuilder() << "columns=" << Columns.DebugString() << ";stage=" << StageIndex << ";"; + } + +public: + TAllocateMemoryStep(const TColumnsSetIds& columns, const EStageFeaturesIndexes stageIndex) + : TBase("ALLOCATE_MEMORY::" + ::ToString(stageIndex)) + , Columns(columns) + , StageIndex(stageIndex) { + AFL_VERIFY(Columns.GetColumnsCount()); } }; class TColumnBlobsFetchingStep: public IFetchingStep { private: using TBase = IFetchingStep; - std::shared_ptr Columns; + TColumnsSetIds Columns; + protected: virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; virtual ui64 DoPredictRawBytes(const std::shared_ptr& source) const override; virtual TString DoDebugString() const override { - return TStringBuilder() << "columns=" << Columns->DebugString() << ";"; + return TStringBuilder() << "columns=" << Columns.DebugString() << ";"; } + public: - TColumnBlobsFetchingStep(const std::shared_ptr& columns) + virtual ui64 GetProcessingDataSize(const std::shared_ptr& source) const override; + TColumnBlobsFetchingStep(const TColumnsSetIds& columns) : TBase("FETCHING_COLUMNS") , Columns(columns) { - AFL_VERIFY(Columns); - AFL_VERIFY(Columns->GetColumnsCount()); + AFL_VERIFY(Columns.GetColumnsCount()); } }; @@ -202,12 +282,14 @@ class 
TIndexBlobsFetchingStep: public IFetchingStep { private: using TBase = IFetchingStep; std::shared_ptr Indexes; + protected: virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; virtual ui64 DoPredictRawBytes(const std::shared_ptr& source) const override; virtual TString DoDebugString() const override { return TStringBuilder() << "indexes=" << Indexes->DebugString() << ";"; } + public: TIndexBlobsFetchingStep(const std::shared_ptr& indexes) : TBase("FETCHING_INDEXES") @@ -224,12 +306,13 @@ class TAssemblerStep: public IFetchingStep { virtual TString DoDebugString() const override { return TStringBuilder() << "columns=" << Columns->DebugString() << ";"; } + public: + virtual ui64 GetProcessingDataSize(const std::shared_ptr& source) const override; virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; TAssemblerStep(const std::shared_ptr& columns, const TString& specName = Default()) : TBase("ASSEMBLER" + (specName ? "::" + specName : "")) - , Columns(columns) - { + , Columns(columns) { AFL_VERIFY(Columns); AFL_VERIFY(Columns->GetColumnsCount()); } @@ -242,9 +325,13 @@ class TOptionalAssemblerStep: public IFetchingStep { virtual TString DoDebugString() const override { return TStringBuilder() << "columns=" << Columns->DebugString() << ";"; } + protected: virtual bool DoInitSourceSeqColumnIds(const std::shared_ptr& source) const override; + public: + virtual ui64 GetProcessingDataSize(const std::shared_ptr& source) const override; + virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; TOptionalAssemblerStep(const std::shared_ptr& columns, const TString& specName = Default()) : TBase("OPTIONAL_ASSEMBLER" + (specName ? 
"::" + specName : "")) @@ -258,13 +345,35 @@ class TFilterProgramStep: public IFetchingStep { private: using TBase = IFetchingStep; std::shared_ptr Step; + protected: virtual ui64 DoPredictRawBytes(const std::shared_ptr& source) const override; + public: virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; TFilterProgramStep(const std::shared_ptr& step) : TBase("PROGRAM") - , Step(step) + , Step(step) { + } +}; + +class TFilterCutLimit: public IFetchingStep { +private: + using TBase = IFetchingStep; + const ui32 Limit; + const bool Reverse; + +protected: + virtual ui64 DoPredictRawBytes(const std::shared_ptr& /*source*/) const override { + return 0; + } + +public: + virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; + TFilterCutLimit(const ui32 limit, const bool reverse) + : TBase("LIMIT") + , Limit(limit) + , Reverse(reverse) { } }; @@ -272,15 +381,15 @@ class TFilterProgramStep: public IFetchingStep { class TPredicateFilter: public IFetchingStep { private: using TBase = IFetchingStep; + public: virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; TPredicateFilter() : TBase("PREDICATE") { - } }; -class TSnapshotFilter : public IFetchingStep { +class TSnapshotFilter: public IFetchingStep { private: using TBase = IFetchingStep; @@ -302,7 +411,7 @@ class TDeletionFilter: public IFetchingStep { } }; -class TShardingFilter : public IFetchingStep { +class TShardingFilter: public IFetchingStep { private: using TBase = IFetchingStep; @@ -313,5 +422,4 @@ class TShardingFilter : public IFetchingStep { } }; - -} +} // namespace NKikimr::NOlap::NReader::NPlain diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/interval.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/interval.cpp index 8e228937b653..9da043a366c1 100644 --- 
a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/interval.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/interval.cpp @@ -1,34 +1,30 @@ #include "interval.h" + #include +#include namespace NKikimr::NOlap::NReader::NPlain { void TFetchingInterval::ConstructResult() { - if (ReadySourcesCount.Val() != WaitSourcesCount || !ReadyGuards.Val()) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "skip_construct_result")("interval_idx", IntervalIdx); + const ui32 ready = ReadySourcesCount.Val(); + if (ready != WaitSourcesCount) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "skip_construct_result")("interval_idx", IntervalIdx)( + "count", WaitSourcesCount)("ready", ready)("interval_id", GetIntervalId()); return; } else { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "start_construct_result")("interval_idx", IntervalIdx); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "start_construct_result")("interval_idx", IntervalIdx)( + "interval_id", GetIntervalId()); } if (AtomicCas(&SourcesFinalized, 1, 0)) { IntervalStateGuard.SetStatus(NColumnShard::TScanCounters::EIntervalStatus::WaitMergerStart); + + MergingContext->SetIntervalChunkMemory(Context->GetMemoryForSources(Sources)); + auto task = std::make_shared(MergingContext, Context, std::move(Sources)); task->SetPriority(NConveyor::ITask::EPriority::High); - NConveyor::TScanServiceOperator::SendTaskToExecute(task); - } -} - -void TFetchingInterval::OnInitResourcesGuard(const std::shared_ptr& guard) { - IntervalStateGuard.SetStatus(NColumnShard::TScanCounters::EIntervalStatus::WaitSources); - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "allocated")("interval_idx", IntervalIdx); - AFL_VERIFY(guard); - AFL_VERIFY(!ResourcesGuard); - ResourcesGuard = guard; - for (auto&& i : Sources) { - i.second->OnInitResourcesGuard(i.second); + NGroupedMemoryManager::TScanMemoryLimiterOperator::SendToAllocation(Context->GetProcessMemoryControlId(), 
+ Context->GetCommonContext()->GetScanId(), GetIntervalId(), { task }, (ui32)EStageFeaturesIndexes::Merge); } - AFL_VERIFY(ReadyGuards.Inc() <= 1); - ConstructResult(); } void TFetchingInterval::OnSourceFetchStageReady(const ui32 /*sourceIdx*/) { @@ -40,29 +36,27 @@ void TFetchingInterval::OnSourceFetchStageReady(const ui32 /*sourceIdx*/) { TFetchingInterval::TFetchingInterval(const NArrow::NMerger::TSortableBatchPosition& start, const NArrow::NMerger::TSortableBatchPosition& finish, const ui32 intervalIdx, const THashMap>& sources, const std::shared_ptr& context, const bool includeFinish, const bool includeStart, const bool isExclusiveInterval) - : TTaskBase(0, context->GetMemoryForSources(sources, isExclusiveInterval), "", context->GetCommonContext()->GetResourcesTaskContext()) - , MergingContext(std::make_shared(start, finish, intervalIdx, includeFinish, includeStart, isExclusiveInterval)) + : MergingContext(std::make_shared(start, finish, intervalIdx, includeFinish, includeStart, isExclusiveInterval)) , Context(context) , TaskGuard(Context->GetCommonContext()->GetCounters().GetResourcesAllocationTasksGuard()) , Sources(sources) , IntervalIdx(intervalIdx) - , IntervalStateGuard(Context->GetCommonContext()->GetCounters().CreateIntervalStateGuard()) -{ - Y_ABORT_UNLESS(Sources.size()); + , IntervalGroupGuard(NGroupedMemoryManager::TScanMemoryLimiterOperator::BuildGroupGuard( + Context->GetProcessMemoryControlId(), context->GetCommonContext()->GetScanId())) + , IntervalStateGuard(Context->GetCommonContext()->GetCounters().CreateIntervalStateGuard()) { + AFL_VERIFY(Sources.size()); for (auto&& [_, i] : Sources) { if (!i->IsDataReady()) { ++WaitSourcesCount; + } else { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "ready_source")("interval_idx", IntervalIdx)( + "interval_id", GetIntervalId()); } - i->RegisterInterval(*this); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "register_source")("interval_idx", IntervalIdx)("interval_id", 
GetIntervalId()); + i->RegisterInterval(*this, i); } IntervalStateGuard.SetStatus(NColumnShard::TScanCounters::EIntervalStatus::WaitResources); -} - -void TFetchingInterval::DoOnAllocationSuccess(const std::shared_ptr& guard) { - AFL_VERIFY(guard); - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("interval_idx", IntervalIdx)("event", "resources_allocated") - ("resources", guard->DebugString())("start", MergingContext->GetIncludeStart())("finish", MergingContext->GetIncludeFinish())("sources", Sources.size()); - OnInitResourcesGuard(guard); + ConstructResult(); } void TFetchingInterval::SetMerger(std::unique_ptr&& merger) { @@ -82,13 +76,15 @@ void TFetchingInterval::OnPartSendingComplete() { AFL_VERIFY(Merger); AFL_VERIFY(AtomicCas(&PartSendingWait, 0, 1)); AFL_VERIFY(AtomicGet(SourcesFinalized) == 1); - if (AbortedFlag) { + if (Context->IsAborted()) { return; } IntervalStateGuard.SetStatus(NColumnShard::TScanCounters::EIntervalStatus::WaitMergerContinue); + auto task = std::make_shared(MergingContext, Context, std::move(Merger)); task->SetPriority(NConveyor::ITask::EPriority::High); - NConveyor::TScanServiceOperator::SendTaskToExecute(task); + NGroupedMemoryManager::TScanMemoryLimiterOperator::SendToAllocation(Context->GetProcessMemoryControlId(), + Context->GetCommonContext()->GetScanId(), GetIntervalId(), { task }, (ui32)EStageFeaturesIndexes::Merge); } -} +} // namespace NKikimr::NOlap::NReader::NPlain diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/interval.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/interval.h index 80613ef5b2d2..86c3f1aa0510 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/interval.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/interval.h @@ -6,11 +6,9 @@ namespace NKikimr::NOlap::NReader::NPlain { -class TFetchingInterval: public TNonCopyable, public NResourceBroker::NSubscribe::ITask { +class TFetchingInterval: public TNonCopyable { private: - 
using TTaskBase = NResourceBroker::NSubscribe::ITask; std::shared_ptr MergingContext; - bool AbortedFlag = false; TAtomic SourcesFinalized = 0; TAtomic PartSendingWait = 0; std::unique_ptr Merger; @@ -20,15 +18,11 @@ class TFetchingInterval: public TNonCopyable, public NResourceBroker::NSubscribe void ConstructResult(); - std::shared_ptr ResourcesGuard; const ui32 IntervalIdx; + const std::shared_ptr IntervalGroupGuard; TAtomicCounter ReadySourcesCount = 0; - TAtomicCounter ReadyGuards = 0; ui32 WaitSourcesCount = 0; NColumnShard::TConcreteScanCounters::TScanIntervalStateGuard IntervalStateGuard; - void OnInitResourcesGuard(const std::shared_ptr& guard); -protected: - virtual void DoOnAllocationSuccess(const std::shared_ptr& guard) override; public: std::set GetPathIds() const { @@ -43,16 +37,16 @@ class TFetchingInterval: public TNonCopyable, public NResourceBroker::NSubscribe return IntervalIdx; } - const THashMap>& GetSources() const { - return Sources; + ui32 GetIntervalId() const { + AFL_VERIFY(IntervalGroupGuard); + return IntervalGroupGuard->GetGroupId(); } - const std::shared_ptr& GetResourcesGuard() const { - return ResourcesGuard; + const THashMap>& GetSources() const { + return Sources; } void Abort() { - AbortedFlag = true; if (AtomicCas(&SourcesFinalized, 1, 0)) { for (auto&& i : Sources) { i.second->Abort(); @@ -83,10 +77,16 @@ class TFetchingInterval: public TNonCopyable, public NResourceBroker::NSubscribe void OnPartSendingComplete(); void SetMerger(std::unique_ptr&& merger); bool HasMerger() const; + std::shared_ptr GetGroupGuard() const { + return IntervalGroupGuard; + } TFetchingInterval(const NArrow::NMerger::TSortableBatchPosition& start, const NArrow::NMerger::TSortableBatchPosition& finish, const ui32 intervalIdx, const THashMap>& sources, const std::shared_ptr& context, const bool includeFinish, const bool includeStart, const bool isExclusiveInterval); + + ~TFetchingInterval() { + } }; } diff --git 
a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/iterator.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/iterator.cpp index 60e41095303c..f705deb4501c 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/iterator.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/iterator.cpp @@ -11,7 +11,7 @@ TColumnShardScanIterator::TColumnShardScanIterator(const std::shared_ptrGetReadMetadata()->IsSorted()); } -TConclusion> TColumnShardScanIterator::GetBatch() { +TConclusion> TColumnShardScanIterator::GetBatch() { FillReadyResults(); return ReadyResults.pop_front(); } @@ -33,11 +33,11 @@ void TColumnShardScanIterator::FillReadyResults() { i64 limitLeft = Context->GetReadMetadata()->Limit == 0 ? INT64_MAX : Context->GetReadMetadata()->Limit - ItemsRead; for (size_t i = 0; i < ready.size() && limitLeft; ++i) { auto& batch = ReadyResults.emplace_back(std::move(ready[i])); - if (batch.GetResultBatch().num_rows() > limitLeft) { - batch.Cut(limitLeft); + if (batch->GetResultBatch().num_rows() > limitLeft) { + batch->Cut(limitLeft); } - limitLeft -= batch.GetResultBatch().num_rows(); - ItemsRead += batch.GetResultBatch().num_rows(); + limitLeft -= batch->GetResultBatch().num_rows(); + ItemsRead += batch->GetResultBatch().num_rows(); } if (limitLeft == 0) { @@ -53,7 +53,7 @@ TColumnShardScanIterator::~TColumnShardScanIterator() { ReadMetadata->ReadStats->PrintToLog(); } -void TColumnShardScanIterator::Apply(IDataTasksProcessor::ITask::TPtr task) { +void TColumnShardScanIterator::Apply(const std::shared_ptr& task) { if (!IndexedData->IsFinished()) { Y_ABORT_UNLESS(task->Apply(*IndexedData)); } diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/iterator.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/iterator.h index 38f05ff276cd..38b1fcc29882 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/iterator.h +++ 
b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/iterator.h @@ -9,7 +9,7 @@ namespace NKikimr::NOlap::NReader::NPlain { class TReadyResults { private: const NColumnShard::TConcreteScanCounters Counters; - std::deque Data; + std::deque> Data; i64 RecordsCount = 0; public: TString DebugString() const { @@ -19,7 +19,7 @@ class TReadyResults { << "records_count:" << RecordsCount << ";" ; if (Data.size()) { - sb << "schema=" << Data.front().GetResultBatch().schema()->ToString() << ";"; + sb << "schema=" << Data.front()->GetResultBatch().schema()->ToString() << ";"; } return sb; } @@ -28,17 +28,19 @@ class TReadyResults { { } - TPartialReadResult& emplace_back(TPartialReadResult&& v) { - RecordsCount += v.GetResultBatch().num_rows(); + const std::shared_ptr& emplace_back(std::shared_ptr&& v) { + AFL_VERIFY(!!v); + RecordsCount += v->GetResultBatch().num_rows(); Data.emplace_back(std::move(v)); return Data.back(); } - std::optional pop_front() { + std::shared_ptr pop_front() { if (Data.empty()) { return {}; } auto result = std::move(Data.front()); - RecordsCount -= result.GetResultBatch().num_rows(); + AFL_VERIFY(RecordsCount >= result->GetResultBatch().num_rows()); + RecordsCount -= result->GetResultBatch().num_rows(); Data.pop_front(); return result; } @@ -84,13 +86,13 @@ class TColumnShardScanIterator: public TScanIteratorBase { ; } - virtual void Apply(IDataTasksProcessor::ITask::TPtr task) override; + virtual void Apply(const std::shared_ptr& task) override; bool Finished() const override { return IndexedData->IsFinished() && ReadyResults.empty(); } - TConclusion> GetBatch() override; + virtual TConclusion> GetBatch() override; virtual void PrepareResults() override; virtual TConclusion ReadNextInterval() override; diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/merge.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/merge.cpp index 0b7bc55dffec..1981faaa4314 100644 --- 
a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/merge.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/merge.cpp @@ -2,6 +2,9 @@ #include "plain_read_data.h" #include "source.h" +#include +#include + namespace NKikimr::NOlap::NReader::NPlain { std::optional TBaseMergeTask::DrainMergerLinearScan(const std::optional resultBufferLimit) { @@ -19,12 +22,13 @@ std::optional TBaseMergeTask::DrainMergerLinearScan(co Merger = nullptr; } Context->GetCommonContext()->GetCounters().OnLinearScanInterval(rbBuilder->GetRecordsCount()); - ResultBatch = NArrow::TStatusValidator::GetValid(arrow::Table::FromRecordBatches({rbBuilder->Finalize()})); + ResultBatch = NArrow::TStatusValidator::GetValid(arrow::Table::FromRecordBatches({ rbBuilder->Finalize() })); return lastResultPosition; } void TBaseMergeTask::PrepareResultBatch() { if (!ResultBatch || ResultBatch->num_rows() == 0) { + AllocationGuard = nullptr; ResultBatch = nullptr; LastPK = nullptr; return; @@ -41,8 +45,10 @@ void TBaseMergeTask::PrepareResultBatch() { } else { ShardedBatch = NArrow::TShardedRecordBatch(ResultBatch); } + AllocationGuard->Update(NArrow::GetTableMemorySize(ResultBatch)); AFL_VERIFY(!!LastPK == !!ShardedBatch->GetRecordsCount())("lpk", !!LastPK)("sb", ShardedBatch->GetRecordsCount()); } else { + AllocationGuard = nullptr; ResultBatch = nullptr; LastPK = nullptr; } @@ -51,14 +57,26 @@ void TBaseMergeTask::PrepareResultBatch() { bool TBaseMergeTask::DoApply(IDataReader& indexedDataRead) const { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "DoApply")("interval_idx", MergingContext->GetIntervalIdx()); auto& reader = static_cast(indexedDataRead); - reader.MutableScanner().OnIntervalResult(ShardedBatch, LastPK, std::move(Merger), IntervalIdx, reader); + auto copy = AllocationGuard; + reader.MutableScanner().OnIntervalResult(std::move(copy), ShardedBatch, LastPK, std::move(Merger), IntervalIdx, reader); + return true; +} + +bool TBaseMergeTask::DoOnAllocated( + 
std::shared_ptr&& guard, const std::shared_ptr& allocation) { + if (Context->IsAborted()) { + guard->Release(); + return false; + } + AllocationGuard = std::move(guard); + NConveyor::TScanServiceOperator::SendTaskToExecute(static_pointer_cast(allocation)); return true; } -bool TStartMergeTask::DoExecute() { +TConclusionStatus TStartMergeTask::DoExecuteImpl() { if (OnlyEmptySources) { ResultBatch = nullptr; - return true; + return TConclusionStatus::Success(); } bool sourcesInMemory = true; for (auto&& i : Sources) { @@ -71,9 +89,10 @@ bool TStartMergeTask::DoExecute() { TMemoryProfileGuard mGuard("SCAN_PROFILE::MERGE::EXCLUSIVE", IS_DEBUG_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD_SCAN_MEMORY)); auto& container = Sources.begin()->second->GetStageResult().GetBatch(); if (container && container->num_rows()) { - ResultBatch = container->BuildTable(); + ResultBatch = container->BuildTableVerified(); LastPK = Sources.begin()->second->GetLastPK(); - ResultBatch = NArrow::TColumnOperator().VerifyIfAbsent().Extract(ResultBatch, Context->GetProgramInputColumns()->GetColumnNamesVector()); + ResultBatch = + NArrow::TColumnOperator().VerifyIfAbsent().Extract(ResultBatch, Context->GetProgramInputColumns()->GetColumnNamesVector()); Context->GetCommonContext()->GetCounters().OnNoScanInterval(ResultBatch->num_rows()); if (Context->GetCommonContext()->IsReverse()) { ResultBatch = NArrow::ReverseRecords(ResultBatch); @@ -82,7 +101,7 @@ bool TStartMergeTask::DoExecute() { } Sources.clear(); AFL_VERIFY(!!LastPK == (!!ResultBatch && ResultBatch->num_rows())); - return true; + return TConclusionStatus::Success(); } TMemoryProfileGuard mGuard("SCAN_PROFILE::MERGE::COMMON", IS_DEBUG_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD_SCAN_MEMORY)); AFL_VERIFY(!Merger); @@ -100,10 +119,10 @@ bool TStartMergeTask::DoExecute() { AFL_VERIFY(Merger->GetSourcesCount() <= Sources.size()); if (Merger->GetSourcesCount() == 0 || isEmpty) { ResultBatch = nullptr; - return true; + return 
TConclusionStatus::Success(); } } - Merger->PutControlPoint(MergingContext->GetFinish()); + Merger->PutControlPoint(MergingContext->GetFinish(), false); Merger->SkipToLowerBound(MergingContext->GetStart(), MergingContext->GetIncludeStart()); const ui32 originalSourcesCount = Sources.size(); Sources.clear(); @@ -115,10 +134,12 @@ bool TStartMergeTask::DoExecute() { ResultBatch = Merger->SingleSourceDrain(MergingContext->GetFinish(), MergingContext->GetIncludeFinish(), &lastResultPosition); if (ResultBatch) { Context->GetCommonContext()->GetCounters().OnLogScanInterval(ResultBatch->num_rows()); - AFL_VERIFY(ResultBatch->schema()->Equals(Context->GetProgramInputColumns()->GetSchema()))("res", ResultBatch->schema()->ToString())("ctx", Context->GetProgramInputColumns()->GetSchema()->ToString()); + AFL_VERIFY(ResultBatch->schema()->Equals(Context->GetProgramInputColumns()->GetSchema()))("res", ResultBatch->schema()->ToString())( + "ctx", Context->GetProgramInputColumns()->GetSchema()->ToString()); } if (MergingContext->GetIncludeFinish() && originalSourcesCount == 1) { - AFL_VERIFY(Merger->IsEmpty())("merging_context_finish", MergingContext->GetFinish().DebugJson().GetStringRobust())("merger", Merger->DebugString()); + AFL_VERIFY(Merger->IsEmpty())("merging_context_finish", MergingContext->GetFinish().DebugJson().GetStringRobust())( + "merger", Merger->DebugString()); } } else { TMemoryProfileGuard mGuard("SCAN_PROFILE::MERGE::MANY", IS_DEBUG_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD_SCAN_MEMORY)); @@ -130,13 +151,13 @@ bool TStartMergeTask::DoExecute() { } AFL_VERIFY(!!LastPK == (!!ResultBatch && ResultBatch->num_rows())); PrepareResultBatch(); - return true; + return TConclusionStatus::Success(); } -TStartMergeTask::TStartMergeTask(const std::shared_ptr& mergingContext, const std::shared_ptr& readContext, THashMap>&& sources) +TStartMergeTask::TStartMergeTask(const std::shared_ptr& mergingContext, const std::shared_ptr& readContext, + THashMap>&& sources) : 
TBase(mergingContext, readContext) - , Sources(std::move(sources)) -{ + , Sources(std::move(sources)) { for (auto&& s : Sources) { AFL_VERIFY(s.second->IsDataReady()); } @@ -147,7 +168,7 @@ TStartMergeTask::TStartMergeTask(const std::shared_ptr& merging } } -bool TContinueMergeTask::DoExecute() { +TConclusionStatus TContinueMergeTask::DoExecuteImpl() { TMemoryProfileGuard mGuard("SCAN_PROFILE::MERGE::CONTINUE", IS_DEBUG_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD_SCAN_MEMORY)); std::optional lastResultPosition = DrainMergerLinearScan(Context->ReadSequentiallyBufferSize); if (lastResultPosition) { @@ -155,7 +176,7 @@ bool TContinueMergeTask::DoExecute() { } AFL_VERIFY(!!LastPK == (!!ResultBatch && ResultBatch->num_rows())); PrepareResultBatch(); - return true; + return TConclusionStatus::Success(); } -} +} // namespace NKikimr::NOlap::NReader::NPlain diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/merge.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/merge.h index 90cda0841363..bbe2d11ccb3a 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/merge.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/merge.h @@ -1,10 +1,11 @@ #pragma once #include "context.h" + #include #include - -#include #include +#include +#include namespace NKikimr::NOlap::NReader::NPlain { @@ -16,6 +17,8 @@ class TMergingContext { YDB_READONLY(bool, IncludeStart, false); YDB_READONLY(ui32, IntervalIdx, 0); bool IsExclusiveIntervalFlag = false; + std::optional IntervalChunkMemory; + public: TMergingContext(const NArrow::NMerger::TSortableBatchPosition& start, const NArrow::NMerger::TSortableBatchPosition& finish, const ui32 intervalIdx, const bool includeFinish, const bool includeStart, const bool isExclusiveInterval) @@ -24,9 +27,16 @@ class TMergingContext { , IncludeFinish(includeFinish) , IncludeStart(includeStart) , IntervalIdx(intervalIdx) - , IsExclusiveIntervalFlag(isExclusiveInterval) - { + , 
IsExclusiveIntervalFlag(isExclusiveInterval) { + } + + void SetIntervalChunkMemory(const ui64 value) { + IntervalChunkMemory = value; + } + ui64 GetIntervalChunkMemory() const { + AFL_VERIFY(IntervalChunkMemory); + return *IntervalChunkMemory; } bool IsExclusiveInterval() const { @@ -42,12 +52,12 @@ class TMergingContext { result.InsertValue("exclusive", IsExclusiveIntervalFlag); return result; } - }; -class TBaseMergeTask: public IDataTasksProcessor::ITask { +class TBaseMergeTask: public IDataTasksProcessor::ITask, public NGroupedMemoryManager::IAllocation { private: using TBase = IDataTasksProcessor::ITask; + protected: std::shared_ptr ResultBatch; std::shared_ptr LastPK; @@ -57,20 +67,25 @@ class TBaseMergeTask: public IDataTasksProcessor::ITask { std::shared_ptr MergingContext; const ui32 IntervalIdx; std::optional ShardedBatch; + std::shared_ptr AllocationGuard; [[nodiscard]] std::optional DrainMergerLinearScan(const std::optional resultBufferLimit); void PrepareResultBatch(); + private: virtual bool DoApply(IDataReader& indexedDataRead) const override; + virtual bool DoOnAllocated(std::shared_ptr&& guard, + const std::shared_ptr& allocation) override; + public: TBaseMergeTask(const std::shared_ptr& mergingContext, const std::shared_ptr& readContext) : TBase(readContext->GetCommonContext()->GetScanActorId()) + , IAllocation(TValidator::CheckNotNull(mergingContext)->GetIntervalChunkMemory()) , Guard(readContext->GetCommonContext()->GetCounters().GetMergeTasksGuard()) , Context(readContext) , MergingContext(mergingContext) , IntervalIdx(MergingContext->GetIntervalIdx()) { - } }; @@ -79,32 +94,37 @@ class TStartMergeTask: public TBaseMergeTask { using TBase = TBaseMergeTask; bool OnlyEmptySources = true; THashMap> Sources; + protected: - virtual bool DoExecute() override; + virtual TConclusionStatus DoExecuteImpl() override; + public: virtual TString GetTaskClassIdentifier() const override { return "CS::MERGE_START"; } - TStartMergeTask(const std::shared_ptr& 
mergingContext, - const std::shared_ptr& readContext, THashMap>&& sources); + TStartMergeTask(const std::shared_ptr& mergingContext, const std::shared_ptr& readContext, + THashMap>&& sources); }; class TContinueMergeTask: public TBaseMergeTask { private: using TBase = TBaseMergeTask; + protected: - virtual bool DoExecute() override; + virtual TConclusionStatus DoExecuteImpl() override; + public: virtual TString GetTaskClassIdentifier() const override { return "CS::MERGE_CONTINUE"; } - TContinueMergeTask(const std::shared_ptr& mergingContext, const std::shared_ptr& readContext, std::unique_ptr&& merger) + TContinueMergeTask(const std::shared_ptr& mergingContext, const std::shared_ptr& readContext, + std::unique_ptr&& merger) : TBase(mergingContext, readContext) { AFL_VERIFY(merger); Merger = std::move(merger); } }; -} +} // namespace NKikimr::NOlap::NReader::NPlain diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/plain_read_data.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/plain_read_data.cpp index 9def8738cab9..04ed0d1c6f26 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/plain_read_data.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/plain_read_data.cpp @@ -8,38 +8,43 @@ TPlainReadData::TPlainReadData(const std::shared_ptr& context) { ui32 sourceIdx = 0; std::deque> sources; - const auto& portionsOrdered = GetReadMetadata()->SelectInfo->GetPortionsOrdered(GetReadMetadata()->IsDescSorted()); + const auto& portions = GetReadMetadata()->SelectInfo->PortionsOrderedPK; const auto& committed = GetReadMetadata()->CommittedBlobs; - auto itCommitted = committed.begin(); - auto itPortion = portionsOrdered.begin(); - ui64 committedPortionsBytes = 0; - ui64 insertedPortionsBytes = 0; ui64 compactedPortionsBytes = 0; - while (itCommitted != committed.end() || itPortion != portionsOrdered.end()) { - bool movePortion = false; - if (itCommitted == committed.end()) { - movePortion = 
true; - } else if (itPortion == portionsOrdered.end()) { - movePortion = false; - } else if (itCommitted->GetFirstVerified() < (*itPortion)->IndexKeyStart()) { - movePortion = false; + ui64 insertedPortionsBytes = 0; + ui64 committedPortionsBytes = 0; + for (auto&& i : portions) { + if (i->GetMeta().GetProduced() == NPortion::EProduced::COMPACTED || i->GetMeta().GetProduced() == NPortion::EProduced::SPLIT_COMPACTED) { + compactedPortionsBytes += i->GetTotalBlobBytes(); } else { - movePortion = true; + insertedPortionsBytes += i->GetTotalBlobBytes(); + } + sources.emplace_back(std::make_shared(sourceIdx++, i, SpecialReadContext)); + } + for (auto&& i : committed) { + if (i.HasSnapshot()) { + continue; + } + if (GetReadMetadata()->IsMyUncommitted(i.GetWriteIdVerified())) { + continue; } + if (GetReadMetadata()->GetPKRangesFilter().CheckPoint(i.GetFirst()) || + GetReadMetadata()->GetPKRangesFilter().CheckPoint(i.GetLast())) { + GetReadMetadata()->SetConflictedWriteId(i.GetWriteIdVerified()); + } + } - if (movePortion) { - if ((*itPortion)->GetMeta().GetProduced() == NPortion::EProduced::COMPACTED || (*itPortion)->GetMeta().GetProduced() == NPortion::EProduced::SPLIT_COMPACTED) { - compactedPortionsBytes += (*itPortion)->GetTotalBlobBytes(); - } else { - insertedPortionsBytes += (*itPortion)->GetTotalBlobBytes(); + for (auto&& i : committed) { + if (!i.HasSnapshot()) { + if (GetReadMetadata()->IsWriteConflictable(i.GetWriteIdVerified())) { + continue; } - sources.emplace_back(std::make_shared(sourceIdx++, *itPortion, SpecialReadContext, (*itPortion)->IndexKeyStart(), (*itPortion)->IndexKeyEnd())); - ++itPortion; - } else { - sources.emplace_back(std::make_shared(sourceIdx++, *itCommitted, SpecialReadContext, itCommitted->GetFirstVerified(), itCommitted->GetLastVerified())); - committedPortionsBytes += itCommitted->GetSize(); - ++itCommitted; + } else if (GetReadMetadata()->GetPKRangesFilter().IsPortionInPartialUsage(i.GetFirst(), i.GetLast()) == + 
TPKRangeFilter::EUsageClass::DontUsage) { + continue; } + sources.emplace_back(std::make_shared(sourceIdx++, i, SpecialReadContext)); + committedPortionsBytes += i.GetSize(); } Scanner = std::make_shared(std::move(sources), SpecialReadContext); @@ -54,16 +59,16 @@ TPlainReadData::TPlainReadData(const std::shared_ptr& context) } -std::vector TPlainReadData::DoExtractReadyResults(const int64_t maxRowsInBatch) { - auto result = TPartialReadResult::SplitResults(std::move(PartialResults), maxRowsInBatch); +std::vector> TPlainReadData::DoExtractReadyResults(const int64_t /*maxRowsInBatch*/) { + auto result = std::move(PartialResults); + PartialResults.clear(); +// auto result = TPartialReadResult::SplitResults(std::move(PartialResults), maxRowsInBatch); ui32 count = 0; for (auto&& r: result) { - count += r.GetRecordsCount(); + count += r->GetRecordsCount(); } AFL_VERIFY(count == ReadyResultsCount); - ReadyResultsCount = 0; - PartialResults.clear(); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "DoExtractReadyResults")("result", result.size())("count", count)("finished", Scanner->IsFinished()); return result; @@ -76,7 +81,7 @@ TConclusion TPlainReadData::DoReadNextInterval() { void TPlainReadData::OnIntervalResult(const std::shared_ptr& result) { // result->GetResourcesGuardOnly()->Update(result->GetMemorySize()); ReadyResultsCount += result->GetRecordsCount(); - PartialResults.emplace_back(std::move(*result)); + PartialResults.emplace_back(result); } } diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/plain_read_data.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/plain_read_data.h index 39d993b156d6..93d2a56bad14 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/plain_read_data.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/plain_read_data.h @@ -14,9 +14,8 @@ class TPlainReadData: public IDataReader, TNonCopyable, NColumnShard::TMonitorin using TBase = IDataReader; 
std::shared_ptr Scanner; std::shared_ptr SpecialReadContext; - std::vector PartialResults; + std::vector> PartialResults; ui32 ReadyResultsCount = 0; - bool AbortedFlag = false; protected: virtual TConclusionStatus DoStart() override { return Scanner->Start(); @@ -31,11 +30,11 @@ class TPlainReadData: public IDataReader, TNonCopyable, NColumnShard::TMonitorin return sb; } - virtual std::vector DoExtractReadyResults(const int64_t maxRowsInBatch) override; + virtual std::vector> DoExtractReadyResults(const int64_t maxRowsInBatch) override; virtual TConclusion DoReadNextInterval() override; virtual void DoAbort() override { - AbortedFlag = true; + SpecialReadContext->Abort(); Scanner->Abort(); PartialResults.clear(); Y_ABORT_UNLESS(IsFinished()); @@ -68,7 +67,7 @@ class TPlainReadData: public IDataReader, TNonCopyable, NColumnShard::TMonitorin TPlainReadData(const std::shared_ptr& context); ~TPlainReadData() { - if (!AbortedFlag) { + if (!SpecialReadContext->IsAborted()) { Abort("unexpected on destructor"); } } diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/scanner.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/scanner.cpp index 9eafaf6f7cf1..87de386beda9 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/scanner.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/scanner.cpp @@ -1,27 +1,33 @@ -#include "scanner.h" #include "plain_read_data.h" +#include "scanner.h" + #include + #include namespace NKikimr::NOlap::NReader::NPlain { -void TScanHead::OnIntervalResult(const std::optional& newBatch, const std::shared_ptr& lastPK, +void TScanHead::OnIntervalResult(std::shared_ptr&& allocationGuard, + const std::optional& newBatch, const std::shared_ptr& lastPK, std::unique_ptr&& merger, const ui32 intervalIdx, TPlainReadData& reader) { - if (Context->GetReadMetadata()->Limit && (!newBatch || newBatch->GetRecordsCount() == 0) && InFlightLimit < 1000) { - if (++ZeroCount == std::max(16, 
InFlightLimit)) { - InFlightLimit = std::max(MaxInFlight, InFlightLimit * 2); - ZeroCount = 0; - } - } else { - ZeroCount = 0; + if (Context->GetReadMetadata()->Limit && (!newBatch || newBatch->GetRecordsCount() == 0) && InFlightLimit < MaxInFlight) { + InFlightLimit = std::min(MaxInFlight, InFlightLimit * 4); } auto itInterval = FetchingIntervals.find(intervalIdx); AFL_VERIFY(itInterval != FetchingIntervals.end()); itInterval->second->SetMerger(std::move(merger)); AFL_VERIFY(Context->GetCommonContext()->GetReadMetadata()->IsSorted()); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "interval_result_received")("interval_idx", intervalIdx)( + "intervalId", itInterval->second->GetIntervalId()); if (newBatch && newBatch->GetRecordsCount()) { - const std::optional callbackIdxSubscriver = itInterval->second->HasMerger() ? std::optional(intervalIdx) : std::nullopt; - AFL_VERIFY(ReadyIntervals.emplace(intervalIdx, std::make_shared(itInterval->second->GetResourcesGuard(), *newBatch, lastPK, callbackIdxSubscriver)).second); + std::optional callbackIdxSubscriver; + std::shared_ptr gGuard; + if (itInterval->second->HasMerger()) { + callbackIdxSubscriver = intervalIdx; + } else { + gGuard = itInterval->second->GetGroupGuard(); + } + AFL_VERIFY(ReadyIntervals.emplace(intervalIdx, std::make_shared(std::move(allocationGuard), std::move(gGuard), *newBatch, lastPK, callbackIdxSubscriver)).second); } else { AFL_VERIFY(ReadyIntervals.emplace(intervalIdx, nullptr).second); } @@ -31,9 +37,13 @@ void TScanHead::OnIntervalResult(const std::optionalGetIntervalIdx(); auto it = ReadyIntervals.find(intervalIdx); if (it == ReadyIntervals.end()) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "interval_result_absent")("interval_idx", intervalIdx)( + "merger", interval->HasMerger())("interval_id", interval->GetIntervalId()); break; + } else { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "interval_result")("interval_idx", intervalIdx)("count", + it->second ? 
it->second->GetRecordsCount() : 0)("merger", interval->HasMerger())("interval_id", interval->GetIntervalId()); } - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "interval_result")("interval_idx", intervalIdx)("count", it->second ? it->second->GetRecordsCount() : 0); auto result = it->second; ReadyIntervals.erase(it); if (result) { @@ -51,7 +61,8 @@ void TScanHead::OnIntervalResult(const std::optionalfirst); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "wait_interval")("remained", FetchingIntervals.size())( + "interval_idx", FetchingIntervals.begin()->first); } } @@ -67,7 +78,8 @@ TConclusionStatus TScanHead::Start() { i.second->IncIntervalsCount(); } if (!detectorResult) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "scanner_initializer_aborted")("reason", detectorResult.GetErrorMessage()); + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "scanner_initializer_aborted")( + "reason", detectorResult.GetErrorMessage()); Abort(); return detectorResult; } @@ -83,9 +95,11 @@ TConclusionStatus TScanHead::Start() { for (auto&& i : context.GetCurrentSources()) { i.second->IncIntervalsCount(); } - auto detectorResult = DetectSourcesFeatureInContextIntervalScan(context.GetCurrentSources(), guaranteeExclusivePK || context.GetIsExclusiveInterval()); + auto detectorResult = + DetectSourcesFeatureInContextIntervalScan(context.GetCurrentSources(), guaranteeExclusivePK || context.GetIsExclusiveInterval()); if (!detectorResult) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "scanner_initializer_aborted")("reason", detectorResult.GetErrorMessage()); + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "scanner_initializer_aborted")( + "reason", detectorResult.GetErrorMessage()); Abort(); return detectorResult; } @@ -95,12 +109,11 @@ TConclusionStatus TScanHead::Start() { } TScanHead::TScanHead(std::deque>&& sources, const std::shared_ptr& context) - : Context(context) -{ - if (!HasAppData() || 
!AppDataVerified().ColumnShardConfig.HasMaxInFlightIntervalsOnRequest()) { - MaxInFlight = 256; - } else { - MaxInFlight = AppDataVerified().ColumnShardConfig.GetMaxInFlightIntervalsOnRequest(); + : Context(context) { + if (HasAppData()) { + if (AppDataVerified().ColumnShardConfig.HasMaxInFlightIntervalsOnRequest()) { + MaxInFlight = AppDataVerified().ColumnShardConfig.GetMaxInFlightIntervalsOnRequest(); + } } if (Context->GetReadMetadata()->Limit) { @@ -120,88 +133,89 @@ class TSourcesStorageForMemoryOptimization { private: class TSourceInfo { private: + YDB_READONLY(ui64, Memory, 0); YDB_READONLY_DEF(std::shared_ptr, Source); YDB_READONLY_DEF(std::shared_ptr, FetchingInfo); + public: TSourceInfo(const std::shared_ptr& source, const std::shared_ptr& fetchingInfo) : Source(source) - , FetchingInfo(fetchingInfo) - { - + , FetchingInfo(fetchingInfo) { + Memory = FetchingInfo->PredictRawBytes(Source); } NJson::TJsonValue DebugJson() const { NJson::TJsonValue result = NJson::JSON_MAP; result.InsertValue("source", Source->DebugJsonForMemory()); -// result.InsertValue("fetching", Fetching->DebugJsonForMemory()); + result.InsertValue("memory", Memory); + // result.InsertValue("FetchingInfo", FetchingInfo->DebugJsonForMemory()); + return result; + } + + bool ReduceMemory() { + const bool result = FetchingInfo->InitSourceSeqColumnIds(Source); + if (result) { + Memory = FetchingInfo->PredictRawBytes(Source); + } return result; } + + bool operator<(const TSourceInfo& item) const { + return Memory < item.Memory; + } + }; - std::map> Sources; + std::vector Sources; YDB_READONLY(ui64, MemorySum, 0); - YDB_READONLY_DEF(std::set, PathIds); + public: TString DebugString() const { NJson::TJsonValue resultJson; auto& memorySourcesArr = resultJson.InsertValue("sources_by_memory", NJson::JSON_ARRAY); resultJson.InsertValue("sources_by_memory_count", Sources.size()); - for (auto it = Sources.rbegin(); it != Sources.rend(); ++it) { + for (auto&& it: Sources) { auto& sourceMap = 
memorySourcesArr.AppendValue(NJson::JSON_MAP); - sourceMap.InsertValue("memory", it->first); auto& sourcesArr = sourceMap.InsertValue("sources", NJson::JSON_ARRAY); - for (auto&& s : it->second) { - sourcesArr.AppendValue(s.second.DebugJson()); - } + sourcesArr.AppendValue(it.DebugJson()); } return resultJson.GetStringRobust(); } - void UpdateSource(const ui64 oldMemoryInfo, const ui32 sourceIdx) { - auto it = Sources.find(oldMemoryInfo); - AFL_VERIFY(it != Sources.end()); - auto itSource = it->second.find(sourceIdx); - AFL_VERIFY(itSource != it->second.end()); - auto sourceInfo = itSource->second; - it->second.erase(itSource); - if (it->second.empty()) { - Sources.erase(it); - } - AFL_VERIFY(MemorySum >= oldMemoryInfo); - MemorySum -= oldMemoryInfo; - AddSource(sourceInfo.GetSource(), sourceInfo.GetFetchingInfo()); - } - void AddSource(const std::shared_ptr& source, const std::shared_ptr& fetching) { - const ui64 sourceMemory = fetching->PredictRawBytes(source); - MemorySum += sourceMemory; - AFL_VERIFY(Sources[sourceMemory].emplace(source->GetSourceIdx(), TSourceInfo(source, fetching)).second); - PathIds.emplace(source->GetPathId()); + Sources.emplace_back(TSourceInfo(source, fetching)); + MemorySum += Sources.back().GetMemory(); } bool Optimize(const ui64 memoryLimit) { - bool modified = true; - while (MemorySum > memoryLimit && modified) { - modified = false; - for (auto it = Sources.rbegin(); it != Sources.rend(); ++it) { - for (auto&& [sourceIdx, sourceInfo] : it->second) { - if (!sourceInfo.GetFetchingInfo()->InitSourceSeqColumnIds(sourceInfo.GetSource())) { - continue; - } - modified = true; - UpdateSource(it->first, sourceIdx); - break; - } - if (modified) { - break; + if (MemorySum <= memoryLimit) { + return true; + } + std::sort(Sources.begin(), Sources.end()); + while (true) { + std::vector nextSources; + while (memoryLimit < MemorySum && Sources.size()) { + const ui64 currentMemory = Sources.back().GetMemory(); + if (Sources.back().ReduceMemory()) { + 
AFL_VERIFY(currentMemory <= MemorySum); + MemorySum -= currentMemory; + MemorySum += Sources.back().GetMemory(); + nextSources.emplace_back(std::move(Sources.back())); } + Sources.pop_back(); } + if (nextSources.empty() || MemorySum <= memoryLimit) { + break; + } + std::sort(nextSources.begin(), nextSources.end()); + std::swap(nextSources, Sources); } - return MemorySum < memoryLimit; + return MemorySum <= memoryLimit; } }; -TConclusionStatus TScanHead::DetectSourcesFeatureInContextIntervalScan(const THashMap>& intervalSources, const bool isExclusiveInterval) const { +TConclusionStatus TScanHead::DetectSourcesFeatureInContextIntervalScan( + const THashMap>& intervalSources, const bool isExclusiveInterval) const { TSourcesStorageForMemoryOptimization optimizer; for (auto&& i : intervalSources) { if (!isExclusiveInterval) { @@ -212,23 +226,18 @@ TConclusionStatus TScanHead::DetectSourcesFeatureInContextIntervalScan(const THa } const ui64 startMemory = optimizer.GetMemorySum(); if (!optimizer.Optimize(Context->ReduceMemoryIntervalLimit) && Context->RejectMemoryIntervalLimit < optimizer.GetMemorySum()) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "next_internal_broken") - ("reason", "a lot of memory need")("start", startMemory) - ("reduce_limit", Context->ReduceMemoryIntervalLimit) - ("reject_limit", Context->RejectMemoryIntervalLimit) - ("need", optimizer.GetMemorySum()) - ("path_ids", JoinSeq(",", optimizer.GetPathIds())) - ("details", IS_LOG_PRIORITY_ENABLED(NActors::NLog::PRI_DEBUG, NKikimrServices::TX_COLUMNSHARD_SCAN) ? 
optimizer.DebugString() : "NEED_DEBUG_LEVEL"); + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "next_internal_broken")("reason", "a lot of memory need")("start", startMemory)( + "reduce_limit", Context->ReduceMemoryIntervalLimit)("reject_limit", Context->RejectMemoryIntervalLimit)( + "need", optimizer.GetMemorySum())("path_id", Context->GetReadMetadata()->GetPathId())( + "details", IS_LOG_PRIORITY_ENABLED(NActors::NLog::PRI_DEBUG, NKikimrServices::TX_COLUMNSHARD_SCAN) ? optimizer.DebugString() + : "NEED_DEBUG_LEVEL"); Context->GetCommonContext()->GetCounters().OnOptimizedIntervalMemoryFailed(optimizer.GetMemorySum()); - return TConclusionStatus::Fail("We need a lot of memory in time for interval scanner: " + - ::ToString(optimizer.GetMemorySum()) + " path_ids: " + JoinSeq(",", optimizer.GetPathIds()) + ". We need wait compaction processing. Sorry."); + return TConclusionStatus::Fail("We need a lot of memory in time for interval scanner: " + ::ToString(optimizer.GetMemorySum()) + + " path_id: " + Context->GetReadMetadata()->GetPathId() + ". We need wait compaction processing. 
Sorry."); } else if (optimizer.GetMemorySum() < startMemory) { - AFL_INFO(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "memory_reduce_active") - ("reason", "need reduce memory")("start", startMemory) - ("reduce_limit", Context->ReduceMemoryIntervalLimit) - ("reject_limit", Context->RejectMemoryIntervalLimit) - ("need", optimizer.GetMemorySum()) - ("path_ids", JoinSeq(",", optimizer.GetPathIds())); + AFL_INFO(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "memory_reduce_active")("reason", "need reduce memory")("start", startMemory)( + "reduce_limit", Context->ReduceMemoryIntervalLimit)("reject_limit", Context->RejectMemoryIntervalLimit)( + "need", optimizer.GetMemorySum())("path_id", Context->GetReadMetadata()->GetPathId()); Context->GetCommonContext()->GetCounters().OnOptimizedIntervalMemoryReduced(startMemory - optimizer.GetMemorySum()); } Context->GetCommonContext()->GetCounters().OnOptimizedIntervalMemoryRequired(optimizer.GetMemorySum()); @@ -236,22 +245,28 @@ TConclusionStatus TScanHead::DetectSourcesFeatureInContextIntervalScan(const THa } TConclusion TScanHead::BuildNextInterval() { - if (AbortFlag) { + if (Context->IsAborted()) { return false; } - while (BorderPoints.size() && (FetchingIntervals.size() < InFlightLimit || BorderPoints.begin()->second.GetStartSources().empty())) { + while (BorderPoints.size()) { + if (BorderPoints.begin()->second.GetStartSources().size()) { + if (FetchingIntervals.size() >= InFlightLimit) { + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "skip_next_interval")("reason", "too many intervals in flight")( + "count", FetchingIntervals.size())("limit", InFlightLimit); + return false; + } + } auto firstBorderPointInfo = std::move(BorderPoints.begin()->second); CurrentState.OnStartPoint(firstBorderPointInfo); if (CurrentState.GetIsSpecialPoint()) { const ui32 intervalIdx = SegmentIdxCounter++; - auto interval = std::make_shared( - BorderPoints.begin()->first, BorderPoints.begin()->first, intervalIdx, 
CurrentState.GetCurrentSources(), - Context, true, true, false); + auto interval = std::make_shared(BorderPoints.begin()->first, BorderPoints.begin()->first, intervalIdx, + CurrentState.GetCurrentSources(), Context, true, true, false); FetchingIntervals.emplace(intervalIdx, interval); IntervalStats.emplace_back(CurrentState.GetCurrentSources().size(), true); - NResourceBroker::NSubscribe::ITask::StartResourceSubscription(Context->GetCommonContext()->GetResourceSubscribeActorId(), interval); - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "new_interval")("interval_idx", intervalIdx)("interval", interval->DebugJson()); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "new_interval")("interval_idx", intervalIdx)( + "interval", interval->DebugJson()); } CurrentState.OnFinishPoint(firstBorderPointInfo); @@ -262,12 +277,13 @@ TConclusion TScanHead::BuildNextInterval() { Y_ABORT_UNLESS(BorderPoints.size()); CurrentState.OnNextPointInfo(BorderPoints.begin()->second); const ui32 intervalIdx = SegmentIdxCounter++; - auto interval = std::make_shared(*CurrentStart, BorderPoints.begin()->first, intervalIdx, CurrentState.GetCurrentSources(), Context, - CurrentState.GetIncludeFinish(), CurrentState.GetIncludeStart(), CurrentState.GetIsExclusiveInterval()); + auto interval = + std::make_shared(*CurrentStart, BorderPoints.begin()->first, intervalIdx, CurrentState.GetCurrentSources(), + Context, CurrentState.GetIncludeFinish(), CurrentState.GetIncludeStart(), CurrentState.GetIsExclusiveInterval()); FetchingIntervals.emplace(intervalIdx, interval); IntervalStats.emplace_back(CurrentState.GetCurrentSources().size(), false); - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "new_interval")("interval_idx", intervalIdx)("interval", interval->DebugJson()); - NResourceBroker::NSubscribe::ITask::StartResourceSubscription(Context->GetCommonContext()->GetResourceSubscribeActorId(), interval); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", 
"new_interval")("interval_idx", intervalIdx)( + "interval", interval->DebugJson()); return true; } else { IntervalStats.emplace_back(CurrentState.GetCurrentSources().size(), false); @@ -285,7 +301,7 @@ bool TScanHead::IsReverse() const { } void TScanHead::Abort() { - AbortFlag = true; + AFL_VERIFY(Context->IsAborted()); THashSet sourceIds; for (auto&& i : FetchingIntervals) { for (auto&& s : i.second->GetSources()) { @@ -310,4 +326,4 @@ void TScanHead::Abort() { Y_ABORT_UNLESS(IsFinished()); } -} +} // namespace NKikimr::NOlap::NReader::NPlain diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/scanner.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/scanner.h index 7092dac19acd..09649e788147 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/scanner.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/scanner.h @@ -2,6 +2,7 @@ #include "source.h" #include "interval.h" #include +#include #include #include @@ -80,12 +81,11 @@ class TScanHead { ui64 InFlightLimit = 1; ui64 MaxInFlight = 256; ui64 ZeroCount = 0; - bool AbortFlag = false; void DrainSources(); [[nodiscard]] TConclusionStatus DetectSourcesFeatureInContextIntervalScan(const THashMap>& intervalSources, const bool isExclusiveInterval) const; public: void OnSentDataFromInterval(const ui32 intervalIdx) const { - if (AbortFlag) { + if (Context->IsAborted()) { return; } auto it = FetchingIntervals.find(intervalIdx); @@ -110,8 +110,10 @@ class TScanHead { return sb; } - void OnIntervalResult(const std::optional& batch, const std::shared_ptr& lastPK, - std::unique_ptr&& merger, const ui32 intervalIdx, TPlainReadData& reader); + void OnIntervalResult(std::shared_ptr&& allocationGuard, + const std::optional& batch, + const std::shared_ptr& lastPK, std::unique_ptr&& merger, + const ui32 intervalIdx, TPlainReadData& reader); TConclusionStatus Start(); diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp 
b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp index d825d0594392..bef10d38f6b1 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp @@ -1,15 +1,15 @@ -#include "source.h" - #include "constructor.h" #include "fetched_data.h" #include "interval.h" #include "plain_read_data.h" +#include "source.h" -#include +#include #include #include #include #include +#include namespace NKikimr::NOlap::NReader::NPlain { @@ -19,30 +19,19 @@ void IDataSource::InitFetchingPlan(const std::shared_ptr& fetch FetchingPlan = fetching; } -void IDataSource::RegisterInterval(TFetchingInterval& interval) { +void IDataSource::RegisterInterval(TFetchingInterval& interval, const std::shared_ptr& sourcePtr) { AFL_VERIFY(FetchingPlan); + AFL_VERIFY(!Context->IsAborted()); if (!IsReadyFlag) { AFL_VERIFY(Intervals.emplace(interval.GetIntervalIdx(), &interval).second); } -} - -void IDataSource::SetIsReady() { - AFL_VERIFY(!IsReadyFlag); - IsReadyFlag = true; - for (auto&& i : Intervals) { - i.second->OnSourceFetchStageReady(SourceIdx); - } - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "source_ready")("intervals_count", Intervals.size())("source_idx", SourceIdx); - Intervals.clear(); -} - -void IDataSource::OnInitResourcesGuard(const std::shared_ptr& sourcePtr) { - AFL_VERIFY(FetchingPlan); - if (AtomicCas(&FilterStageFlag, 1, 0)) { + if (AtomicCas(&SourceStartedFlag, 1, 0)) { + SetFirstIntervalId(interval.GetIntervalId()); + AFL_VERIFY(FetchingPlan); StageData = std::make_unique(GetExclusiveIntervalOnly() && IsSourceInMemory()); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("InitFetchingPlan", FetchingPlan->DebugString())("source_idx", SourceIdx); NActors::TLogContextGuard logGuard(NActors::TLogContextBuilder::Build()("source", SourceIdx)("method", "InitFetchingPlan")); - if (IsAborted()) { + if (Context->IsAborted()) { 
AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "InitFetchingPlanAborted"); return; } @@ -52,6 +41,16 @@ void IDataSource::OnInitResourcesGuard(const std::shared_ptr& sourc } } +void IDataSource::SetIsReady() { + AFL_VERIFY(!IsReadyFlag); + IsReadyFlag = true; + for (auto&& i : Intervals) { + i.second->OnSourceFetchStageReady(SourceIdx); + } + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "source_ready")("intervals_count", Intervals.size())("source_idx", SourceIdx); + Intervals.clear(); +} + void TPortionDataSource::NeedFetchColumns(const std::set& columnIds, TBlobsAction& blobsAction, THashMap& defaultBlocks, const std::shared_ptr& filter) { const NArrow::TColumnFilter& cFilter = filter ? *filter : NArrow::TColumnFilter::BuildAllowFilter(); @@ -65,30 +64,32 @@ void TPortionDataSource::NeedFetchColumns(const std::set& columnIds, TBlob auto itFilter = cFilter.GetIterator(false, Portion->NumRows(i)); bool itFinished = false; for (auto&& c : columnChunks) { - Y_ABORT_UNLESS(!itFinished); + AFL_VERIFY(!itFinished); if (!itFilter.IsBatchForSkip(c->GetMeta().GetNumRows())) { - auto reading = blobsAction.GetReading(Schema->GetIndexInfo().GetColumnStorageId(c->GetColumnId(), Portion->GetMeta().GetTierName())); + auto reading = + blobsAction.GetReading(Schema->GetIndexInfo().GetColumnStorageId(c->GetColumnId(), Portion->GetMeta().GetTierName())); reading->SetIsBackgroundProcess(false); reading->AddRange(Portion->RestoreBlobRange(c->BlobRange)); ++fetchedChunks; } else { defaultBlocks.emplace(c->GetAddress(), - TPortionInfo::TAssembleBlobInfo(c->GetMeta().GetNumRows(), Schema->GetDefaultValueVerified(c->GetColumnId()))); + TPortionInfo::TAssembleBlobInfo(c->GetMeta().GetNumRows(), Schema->GetExternalDefaultValueVerified(c->GetColumnId()))); ++nullChunks; } itFinished = !itFilter.Next(c->GetMeta().GetNumRows()); } AFL_VERIFY(itFinished)("filter", itFilter.DebugString())("count", Portion->NumRows(i)); } - 
AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "chunks_stats")("fetch", fetchedChunks)("null", nullChunks) - ("reading_actions", blobsAction.GetStorageIds())("columns", columnIds.size()); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "chunks_stats")("fetch", fetchedChunks)("null", nullChunks)( + "reading_actions", blobsAction.GetStorageIds())("columns", columnIds.size()); } -bool TPortionDataSource::DoStartFetchingColumns(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& columns) { +bool TPortionDataSource::DoStartFetchingColumns( + const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const TColumnsSetIds& columns) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", step.GetName()); - AFL_VERIFY(columns->GetColumnsCount()); + AFL_VERIFY(columns.GetColumnsCount()); AFL_VERIFY(!StageData->GetAppliedFilter() || !StageData->GetAppliedFilter()->IsTotalDenyFilter()); - auto& columnIds = columns->GetColumnIds(); + auto& columnIds = columns.GetColumnIds(); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", step.GetName())("fetching_info", step.DebugString()); TBlobsAction action(GetContext()->GetCommonContext()->GetStoragesManager(), NBlobOperations::EConsumer::SCAN); @@ -108,9 +109,10 @@ bool TPortionDataSource::DoStartFetchingColumns(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& indexes) { +bool TPortionDataSource::DoStartFetchingIndexes( + const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& indexes) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", step.GetName()); - Y_ABORT_UNLESS(indexes->GetIndexesCount()); + AFL_VERIFY(indexes->GetIndexesCount()); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", step.GetName())("fetching_info", step.DebugString()); TBlobsAction action(GetContext()->GetCommonContext()->GetStoragesManager(), NBlobOperations::EConsumer::SCAN); @@ -121,9 
+123,11 @@ bool TPortionDataSource::DoStartFetchingIndexes(const std::shared_ptrGetIndexInfo().GetIndexStorageId(i.GetIndexId())); - readAction->SetIsBackgroundProcess(false); - readAction->AddRange(Portion->RestoreBlobRange(i.GetBlobRange())); + if (auto bRange = i.GetBlobRangeOptional()) { + auto readAction = action.GetReading(Schema->GetIndexInfo().GetIndexStorageId(i.GetIndexId())); + readAction->SetIsBackgroundProcess(false); + readAction->AddRange(Portion->RestoreBlobRange(*bRange)); + } } if (indexes->GetIndexIdsSet().size() != indexIds.size()) { return false; @@ -154,7 +158,11 @@ void TPortionDataSource::DoApplyIndex(const NIndexes::TIndexCheckerContainer& in if (!indexIds.contains(i->GetIndexId())) { continue; } - indexBlobs[i->GetIndexId()].emplace_back(StageData->ExtractBlob(i->GetAddress())); + if (i->HasBlobData()) { + indexBlobs[i->GetIndexId()].emplace_back(i->GetBlobDataVerified()); + } else { + indexBlobs[i->GetIndexId()].emplace_back(StageData->ExtractBlob(i->GetAddress())); + } } for (auto&& i : indexIds) { if (!indexBlobs.contains(i)) { @@ -181,35 +189,12 @@ void TPortionDataSource::DoApplyIndex(const NIndexes::TIndexCheckerContainer& in void TPortionDataSource::DoAssembleColumns(const std::shared_ptr& columns) { auto blobSchema = GetContext()->GetReadMetadata()->GetLoadSchemaVerified(*Portion); - if (SequentialEntityIds.empty()) { - MutableStageData().AddBatch(Portion->PrepareForAssemble(*blobSchema, columns->GetFilteredSchemaVerified(), MutableStageData().MutableBlobs()).AssembleTable()); - } else { - { - auto inMemColumns = columns->GetColumnIds(); - for (auto&& i : SequentialEntityIds) { - inMemColumns.erase(i); - } - if (inMemColumns.size()) { - auto filteredSchema = std::make_shared(columns->GetFilteredSchemaPtrVerified(), inMemColumns); - MutableStageData().AddBatch(Portion->PrepareForAssemble(*blobSchema, *filteredSchema, MutableStageData().MutableBlobs()).AssembleTable()); - } - } - { - std::set scanColumns; - for (auto&& i : 
columns->GetColumnIds()) { - if (SequentialEntityIds.contains(i)) { - scanColumns.emplace(i); - } - } - if (scanColumns.size()) { - auto filteredSchema = std::make_shared(columns->GetFilteredSchemaPtrVerified(), scanColumns); - MutableStageData().AddBatch(Portion->PrepareForAssemble(*blobSchema, *filteredSchema, MutableStageData().MutableBlobs()).AssembleForSeqAccess()); - } - } - } + MutableStageData().AddBatch(Portion->PrepareForAssemble(*blobSchema, columns->GetFilteredSchemaVerified(), MutableStageData().MutableBlobs()) + .AssembleToGeneralContainer(SequentialEntityIds)); } -bool TCommittedDataSource::DoStartFetchingColumns(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& /*columns*/) { +bool TCommittedDataSource::DoStartFetchingColumns( + const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const TColumnsSetIds& /*columns*/) { if (ReadStarted) { return false; } @@ -222,7 +207,7 @@ bool TCommittedDataSource::DoStartFetchingColumns(const std::shared_ptrSetIsBackgroundProcess(false); readAction->AddRange(CommittedBlob.GetBlobRange()); - std::vector> actions = {readAction}; + std::vector> actions = { readAction }; auto constructor = std::make_shared(actions, sourcePtr, step, GetContext(), "CS::READ::" + step.GetName(), ""); NActors::TActivationContext::AsActorContext().Register(new NOlap::NBlobOperations::NRead::TActor(constructor)); return true; @@ -230,17 +215,21 @@ bool TCommittedDataSource::DoStartFetchingColumns(const std::shared_ptr& columns) { TMemoryProfileGuard mGuard("SCAN_PROFILE::ASSEMBLER::COMMITTED", IS_DEBUG_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD_SCAN_MEMORY)); + const ISnapshotSchema::TPtr batchSchema = GetContext()->GetReadMetadata()->GetIndexVersions().GetSchemaVerified(GetCommitted().GetSchemaVersion()); + const ISnapshotSchema::TPtr resultSchema = GetContext()->GetReadMetadata()->GetResultSchema(); if (!GetStageData().GetTable()) { AFL_VERIFY(GetStageData().GetBlobs().size() == 1); 
auto bData = MutableStageData().ExtractBlob(GetStageData().GetBlobs().begin()->first); auto schema = GetContext()->GetReadMetadata()->GetBlobSchema(CommittedBlob.GetSchemaVersion()); - auto batch = NArrow::DeserializeBatch(bData, schema); - AFL_VERIFY(batch)("schema", schema->ToString()); - batch = GetContext()->GetReadMetadata()->GetIndexInfo().AddSnapshotColumns(batch, CommittedBlob.GetSnapshot()); - batch = GetContext()->GetReadMetadata()->GetIndexInfo().AddDeleteFlagsColumn(batch, CommittedBlob.GetIsDelete()); + auto rBatch = NArrow::DeserializeBatch(bData, std::make_shared(CommittedBlob.GetSchemaSubset().Apply(schema->fields()))); + AFL_VERIFY(rBatch)("schema", schema->ToString()); + auto batch = std::make_shared(rBatch); + batchSchema->AdaptBatchToSchema(*batch, resultSchema); + GetContext()->GetReadMetadata()->GetIndexInfo().AddSnapshotColumns(*batch, CommittedBlob.GetSnapshotDef(TSnapshot::Zero())); + GetContext()->GetReadMetadata()->GetIndexInfo().AddDeleteFlagsColumn(*batch, CommittedBlob.GetIsDelete()); MutableStageData().AddBatch(batch); } - MutableStageData().SyncTableColumns(columns->GetSchema()->fields()); + MutableStageData().SyncTableColumns(columns->GetSchema()->fields(), *resultSchema); } } // namespace NKikimr::NOlap::NReader::NPlain diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.h index 30e7fe04bf52..889f9fe5e7d4 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.h @@ -1,16 +1,18 @@ #pragma once -#include "context.h" #include "columns_set.h" +#include "context.h" #include "fetched_data.h" + +#include +#include #include #include #include #include -#include -#include #include -#include -#include +#include +#include + #include namespace NKikimr::NOlap { @@ -41,8 +43,13 @@ class IDataSource { YDB_READONLY(ui32, IntervalsCount, 0); virtual 
NJson::TJsonValue DoDebugJson() const = 0; bool MergingStartedFlag = false; - bool AbortedFlag = false; + TAtomic SourceStartedFlag = 0; std::shared_ptr FetchingPlan; + std::vector> ResourceGuards; + std::optional FirstIntervalId; + ui32 CurrentPlanStepIndex = 0; + YDB_READONLY(TPKRangeFilter::EUsageClass, UsageClass, TPKRangeFilter::EUsageClass::PartialUsage); + protected: bool IsSourceInMemoryFlag = true; THashMap Intervals; @@ -53,8 +60,10 @@ class IDataSource { TAtomic FilterStageFlag = 0; bool IsReadyFlag = false; - virtual bool DoStartFetchingColumns(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& columns) = 0; - virtual bool DoStartFetchingIndexes(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& indexes) = 0; + virtual bool DoStartFetchingColumns( + const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const TColumnsSetIds& columns) = 0; + virtual bool DoStartFetchingIndexes( + const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& indexes) = 0; virtual void DoAssembleColumns(const std::shared_ptr& columns) = 0; virtual void DoAbort() = 0; virtual void DoApplyIndex(const NIndexes::TIndexCheckerContainer& indexMeta) = 0; @@ -62,15 +71,43 @@ class IDataSource { virtual NJson::TJsonValue DoDebugJsonForMemory() const { return NJson::JSON_MAP; } + virtual bool DoAddTxConflict() = 0; + public: - void OnInitResourcesGuard(const std::shared_ptr& sourcePtr); + bool AddTxConflict() { + if (!Context->GetCommonContext()->HasLock()) { + return false; + } + if (DoAddTxConflict()) { + StageData->Clear(); + return true; + } + return false; + } + + ui64 GetResourceGuardsMemory() const { + ui64 result = 0; + for (auto&& i : ResourceGuards) { + result += i->GetMemory(); + } + return result; + } - bool IsAborted() const { - return AbortedFlag; + void RegisterAllocationGuard(const std::shared_ptr& guard) { + 
ResourceGuards.emplace_back(guard); } + bool IsSourceInMemory() const { return IsSourceInMemoryFlag; } + void SetFirstIntervalId(const ui64 value) { + AFL_VERIFY(!FirstIntervalId); + FirstIntervalId = value; + } + ui64 GetFirstIntervalId() const { + AFL_VERIFY(!!FirstIntervalId); + return *FirstIntervalId; + } virtual bool IsSourceInMemory(const std::set& fieldIds) const = 0; bool AddSequentialEntityIds(const ui32 entityId) { if (DoAddSequentialEntityIds(entityId)) { @@ -114,12 +151,12 @@ class IDataSource { DoAssembleColumns(columns); } - bool StartFetchingColumns(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& columns) { - AFL_VERIFY(columns); + bool StartFetchingColumns(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const TColumnsSetIds& columns) { return DoStartFetchingColumns(sourcePtr, step, columns); } - bool StartFetchingIndexes(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& indexes) { + bool StartFetchingIndexes( + const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& indexes) { AFL_VERIFY(indexes); return DoStartFetchingIndexes(sourcePtr, step, indexes); } @@ -134,18 +171,18 @@ class IDataSource { virtual ui64 GetColumnRawBytes(const std::set& columnIds) const = 0; virtual ui64 GetIndexRawBytes(const std::set& indexIds) const = 0; + virtual ui64 GetColumnBlobBytes(const std::set& columnsIds) const = 0; bool IsMergingStarted() const { return MergingStartedFlag; } void StartMerging() { - Y_ABORT_UNLESS(!MergingStartedFlag); + AFL_VERIFY(!MergingStartedFlag); MergingStartedFlag = true; } void Abort() { - AbortedFlag = true; Intervals.clear(); DoAbort(); } @@ -172,6 +209,17 @@ class IDataSource { return IsReadyFlag; } + void OnEmptyStageData() { + if (!ResourceGuards.size()) { + return; + } + if (ExclusiveIntervalOnly) { + ResourceGuards.back()->Update(0); + } else { + 
ResourceGuards.back()->Update(GetColumnRawBytes(Context->GetPKColumns()->GetColumnIds())); + } + } + const TFetchedData& GetStageData() const { AFL_VERIFY(StageData); return *StageData; @@ -182,11 +230,11 @@ class IDataSource { return *StageData; } - void RegisterInterval(TFetchingInterval& interval); + void RegisterInterval(TFetchingInterval& interval, const std::shared_ptr& sourcePtr); - IDataSource(const ui32 sourceIdx, const std::shared_ptr& context, - const NArrow::TReplaceKey& start, const NArrow::TReplaceKey& finish, const TSnapshot& recordSnapshotMin, const TSnapshot& recordSnapshotMax, - const ui32 recordsCount, const std::optional shardingVersion, const bool hasDeletions) + IDataSource(const ui32 sourceIdx, const std::shared_ptr& context, const NArrow::TReplaceKey& start, + const NArrow::TReplaceKey& finish, const TSnapshot& recordSnapshotMin, const TSnapshot& recordSnapshotMax, const ui32 recordsCount, + const std::optional shardingVersion, const bool hasDeletions) : SourceIdx(sourceIdx) , Start(context->GetReadMetadata()->BuildSortedPosition(start)) , Finish(context->GetReadMetadata()->BuildSortedPosition(finish)) @@ -197,8 +245,9 @@ class IDataSource { , RecordSnapshotMax(recordSnapshotMax) , RecordsCount(recordsCount) , ShardingVersionOptional(shardingVersion) - , HasDeletions(hasDeletions) - { + , HasDeletions(hasDeletions) { + UsageClass = Context->GetReadMetadata()->GetPKRangesFilter().IsPortionInPartialUsage(GetStartReplaceKey(), GetFinishReplaceKey()); + AFL_VERIFY(UsageClass != TPKRangeFilter::EUsageClass::DontUsage); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "portions_for_merge")("start", Start.DebugJson())("finish", Finish.DebugJson()); if (Start.IsReverseSort()) { std::swap(Start, Finish); @@ -207,7 +256,7 @@ class IDataSource { } virtual ~IDataSource() { - Y_ABORT_UNLESS(AbortedFlag || Intervals.empty()); + AFL_VERIFY(Intervals.empty()); } }; @@ -217,14 +266,16 @@ class TPortionDataSource: public IDataSource { std::set 
SequentialEntityIds; std::shared_ptr Portion; std::shared_ptr Schema; + mutable THashMap FingerprintedData; - void NeedFetchColumns(const std::set& columnIds, - TBlobsAction& blobsAction, THashMap& nullBlocks, - const std::shared_ptr& filter); + void NeedFetchColumns(const std::set& columnIds, TBlobsAction& blobsAction, + THashMap& nullBlocks, const std::shared_ptr& filter); virtual void DoApplyIndex(const NIndexes::TIndexCheckerContainer& indexChecker) override; - virtual bool DoStartFetchingColumns(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& columns) override; - virtual bool DoStartFetchingIndexes(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& indexes) override; + virtual bool DoStartFetchingColumns( + const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const TColumnsSetIds& columns) override; + virtual bool DoStartFetchingIndexes( + const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& indexes) override; virtual void DoAssembleColumns(const std::shared_ptr& columns) override; virtual NJson::TJsonValue DoDebugJson() const override { NJson::TJsonValue result = NJson::JSON_MAP; @@ -239,7 +290,7 @@ class TPortionDataSource: public IDataSource { for (auto&& i : SequentialEntityIds) { AFL_VERIFY(columns.erase(i)); } -// result.InsertValue("sequential_columns", JoinSeq(",", SequentialEntityIds)); + // result.InsertValue("sequential_columns", JoinSeq(",", SequentialEntityIds)); if (SequentialEntityIds.size()) { result.InsertValue("min_memory_seq", Portion->GetMinMemoryForReadColumns(SequentialEntityIds)); result.InsertValue("min_memory_seq_blobs", Portion->GetColumnBlobBytes(SequentialEntityIds)); @@ -257,10 +308,16 @@ class TPortionDataSource: public IDataSource { return Portion->GetPathId(); } virtual bool DoAddSequentialEntityIds(const ui32 entityId) override { + FingerprintedData.clear(); return 
SequentialEntityIds.emplace(entityId).second; } public: + virtual bool DoAddTxConflict() override { + GetContext()->GetReadMetadata()->SetBrokenWithCommitted(); + return false; + } + virtual bool HasIndexes(const std::set& indexIds) const override { return Portion->HasIndexes(indexIds); } @@ -279,6 +336,13 @@ class TPortionDataSource: public IDataSource { } virtual ui64 GetColumnRawBytes(const std::set& columnsIds) const override { + AFL_VERIFY(columnsIds.size()); + const ui64 fp = CombineHashes(*columnsIds.begin(), *columnsIds.rbegin()); + auto it = FingerprintedData.find(fp); + if (it != FingerprintedData.end()) { + return it->second; + } + ui64 result = 0; if (SequentialEntityIds.size()) { std::set selectedSeq; std::set selectedInMem; @@ -289,12 +353,17 @@ class TPortionDataSource: public IDataSource { selectedInMem.emplace(i); } } - return Portion->GetMinMemoryForReadColumns(selectedSeq) - + Portion->GetColumnBlobBytes(selectedSeq) - + Portion->GetColumnRawBytes(selectedInMem, false); + result = Portion->GetMinMemoryForReadColumns(selectedSeq) + Portion->GetColumnBlobBytes(selectedSeq) + + Portion->GetColumnRawBytes(selectedInMem, false); } else { - return Portion->GetColumnRawBytes(columnsIds, false); + result = Portion->GetColumnRawBytes(columnsIds, false); } + FingerprintedData.emplace(fp, result); + return result; + } + + virtual ui64 GetColumnBlobBytes(const std::set& columnsIds) const override { + return Portion->GetColumnBlobBytes(columnsIds, false); } virtual ui64 GetIndexRawBytes(const std::set& indexIds) const override { @@ -309,13 +378,11 @@ class TPortionDataSource: public IDataSource { return Portion; } - TPortionDataSource(const ui32 sourceIdx, const std::shared_ptr& portion, const std::shared_ptr& context, - const NArrow::TReplaceKey& start, const NArrow::TReplaceKey& finish) - : TBase(sourceIdx, context, start, finish, portion->RecordSnapshotMin(), portion->RecordSnapshotMax(), portion->GetRecordsCount(), portion->GetShardingVersionOptional(), - 
portion->GetMeta().GetDeletionsCount()) + TPortionDataSource(const ui32 sourceIdx, const std::shared_ptr& portion, const std::shared_ptr& context) + : TBase(sourceIdx, context, portion->IndexKeyStart(), portion->IndexKeyEnd(), portion->RecordSnapshotMin(), portion->RecordSnapshotMax(), + portion->GetRecordsCount(), portion->GetShardingVersionOptional(), portion->GetMeta().GetDeletionsCount()) , Portion(portion) - , Schema(GetContext()->GetReadMetadata()->GetLoadSchemaVerified(*Portion)) - { + , Schema(GetContext()->GetReadMetadata()->GetLoadSchemaVerified(*Portion)) { } }; @@ -326,11 +393,12 @@ class TCommittedDataSource: public IDataSource { bool ReadStarted = false; virtual void DoAbort() override { - } - virtual bool DoStartFetchingColumns(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& columns) override; - virtual bool DoStartFetchingIndexes(const std::shared_ptr& /*sourcePtr*/, const TFetchingScriptCursor& /*step*/, const std::shared_ptr& /*indexes*/) override { + virtual bool DoStartFetchingColumns( + const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const TColumnsSetIds& columns) override; + virtual bool DoStartFetchingIndexes(const std::shared_ptr& /*sourcePtr*/, const TFetchingScriptCursor& /*step*/, + const std::shared_ptr& /*indexes*/) override { return false; } virtual void DoApplyIndex(const NIndexes::TIndexCheckerContainer& /*indexMeta*/) override { @@ -350,6 +418,18 @@ class TCommittedDataSource: public IDataSource { virtual bool DoAddSequentialEntityIds(const ui32 /*entityId*/) override { return false; } + + virtual bool DoAddTxConflict() override { + if (CommittedBlob.HasSnapshot()) { + GetContext()->GetReadMetadata()->SetBrokenWithCommitted(); + return true; + } else if (!GetContext()->GetReadMetadata()->IsMyUncommitted(CommittedBlob.GetWriteIdVerified())) { + GetContext()->GetReadMetadata()->SetConflictedWriteId(CommittedBlob.GetWriteIdVerified()); + return true; + } + return false; + 
} + public: virtual THashMap DecodeBlobAddresses(NBlobOperations::NRead::TCompositeReadBlobs&& blobsOriginal) const override { THashMap result; @@ -373,6 +453,10 @@ class TCommittedDataSource: public IDataSource { return CommittedBlob.GetBlobRange().Size; } + virtual ui64 GetColumnBlobBytes(const std::set& /*columnsIds*/) const override { + return CommittedBlob.GetBlobRange().Size; + } + virtual ui64 GetIndexRawBytes(const std::set& /*columnIds*/) const override { AFL_VERIFY(false); return 0; @@ -382,13 +466,11 @@ class TCommittedDataSource: public IDataSource { return CommittedBlob; } - TCommittedDataSource(const ui32 sourceIdx, const TCommittedBlob& committed, const std::shared_ptr& context, - const NArrow::TReplaceKey& start, const NArrow::TReplaceKey& finish) - : TBase(sourceIdx, context, start, finish, committed.GetSnapshot(), committed.GetSnapshot(), committed.GetRecordsCount(), {}, - committed.GetIsDelete()) + TCommittedDataSource(const ui32 sourceIdx, const TCommittedBlob& committed, const std::shared_ptr& context) + : TBase(sourceIdx, context, committed.GetFirst(), committed.GetLast(), committed.GetSnapshotDef(TSnapshot::Zero()), + committed.GetSnapshotDef(TSnapshot::Zero()), committed.GetRecordsCount(), {}, committed.GetIsDelete()) , CommittedBlob(committed) { - } }; -} +} // namespace NKikimr::NOlap::NReader::NPlain diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/ya.make b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/ya.make index cfa691a22e84..93ba27575ade 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/ya.make +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/ya.make @@ -18,6 +18,9 @@ PEERDIR( ydb/core/formats/arrow ydb/core/tx/columnshard/blobs_action ydb/core/tx/conveyor/usage + ydb/core/tx/limiter/grouped_memory/usage ) +GENERATE_ENUM_SERIALIZATION(columns_set.h) + END() diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/filler.cpp 
b/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/filler.cpp index 076f8f11adc7..2a23b12c3fae 100644 --- a/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/filler.cpp +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/filler.cpp @@ -16,7 +16,8 @@ NKikimr::TConclusionStatus TMetadataFromStore::DoFillMetadata(const NColumnShard } THashSet pathIds; - for (auto&& filter : read.PKRangesFilter) { + AFL_VERIFY(read.PKRangesFilter); + for (auto&& filter : *read.PKRangesFilter) { const ui64 fromPathId = *filter.GetPredicateFrom().Get(0, 0, 1); const ui64 toPathId = *filter.GetPredicateTo().Get(0, 0, Max()); auto pathInfos = logsIndex->GetTables(fromPathId, toPathId); @@ -42,7 +43,8 @@ NKikimr::TConclusionStatus TMetadataFromTable::DoFillMetadata(const NColumnShard if (!logsIndex) { return TConclusionStatus::Success(); } - for (auto&& filter : read.PKRangesFilter) { + AFL_VERIFY(read.PKRangesFilter); + for (auto&& filter : *read.PKRangesFilter) { const ui64 fromPathId = *filter.GetPredicateFrom().Get(0, 0, 1); const ui64 toPathId = *filter.GetPredicateTo().Get(0, 0, Max()); if (fromPathId <= read.PathId && read.PathId <= toPathId) { diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/iterator.h b/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/iterator.h index 08f4d78e0c08..33be2ac027b1 100644 --- a/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/iterator.h +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/iterator.h @@ -29,12 +29,12 @@ class TStatsIteratorBase: public TScanIteratorBase { return IndexGranules.empty(); } - virtual TConclusion> GetBatch() override { + virtual TConclusion> GetBatch() override { while (!Finished()) { auto batchOpt = ExtractStatsBatch(); if (!batchOpt) { AFL_VERIFY(Finished()); - return std::nullopt; + return std::shared_ptr(); } auto originalBatch = *batchOpt; if (originalBatch->num_rows() == 0) { @@ -55,10 +55,9 @@ class TStatsIteratorBase: public TScanIteratorBase { 
continue; } auto table = NArrow::TStatusValidator::GetValid(arrow::Table::FromRecordBatches({resultBatch})); - TPartialReadResult out(table, lastKey, std::nullopt); - return std::move(out); + return std::make_shared(table, lastKey, std::nullopt); } - return std::nullopt; + return std::shared_ptr(); } std::optional> ExtractStatsBatch() { @@ -136,10 +135,6 @@ class TStatsIterator : public TStatsIteratorBase { } } - const NTable::TScheme::TTableSchema& GetSchema() const override { - return StatsSchema; - } - NSsa::TColumnInfo GetDefaultColumn() const override { return NSsa::TColumnInfo::Original(1, "PathId"); } diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp b/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp index 1aeb83b4ea24..da3cc74f8e92 100644 --- a/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp @@ -6,8 +6,19 @@ namespace NKikimr::NOlap::NReader::NSysView::NChunks { void TStatsIterator::AppendStats(const std::vector>& builders, const TPortionInfo& portion) const { auto portionSchema = ReadMetadata->GetLoadSchemaVerified(portion); - const std::string prod = ::ToString(portion.GetMeta().Produced); + auto it = PortionType.find(portion.GetMeta().Produced); + if (it == PortionType.end()) { + it = PortionType.emplace(portion.GetMeta().Produced, ::ToString(portion.GetMeta().Produced)).first; + } + const arrow::util::string_view prodView = it->second.GetView(); const bool activity = !portion.IsRemovedFor(ReadMetadata->GetRequestSnapshot()); + static const TString ConstantEntityIsColumn = "COL"; + static const arrow::util::string_view ConstantEntityIsColumnView = + arrow::util::string_view(ConstantEntityIsColumn.data(), ConstantEntityIsColumn.size()); + static const TString ConstantEntityIsIndex = "IDX"; + static const arrow::util::string_view ConstantEntityIsIndexView = + arrow::util::string_view(ConstantEntityIsIndex.data(), 
ConstantEntityIsIndex.size()); + auto& entityStorages = EntityStorageNames[portion.GetMeta().GetTierName()]; { std::vector records; for (auto&& r : portion.Records) { @@ -16,26 +27,54 @@ void TStatsIterator::AppendStats(const std::vector blobsIds; + std::optional lastColumnId; + arrow::util::string_view lastColumnName; + arrow::util::string_view lastTierName; for (auto&& r : records) { NArrow::Append(*builders[0], portion.GetPathId()); - NArrow::Append(*builders[1], prod); + NArrow::Append(*builders[1], prodView); NArrow::Append(*builders[2], ReadMetadata->TabletId); NArrow::Append(*builders[3], r->GetMeta().GetNumRows()); NArrow::Append(*builders[4], r->GetMeta().GetRawBytes()); NArrow::Append(*builders[5], portion.GetPortionId()); NArrow::Append(*builders[6], r->GetChunkIdx()); - NArrow::Append(*builders[7], ReadMetadata->GetColumnNameDef(r->GetColumnId()).value_or("undefined")); + if (!lastColumnId || *lastColumnId != r->GetColumnId()) { + { + auto it = ColumnNamesById.find(r->GetColumnId()); + if (it == ColumnNamesById.end()) { + it = + ColumnNamesById.emplace(r->GetColumnId(), portionSchema->GetFieldByColumnIdVerified(r->GetColumnId())->name()).first; + } + lastColumnName = it->second.GetView(); + } + { + auto it = entityStorages.find(r->GetColumnId()); + if (it == entityStorages.end()) { + it = entityStorages.emplace(r->GetColumnId(), + portionSchema->GetIndexInfo().GetEntityStorageId(r->GetColumnId(), portion.GetMeta().GetTierName())).first; + } + lastTierName = it->second.GetView(); + } + lastColumnId = r->GetColumnId(); + } + NArrow::Append(*builders[7], lastColumnName); NArrow::Append(*builders[8], r->GetColumnId()); - std::string blobIdString = portion.GetBlobId(r->GetBlobRange().GetBlobIdxVerified()).ToStringLegacy(); - NArrow::Append(*builders[9], blobIdString); + { + auto itBlobIdString = blobsIds.find(r->GetBlobRange().GetBlobIdxVerified()); + if (itBlobIdString == blobsIds.end()) { + itBlobIdString = blobsIds.emplace( + 
r->GetBlobRange().GetBlobIdxVerified(), portion.GetBlobId(r->GetBlobRange().GetBlobIdxVerified()).ToStringLegacy()).first; + } + NArrow::Append( + *builders[9], arrow::util::string_view(itBlobIdString->second.data(), itBlobIdString->second.size())); + } NArrow::Append(*builders[10], r->BlobRange.Offset); NArrow::Append(*builders[11], r->BlobRange.Size); - NArrow::Append(*builders[12], activity); + NArrow::Append(*builders[12], activity); - const auto tierName = portionSchema->GetIndexInfo().GetEntityStorageId(r->GetColumnId(), portion.GetMeta().GetTierName()); - std::string strTierName(tierName.data(), tierName.size()); - NArrow::Append(*builders[13], strTierName); - NArrow::Append(*builders[14], "COL"); + NArrow::Append(*builders[13], arrow::util::string_view(lastTierName.data(), lastTierName.size())); + NArrow::Append(*builders[14], ConstantEntityIsColumnView); } } { @@ -48,7 +87,7 @@ void TStatsIterator::AppendStats(const std::vector(*builders[0], portion.GetPathId()); - NArrow::Append(*builders[1], prod); + NArrow::Append(*builders[1], prodView); NArrow::Append(*builders[2], ReadMetadata->TabletId); NArrow::Append(*builders[3], r->GetRecordsCount()); NArrow::Append(*builders[4], r->GetRawBytes()); @@ -56,15 +95,21 @@ void TStatsIterator::AppendStats(const std::vector(*builders[6], r->GetChunkIdx()); NArrow::Append(*builders[7], ReadMetadata->GetEntityName(r->GetIndexId()).value_or("undefined")); NArrow::Append(*builders[8], r->GetIndexId()); - std::string blobIdString = portion.GetBlobId(r->GetBlobRange().GetBlobIdxVerified()).ToStringLegacy(); - NArrow::Append(*builders[9], blobIdString); - NArrow::Append(*builders[10], r->GetBlobRange().Offset); - NArrow::Append(*builders[11], r->GetBlobRange().Size); - NArrow::Append(*builders[12], activity); + if (auto bRange = r->GetBlobRangeOptional()) { + std::string blobIdString = portion.GetBlobId(bRange->GetBlobIdxVerified()).ToStringLegacy(); + NArrow::Append(*builders[9], blobIdString); + 
NArrow::Append(*builders[10], bRange->Offset); + NArrow::Append(*builders[11], bRange->Size); + } else if (auto bData = r->GetBlobDataOptional()) { + NArrow::Append(*builders[9], "INPLACE"); + NArrow::Append(*builders[10], 0); + NArrow::Append(*builders[11], bData->size()); + } + NArrow::Append(*builders[12], activity); const auto tierName = portionSchema->GetIndexInfo().GetEntityStorageId(r->GetIndexId(), portion.GetMeta().GetTierName()); std::string strTierName(tierName.data(), tierName.size()); NArrow::Append(*builders[13], strTierName); - NArrow::Append(*builders[14], "IDX"); + NArrow::Append(*builders[14], ConstantEntityIsIndexView); } } } diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.h b/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.h index c881d4f161bf..6fb758f46911 100644 --- a/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.h +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.h @@ -28,6 +28,32 @@ class TReadStatsMetadata: public NAbstract::TReadStatsMetadata, std::enable_shar class TStatsIterator: public NAbstract::TStatsIterator { private: + class TViewContainer { + private: + TString Data; + std::string STLData; + arrow::util::string_view View; + + public: + const arrow::util::string_view& GetView() const { + return View; + } + + TViewContainer(const TString& data) + : Data(data) + , View(arrow::util::string_view(Data.data(), Data.size())) { + } + + TViewContainer(const std::string& data) + : STLData(data) + , View(arrow::util::string_view(STLData.data(), STLData.size())) { + } + }; + + mutable THashMap ColumnNamesById; + mutable THashMap PortionType; + mutable THashMap> EntityStorageNames; + using TBase = NAbstract::TStatsIterator; virtual bool AppendStats(const std::vector>& builders, NAbstract::TGranuleMetaView& granule) const override; virtual ui32 PredictRecordsCount(const NAbstract::TGranuleMetaView& granule) const override; diff --git 
a/ydb/core/tx/columnshard/engines/reader/sys_view/portions/portions.cpp b/ydb/core/tx/columnshard/engines/reader/sys_view/portions/portions.cpp index 76f1bdda2c7c..83b2306a1bff 100644 --- a/ydb/core/tx/columnshard/engines/reader/sys_view/portions/portions.cpp +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/portions/portions.cpp @@ -16,12 +16,23 @@ void TStatsIterator::AppendStats(const std::vector(*builders[6], portion.GetColumnBlobBytes()); NArrow::Append(*builders[7], portion.GetIndexBlobBytes()); NArrow::Append(*builders[8], portion.GetPortionId()); - NArrow::Append(*builders[9], !portion.IsRemovedFor(ReadMetadata->GetRequestSnapshot())); + NArrow::Append(*builders[9], !portion.IsRemovedFor(ReadMetadata->GetRequestSnapshot())); auto tierName = portion.GetTierNameDef(NBlobOperations::TGlobal::DefaultStorageId); NArrow::Append(*builders[10], arrow::util::string_view(tierName.data(), tierName.size())); - auto statInfo = portion.GetMeta().GetStatisticsStorage().SerializeToProto().DebugString(); + NJson::TJsonValue statReport = NJson::JSON_ARRAY; + for (auto&& i : portion.GetIndexes()) { + if (!i.HasBlobData()) { + continue; + } + auto schema = portion.GetSchema(ReadMetadata->GetIndexVersions()); + auto indexMeta = schema->GetIndexInfo().GetIndexVerified(i.GetEntityId()); + statReport.AppendValue(indexMeta->SerializeDataToJson(i, schema->GetIndexInfo())); + } + auto statInfo = statReport.GetStringRobust(); NArrow::Append(*builders[11], arrow::util::string_view(statInfo.data(), statInfo.size())); + + NArrow::Append(*builders[12], portion.HasRuntimeFeature(TPortionInfo::ERuntimeFeature::Optimized)); } ui32 TStatsIterator::PredictRecordsCount(const NAbstract::TGranuleMetaView& granule) const { diff --git a/ydb/core/tx/columnshard/engines/reader/transaction/tx_internal_scan.cpp b/ydb/core/tx/columnshard/engines/reader/transaction/tx_internal_scan.cpp index 5decb79c2203..55d28a5a61f4 100644 --- 
a/ydb/core/tx/columnshard/engines/reader/transaction/tx_internal_scan.cpp +++ b/ydb/core/tx/columnshard/engines/reader/transaction/tx_internal_scan.cpp @@ -1,94 +1,91 @@ #include "tx_internal_scan.h" -#include -#include -#include + #include #include +#include +#include #include +#include +#include namespace NKikimr::NOlap::NReader { -bool TTxInternalScan::Execute(TTransactionContext& /*txc*/, const TActorContext& /*ctx*/) { - TMemoryProfileGuard mpg("TTxInternalScan::Execute"); +void TTxInternalScan::SendError(const TString& problem, const TString& details, const TActorContext& ctx) const { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "TTxScan failed")("problem", problem)("details", details); auto& request = *InternalScanEvent->Get(); - const TSnapshot snapshot = request.ReadToSnapshot.value_or(NOlap::TSnapshot(Self->LastPlannedStep, Self->LastPlannedTxId)); + auto scanComputeActor = InternalScanEvent->Sender; - TReadDescription read(snapshot, request.GetReverse()); - read.PathId = request.GetPathId(); - read.ReadNothing = !Self->TablesManager.HasTable(read.PathId); - std::unique_ptr scannerConstructor(new NPlain::TIndexScannerConstructor(snapshot, request.GetItemsLimit(), request.GetReverse())); - read.ColumnIds = request.GetColumnIds(); - read.ColumnNames = request.GetColumnNames(); - if (request.RangesFilter) { - read.PKRangesFilter = std::move(*request.RangesFilter); - } + auto ev = MakeHolder(ScanGen, Self->TabletID()); + ev->Record.SetStatus(Ydb::StatusIds::BAD_REQUEST); + auto issue = NYql::YqlIssue({}, NYql::TIssuesIds::KIKIMR_BAD_REQUEST, + TStringBuilder() << "Table " << request.GetPathId() << " (shard " << Self->TabletID() << ") scan failed, reason: " << problem << "/" + << details); + NYql::IssueToMessage(issue, ev->Record.MutableIssues()->Add()); - const TVersionedIndex* vIndex = Self->GetIndexOptional() ? 
&Self->GetIndexOptional()->GetVersionedIndex() : nullptr; - AFL_VERIFY(vIndex); - { - TProgramContainer pContainer; - pContainer.OverrideProcessingColumns(read.ColumnNames); - read.SetProgram(std::move(pContainer)); - } + ctx.Send(scanComputeActor, ev.Release()); +} - { - auto newRange = scannerConstructor->BuildReadMetadata(Self, read); - if (!newRange) { - ErrorDescription = newRange.GetErrorMessage(); - ReadMetadataRange = nullptr; - return true; - } - ReadMetadataRange = newRange.DetachResult(); - } - AFL_VERIFY(ReadMetadataRange); +bool TTxInternalScan::Execute(TTransactionContext& /*txc*/, const TActorContext& /*ctx*/) { return true; } void TTxInternalScan::Complete(const TActorContext& ctx) { TMemoryProfileGuard mpg("TTxInternalScan::Complete"); + auto& request = *InternalScanEvent->Get(); auto scanComputeActor = InternalScanEvent->Sender; - const NActors::TLogContextGuard gLogging = NActors::TLogContextBuilder::Build()("tablet", Self->TabletID()); - - if (!ReadMetadataRange) { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "TTxScan failed")("reason", "no metadata")("error", ErrorDescription); + const TSnapshot snapshot = request.ReadToSnapshot.value_or(NOlap::TSnapshot(Self->LastPlannedStep, Self->LastPlannedTxId)); + const NActors::TLogContextGuard gLogging = + NActors::TLogContextBuilder::Build()("tablet", Self->TabletID())("snapshot", snapshot.DebugString()); + TReadMetadataPtr readMetadataRange; + { + TReadDescription read(snapshot, request.GetReverse()); + read.PathId = request.GetPathId(); + read.LockId = LockId; + read.ReadNothing = !Self->TablesManager.HasTable(read.PathId); + std::unique_ptr scannerConstructor( + new NPlain::TIndexScannerConstructor(snapshot, request.GetItemsLimit(), request.GetReverse())); + read.ColumnIds = request.GetColumnIds(); + read.ColumnNames = request.GetColumnNames(); + if (request.RangesFilter) { + read.PKRangesFilter = request.RangesFilter; + } - auto ev = MakeHolder(ScanGen, Self->TabletID()); - 
ev->Record.SetStatus(Ydb::StatusIds::BAD_REQUEST); - auto issue = NYql::YqlIssue({}, NYql::TIssuesIds::KIKIMR_BAD_REQUEST, TStringBuilder() - << "Table " << request.GetPathId() << " (shard " << Self->TabletID() << ") scan failed, reason: " << ErrorDescription ? ErrorDescription : "no metadata ranges"); - NYql::IssueToMessage(issue, ev->Record.MutableIssues()->Add()); + const TVersionedIndex* vIndex = Self->GetIndexOptional() ? &Self->GetIndexOptional()->GetVersionedIndex() : nullptr; + AFL_VERIFY(vIndex); + { + TProgramContainer pContainer; + pContainer.OverrideProcessingColumns(read.ColumnNames); + read.SetProgram(std::move(pContainer)); + } - ctx.Send(scanComputeActor, ev.Release()); - return; + { + auto newRange = scannerConstructor->BuildReadMetadata(Self, read); + if (!newRange) { + return SendError("cannot create read metadata", newRange.GetErrorMessage(), ctx); + } + readMetadataRange = TValidator::CheckNotNull(newRange.DetachResult()); + } } + TStringBuilder detailedInfo; if (IS_LOG_PRIORITY_ENABLED(NActors::NLog::PRI_TRACE, NKikimrServices::TX_COLUMNSHARD)) { - detailedInfo << " read metadata: (" << *ReadMetadataRange << ")"; + detailedInfo << " read metadata: (" << *readMetadataRange << ")"; } const TVersionedIndex* index = nullptr; if (Self->HasIndex()) { index = &Self->GetIndexAs().GetVersionedIndex(); } - const TConclusion requestCookie = Self->InFlightReadsTracker.AddInFlightRequest(ReadMetadataRange, index); - if (!requestCookie) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "TTxScan failed")("reason", requestCookie.GetErrorMessage())("trace_details", detailedInfo); - auto ev = MakeHolder(ScanGen, Self->TabletID()); + readMetadataRange->OnBeforeStartReading(*Self); - ev->Record.SetStatus(Ydb::StatusIds::INTERNAL_ERROR); - auto issue = NYql::YqlIssue({}, NYql::TIssuesIds::KIKIMR_TEMPORARILY_UNAVAILABLE, TStringBuilder() - << "Table " << request.GetPathId() << " (shard " << Self->TabletID() << ") scan failed, reason: " << 
requestCookie.GetErrorMessage()); - NYql::IssueToMessage(issue, ev->Record.MutableIssues()->Add()); - Self->ScanCounters.OnScanDuration(NColumnShard::TScanCounters::EStatusFinish::CannotAddInFlight, TDuration::Zero()); - ctx.Send(scanComputeActor, ev.Release()); - return; - } - auto scanActor = ctx.Register(new TColumnShardScan(Self->SelfId(), scanComputeActor, Self->GetStoragesManager(), - TComputeShardingPolicy(), ScanId, TxId, ScanGen, *requestCookie, Self->TabletID(), TDuration::Max(), ReadMetadataRange, - NKikimrDataEvents::FORMAT_ARROW, Self->ScanCounters)); + const ui64 requestCookie = Self->InFlightReadsTracker.AddInFlightRequest(readMetadataRange, index); + auto scanActor = ctx.Register(new TColumnShardScan(Self->SelfId(), scanComputeActor, Self->GetStoragesManager(), TComputeShardingPolicy(), + ScanId, LockId.value_or(0), ScanGen, requestCookie, Self->TabletID(), TDuration::Max(), readMetadataRange, + NKikimrDataEvents::FORMAT_ARROW, + Self->Counters.GetScanCounters())); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "TTxInternalScan started")("actor_id", scanActor)("trace_detailed", detailedInfo); } -} +} // namespace NKikimr::NOlap::NReader diff --git a/ydb/core/tx/columnshard/engines/reader/transaction/tx_internal_scan.h b/ydb/core/tx/columnshard/engines/reader/transaction/tx_internal_scan.h index 3e20c23a7c10..413d33714867 100644 --- a/ydb/core/tx/columnshard/engines/reader/transaction/tx_internal_scan.h +++ b/ydb/core/tx/columnshard/engines/reader/transaction/tx_internal_scan.h @@ -7,25 +7,26 @@ namespace NKikimr::NOlap::NReader { class TTxInternalScan: public NTabletFlatExecutor::TTransactionBase { private: using TBase = NTabletFlatExecutor::TTransactionBase; + TEvColumnShard::TEvInternalScan::TPtr InternalScanEvent; const ui32 ScanGen = 1; - const ui32 TxId = 1; const ui32 ScanId = 1; + const std::optional LockId; + void SendError(const TString& problem, const TString& details, const TActorContext& ctx) const; + public: using 
TReadMetadataPtr = TReadMetadataBase::TConstPtr; TTxInternalScan(NColumnShard::TColumnShard* self, TEvColumnShard::TEvInternalScan::TPtr& ev) : TBase(self) - , InternalScanEvent(ev) { + , InternalScanEvent(ev) + , LockId(InternalScanEvent->Get()->GetLockId()) + { } bool Execute(TTransactionContext& txc, const TActorContext& ctx) override; void Complete(const TActorContext& ctx) override; TTxType GetTxType() const override { return NColumnShard::TXTYPE_START_INTERNAL_SCAN; } -private: - TString ErrorDescription; - TEvColumnShard::TEvInternalScan::TPtr InternalScanEvent; - TReadMetadataPtr ReadMetadataRange; }; } \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/reader/transaction/tx_scan.cpp b/ydb/core/tx/columnshard/engines/reader/transaction/tx_scan.cpp index ab0d41db1931..74f09deb0197 100644 --- a/ydb/core/tx/columnshard/engines/reader/transaction/tx_scan.cpp +++ b/ydb/core/tx/columnshard/engines/reader/transaction/tx_scan.cpp @@ -1,181 +1,32 @@ #include "tx_scan.h" -#include -#include -#include + #include #include +#include +#include #include +#include +#include namespace NKikimr::NOlap::NReader { -std::vector ExtractTypes(const std::vector>& columns) { - std::vector types; - types.reserve(columns.size()); - for (auto& [name, type] : columns) { - types.push_back(type); - } - return types; -} - -TString FromCells(const TConstArrayRef& cells, const std::vector>& columns) { - Y_ABORT_UNLESS(cells.size() == columns.size()); - if (cells.empty()) { - return {}; - } - - std::vector types = ExtractTypes(columns); - - NArrow::TArrowBatchBuilder batchBuilder; - batchBuilder.Reserve(1); - auto startStatus = batchBuilder.Start(columns); - Y_ABORT_UNLESS(startStatus.ok(), "%s", startStatus.ToString().c_str()); - - batchBuilder.AddRow(NKikimr::TDbTupleRef(), NKikimr::TDbTupleRef(types.data(), cells.data(), cells.size())); - - auto batch = batchBuilder.FlushBatch(false); - Y_ABORT_UNLESS(batch); - Y_ABORT_UNLESS(batch->num_columns() == 
(int)cells.size()); - Y_ABORT_UNLESS(batch->num_rows() == 1); - return NArrow::SerializeBatchNoCompression(batch); -} - -std::pair RangePredicates(const TSerializedTableRange& range, const std::vector>& columns) { - std::vector leftCells; - std::vector> leftColumns; - bool leftTrailingNull = false; - { - TConstArrayRef cells = range.From.GetCells(); - const size_t size = cells.size(); - Y_ASSERT(size <= columns.size()); - leftCells.reserve(size); - leftColumns.reserve(size); - for (size_t i = 0; i < size; ++i) { - if (!cells[i].IsNull()) { - leftCells.push_back(cells[i]); - leftColumns.push_back(columns[i]); - leftTrailingNull = false; - } else { - leftTrailingNull = true; - } - } - } - - std::vector rightCells; - std::vector> rightColumns; - bool rightTrailingNull = false; - { - TConstArrayRef cells = range.To.GetCells(); - const size_t size = cells.size(); - Y_ASSERT(size <= columns.size()); - rightCells.reserve(size); - rightColumns.reserve(size); - for (size_t i = 0; i < size; ++i) { - if (!cells[i].IsNull()) { - rightCells.push_back(cells[i]); - rightColumns.push_back(columns[i]); - rightTrailingNull = false; - } else { - rightTrailingNull = true; - } - } - } +void TTxScan::SendError(const TString& problem, const TString& details, const TActorContext& ctx) const { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "TTxScan failed")("problem", problem)("details", details); + const auto& request = Ev->Get()->Record; + const TString table = request.GetTablePath(); + const ui32 scanGen = request.GetGeneration(); + const auto scanComputeActor = Ev->Sender; - const bool fromInclusive = range.FromInclusive || leftTrailingNull; - const bool toInclusive = range.ToInclusive && !rightTrailingNull; - - TString leftBorder = FromCells(leftCells, leftColumns); - TString rightBorder = FromCells(rightCells, rightColumns); - auto leftSchema = NArrow::MakeArrowSchema(leftColumns); - Y_ASSERT(leftSchema.ok()); - auto rightSchema = NArrow::MakeArrowSchema(rightColumns); - 
Y_ASSERT(rightSchema.ok()); - return std::make_pair( - TPredicate(fromInclusive ? NKernels::EOperation::GreaterEqual : NKernels::EOperation::Greater, leftBorder, leftSchema.ValueUnsafe()), - TPredicate(toInclusive ? NKernels::EOperation::LessEqual : NKernels::EOperation::Less, rightBorder, rightSchema.ValueUnsafe())); -} + auto ev = MakeHolder(scanGen, Self->TabletID()); + ev->Record.SetStatus(Ydb::StatusIds::BAD_REQUEST); + auto issue = NYql::YqlIssue({}, NYql::TIssuesIds::KIKIMR_BAD_REQUEST, + TStringBuilder() << "Table " << table << " (shard " << Self->TabletID() << ") scan failed, reason: " << problem << "/" << details); + NYql::IssueToMessage(issue, ev->Record.MutableIssues()->Add()); -static bool FillPredicatesFromRange(TReadDescription& read, const ::NKikimrTx::TKeyRange& keyRange, - const std::vector>& ydbPk, ui64 tabletId, const TIndexInfo* indexInfo, TString& error) { - TSerializedTableRange range(keyRange); - auto fromPredicate = std::make_shared(); - auto toPredicate = std::make_shared(); - std::tie(*fromPredicate, *toPredicate) = RangePredicates(range, ydbPk); - - LOG_S_DEBUG("TTxScan range predicate. 
From key size: " << range.From.GetCells().size() - << " To key size: " << range.To.GetCells().size() - << " greater predicate over columns: " << fromPredicate->ToString() - << " less predicate over columns: " << toPredicate->ToString() - << " at tablet " << tabletId); - - if (!read.PKRangesFilter.Add(fromPredicate, toPredicate, indexInfo)) { - error = "Error building filter"; - return false; - } - return true; + ctx.Send(scanComputeActor, ev.Release()); } bool TTxScan::Execute(TTransactionContext& /*txc*/, const TActorContext& /*ctx*/) { - TMemoryProfileGuard mpg("TTxScan::Execute"); - auto& record = Ev->Get()->Record; - TSnapshot snapshot(record.GetSnapshot().GetStep(), record.GetSnapshot().GetTxId()); - const auto scanId = record.GetScanId(); - const ui64 txId = record.GetTxId(); - - LOG_S_DEBUG("TTxScan prepare txId: " << txId << " scanId: " << scanId << " at tablet " << Self->TabletID()); - - TReadDescription read(snapshot, record.GetReverse()); - read.PathId = record.GetLocalPathId(); - read.ReadNothing = !Self->TablesManager.HasTable(read.PathId); - read.TableName = record.GetTablePath(); - bool isIndex = false; - std::unique_ptr scannerConstructor = [&]() { - const ui64 itemsLimit = record.HasItemsLimit() ? record.GetItemsLimit() : 0; - auto sysViewPolicy = NSysView::NAbstract::ISysViewPolicy::BuildByPath(read.TableName); - isIndex = !sysViewPolicy; - if (!sysViewPolicy) { - return std::unique_ptr(new NPlain::TIndexScannerConstructor(snapshot, itemsLimit, record.GetReverse())); - } else { - return sysViewPolicy->CreateConstructor(snapshot, itemsLimit, record.GetReverse()); - } - }(); - read.ColumnIds.assign(record.GetColumnTags().begin(), record.GetColumnTags().end()); - read.StatsMode = record.GetStatsMode(); - - const TVersionedIndex* vIndex = Self->GetIndexOptional() ? 
&Self->GetIndexOptional()->GetVersionedIndex() : nullptr; - auto parseResult = scannerConstructor->ParseProgram(vIndex, record, read); - if (!parseResult) { - ErrorDescription = parseResult.GetErrorMessage(); - return true; - } - - if (!record.RangesSize()) { - auto range = scannerConstructor->BuildReadMetadata(Self, read); - if (range.IsSuccess()) { - ReadMetadataRange = range.DetachResult(); - } else { - ErrorDescription = range.GetErrorMessage(); - } - return true; - } - - auto ydbKey = scannerConstructor->GetPrimaryKeyScheme(Self); - auto* indexInfo = (vIndex && isIndex) ? &vIndex->GetSchema(snapshot)->GetIndexInfo() : nullptr; - for (auto& range : record.GetRanges()) { - if (!FillPredicatesFromRange(read, range, ydbKey, Self->TabletID(), indexInfo, ErrorDescription)) { - ReadMetadataRange = nullptr; - return true; - } - } - { - auto newRange = scannerConstructor->BuildReadMetadata(Self, read); - if (!newRange) { - ErrorDescription = newRange.GetErrorMessage(); - ReadMetadataRange = nullptr; - return true; - } - ReadMetadataRange = newRange.DetachResult(); - } - AFL_VERIFY(ReadMetadataRange); return true; } @@ -183,67 +34,101 @@ void TTxScan::Complete(const TActorContext& ctx) { TMemoryProfileGuard mpg("TTxScan::Complete"); auto& request = Ev->Get()->Record; auto scanComputeActor = Ev->Sender; - const auto& snapshot = request.GetSnapshot(); + TSnapshot snapshot = TSnapshot(request.GetSnapshot().GetStep(), request.GetSnapshot().GetTxId()); + if (snapshot.IsZero()) { + snapshot = Self->GetLastTxSnapshot(); + } const auto scanId = request.GetScanId(); const ui64 txId = request.GetTxId(); const ui32 scanGen = request.GetGeneration(); - TString table = request.GetTablePath(); - auto dataFormat = request.GetDataFormat(); + const TString table = request.GetTablePath(); + const auto dataFormat = request.GetDataFormat(); const TDuration timeout = TDuration::MilliSeconds(request.GetTimeoutMs()); if (scanGen > 1) { - Self->IncCounter(NColumnShard::COUNTER_SCAN_RESTARTED); 
+ Self->Counters.GetTabletCounters()->IncCounter(NColumnShard::COUNTER_SCAN_RESTARTED); } - const NActors::TLogContextGuard gLogging = NActors::TLogContextBuilder::Build() - ("tx_id", txId)("scan_id", scanId)("gen", scanGen)("table", table)("snapshot", snapshot)("tablet", Self->TabletID())("timeout", timeout); + const NActors::TLogContextGuard gLogging = NActors::TLogContextBuilder::Build() ("tx_id", txId)("scan_id", scanId)("gen", scanGen)( + "table", table)("snapshot", snapshot)("tablet", Self->TabletID())("timeout", timeout); - if (!ReadMetadataRange) { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "TTxScan failed")("reason", "no metadata")("error", ErrorDescription); + TReadMetadataPtr readMetadataRange; + { + LOG_S_DEBUG("TTxScan prepare txId: " << txId << " scanId: " << scanId << " at tablet " << Self->TabletID()); - auto ev = MakeHolder(scanGen, Self->TabletID()); - ev->Record.SetStatus(Ydb::StatusIds::BAD_REQUEST); - auto issue = NYql::YqlIssue({}, NYql::TIssuesIds::KIKIMR_BAD_REQUEST, TStringBuilder() - << "Table " << table << " (shard " << Self->TabletID() << ") scan failed, reason: " << ErrorDescription ? ErrorDescription : "no metadata ranges"); - NYql::IssueToMessage(issue, ev->Record.MutableIssues()->Add()); + TReadDescription read(snapshot, request.GetReverse()); + read.TxId = txId; + if (request.HasLockTxId()) { + read.LockId = request.GetLockTxId(); + } + read.PathId = request.GetLocalPathId(); + read.ReadNothing = !Self->TablesManager.HasTable(read.PathId); + read.TableName = table; + bool isIndex = false; + std::unique_ptr scannerConstructor = [&]() { + const ui64 itemsLimit = request.HasItemsLimit() ? 
request.GetItemsLimit() : 0; + auto sysViewPolicy = NSysView::NAbstract::ISysViewPolicy::BuildByPath(read.TableName); + isIndex = !sysViewPolicy; + if (!sysViewPolicy) { + return std::unique_ptr(new NPlain::TIndexScannerConstructor(snapshot, itemsLimit, request.GetReverse())); + } else { + return sysViewPolicy->CreateConstructor(snapshot, itemsLimit, request.GetReverse()); + } + }(); + read.ColumnIds.assign(request.GetColumnTags().begin(), request.GetColumnTags().end()); + read.StatsMode = request.GetStatsMode(); + + const TVersionedIndex* vIndex = Self->GetIndexOptional() ? &Self->GetIndexOptional()->GetVersionedIndex() : nullptr; + auto parseResult = scannerConstructor->ParseProgram(vIndex, request, read); + if (!parseResult) { + return SendError("cannot parse program", parseResult.GetErrorMessage(), ctx); + } - ctx.Send(scanComputeActor, ev.Release()); - return; + if (!request.RangesSize()) { + auto newRange = scannerConstructor->BuildReadMetadata(Self, read); + if (newRange.IsSuccess()) { + readMetadataRange = TValidator::CheckNotNull(newRange.DetachResult()); + } else { + return SendError("cannot build metadata withno ranges", newRange.GetErrorMessage(), ctx); + } + } else { + auto ydbKey = scannerConstructor->GetPrimaryKeyScheme(Self); + { + auto filterConclusion = NOlap::TPKRangesFilter::BuildFromProto(request, request.GetReverse(), ydbKey); + if (filterConclusion.IsFail()) { + return SendError("cannot build ranges filter", filterConclusion.GetErrorMessage(), ctx); + } + read.PKRangesFilter = std::make_shared(filterConclusion.DetachResult()); + } + auto newRange = scannerConstructor->BuildReadMetadata(Self, read); + if (!newRange) { + return SendError("cannot build metadata", newRange.GetErrorMessage(), ctx); + } + readMetadataRange = TValidator::CheckNotNull(newRange.DetachResult()); + } } + AFL_VERIFY(readMetadataRange); + readMetadataRange->OnBeforeStartReading(*Self); + TStringBuilder detailedInfo; if (IS_LOG_PRIORITY_ENABLED(NActors::NLog::PRI_TRACE, 
NKikimrServices::TX_COLUMNSHARD)) { - detailedInfo << " read metadata: (" << *ReadMetadataRange << ")" << " req: " << request; + detailedInfo << " read metadata: (" << *readMetadataRange << ")" + << " req: " << request; } const TVersionedIndex* index = nullptr; if (Self->HasIndex()) { index = &Self->GetIndexAs().GetVersionedIndex(); } - const TConclusion requestCookie = Self->InFlightReadsTracker.AddInFlightRequest(ReadMetadataRange, index); - if (!requestCookie) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "TTxScan failed")("reason", requestCookie.GetErrorMessage())("trace_details", detailedInfo); - auto ev = MakeHolder(scanGen, Self->TabletID()); - - ev->Record.SetStatus(Ydb::StatusIds::INTERNAL_ERROR); - auto issue = NYql::YqlIssue({}, NYql::TIssuesIds::KIKIMR_TEMPORARILY_UNAVAILABLE, TStringBuilder() - << "Table " << table << " (shard " << Self->TabletID() << ") scan failed, reason: " << requestCookie.GetErrorMessage()); - NYql::IssueToMessage(issue, ev->Record.MutableIssues()->Add()); - Self->ScanCounters.OnScanDuration(NColumnShard::TScanCounters::EStatusFinish::CannotAddInFlight, TDuration::Zero()); - ctx.Send(scanComputeActor, ev.Release()); - return; - } - auto statsDelta = Self->InFlightReadsTracker.GetSelectStatsDelta(); + const ui64 requestCookie = Self->InFlightReadsTracker.AddInFlightRequest(readMetadataRange, index); - Self->IncCounter(NColumnShard::COUNTER_READ_INDEX_PORTIONS, statsDelta.Portions); - Self->IncCounter(NColumnShard::COUNTER_READ_INDEX_BLOBS, statsDelta.Blobs); - Self->IncCounter(NColumnShard::COUNTER_READ_INDEX_ROWS, statsDelta.Rows); - Self->IncCounter(NColumnShard::COUNTER_READ_INDEX_BYTES, statsDelta.Bytes); + Self->Counters.GetTabletCounters()->OnScanStarted(Self->InFlightReadsTracker.GetSelectStatsDelta()); TComputeShardingPolicy shardingPolicy; AFL_VERIFY(shardingPolicy.DeserializeFromProto(request.GetComputeShardingPolicy())); - auto scanActor = ctx.Register(new TColumnShardScan(Self->SelfId(), scanComputeActor, 
Self->GetStoragesManager(), - shardingPolicy, scanId, txId, scanGen, *requestCookie, Self->TabletID(), timeout, ReadMetadataRange, dataFormat, Self->ScanCounters)); + auto scanActor = ctx.Register(new TColumnShardScan(Self->SelfId(), scanComputeActor, Self->GetStoragesManager(), shardingPolicy, scanId, + txId, scanGen, requestCookie, Self->TabletID(), timeout, readMetadataRange, dataFormat, Self->Counters.GetScanCounters())); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "TTxScan started")("actor_id", scanActor)("trace_detailed", detailedInfo); } -} +} // namespace NKikimr::NOlap::NReader diff --git a/ydb/core/tx/columnshard/engines/reader/transaction/tx_scan.h b/ydb/core/tx/columnshard/engines/reader/transaction/tx_scan.h index 2d9eb9619a64..347def3dea14 100644 --- a/ydb/core/tx/columnshard/engines/reader/transaction/tx_scan.h +++ b/ydb/core/tx/columnshard/engines/reader/transaction/tx_scan.h @@ -7,6 +7,8 @@ namespace NKikimr::NOlap::NReader { class TTxScan: public NTabletFlatExecutor::TTransactionBase { private: using TBase = NTabletFlatExecutor::TTransactionBase; + void SendError(const TString& problem, const TString& details, const TActorContext& ctx) const; + public: using TReadMetadataPtr = TReadMetadataBase::TConstPtr; @@ -17,12 +19,12 @@ class TTxScan: public NTabletFlatExecutor::TTransactionBase #include -#include +#include #include #include @@ -14,36 +14,25 @@ std::shared_ptr IIndexInfo::GetColumnLoaderVerifi return result; } -std::shared_ptr IIndexInfo::AddDeleteFlagsColumn(const std::shared_ptr& batch, const bool isDelete) { - Y_ABORT_UNLESS(batch); - i64 numColumns = batch->num_columns(); - i64 numRows = batch->num_rows(); +void IIndexInfo::AddDeleteFlagsColumn(NArrow::TGeneralContainer& batch, const bool isDelete) { + const i64 numRows = batch.num_rows(); - AFL_VERIFY(!batch->GetColumnByName(SPEC_COL_DELETE_FLAG)); - return NArrow::TStatusValidator::GetValid(batch->AddColumn(numColumns, arrow::field(SPEC_COL_DELETE_FLAG, arrow::boolean()), 
- NArrow::TThreadSimpleArraysCache::GetConst(arrow::boolean(), std::make_shared(isDelete), numRows))); + batch.AddField(arrow::field(SPEC_COL_DELETE_FLAG, arrow::boolean()), + NArrow::TThreadSimpleArraysCache::GetConst(arrow::boolean(), std::make_shared(isDelete), numRows)).Validate(); } -std::shared_ptr IIndexInfo::AddSnapshotColumns(const std::shared_ptr& batch, const TSnapshot& snapshot) { - Y_ABORT_UNLESS(batch); - i64 numColumns = batch->num_columns(); - i64 numRows = batch->num_rows(); +void IIndexInfo::AddSnapshotColumns(NArrow::TGeneralContainer& batch, const TSnapshot& snapshot) { + const i64 numRows = batch.num_rows(); - auto res = batch->AddColumn(numColumns, arrow::field(SPEC_COL_PLAN_STEP, arrow::uint64()), - NArrow::MakeUI64Array(snapshot.GetPlanStep(), numRows)); - Y_ABORT_UNLESS(res.ok()); - res = (*res)->AddColumn(numColumns + 1, arrow::field(SPEC_COL_TX_ID, arrow::uint64()), - NArrow::MakeUI64Array(snapshot.GetTxId(), numRows)); - Y_ABORT_UNLESS(res.ok()); - Y_ABORT_UNLESS((*res)->num_columns() == numColumns + 2); - return *res; + batch.AddField(arrow::field(SPEC_COL_PLAN_STEP, arrow::uint64()), NArrow::MakeUI64Array(snapshot.GetPlanStep(), numRows)).Validate(); + batch.AddField(arrow::field(SPEC_COL_TX_ID, arrow::uint64()), NArrow::MakeUI64Array(snapshot.GetTxId(), numRows)).Validate(); } -std::shared_ptr IIndexInfo::NormalizeDeletionColumn(const std::shared_ptr& batch) { - if (batch->schema()->GetFieldIndex(SPEC_COL_DELETE_FLAG) >= 0) { - return batch; +void IIndexInfo::NormalizeDeletionColumn(NArrow::TGeneralContainer& batch) { + if (batch.HasColumn(SPEC_COL_DELETE_FLAG)) { + return; } - return AddDeleteFlagsColumn(batch, false); + AddDeleteFlagsColumn(batch, false); } std::optional IIndexInfo::GetColumnIdOptional(const std::string& name) const { diff --git a/ydb/core/tx/columnshard/engines/scheme/abstract/index_info.h b/ydb/core/tx/columnshard/engines/scheme/abstract/index_info.h index ab6cd67a3937..b630b5780fbd 100644 --- 
a/ydb/core/tx/columnshard/engines/scheme/abstract/index_info.h +++ b/ydb/core/tx/columnshard/engines/scheme/abstract/index_info.h @@ -1,14 +1,19 @@ #pragma once -#include "loader.h" +#include +#include +#include #include #include namespace NKikimr::NOlap { +using TColumnLoader = NArrow::NAccessor::TColumnLoader; +using TColumnSaver = NArrow::NAccessor::TColumnSaver; + class IIndexInfo { public: - enum class ESpecialColumn: ui32 { + enum class ESpecialColumn : ui32 { PLAN_STEP = NOlap::NPortion::TSpecialColumns::SPEC_COL_PLAN_STEP_INDEX, TX_ID = NOlap::NPortion::TSpecialColumns::SPEC_COL_TX_ID_INDEX, DELETE_FLAG = NOlap::NPortion::TSpecialColumns::SPEC_COL_DELETE_FLAG_INDEX @@ -16,7 +21,7 @@ class IIndexInfo { using TSystemColumnsSet = ui64; - enum class ESystemColumnsSet: ui64 { + enum class ESystemColumnsSet : ui64 { Snapshot = 1, Deletion = 1 << 1, }; @@ -29,6 +34,11 @@ class IIndexInfo { return SPEC_COL_DELETE_FLAG; } + static const std::set& GetNecessarySystemColumnIdsSet() { + static const std::set result = { (ui32)ESpecialColumn::PLAN_STEP, (ui32)ESpecialColumn::TX_ID }; + return result; + } + static const std::vector& GetSnapshotColumnNames() { static const std::vector result = { std::string(SPEC_COL_PLAN_STEP), std::string(SPEC_COL_TX_ID) }; return result; @@ -61,15 +71,10 @@ class IIndexInfo { return std::make_shared(std::move(fields)); } - void AddSpecialFieldIds(std::vector& result) const { - result.emplace_back((ui32)ESpecialColumn::PLAN_STEP); - result.emplace_back((ui32)ESpecialColumn::TX_ID); - result.emplace_back((ui32)ESpecialColumn::DELETE_FLAG); - } - static void AddSpecialFields(std::vector>& fields) { AddSnapshotFields(fields); - fields.push_back(arrow::field(SPEC_COL_DELETE_FLAG, arrow::boolean())); + static const std::shared_ptr f = arrow::field(SPEC_COL_DELETE_FLAG, arrow::boolean()); + fields.push_back(f); } static const std::vector& SnapshotColumnNames() { @@ -78,8 +83,14 @@ class IIndexInfo { } static void AddSnapshotFields(std::vector>& 
fields) { - fields.push_back(arrow::field(SPEC_COL_PLAN_STEP, arrow::uint64())); - fields.push_back(arrow::field(SPEC_COL_TX_ID, arrow::uint64())); + static const std::shared_ptr ps = arrow::field(SPEC_COL_PLAN_STEP, arrow::uint64()); + static const std::shared_ptr txid = arrow::field(SPEC_COL_TX_ID, arrow::uint64()); + fields.push_back(ps); + fields.push_back(txid); + } + + static void AddDeleteFields(std::vector>& fields) { + fields.push_back(arrow::field(SPEC_COL_DELETE_FLAG, arrow::boolean())); } static const std::set& GetSnapshotColumnIdsSet() { @@ -88,15 +99,38 @@ class IIndexInfo { } static const std::vector& GetSystemColumnNames() { - static const std::vector result = { std::string(SPEC_COL_PLAN_STEP), std::string(SPEC_COL_TX_ID), std::string(SPEC_COL_DELETE_FLAG) }; + static const std::vector result = { std::string(SPEC_COL_PLAN_STEP), std::string(SPEC_COL_TX_ID), + std::string(SPEC_COL_DELETE_FLAG) }; return result; } static const std::vector& GetSystemColumnIds() { - static const std::vector result = { (ui32)ESpecialColumn::PLAN_STEP, (ui32)ESpecialColumn::TX_ID, (ui32)ESpecialColumn::DELETE_FLAG }; + static const std::vector result = { (ui32)ESpecialColumn::PLAN_STEP, (ui32)ESpecialColumn::TX_ID, + (ui32)ESpecialColumn::DELETE_FLAG }; return result; } + [[nodiscard]] static std::vector AddSpecialFieldIds(const std::vector& baseColumnIds) { + std::vector result = baseColumnIds; + const auto& cIds = GetSystemColumnIds(); + result.insert(result.end(), cIds.begin(), cIds.end()); + return result; + } + + [[nodiscard]] static std::set AddSpecialFieldIds(const std::set& baseColumnIds) { + std::set result = baseColumnIds; + const auto& cIds = GetSystemColumnIds(); + result.insert(cIds.begin(), cIds.end()); + return result; + } + + [[nodiscard]] static std::vector AddSnapshotFieldIds(const std::vector& baseColumnIds) { + std::vector result = baseColumnIds; + for (auto&& i : GetSnapshotColumnIds()) { + result.emplace_back(i); + } + return result; + } 
std::optional GetColumnIdOptional(const std::string& name) const; TString GetColumnName(ui32 id, bool required) const; @@ -106,27 +140,24 @@ class IIndexInfo { virtual std::shared_ptr GetColumnLoaderOptional(const ui32 columnId) const = 0; std::shared_ptr GetColumnLoaderVerified(const ui32 columnId) const; - static std::shared_ptr NormalizeDeletionColumn(const std::shared_ptr& batch); + static void NormalizeDeletionColumn(NArrow::TGeneralContainer& batch); - static std::shared_ptr AddSnapshotColumns(const std::shared_ptr& batch, const TSnapshot& snapshot); - static std::shared_ptr AddDeleteFlagsColumn(const std::shared_ptr& batch, const bool isDelete); + static void AddSnapshotColumns(NArrow::TGeneralContainer& batch, const TSnapshot& snapshot); + static void AddDeleteFlagsColumn(NArrow::TGeneralContainer& batch, const bool isDelete); static ui64 GetSpecialColumnsRecordSize() { return sizeof(ui64) + sizeof(ui64) + sizeof(bool); } static std::shared_ptr ArrowSchemaSnapshot() { - static std::shared_ptr result = std::make_shared(arrow::FieldVector{ - arrow::field(SPEC_COL_PLAN_STEP, arrow::uint64()), - arrow::field(SPEC_COL_TX_ID, arrow::uint64()) - }); + static std::shared_ptr result = std::make_shared( + arrow::FieldVector{ arrow::field(SPEC_COL_PLAN_STEP, arrow::uint64()), arrow::field(SPEC_COL_TX_ID, arrow::uint64()) }); return result; } static std::shared_ptr ArrowSchemaDeletion() { - static std::shared_ptr result = std::make_shared(arrow::FieldVector{ - arrow::field(SPEC_COL_DELETE_FLAG, arrow::boolean()) - }); + static std::shared_ptr result = + std::make_shared(arrow::FieldVector{ arrow::field(SPEC_COL_DELETE_FLAG, arrow::boolean()) }); return result; } @@ -135,15 +166,16 @@ class IIndexInfo { } static bool IsSpecialColumn(const std::string& fieldName) { - return fieldName == SPEC_COL_PLAN_STEP - || fieldName == SPEC_COL_TX_ID - || fieldName == SPEC_COL_DELETE_FLAG; + return fieldName == SPEC_COL_PLAN_STEP || fieldName == SPEC_COL_TX_ID || fieldName == 
SPEC_COL_DELETE_FLAG; } static bool IsSpecialColumn(const ui32 fieldId) { - return fieldId == (ui32)ESpecialColumn::PLAN_STEP - || fieldId == (ui32)ESpecialColumn::TX_ID - || fieldId == (ui32)ESpecialColumn::DELETE_FLAG; + return fieldId == (ui32)ESpecialColumn::PLAN_STEP || fieldId == (ui32)ESpecialColumn::TX_ID || + fieldId == (ui32)ESpecialColumn::DELETE_FLAG; + } + + static bool IsNullableVerified(const ui32 /*fieldId*/) { + return false; } static ui32 GetSpecialColumnByteWidth(const ui32 field) { @@ -164,4 +196,4 @@ class IIndexInfo { virtual ~IIndexInfo() = default; }; -} // namespace NKikimr::NOlap +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/scheme/abstract/loader.cpp b/ydb/core/tx/columnshard/engines/scheme/abstract/loader.cpp deleted file mode 100644 index d74dc491519d..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/abstract/loader.cpp +++ /dev/null @@ -1,61 +0,0 @@ -#include "loader.h" -#include - -namespace NKikimr::NOlap { - -TString TColumnLoader::DebugString() const { - TStringBuilder result; - if (ExpectedSchema) { - result << "schema:" << ExpectedSchema->ToString() << ";"; - } - if (Transformer) { - result << "transformer:" << Transformer->DebugString() << ";"; - } - if (Serializer) { - result << "serializer:" << Serializer->DebugString() << ";"; - } - return result; -} - -TColumnLoader::TColumnLoader(NArrow::NTransformation::ITransformer::TPtr transformer, const NArrow::NSerialization::TSerializerContainer& serializer, - const std::shared_ptr& expectedSchema, const std::shared_ptr& defaultValue, const ui32 columnId) - : Transformer(transformer) - , Serializer(serializer) - , ExpectedSchema(expectedSchema) - , DefaultValue(defaultValue) - , ColumnId(columnId) { - Y_ABORT_UNLESS(ExpectedSchema); - auto fieldsCountStr = ::ToString(ExpectedSchema->num_fields()); - Y_ABORT_UNLESS(ExpectedSchema->num_fields() == 1, "%s", fieldsCountStr.data()); - Y_ABORT_UNLESS(Serializer); -} - -const std::shared_ptr& 
TColumnLoader::GetField() const { - return ExpectedSchema->field(0); -} - -arrow::Result> TColumnLoader::Apply(const TString& data) const { - Y_ABORT_UNLESS(Serializer); - arrow::Result> columnArray = - Transformer ? Serializer->Deserialize(data) : Serializer->Deserialize(data, ExpectedSchema); - if (!columnArray.ok()) { - return columnArray; - } - if (Transformer) { - return Transformer->Transform(*columnArray); - } else { - return columnArray; - } -} - -std::shared_ptr TColumnLoader::ApplyVerified(const TString& data) const { - return NArrow::TStatusValidator::GetValid(Apply(data)); -} - -std::shared_ptr TColumnLoader::ApplyVerifiedColumn(const TString& data) const { - auto rb = ApplyVerified(data); - AFL_VERIFY(rb->num_columns() == 1)("schema", rb->schema()->ToString()); - return rb->column(0); -} - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/abstract/loader.h b/ydb/core/tx/columnshard/engines/scheme/abstract/loader.h deleted file mode 100644 index d128caaecb58..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/abstract/loader.h +++ /dev/null @@ -1,47 +0,0 @@ -#pragma once -#include -#include -#include -#include - -namespace NKikimr::NOlap { - -class TColumnLoader { -private: - NArrow::NTransformation::ITransformer::TPtr Transformer; - NArrow::NSerialization::TSerializerContainer Serializer; - YDB_READONLY_DEF(std::shared_ptr, ExpectedSchema); - YDB_READONLY_DEF(std::shared_ptr, DefaultValue); - const ui32 ColumnId; -public: - bool IsEqualTo(const TColumnLoader& item) const { - if (!!Transformer != !!item.Transformer) { - return false; - } else if (!!Transformer && !Transformer->IsEqualTo(*item.Transformer)) { - return false; - } - if (!Serializer.IsEqualTo(item.Serializer)) { - return false; - } - return true; - } - - TString DebugString() const; - - TColumnLoader(NArrow::NTransformation::ITransformer::TPtr transformer, const NArrow::NSerialization::TSerializerContainer& serializer, - const std::shared_ptr& 
expectedSchema, const std::shared_ptr& defaultValue, const ui32 columnId); - - ui32 GetColumnId() const { - return ColumnId; - } - - const std::shared_ptr& GetField() const; - - arrow::Result> Apply(const TString& data) const; - - std::shared_ptr ApplyVerified(const TString& data) const; - - std::shared_ptr ApplyVerifiedColumn(const TString& data) const; -}; - -} diff --git a/ydb/core/tx/columnshard/engines/scheme/abstract/saver.cpp b/ydb/core/tx/columnshard/engines/scheme/abstract/saver.cpp deleted file mode 100644 index c15db92b8eec..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/abstract/saver.cpp +++ /dev/null @@ -1,31 +0,0 @@ -#include "saver.h" - -namespace NKikimr::NOlap { - -TColumnSaver::TColumnSaver(NArrow::NTransformation::ITransformer::TPtr transformer, const NArrow::NSerialization::TSerializerContainer serializer) - : Transformer(transformer) - , Serializer(serializer) -{ - Y_ABORT_UNLESS(Serializer); -} - -bool TColumnSaver::IsHardPacker() const { - return Serializer->IsHardPacker(); -} - -TString TColumnSaver::Apply(std::shared_ptr data, std::shared_ptr field) const { - auto schema = std::make_shared(arrow::FieldVector{field}); - auto batch = arrow::RecordBatch::Make(schema, data->length(), {data}); - return Apply(batch); -} - -TString TColumnSaver::Apply(const std::shared_ptr& data) const { - Y_ABORT_UNLESS(Serializer); - if (Transformer) { - return Serializer->SerializeFull(Transformer->Transform(data)); - } else { - return Serializer->SerializePayload(data); - } -} - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/abstract/saver.h b/ydb/core/tx/columnshard/engines/scheme/abstract/saver.h deleted file mode 100644 index c4d10c55a359..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/abstract/saver.h +++ /dev/null @@ -1,34 +0,0 @@ -#pragma once -#include -#include -#include -#include - -namespace NKikimr::NOlap { - -class TColumnSaver { -private: - NArrow::NTransformation::ITransformer::TPtr 
Transformer; - NArrow::NSerialization::TSerializerContainer Serializer; -public: - TColumnSaver() = default; - TColumnSaver(NArrow::NTransformation::ITransformer::TPtr transformer, const NArrow::NSerialization::TSerializerContainer serializer); - - void ResetSerializer(const NArrow::NSerialization::TSerializerContainer& serializer) { - AFL_VERIFY(serializer); - if (Serializer.IsCompatibleForExchange(serializer)) { - Serializer = serializer; - } else { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "cannot_reset_serializer")("reason", "incompatible_serializers"); - } - } - - bool IsHardPacker() const; - - TString Apply(std::shared_ptr data, std::shared_ptr field) const; - - TString Apply(const std::shared_ptr& data) const; -}; - - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/abstract/ya.make b/ydb/core/tx/columnshard/engines/scheme/abstract/ya.make index b830415daae1..79b12f94389e 100644 --- a/ydb/core/tx/columnshard/engines/scheme/abstract/ya.make +++ b/ydb/core/tx/columnshard/engines/scheme/abstract/ya.make @@ -1,15 +1,12 @@ LIBRARY() SRCS( - saver.cpp index_info.cpp - loader.cpp ) PEERDIR( ydb/library/actors/core - ydb/core/formats/arrow/transformer - ydb/core/formats/arrow/serializer + ydb/core/formats/arrow/save_load ) YQL_LAST_ABI_VERSION() diff --git a/ydb/core/tx/columnshard/engines/scheme/column/info.cpp b/ydb/core/tx/columnshard/engines/scheme/column/info.cpp index b9473398fdf3..b4fae5cf09ab 100644 --- a/ydb/core/tx/columnshard/engines/scheme/column/info.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/column/info.cpp @@ -19,8 +19,7 @@ NArrow::NTransformation::ITransformer::TPtr TSimpleColumnInfo::GetLoadTransforme return transformer; } -TConclusionStatus TSimpleColumnInfo::DeserializeFromProto(const NKikimrSchemeOp::TOlapColumnDescription& columnInfo) -{ +TConclusionStatus TSimpleColumnInfo::DeserializeFromProto(const NKikimrSchemeOp::TOlapColumnDescription& columnInfo) { AFL_VERIFY(columnInfo.GetId() == 
ColumnId); if (columnInfo.HasSerializer()) { AFL_VERIFY(Serializer.DeserializeFromProto(columnInfo.GetSerializer())); @@ -30,29 +29,34 @@ TConclusionStatus TSimpleColumnInfo::DeserializeFromProto(const NKikimrSchemeOp: if (columnInfo.HasDefaultValue()) { DefaultValue.DeserializeFromProto(columnInfo.GetDefaultValue()).Validate(); } + if (columnInfo.HasDataAccessorConstructor()) { + AFL_VERIFY(DataAccessorConstructor.DeserializeFromProto(columnInfo.GetDataAccessorConstructor())); + } + IsNullable = columnInfo.HasNotNull() ? !columnInfo.GetNotNull() : true; AFL_VERIFY(Serializer); if (columnInfo.HasDictionaryEncoding()) { auto settings = NArrow::NDictionary::TEncodingSettings::BuildFromProto(columnInfo.GetDictionaryEncoding()); Y_ABORT_UNLESS(settings.IsSuccess()); DictionaryEncoding = *settings; } - Loader = std::make_shared(GetLoadTransformer(), Serializer, ArrowSchema, DefaultValue.GetValue(), ColumnId); + Loader = std::make_shared(GetLoadTransformer(), Serializer, DataAccessorConstructor, ArrowField, DefaultValue.GetValue(), ColumnId); return TConclusionStatus::Success(); } -TSimpleColumnInfo::TSimpleColumnInfo(const ui32 columnId, const std::shared_ptr& arrowField, const NArrow::NSerialization::TSerializerContainer& serializer, - const bool needMinMax, const bool isSorted, +TSimpleColumnInfo::TSimpleColumnInfo(const ui32 columnId, const std::shared_ptr& arrowField, + const NArrow::NSerialization::TSerializerContainer& serializer, const bool needMinMax, const bool isSorted, const bool isNullable, const std::shared_ptr& defaultValue) : ColumnId(columnId) , ArrowField(arrowField) - , ArrowSchema(std::make_shared(arrow::FieldVector({arrowField}))) , Serializer(serializer) , NeedMinMax(needMinMax) , IsSorted(isSorted) + , IsNullable(isNullable) , DefaultValue(defaultValue) { ColumnName = ArrowField->name(); - Loader = std::make_shared(GetLoadTransformer(), Serializer, ArrowSchema, DefaultValue.GetValue(), ColumnId); + Loader = std::make_shared( + GetLoadTransformer(), 
Serializer, DataAccessorConstructor, ArrowField, DefaultValue.GetValue(), ColumnId); } std::vector> TSimpleColumnInfo::ActualizeColumnData(const std::vector>& source, const TSimpleColumnInfo& sourceColumnFeatures) const { @@ -86,7 +90,7 @@ std::vector> TSimpleColumnInf } std::vector> result; for (auto&& s : source) { - auto data = NArrow::TStatusValidator::GetValid(sourceColumnFeatures.Loader->Apply(s->GetData())); + auto data = sourceColumnFeatures.Loader->ApplyRawVerified(s->GetData()); result.emplace_back(s->CopyWithAnotherBlob(GetColumnSaver().Apply(data), *this)); } return result; diff --git a/ydb/core/tx/columnshard/engines/scheme/column/info.h b/ydb/core/tx/columnshard/engines/scheme/column/info.h index 6db21f1fe876..ef47445bf665 100644 --- a/ydb/core/tx/columnshard/engines/scheme/column/info.h +++ b/ydb/core/tx/columnshard/engines/scheme/column/info.h @@ -1,17 +1,18 @@ #pragma once -#include -#include -#include - +#include +#include #include +#include +#include #include -#include -#include +#include +#include +#include #include -#include #include +#include namespace NKikimr::NOlap { @@ -22,19 +23,19 @@ class TSimpleColumnInfo { YDB_READONLY(ui32, ColumnId, 0); YDB_READONLY_DEF(TString, ColumnName); YDB_READONLY_DEF(std::shared_ptr, ArrowField); - YDB_READONLY_DEF(std::shared_ptr, ArrowSchema); YDB_READONLY(NArrow::NSerialization::TSerializerContainer, Serializer, NArrow::NSerialization::TSerializerContainer::GetDefaultSerializer()); + YDB_READONLY(NArrow::NAccessor::TConstructorContainer, DataAccessorConstructor, NArrow::NAccessor::TConstructorContainer::GetDefaultConstructor()); YDB_READONLY(bool, NeedMinMax, false); YDB_READONLY(bool, IsSorted, false); + YDB_READONLY(bool, IsNullable, false); YDB_READONLY_DEF(TColumnDefaultScalarValue, DefaultValue); std::optional DictionaryEncoding; std::shared_ptr Loader; NArrow::NTransformation::ITransformer::TPtr GetLoadTransformer() const; public: - - TSimpleColumnInfo(const ui32 columnId, const std::shared_ptr& 
arrowField, - const NArrow::NSerialization::TSerializerContainer& serializer, const bool needMinMax, const bool isSorted, + TSimpleColumnInfo(const ui32 columnId, const std::shared_ptr& arrowField, + const NArrow::NSerialization::TSerializerContainer& serializer, const bool needMinMax, const bool isSorted, const bool isNullable, const std::shared_ptr& defaultValue); TColumnSaver GetColumnSaver() const { @@ -43,7 +44,8 @@ class TSimpleColumnInfo { return TColumnSaver(transformer, Serializer); } - std::vector> ActualizeColumnData(const std::vector>& source, const TSimpleColumnInfo& sourceColumnFeatures) const; + std::vector> ActualizeColumnData( + const std::vector>& source, const TSimpleColumnInfo& sourceColumnFeatures) const; TString DebugString() const { TStringBuilder sb; @@ -62,4 +64,4 @@ class TSimpleColumnInfo { } }; -} // namespace NKikimr::NOlap +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/scheme/column_features.h b/ydb/core/tx/columnshard/engines/scheme/column_features.h index 671b35e57734..c31c2a970a00 100644 --- a/ydb/core/tx/columnshard/engines/scheme/column_features.h +++ b/ydb/core/tx/columnshard/engines/scheme/column_features.h @@ -1,15 +1,14 @@ #pragma once -#include "abstract/loader.h" -#include "abstract/saver.h" #include "column/info.h" #include #include -#include +#include #include #include #include -#include +#include +#include #include #include @@ -34,9 +33,9 @@ class TColumnFeatures: public TSimpleColumnInfo { YDB_READONLY_DEF(std::shared_ptr, Operator); public: TColumnFeatures(const ui32 columnId, const std::shared_ptr& arrowField, const NArrow::NSerialization::TSerializerContainer& serializer, - const std::shared_ptr& bOperator, const bool needMinMax, const bool isSorted, + const std::shared_ptr& bOperator, const bool needMinMax, const bool isSorted, const bool isNullable, const std::shared_ptr& defaultValue) - : TBase(columnId, arrowField, serializer, needMinMax, isSorted, defaultValue) + : TBase(columnId, 
arrowField, serializer, needMinMax, isSorted, isNullable, defaultValue) , Operator(bOperator) { AFL_VERIFY(Operator); diff --git a/ydb/core/tx/columnshard/engines/scheme/index_info.cpp b/ydb/core/tx/columnshard/engines/scheme/index_info.cpp index b49c6a3e7b53..25c2f4e1d22d 100644 --- a/ydb/core/tx/columnshard/engines/scheme/index_info.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/index_info.cpp @@ -1,36 +1,24 @@ #include "index_info.h" -#include "statistics/abstract/operator.h" - -#include -#include #include #include #include -#include +#include #include #include +#include +#include +#include namespace NKikimr::NOlap { -static std::vector NamesOnly(const std::vector& columns) { - std::vector out; - out.reserve(columns.size()); - for (const auto& [name, _] : columns) { - out.push_back(name); - } - return out; -} - TIndexInfo::TIndexInfo(const TString& name) - : NTable::TScheme::TTableSchema() - , Name(name) -{ + : Name(name) { CompactionPlannerConstructor = NStorageOptimizer::IOptimizerPlannerConstructor::BuildDefault(); } bool TIndexInfo::CheckCompatible(const TIndexInfo& other) const { - if (!other.GetPrimaryKey()->Equals(GetPrimaryKey())) { + if (!other.GetPrimaryKey()->Equals(PrimaryKey)) { return false; } return true; @@ -43,42 +31,39 @@ ui32 TIndexInfo::GetColumnIdVerified(const std::string& name) const { } std::optional TIndexInfo::GetColumnIdOptional(const std::string& name) const { - const auto ni = ColumnNames.find(name); - - if (ni != ColumnNames.end()) { - return ni->second; + const auto pred = [](const TNameInfo& item, const std::string& value) { + return item.GetName() < value; + }; + auto it = std::lower_bound(ColumnNames.begin(), ColumnNames.end(), name, pred); + if (it != ColumnNames.end() && it->GetName() == name) { + return it->GetColumnId(); } return IIndexInfo::GetColumnIdOptional(name); } -TString TIndexInfo::GetColumnName(ui32 id, bool required) const { - const auto ci = Columns.find(id); - - if (ci != Columns.end()) { - return 
ci->second.Name; +TString TIndexInfo::GetColumnName(const ui32 id, bool required) const { + const auto& f = GetColumnFeaturesOptional(id); + if (!f) { + AFL_VERIFY(!required); + return ""; + } else { + return f->GetColumnName(); } - - return IIndexInfo::GetColumnName(id, required); } -std::vector TIndexInfo::GetColumnIds(const bool withSpecial) const { - std::vector result; - for (auto&& i : Columns) { - result.emplace_back(i.first); - } +const std::vector& TIndexInfo::GetColumnIds(const bool withSpecial) const { if (withSpecial) { - IIndexInfo::AddSpecialFieldIds(result); + return SchemaColumnIdsWithSpecials; + } else { + return SchemaColumnIds; } - return result; } std::vector TIndexInfo::GetColumnNames(const std::vector& ids) const { std::vector out; out.reserve(ids.size()); for (ui32 id : ids) { - const auto ci = Columns.find(id); - Y_ABORT_UNLESS(ci != Columns.end()); - out.push_back(ci->second.Name); + out.push_back(GetColumnName(id)); } return out; } @@ -87,46 +72,21 @@ std::vector TIndexInfo::GetColumnSTLNames(const std::vector& std::vector out; out.reserve(ids.size()); for (ui32 id : ids) { - const auto ci = Columns.find(id); - Y_ABORT_UNLESS(ci != Columns.end()); - out.push_back(ci->second.Name); + out.push_back(GetColumnName(id)); } return out; } -std::vector TIndexInfo::GetColumns(const std::vector& ids) const { - return NOlap::GetColumns(*this, ids); -} - -std::shared_ptr TIndexInfo::ArrowSchema() const { +const std::shared_ptr& TIndexInfo::ArrowSchema() const { AFL_VERIFY(Schema); return Schema; } -std::shared_ptr TIndexInfo::ArrowSchemaWithSpecials() const { +const std::shared_ptr& TIndexInfo::ArrowSchemaWithSpecials() const { AFL_VERIFY(SchemaWithSpecials); return SchemaWithSpecials; } -std::shared_ptr TIndexInfo::AddColumns( - const std::shared_ptr& src, - const std::vector& columns) const { - std::shared_ptr all = ArrowSchemaWithSpecials(); - auto fields = src->fields(); - - for (const auto& col : columns) { - const std::string name(col.data(), 
col.size()); - if (!src->GetFieldByName(name)) { - auto field = all->GetFieldByName(name); - if (!field) { - return {}; - } - fields.push_back(field); - } - } - return std::make_shared(std::move(fields)); -} - std::vector TIndexInfo::GetColumnIds(const std::vector& columnNames) const { std::vector ids; ids.reserve(columnNames.size()); @@ -140,33 +100,20 @@ std::vector TIndexInfo::GetColumnIds(const std::vector& columnNam return ids; } -std::shared_ptr TIndexInfo::ArrowColumnFieldVerified(const ui32 columnId) const { - auto result = ArrowColumnFieldOptional(columnId); - AFL_VERIFY(result); - return result; -} - -std::shared_ptr TIndexInfo::ArrowColumnFieldOptional(const ui32 columnId) const { - auto it = ArrowColumnByColumnIdCache.find(columnId); - if (it == ArrowColumnByColumnIdCache.end()) { - return nullptr; - } else { - return it->second; - } -} - -void TIndexInfo::SetAllKeys(const std::shared_ptr& operators) { +void TIndexInfo::SetAllKeys(const std::shared_ptr& operators, const THashMap& columns) { /// @note Setting replace and sorting key to PK we are able to: /// * apply REPLACE by MergeSort /// * apply PK predicate before REPLACE - { - const auto& primaryKeyNames = NamesOnly(GetPrimaryKeyColumns()); - auto columnIds = GetColumnIds(primaryKeyNames); - AFL_VERIFY(columnIds.size()); - PrimaryKey = MakeArrowSchema(Columns, columnIds); + PrimaryKey = MakeArrowSchema(columns, PKColumnIds, nullptr); + + AFL_VERIFY(PKColumns.empty()); + for (auto&& i : PKColumnIds) { + auto it = columns.find(i); + AFL_VERIFY(it != columns.end()); + PKColumns.emplace_back(TNameTypeInfo(it->second.Name, it->second.PType)); } - for (const auto& [colId, column] : Columns) { + for (const auto& [colId, column] : columns) { if (NArrow::IsPrimitiveYqlType(column.PType)) { MinMaxIdxColumnsIds.insert(colId); } @@ -174,38 +121,39 @@ void TIndexInfo::SetAllKeys(const std::shared_ptr& operators) MinMaxIdxColumnsIds.insert(GetPKFirstColumnId()); if (!Schema) { AFL_VERIFY(!SchemaWithSpecials); - 
InitializeCaches(operators); + InitializeCaches(operators, columns, nullptr); } } TColumnSaver TIndexInfo::GetColumnSaver(const ui32 columnId) const { - auto it = ColumnFeatures.find(columnId); - AFL_VERIFY(it != ColumnFeatures.end()); - return it->second.GetColumnSaver(); + return GetColumnFeaturesVerified(columnId).GetColumnSaver(); } std::shared_ptr TIndexInfo::GetColumnLoaderOptional(const ui32 columnId) const { - auto it = ColumnFeatures.find(columnId); - if (it == ColumnFeatures.end()) { + const auto& cFeatures = GetColumnFeaturesOptional(columnId); + if (!cFeatures) { return nullptr; } else { - return it->second.GetLoader(); + return cFeatures->GetLoader(); } } -std::shared_ptr TIndexInfo::GetColumnFieldOptional(const ui32 columnId) const { - std::shared_ptr schema; - if (IsSpecialColumn(columnId)) { - return IIndexInfo::GetColumnFieldOptional(columnId); +std::optional TIndexInfo::GetColumnIndexOptional(const ui32 id) const { + auto it = std::lower_bound(SchemaColumnIdsWithSpecials.begin(), SchemaColumnIdsWithSpecials.end(), id); + if (it == SchemaColumnIdsWithSpecials.end() || *it != id) { + return std::nullopt; } else { - schema = ArrowSchema(); + return it - SchemaColumnIdsWithSpecials.begin(); } - if (const TString columnName = GetColumnName(columnId, false)) { - return schema->GetFieldByName(columnName); - } else { +} + +std::shared_ptr TIndexInfo::GetColumnFieldOptional(const ui32 columnId) const { + const std::optional index = GetColumnIndexOptional(columnId); + if (!index) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("column_id", columnId)("event", "incorrect_column_id"); return nullptr; } + return ArrowSchemaWithSpecials()->GetFieldByIndexVerified(*index); } std::shared_ptr TIndexInfo::GetColumnFieldVerified(const ui32 columnId) const { @@ -224,20 +172,24 @@ std::shared_ptr TIndexInfo::GetColumnsSchema(const std::set } std::shared_ptr TIndexInfo::GetColumnSchema(const ui32 columnId) const { - return GetColumnsSchema({columnId}); + return 
GetColumnsSchema({ columnId }); } -bool TIndexInfo::DeserializeFromProto(const NKikimrSchemeOp::TColumnTableSchema& schema, const std::shared_ptr& operators) { +bool TIndexInfo::DeserializeFromProto(const NKikimrSchemeOp::TColumnTableSchema& schema, const std::shared_ptr& operators, + const std::shared_ptr& cache) { if (schema.GetEngine() != NKikimrSchemeOp::COLUMN_ENGINE_REPLACING_TIMESERIES) { AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "cannot_parse_index_info")("reason", "incorrect_engine_in_schema"); return false; } + AFL_VERIFY(cache); { + TMemoryProfileGuard g("TIndexInfo::DeserializeFromProto::Optimizer"); SchemeNeedActualization = schema.GetOptions().GetSchemeNeedActualization(); ExternalGuaranteeExclusivePK = schema.GetOptions().GetExternalGuaranteeExclusivePK(); if (schema.GetOptions().HasCompactionPlannerConstructor()) { - auto container = NStorageOptimizer::TOptimizerPlannerConstructorContainer::BuildFromProto(schema.GetOptions().GetCompactionPlannerConstructor()); + auto container = + NStorageOptimizer::TOptimizerPlannerConstructorContainer::BuildFromProto(schema.GetOptions().GetCompactionPlannerConstructor()); CompactionPlannerConstructor = container.DetachResult().GetObjectPtrVerified(); } else { AFL_VERIFY(!!CompactionPlannerConstructor); @@ -245,6 +197,7 @@ bool TIndexInfo::DeserializeFromProto(const NKikimrSchemeOp::TColumnTableSchema& } if (schema.HasDefaultCompression()) { + TMemoryProfileGuard g("TIndexInfo::DeserializeFromProto::Serializer"); NArrow::NSerialization::TSerializerContainer container; if (!container.DeserializeFromProto(schema.GetDefaultCompression())) { AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "cannot_parse_index_info")("reason", "cannot_parse_default_serializer"); @@ -252,49 +205,68 @@ bool TIndexInfo::DeserializeFromProto(const NKikimrSchemeOp::TColumnTableSchema& } DefaultSerializer = container; } - { - for (const auto& stat : schema.GetStatistics()) { - NStatistics::TOperatorContainer container; - 
AFL_VERIFY(container.DeserializeFromProto(stat)); - AFL_VERIFY(StatisticsByName.emplace(container.GetName(), std::move(container)).second); - } - NStatistics::TPortionStorageCursor cursor; - for (auto&& [_, container] : StatisticsByName) { - container.SetCursor(cursor); - container->ShiftCursor(cursor); + TMemoryProfileGuard g("TIndexInfo::DeserializeFromProto::Indexes"); + for (const auto& idx : schema.GetIndexes()) { + NIndexes::TIndexMetaContainer meta; + AFL_VERIFY(meta.DeserializeFromProto(idx)); + Indexes.emplace(meta->GetIndexId(), meta); } } - - for (const auto& idx : schema.GetIndexes()) { - NIndexes::TIndexMetaContainer meta; - AFL_VERIFY(meta.DeserializeFromProto(idx)); - Indexes.emplace(meta->GetIndexId(), meta); - } - for (const auto& col : schema.GetColumns()) { - const ui32 id = col.GetId(); - const TString& name = col.GetName(); - const bool notNull = col.HasNotNull() ? col.GetNotNull() : false; - auto typeInfoMod = NScheme::TypeInfoModFromProtoColumnType(col.GetTypeId(), col.HasTypeInfo() ? &col.GetTypeInfo() : nullptr); - Columns[id] = NTable::TColumn(name, id, typeInfoMod.TypeInfo, typeInfoMod.TypeMod, notNull); - ColumnNames[name] = id; + THashMap columns; + { + TMemoryProfileGuard g("TIndexInfo::DeserializeFromProto::Columns"); + ColumnNames.clear(); + for (const auto& col : schema.GetColumns()) { + const ui32 id = col.GetId(); + const TString& name = cache->GetStringCache(col.GetName()); + const bool notNull = col.HasNotNull() ? col.GetNotNull() : false; + auto typeInfoMod = NScheme::TypeInfoModFromProtoColumnType(col.GetTypeId(), col.HasTypeInfo() ? 
&col.GetTypeInfo() : nullptr); + columns[id] = NTable::TColumn(name, id, typeInfoMod.TypeInfo, cache->GetStringCache(typeInfoMod.TypeMod), notNull); + ColumnNames.emplace_back(name, id); + } + std::sort(ColumnNames.begin(), ColumnNames.end()); } for (const auto& keyName : schema.GetKeyColumnNames()) { - Y_ABORT_UNLESS(ColumnNames.contains(keyName)); - KeyColumns.push_back(ColumnNames[keyName]); - } - InitializeCaches(operators); - for (const auto& col : schema.GetColumns()) { - auto it = ColumnFeatures.find(col.GetId()); - AFL_VERIFY(it != ColumnFeatures.end()); - auto parsed = it->second.DeserializeFromProto(col, operators); - if (!parsed) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "cannot_parse_column_feature")("reason", parsed.GetErrorMessage()); - return false; + PKColumnIds.push_back(GetColumnIdVerified(keyName)); + } + InitializeCaches(operators, columns, cache, false); + SetAllKeys(operators, columns); + { + TMemoryProfileGuard g("TIndexInfo::DeserializeFromProto::Columns::Features"); + for (const auto& col : schema.GetColumns()) { + THashMap> it; + const TString fingerprint = cache ? 
("C:" + col.SerializeAsString()) : Default(); + const auto createPred = [&]() -> TConclusion> { + auto f = BuildDefaultColumnFeatures(col.GetId(), columns, operators); + auto parsed = f->DeserializeFromProto(col, operators); + if (parsed.IsFail()) { + return parsed; + } + return f; + }; + auto fConclusion = cache->GetOrCreateColumnFeatures(fingerprint, createPred); + if (fConclusion.IsFail()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "cannot_parse_column_feature")("reason", fConclusion.GetErrorMessage()); + return false; + } + ColumnFeatures.emplace_back(fConclusion.DetachResult()); + } + for (auto&& cId : GetSystemColumnIds()) { + THashMap> it; + const TString fingerprint = "SC:" + ::ToString(cId); + const auto createPred = [&]() -> TConclusion> { + return BuildDefaultColumnFeatures(cId, {}, operators); + }; + auto fConclusion = cache->GetOrCreateColumnFeatures(fingerprint, createPred); + ColumnFeatures.emplace_back(fConclusion.DetachResult()); } + const auto pred = [](const std::shared_ptr& l, const std::shared_ptr& r) { + return l->GetColumnId() < r->GetColumnId(); + }; + std::sort(ColumnFeatures.begin(), ColumnFeatures.end(), pred); } - Version = schema.GetVersion(); return true; } @@ -310,25 +282,20 @@ std::vector GetColumns(const NTable::TScheme::TTableSchema& table return out; } -std::optional TIndexInfo::BuildFromProto(const NKikimrSchemeOp::TColumnTableSchema& schema, const std::shared_ptr& operators) { +std::optional TIndexInfo::BuildFromProto(const NKikimrSchemeOp::TColumnTableSchema& schema, + const std::shared_ptr& operators, const std::shared_ptr& cache) { TIndexInfo result(""); - if (!result.DeserializeFromProto(schema, operators)) { + if (!result.DeserializeFromProto(schema, operators, cache)) { return std::nullopt; } return result; } -std::shared_ptr MakeArrowSchema(const NTable::TScheme::TTableSchema::TColumns& columns, const std::vector& ids, const bool withSpecials) { +std::vector> MakeArrowFields(const 
NTable::TScheme::TTableSchema::TColumns& columns, const std::vector& ids, + const std::shared_ptr& cache) { std::vector> fields; - if (withSpecials) { - IIndexInfo::AddSpecialFields(fields); - } - for (const ui32 id : ids) { - if (TIndexInfo::IsSpecialColumn(id)) { - AFL_VERIFY(withSpecials); - continue; - } + AFL_VERIFY(!TIndexInfo::IsSpecialColumn(id)); auto it = columns.find(id); AFL_VERIFY(it != columns.end()); @@ -336,51 +303,62 @@ std::shared_ptr MakeArrowSchema(const NTable::TScheme::TTableSche std::string colName(column.Name.data(), column.Name.size()); auto arrowType = NArrow::GetArrowType(column.PType); AFL_VERIFY(arrowType.ok()); - fields.emplace_back(std::make_shared(colName, arrowType.ValueUnsafe(), !column.NotNull)); + auto f = std::make_shared(colName, arrowType.ValueUnsafe(), !column.NotNull); + if (cache) { + auto fFound = cache->GetField(f->ToString(true)); + if (!fFound) { + cache->RegisterField(f->ToString(true), f); + fields.emplace_back(f); + } else { + fields.emplace_back(fFound); + } + } else { + fields.emplace_back(f); + } } - return std::make_shared(std::move(fields)); + return fields; +} + +std::shared_ptr MakeArrowSchema( + const NTable::TScheme::TTableSchema::TColumns& columns, const std::vector& ids, const std::shared_ptr& cache) { + return std::make_shared(MakeArrowFields(columns, ids, cache)); } -void TIndexInfo::InitializeCaches(const std::shared_ptr& operators) { +void TIndexInfo::InitializeCaches(const std::shared_ptr& operators, const THashMap& columns, const std::shared_ptr& cache, + const bool withColumnFeatures) { { + TMemoryProfileGuard g("TIndexInfo::DeserializeFromProto::InitializeCaches::Schema"); AFL_VERIFY(!Schema); - std::vector ids; - ids.reserve(Columns.size()); - for (const auto& [id, _] : Columns) { - ids.push_back(id); + SchemaColumnIds.reserve(columns.size()); + for (const auto& [id, _] : columns) { + SchemaColumnIds.push_back(id); } - // The ids had a set type before so we keep them sorted. 
- std::sort(ids.begin(), ids.end()); - Schema = MakeArrowSchema(Columns, ids); - } - SchemaWithSpecials = IIndexInfo::AddSpecialFields(ArrowSchema()); - - for (auto&& c : Columns) { - AFL_VERIFY(ArrowColumnByColumnIdCache.emplace(c.first, GetColumnFieldVerified(c.first)).second); - AFL_VERIFY(ColumnFeatures.emplace(c.first, TColumnFeatures(c.first, GetColumnFieldVerified(c.first), DefaultSerializer, operators->GetDefaultOperator(), - NArrow::IsPrimitiveYqlType(c.second.PType), c.first == GetPKFirstColumnId(), nullptr)).second); + std::sort(SchemaColumnIds.begin(), SchemaColumnIds.end()); + auto originalFields = MakeArrowFields(columns, SchemaColumnIds, cache); + Schema = std::make_shared(originalFields); + IIndexInfo::AddSpecialFields(originalFields); + SchemaWithSpecials = std::make_shared(originalFields); } - for (auto&& cId : GetSystemColumnIds()) { - AFL_VERIFY(ArrowColumnByColumnIdCache.emplace(cId, GetColumnFieldVerified(cId)).second); - AFL_VERIFY(ColumnFeatures.emplace(cId, TColumnFeatures(cId, GetColumnFieldVerified(cId), DefaultSerializer, operators->GetDefaultOperator(), - false, false, IIndexInfo::DefaultColumnValue(cId))).second); - } -} - -std::vector> TIndexInfo::MakeEmptyChunks(const ui32 columnId, const std::vector& pages, const TSimpleColumnInfo& columnInfo) const { - std::vector> result; - auto columnArrowSchema = GetColumnSchema(columnId); - TColumnSaver saver = GetColumnSaver(columnId); - ui32 idx = 0; - for (auto p : pages) { - auto arr = NArrow::MakeEmptyBatch(columnArrowSchema, p); - AFL_VERIFY(arr->num_columns() == 1)("count", arr->num_columns()); - result.emplace_back(std::make_shared(saver.Apply(arr), arr->column(0), TChunkAddress(columnId, idx), columnInfo)); - ++idx; + { + TMemoryProfileGuard g("TIndexInfo::DeserializeFromProto::InitializeCaches::SchemaFields"); + SchemaColumnIdsWithSpecials = IIndexInfo::AddSpecialFieldIds(SchemaColumnIds); + } + if (withColumnFeatures) { + { + TMemoryProfileGuard 
g("TIndexInfo::DeserializeFromProto::InitializeCaches::Columns"); + for (auto&& c : columns) { + ColumnFeatures.emplace_back(BuildDefaultColumnFeatures(c.first, columns, operators)); + } + } + { + TMemoryProfileGuard g("TIndexInfo::DeserializeFromProto::InitializeCaches::SysColumns"); + for (auto&& cId : GetSystemColumnIds()) { + ColumnFeatures.emplace_back(BuildDefaultColumnFeatures(cId, columns, operators)); + } + } } - return result; } NSplitter::TEntityGroups TIndexInfo::GetEntityGroupsByStorageId(const TString& specialTier, const IStoragesManager& storages) const { @@ -401,18 +379,71 @@ std::shared_ptr TIndexInfo::Get return CompactionPlannerConstructor; } -std::shared_ptr TIndexInfo::GetColumnDefaultValueVerified(const std::string& colName) const { +std::shared_ptr TIndexInfo::GetColumnExternalDefaultValueVerified(const std::string& colName) const { const ui32 columnId = GetColumnIdVerified(colName); - return GetColumnDefaultValueVerified(columnId); + return GetColumnExternalDefaultValueVerified(columnId); +} + +std::shared_ptr TIndexInfo::GetColumnExternalDefaultValueVerified(const ui32 columnId) const { + return GetColumnFeaturesVerified(columnId).GetDefaultValue().GetValue(); +} + +NKikimr::TConclusionStatus TIndexInfo::AppendIndex(const THashMap>>& originalData, + const ui32 indexId, const std::shared_ptr& operators, TSecondaryData& result) const { + auto it = Indexes.find(indexId); + AFL_VERIFY(it != Indexes.end()); + auto& index = it->second; + std::shared_ptr chunk = index->BuildIndex(originalData, *this); + auto opStorage = operators->GetOperatorVerified(index->GetStorageId()); + if ((i64)chunk->GetPackedSize() > opStorage->GetBlobSplitSettings().GetMaxBlobSize()) { + return TConclusionStatus::Fail("blob size for secondary data (" + ::ToString(indexId) + ") bigger than limit (" + + ::ToString(opStorage->GetBlobSplitSettings().GetMaxBlobSize()) + ")"); + } + if (index->GetStorageId() == IStoragesManager::LocalMetadataStorageId) { + 
AFL_VERIFY(result.MutableSecondaryInplaceData().emplace(indexId, chunk).second); + } else { + AFL_VERIFY(result.MutableExternalData().emplace(indexId, std::vector>({chunk})).second); + } + return TConclusionStatus::Success(); +} + +std::shared_ptr TIndexInfo::GetIndexMax(const ui32 columnId) const { + for (auto&& i : Indexes) { + if (i.second->GetClassName() != NIndexes::NMax::TIndexMeta::GetClassNameStatic()) { + continue; + } + auto maxIndex = static_pointer_cast(i.second.GetObjectPtr()); + if (maxIndex->GetColumnId() == columnId) { + return maxIndex; + } + } + return nullptr; +} + +std::vector TIndexInfo::GetEntityIds() const { + auto result = GetColumnIds(true); + for (auto&& i : Indexes) { + result.emplace_back(i.first); + } + return result; } -std::shared_ptr TIndexInfo::GetColumnDefaultValueVerified(const ui32 columnId) const { - auto& features = GetColumnFeaturesVerified(columnId); - if (features.GetDefaultValue().IsEmpty() && !IsNullableVerified(columnId)) { - return NArrow::DefaultScalar(GetColumnFieldVerified(columnId)->type()); +std::shared_ptr TIndexInfo::BuildDefaultColumnFeatures( + const ui32 columnId, const THashMap& columns, const std::shared_ptr& operators) const { + if (IsSpecialColumn(columnId)) { + return std::make_shared(columnId, GetColumnFieldVerified(columnId), DefaultSerializer, operators->GetDefaultOperator(), + false, false, false, IIndexInfo::DefaultColumnValue(columnId)); } else { - return features.GetDefaultValue().GetValue(); + auto itC = columns.find(columnId); + AFL_VERIFY(itC != columns.end()); + return std::make_shared(columnId, GetColumnFieldVerified(columnId), DefaultSerializer, operators->GetDefaultOperator(), + NArrow::IsPrimitiveYqlType(itC->second.PType), columnId == GetPKFirstColumnId(), false, nullptr); } } -} // namespace NKikimr::NOlap +std::shared_ptr TIndexInfo::GetColumnExternalDefaultValueByIndexVerified(const ui32 colIndex) const { + AFL_VERIFY(colIndex < ColumnFeatures.size())("index", colIndex)("size", 
ColumnFeatures.size()); + return ColumnFeatures[colIndex]->GetDefaultValue().GetValue(); +} + +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/scheme/index_info.h b/ydb/core/tx/columnshard/engines/scheme/index_info.h index a9f77745110d..d2fef0f4060c 100644 --- a/ydb/core/tx/columnshard/engines/scheme/index_info.h +++ b/ydb/core/tx/columnshard/engines/scheme/index_info.h @@ -5,75 +5,169 @@ #include "abstract/index_info.h" #include "indexes/abstract/meta.h" -#include "statistics/abstract/operator.h" -#include "statistics/abstract/common.h" -#include - -#include -#include -#include #include #include -#include +#include #include +#include +#include +#include +#include + +#include namespace arrow { - class Array; - class Field; - class Schema; -} +class Array; +class Field; +class Schema; +} // namespace arrow namespace NKikimr::NOlap { + +namespace NIndexes::NMax { +class TIndexMeta; +} + namespace NStorageOptimizer { class IOptimizerPlannerConstructor; } class TPortionInfoWithBlobs; -struct TInsertedData; class TSnapshotColumnInfo; class ISnapshotSchema; using TNameTypeInfo = std::pair; +class TSchemaObjectsCache { +private: + THashMap> Fields; + THashMap> ColumnFeatures; + THashSet StringsCache; + mutable ui64 AcceptionFieldsCount = 0; + mutable ui64 AcceptionFeaturesCount = 0; + +public: + const TString& GetStringCache(const TString& original) { + auto it = StringsCache.find(original); + if (it == StringsCache.end()) { + it = StringsCache.emplace(original).first; + } + return *it; + } + + void RegisterField(const TString& fingerprint, const std::shared_ptr& f) { + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "register_field")("fp", fingerprint)("f", f->ToString()); + AFL_VERIFY(Fields.emplace(fingerprint, f).second); + } + void RegisterColumnFeatures(const TString& fingerprint, const std::shared_ptr& f) { + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "register_column_features")("fp", fingerprint)("info", f->DebugString()); + 
AFL_VERIFY(ColumnFeatures.emplace(fingerprint, f).second); + } + std::shared_ptr GetField(const TString& fingerprint) const { + auto it = Fields.find(fingerprint); + if (it == Fields.end()) { + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "get_field_miss")("fp", fingerprint)("count", Fields.size())( + "acc", AcceptionFieldsCount); + return nullptr; + } + if (++AcceptionFieldsCount % 1000 == 0) { + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "get_field_accept")("fp", fingerprint)("count", Fields.size())( + "acc", AcceptionFieldsCount); + } + return it->second; + } + template + TConclusion> GetOrCreateColumnFeatures(const TString& fingerprint, const TConstructor& constructor) { + auto it = ColumnFeatures.find(fingerprint); + if (it == ColumnFeatures.end()) { + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "get_column_features_miss")("fp", UrlEscapeRet(fingerprint))( + "count", ColumnFeatures.size())("acc", AcceptionFeaturesCount); + TConclusion> resultConclusion = constructor(); + if (resultConclusion.IsFail()) { + return resultConclusion; + } + it = ColumnFeatures.emplace(fingerprint, resultConclusion.DetachResult()).first; + AFL_VERIFY(it->second); + } else { + if (++AcceptionFeaturesCount % 1000 == 0) { + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "get_column_features_accept")("fp", UrlEscapeRet(fingerprint))( + "count", ColumnFeatures.size())("acc", AcceptionFeaturesCount); + } + } + return it->second; + } +}; + /// Column engine index description in terms of tablet's local table. /// We have to use YDB types for keys here. 
-struct TIndexInfo : public NTable::TScheme::TTableSchema, public IIndexInfo { +struct TIndexInfo: public IIndexInfo { private: - THashMap ColumnFeatures; - THashMap> ArrowColumnByColumnIdCache; + using TColumns = THashMap; + + class TNameInfo { + private: + YDB_READONLY_DEF(TString, Name); + YDB_READONLY(ui32, ColumnId, 0); + + public: + TNameInfo(const TString& name, const ui32 columnId) + : Name(name) + , ColumnId(columnId) + { + + } + + bool operator<(const TNameInfo& item) const { + return Name < item.Name; + } + }; + + std::vector ColumnNames; + std::vector PKColumnIds; + std::vector PKColumns; + + std::vector> ColumnFeatures; THashMap Indexes; - std::map StatisticsByName; TIndexInfo(const TString& name); bool SchemeNeedActualization = false; std::shared_ptr CompactionPlannerConstructor; bool ExternalGuaranteeExclusivePK = false; - bool DeserializeFromProto(const NKikimrSchemeOp::TColumnTableSchema& schema, const std::shared_ptr& operators); - void InitializeCaches(const std::shared_ptr& operators); + bool DeserializeFromProto(const NKikimrSchemeOp::TColumnTableSchema& schema, const std::shared_ptr& operators, + const std::shared_ptr& cache); + void InitializeCaches(const std::shared_ptr& operators, const THashMap& columns, + const std::shared_ptr& cache, const bool withColumnFeatures = true); + std::shared_ptr BuildDefaultColumnFeatures( + const ui32 columnId, const THashMap& columns, const std::shared_ptr& operators) const; + public: std::shared_ptr GetCompactionPlannerConstructor() const; - - bool IsNullableVerified(const std::string& fName) const { - auto it = Columns.find(GetColumnIdVerified(fName)); - AFL_VERIFY(it != Columns.end()); - return !it->second.NotNull; + bool IsNullableVerifiedByIndex(const ui32 colIndex) const { + AFL_VERIFY(colIndex < ColumnFeatures.size()); + return ColumnFeatures[colIndex]->GetIsNullable(); } bool IsNullableVerified(const ui32 colId) const { - auto it = Columns.find(colId); - AFL_VERIFY(it != Columns.end()); - return 
!it->second.NotNull; + return GetColumnFeaturesVerified(colId).GetIsNullable(); } - std::shared_ptr GetColumnDefaultValueVerified(const std::string& colName) const; - std::shared_ptr GetColumnDefaultValueVerified(const ui32 colId) const; + std::shared_ptr GetColumnExternalDefaultValueVerified(const std::string& colName) const; + std::shared_ptr GetColumnExternalDefaultValueVerified(const ui32 colId) const; + std::shared_ptr GetColumnExternalDefaultValueByIndexVerified(const ui32 colIndex) const; + bool GetExternalGuaranteeExclusivePK() const { return ExternalGuaranteeExclusivePK; } const TColumnFeatures& GetColumnFeaturesVerified(const ui32 columnId) const { - auto it = ColumnFeatures.find(columnId); - AFL_VERIFY(it != ColumnFeatures.end()); - return it->second; + return *ColumnFeatures[GetColumnIndexVerified(columnId)]; + } + + const std::shared_ptr& GetColumnFeaturesOptional(const ui32 columnId) const { + if (auto idx = GetColumnIndexOptional(columnId)) { + return ColumnFeatures[*idx]; + } else { + return Default>(); + } } NSplitter::TEntityGroups GetEntityGroupsByStorageId(const TString& specialTier, const IStoragesManager& storages) const; @@ -88,27 +182,12 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema, public IIndexInfo { result.emplace(portionTierName); } else { for (auto&& i : ColumnFeatures) { - result.emplace(i.second.GetOperator()->GetStorageId()); + result.emplace(i->GetOperator()->GetStorageId()); } } return result; } - std::vector> MakeEmptyChunks(const ui32 columnId, const std::vector& pages, const TSimpleColumnInfo& columnInfo) const; - - const std::map& GetStatisticsByName() const { - return StatisticsByName; - } - - NStatistics::TOperatorContainer GetStatistics(const NStatistics::TIdentifier& id) const { - for (auto&& i : StatisticsByName) { - if (i.second->GetIdentifier() == id) { - return i.second; - } - } - return NStatistics::TOperatorContainer(); - } - const THashMap& GetIndexes() const { return Indexes; } @@ -123,9 +202,7 @@ 
struct TIndexInfo : public NTable::TScheme::TTableSchema, public IIndexInfo { if (specialTier && specialTier != IStoragesManager::DefaultStorageId) { return specialTier; } else { - auto it = ColumnFeatures.find(columnId); - AFL_VERIFY(it != ColumnFeatures.end()); - return it->second.GetOperator()->GetStorageId(); + return GetColumnFeaturesVerified(columnId).GetOperator()->GetStorageId(); } } @@ -140,33 +217,48 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema, public IIndexInfo { TString DebugString() const { TStringBuilder sb; sb << "(" - << "version=" << Version << ";" - << "name=" << Name << ";" - << ")"; + << "version=" << Version << ";" + << "name=" << Name << ";" + << ")"; for (auto&& i : ColumnFeatures) { - sb << GetColumnName(i.first) << ":" << i.second.DebugString() << ";"; + sb << i->GetColumnName() << ":" << i->DebugString() << ";"; } return sb; } + void SetAllKeys(const std::shared_ptr& operators, const THashMap& columns); + public: static TIndexInfo BuildDefault() { TIndexInfo result("dummy"); return result; } - std::vector> ActualizeColumnData(const std::vector>& source, const TIndexInfo& sourceIndexInfo, const ui32 columnId) const { - auto itCurrent = ColumnFeatures.find(columnId); - auto itPred = sourceIndexInfo.ColumnFeatures.find(columnId); - AFL_VERIFY(itCurrent != ColumnFeatures.end()); - AFL_VERIFY(itPred != sourceIndexInfo.ColumnFeatures.end()); - return itCurrent->second.ActualizeColumnData(source, itPred->second); + static TIndexInfo BuildDefault( + const std::shared_ptr& operators, const TColumns& columns, const std::vector& pkNames) { + TIndexInfo result = BuildDefault(); + for (auto&& i : columns) { + result.ColumnNames.emplace_back(i.second.Name, i.first); + } + std::sort(result.ColumnNames.begin(), result.ColumnNames.end()); + for (auto&& i : pkNames) { + const ui32 columnId = result.GetColumnIdVerified(i); + result.PKColumnIds.emplace_back(columnId); + } + result.SetAllKeys(operators, columns); + return result; } - static 
std::optional BuildFromProto(const NKikimrSchemeOp::TColumnTableSchema& schema, const std::shared_ptr& operators); + std::vector> ActualizeColumnData( + const std::vector>& source, const TIndexInfo& sourceIndexInfo, const ui32 columnId) const { + return GetColumnFeaturesVerified(columnId).ActualizeColumnData(source, sourceIndexInfo.GetColumnFeaturesVerified(columnId)); + } + + static std::optional BuildFromProto(const NKikimrSchemeOp::TColumnTableSchema& schema, + const std::shared_ptr& operators, const std::shared_ptr& cache); bool HasColumnId(const ui32 columnId) const { - return ColumnFeatures.contains(columnId); + return !!GetColumnIndexOptional(columnId); } bool HasColumnName(const std::string& columnName) const { @@ -177,6 +269,12 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema, public IIndexInfo { return Indexes.contains(indexId); } + std::optional GetColumnIndexOptional(const ui32 id) const; + ui32 GetColumnIndexVerified(const ui32 id) const { + auto result = GetColumnIndexOptional(id); + AFL_VERIFY(result); + return *result; + } std::shared_ptr GetColumnFieldOptional(const ui32 columnId) const; std::shared_ptr GetColumnFieldVerified(const ui32 columnId) const; std::shared_ptr GetColumnSchema(const ui32 columnId) const; @@ -199,6 +297,12 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema, public IIndexInfo { return it->second; } + NIndexes::TIndexMetaContainer GetIndexVerified(const ui32 indexId) const { + auto it = Indexes.find(indexId); + AFL_VERIFY(it != Indexes.end()); + return it->second; + } + std::optional GetIndexNameOptional(const ui32 indexId) const { auto meta = GetIndexOptional(indexId); if (!meta) { @@ -207,25 +311,37 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema, public IIndexInfo { return meta->GetIndexName(); } - void AppendIndexes(THashMap>>& originalData) const { + class TSecondaryData { + private: + using TStorageData = THashMap>; + YDB_ACCESSOR_DEF(TStorageData, SecondaryInplaceData); + using 
TPrimaryStorageData = THashMap>>; + YDB_ACCESSOR_DEF(TPrimaryStorageData, ExternalData); + + public: + TSecondaryData() = default; + }; + + [[nodiscard]] TConclusion AppendIndexes(const THashMap>>& primaryData, + const std::shared_ptr& operators) const { + TSecondaryData result; + result.MutableExternalData() = primaryData; for (auto&& i : Indexes) { - std::shared_ptr chunk = i.second->BuildIndex(i.first, originalData, *this); - AFL_VERIFY(originalData.emplace(i.first, std::vector>({chunk})).second); + auto conclusion = AppendIndex(primaryData, i.first, operators, result); + if (conclusion.IsFail()) { + return conclusion; + } } + return result; } - void AppendIndex(THashMap>>& originalData, const ui32 indexId) const { - auto it = Indexes.find(indexId); - AFL_VERIFY(it != Indexes.end()); - std::shared_ptr chunk = it->second->BuildIndex(indexId, originalData, *this); - AFL_VERIFY(originalData.emplace(indexId, std::vector>({chunk})).second); - } + std::shared_ptr GetIndexMax(const ui32 columnId) const; + + [[nodiscard]] TConclusionStatus AppendIndex(const THashMap>>& originalData, + const ui32 indexId, const std::shared_ptr& operators, TSecondaryData& result) const; /// Returns an id of the column located by name. The name should exists in the schema. ui32 GetColumnIdVerified(const std::string& name) const; - ui32 GetColumnId(const std::string& name) const { - return GetColumnIdVerified(name); - } std::set GetColumnIdsVerified(const std::set& names) const { std::set result; for (auto&& i : names) { @@ -236,58 +352,41 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema, public IIndexInfo { std::optional GetColumnIdOptional(const std::string& name) const; /// Returns a name of the column located by id. - TString GetColumnName(ui32 id, bool required = true) const; + TString GetColumnName(const ui32 id, bool required = true) const; /// Returns names of columns defined by the specific ids. 
std::vector GetColumnNames(const std::vector& ids) const; std::vector GetColumnSTLNames(const std::vector& ids) const; - std::vector GetColumnIds(const bool withSpecial = true) const; - std::vector GetEntityIds() const { - auto result = GetColumnIds(); - for (auto&& i : Indexes) { - result.emplace_back(i.first); - } - return result; + const std::vector& GetColumnIds(const bool withSpecial = true) const; + const std::vector& GetPKColumnIds() const { + AFL_VERIFY(PKColumnIds.size()); + return PKColumnIds; } - - /// Returns info of columns defined by specific ids. - std::vector GetColumns(const std::vector& ids) const; + std::vector GetEntityIds() const; /// Traditional Primary Key (includes uniqueness, search and sorting logic) - std::vector GetPrimaryKeyColumns() const { - return GetColumns(KeyColumns); + const std::vector& GetPrimaryKeyColumns() const { + return PKColumns; } /// Returns id of the first column of the primary key. ui32 GetPKFirstColumnId() const { - Y_ABORT_UNLESS(KeyColumns.size()); - return KeyColumns[0]; + Y_ABORT_UNLESS(PKColumnIds.size()); + return PKColumnIds[0]; } const std::shared_ptr& GetReplaceKey() const { return PrimaryKey; } const std::shared_ptr& GetPrimaryKey() const { return PrimaryKey; } - /// Initializes sorting, replace, index and extended keys. 
- void SetAllKeys(const std::shared_ptr& operators); - void CheckTtlColumn(const TString& ttlColumn) const { Y_ABORT_UNLESS(!ttlColumn.empty()); - Y_ABORT_UNLESS(MinMaxIdxColumnsIds.contains(GetColumnId(ttlColumn))); + Y_ABORT_UNLESS(MinMaxIdxColumnsIds.contains(GetColumnIdVerified(ttlColumn))); } std::vector GetColumnIds(const std::vector& columnNames) const; - std::shared_ptr ArrowSchema() const; - std::shared_ptr ArrowSchemaWithSpecials() const; - std::shared_ptr AddColumns(const std::shared_ptr& schema, - const std::vector& columns) const; - - std::shared_ptr ArrowColumnFieldOptional(const ui32 columnId) const; - std::shared_ptr ArrowColumnFieldVerified(const ui32 columnId) const; - - const THashSet& GetRequiredColumns() const { - return RequiredColumns; - } + const std::shared_ptr& ArrowSchema() const; + const std::shared_ptr& ArrowSchemaWithSpecials() const; const THashSet& GetMinMaxIdxColumns() const { return MinMaxIdxColumnsIds; @@ -296,8 +395,12 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema, public IIndexInfo { bool AllowTtlOverColumn(const TString& name) const; /// Returns whether the sorting keys defined. 
- bool IsSorted() const { return true; } - bool IsSortedColumn(const ui32 columnId) const { return GetPKFirstColumnId() == columnId; } + bool IsSorted() const { + return true; + } + bool IsSortedColumn(const ui32 columnId) const { + return GetPKFirstColumnId() == columnId; + } ui64 GetVersion() const { return Version; @@ -311,17 +414,21 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema, public IIndexInfo { private: ui64 Version = 0; TString Name; - std::shared_ptr Schema; - std::shared_ptr SchemaWithSpecials; + std::vector SchemaColumnIds; + std::vector SchemaColumnIdsWithSpecials; + std::shared_ptr SchemaWithSpecials; + std::shared_ptr Schema; std::shared_ptr PrimaryKey; - THashSet RequiredColumns; THashSet MinMaxIdxColumnsIds; NArrow::NSerialization::TSerializerContainer DefaultSerializer = NArrow::NSerialization::TSerializerContainer::GetDefaultSerializer(); }; -std::shared_ptr MakeArrowSchema(const NTable::TScheme::TTableSchema::TColumns& columns, const std::vector& ids, const bool withSpecials = false); +std::shared_ptr MakeArrowSchema(const NTable::TScheme::TTableSchema::TColumns& columns, const std::vector& ids, + const std::shared_ptr& cache = nullptr); +std::vector> MakeArrowFields(const NTable::TScheme::TTableSchema::TColumns& columns, const std::vector& ids, + const std::shared_ptr& cache = nullptr); /// Extracts columns with the specific ids from the schema. 
std::vector GetColumns(const NTable::TScheme::TTableSchema& tableSchema, const std::vector& ids); -} // namespace NKikimr::NOlap +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/checker.h b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/checker.h index a67a72df8e2f..88815c374eb2 100644 --- a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/checker.h +++ b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/checker.h @@ -1,5 +1,5 @@ #pragma once -#include +#include #include #include #include diff --git a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/constructor.cpp b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/constructor.cpp index a93507bec06f..e6dad360d159 100644 --- a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/constructor.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/constructor.cpp @@ -2,4 +2,19 @@ namespace NKikimr::NOlap::NIndexes { +NKikimr::TConclusionStatus IIndexMetaConstructor::DeserializeFromJson(const NJson::TJsonValue& jsonInfo) { + if (jsonInfo.Has("storage_id")) { + if (!jsonInfo["storage_id"].IsString()) { + return TConclusionStatus::Fail("incorrect storage_id field in json index description (have to be string)"); + } + StorageId = jsonInfo["storage_id"].GetStringSafe(); + if (!*StorageId) { + return TConclusionStatus::Fail("storage_id cannot be empty string"); + } else if (*StorageId != "__LOCAL_METADATA" && *StorageId != "__DEFAULT") { + return TConclusionStatus::Fail("storage_id have to been one of variant ['__LOCAL_METADATA', '__DEFAULT']"); + } + } + return DoDeserializeFromJson(jsonInfo); +} + } // namespace NKikimr::NOlap::NIndexes \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/constructor.h b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/constructor.h index e6fe22e3f273..dded1abd0081 100644 --- 
a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/constructor.h +++ b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/constructor.h @@ -14,6 +14,9 @@ class TOlapSchema; namespace NKikimr::NOlap::NIndexes { class IIndexMetaConstructor { +private: + YDB_READONLY_DEF(std::optional, StorageId); + protected: virtual TConclusionStatus DoDeserializeFromJson(const NJson::TJsonValue& jsonInfo) = 0; virtual std::shared_ptr DoCreateIndexMeta(const ui32 indexId, const TString& indexName, const NSchemeShard::TOlapSchema& currentSchema, NSchemeShard::IErrorCollector& errors) const = 0; @@ -25,19 +28,23 @@ class IIndexMetaConstructor { virtual ~IIndexMetaConstructor() = default; - TConclusionStatus DeserializeFromJson(const NJson::TJsonValue& jsonInfo) { - return DoDeserializeFromJson(jsonInfo); - } + TConclusionStatus DeserializeFromJson(const NJson::TJsonValue& jsonInfo); std::shared_ptr CreateIndexMeta(const ui32 indexId, const TString& indexName, const NSchemeShard::TOlapSchema& currentSchema, NSchemeShard::IErrorCollector& errors) const { return DoCreateIndexMeta(indexId, indexName, currentSchema, errors); } TConclusionStatus DeserializeFromProto(const NKikimrSchemeOp::TOlapIndexRequested& proto) { + if (proto.HasStorageId()) { + StorageId = proto.GetStorageId(); + } return DoDeserializeFromProto(proto); } void SerializeToProto(NKikimrSchemeOp::TOlapIndexRequested& proto) const { + if (StorageId) { + proto.SetStorageId(*StorageId); + } return DoSerializeToProto(proto); } diff --git a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.cpp b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.cpp index ae4fd22fba3d..4d4efc52a2d6 100644 --- a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.cpp @@ -1,14 +1,42 @@ #include "meta.h" +#include namespace NKikimr::NOlap::NIndexes { bool IIndexMeta::DeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDescription& 
proto) { + if (!proto.GetId()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("error", "cannot parse secondary data builder")("reason", "incorrect id - 0"); + return false; + } + if (!proto.GetName()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("error", "cannot parse secondary data builder")("reason", "incorrect name - empty string"); + return false; + } IndexId = proto.GetId(); - AFL_VERIFY(IndexId); IndexName = proto.GetName(); - AFL_VERIFY(IndexName); StorageId = proto.GetStorageId() ? proto.GetStorageId() : IStoragesManager::DefaultStorageId; return DoDeserializeFromProto(proto); } +void IIndexMeta::SerializeToProto(NKikimrSchemeOp::TOlapIndexDescription& proto) const { + AFL_VERIFY(IndexId); + proto.SetId(IndexId); + AFL_VERIFY(IndexName); + proto.SetName(IndexName); + if (StorageId) { + proto.SetStorageId(StorageId); + } + return DoSerializeToProto(proto); +} + +NJson::TJsonValue IIndexMeta::SerializeDataToJson(const TIndexChunk& iChunk, const TIndexInfo& indexInfo) const { + NJson::TJsonValue result = NJson::JSON_MAP; + result.InsertValue("entity_id", iChunk.GetEntityId()); + result.InsertValue("chunk_idx", iChunk.GetChunkIdx()); + if (iChunk.HasBlobData()) { + result.InsertValue("data", DoSerializeDataToJson(iChunk.GetBlobDataVerified(), indexInfo)); + } + return result; +} + } // namespace NKikimr::NOlap::NIndexes \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.h b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.h index 821d36af8cf5..d5185cbca236 100644 --- a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.h +++ b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.h @@ -16,6 +16,7 @@ class TExprBase; namespace NKikimr::NOlap { struct TIndexInfo; class TProgramContainer; +class TIndexChunk; } namespace NKikimr::NSchemeShard { @@ -30,24 +31,30 @@ class IIndexMeta { YDB_READONLY(ui32, IndexId, 0); YDB_READONLY(TString, StorageId, IStoragesManager::DefaultStorageId); 
protected: - virtual std::shared_ptr DoBuildIndex(const ui32 indexId, THashMap>>& data, const TIndexInfo& indexInfo) const = 0; + virtual std::shared_ptr DoBuildIndex(const THashMap>>& data, const TIndexInfo& indexInfo) const = 0; virtual void DoFillIndexCheckers(const std::shared_ptr& info, const NSchemeShard::TOlapSchema& schema) const = 0; virtual bool DoDeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDescription& proto) = 0; virtual void DoSerializeToProto(NKikimrSchemeOp::TOlapIndexDescription& proto) const = 0; virtual TConclusionStatus DoCheckModificationCompatibility(const IIndexMeta& newMeta) const = 0; + virtual NJson::TJsonValue DoSerializeDataToJson(const TString& /*data*/, const TIndexInfo& /*indexInfo*/) const { + return "NO_IMPLEMENTED"; + } public: using TFactory = NObjectFactory::TObjectFactory; using TProto = NKikimrSchemeOp::TOlapIndexDescription; IIndexMeta() = default; - IIndexMeta(const ui32 indexId, const TString& indexName) + IIndexMeta(const ui32 indexId, const TString& indexName, const TString& storageId) : IndexName(indexName) , IndexId(indexId) + , StorageId(storageId) { } + NJson::TJsonValue SerializeDataToJson(const TIndexChunk& iChunk, const TIndexInfo& indexInfo) const; + TConclusionStatus CheckModificationCompatibility(const std::shared_ptr& newMeta) const { if (!newMeta) { return TConclusionStatus::Fail("new meta cannot be absent"); @@ -60,8 +67,8 @@ class IIndexMeta { virtual ~IIndexMeta() = default; - std::shared_ptr BuildIndex(const ui32 indexId, THashMap>>& data, const TIndexInfo& indexInfo) const { - return DoBuildIndex(indexId, data, indexInfo); + std::shared_ptr BuildIndex(const THashMap>>& data, const TIndexInfo& indexInfo) const { + return DoBuildIndex(data, indexInfo); } void FillIndexCheckers(const std::shared_ptr& info, const NSchemeShard::TOlapSchema& schema) const { @@ -69,17 +76,7 @@ class IIndexMeta { } bool DeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDescription& proto); - - void 
SerializeToProto(NKikimrSchemeOp::TOlapIndexDescription& proto) const { - AFL_VERIFY(IndexId); - proto.SetId(IndexId); - AFL_VERIFY(IndexName); - proto.SetName(IndexName); - if (StorageId) { - proto.SetStorageId(StorageId); - } - return DoSerializeToProto(proto); - } + void SerializeToProto(NKikimrSchemeOp::TOlapIndexDescription& proto) const; virtual TString GetClassName() const = 0; }; diff --git a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/program.cpp b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/program.cpp index d16f5fcfb33f..d6a3e9b800e5 100644 --- a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/program.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/program.cpp @@ -401,12 +401,15 @@ class TNormalForm { public: TNormalForm() = default; - bool Add(const NSsa::TAssign& assign) { + bool Add(const NSsa::TAssign& assign, const TProgramContainer& program) { std::vector> argNodes; for (auto&& arg : assign.GetArguments()) { if (arg.IsGenerated()) { auto it = Nodes.find(arg.GetColumnName()); - AFL_VERIFY(it != Nodes.end()); + if (it == Nodes.end()) { + AFL_CRIT(NKikimrServices::TX_COLUMNSHARD)("event", "program_arg_is_missing")("program", program.DebugString()); + return false; + } argNodes.emplace_back(it->second); } else { argNodes.emplace_back(std::make_shared(arg.GetColumnName())); @@ -443,7 +446,7 @@ std::shared_ptr TDataForIndexesCheckers::Build(const TP auto fStep = program.GetSteps().front(); TNormalForm nForm; for (auto&& s : fStep->GetAssignes()) { - if (!nForm.Add(s)) { + if (!nForm.Add(s, program)) { return nullptr; } } diff --git a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/ya.make b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/ya.make index e758f9ecc430..a9991e37e26a 100644 --- a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/ya.make +++ b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/ya.make @@ -11,7 +11,7 @@ SRCS( PEERDIR( ydb/core/formats/arrow - 
ydb/core/formats/arrow/protos + ydb/library/formats/arrow/protos ) YQL_LAST_ABI_VERSION() diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/common.cpp b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/common.cpp deleted file mode 100644 index e7960e66809e..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/common.cpp +++ /dev/null @@ -1,40 +0,0 @@ -#include "common.h" -#include - -namespace NKikimr::NOlap::NStatistics { - -TIdentifier::TIdentifier(const EType type, const std::vector& entities) - : Type(type) - , EntityIds(entities) -{ - AFL_VERIFY(EntityIds.size()); -} - -bool TIdentifier::operator<(const TIdentifier& item) const { - if (Type != item.Type) { - return (ui32)Type < (ui32)item.Type; - } - for (ui32 i = 0; i < std::min(EntityIds.size(), item.EntityIds.size()); ++i) { - if (EntityIds[i] < item.EntityIds[i]) { - return true; - } - } - return false; -} - -bool TIdentifier::operator==(const TIdentifier& item) const { - if (Type != item.Type) { - return false; - } - if (EntityIds.size() != item.EntityIds.size()) { - return false; - } - for (ui32 i = 0; i < EntityIds.size(); ++i) { - if (EntityIds[i] != item.EntityIds[i]) { - return false; - } - } - return true; -} - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/common.h b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/common.h deleted file mode 100644 index abfd7159a97b..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/common.h +++ /dev/null @@ -1,24 +0,0 @@ -#pragma once -#include -#include -#include - -namespace NKikimr::NOlap::NStatistics { -enum class EType { - Undefined /* "undefined" */, - Max /* "max" */, - Variability /* "variability" */ -}; - -class TIdentifier { -private: - YDB_READONLY(EType, Type, EType::Undefined); - YDB_READONLY_DEF(std::vector, EntityIds); -public: - TIdentifier(const EType type, const std::vector& entities); - - bool 
operator<(const TIdentifier& item) const; - bool operator==(const TIdentifier& item) const; -}; - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/constructor.cpp b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/constructor.cpp deleted file mode 100644 index 5713317c7d21..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/constructor.cpp +++ /dev/null @@ -1,5 +0,0 @@ -#include "constructor.h" - -namespace NKikimr::NOlap::NStatistics { - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/constructor.h b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/constructor.h deleted file mode 100644 index 8948e93d482c..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/constructor.h +++ /dev/null @@ -1,73 +0,0 @@ -#pragma once -#include "common.h" -#include "portion_storage.h" -#include "operator.h" - -#include - -namespace NKikimr::NSchemeShard { -class TOlapSchema; -} - -namespace NKikimrColumnShardStatisticsProto { -class TOperatorContainer; -} - -namespace NKikimr::NOlap::NStatistics { - -class IConstructor { -private: - YDB_READONLY(EType, Type, EType::Undefined); - IConstructor() = default; -protected: - virtual TConclusion> DoCreateOperator(const NSchemeShard::TOlapSchema& currentSchema) const = 0; - virtual bool DoDeserializeFromProto(const NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) = 0; - virtual void DoSerializeToProto(NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) const = 0; - virtual TConclusionStatus DoDeserializeFromJson(const NJson::TJsonValue& jsonData) = 0; -public: - using TProto = NKikimrColumnShardStatisticsProto::TConstructorContainer; - using TFactory = NObjectFactory::TObjectFactory; - - virtual ~IConstructor() = default; - - IConstructor(const EType type) - :Type(type) { - - } - - TConclusionStatus DeserializeFromJson(const 
NJson::TJsonValue& jsonData) { - return DoDeserializeFromJson(jsonData); - } - - TConclusion CreateOperator(const TString& name, const NSchemeShard::TOlapSchema& currentSchema) const { - auto result = DoCreateOperator(currentSchema); - if (!result) { - return result.GetError(); - } - return TOperatorContainer(name, result.DetachResult()); - } - - TString GetClassName() const { - return ::ToString(Type); - } - - bool DeserializeFromProto(const NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) { - if (!TryFromString(proto.GetClassName(), Type)) { - return false; - } - return DoDeserializeFromProto(proto); - } - - void SerializeToProto(NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) const { - return DoSerializeToProto(proto); - } -}; - -class TConstructorContainer: public NBackgroundTasks::TInterfaceProtoContainer { -private: - using TBase = NBackgroundTasks::TInterfaceProtoContainer; -public: - using TBase::TBase; -}; - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/operator.cpp b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/operator.cpp deleted file mode 100644 index 357d8bbd3934..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/operator.cpp +++ /dev/null @@ -1,12 +0,0 @@ -#include "operator.h" - -namespace NKikimr::NOlap::NStatistics { - -bool IOperator::DeserializeFromProto(const NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) { - if (!TryFromString(proto.GetClassName(), Type)) { - return false; - } - return DoDeserializeFromProto(proto); -} - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/operator.h b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/operator.h deleted file mode 100644 index 29f6f6744ac4..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/operator.h +++ /dev/null @@ -1,124 +0,0 @@ -#pragma once -#include 
"common.h" -#include "portion_storage.h" - -#include -#include - -#include -#include - -#include - -namespace NKikimr::NOlap { -class IPortionDataChunk; -} - -namespace NKikimr::NOlap::NStatistics { - -class IOperator { -private: - YDB_READONLY(EType, Type, EType::Undefined); - IOperator() = default; -protected: - virtual void DoFillStatisticsData(const THashMap>>& data, TPortionStorage& portionStats, const IIndexInfo& index) const = 0; - virtual void DoShiftCursor(TPortionStorageCursor& cursor) const = 0; - virtual bool DoDeserializeFromProto(const NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) = 0; - virtual void DoSerializeToProto(NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) const = 0; - virtual void DoCopyData(const TPortionStorageCursor& cursor, const TPortionStorage& portionStatsFrom, TPortionStorage& portionStatsTo) const = 0; -public: - using TProto = NKikimrColumnShardStatisticsProto::TOperatorContainer; - using TFactory = NObjectFactory::TObjectFactory; - - virtual ~IOperator() = default; - - virtual std::vector GetEntityIds() const = 0; - - IOperator(const EType type) - :Type(type) { - - } - - void ShiftCursor(TPortionStorageCursor& cursor) const { - DoShiftCursor(cursor); - } - - void CopyData(const TPortionStorageCursor& cursor, const TPortionStorage& portionStatsFrom, TPortionStorage& portionStatsTo) const { - return DoCopyData(cursor, portionStatsFrom, portionStatsTo); - } - - void FillStatisticsData(const THashMap>>& data, TPortionStorage& portionStats, const IIndexInfo& index) const { - DoFillStatisticsData(data, portionStats, index); - } - - TString GetClassName() const { - return ::ToString(Type); - } - - TIdentifier GetIdentifier() const { - return TIdentifier(Type, GetEntityIds()); - } - - bool DeserializeFromProto(const NKikimrColumnShardStatisticsProto::TOperatorContainer& proto); - - void SerializeToProto(NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) const { - return DoSerializeToProto(proto); 
- } -}; - -class TOperatorContainer: public NBackgroundTasks::TInterfaceProtoContainer { -private: - YDB_READONLY_DEF(TString, Name); - std::optional Cursor; - using TBase = NBackgroundTasks::TInterfaceProtoContainer; -public: - TOperatorContainer() = default; - - TOperatorContainer(const TString& name, const std::shared_ptr& object) - : TBase(object) - , Name(name) - { - AFL_VERIFY(Name); - } - - const TPortionStorageCursor& GetCursorVerified() const { - AFL_VERIFY(Cursor); - return *Cursor; - } - - void SetCursor(const TPortionStorageCursor& cursor) { - AFL_VERIFY(!Cursor); - Cursor = cursor; - } - - std::shared_ptr GetScalarVerified(const TPortionStorage& storage) { - AFL_VERIFY(!!Cursor); - return storage.GetScalarVerified(*Cursor); - } - - NKikimrColumnShardStatisticsProto::TOperatorContainer SerializeToProto() const { - NKikimrColumnShardStatisticsProto::TOperatorContainer result = TBase::SerializeToProto(); - result.SetName(Name); - AFL_VERIFY(Name); - return result; - } - - void SerializeToProto(NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) const { - TBase::SerializeToProto(proto); - proto.SetName(Name); - AFL_VERIFY(Name); - } - - bool DeserializeFromProto(const NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) { - Name = proto.GetName(); - if (!Name) { - return false; - } - if (!TBase::DeserializeFromProto(proto)) { - return false; - } - return true; - } -}; - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/portion_storage.cpp b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/portion_storage.cpp deleted file mode 100644 index f0d67ecf7d42..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/portion_storage.cpp +++ /dev/null @@ -1,119 +0,0 @@ -#include "portion_storage.h" -#include -#include - -namespace NKikimr::NOlap::NStatistics { - -NKikimrColumnShardStatisticsProto::TScalar TPortionStorage::ScalarToProto(const arrow::Scalar& scalar) 
{ - NKikimrColumnShardStatisticsProto::TScalar result; - switch (scalar.type->id()) { - case arrow::Type::BOOL: - result.SetBool(static_cast(scalar).value); - break; - case arrow::Type::UINT8: - result.SetUint8(static_cast(scalar).value); - break; - case arrow::Type::UINT16: - result.SetUint16(static_cast(scalar).value); - break; - case arrow::Type::UINT32: - result.SetUint32(static_cast(scalar).value); - break; - case arrow::Type::UINT64: - result.SetUint64(static_cast(scalar).value); - break; - case arrow::Type::INT8: - result.SetInt8(static_cast(scalar).value); - break; - case arrow::Type::INT16: - result.SetInt16(static_cast(scalar).value); - break; - case arrow::Type::INT32: - result.SetInt32(static_cast(scalar).value); - break; - case arrow::Type::INT64: - result.SetInt64(static_cast(scalar).value); - break; - case arrow::Type::DOUBLE: - result.SetDouble(static_cast(scalar).value); - break; - case arrow::Type::FLOAT: - result.SetFloat(static_cast(scalar).value); - break; - case arrow::Type::TIMESTAMP: - { - auto* ts = result.MutableTimestamp(); - ts->SetValue(static_cast(scalar).value); - ts->SetUnit(static_cast(*scalar.type).unit()); - break; - } - default: - AFL_VERIFY(false)("problem", "incorrect type for statistics usage")("type", scalar.type->ToString()); - } - return result; -} - -std::shared_ptr TPortionStorage::ProtoToScalar(const NKikimrColumnShardStatisticsProto::TScalar& proto) { - if (proto.HasBool()) { - return std::make_shared(proto.GetBool()); - } else if (proto.HasUint8()) { - return std::make_shared(proto.GetUint8()); - } else if (proto.HasUint16()) { - return std::make_shared(proto.GetUint16()); - } else if (proto.HasUint32()) { - return std::make_shared(proto.GetUint32()); - } else if (proto.HasUint64()) { - return std::make_shared(proto.GetUint64()); - } else if (proto.HasInt8()) { - return std::make_shared(proto.GetInt8()); - } else if (proto.HasInt16()) { - return std::make_shared(proto.GetInt16()); - } else if (proto.HasInt32()) { - 
return std::make_shared(proto.GetInt32()); - } else if (proto.HasInt64()) { - return std::make_shared(proto.GetInt64()); - } else if (proto.HasDouble()) { - return std::make_shared(proto.GetDouble()); - } else if (proto.HasFloat()) { - return std::make_shared(proto.GetFloat()); - } else if (proto.HasTimestamp()) { - arrow::TimeUnit::type unit = arrow::TimeUnit::type(proto.GetTimestamp().GetUnit()); - return std::make_shared(proto.GetTimestamp().GetValue(), std::make_shared(unit)); - } - AFL_VERIFY(false)("problem", "incorrect statistics proto")("proto", proto.DebugString()); - return nullptr; -} - -std::shared_ptr TPortionStorage::GetScalarVerified(const TPortionStorageCursor& cursor) const { - AFL_VERIFY(cursor.GetScalarsPosition() < Data.size()); - AFL_VERIFY(Data[cursor.GetScalarsPosition()]); - return Data[cursor.GetScalarsPosition()]; -} - -void TPortionStorage::AddScalar(const std::shared_ptr& scalar) { - const auto type = scalar->type->id(); - AFL_VERIFY(type == arrow::Type::BOOL || - type == arrow::Type::UINT8 || type == arrow::Type::UINT16 || type == arrow::Type::UINT32 || type == arrow::Type::UINT64 || - type == arrow::Type::INT8 || type == arrow::Type::INT16 || type == arrow::Type::INT32 || type == arrow::Type::INT64 || - type == arrow::Type::DOUBLE || type == arrow::Type::TIMESTAMP || type == arrow::Type::FLOAT) - ("problem", "incorrect_stat_type")("incoming", scalar->type->ToString()); - Data.emplace_back(scalar); -} - -NKikimrColumnShardStatisticsProto::TPortionStorage TPortionStorage::SerializeToProto() const { - NKikimrColumnShardStatisticsProto::TPortionStorage result; - for (auto&& i : Data) { - AFL_VERIFY(i); - *result.AddScalars() = ScalarToProto(*i); - } - return result; -} - -NKikimr::TConclusionStatus TPortionStorage::DeserializeFromProto(const NKikimrColumnShardStatisticsProto::TPortionStorage& proto) { - for (auto&& i : proto.GetScalars()) { - Data.emplace_back(ProtoToScalar(i)); - } - return TConclusionStatus::Success(); -} - -} \ No 
newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/portion_storage.h b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/portion_storage.h deleted file mode 100644 index a3e4b6bcb0dd..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/portion_storage.h +++ /dev/null @@ -1,53 +0,0 @@ -#pragma once - -#include -#include -#include - -#include - -namespace NKikimrColumnShardStatisticsProto { -class TScalar; -class TPortionStorage; -} - -namespace NKikimr::NOlap::NStatistics { -class TPortionStorageCursor { -private: - YDB_READONLY(ui32, ScalarsPosition, 0); -public: - TPortionStorageCursor() = default; - - void AddScalarsPosition(const ui32 shift) { - ScalarsPosition += shift; - } -}; - -class TPortionStorage { -private: - YDB_READONLY_DEF(std::vector>, Data); - static NKikimrColumnShardStatisticsProto::TScalar ScalarToProto(const arrow::Scalar& value); - static std::shared_ptr ProtoToScalar(const NKikimrColumnShardStatisticsProto::TScalar& proto); - TConclusionStatus DeserializeFromProto(const NKikimrColumnShardStatisticsProto::TPortionStorage& proto); - -public: - bool IsEmpty() const { - return Data.empty(); - } - - std::shared_ptr GetScalarVerified(const TPortionStorageCursor& cursor) const; - - void AddScalar(const std::shared_ptr& scalar); - - NKikimrColumnShardStatisticsProto::TPortionStorage SerializeToProto() const; - - static TConclusion BuildFromProto(const NKikimrColumnShardStatisticsProto::TPortionStorage& proto) { - TPortionStorage result; - auto parse = result.DeserializeFromProto(proto); - if (!parse) { - return parse; - } - return result; - } -}; -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/ya.make b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/ya.make deleted file mode 100644 index f63520354edf..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/ya.make +++ /dev/null @@ -1,20 
+0,0 @@ -LIBRARY() - -SRCS( - portion_storage.cpp - constructor.cpp - operator.cpp - common.cpp -) - -PEERDIR( - ydb/core/tx/columnshard/engines/scheme/statistics/protos - ydb/core/tx/columnshard/engines/scheme/abstract - contrib/libs/apache/arrow - ydb/library/actors/core - ydb/library/conclusion -) - -GENERATE_ENUM_SERIALIZATION(common.h) - -END() diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/max/constructor.cpp b/ydb/core/tx/columnshard/engines/scheme/statistics/max/constructor.cpp deleted file mode 100644 index a12a27812350..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/max/constructor.cpp +++ /dev/null @@ -1,45 +0,0 @@ -#include "constructor.h" -#include "operator.h" - -namespace NKikimr::NOlap::NStatistics::NMax { - -NKikimr::TConclusion> TConstructor::DoCreateOperator(const NSchemeShard::TOlapSchema& currentSchema) const { - auto column = currentSchema.GetColumns().GetByName(ColumnName); - if (!TOperator::IsAvailableType(column->GetType())) { - return TConclusionStatus::Fail("incorrect type for stat calculation"); - } - return std::make_shared(column->GetId()); -} - -bool TConstructor::DoDeserializeFromProto(const NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) { - if (!proto.HasMax()) { - return false; - } - ColumnName = proto.GetMax().GetColumnName(); - if (!ColumnName) { - return false; - } - return true; -} - -void TConstructor::DoSerializeToProto(NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) const { - AFL_VERIFY(!!ColumnName); - proto.MutableMax()->SetColumnName(ColumnName); -} - -NKikimr::TConclusionStatus TConstructor::DoDeserializeFromJson(const NJson::TJsonValue& jsonData) { - if (!jsonData.Has("column_name")) { - return TConclusionStatus::Fail("no column_name field in json description"); - } - TString columnNameLocal; - if (!jsonData["column_name"].GetString(&columnNameLocal)) { - return TConclusionStatus::Fail("incorrect column_name field in json description (no string)"); 
- } - if (!columnNameLocal) { - return TConclusionStatus::Fail("empty column_name field in json description"); - } - ColumnName = columnNameLocal; - return TConclusionStatus::Success(); -} - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/max/constructor.h b/ydb/core/tx/columnshard/engines/scheme/statistics/max/constructor.h deleted file mode 100644 index 695096a63d2f..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/max/constructor.h +++ /dev/null @@ -1,33 +0,0 @@ -#pragma once -#include -#include - -#include - -namespace NKikimr::NOlap::NStatistics::NMax { - -class TConstructor: public IConstructor { -private: - using TBase = IConstructor; - static inline const auto Registrator = TFactory::TRegistrator(::ToString(EType::Max)); - YDB_READONLY(TString, ColumnName, 0); -protected: - virtual TConclusion> DoCreateOperator(const NSchemeShard::TOlapSchema& currentSchema) const override; - virtual bool DoDeserializeFromProto(const NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) override; - virtual void DoSerializeToProto(NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) const override; - virtual TConclusionStatus DoDeserializeFromJson(const NJson::TJsonValue& jsonData) override; -public: - TConstructor(const TString& columnName) - : TBase(EType::Max) - , ColumnName(columnName) - { - - } - - TConstructor() - :TBase(EType::Max) { - - } -}; - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/max/operator.cpp b/ydb/core/tx/columnshard/engines/scheme/statistics/max/operator.cpp deleted file mode 100644 index 8e2c179e077b..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/max/operator.cpp +++ /dev/null @@ -1,41 +0,0 @@ -#include "operator.h" -#include -#include -#include - -namespace NKikimr::NOlap::NStatistics::NMax { - -void TOperator::DoFillStatisticsData(const THashMap>>& data, TPortionStorage& portionStats, const 
IIndexInfo& index) const { - auto loader = index.GetColumnLoaderVerified(EntityId); - auto it = data.find(EntityId); - AFL_VERIFY(it != data.end()); - std::shared_ptr result; - for (auto&& i : it->second) { - auto rb = NArrow::TStatusValidator::GetValid(loader->Apply(i->GetData())); - AFL_VERIFY(rb->num_columns() == 1); - auto res = NArrow::FindMinMaxPosition(rb->column(0)); - auto currentScalarMax = NArrow::TStatusValidator::GetValid(rb->column(0)->GetScalar(res.second)); - if (!result || NArrow::ScalarCompare(result, currentScalarMax) < 0) { - result = currentScalarMax; - } - } - portionStats.AddScalar(result); -} - -bool TOperator::DoDeserializeFromProto(const NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) { - if (!proto.HasMax()) { - return false; - } - EntityId = proto.GetMax().GetEntityId(); - if (!EntityId) { - return false; - } - return true; -} - -void TOperator::DoSerializeToProto(NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) const { - AFL_VERIFY(EntityId); - proto.MutableMax()->SetEntityId(EntityId); -} - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/max/operator.h b/ydb/core/tx/columnshard/engines/scheme/statistics/max/operator.h deleted file mode 100644 index b3478e5a24bb..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/max/operator.h +++ /dev/null @@ -1,68 +0,0 @@ -#pragma once -#include -#include - -namespace NKikimr::NOlap::NStatistics::NMax { - -class TOperator: public IOperator { -private: - using TBase = IOperator; - ui32 EntityId = 0; - static inline auto Registrator = TFactory::TRegistrator(::ToString(EType::Max)); -protected: - virtual void DoCopyData(const TPortionStorageCursor& cursor, const TPortionStorage& portionStatsFrom, TPortionStorage& portionStatsTo) const override { - std::shared_ptr scalar = portionStatsFrom.GetScalarVerified(cursor); - portionStatsTo.AddScalar(scalar); - } - - virtual void DoFillStatisticsData(const THashMap>>& 
data, TPortionStorage& portionStats, const IIndexInfo& index) const override; - virtual void DoShiftCursor(TPortionStorageCursor& cursor) const override { - cursor.AddScalarsPosition(1); - } - virtual std::vector GetEntityIds() const override { - return {EntityId}; - } - virtual bool DoDeserializeFromProto(const NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) override; - virtual void DoSerializeToProto(NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) const override; -public: - - static bool IsAvailableType(const NScheme::TTypeInfo type) { - switch (type.GetTypeId()) { - case NScheme::NTypeIds::Int8: - case NScheme::NTypeIds::Uint8: - case NScheme::NTypeIds::Int16: - case NScheme::NTypeIds::Uint16: - case NScheme::NTypeIds::Int32: - case NScheme::NTypeIds::Uint32: - case NScheme::NTypeIds::Int64: - case NScheme::NTypeIds::Uint64: - case NScheme::NTypeIds::Timestamp: - case NScheme::NTypeIds::Double: - case NScheme::NTypeIds::Float: - case NScheme::NTypeIds::Datetime: - case NScheme::NTypeIds::Date: - case NScheme::NTypeIds::Date32: - case NScheme::NTypeIds::Datetime64: - case NScheme::NTypeIds::Timestamp64: - case NScheme::NTypeIds::Interval64: - return true; - default: - break; - } - return false; - } - - TOperator() - : TBase(EType::Max) - { - - } - - TOperator(const ui32 entityId) - : TBase(EType::Max) - , EntityId(entityId) { - - } -}; - -} diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/max/ya.make b/ydb/core/tx/columnshard/engines/scheme/statistics/max/ya.make deleted file mode 100644 index 631c95eeb3d8..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/max/ya.make +++ /dev/null @@ -1,15 +0,0 @@ -LIBRARY() - -SRCS( - GLOBAL constructor.cpp - GLOBAL operator.cpp -) - -PEERDIR( - ydb/core/tx/columnshard/engines/scheme/statistics/abstract - ydb/core/tx/columnshard/engines/scheme/abstract - ydb/core/tx/columnshard/splitter/abstract - ydb/core/formats/arrow -) - -END() diff --git 
a/ydb/core/tx/columnshard/engines/scheme/statistics/protos/data.proto b/ydb/core/tx/columnshard/engines/scheme/statistics/protos/data.proto deleted file mode 100644 index c99f485d399d..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/protos/data.proto +++ /dev/null @@ -1,66 +0,0 @@ -package NKikimrColumnShardStatisticsProto; - -message TScalar { - message TTimestamp { - optional uint64 Value = 1; - optional uint32 Unit = 2; - } - oneof Value { - bool Bool = 1; - uint32 Uint8 = 2; - uint32 Uint16 = 3; - uint32 Uint32 = 4; - uint64 Uint64 = 5; - - int32 Int8 = 6; - int32 Int16 = 7; - int32 Int32 = 8; - int64 Int64 = 9; - - double Double = 10; - - TTimestamp Timestamp = 11; - - float Float = 12; - } -} - -message TPortionStorage { - repeated TScalar Scalars = 1; -} - -message TMaxConstructor { - optional string ColumnName = 3; -} - -message TVariabilityConstructor { - optional string ColumnName = 3; -} - -message TConstructorContainer { - optional string Name = 1; - - optional string ClassName = 40; - oneof Implementation { - TMaxConstructor Max = 41; - TVariabilityConstructor Variability = 42; - } -} - -message TMaxOperator { - optional uint32 EntityId = 1; -} - -message TVariabilityOperator { - optional uint32 EntityId = 1; -} - -message TOperatorContainer { - optional string Name = 1; - - optional string ClassName = 40; - oneof Implementation { - TMaxOperator Max = 41; - TVariabilityOperator Variability = 42; - } -} diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/protos/ya.make b/ydb/core/tx/columnshard/engines/scheme/statistics/protos/ya.make deleted file mode 100644 index f72b3b7cf620..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/protos/ya.make +++ /dev/null @@ -1,11 +0,0 @@ -PROTO_LIBRARY() - -SRCS( - data.proto -) - -PEERDIR( - -) - -END() diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/variability/constructor.cpp 
b/ydb/core/tx/columnshard/engines/scheme/statistics/variability/constructor.cpp deleted file mode 100644 index 25840673fcb5..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/variability/constructor.cpp +++ /dev/null @@ -1,45 +0,0 @@ -#include "constructor.h" -#include "operator.h" - -namespace NKikimr::NOlap::NStatistics::NVariability { - -NKikimr::TConclusion> TConstructor::DoCreateOperator(const NSchemeShard::TOlapSchema& currentSchema) const { - auto column = currentSchema.GetColumns().GetByName(ColumnName); - if (!TOperator::IsAvailableType(column->GetType())) { - return TConclusionStatus::Fail("incorrect type for stat calculation"); - } - return std::make_shared(column->GetId()); -} - -bool TConstructor::DoDeserializeFromProto(const NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) { - if (!proto.HasVariability()) { - return false; - } - ColumnName = proto.GetVariability().GetColumnName(); - if (!ColumnName) { - return false; - } - return true; -} - -void TConstructor::DoSerializeToProto(NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) const { - AFL_VERIFY(!!ColumnName); - proto.MutableVariability()->SetColumnName(ColumnName); -} - -NKikimr::TConclusionStatus TConstructor::DoDeserializeFromJson(const NJson::TJsonValue& jsonData) { - if (!jsonData.Has("column_name")) { - return TConclusionStatus::Fail("no column_name field in json description"); - } - TString columnNameLocal; - if (!jsonData["column_name"].GetString(&columnNameLocal)) { - return TConclusionStatus::Fail("incorrect column_name field in json description (no string)"); - } - if (!columnNameLocal) { - return TConclusionStatus::Fail("empty column_name field in json description"); - } - ColumnName = columnNameLocal; - return TConclusionStatus::Success(); -} - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/variability/constructor.h 
b/ydb/core/tx/columnshard/engines/scheme/statistics/variability/constructor.h deleted file mode 100644 index 809c9043faac..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/variability/constructor.h +++ /dev/null @@ -1,33 +0,0 @@ -#pragma once -#include -#include - -#include - -namespace NKikimr::NOlap::NStatistics::NVariability { - -class TConstructor: public IConstructor { -private: - using TBase = IConstructor; - static inline const auto Registrator = TFactory::TRegistrator(::ToString(EType::Variability)); - YDB_READONLY(TString, ColumnName, 0); -protected: - virtual TConclusion> DoCreateOperator(const NSchemeShard::TOlapSchema& currentSchema) const override; - virtual bool DoDeserializeFromProto(const NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) override; - virtual void DoSerializeToProto(NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) const override; - virtual TConclusionStatus DoDeserializeFromJson(const NJson::TJsonValue& jsonData) override; -public: - TConstructor(const TString& columnName) - : TBase(EType::Max) - , ColumnName(columnName) - { - - } - - TConstructor() - :TBase(EType::Variability) { - - } -}; - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/variability/operator.cpp b/ydb/core/tx/columnshard/engines/scheme/statistics/variability/operator.cpp deleted file mode 100644 index d43d617171bb..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/variability/operator.cpp +++ /dev/null @@ -1,164 +0,0 @@ -#include "operator.h" -#include -#include -#include - -namespace NKikimr::NOlap::NStatistics::NVariability { - -class IValuesContainer { -protected: - std::optional DataType; - ui32 DifferentCount = 0; - - virtual void DoAddArray(const std::shared_ptr& array) = 0; -public: - virtual ~IValuesContainer() = default; - ui32 GetDifferentCount() const { - return DifferentCount; - } - - void AddArray(const std::shared_ptr& array) { - if 
(!DataType) { - DataType = array->type_id(); - } else { - AFL_VERIFY(DataType == array->type_id())("base", (ui32)*DataType)("to", (ui32)array->type_id()); - } - return DoAddArray(array); - } -}; - -template -class TCTypeValuesContainer: public IValuesContainer { -private: - using TWrap = TArrowElement; - using TArray = typename arrow::TypeTraits::ArrayType; - using TCType = typename TWrap::T::c_type; - using TCContainer = THashSet; - - TCContainer ElementsStorage; -protected: - virtual void DoAddArray(const std::shared_ptr& array) override { - NArrow::SwitchType(array->type_id(), [&](const auto& type) { - using TWrap = std::decay_t; - if constexpr (std::is_same_v) { - const TArray& arrTyped = static_cast(*array); - for (ui32 i = 0; i < array->length(); ++i) { - if constexpr (arrow::has_c_type()) { - if (ElementsStorage.emplace(arrTyped.Value(i)).second) { - ++DifferentCount; - } - continue; - } - AFL_VERIFY(false); - } - return true; - } - AFL_VERIFY(false); - return false; - }); - } -}; - -template -class TStringValuesContainer: public IValuesContainer { -private: - using TWrap = TArrowElement; - using TArray = typename arrow::TypeTraits::ArrayType; - using TCType = TString; - using TCContainer = THashSet; - - TCContainer ElementsStorage; -protected: - virtual void DoAddArray(const std::shared_ptr& array) override { - NArrow::SwitchType(array->type_id(), [&](const auto& type) { - using TWrap = std::decay_t; - if constexpr (std::is_same_v) { - const TArray& arrTyped = static_cast(*array); - for (ui32 i = 0; i < array->length(); ++i) { - if constexpr (arrow::has_string_view()) { - auto value = arrTyped.GetView(i); - if (ElementsStorage.emplace(value.data(), value.size()).second) { - ++DifferentCount; - } - continue; - } - AFL_VERIFY(false); - } - return true; - } - AFL_VERIFY(false); - return false; - }); - } -}; - -class TDifferentElementsAggregator { -private: - std::shared_ptr Container; -public: - TDifferentElementsAggregator() = default; - - bool HasData() 
const { - return !!Container; - } - - ui32 GetDifferentCount() const { - return Container ? Container->GetDifferentCount() : 0; - } - - void AddArray(const std::shared_ptr& array) { - if (!Container) { - NArrow::SwitchType(array->type_id(), [&](const auto& type) { - using TWrap = std::decay_t; - if (!Container) { - if constexpr (arrow::has_c_type()) { - Container = std::make_shared>(); - Container->AddArray(array); - return true; - } - if constexpr (arrow::has_string_view()) { - Container = std::make_shared>(); - Container->AddArray(array); - return true; - } - AFL_VERIFY(false); - } - return false; - }); - } - Container->AddArray(array); - } -}; - -void TOperator::DoFillStatisticsData(const THashMap>>& data, TPortionStorage& portionStats, const IIndexInfo& index) const { - auto it = data.find(EntityId); - AFL_VERIFY(it != data.end()); - auto loader = index.GetColumnLoaderVerified(EntityId); - std::shared_ptr result; - TDifferentElementsAggregator aggregator; - for (auto&& i : it->second) { - auto rb = NArrow::TStatusValidator::GetValid(loader->Apply(i->GetData())); - AFL_VERIFY(rb->num_columns() == 1); - aggregator.AddArray(rb->column(0)); - } - AFL_VERIFY(aggregator.HasData()); - portionStats.AddScalar(std::make_shared(aggregator.GetDifferentCount())); -} - -bool TOperator::DoDeserializeFromProto(const NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) { - if (!proto.HasVariability()) { - return false; - } - EntityId = proto.GetVariability().GetEntityId(); - if (!EntityId) { - return false; - } - return true; -} - -void TOperator::DoSerializeToProto(NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) const { - AFL_VERIFY(EntityId); - proto.MutableVariability()->SetEntityId(EntityId); -} - -} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/variability/operator.h b/ydb/core/tx/columnshard/engines/scheme/statistics/variability/operator.h deleted file mode 100644 index ca46daf02aa6..000000000000 --- 
a/ydb/core/tx/columnshard/engines/scheme/statistics/variability/operator.h +++ /dev/null @@ -1,71 +0,0 @@ -#pragma once -#include -#include - -namespace NKikimr::NOlap::NStatistics::NVariability { - -class TOperator: public IOperator { -private: - using TBase = IOperator; - ui32 EntityId = 0; - static inline auto Registrator = TFactory::TRegistrator(::ToString(EType::Variability)); -protected: - virtual void DoCopyData(const TPortionStorageCursor& cursor, const TPortionStorage& portionStatsFrom, TPortionStorage& portionStatsTo) const override { - std::shared_ptr scalar = portionStatsFrom.GetScalarVerified(cursor); - portionStatsTo.AddScalar(scalar); - } - - virtual void DoFillStatisticsData(const THashMap>>& data, TPortionStorage& portionStats, const IIndexInfo& index) const override; - virtual void DoShiftCursor(TPortionStorageCursor& cursor) const override { - cursor.AddScalarsPosition(1); - } - virtual std::vector GetEntityIds() const override { - return {EntityId}; - } - virtual bool DoDeserializeFromProto(const NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) override; - virtual void DoSerializeToProto(NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) const override; -public: - - static bool IsAvailableType(const NScheme::TTypeInfo type) { - switch (type.GetTypeId()) { - case NScheme::NTypeIds::Int8: - case NScheme::NTypeIds::Uint8: - case NScheme::NTypeIds::Int16: - case NScheme::NTypeIds::Uint16: - case NScheme::NTypeIds::Int32: - case NScheme::NTypeIds::Uint32: - case NScheme::NTypeIds::Int64: - case NScheme::NTypeIds::Uint64: - case NScheme::NTypeIds::String: - case NScheme::NTypeIds::Utf8: - case NScheme::NTypeIds::Uuid: - case NScheme::NTypeIds::Timestamp: - case NScheme::NTypeIds::Double: - case NScheme::NTypeIds::Float: - case NScheme::NTypeIds::Datetime: - case NScheme::NTypeIds::Date: - case NScheme::NTypeIds::Date32: - case NScheme::NTypeIds::Datetime64: - case NScheme::NTypeIds::Timestamp64: - case 
NScheme::NTypeIds::Interval64: - return true; - default: - break; - } - return false; - } - - TOperator() - : TBase(EType::Variability) - { - - } - - TOperator(const ui32 entityId) - : TBase(EType::Variability) - , EntityId(entityId) { - - } -}; - -} diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/variability/ya.make b/ydb/core/tx/columnshard/engines/scheme/statistics/variability/ya.make deleted file mode 100644 index 631c95eeb3d8..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/variability/ya.make +++ /dev/null @@ -1,15 +0,0 @@ -LIBRARY() - -SRCS( - GLOBAL constructor.cpp - GLOBAL operator.cpp -) - -PEERDIR( - ydb/core/tx/columnshard/engines/scheme/statistics/abstract - ydb/core/tx/columnshard/engines/scheme/abstract - ydb/core/tx/columnshard/splitter/abstract - ydb/core/formats/arrow -) - -END() diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/ya.make b/ydb/core/tx/columnshard/engines/scheme/statistics/ya.make deleted file mode 100644 index 3baed9c3538a..000000000000 --- a/ydb/core/tx/columnshard/engines/scheme/statistics/ya.make +++ /dev/null @@ -1,10 +0,0 @@ -LIBRARY() - -PEERDIR( - ydb/core/tx/columnshard/engines/scheme/statistics/abstract - ydb/core/tx/columnshard/engines/scheme/statistics/max - ydb/core/tx/columnshard/engines/scheme/statistics/variability - ydb/core/tx/columnshard/engines/scheme/statistics/protos -) - -END() diff --git a/ydb/core/tx/columnshard/engines/scheme/tiering/tier_info.h b/ydb/core/tx/columnshard/engines/scheme/tiering/tier_info.h index c65cb1703ad0..8d290a8adcf2 100644 --- a/ydb/core/tx/columnshard/engines/scheme/tiering/tier_info.h +++ b/ydb/core/tx/columnshard/engines/scheme/tiering/tier_info.h @@ -2,7 +2,7 @@ #include "common.h" #include -#include +#include #include #include #include @@ -109,7 +109,7 @@ class TTiering { using TTiersMap = THashMap>; TTiersMap TierByName; TSet OrderedTiers; - TString TTLColumnName; + std::optional TTLColumnName; public: class TTieringContext { @@ 
-174,9 +174,14 @@ class TTiering { [[nodiscard]] bool Add(const std::shared_ptr& tier) { AFL_VERIFY(tier); if (!TTLColumnName) { + if (tier->GetEvictColumnName().Empty()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("problem", "empty_evict_column_name"); + return false; + } TTLColumnName = tier->GetEvictColumnName(); - } else if (TTLColumnName != tier->GetEvictColumnName()) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("problem", "incorrect_tiering_metadata")("column_before", TTLColumnName)("column_new", tier->GetEvictColumnName()); + } else if (*TTLColumnName != tier->GetEvictColumnName()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("problem", "incorrect_tiering_metadata")("column_before", *TTLColumnName) + ("column_new", tier->GetEvictColumnName()); return false; } @@ -194,13 +199,9 @@ class TTiering { return {}; } - const TString& GetTtlColumn() const { - AFL_VERIFY(TTLColumnName); - return TTLColumnName; - } - const TString& GetEvictColumnName() const { - return TTLColumnName; + AFL_VERIFY(TTLColumnName); + return *TTLColumnName; } TString GetDebugString() const { diff --git a/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.cpp b/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.cpp index 300952cc3227..8143c27a82ef 100644 --- a/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include namespace NKikimr::NOlap { @@ -27,34 +27,38 @@ std::set ISnapshotSchema::GetPkColumnsIds() const { } -TConclusion> ISnapshotSchema::NormalizeBatch(const ISnapshotSchema& dataSchema, const std::shared_ptr batch) const { +TConclusion> ISnapshotSchema::NormalizeBatch( + const ISnapshotSchema& dataSchema, const std::shared_ptr& batch, const std::set& restoreColumnIds) const { + AFL_VERIFY(dataSchema.GetSnapshot() <= GetSnapshot()); if (dataSchema.GetSnapshot() == GetSnapshot()) { - return batch; + if 
(batch->GetColumnsCount() == GetColumnsCount()) { + return batch; + } } - Y_ABORT_UNLESS(dataSchema.GetSnapshot() < GetSnapshot()); - const std::shared_ptr& resultArrowSchema = GetSchema(); - std::vector> newColumns; - newColumns.reserve(resultArrowSchema->num_fields()); + const std::shared_ptr& resultArrowSchema = GetSchema(); + std::shared_ptr result = std::make_shared(batch->GetRecordsCount()); for (size_t i = 0; i < resultArrowSchema->fields().size(); ++i) { auto& resultField = resultArrowSchema->fields()[i]; - auto columnId = GetIndexInfo().GetColumnId(resultField->name()); - auto oldColumnIndex = dataSchema.GetFieldIndex(columnId); - if (oldColumnIndex >= 0) { // ColumnExists - auto oldColumnInfo = dataSchema.GetFieldByIndex(oldColumnIndex); - Y_ABORT_UNLESS(oldColumnInfo); - auto columnData = batch->GetColumnByName(oldColumnInfo->name()); - Y_ABORT_UNLESS(columnData); - newColumns.push_back(columnData); - } else { // AddNullColumn - auto conclusion = BuildDefaultBatch({ resultField }, batch->num_rows()); - if (conclusion.IsFail()) { - return conclusion; + auto columnId = GetIndexInfo().GetColumnIdVerified(resultField->name()); + auto oldField = dataSchema.GetFieldByColumnIdOptional(columnId); + if (oldField) { + auto fAccessor = batch->GetAccessorByNameOptional(oldField->name()); + if (fAccessor) { + auto conclusion = result->AddField(resultField, fAccessor); + if (conclusion.IsFail()) { + return conclusion; + } + continue; } - newColumns.push_back((*conclusion)->column(0)); + } + if (restoreColumnIds.contains(columnId)) { + AFL_VERIFY(!!GetExternalDefaultValueVerified(columnId) || GetIndexInfo().IsNullableVerified(columnId))("column_name", + GetIndexInfo().GetColumnName(columnId, false))("id", columnId); + result->AddField(resultField, GetColumnLoaderVerified(columnId)->BuildDefaultAccessor(batch->num_rows())).Validate(); } } - return arrow::RecordBatch::Make(resultArrowSchema, batch->num_rows(), newColumns); + return result; } TConclusion> 
ISnapshotSchema::PrepareForModification( @@ -74,16 +78,18 @@ TConclusion> ISnapshotSchema::PrepareForModi return TConclusionStatus::Fail("not valid incoming batch: " + status.ToString()); } - const std::shared_ptr dstSchema = GetIndexInfo().ArrowSchema(); + const std::shared_ptr dstSchema = GetIndexInfo().ArrowSchema(); - auto batch = NArrow::TColumnOperator().SkipIfAbsent().Extract(incomingBatch, dstSchema->field_names()); + auto batch = NArrow::TColumnOperator().SkipIfAbsent().Extract(incomingBatch, dstSchema->fields()); for (auto&& i : batch->schema()->fields()) { - AFL_VERIFY(GetIndexInfo().HasColumnName(i->name())); - if (!dstSchema->GetFieldByName(i->name())->Equals(i)) { - return TConclusionStatus::Fail("not equal field types for column '" + i->name() + "'"); + const ui32 columnId = GetIndexInfo().GetColumnIdVerified(i->name()); + auto fSchema = GetIndexInfo().GetColumnFieldVerified(columnId); + if (!fSchema->Equals(i)) { + return TConclusionStatus::Fail( + "not equal field types for column '" + i->name() + "': " + i->ToString() + " vs " + fSchema->ToString()); } - if (GetIndexInfo().IsNullableVerified(i->name())) { + if (GetIndexInfo().IsNullableVerified(columnId)) { continue; } if (NArrow::HasNulls(batch->GetColumnByName(i->name()))) { @@ -110,33 +116,48 @@ TConclusion> ISnapshotSchema::PrepareForModi Y_DEBUG_ABORT_UNLESS(NArrow::IsSortedAndUnique(batch, GetIndexInfo().GetPrimaryKey())); switch (mType) { - case NEvWrite::EModificationType::Delete: - return AddDefault(batch, true); case NEvWrite::EModificationType::Replace: - case NEvWrite::EModificationType::Insert: - return AddDefault(batch, false); case NEvWrite::EModificationType::Upsert: { AFL_VERIFY(batch->num_columns() <= dstSchema->num_fields()); if (batch->num_columns() < dstSchema->num_fields()) { - for (auto&& f : dstSchema->fields()) { - if (GetIndexInfo().IsNullableVerified(f->name())) { + for (ui32 idx = 0; idx < (ui32)dstSchema->num_fields(); ++idx) { + if 
(GetIndexInfo().IsNullableVerifiedByIndex(idx)) { continue; } - if (batch->GetColumnByName(f->name())) { + if (GetIndexInfo().GetColumnExternalDefaultValueByIndexVerified(idx)) { continue; } - if (!GetIndexInfo().GetColumnDefaultValueVerified(f->name())) { - return TConclusionStatus::Fail("empty field for non-default column: '" + f->name() + "'"); + if (batch->GetColumnByName(dstSchema->field(idx)->name())) { + continue; } + return TConclusionStatus::Fail("empty field for non-default column: '" + dstSchema->field(idx)->name() + "'"); } } return batch; } + case NEvWrite::EModificationType::Delete: + case NEvWrite::EModificationType::Insert: case NEvWrite::EModificationType::Update: return batch; } } +void ISnapshotSchema::AdaptBatchToSchema(NArrow::TGeneralContainer& batch, const ISnapshotSchema::TPtr& targetSchema) const { + if (targetSchema->GetVersion() != GetVersion()) { + std::vector columnIdxToDelete; + for (size_t columnIdx = 0; columnIdx < batch.GetSchema()->GetFields().size(); ++columnIdx) { + const std::optional targetColumnId = targetSchema->GetColumnIdOptional(batch.GetSchema()->field(columnIdx)->name()); + const ui32 batchColumnId = GetColumnIdVerified(GetFieldByIndex(columnIdx)->name()); + if (!targetColumnId || *targetColumnId != batchColumnId) { + columnIdxToDelete.emplace_back(columnIdx); + } + } + if (!columnIdxToDelete.empty()) { + batch.DeleteFieldsByIndex(columnIdxToDelete); + } + } +} + ui32 ISnapshotSchema::GetColumnId(const std::string& columnName) const { auto id = GetColumnIdOptional(columnName); AFL_VERIFY(id)("column_name", columnName)("schema", JoinSeq(",", GetSchema()->field_names())); @@ -149,19 +170,19 @@ std::shared_ptr ISnapshotSchema::GetFieldByColumnIdVerified(const return result; } -std::shared_ptr ISnapshotSchema::GetColumnLoaderVerified(const ui32 columnId) const { +std::shared_ptr ISnapshotSchema::GetColumnLoaderVerified(const ui32 columnId) const { auto result = GetColumnLoaderOptional(columnId); AFL_VERIFY(result); return 
result; } -std::shared_ptr ISnapshotSchema::GetColumnLoaderVerified(const std::string& columnName) const { +std::shared_ptr ISnapshotSchema::GetColumnLoaderVerified(const std::string& columnName) const { auto result = GetColumnLoaderOptional(columnName); AFL_VERIFY(result); return result; } -std::shared_ptr ISnapshotSchema::GetColumnLoaderOptional(const std::string& columnName) const { +std::shared_ptr ISnapshotSchema::GetColumnLoaderOptional(const std::string& columnName) const { const std::optional id = GetColumnIdOptional(columnName); if (id) { return GetColumnLoaderOptional(*id); @@ -184,48 +205,78 @@ std::vector> ISnapshotSchema::GetAbsentFields(cons return result; } -TConclusion> ISnapshotSchema::BuildDefaultBatch(const std::vector>& fields, const ui32 rowsCount) const { - std::vector> columns; +TConclusionStatus ISnapshotSchema::CheckColumnsDefault(const std::vector>& fields) const { for (auto&& i : fields) { - auto defaultValue = GetDefaultValueVerified(i->name()); - if (!defaultValue && !GetIndexInfo().IsNullableVerified(i->name())) { + const ui32 colId = GetColumnIdVerified(i->name()); + auto defaultValue = GetExternalDefaultValueVerified(colId); + if (!defaultValue && !GetIndexInfo().IsNullableVerified(colId)) { return TConclusionStatus::Fail("not nullable field with no default: " + i->name()); } + } + return TConclusionStatus::Success(); +} + +TConclusion> ISnapshotSchema::BuildDefaultBatch( + const std::vector>& fields, const ui32 rowsCount, const bool force) const { + std::vector> columns; + for (auto&& i : fields) { + const ui32 columnId = GetColumnIdVerified(i->name()); + auto defaultValue = GetExternalDefaultValueVerified(columnId); + if (!defaultValue && !GetIndexInfo().IsNullableVerified(columnId)) { + if (force) { + defaultValue = NArrow::DefaultScalar(i->type()); + } else { + return TConclusionStatus::Fail("not nullable field with no default: " + i->name()); + } + } columns.emplace_back(NArrow::TThreadSimpleArraysCache::Get(i->type(), 
defaultValue, rowsCount)); } return arrow::RecordBatch::Make(std::make_shared(fields), rowsCount, columns); } -std::shared_ptr ISnapshotSchema::GetDefaultValueVerified(const std::string& columnName) const { - return GetIndexInfo().GetColumnDefaultValueVerified(columnName); +std::shared_ptr ISnapshotSchema::GetExternalDefaultValueVerified(const std::string& columnName) const { + return GetIndexInfo().GetColumnExternalDefaultValueVerified(columnName); } -std::shared_ptr ISnapshotSchema::GetDefaultValueVerified(const ui32 columnId) const { - return GetIndexInfo().GetColumnDefaultValueVerified(columnId); +std::shared_ptr ISnapshotSchema::GetExternalDefaultValueVerified(const ui32 columnId) const { + return GetIndexInfo().GetColumnExternalDefaultValueVerified(columnId); } -TConclusion> ISnapshotSchema::AddDefault(const std::shared_ptr& batch, const bool force) const { - auto result = batch; - for (auto&& i : GetIndexInfo().ArrowSchema()->fields()) { - if (batch->schema()->GetFieldIndex(i->name()) != -1) { - continue; - } - auto defaultValue = GetDefaultValueVerified(i->name()); - if (!defaultValue && !GetIndexInfo().IsNullableVerified(i->name())) { - if (!force) { - return TConclusionStatus::Fail("not nullable field withno default: " + i->name()); - } else { - defaultValue = NArrow::DefaultScalar(i->type()); +bool ISnapshotSchema::IsSpecialColumnId(const ui32 columnId) const { + return GetIndexInfo().IsSpecialColumn(columnId); +} + +std::set ISnapshotSchema::GetColumnsWithDifferentDefaults( + const THashMap& schemas, const ISnapshotSchema::TPtr& targetSchema) { + std::set result; + if (schemas.size() <= 1) { + return {}; + } + std::map> defaults; + for (auto& [_, blobSchema] : schemas) { + for (auto&& columnId : blobSchema->GetIndexInfo().GetColumnIds(true)) { + if (result.contains(columnId)) { + continue; + } + if (targetSchema && !targetSchema->HasColumnId(columnId)) { + continue; + } + auto def = 
blobSchema->GetIndexInfo().GetColumnExternalDefaultValueVerified(columnId); + if (!blobSchema->GetIndexInfo().IsNullableVerified(columnId) && !def) { + continue; + } + auto it = defaults.find(columnId); + if (it == defaults.end()) { + defaults.emplace(columnId, def); + } else if (NArrow::ScalarCompareNullable(def, it->second) != 0) { + result.emplace(columnId); } } - std::shared_ptr column = NArrow::TThreadSimpleArraysCache::Get(i->type(), defaultValue, batch->num_rows()); - result = NArrow::TStatusValidator::GetValid(result->AddColumn(result->num_columns(), i->name(), column)); + if (targetSchema && result.size() == targetSchema->GetIndexInfo().GetColumnIds(true).size()) { + break; + } } return result; } -bool ISnapshotSchema::IsSpecialColumnId(const ui32 columnId) const { - return GetIndexInfo().IsSpecialColumn(columnId); -} - } diff --git a/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.h b/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.h index 91d53230ec38..962989d75fb2 100644 --- a/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.h +++ b/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.h @@ -1,6 +1,8 @@ #pragma once -#include -#include +#include + +#include +#include #include #include @@ -19,34 +21,39 @@ class ISnapshotSchema { using TPtr = std::shared_ptr; virtual ~ISnapshotSchema() {} - virtual std::shared_ptr GetColumnLoaderOptional(const ui32 columnId) const = 0; - std::shared_ptr GetColumnLoaderVerified(const ui32 columnId) const; - std::shared_ptr GetColumnLoaderOptional(const std::string& columnName) const; - std::shared_ptr GetColumnLoaderVerified(const std::string& columnName) const; + virtual std::shared_ptr GetColumnLoaderOptional(const ui32 columnId) const = 0; + std::shared_ptr GetColumnLoaderVerified(const ui32 columnId) const; + std::shared_ptr GetColumnLoaderOptional(const std::string& columnName) const; + std::shared_ptr GetColumnLoaderVerified(const std::string& columnName) 
const; bool IsSpecialColumnId(const ui32 columnId) const; + virtual const std::vector& GetColumnIds() const = 0; - virtual TColumnSaver GetColumnSaver(const ui32 columnId) const = 0; - TColumnSaver GetColumnSaver(const TString& columnName) const { + virtual NArrow::NAccessor::TColumnSaver GetColumnSaver(const ui32 columnId) const = 0; + NArrow::NAccessor::TColumnSaver GetColumnSaver(const TString& columnName) const { return GetColumnSaver(GetColumnId(columnName)); } - TColumnSaver GetColumnSaver(const std::string& columnName) const { + NArrow::NAccessor::TColumnSaver GetColumnSaver(const std::string& columnName) const { return GetColumnSaver(TString(columnName.data(), columnName.size())); } std::vector> GetAbsentFields(const std::shared_ptr& existsSchema) const; - std::shared_ptr GetDefaultValueVerified(const std::string& columnName) const; - std::shared_ptr GetDefaultValueVerified(const ui32 columnId) const; - - TConclusion> BuildDefaultBatch(const std::vector>& fields, const ui32 rowsCount) const; - TConclusion> AddDefault(const std::shared_ptr& batch, const bool force) const; + std::shared_ptr GetExternalDefaultValueVerified(const std::string& columnName) const; + std::shared_ptr GetExternalDefaultValueVerified(const ui32 columnId) const; + TConclusion> BuildDefaultBatch( + const std::vector>& fields, const ui32 rowsCount, const bool force) const; + TConclusionStatus CheckColumnsDefault(const std::vector>& fields) const; std::vector GetPKColumnNames() const; virtual std::optional GetColumnIdOptional(const std::string& columnName) const = 0; + virtual ui32 GetColumnIdVerified(const std::string& columnName) const = 0; virtual int GetFieldIndex(const ui32 columnId) const = 0; + bool HasColumnId(const ui32 columnId) const { + return GetFieldIndex(columnId) >= 0; + } ui32 GetColumnId(const std::string& columnName) const; std::shared_ptr GetFieldByIndex(const int index) const; @@ -56,7 +63,7 @@ class ISnapshotSchema { TString DebugString() const { return 
DoDebugString(); } - virtual const std::shared_ptr& GetSchema() const = 0; + virtual const std::shared_ptr& GetSchema() const = 0; virtual const TIndexInfo& GetIndexInfo() const = 0; virtual const TSnapshot& GetSnapshot() const = 0; virtual ui64 GetVersion() const = 0; @@ -64,9 +71,13 @@ class ISnapshotSchema { std::set GetPkColumnsIds() const; - [[nodiscard]] TConclusion> NormalizeBatch(const ISnapshotSchema& dataSchema, const std::shared_ptr batch) const; + static std::set GetColumnsWithDifferentDefaults(const THashMap& schemas, const ISnapshotSchema::TPtr& targetSchema); + + [[nodiscard]] TConclusion> NormalizeBatch( + const ISnapshotSchema& dataSchema, const std::shared_ptr& batch, const std::set& restoreColumnIds) const; [[nodiscard]] TConclusion> PrepareForModification( const std::shared_ptr& incomingBatch, const NEvWrite::EModificationType mType) const; + void AdaptBatchToSchema(NArrow::TGeneralContainer& batch, const ISnapshotSchema::TPtr& targetSchema) const; }; } // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/scheme/versions/filtered_scheme.cpp b/ydb/core/tx/columnshard/engines/scheme/versions/filtered_scheme.cpp index 8832e7eb0ec8..a371806b2349 100644 --- a/ydb/core/tx/columnshard/engines/scheme/versions/filtered_scheme.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/versions/filtered_scheme.cpp @@ -4,66 +4,58 @@ namespace NKikimr::NOlap { -TFilteredSnapshotSchema::TFilteredSnapshotSchema(ISnapshotSchema::TPtr originalSnapshot, const std::vector& columnIds) - : TFilteredSnapshotSchema(originalSnapshot, std::set(columnIds.begin(), columnIds.end())) -{} +TFilteredSnapshotSchema::TFilteredSnapshotSchema(const ISnapshotSchema::TPtr& originalSnapshot, const std::set& columnIds) + : TFilteredSnapshotSchema(originalSnapshot, std::vector(columnIds.begin(), columnIds.end())) { +} -TFilteredSnapshotSchema::TFilteredSnapshotSchema(ISnapshotSchema::TPtr originalSnapshot, const std::set& columnIds) 
+TFilteredSnapshotSchema::TFilteredSnapshotSchema(const ISnapshotSchema::TPtr& originalSnapshot, const std::vector& columnIds) : OriginalSnapshot(originalSnapshot) , ColumnIds(columnIds) { std::vector> schemaFields; - for (auto&& i : OriginalSnapshot->GetSchema()->fields()) { - if (!ColumnIds.contains(OriginalSnapshot->GetIndexInfo().GetColumnId(i->name()))) { - continue; - } - schemaFields.emplace_back(i); - } - Schema = std::make_shared(schemaFields); -} - -TFilteredSnapshotSchema::TFilteredSnapshotSchema(ISnapshotSchema::TPtr originalSnapshot, const std::set& columnNames) - : OriginalSnapshot(originalSnapshot) { - for (auto&& i : columnNames) { - ColumnIds.emplace(OriginalSnapshot->GetColumnId(i)); - } - std::vector> schemaFields; - for (auto&& i : OriginalSnapshot->GetSchema()->fields()) { - if (!columnNames.contains(i->name())) { - continue; - } - schemaFields.emplace_back(i); + for (auto&& i : columnIds) { + IdIntoIndex.emplace(i, schemaFields.size()); + schemaFields.emplace_back(originalSnapshot->GetFieldByColumnIdVerified(i)); } - Schema = std::make_shared(schemaFields); + Schema = std::make_shared(schemaFields); } TColumnSaver TFilteredSnapshotSchema::GetColumnSaver(const ui32 columnId) const { - Y_ABORT_UNLESS(ColumnIds.contains(columnId)); + AFL_VERIFY(IdIntoIndex.contains(columnId)); return OriginalSnapshot->GetColumnSaver(columnId); } std::shared_ptr TFilteredSnapshotSchema::GetColumnLoaderOptional(const ui32 columnId) const { - Y_ABORT_UNLESS(ColumnIds.contains(columnId)); + AFL_VERIFY(IdIntoIndex.contains(columnId)); return OriginalSnapshot->GetColumnLoaderOptional(columnId); } std::optional TFilteredSnapshotSchema::GetColumnIdOptional(const std::string& columnName) const { - return OriginalSnapshot->GetColumnIdOptional(columnName); + auto result = OriginalSnapshot->GetColumnIdOptional(columnName); + if (!result) { + return result; + } + if (!IdIntoIndex.contains(*result)) { + return std::nullopt; + } + return result; +} + +ui32 
TFilteredSnapshotSchema::GetColumnIdVerified(const std::string& columnName) const { + auto result = OriginalSnapshot->GetColumnIdVerified(columnName); + AFL_VERIFY(IdIntoIndex.contains(result)); + return result; } int TFilteredSnapshotSchema::GetFieldIndex(const ui32 columnId) const { - if (!ColumnIds.contains(columnId)) { - return -1; - } - TString columnName = OriginalSnapshot->GetIndexInfo().GetColumnName(columnId, false); - if (!columnName) { + auto it = IdIntoIndex.find(columnId); + if (it == IdIntoIndex.end()) { return -1; } - std::string name(columnName.data(), columnName.size()); - return Schema->GetFieldIndex(name); + return it->second; } -const std::shared_ptr& TFilteredSnapshotSchema::GetSchema() const { +const std::shared_ptr& TFilteredSnapshotSchema::GetSchema() const { return Schema; } diff --git a/ydb/core/tx/columnshard/engines/scheme/versions/filtered_scheme.h b/ydb/core/tx/columnshard/engines/scheme/versions/filtered_scheme.h index e9fa1b41b7c2..8fc82ee6a304 100644 --- a/ydb/core/tx/columnshard/engines/scheme/versions/filtered_scheme.h +++ b/ydb/core/tx/columnshard/engines/scheme/versions/filtered_scheme.h @@ -8,21 +8,26 @@ namespace NKikimr::NOlap { class TFilteredSnapshotSchema: public ISnapshotSchema { ISnapshotSchema::TPtr OriginalSnapshot; - std::shared_ptr Schema; - std::set ColumnIds; + std::shared_ptr Schema; + std::vector ColumnIds; + THashMap IdIntoIndex; + protected: virtual TString DoDebugString() const override; public: - TFilteredSnapshotSchema(ISnapshotSchema::TPtr originalSnapshot, const std::vector& columnIds); - TFilteredSnapshotSchema(ISnapshotSchema::TPtr originalSnapshot, const std::set& columnIds); - TFilteredSnapshotSchema(ISnapshotSchema::TPtr originalSnapshot, const std::set& columnNames); + TFilteredSnapshotSchema(const ISnapshotSchema::TPtr& originalSnapshot, const std::vector& columnIds); + TFilteredSnapshotSchema(const ISnapshotSchema::TPtr& originalSnapshot, const std::set& columnIds); + virtual const std::vector& 
GetColumnIds() const override { + return ColumnIds; + } TColumnSaver GetColumnSaver(const ui32 columnId) const override; std::shared_ptr GetColumnLoaderOptional(const ui32 columnId) const override; std::optional GetColumnIdOptional(const std::string& columnName) const override; + ui32 GetColumnIdVerified(const std::string& columnName) const override; int GetFieldIndex(const ui32 columnId) const override; - const std::shared_ptr& GetSchema() const override; + const std::shared_ptr& GetSchema() const override; const TIndexInfo& GetIndexInfo() const override; const TSnapshot& GetSnapshot() const override; ui32 GetColumnsCount() const override; diff --git a/ydb/core/tx/columnshard/engines/scheme/versions/snapshot_scheme.cpp b/ydb/core/tx/columnshard/engines/scheme/versions/snapshot_scheme.cpp index 1fe6820cf547..05277b7b8967 100644 --- a/ydb/core/tx/columnshard/engines/scheme/versions/snapshot_scheme.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/versions/snapshot_scheme.cpp @@ -21,16 +21,15 @@ std::optional TSnapshotSchema::GetColumnIdOptional(const std::string& colu return IndexInfo.GetColumnIdOptional(columnName); } +ui32 TSnapshotSchema::GetColumnIdVerified(const std::string& columnName) const { + return IndexInfo.GetColumnIdVerified(columnName); +} + int TSnapshotSchema::GetFieldIndex(const ui32 columnId) const { - const TString& columnName = IndexInfo.GetColumnName(columnId, false); - if (!columnName) { - return -1; - } - std::string name(columnName.data(), columnName.size()); - return Schema->GetFieldIndex(name); + return IndexInfo.GetColumnIndexOptional(columnId).value_or(-1); } -const std::shared_ptr& TSnapshotSchema::GetSchema() const { +const std::shared_ptr& TSnapshotSchema::GetSchema() const { return Schema; } diff --git a/ydb/core/tx/columnshard/engines/scheme/versions/snapshot_scheme.h b/ydb/core/tx/columnshard/engines/scheme/versions/snapshot_scheme.h index 539d8f99a02c..5fa3c4ef7551 100644 --- 
a/ydb/core/tx/columnshard/engines/scheme/versions/snapshot_scheme.h +++ b/ydb/core/tx/columnshard/engines/scheme/versions/snapshot_scheme.h @@ -9,7 +9,7 @@ namespace NKikimr::NOlap { class TSnapshotSchema: public ISnapshotSchema { private: TIndexInfo IndexInfo; - std::shared_ptr Schema; + std::shared_ptr Schema; TSnapshot Snapshot; protected: virtual TString DoDebugString() const override { @@ -23,12 +23,17 @@ class TSnapshotSchema: public ISnapshotSchema { public: TSnapshotSchema(TIndexInfo&& indexInfo, const TSnapshot& snapshot); + virtual const std::vector& GetColumnIds() const override { + return IndexInfo.GetColumnIds(); + } + TColumnSaver GetColumnSaver(const ui32 columnId) const override; std::shared_ptr GetColumnLoaderOptional(const ui32 columnId) const override; std::optional GetColumnIdOptional(const std::string& columnName) const override; + ui32 GetColumnIdVerified(const std::string& columnName) const override; int GetFieldIndex(const ui32 columnId) const override; - const std::shared_ptr& GetSchema() const override; + const std::shared_ptr& GetSchema() const override; const TIndexInfo& GetIndexInfo() const override; const TSnapshot& GetSnapshot() const override; ui32 GetColumnsCount() const override; diff --git a/ydb/core/tx/columnshard/engines/scheme/versions/versioned_index.h b/ydb/core/tx/columnshard/engines/scheme/versions/versioned_index.h index 70cf8830b051..fe554a790d8f 100644 --- a/ydb/core/tx/columnshard/engines/scheme/versions/versioned_index.h +++ b/ydb/core/tx/columnshard/engines/scheme/versions/versioned_index.h @@ -91,7 +91,7 @@ class TVersionedIndex { } } Y_ABORT_UNLESS(!Snapshots.empty()); - Y_ABORT_UNLESS(version.IsZero()); +// Y_ABORT_UNLESS(version.IsZero()); return Snapshots.begin()->second; } diff --git a/ydb/core/tx/columnshard/engines/scheme/ya.make b/ydb/core/tx/columnshard/engines/scheme/ya.make index 8684d7894338..744458ff4dcb 100644 --- a/ydb/core/tx/columnshard/engines/scheme/ya.make +++ 
b/ydb/core/tx/columnshard/engines/scheme/ya.make @@ -15,14 +15,13 @@ PEERDIR( ydb/library/actors/core ydb/core/tx/columnshard/engines/scheme/indexes - ydb/core/tx/columnshard/engines/scheme/statistics ydb/core/tx/columnshard/engines/scheme/abstract ydb/core/tx/columnshard/engines/scheme/versions ydb/core/tx/columnshard/engines/scheme/tiering ydb/core/tx/columnshard/engines/scheme/column ydb/core/tx/columnshard/engines/scheme/defaults + ydb/core/formats/arrow/accessor ydb/core/tx/columnshard/blobs_action/abstract - ydb/core/tx/columnshard/engines/changes/compaction ) YQL_LAST_ABI_VERSION() diff --git a/ydb/core/tx/columnshard/engines/storage/actualizer/scheme/counters.h b/ydb/core/tx/columnshard/engines/storage/actualizer/scheme/counters.h index 8f5ab2ff4311..95aa18603f46 100644 --- a/ydb/core/tx/columnshard/engines/storage/actualizer/scheme/counters.h +++ b/ydb/core/tx/columnshard/engines/storage/actualizer/scheme/counters.h @@ -1,6 +1,6 @@ #pragma once #include -#include +#include #include #include #include diff --git a/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/counters.h b/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/counters.h index 7d7a1cc3d830..a5c278799fdd 100644 --- a/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/counters.h +++ b/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/counters.h @@ -1,6 +1,6 @@ #pragma once #include -#include +#include #include #include #include diff --git a/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/tiering.cpp b/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/tiering.cpp index 3a2ce59ae22f..55882102eee9 100644 --- a/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/tiering.cpp +++ b/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/tiering.cpp @@ -1,6 +1,7 @@ #include "tiering.h" #include #include +#include #include #include #include @@ -25,17 +26,18 @@ std::optional TTieringActualizer::Bu if (Tiering) { AFL_VERIFY(TieringColumnId); 
- auto statOperator = portionSchema->GetIndexInfo().GetStatistics(NStatistics::TIdentifier(NStatistics::EType::Max, {*TieringColumnId})); + auto indexMeta = portionSchema->GetIndexInfo().GetIndexMax(*TieringColumnId); std::shared_ptr max; - if (!statOperator) { + if (!indexMeta) { max = portion.MaxValue(*TieringColumnId); if (!max) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "scalar_less_not_max"); return {}; } } else { - NYDBTest::TControllers::GetColumnShardController()->OnStatisticsUsage(statOperator); - max = statOperator.GetScalarVerified(portion.GetMeta().GetStatisticsStorage()); + NYDBTest::TControllers::GetColumnShardController()->OnStatisticsUsage(NIndexes::TIndexMetaContainer(indexMeta)); + const std::vector data = portion.GetIndexInplaceDataVerified(indexMeta->GetIndexId()); + max = indexMeta->GetMaxScalarVerified(data, portionSchema->GetIndexInfo().GetColumnFieldVerified(*TieringColumnId)->type()); } auto tieringInfo = Tiering->GetTierToMove(max, now); AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("tiering_info", tieringInfo.DebugString()); @@ -172,7 +174,7 @@ void TTieringActualizer::DoExtractTasks(TTieringProcessContext& tasksContext, co void TTieringActualizer::Refresh(const std::optional& info, const TAddExternalContext& externalContext) { Tiering = info; if (Tiering) { - TieringColumnId = VersionedIndex.GetLastSchema()->GetColumnId(Tiering->GetTtlColumn()); + TieringColumnId = VersionedIndex.GetLastSchema()->GetColumnId(Tiering->GetEvictColumnName()); } else { TieringColumnId = {}; } diff --git a/ydb/core/tx/columnshard/engines/storage/chunks/column.cpp b/ydb/core/tx/columnshard/engines/storage/chunks/column.cpp index 485802b0e3c1..4a527f913fc7 100644 --- a/ydb/core/tx/columnshard/engines/storage/chunks/column.cpp +++ b/ydb/core/tx/columnshard/engines/storage/chunks/column.cpp @@ -1,18 +1,20 @@ #include "column.h" -#include +#include namespace NKikimr::NOlap::NChunks { -std::vector> TChunkPreparation::DoInternalSplitImpl(const TColumnSaver& 
saver, const std::shared_ptr& counters, const std::vector& splitSizes) const { - auto rb = NArrow::TStatusValidator::GetValid(ColumnInfo.GetLoader()->Apply(Data)); +std::vector> TChunkPreparation::DoInternalSplitImpl( + const TColumnSaver& saver, const std::shared_ptr& /*counters*/, const std::vector& splitSizes) const { + auto accessor = ColumnInfo.GetLoader()->ApplyVerified(Data, GetRecordsCountVerified()); + std::vector chunks = accessor->SplitBySizes(saver, Data, splitSizes); - auto chunks = TSimpleSplitter(saver, counters).SplitBySizes(rb, Data, splitSizes); std::vector> newChunks; for (auto&& i : chunks) { - Y_ABORT_UNLESS(i.GetSlicedBatch()->num_columns() == 1); - newChunks.emplace_back(std::make_shared(saver.Apply(i.GetSlicedBatch()), i.GetSlicedBatch()->column(0), TChunkAddress(GetColumnId(), GetChunkIdxOptional().value_or(0)), ColumnInfo)); + newChunks.emplace_back(std::make_shared( + i.GetSerializedData(), i.GetArray(), TChunkAddress(GetColumnId(), GetChunkIdxOptional().value_or(0)), ColumnInfo)); } + return newChunks; } -} +} // namespace NKikimr::NOlap::NChunks diff --git a/ydb/core/tx/columnshard/engines/storage/chunks/column.h b/ydb/core/tx/columnshard/engines/storage/chunks/column.h index 7e05b45a9638..9de818c49fb6 100644 --- a/ydb/core/tx/columnshard/engines/storage/chunks/column.h +++ b/ydb/core/tx/columnshard/engines/storage/chunks/column.h @@ -22,6 +22,9 @@ class TChunkPreparation: public IPortionColumnChunk { virtual ui32 DoGetRecordsCountImpl() const override { return Record.GetMeta().GetNumRows(); } + virtual ui64 DoGetRawBytesImpl() const override { + return Record.GetMeta().GetRawBytes(); + } virtual TString DoDebugString() const override { return ""; } @@ -53,14 +56,14 @@ class TChunkPreparation: public IPortionColumnChunk { AFL_VERIFY(Data.size() == Record.BlobRange.Size || Record.BlobRange.Size == 0)("data", Data.size())("record", Record.BlobRange.Size); } - TChunkPreparation(const TString& data, const std::shared_ptr& column, const 
TChunkAddress& address, const TSimpleColumnInfo& columnInfo) + TChunkPreparation(const TString& data, const std::shared_ptr& column, const TChunkAddress& address, const TSimpleColumnInfo& columnInfo) : TBase(address.GetColumnId()) , Data(data) , Record(address, column, columnInfo) , ColumnInfo(columnInfo) { - Y_ABORT_UNLESS(column->length()); - First = NArrow::TStatusValidator::GetValid(column->GetScalar(0)); - Last = NArrow::TStatusValidator::GetValid(column->GetScalar(column->length() - 1)); + Y_ABORT_UNLESS(column->GetRecordsCount()); + First = column->GetScalar(0); + Last = column->GetScalar(column->GetRecordsCount() - 1); Record.BlobRange.Size = data.size(); } }; diff --git a/ydb/core/tx/columnshard/engines/storage/chunks/data.cpp b/ydb/core/tx/columnshard/engines/storage/chunks/data.cpp index 79613b5b9798..007dff83e914 100644 --- a/ydb/core/tx/columnshard/engines/storage/chunks/data.cpp +++ b/ydb/core/tx/columnshard/engines/storage/chunks/data.cpp @@ -9,4 +9,13 @@ void TPortionIndexChunk::DoAddIntoPortionBeforeBlob(const TBlobRangeLink16& bRan portionInfo.AddIndex(TIndexChunk(GetEntityId(), GetChunkIdxVerified(), RecordsCount, RawBytes, bRange)); } +std::shared_ptr TPortionIndexChunk::DoCopyWithAnotherBlob( + TString&& data, const TSimpleColumnInfo& /*columnInfo*/) const { + return std::make_shared(GetChunkAddressVerified(), RecordsCount, RawBytes, std::move(data)); +} + +void TPortionIndexChunk::DoAddInplaceIntoPortion(TPortionInfoConstructor& portionInfo) const { + portionInfo.AddIndex(TIndexChunk(GetEntityId(), GetChunkIdxVerified(), RecordsCount, RawBytes, GetData())); +} + } // namespace NKikimr::NOlap::NIndexes \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/storage/chunks/data.h b/ydb/core/tx/columnshard/engines/storage/chunks/data.h index d5a91c19609c..e3f22ae2ed9d 100644 --- a/ydb/core/tx/columnshard/engines/storage/chunks/data.h +++ b/ydb/core/tx/columnshard/engines/storage/chunks/data.h @@ -17,6 +17,7 @@ class 
TPortionIndexChunk: public IPortionDataChunk { return ""; } virtual std::vector> DoInternalSplit(const TColumnSaver& /*saver*/, const std::shared_ptr& /*counters*/, const std::vector& /*splitSizes*/) const override { + AFL_VERIFY(false); return {}; } virtual bool DoIsSplittable() const override { @@ -25,6 +26,9 @@ class TPortionIndexChunk: public IPortionDataChunk { virtual std::optional DoGetRecordsCount() const override { return RecordsCount; } + virtual std::optional DoGetRawBytes() const override { + return RawBytes; + } virtual std::shared_ptr DoGetFirstScalar() const override { return nullptr; } @@ -32,9 +36,9 @@ class TPortionIndexChunk: public IPortionDataChunk { return nullptr; } virtual void DoAddIntoPortionBeforeBlob(const TBlobRangeLink16& bRange, TPortionInfoConstructor& portionInfo) const override; - virtual std::shared_ptr DoCopyWithAnotherBlob(TString&& data, const TSimpleColumnInfo& /*columnInfo*/) const override { - return std::make_shared(GetChunkAddressVerified(), RecordsCount, RawBytes, std::move(data)); - } + virtual std::shared_ptr DoCopyWithAnotherBlob(TString&& data, const TSimpleColumnInfo& /*columnInfo*/) const override; + virtual void DoAddInplaceIntoPortion(TPortionInfoConstructor& portionInfo) const override; + public: TPortionIndexChunk(const TChunkAddress& address, const ui32 recordsCount, const ui64 rawBytes, const TString& data) : TBase(address.GetColumnId(), address.GetChunkIdx()) diff --git a/ydb/core/tx/columnshard/engines/storage/chunks/null_column.cpp b/ydb/core/tx/columnshard/engines/storage/chunks/null_column.cpp deleted file mode 100644 index 9aa56e56eda3..000000000000 --- a/ydb/core/tx/columnshard/engines/storage/chunks/null_column.cpp +++ /dev/null @@ -1,5 +0,0 @@ -#include "null_column.h" - -namespace NKikimr::NOlap::NChunks { - -} diff --git a/ydb/core/tx/columnshard/engines/storage/chunks/null_column.h b/ydb/core/tx/columnshard/engines/storage/chunks/null_column.h deleted file mode 100644 index 
c848f0f0cb45..000000000000 --- a/ydb/core/tx/columnshard/engines/storage/chunks/null_column.h +++ /dev/null @@ -1,54 +0,0 @@ -#pragma once -#include -#include -#include -#include - -namespace NKikimr::NOlap::NChunks { - -class TDefaultChunkPreparation: public IPortionColumnChunk { -private: - using TBase = IPortionColumnChunk; - const std::shared_ptr DefaultValue; - const ui32 RecordsCount; - TString Data; -protected: - virtual std::vector> DoInternalSplitImpl(const TColumnSaver& /*saver*/, const std::shared_ptr& /*counters*/, - const std::vector& /*splitSizes*/) const override { - AFL_VERIFY(false); - return {}; - } - virtual const TString& DoGetData() const override { - return Data; - } - virtual ui32 DoGetRecordsCountImpl() const override { - return RecordsCount; - } - virtual TString DoDebugString() const override { - return TStringBuilder() << "rc=" << RecordsCount << ";data_size=" << Data.size() << ";"; - } - virtual TSimpleChunkMeta DoBuildSimpleChunkMeta() const override { - AFL_VERIFY(false); - return TSimpleChunkMeta(nullptr, false, false); - } - virtual std::shared_ptr DoGetFirstScalar() const override { - return DefaultValue; - } - virtual std::shared_ptr DoGetLastScalar() const override { - return DefaultValue; - } - -public: - TDefaultChunkPreparation(const ui32 columnId, const ui32 recordsCount, const std::shared_ptr& f, - const std::shared_ptr& defaultValue, const TColumnSaver& saver) - : TBase(columnId) - , DefaultValue(defaultValue) - , RecordsCount(recordsCount) - { - Y_ABORT_UNLESS(RecordsCount); - Data = saver.Apply(NArrow::TThreadSimpleArraysCache::Get(f->type(), defaultValue, RecordsCount), f); - SetChunkIdx(0); - } -}; - -} diff --git a/ydb/core/tx/columnshard/engines/storage/chunks/ya.make b/ydb/core/tx/columnshard/engines/storage/chunks/ya.make index d61554bd6f0c..cff5b9f40b53 100644 --- a/ydb/core/tx/columnshard/engines/storage/chunks/ya.make +++ b/ydb/core/tx/columnshard/engines/storage/chunks/ya.make @@ -3,7 +3,6 @@ LIBRARY() SRCS( 
data.cpp column.cpp - null_column.cpp ) PEERDIR( diff --git a/ydb/core/tx/columnshard/engines/storage/granule/granule.cpp b/ydb/core/tx/columnshard/engines/storage/granule/granule.cpp index d38851486e3d..ebb2b9acde63 100644 --- a/ydb/core/tx/columnshard/engines/storage/granule/granule.cpp +++ b/ydb/core/tx/columnshard/engines/storage/granule/granule.cpp @@ -47,10 +47,9 @@ bool TGranuleMeta::ErasePortion(const ui64 portion) { void TGranuleMeta::OnAfterChangePortion(const std::shared_ptr portionAfter, NStorageOptimizer::IOptimizerPlanner::TModificationGuard* modificationGuard) { if (portionAfter) { - PortionsIndex.AddPortion(portionAfter); - PortionInfoGuard.OnNewPortion(portionAfter); if (!portionAfter->HasRemoveSnapshot()) { + PortionsIndex.AddPortion(portionAfter); if (modificationGuard) { modificationGuard->AddPortion(portionAfter); } else { @@ -74,10 +73,9 @@ void TGranuleMeta::OnAfterChangePortion(const std::shared_ptr port void TGranuleMeta::OnBeforeChangePortion(const std::shared_ptr portionBefore) { if (portionBefore) { - PortionsIndex.RemovePortion(portionBefore); - PortionInfoGuard.OnDropPortion(portionBefore); if (!portionBefore->HasRemoveSnapshot()) { + PortionsIndex.RemovePortion(portionBefore); OptimizerPlanner->StartModificationGuard().RemovePortion(portionBefore); ActualizationIndex->RemovePortion(portionBefore); } @@ -138,8 +136,7 @@ TGranuleMeta::TGranuleMeta(const ui64 pathId, const TGranulesStorage& owner, con , PortionInfoGuard(owner.GetCounters().BuildPortionBlobsGuard()) , Stats(owner.GetStats()) , StoragesManager(owner.GetStoragesManager()) - , PortionsIndex(*this) -{ + , PortionsIndex(*this, Counters.GetPortionsIndexCounters()) { NStorageOptimizer::IOptimizerPlannerConstructor::TBuildContext context(PathId, owner.GetStoragesManager(), versionedIndex.GetLastSchema()->GetIndexInfo().GetPrimaryKey()); OptimizerPlanner = versionedIndex.GetLastSchema()->GetIndexInfo().GetCompactionPlannerConstructor()->BuildPlanner(context).DetachResult(); 
AFL_VERIFY(!!OptimizerPlanner); diff --git a/ydb/core/tx/columnshard/engines/storage/granule/granule.h b/ydb/core/tx/columnshard/engines/storage/granule/granule.h index 3b3db33a72cf..d79ef50e1883 100644 --- a/ydb/core/tx/columnshard/engines/storage/granule/granule.h +++ b/ydb/core/tx/columnshard/engines/storage/granule/granule.h @@ -21,9 +21,10 @@ class TColumnChunkLoadContext; class TDataClassSummary: public NColumnShard::TBaseGranuleDataClassSummary { private: friend class TGranuleMeta; - THashMap ColumnStats; + THashMap ColumnStats; + public: - const THashMap& GetColumnStats() const { + const THashMap& GetColumnStats() const { return ColumnStats; } @@ -196,7 +197,7 @@ class TGranuleMeta: TNonCopyable { return OptimizerPlanner->SerializeToJsonVisual(); } - std::vector GetBucketPositions() const { + NArrow::NMerger::TIntervalPositions GetBucketPositions() const { return OptimizerPlanner->GetBucketPositions(); } @@ -231,11 +232,11 @@ class TGranuleMeta: TNonCopyable { } } - std::shared_ptr BuildSerializationStats(ISnapshotSchema::TPtr schema) const { - auto result = std::make_shared(); + std::shared_ptr BuildSerializationStats(ISnapshotSchema::TPtr schema) const { + auto result = std::make_shared(); for (auto&& i : GetAdditiveSummary().GetCompacted().GetColumnStats()) { auto field = schema->GetFieldByColumnIdVerified(i.first); - NOlap::TColumnSerializationStat columnInfo(i.first, field->name()); + NArrow::NSplitter::TColumnSerializationStat columnInfo(i.first, field->name()); columnInfo.Merge(i.second); result->AddStat(columnInfo); } diff --git a/ydb/core/tx/columnshard/engines/storage/granule/portions_index.cpp b/ydb/core/tx/columnshard/engines/storage/granule/portions_index.cpp index 676d40ea1c48..e56487e5f8ef 100644 --- a/ydb/core/tx/columnshard/engines/storage/granule/portions_index.cpp +++ b/ydb/core/tx/columnshard/engines/storage/granule/portions_index.cpp @@ -11,7 +11,7 @@ TPortionsIndex::TPortionIntervals TPortionsIndex::GetIntervalFeatures(const TPor 
TPortionIntervals portionExcludeIntervals; while (true) { std::optional nextKey; - for (auto&& p : itFrom->second.GetPortionIds()) { + for (auto&& [p, _] : itFrom->second.GetPortionIds()) { if (skipPortions.contains(p)) { continue; } @@ -55,9 +55,13 @@ void TPortionsIndex::RemovePortion(const std::shared_ptr& p) { auto itTo = Points.find(p->IndexKeyEnd()); AFL_VERIFY(itTo != Points.end()); { + const TPortionInfoStat stat(p); auto it = itFrom; while (true) { - it->second.RemoveContained(p->GetPortionId()); + RemoveFromMemoryUsageControl(it->second.GetIntervalStats()); + it->second.RemoveContained(stat); + RawMemoryUsage.Add(it->second.GetIntervalStats().GetMinRawBytes()); + BlobMemoryUsage.Add(it->second.GetIntervalStats().GetBlobBytes()); if (it == itTo) { break; } @@ -67,19 +71,24 @@ void TPortionsIndex::RemovePortion(const std::shared_ptr& p) { if (itFrom != itTo) { itFrom->second.RemoveStart(p); if (itFrom->second.IsEmpty()) { + RemoveFromMemoryUsageControl(itFrom->second.GetIntervalStats()); Points.erase(itFrom); } itTo->second.RemoveFinish(p); if (itTo->second.IsEmpty()) { + RemoveFromMemoryUsageControl(itTo->second.GetIntervalStats()); Points.erase(itTo); } } else { itTo->second.RemoveStart(p); itTo->second.RemoveFinish(p); if (itTo->second.IsEmpty()) { + RemoveFromMemoryUsageControl(itTo->second.GetIntervalStats()); Points.erase(itTo); } } + RawMemoryUsage.FlushCounters(); + BlobMemoryUsage.FlushCounters(); } void TPortionsIndex::AddPortion(const std::shared_ptr& p) { @@ -89,13 +98,19 @@ void TPortionsIndex::AddPortion(const std::shared_ptr& p) { itTo->second.AddFinish(p); auto it = itFrom; + const TPortionInfoStat stat(p); while (true) { - it->second.AddContained(p->GetPortionId()); + RemoveFromMemoryUsageControl(it->second.GetIntervalStats()); + it->second.AddContained(stat); + RawMemoryUsage.Add(it->second.GetIntervalStats().GetMinRawBytes()); + BlobMemoryUsage.Add(it->second.GetIntervalStats().GetBlobBytes()); if (it == itTo) { break; } AFL_VERIFY(++it 
!= Points.end()); } + RawMemoryUsage.FlushCounters(); + BlobMemoryUsage.FlushCounters(); } } \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/storage/granule/portions_index.h b/ydb/core/tx/columnshard/engines/storage/granule/portions_index.h index 09ca2d65e7c0..981943dc4dab 100644 --- a/ydb/core/tx/columnshard/engines/storage/granule/portions_index.h +++ b/ydb/core/tx/columnshard/engines/storage/granule/portions_index.h @@ -1,4 +1,5 @@ #pragma once +#include #include namespace NKikimr::NOlap { @@ -7,26 +8,74 @@ class TGranuleMeta; namespace NKikimr::NOlap::NGranule::NPortionsIndex { +class TPortionInfoStat { +private: + std::shared_ptr PortionInfo; + YDB_READONLY(ui64, MinRawBytes, 0); + YDB_READONLY(ui64, BlobBytes, 0); + +public: + TPortionInfoStat(const std::shared_ptr& portionInfo) + : PortionInfo(portionInfo) + , MinRawBytes(PortionInfo->GetMinMemoryForReadColumns({})) + , BlobBytes(PortionInfo->GetTotalBlobBytes()) + { + + } + + const TPortionInfo& GetPortionInfoVerified() const { + AFL_VERIFY(PortionInfo); + return *PortionInfo; + } +}; + +class TIntervalInfoStat { +private: + YDB_READONLY(ui64, MinRawBytes, 0); + YDB_READONLY(ui64, BlobBytes, 0); + +public: + void Add(const TPortionInfoStat& source) { + MinRawBytes += source.GetMinRawBytes(); + BlobBytes += source.GetBlobBytes(); + } + + void Sub(const TPortionInfoStat& source) { + AFL_VERIFY(MinRawBytes >= source.GetMinRawBytes()); + MinRawBytes -= source.GetMinRawBytes(); + AFL_VERIFY(BlobBytes >= source.GetBlobBytes()); + BlobBytes -= source.GetBlobBytes(); + AFL_VERIFY(!!BlobBytes == !!MinRawBytes); + } + + bool operator!() const { + return !BlobBytes && !MinRawBytes; + } +}; + class TPortionsPKPoint { private: THashMap> Start; THashMap> Finish; - THashSet PortionIds; + THashMap PortionIds; + YDB_READONLY_DEF(TIntervalInfoStat, IntervalStats); + public: const THashMap>& GetStart() const { return Start; } void ProvidePortions(const TPortionsPKPoint& source) { - for (auto&& i : 
source.PortionIds) { + IntervalStats = TIntervalInfoStat(); + for (auto&& [i, stat] : source.PortionIds) { if (source.Finish.contains(i)) { continue; } - AFL_VERIFY(PortionIds.emplace(i).second); + AddContained(stat); } } - const THashSet& GetPortionIds() const { + const THashMap& GetPortionIds() const { return PortionIds; } @@ -34,12 +83,19 @@ class TPortionsPKPoint { return Start.empty() && Finish.empty(); } - void AddContained(const ui64 portionId) { - AFL_VERIFY(PortionIds.emplace(portionId).second); + void AddContained(const TPortionInfoStat& stat) { + if (!stat.GetPortionInfoVerified().HasRemoveSnapshot()) { + IntervalStats.Add(stat); + } + AFL_VERIFY(PortionIds.emplace(stat.GetPortionInfoVerified().GetPortionId(), stat).second); } - void RemoveContained(const ui64 portionId) { - AFL_VERIFY(PortionIds.erase(portionId)); + void RemoveContained(const TPortionInfoStat& stat) { + if (!stat.GetPortionInfoVerified().HasRemoveSnapshot()) { + IntervalStats.Sub(stat); + } + AFL_VERIFY(PortionIds.erase(stat.GetPortionInfoVerified().GetPortionId())); + AFL_VERIFY(PortionIds.size() || !IntervalStats); } void RemoveStart(const std::shared_ptr& p) { @@ -61,9 +117,48 @@ class TPortionsPKPoint { } }; +class TIntervalMemoryMonitoring { +private: + std::map CountMemoryUsages; + const NColumnShard::TIntervalMemoryCounters& Counters; + +public: + void Add(const ui64 mem) { + ++CountMemoryUsages[mem]; + } + + void Remove(const ui64 mem) { + auto it = CountMemoryUsages.find(mem); + AFL_VERIFY(it != CountMemoryUsages.end())("mem", mem); + if (!--it->second) { + CountMemoryUsages.erase(it); + } + } + + TIntervalMemoryMonitoring(const NColumnShard::TIntervalMemoryCounters& counters) + : Counters(counters) + { + + } + + ui64 GetMax() const { + if (CountMemoryUsages.size()) { + return CountMemoryUsages.rbegin()->first; + } else { + return 0; + } + } + + void FlushCounters() const { + Counters.MinReadBytes->SetValue(GetMax()); + } +}; + class TPortionsIndex { private: std::map Points; + 
TIntervalMemoryMonitoring RawMemoryUsage; + TIntervalMemoryMonitoring BlobMemoryUsage; const TGranuleMeta& Owner; std::map::iterator InsertPoint(const NArrow::TReplaceKey& key) { @@ -75,17 +170,34 @@ class TPortionsIndex { --itPred; it->second.ProvidePortions(itPred->second); } + RawMemoryUsage.Add(it->second.GetIntervalStats().GetMinRawBytes()); + BlobMemoryUsage.Add(it->second.GetIntervalStats().GetBlobBytes()); } return it; } + void RemoveFromMemoryUsageControl(const TIntervalInfoStat& stat) { + RawMemoryUsage.Remove(stat.GetMinRawBytes()); + BlobMemoryUsage.Remove(stat.GetBlobBytes()); + } + public: - TPortionsIndex(const TGranuleMeta& owner) - : Owner(owner) + TPortionsIndex(const TGranuleMeta& owner, const NColumnShard::TPortionsIndexCounters& counters) + : RawMemoryUsage(counters.RawBytes) + , BlobMemoryUsage(counters.BlobBytes) + , Owner(owner) { } + ui64 GetMinRawMemoryRead() const { + return RawMemoryUsage.GetMax(); + } + + ui64 GetMinBlobMemoryRead() const { + return BlobMemoryUsage.GetMax(); + } + const std::map& GetPoints() const { return Points; } diff --git a/ydb/core/tx/columnshard/engines/storage/granule/storage.cpp b/ydb/core/tx/columnshard/engines/storage/granule/storage.cpp index 385f9d818d23..b017464eefeb 100644 --- a/ydb/core/tx/columnshard/engines/storage/granule/storage.cpp +++ b/ydb/core/tx/columnshard/engines/storage/granule/storage.cpp @@ -8,7 +8,7 @@ std::shared_ptr TGranulesStorage::GetGranuleForCom std::map> granulesSorted; ui32 countChecker = 0; std::optional priorityChecker; - const TDuration actualizationLag = NYDBTest::TControllers::GetColumnShardController()->GetCompactionActualizationLag(TDuration::Seconds(1)); + const TDuration actualizationLag = NYDBTest::TControllers::GetColumnShardController()->GetCompactionActualizationLag(); for (auto&& i : Tables) { NActors::TLogContextGuard lGuard = NActors::TLogContextBuilder::Build()("path_id", i.first); i.second->ActualizeOptimizer(now, actualizationLag); diff --git 
a/ydb/core/tx/columnshard/engines/storage/indexes/bloom/checker.cpp b/ydb/core/tx/columnshard/engines/storage/indexes/bloom/checker.cpp index 1ec8aede916d..1613bd10e7d0 100644 --- a/ydb/core/tx/columnshard/engines/storage/indexes/bloom/checker.cpp +++ b/ydb/core/tx/columnshard/engines/storage/indexes/bloom/checker.cpp @@ -1,6 +1,6 @@ #include "checker.h" #include -#include +#include #include #include @@ -13,15 +13,12 @@ void TBloomFilterChecker::DoSerializeToProtoImpl(NKikimrSSA::TProgram::TOlapInde } bool TBloomFilterChecker::DoCheckImpl(const std::vector& blobs) const { + AFL_VERIFY(blobs.size() == 1); for (auto&& blob : blobs) { - auto rb = NArrow::TStatusValidator::GetValid(NArrow::NSerialization::TSerializerContainer::GetDefaultSerializer()->Deserialize(blob)); - AFL_VERIFY(rb); - AFL_VERIFY(rb->schema()->num_fields() == 1); - AFL_VERIFY(rb->schema()->field(0)->type()->id() == arrow::Type::BOOL); - auto& bArray = static_cast(*rb->column(0)); + TFixStringBitsStorage bits(blob); bool found = true; for (auto&& i : HashValues) { - if (!bArray.Value(i % bArray.length())) { + if (!bits.Get(i % bits.GetSizeBits())) { found = false; break; } diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/bloom/checker.h b/ydb/core/tx/columnshard/engines/storage/indexes/bloom/checker.h index 92ecf9534d29..740af9f1720d 100644 --- a/ydb/core/tx/columnshard/engines/storage/indexes/bloom/checker.h +++ b/ydb/core/tx/columnshard/engines/storage/indexes/bloom/checker.h @@ -2,6 +2,42 @@ #include namespace NKikimr::NOlap::NIndexes { +class TFixStringBitsStorage { +private: + YDB_READONLY_DEF(TString, Data); + +public: + TFixStringBitsStorage(const TString& data) + : Data(data) + {} + + ui32 GetSizeBits() const { + return Data.size() * 8; + } + + TFixStringBitsStorage(const ui32 sizeBits) + : Data(sizeBits / 8 + ((sizeBits % 8) ? 
1 : 0), '\0') { + } + + void Set(const bool val, const ui32 idx) { + AFL_VERIFY(idx < GetSizeBits()); + auto* start = &Data[idx / 8]; + ui8 word = (*(ui8*)start); + if (val) { + word |= 1 << (idx % 8); + } else { + word &= (Max() - (1 << (idx % 8))); + } + memcpy(start, &word, sizeof(ui8)); + } + + bool Get(const ui32 idx) const { + AFL_VERIFY(idx < GetSizeBits()); + const ui8 start = (*(ui8*)&Data[idx / 8]); + return start & (1 << (idx % 8)); + } +}; + class TBloomFilterChecker: public TSimpleIndexChecker { public: static TString GetClassNameStatic() { diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/bloom/constructor.cpp b/ydb/core/tx/columnshard/engines/storage/indexes/bloom/constructor.cpp index 9a464f67d595..fa11002fe17f 100644 --- a/ydb/core/tx/columnshard/engines/storage/indexes/bloom/constructor.cpp +++ b/ydb/core/tx/columnshard/engines/storage/indexes/bloom/constructor.cpp @@ -15,7 +15,7 @@ std::shared_ptr TBloomIndexConstructor::Do } AFL_VERIFY(columnIds.emplace(columnInfo->GetId()).second); } - return std::make_shared(indexId, indexName, columnIds, FalsePositiveProbability); + return std::make_shared(indexId, indexName, GetStorageId().value_or(NBlobOperations::TGlobal::DefaultStorageId), columnIds, FalsePositiveProbability); } NKikimr::TConclusionStatus TBloomIndexConstructor::DoDeserializeFromJson(const NJson::TJsonValue& jsonInfo) { diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/bloom/meta.cpp b/ydb/core/tx/columnshard/engines/storage/indexes/bloom/meta.cpp index 3c52518d0b8b..6d761daf9eab 100644 --- a/ydb/core/tx/columnshard/engines/storage/indexes/bloom/meta.cpp +++ b/ydb/core/tx/columnshard/engines/storage/indexes/bloom/meta.cpp @@ -1,6 +1,6 @@ #include "meta.h" #include "checker.h" -#include +#include #include #include #include @@ -10,32 +10,26 @@ namespace NKikimr::NOlap::NIndexes { -std::shared_ptr TBloomIndexMeta::DoBuildIndexImpl(TChunkedBatchReader& reader) const { +TString 
TBloomIndexMeta::DoBuildIndexImpl(TChunkedBatchReader& reader) const { std::set hashes; - for (ui32 i = 0; i < HashesCount; ++i) { - NArrow::NHash::NXX64::TStreamStringHashCalcer hashCalcer(3 * i); + { + NArrow::NHash::NXX64::TStreamStringHashCalcer hashCalcer(0); for (reader.Start(); reader.IsCorrect(); reader.ReadNext()) { hashCalcer.Start(); for (auto&& i : reader) { NArrow::NHash::TXX64::AppendField(i.GetCurrentChunk(), i.GetCurrentRecordIndex(), hashCalcer); } - const ui64 h = hashCalcer.Finish(); - hashes.emplace(h); + hashes.emplace(hashCalcer.Finish()); } } - const ui32 bitsCount = hashes.size() / std::log(2); - std::vector flags(bitsCount, false); - for (auto&& i : hashes) { - flags[i % flags.size()] = true; - } - - arrow::BooleanBuilder builder; - auto res = builder.Reserve(flags.size()); - NArrow::TStatusValidator::Validate(builder.AppendValues(flags)); - std::shared_ptr out; - NArrow::TStatusValidator::Validate(builder.Finish(&out)); - return arrow::RecordBatch::Make(ResultSchema, bitsCount, {out}); + const ui32 bitsCount = HashesCount * hashes.size() / std::log(2); + TFixStringBitsStorage bits(bitsCount); + const auto pred = [&bits](const ui64 hash) { + bits.Set(true, hash % bits.GetSizeBits()); + }; + BuildHashesSet(hashes, pred); + return bits.GetData(); } void TBloomIndexMeta::DoFillIndexCheckers(const std::shared_ptr& info, const NSchemeShard::TOlapSchema& schema) const { @@ -57,14 +51,16 @@ void TBloomIndexMeta::DoFillIndexCheckers(const std::shared_ptr hashes; + const auto pred = [&hashes](const ui64 hash) { + hashes.emplace(hash); + }; + NArrow::NHash::NXX64::TStreamStringHashCalcer calcer(0); for (ui32 i = 0; i < HashesCount; ++i) { - NArrow::NHash::NXX64::TStreamStringHashCalcer calcer(3 * i); calcer.Start(); for (auto&& i : foundColumns) { NArrow::NHash::TXX64::AppendField(i.second, calcer); } - const ui64 hash = calcer.Finish(); - hashes.emplace(hash); + BuildHashesSet(calcer.Finish(), pred); } 
branch->MutableIndexes().emplace_back(std::make_shared(GetIndexId(), std::move(hashes))); } diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/bloom/meta.h b/ydb/core/tx/columnshard/engines/storage/indexes/bloom/meta.h index d7cb9c6ab4da..4fa0a5be0c0e 100644 --- a/ydb/core/tx/columnshard/engines/storage/indexes/bloom/meta.h +++ b/ydb/core/tx/columnshard/engines/storage/indexes/bloom/meta.h @@ -20,6 +20,27 @@ class TBloomIndexMeta: public TIndexByColumns { AFL_VERIFY(FalsePositiveProbability < 1 && FalsePositiveProbability >= 0.01); HashesCount = -1 * std::log(FalsePositiveProbability) / std::log(2); } + + static const ui64 HashesConstructorP = ((ui64)2 << 31) - 1; + static const ui64 HashesConstructorA = (ui64)2 << 16; + + template + void BuildHashesSet(const ui64 originalHash, const TActor& actor) const { + AFL_VERIFY(HashesCount < HashesConstructorP); + for (ui32 b = 1; b < HashesCount; ++b) { + const ui64 hash = (HashesConstructorA * originalHash + b) % HashesConstructorP; + actor(hash); + } + } + + template + void BuildHashesSet(const TContainer& originalHashes, const TActor& actor) const { + AFL_VERIFY(HashesCount < HashesConstructorP); + for (auto&& hOriginal : originalHashes) { + BuildHashesSet(hOriginal, actor); + } + } + protected: virtual TConclusionStatus DoCheckModificationCompatibility(const IIndexMeta& newMeta) const override { const auto* bMeta = dynamic_cast(&newMeta); @@ -31,7 +52,7 @@ class TBloomIndexMeta: public TIndexByColumns { } virtual void DoFillIndexCheckers(const std::shared_ptr& info, const NSchemeShard::TOlapSchema& schema) const override; - virtual std::shared_ptr DoBuildIndexImpl(TChunkedBatchReader& reader) const override; + virtual TString DoBuildIndexImpl(TChunkedBatchReader& reader) const override; virtual bool DoDeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDescription& proto) override { AFL_VERIFY(TBase::DoDeserializeFromProto(proto)); @@ -54,8 +75,8 @@ class TBloomIndexMeta: public TIndexByColumns { public: 
TBloomIndexMeta() = default; - TBloomIndexMeta(const ui32 indexId, const TString& indexName, std::set& columnIds, const double fpProbability) - : TBase(indexId, indexName, columnIds) + TBloomIndexMeta(const ui32 indexId, const TString& indexName, const TString& storageId, std::set& columnIds, const double fpProbability) + : TBase(indexId, indexName, columnIds, storageId) , FalsePositiveProbability(fpProbability) { Initialize(); } diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/count_min_sketch/checker.cpp b/ydb/core/tx/columnshard/engines/storage/indexes/count_min_sketch/checker.cpp new file mode 100644 index 000000000000..aa40668897d4 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/indexes/count_min_sketch/checker.cpp @@ -0,0 +1,22 @@ +#include "checker.h" +#include +#include +#include +#include + +namespace NKikimr::NOlap::NIndexes::NCountMinSketch { + +void TCountMinSketchChecker::DoSerializeToProtoImpl(NKikimrSSA::TProgram::TOlapIndexChecker& proto) const { + proto.MutableCountMinSketch(); +} + +bool TCountMinSketchChecker::DoCheckImpl(const std::vector& blobs) const { + Y_UNUSED(blobs); + return true; +} + +bool TCountMinSketchChecker::DoDeserializeFromProtoImpl(const NKikimrSSA::TProgram::TOlapIndexChecker& proto) { + return proto.HasCountMinSketch(); +} + +} // namespace NKikimr::NOlap::NIndexes diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/count_min_sketch/meta.cpp b/ydb/core/tx/columnshard/engines/storage/indexes/count_min_sketch/meta.cpp new file mode 100644 index 000000000000..80d154a751be --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/indexes/count_min_sketch/meta.cpp @@ -0,0 +1,56 @@ +#include "meta.h" +#include "checker.h" +#include +#include +#include +#include +#include + +#include +#include + +namespace NKikimr::NOlap::NIndexes::NCountMinSketch { + +TString TIndexMeta::DoBuildIndexImpl(TChunkedBatchReader& reader) const { + auto sketch = std::unique_ptr(TCountMinSketch::Create()); + + for (auto& 
colReader : reader) { + for (colReader.Start(); colReader.IsCorrect(); colReader.ReadNextChunk()) { + auto array = colReader.GetCurrentChunk(); + + NArrow::SwitchType(array->type_id(), [&](const auto& type) { + using TWrap = std::decay_t; + using TArray = typename arrow::TypeTraits::ArrayType; + + const TArray& arrTyped = static_cast(*array); + if constexpr (arrow::has_c_type()) { + for (int64_t i = 0; i < arrTyped.length(); ++i) { + auto cell = TCell::Make(arrTyped.Value(i)); + sketch->Count(cell.Data(), cell.Size()); + } + return true; + } + if constexpr (arrow::has_string_view()) { + for (int64_t i = 0; i < arrTyped.length(); ++i) { + auto view = arrTyped.GetView(i); + sketch->Count(view.data(), view.size()); + } + return true; + } + AFL_VERIFY(false)("message", "Unsupported arrow type for building an index"); + return false; + }); + } + } + + TString result(sketch->AsStringBuf()); + return result; +} + +void TIndexMeta::DoFillIndexCheckers(const std::shared_ptr& info, const NSchemeShard::TOlapSchema& /*schema*/) const { + for (auto&& branch : info->GetBranches()) { + branch->MutableIndexes().emplace_back(std::make_shared(GetIndexId())); + } +} + +} // namespace NKikimr::NOlap::NIndexes diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/max/constructor.cpp b/ydb/core/tx/columnshard/engines/storage/indexes/max/constructor.cpp new file mode 100644 index 000000000000..6c1efcc0e570 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/indexes/max/constructor.cpp @@ -0,0 +1,56 @@ +#include "constructor.h" +#include "meta.h" + +#include + +namespace NKikimr::NOlap::NIndexes::NMax { + +std::shared_ptr TIndexConstructor::DoCreateIndexMeta( + const ui32 indexId, const TString& indexName, const NSchemeShard::TOlapSchema& currentSchema, NSchemeShard::IErrorCollector& errors) const { + ui32 columnId; + { + auto* columnInfo = currentSchema.GetColumns().GetByName(ColumnName); + if (!columnInfo) { + errors.AddError("no column with name " + ColumnName); + return 
nullptr; + } + if (!TIndexMeta::IsAvailableType(columnInfo->GetType())) { + errors.AddError("inappropriate type for max index"); + return nullptr; + } + columnId = columnInfo->GetId(); + } + return std::make_shared(indexId, indexName, GetStorageId().value_or(NBlobOperations::TGlobal::LocalMetadataStorageId), columnId); +} + +NKikimr::TConclusionStatus TIndexConstructor::DoDeserializeFromJson(const NJson::TJsonValue& jsonInfo) { + if (!jsonInfo.Has("column_name")) { + return TConclusionStatus::Fail("column_name have to be in max index features"); + } + if (!jsonInfo["column_name"].GetString(&ColumnName)) { + return TConclusionStatus::Fail("column_name have to be in max index features as string"); + } + return TConclusionStatus::Success(); +} + +NKikimr::TConclusionStatus TIndexConstructor::DoDeserializeFromProto(const NKikimrSchemeOp::TOlapIndexRequested& proto) { + if (!proto.HasMaxIndex()) { + const TString errorMessage = "Not found MaxIndex section in proto: \"" + proto.DebugString() + "\""; + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("problem", errorMessage); + return TConclusionStatus::Fail(errorMessage); + } + auto& bIndex = proto.GetMaxIndex(); + ColumnName = bIndex.GetColumnName(); + if (!ColumnName) { + return TConclusionStatus::Fail("Empty column name in MaxIndex proto"); + } + return TConclusionStatus::Success(); +} + +void TIndexConstructor::DoSerializeToProto(NKikimrSchemeOp::TOlapIndexRequested& proto) const { + auto* filterProto = proto.MutableMaxIndex(); + AFL_VERIFY(!!ColumnName)("problem", "not initialized max index info trying to serialize"); + filterProto->SetColumnName(ColumnName); +} + +} // namespace NKikimr::NOlap::NIndexes::NMax diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/max/constructor.h b/ydb/core/tx/columnshard/engines/storage/indexes/max/constructor.h new file mode 100644 index 000000000000..35faabebb220 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/indexes/max/constructor.h @@ -0,0 +1,30 @@ +#pragma once 
+#include +namespace NKikimr::NOlap::NIndexes::NMax { + +class TIndexConstructor: public IIndexMetaConstructor { +public: + static TString GetClassNameStatic() { + return "MAX"; + } +private: + TString ColumnName; + static inline auto Registrator = TFactory::TRegistrator(GetClassNameStatic()); + +protected: + virtual std::shared_ptr DoCreateIndexMeta(const ui32 indexId, const TString& indexName, const NSchemeShard::TOlapSchema& currentSchema, NSchemeShard::IErrorCollector& errors) const override; + + virtual TConclusionStatus DoDeserializeFromJson(const NJson::TJsonValue& jsonInfo) override; + + virtual TConclusionStatus DoDeserializeFromProto(const NKikimrSchemeOp::TOlapIndexRequested& proto) override; + virtual void DoSerializeToProto(NKikimrSchemeOp::TOlapIndexRequested& proto) const override; + +public: + TIndexConstructor() = default; + + virtual TString GetClassName() const override { + return GetClassNameStatic(); + } +}; + +} // namespace NKikimr::NOlap::NIndexes \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/max/meta.cpp b/ydb/core/tx/columnshard/engines/storage/indexes/max/meta.cpp new file mode 100644 index 000000000000..b672f278e017 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/indexes/max/meta.cpp @@ -0,0 +1,52 @@ +#include "meta.h" + +#include +#include +#include +#include + +#include +#include + +namespace NKikimr::NOlap::NIndexes::NMax { + +TString TIndexMeta::DoBuildIndexImpl(TChunkedBatchReader& reader) const { + std::shared_ptr result; + AFL_VERIFY(reader.GetColumnsCount() == 1)("count", reader.GetColumnsCount()); + { + TChunkedColumnReader cReader = *reader.begin(); + for (reader.Start(); cReader.IsCorrect(); cReader.ReadNextChunk()) { + auto currentScalar = cReader.GetCurrentAccessor()->GetMaxScalar(); + AFL_VERIFY(currentScalar); + if (!result || NArrow::ScalarCompare(*result, *currentScalar) == -1) { + result = currentScalar; + } + } + } + return 
NArrow::NScalar::TSerializer::SerializePayloadToString(result).DetachResult(); +} + +void TIndexMeta::DoFillIndexCheckers( + const std::shared_ptr& /*info*/, const NSchemeShard::TOlapSchema& /*schema*/) const { +} + +std::shared_ptr TIndexMeta::GetMaxScalarVerified( + const std::vector& data, const std::shared_ptr& dataType) const { + AFL_VERIFY(data.size()); + std::shared_ptr result; + for (auto&& d : data) { + std::shared_ptr current = NArrow::NScalar::TSerializer::DeserializeFromStringWithPayload(d, dataType).DetachResult(); + if (!result || NArrow::ScalarCompare(*result, *current) == -1) { + result = current; + } + } + return result; +} + +NJson::TJsonValue TIndexMeta::DoSerializeDataToJson(const TString& data, const TIndexInfo& indexInfo) const { + AFL_VERIFY(ColumnIds.size() == 1); + auto scalar = GetMaxScalarVerified({ data }, indexInfo.GetColumnFeaturesVerified(*ColumnIds.begin()).GetArrowField()->type()); + return scalar->ToString(); +} + +} // namespace NKikimr::NOlap::NIndexes::NMax diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/max/meta.h b/ydb/core/tx/columnshard/engines/storage/indexes/max/meta.h new file mode 100644 index 000000000000..ef58ede92956 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/indexes/max/meta.h @@ -0,0 +1,79 @@ +#pragma once +#include +namespace NKikimr::NOlap::NIndexes::NMax { + +class TIndexMeta: public TIndexByColumns { +public: + static TString GetClassNameStatic() { + return "MAX"; + } +private: + using TBase = TIndexByColumns; + static inline auto Registrator = TFactory::TRegistrator(GetClassNameStatic()); +protected: + virtual TConclusionStatus DoCheckModificationCompatibility(const IIndexMeta& newMeta) const override { + Y_UNUSED(newMeta); + return TConclusionStatus::Fail("max index not modifiable"); + } + virtual void DoFillIndexCheckers( + const std::shared_ptr& info, const NSchemeShard::TOlapSchema& schema) const override; + + virtual TString DoBuildIndexImpl(TChunkedBatchReader& reader) const 
override; + + virtual bool DoDeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDescription& proto) override { + AFL_VERIFY(TBase::DoDeserializeFromProto(proto)); + AFL_VERIFY(proto.HasMaxIndex()); + auto& bFilter = proto.GetMaxIndex(); + if (!bFilter.GetColumnId()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("problem", "incorrect column id"); + return false; + }; + ColumnIds.emplace(bFilter.GetColumnId()); + return true; + } + + virtual NJson::TJsonValue DoSerializeDataToJson(const TString& data, const TIndexInfo& indexInfo) const override; + + virtual void DoSerializeToProto(NKikimrSchemeOp::TOlapIndexDescription& proto) const override { + AFL_VERIFY(ColumnIds.size() == 1); + auto* filterProto = proto.MutableMaxIndex(); + filterProto->SetColumnId(*ColumnIds.begin()); + } + +public: + TIndexMeta() = default; + TIndexMeta(const ui32 indexId, const TString& indexName, const TString& storageId, const ui32& columnId) + : TBase(indexId, indexName, { columnId }, storageId) { + } + + ui32 GetColumnId() const { + AFL_VERIFY(ColumnIds.size() == 1); + return *ColumnIds.begin(); + } + + static bool IsAvailableType(const NScheme::TTypeInfo type) { + auto dataTypeResult = NArrow::GetArrowType(type); + if (!dataTypeResult.ok()) { + return false; + } + if (!NArrow::SwitchType((*dataTypeResult)->id(), [&](const auto& type) { + using TWrap = std::decay_t; + if constexpr (arrow::has_c_type()) { + return true; + } + return false; + })) { + return false; + } + + return true; + } + + virtual TString GetClassName() const override { + return GetClassNameStatic(); + } + + std::shared_ptr GetMaxScalarVerified(const std::vector& data, const std::shared_ptr& type) const; +}; + +} // namespace NKikimr::NOlap::NIndexes diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/max/ya.make b/ydb/core/tx/columnshard/engines/storage/indexes/max/ya.make new file mode 100644 index 000000000000..7a24787285e2 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/indexes/max/ya.make @@ 
-0,0 +1,14 @@ +LIBRARY() + +SRCS( + GLOBAL constructor.cpp + GLOBAL meta.cpp +) + +PEERDIR( + ydb/core/protos + ydb/core/formats/arrow + ydb/core/tx/columnshard/engines/storage/indexes/portions +) + +END() diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.cpp b/ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.cpp index 0aa6e4168f7e..e62bc99d0a7f 100644 --- a/ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.cpp +++ b/ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.cpp @@ -6,12 +6,8 @@ namespace NKikimr::NOlap::NIndexes { -void TPortionIndexChunk::DoAddIntoPortionBeforeBlob(const TBlobRangeLink16& bRange, TPortionInfoConstructor& portionInfo) const { - AFL_VERIFY(!bRange.IsValid()); - portionInfo.AddIndex(TIndexChunk(GetEntityId(), GetChunkIdxVerified(), RecordsCount, RawBytes, bRange)); -} - -std::shared_ptr TIndexByColumns::DoBuildIndex(const ui32 indexId, THashMap>>& data, const TIndexInfo& indexInfo) const { +std::shared_ptr TIndexByColumns::DoBuildIndex( + const THashMap>>& data, const TIndexInfo& indexInfo) const { AFL_VERIFY(Serializer); AFL_VERIFY(data.size()); std::vector columnReaders; @@ -25,9 +21,8 @@ std::shared_ptr TIndexByColumns::DoBuildIndex recordsCount += i->GetRecordsCountVerified(); } TChunkedBatchReader reader(std::move(columnReaders)); - std::shared_ptr indexBatch = DoBuildIndexImpl(reader); - const TString indexData = Serializer->SerializeFull(indexBatch); - return std::make_shared(TChunkAddress(indexId, 0), recordsCount, NArrow::GetBatchDataSize(indexBatch), indexData); + const TString indexData = DoBuildIndexImpl(reader); + return std::make_shared(TChunkAddress(GetIndexId(), 0), recordsCount, indexData.size(), indexData); } bool TIndexByColumns::DoDeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDescription& /*proto*/) { @@ -35,8 +30,8 @@ bool TIndexByColumns::DoDeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDe return true; } -TIndexByColumns::TIndexByColumns(const 
ui32 indexId, const TString& indexName, const std::set& columnIds) - : TBase(indexId, indexName) +TIndexByColumns::TIndexByColumns(const ui32 indexId, const TString& indexName, const std::set& columnIds, const TString& storageId) + : TBase(indexId, indexName, storageId) , ColumnIds(columnIds) { Serializer = NArrow::NSerialization::TSerializerContainer::GetDefaultSerializer(); diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.h b/ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.h index fba7daa9e77c..427ee98d99d2 100644 --- a/ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.h +++ b/ydb/core/tx/columnshard/engines/storage/indexes/portions/meta.h @@ -1,66 +1,26 @@ #pragma once #include -#include +#include #include namespace NKikimr::NOlap::NIndexes { -class TPortionIndexChunk: public IPortionDataChunk { -private: - using TBase = IPortionDataChunk; - const ui32 RecordsCount; - const ui64 RawBytes; - const TString Data; -protected: - virtual const TString& DoGetData() const override { - return Data; - } - virtual TString DoDebugString() const override { - return ""; - } - virtual std::vector> DoInternalSplit(const TColumnSaver& /*saver*/, const std::shared_ptr& /*counters*/, const std::vector& /*splitSizes*/) const override { - return {}; - } - virtual bool DoIsSplittable() const override { - return false; - } - virtual std::optional DoGetRecordsCount() const override { - return RecordsCount; - } - virtual std::shared_ptr DoGetFirstScalar() const override { - return nullptr; - } - virtual std::shared_ptr DoGetLastScalar() const override { - return nullptr; - } - virtual void DoAddIntoPortionBeforeBlob(const TBlobRangeLink16& bRange, TPortionInfoConstructor& portionInfo) const override; -public: - TPortionIndexChunk(const TChunkAddress& address, const ui32 recordsCount, const ui64 rawBytes, const TString& data) - : TBase(address.GetColumnId(), address.GetChunkIdx()) - , RecordsCount(recordsCount) - , RawBytes(rawBytes) - 
, Data(data) - { - } - -}; - class TIndexByColumns: public IIndexMeta { private: using TBase = IIndexMeta; std::shared_ptr Serializer; protected: std::set ColumnIds; - virtual std::shared_ptr DoBuildIndexImpl(TChunkedBatchReader& reader) const = 0; + virtual TString DoBuildIndexImpl(TChunkedBatchReader& reader) const = 0; - virtual std::shared_ptr DoBuildIndex(const ui32 indexId, THashMap>>& data, const TIndexInfo& indexInfo) const override final; - virtual bool DoDeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDescription& /*proto*/) override; + virtual std::shared_ptr DoBuildIndex(const THashMap>>& data, const TIndexInfo& indexInfo) const override final; + virtual bool DoDeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDescription& proto) override; TConclusionStatus CheckSameColumnsForModification(const IIndexMeta& newMeta) const; public: TIndexByColumns() = default; - TIndexByColumns(const ui32 indexId, const TString& indexName, const std::set& columnIds); + TIndexByColumns(const ui32 indexId, const TString& indexName, const std::set& columnIds, const TString& storageId); }; } // namespace NKikimr::NOlap::NIndexes \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/portions/ya.make b/ydb/core/tx/columnshard/engines/storage/indexes/portions/ya.make index 076d439d54af..0ce6d8f9987f 100644 --- a/ydb/core/tx/columnshard/engines/storage/indexes/portions/ya.make +++ b/ydb/core/tx/columnshard/engines/storage/indexes/portions/ya.make @@ -7,7 +7,7 @@ SRCS( PEERDIR( ydb/core/formats/arrow - ydb/core/formats/arrow/protos + ydb/library/formats/arrow/protos ydb/core/tx/columnshard/engines/storage/chunks ydb/core/tx/columnshard/engines/scheme/indexes/abstract ydb/core/tx/columnshard/engines/portions diff --git a/ydb/core/tx/columnshard/engines/storage/indexes/ya.make b/ydb/core/tx/columnshard/engines/storage/indexes/ya.make index c97b9d1ae656..2edfa9332cd4 100644 --- a/ydb/core/tx/columnshard/engines/storage/indexes/ya.make +++ 
b/ydb/core/tx/columnshard/engines/storage/indexes/ya.make @@ -3,6 +3,7 @@ LIBRARY() PEERDIR( ydb/core/tx/columnshard/engines/storage/indexes/portions ydb/core/tx/columnshard/engines/storage/indexes/bloom + ydb/core/tx/columnshard/engines/storage/indexes/max ) END() diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/abstract/optimizer.h b/ydb/core/tx/columnshard/engines/storage/optimizer/abstract/optimizer.h index 21647072eb60..4bd196e552d0 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/abstract/optimizer.h +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/abstract/optimizer.h @@ -134,7 +134,7 @@ class IOptimizerPlanner { return DoDebugString(); } - virtual std::vector GetBucketPositions() const = 0; + virtual NArrow::NMerger::TIntervalPositions GetBucketPositions() const = 0; bool IsLocked(const std::shared_ptr& dataLocksManager) const { return DoIsLocked(dataLocksManager); } diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/planner/optimizer.cpp b/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/planner/optimizer.cpp index a36e976ed351..36f467a03133 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/planner/optimizer.cpp +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/planner/optimizer.cpp @@ -3,8 +3,7 @@ namespace NKikimr::NOlap::NStorageOptimizer::NLBuckets { TDuration GetCommonFreshnessCheckDuration() { - static const TDuration CommonFreshnessCheckDuration = TDuration::Seconds(300); - return NYDBTest::TControllers::GetColumnShardController()->GetOptimizerFreshnessCheckDuration(CommonFreshnessCheckDuration); + return NYDBTest::TControllers::GetColumnShardController()->GetOptimizerFreshnessCheckDuration(); } } diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/planner/optimizer.h b/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/planner/optimizer.h index 246b349f2a77..d686fc719112 100644 --- 
a/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/planner/optimizer.h +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/planner/optimizer.h @@ -19,8 +19,6 @@ namespace NKikimr::NOlap::NStorageOptimizer::NLBuckets { -static const ui64 SmallPortionDetectSizeLimit = 1 << 20; - TDuration GetCommonFreshnessCheckDuration(); class TSimplePortionsGroupInfo { @@ -683,7 +681,7 @@ class TPortionsBucket: public TMoveOnly { return; } MainPortion->InitRuntimeFeature(TPortionInfo::ERuntimeFeature::Optimized, Others.IsEmpty() && currentInstant > MainPortion->RecordSnapshotMax().GetPlanInstant() + - NYDBTest::TControllers::GetColumnShardController()->GetLagForCompactionBeforeTierings(TDuration::Minutes(60))); + NYDBTest::TControllers::GetColumnShardController()->GetLagForCompactionBeforeTierings()); } public: TTaskDescription GetTaskDescription() const { @@ -899,15 +897,15 @@ class TPortionsBucket: public TMoveOnly { auto result = std::make_shared(granule, portions, saverContext); if (MainPortion) { NArrow::NMerger::TSortableBatchPosition pos(MainPortion->IndexKeyStart().ToBatch(primaryKeysSchema), 0, primaryKeysSchema->field_names(), {}, false); - result->AddCheckPoint(pos, false, false); + result->AddCheckPoint(pos, false); } if (!nextBorder && MainPortion && !forceMergeForTests) { NArrow::NMerger::TSortableBatchPosition pos(MainPortion->IndexKeyEnd().ToBatch(primaryKeysSchema), 0, primaryKeysSchema->field_names(), {}, false); - result->AddCheckPoint(pos, true, false); + result->AddCheckPoint(pos, true); } if (stopPoint) { NArrow::NMerger::TSortableBatchPosition pos(stopPoint->ToBatch(primaryKeysSchema), 0, primaryKeysSchema->field_names(), {}, false); - result->AddCheckPoint(pos, false, false); + result->AddCheckPoint(pos, false); } return result; } @@ -1104,7 +1102,7 @@ class TPortionBuckets { } void RemovePortion(const std::shared_ptr& portion) { - if (portion->GetTotalBlobBytes() < 
NYDBTest::TControllers::GetColumnShardController()->GetSmallPortionSizeDetector(SmallPortionDetectSizeLimit)) { + if (portion->GetTotalBlobBytes() < NYDBTest::TControllers::GetColumnShardController()->GetSmallPortionSizeDetector()) { Counters->SmallPortions->RemovePortion(portion); } if (!RemoveBucket(portion)) { @@ -1146,7 +1144,7 @@ class TPortionBuckets { } void AddPortion(const std::shared_ptr& portion, const TInstant now) { - if (portion->GetTotalBlobBytes() < NYDBTest::TControllers::GetColumnShardController()->GetSmallPortionSizeDetector(SmallPortionDetectSizeLimit)) { + if (portion->GetTotalBlobBytes() < NYDBTest::TControllers::GetColumnShardController()->GetSmallPortionSizeDetector()) { Counters->SmallPortions->AddPortion(portion); AddOther(portion, now); return; @@ -1181,15 +1179,15 @@ class TPortionBuckets { } } - std::vector GetBucketPositions() const { - std::vector result; + NArrow::NMerger::TIntervalPositions GetBucketPositions() const { + NArrow::NMerger::TIntervalPositions result; for (auto&& i : Buckets) { AFL_VERIFY(i.second->GetStartPos()); - result.emplace_back(*i.second->GetStartPos()); + result.AddPosition(*i.second->GetStartPos(), false); } - if (Buckets.size()) { + if (Buckets.size() && Buckets.rbegin()->second->GetPortion()->GetRecordsCount() > 1) { NArrow::NMerger::TSortableBatchPosition pos(Buckets.rbegin()->second->GetPortion()->IndexKeyEnd().ToBatch(PrimaryKeysSchema), 0, PrimaryKeysSchema->field_names(), {}, false); - result.emplace_back(pos); + result.AddPosition(std::move(pos), false); } return result; } @@ -1254,7 +1252,7 @@ class TOptimizerPlanner: public IOptimizerPlanner { public: - virtual std::vector GetBucketPositions() const override { + virtual NArrow::NMerger::TIntervalPositions GetBucketPositions() const override { return Buckets.GetBucketPositions(); } diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/common/optimizer.cpp 
b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/common/optimizer.cpp index baf229b3d1fd..2fe68710d805 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/common/optimizer.cpp +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/common/optimizer.cpp @@ -4,8 +4,7 @@ namespace NKikimr::NOlap::NStorageOptimizer::NSBuckets { TDuration GetCommonFreshnessCheckDuration() { - static const TDuration CommonFreshnessCheckDuration = TDuration::Seconds(300); - return NYDBTest::TControllers::GetColumnShardController()->GetOptimizerFreshnessCheckDuration(CommonFreshnessCheckDuration); + return NYDBTest::TControllers::GetColumnShardController()->GetOptimizerFreshnessCheckDuration(); } -} +} // namespace NKikimr::NOlap::NStorageOptimizer::NSBuckets diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/constructor/constructor.cpp b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/constructor/constructor.cpp index 9e8d21bb9357..bc007d0fff10 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/constructor/constructor.cpp +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/constructor/constructor.cpp @@ -2,12 +2,11 @@ #include #include #include -#include namespace NKikimr::NOlap::NStorageOptimizer::NSBuckets { std::shared_ptr TOptimizerPlannerConstructor::BuildLogic() const { - const TDuration freshnessCheckDuration = NYDBTest::TControllers::GetColumnShardController()->GetOptimizerFreshnessCheckDuration(FreshnessCheckDuration); + const TDuration freshnessCheckDuration = NYDBTest::TControllers::GetColumnShardController()->GetOptimizerFreshnessCheckDuration(); std::shared_ptr logic; if (LogicName == "one_head") { logic = std::make_shared(freshnessCheckDuration); diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/constructor/constructor.h b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/constructor/constructor.h index 8b85a25c0877..cabe72ccc2a9 100644 
--- a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/constructor/constructor.h +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/constructor/constructor.h @@ -1,13 +1,16 @@ #pragma once +#include #include #include +#include namespace NKikimr::NOlap::NStorageOptimizer::NSBuckets { class TOptimizerPlannerConstructor: public IOptimizerPlannerConstructor { private: YDB_READONLY_DEF(TString, LogicName); - YDB_READONLY(TDuration, FreshnessCheckDuration, TDuration::Seconds(300)); + YDB_READONLY(TDuration, FreshnessCheckDuration, NYDBTest::TControllers::GetColumnShardController()->GetOptimizerFreshnessCheckDuration()); + public: static TString GetClassNameStatic() { return "s-buckets"; diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/index/bucket.cpp b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/index/bucket.cpp index 4fd497984104..ec344a674fd7 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/index/bucket.cpp +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/index/bucket.cpp @@ -9,7 +9,7 @@ namespace NKikimr::NOlap::NStorageOptimizer::NSBuckets { void TPortionsBucket::RebuildOptimizedFeature(const TInstant currentInstant) const { for (auto&& [_, p] : Portions) { p.MutablePortionInfo().InitRuntimeFeature(TPortionInfo::ERuntimeFeature::Optimized, Portions.size() == 1 && currentInstant > p->RecordSnapshotMax().GetPlanInstant() + - NYDBTest::TControllers::GetColumnShardController()->GetLagForCompactionBeforeTierings(TDuration::Minutes(60)) + NYDBTest::TControllers::GetColumnShardController()->GetLagForCompactionBeforeTierings() ); } } @@ -28,7 +28,7 @@ std::shared_ptr TPortionsBucket::BuildOpti auto result = std::make_shared(granule, context.GetPortions(), saverContext); for (auto&& i : context.GetSplitRightOpenIntervalPoints()) { NArrow::NMerger::TSortableBatchPosition pos(i.ToBatch(primaryKeysSchema), 0, primaryKeysSchema->field_names(), {}, false); - 
result->AddCheckPoint(pos, false, false); + result->AddCheckPoint(pos, false); } return result; } diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/index/index.h b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/index/index.h index a551fbbb8a1d..56bddb8547fb 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/index/index.h +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/index/index.h @@ -220,14 +220,14 @@ class TPortionBuckets { return bucketForOptimization->BuildOptimizationTask(granule, locksManager, PrimaryKeysSchema, StoragesManager); } - std::vector GetBucketPositions() const { - std::vector result; + NArrow::NMerger::TIntervalPositions GetBucketPositions() const { + NArrow::NMerger::TIntervalPositions result; for (auto&& i : Buckets) { if (!i.first.HasValue()) { continue; } NArrow::NMerger::TSortableBatchPosition posStart(i.first.GetValueVerified().ToBatch(PrimaryKeysSchema), 0, PrimaryKeysSchema->field_names(), {}, false); - result.emplace_back(posStart); + result.AddPosition(std::move(posStart), false); } return result; } diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/logic/abstract/logic.h b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/logic/abstract/logic.h index 3e4cef0f2970..b2d169db8698 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/logic/abstract/logic.h +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/logic/abstract/logic.h @@ -2,7 +2,7 @@ #include #include -#include +#include namespace NKikimr::NOlap::NStorageOptimizer::NSBuckets { diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/logic/slices/logic.cpp b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/logic/slices/logic.cpp index 465e19e8379b..28d2914ed392 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/logic/slices/logic.cpp +++ 
b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/logic/slices/logic.cpp @@ -67,7 +67,7 @@ NKikimr::NOlap::NStorageOptimizer::NSBuckets::TCompactionTaskResult TTimeSliceLo NKikimr::NOlap::NStorageOptimizer::NSBuckets::TCalcWeightResult TTimeSliceLogic::DoCalcWeight(const TInstant /*now*/, const TBucketInfo& bucket) const { ui64 size = 0; - ui32 count = 0; + ui64 count = 0; for (auto&& [maxInstant, portions] : bucket.GetSnapshotPortions()) { for (auto&& [_, p] : portions) { if (p.GetTotalBlobBytes() > compactedDetector) { diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/optimizer/optimizer.h b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/optimizer/optimizer.h index 4e8595e20f1d..7d756f09deff 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/optimizer/optimizer.h +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/sbuckets/optimizer/optimizer.h @@ -64,7 +64,7 @@ class TOptimizerPlanner: public IOptimizerPlanner { } public: - virtual std::vector GetBucketPositions() const override { + virtual NArrow::NMerger::TIntervalPositions GetBucketPositions() const override { return Buckets.GetBucketPositions(); } diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/ut/ut_optimizer.cpp b/ydb/core/tx/columnshard/engines/storage/optimizer/ut/ut_optimizer.cpp index c4aec7dd234e..420a9e5901e9 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/ut/ut_optimizer.cpp +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/ut/ut_optimizer.cpp @@ -3,8 +3,8 @@ #include #include #include -#include -#include +#include +#include #include #include diff --git a/ydb/core/tx/columnshard/engines/ut/helper.cpp b/ydb/core/tx/columnshard/engines/ut/helper.cpp index eee9159edea7..56a5c26ba492 100644 --- a/ydb/core/tx/columnshard/engines/ut/helper.cpp +++ b/ydb/core/tx/columnshard/engines/ut/helper.cpp @@ -1,10 +1,25 @@ #include "helper.h" +#include namespace NKikimr::NOlap::NEngines::NTest { +std::shared_ptr 
TLocalHelper::GetMetaSchema() { + return std::make_shared(arrow::FieldVector({ std::make_shared("1", arrow::uint64()) })); +} + NKikimrTxColumnShard::TLogicalMetadata TLocalHelper::GetMetaProto() { NKikimrTxColumnShard::TLogicalMetadata result; result.SetDirtyWriteTimeSeconds(TInstant::Now().Seconds()); + + std::vector> columns; + auto schema = GetMetaSchema(); + for (auto&& i : schema->fields()) { + columns.emplace_back(NArrow::TThreadSimpleArraysCache::Get(i->type(), NArrow::DefaultScalar(i->type()), 1)); + } + auto batch = arrow::RecordBatch::Make(schema, 1, columns); + + NArrow::TFirstLastSpecialKeys flKeys = NArrow::TFirstLastSpecialKeys(batch); + result.SetSpecialKeysPayloadData(flKeys.SerializePayloadToString()); return result; } diff --git a/ydb/core/tx/columnshard/engines/ut/helper.h b/ydb/core/tx/columnshard/engines/ut/helper.h index 8e17730a21c6..c7072c13af91 100644 --- a/ydb/core/tx/columnshard/engines/ut/helper.h +++ b/ydb/core/tx/columnshard/engines/ut/helper.h @@ -6,6 +6,7 @@ namespace NKikimr::NOlap::NEngines::NTest { class TLocalHelper { public: static NKikimrTxColumnShard::TLogicalMetadata GetMetaProto(); + static std::shared_ptr GetMetaSchema(); }; }; \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/ut/ut_insert_table.cpp b/ydb/core/tx/columnshard/engines/ut/ut_insert_table.cpp index 946b657400ca..d840a5a64f37 100644 --- a/ydb/core/tx/columnshard/engines/ut/ut_insert_table.cpp +++ b/ydb/core/tx/columnshard/engines/ut/ut_insert_table.cpp @@ -18,13 +18,13 @@ class TTestInsertTableDB : public IDbWrapper { public: void Insert(const TInsertedData&) override { } - void Commit(const TInsertedData&) override { + void Commit(const TCommittedData&) override { } void Abort(const TInsertedData&) override { } void EraseInserted(const TInsertedData&) override { } - void EraseCommitted(const TInsertedData&) override { + void EraseCommitted(const TCommittedData&) override { } void EraseAborted(const TInsertedData&) override { } @@ -73,7 
+73,7 @@ class TTestInsertTableDB : public IDbWrapper { Y_UNIT_TEST_SUITE(TColumnEngineTestInsertTable) { Y_UNIT_TEST(TestInsertCommit) { - ui64 writeId = 0; + TInsertWriteId writeId = (TInsertWriteId)0; ui64 tableId = 0; TString dedupId = "0"; TUnifiedBlobId blobId1(2222, 1, 1, 100, 2, 0, 1); @@ -81,47 +81,38 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestInsertTable) { TTestInsertTableDB dbTable; TInsertTable insertTable; ui64 indexSnapshot = 0; - + // insert, not commited - bool ok = insertTable.Insert(dbTable, TInsertedData(writeId, tableId, dedupId, blobId1, TLocalHelper::GetMetaProto(), indexSnapshot, {})); + auto userData1 = std::make_shared(tableId, TBlobRange(blobId1), TLocalHelper::GetMetaProto(), indexSnapshot, std::nullopt); + bool ok = insertTable.Insert(dbTable, TInsertedData(writeId, userData1)); UNIT_ASSERT(ok); - // insert the same blobId1 again - ok = insertTable.Insert(dbTable, TInsertedData(writeId, tableId, dedupId, blobId1, TLocalHelper::GetMetaProto(), indexSnapshot, {})); - UNIT_ASSERT(!ok); - - // insert different blodId with the same writeId and dedupId - TUnifiedBlobId blobId2(2222, 1, 2, 100, 2, 0, 1); - ok = insertTable.Insert(dbTable, TInsertedData(writeId, tableId, dedupId, blobId2, TLocalHelper::GetMetaProto(), indexSnapshot, {})); - UNIT_ASSERT(!ok); - // read nothing - auto blobs = insertTable.Read(tableId, TSnapshot::Zero(), nullptr); + auto blobs = insertTable.Read(tableId, {}, TSnapshot::Zero(), TLocalHelper::GetMetaSchema(), nullptr); UNIT_ASSERT_EQUAL(blobs.size(), 0); - blobs = insertTable.Read(tableId + 1, TSnapshot::Zero(), nullptr); + blobs = insertTable.Read(tableId + 1, {}, TSnapshot::Zero(), TLocalHelper::GetMetaSchema(), nullptr); UNIT_ASSERT_EQUAL(blobs.size(), 0); // commit ui64 planStep = 100; ui64 txId = 42; - insertTable.Commit(dbTable, planStep, txId, {TWriteId{writeId}}, [](ui64) { + insertTable.Commit(dbTable, planStep, txId, { writeId }, [](ui64) { return true; }); - - 
UNIT_ASSERT_EQUAL(insertTable.GetPathPriorities().size(), 1); - UNIT_ASSERT_EQUAL(insertTable.GetPathPriorities().begin()->second.size(), 1); - UNIT_ASSERT_EQUAL((*insertTable.GetPathPriorities().begin()->second.begin())->GetCommitted().size(), 1); +// UNIT_ASSERT_EQUAL(insertTable.GetPathPriorities().size(), 1); +// UNIT_ASSERT_EQUAL(insertTable.GetPathPriorities().begin()->second.size(), 1); +// UNIT_ASSERT_EQUAL((*insertTable.GetPathPriorities().begin()->second.begin())->GetCommitted().size(), 1); // read old snapshot - blobs = insertTable.Read(tableId, TSnapshot::Zero(), nullptr); + blobs = insertTable.Read(tableId, {}, TSnapshot::Zero(), TLocalHelper::GetMetaSchema(), nullptr); UNIT_ASSERT_EQUAL(blobs.size(), 0); - blobs = insertTable.Read(tableId + 1, TSnapshot::Zero(), nullptr); + blobs = insertTable.Read(tableId + 1, {}, TSnapshot::Zero(), TLocalHelper::GetMetaSchema(), nullptr); UNIT_ASSERT_EQUAL(blobs.size(), 0); // read new snapshot - blobs = insertTable.Read(tableId, TSnapshot(planStep, txId), nullptr); + blobs = insertTable.Read(tableId, {}, TSnapshot(planStep, txId), TLocalHelper::GetMetaSchema(), nullptr); UNIT_ASSERT_EQUAL(blobs.size(), 1); - blobs = insertTable.Read(tableId + 1, TSnapshot::Zero(), nullptr); + blobs = insertTable.Read(tableId + 1, {}, TSnapshot::Zero(), TLocalHelper::GetMetaSchema(), nullptr); UNIT_ASSERT_EQUAL(blobs.size(), 0); } } diff --git a/ydb/core/tx/columnshard/engines/ut/ut_logs_engine.cpp b/ydb/core/tx/columnshard/engines/ut/ut_logs_engine.cpp index 4c07ce331af4..ecde3aa56673 100644 --- a/ydb/core/tx/columnshard/engines/ut/ut_logs_engine.cpp +++ b/ydb/core/tx/columnshard/engines/ut/ut_logs_engine.cpp @@ -48,27 +48,27 @@ class TTestDbWrapper : public IDbWrapper { } void Insert(const TInsertedData& data) override { - Inserted.emplace(TWriteId{data.WriteTxId}, data); + Inserted.emplace(data.GetInsertWriteId(), data); } - void Commit(const TInsertedData& data) override { - Committed[data.PathId].emplace(data); + void 
Commit(const TCommittedData& data) override { + Committed[data.GetPathId()].emplace(data); } void Abort(const TInsertedData& data) override { - Aborted.emplace(TWriteId{data.WriteTxId}, data); + Aborted.emplace(data.GetInsertWriteId(), data); } void EraseInserted(const TInsertedData& data) override { - Inserted.erase(TWriteId{data.WriteTxId}); + Inserted.erase(data.GetInsertWriteId()); } - void EraseCommitted(const TInsertedData& data) override { - Committed[data.PathId].erase(data); + void EraseCommitted(const TCommittedData& data) override { + Committed[data.GetPathId()].erase(data); } void EraseAborted(const TInsertedData& data) override { - Aborted.erase(TWriteId{data.WriteTxId}); + Aborted.erase(data.GetInsertWriteId()); } bool Load(TInsertTableAccessor& accessor, @@ -189,9 +189,9 @@ class TTestDbWrapper : public IDbWrapper { } private: - THashMap Inserted; - THashMap> Committed; - THashMap Aborted; + THashMap Inserted; + THashMap> Committed; + THashMap Aborted; THashMap Indices; }; @@ -275,23 +275,27 @@ TString MakeTestBlob(i64 start = 0, i64 end = 100, ui32 step = 1) { return NArrow::SerializeBatchNoCompression(batch); } -void AddIdsToBlobs(std::vector& portions, NBlobOperations::NRead::TCompositeReadBlobs& blobs, ui32& step) { +void AddIdsToBlobs(std::vector& portions, NBlobOperations::NRead::TCompositeReadBlobs& blobs, ui32& step) { for (auto& portion : portions) { - for (auto& rec : portion.GetPortionConstructor().MutableRecords()) { - rec.BlobRange.BlobIdx = portion.GetPortionConstructor().RegisterBlobId(MakeUnifiedBlobId(++step, portion.GetBlobFullSizeVerified(rec.ColumnId, rec.Chunk))); - TString data = portion.GetBlobByRangeVerified(rec.ColumnId, rec.Chunk); - blobs.Add(IStoragesManager::DefaultStorageId, portion.GetPortionConstructor().RestoreBlobRange(rec.BlobRange), std::move(data)); + THashMap blobsData; + for (auto& b : portion.GetBlobs()) { + const auto blobId = MakeUnifiedBlobId(++step, b.GetSize()); + b.RegisterBlobId(portion, blobId); + 
blobsData.emplace(blobId, b.GetResultBlob()); + } + for (auto&& rec : portion.GetPortionConstructor().GetRecords()) { + auto range = portion.GetPortionConstructor().RestoreBlobRange(rec.BlobRange); + auto it = blobsData.find(range.BlobId); + AFL_VERIFY(it != blobsData.end()); + const TString& data = it->second; + AFL_VERIFY(range.Offset + range.Size <= data.size()); + blobs.Add(IStoragesManager::DefaultStorageId, range, data.substr(range.Offset, range.Size)); } } } -bool Insert(TColumnEngineForLogs& engine, TTestDbWrapper& db, TSnapshot snap, - std::vector&& dataToIndex, NBlobOperations::NRead::TCompositeReadBlobs& blobs, ui32& step) { - - for (ui32 i = 0; i < dataToIndex.size(); ++i) { - // Commited data always has nonzero planstep (for WriteLoadRead tests) - dataToIndex[i].PlanStep = i + 1; - }; +bool Insert(TColumnEngineForLogs& engine, TTestDbWrapper& db, TSnapshot snap, std::vector&& dataToIndex, + NBlobOperations::NRead::TCompositeReadBlobs& blobs, ui32& step) { std::shared_ptr changes = engine.StartInsert(std::move(dataToIndex)); if (!changes) { return false; @@ -425,6 +429,7 @@ std::shared_ptr CommonStoragesManager = Initia Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { void WriteLoadRead(const std::vector& ydbSchema, const std::vector& key) { + TTestBasicRuntime runtime; TTestDbWrapper db; TIndexInfo tableInfo = NColumnShard::BuildTableInfo(ydbSchema, key); @@ -438,16 +443,16 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { // PlanStep, TxId, PathId, DedupId, BlobId, Data, [Metadata] // load - TSnapshot indexSnaphot(1, 1); - TColumnEngineForLogs engine(0, CommonStoragesManager, indexSnaphot, TIndexInfo(tableInfo)); + TSnapshot indexSnapshot(1, 1); + TColumnEngineForLogs engine(0, CommonStoragesManager, indexSnapshot, TIndexInfo(tableInfo)); for (auto&& i : paths) { engine.RegisterTable(i); } engine.Load(db); - std::vector dataToIndex = { - TInsertedData(2, paths[0], "", blobRanges[0].BlobId, TLocalHelper::GetMetaProto(), 0, {}), - TInsertedData(1, paths[0], "", 
blobRanges[1].BlobId, TLocalHelper::GetMetaProto(), 0, {}) + std::vector dataToIndex = { + TCommittedData(TUserData::Build(paths[0], blobRanges[0], TLocalHelper::GetMetaProto(), 0, {}), TSnapshot(1, 2), (TInsertWriteId)2), + TCommittedData(TUserData::Build(paths[0], blobRanges[1], TLocalHelper::GetMetaProto(), 0, {}), TSnapshot(2, 1), (TInsertWriteId)1) }; // write @@ -465,12 +470,12 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { // selects auto lastSchema = engine.GetVersionedIndex().GetLastSchema(); - UNIT_ASSERT_EQUAL(lastSchema->GetSnapshot(), indexSnaphot); + UNIT_ASSERT_EQUAL(lastSchema->GetSnapshot(), indexSnapshot); const TIndexInfo& indexInfo = lastSchema->GetIndexInfo(); - THashSet oneColumnId = { indexInfo.GetColumnId(testColumns[0].GetName()) }; + THashSet oneColumnId = { indexInfo.GetColumnIdVerified(testColumns[0].GetName()) }; THashSet columnIds; for (auto& c : testColumns) { - columnIds.insert(indexInfo.GetColumnId(c.GetName())); + columnIds.insert(indexInfo.GetColumnIdVerified(c.GetName())); } { // select from snap before insert @@ -492,7 +497,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { ui64 txId = 1; auto selectInfo = engine.Select(paths[0], TSnapshot(planStep, txId), NOlap::TPKRangesFilter(false)); UNIT_ASSERT_VALUES_EQUAL(selectInfo->PortionsOrderedPK.size(), 1); - UNIT_ASSERT_VALUES_EQUAL(selectInfo->PortionsOrderedPK[0]->NumChunks(), columnIds.size() + TIndexInfo::GetSystemColumnNames().size()); + UNIT_ASSERT_VALUES_EQUAL(selectInfo->PortionsOrderedPK[0]->NumChunks(), columnIds.size() + TIndexInfo::GetSnapshotColumnIdsSet().size()); } { // select another pathId @@ -520,6 +525,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { void ReadWithPredicates(const std::vector& ydbSchema, const std::vector& key) { + TTestBasicRuntime runtime; TTestDbWrapper db; TIndexInfo tableInfo = NColumnShard::BuildTableInfo(ydbSchema, key); @@ -544,11 +550,12 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { blobs.Add(IStoragesManager::DefaultStorageId, blobRange, 
std::move(str1)); // PlanStep, TxId, PathId, DedupId, BlobId, Data, [Metadata] - std::vector dataToIndex; + std::vector dataToIndex; + TSnapshot ss(planStep, txId); dataToIndex.push_back( - TInsertedData(txId, pathId, "", blobRange.BlobId, TLocalHelper::GetMetaProto(), 0, {})); + TCommittedData(TUserData::Build(pathId, blobRange, TLocalHelper::GetMetaProto(), 0, {}), ss, (TInsertWriteId)txId)); - bool ok = Insert(engine, db, TSnapshot(planStep, txId), std::move(dataToIndex), blobs, step); + bool ok = Insert(engine, db, ss, std::move(dataToIndex), blobs, step); UNIT_ASSERT(ok); } @@ -565,7 +572,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { planStep = 3; const TIndexInfo& indexInfo = engine.GetVersionedIndex().GetLastSchema()->GetIndexInfo(); - THashSet oneColumnId = { indexInfo.GetColumnId(key[0].GetName()) }; + THashSet oneColumnId = { indexInfo.GetColumnIdVerified(key[0].GetName()) }; { // full scan ui64 txId = 1; @@ -582,7 +589,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { gt10k = MakeStrPredicate("10000", NArrow::EOperation::Greater); } NOlap::TPKRangesFilter pkFilter(false); - Y_ABORT_UNLESS(pkFilter.Add(gt10k, nullptr, nullptr)); + Y_ABORT_UNLESS(pkFilter.Add(gt10k, nullptr, indexInfo.GetReplaceKey())); auto selectInfo = engine.Select(pathId, TSnapshot(planStep, txId), pkFilter); UNIT_ASSERT_VALUES_EQUAL(selectInfo->PortionsOrderedPK.size(), 10); } @@ -594,7 +601,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { lt10k = MakeStrPredicate("08999", NArrow::EOperation::Less); } NOlap::TPKRangesFilter pkFilter(false); - Y_ABORT_UNLESS(pkFilter.Add(nullptr, lt10k, nullptr)); + Y_ABORT_UNLESS(pkFilter.Add(nullptr, lt10k, indexInfo.GetReplaceKey())); auto selectInfo = engine.Select(pathId, TSnapshot(planStep, txId), pkFilter); UNIT_ASSERT_VALUES_EQUAL(selectInfo->PortionsOrderedPK.size(), 9); } @@ -616,6 +623,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { } Y_UNIT_TEST(IndexWriteOverload) { + TTestBasicRuntime runtime; TTestDbWrapper db; auto 
csDefaultControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); TIndexInfo tableInfo = NColumnShard::BuildTableInfo(testColumns, testKey);; @@ -641,11 +649,11 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { blobs.Add(IStoragesManager::DefaultStorageId, blobRange, std::move(testBlob)); // PlanStep, TxId, PathId, DedupId, BlobId, Data, [Metadata] - std::vector dataToIndex; - dataToIndex.push_back( - TInsertedData(txId, pathId, "", blobRange.BlobId, TLocalHelper::GetMetaProto(), 0, {})); + std::vector dataToIndex; + TSnapshot ss(planStep, txId); + dataToIndex.push_back(TCommittedData(TUserData::Build(pathId, blobRange, TLocalHelper::GetMetaProto(), 0, {}), ss, (TInsertWriteId)txId)); - bool ok = Insert(engine, db, TSnapshot(planStep, txId), std::move(dataToIndex), blobs, step); + bool ok = Insert(engine, db, ss, std::move(dataToIndex), blobs, step); blobsAll.Merge(std::move(blobs)); UNIT_ASSERT(ok); } @@ -672,11 +680,11 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { blobs.Add(IStoragesManager::DefaultStorageId, blobRange, std::move(testBlob)); // PlanStep, TxId, PathId, DedupId, BlobId, Data, [Metadata] - std::vector dataToIndex; - dataToIndex.push_back( - TInsertedData(txId, pathId, "", blobRange.BlobId, TLocalHelper::GetMetaProto(), 0, {})); + std::vector dataToIndex; + TSnapshot ss(planStep, txId); + dataToIndex.push_back(TCommittedData(TUserData::Build(pathId, blobRange, TLocalHelper::GetMetaProto(), 0, {}), ss, TInsertWriteId(txId))); - bool ok = Insert(engine, db, TSnapshot(planStep, txId), std::move(dataToIndex), blobs, step); + bool ok = Insert(engine, db, ss, std::move(dataToIndex), blobs, step); UNIT_ASSERT(ok); } @@ -688,10 +696,11 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { } Y_UNIT_TEST(IndexTtl) { + TTestBasicRuntime runtime; TTestDbWrapper db; TIndexInfo tableInfo = NColumnShard::BuildTableInfo(testColumns, testKey); auto csDefaultControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); - 
csDefaultControllerGuard->SetTasksActualizationLag(TDuration::Zero()); + csDefaultControllerGuard->SetOverrideTasksActualizationLag(TDuration::Zero()); ui64 pathId = 1; ui32 step = 1000; @@ -718,11 +727,12 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { blobs.Add(IStoragesManager::DefaultStorageId, blobRange, std::move(str1)); // PlanStep, TxId, PathId, DedupId, BlobId, Data, [Metadata] - std::vector dataToIndex; + TSnapshot ss(planStep, txId); + std::vector dataToIndex; dataToIndex.push_back( - TInsertedData(txId, pathId, "", blobRange.BlobId, TLocalHelper::GetMetaProto(), 0, {})); + TCommittedData(TUserData::Build(pathId, blobRange, TLocalHelper::GetMetaProto(), 0, {}), ss, TInsertWriteId(txId))); - bool ok = Insert(engine, db, TSnapshot(planStep, txId), std::move(dataToIndex), blobs, step); + bool ok = Insert(engine, db, ss, std::move(dataToIndex), blobs, step); UNIT_ASSERT(ok); blobStartTs += blobTsRange; if (txId == txCount / 2) { @@ -742,7 +752,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { planStep = 3; const TIndexInfo& indexInfo = engine.GetVersionedIndex().GetLastSchema()->GetIndexInfo(); - THashSet oneColumnId = {indexInfo.GetColumnId(testColumns[0].GetName())}; + THashSet oneColumnId = {indexInfo.GetColumnIdVerified(testColumns[0].GetName())}; { // full scan ui64 txId = 1; @@ -782,7 +792,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { engine.Load(db); const TIndexInfo& indexInfo = engine.GetVersionedIndex().GetLastSchema()->GetIndexInfo(); - THashSet oneColumnId = {indexInfo.GetColumnId(testColumns[0].GetName())}; + THashSet oneColumnId = { indexInfo.GetColumnIdVerified(testColumns[0].GetName()) }; { // full scan ui64 txId = 1; diff --git a/ydb/core/tx/columnshard/engines/ut/ut_program.cpp b/ydb/core/tx/columnshard/engines/ut/ut_program.cpp index 798ba6ec505d..f957cfea5592 100644 --- a/ydb/core/tx/columnshard/engines/ut/ut_program.cpp +++ b/ydb/core/tx/columnshard/engines/ut/ut_program.cpp @@ -852,4 +852,64 @@ Y_UNIT_TEST_SUITE(TestProgram) { auto 
expected = result.BuildArrow(); UNIT_ASSERT_VALUES_EQUAL(batch->ToString(), expected->ToString()); } + + Y_UNIT_TEST(CountWithNulls) { + TIndexInfo indexInfo = BuildTableInfo(testColumns, testKey); + ; + NReader::NPlain::TIndexColumnResolver columnResolver(indexInfo); + + NKikimrSSA::TProgram programProto; + { + auto* command = programProto.AddCommand(); + auto* functionProto = command->MutableAssign()->MutableFunction(); + auto* column = command->MutableAssign()->MutableColumn(); + column->SetName("0"); + auto* funcArg = functionProto->AddArguments(); + funcArg->SetName("uid"); + functionProto->SetId(NKikimrSSA::TProgram::TAssignment::EFunction::TProgram_TAssignment_EFunction_FUNC_IS_NULL); + } + { + auto* command = programProto.AddCommand(); + auto* filter = command->MutableFilter(); + auto* predicate = filter->MutablePredicate(); + predicate->SetName("0"); + } + { + auto* command = programProto.AddCommand(); + auto* groupBy = command->MutableGroupBy(); + auto* aggregate = groupBy->AddAggregates(); + aggregate->MutableFunction()->SetId(static_cast(NArrow::EAggregate::Count)); + aggregate->MutableColumn()->SetName("1"); + } + { + auto* command = programProto.AddCommand(); + auto* projectionProto = command->MutableProjection(); + auto* column = projectionProto->AddColumns(); + column->SetName("1"); + } + const auto programSerialized = SerializeProgram(programProto); + + TProgramContainer program; + TString errors; + UNIT_ASSERT_C( + program.Init(columnResolver, NKikimrSchemeOp::EOlapProgramType::OLAP_PROGRAM_SSA_PROGRAM_WITH_PARAMETERS, programSerialized, errors), + errors); + + TTableUpdatesBuilder updates(NArrow::MakeArrowSchema({ std::make_pair("uid", TTypeInfo(NTypeIds::Utf8)) })); + updates.AddRow().Add("a"); + updates.AddRow().AddNull(); + updates.AddRow().Add("bbb"); + updates.AddRow().AddNull(); + updates.AddRow().AddNull(); + + auto batch = updates.BuildArrow(); + auto res = program.ApplyProgram(batch); + UNIT_ASSERT_C(res.ok(), res.ToString()); + + 
TTableUpdatesBuilder result(NArrow::MakeArrowSchema({ std::make_pair("1", TTypeInfo(NTypeIds::Uint64)) })); + result.AddRow().Add(3); + + auto expected = result.BuildArrow(); + UNIT_ASSERT_VALUES_EQUAL(batch->ToString(), expected->ToString()); + } } diff --git a/ydb/core/tx/columnshard/engines/writer/buffer/actor.cpp b/ydb/core/tx/columnshard/engines/writer/buffer/actor.cpp index 0ffaaf3a9fee..eeb6242103d1 100644 --- a/ydb/core/tx/columnshard/engines/writer/buffer/actor.cpp +++ b/ydb/core/tx/columnshard/engines/writer/buffer/actor.cpp @@ -20,7 +20,7 @@ void TActor::Bootstrap() { void TActor::Flush() { if (Aggregations.size()) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "flush_writing")("size", SumSize)("count", Aggregations.size()); - auto action = Aggregations.front()->GetWriteData()->GetBlobsAction(); + auto action = Aggregations.front()->GetBlobsAction(); auto writeController = std::make_shared(ParentActorId, action, std::move(Aggregations)); if (action->NeedDraftTransaction()) { TActorContext::AsActorContext().Send(ParentActorId, std::make_unique(writeController)); @@ -48,7 +48,8 @@ void TActor::Handle(TEvAddInsertedDataToBuffer::TPtr& ev) { auto* evBase = ev->Get(); AFL_VERIFY(evBase->GetWriteData()->GetBlobsAction()->GetStorageId() == NOlap::IStoragesManager::DefaultStorageId); SumSize += evBase->GetWriteData()->GetSize(); - Aggregations.emplace_back(std::make_shared(evBase->GetWriteData(), std::move(evBase->MutableBlobsToWrite()))); + Aggregations.emplace_back( + std::make_shared(*evBase->GetWriteData(), std::move(evBase->MutableBlobsToWrite()), evBase->GetRecordBatch())); if (SumSize > 4 * 1024 * 1024 || Aggregations.size() > 750 || !FlushDuration) { Flush(); } diff --git a/ydb/core/tx/columnshard/engines/writer/buffer/events.h b/ydb/core/tx/columnshard/engines/writer/buffer/events.h index ee750ad69bcf..d2a4b4453b04 100644 --- a/ydb/core/tx/columnshard/engines/writer/buffer/events.h +++ b/ydb/core/tx/columnshard/engines/writer/buffer/events.h @@ 
-11,11 +11,15 @@ namespace NKikimr::NColumnShard::NWriting { class TEvAddInsertedDataToBuffer: public NActors::TEventLocal { private: YDB_READONLY_DEF(std::shared_ptr, WriteData); + YDB_READONLY_DEF(std::shared_ptr, RecordBatch); YDB_ACCESSOR_DEF(std::vector, BlobsToWrite); + public: - explicit TEvAddInsertedDataToBuffer(const std::shared_ptr& writeData, std::vector&& blobs) + explicit TEvAddInsertedDataToBuffer(const std::shared_ptr& writeData, std::vector&& blobs, + const std::shared_ptr& recordBatch) : WriteData(writeData) + , RecordBatch(recordBatch) - , BlobsToWrite(blobs) + , BlobsToWrite(std::move(blobs)) /* `blobs` is a named rvalue reference, i.e. an lvalue here: without std::move the whole vector would be copied */ { } diff --git a/ydb/core/tx/columnshard/engines/writer/compacted_blob_constructor.cpp b/ydb/core/tx/columnshard/engines/writer/compacted_blob_constructor.cpp index 976c2945d59a..9836a72a60cc 100644 --- a/ydb/core/tx/columnshard/engines/writer/compacted_blob_constructor.cpp +++ b/ydb/core/tx/columnshard/engines/writer/compacted_blob_constructor.cpp @@ -19,9 +19,9 @@ TCompactedWriteController::TCompactedWriteController(const TActorId& dstActor, T } auto* pInfo = changes.GetWritePortionInfo(i); Y_ABORT_UNLESS(pInfo); - TWritePortionInfoWithBlobs& portionWithBlobs = *pInfo; + TWritePortionInfoWithBlobsResult& portionWithBlobs = *pInfo; for (auto&& b : portionWithBlobs.GetBlobs()) { - auto& task = AddWriteTask(TBlobWriteInfo::BuildWriteTask(b.GetBlob(), changes.MutableBlobsAction().GetWriting(b.GetOperator()->GetStorageId()))); + auto& task = AddWriteTask(TBlobWriteInfo::BuildWriteTask(b.GetResultBlob(), changes.MutableBlobsAction().GetWriting(b.GetOperator()->GetStorageId()))); b.RegisterBlobId(portionWithBlobs, task.GetBlobId()); WriteVolume += b.GetSize(); } diff --git a/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.cpp b/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.cpp index 9966dcd008c7..ceacecf155b6 100644 --- a/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.cpp +++ b/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.cpp @@ 
-35,19 +35,19 @@ void TWideSerializedBatch::InitBlobId(const TUnifiedBlobId& id) { void TWritingBuffer::InitReadyInstant(const TMonotonic instant) { for (auto&& aggr : Aggregations) { - aggr->GetWriteData()->MutableWriteMeta().SetWriteMiddle5StartInstant(instant); + aggr->MutableWriteMeta().SetWriteMiddle5StartInstant(instant); } } void TWritingBuffer::InitStartSending(const TMonotonic instant) { for (auto&& aggr : Aggregations) { - aggr->GetWriteData()->MutableWriteMeta().SetWriteMiddle4StartInstant(instant); + aggr->MutableWriteMeta().SetWriteMiddle4StartInstant(instant); } } void TWritingBuffer::InitReplyReceived(const TMonotonic instant) { for (auto&& aggr : Aggregations) { - aggr->GetWriteData()->MutableWriteMeta().SetWriteMiddle6StartInstant(instant); + aggr->MutableWriteMeta().SetWriteMiddle6StartInstant(instant); } } diff --git a/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.h b/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.h index 49cc6efe47b6..92e59e9b197c 100644 --- a/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.h +++ b/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.h @@ -86,23 +86,56 @@ class TWritingBlob { class TWriteAggregation { private: - YDB_READONLY_DEF(std::shared_ptr, WriteData); + NEvWrite::TWriteMeta WriteMeta; + YDB_READONLY(ui64, SchemaVersion, 0); + YDB_READONLY(ui64, Size, 0); + YDB_READONLY(ui64, Rows, 0); YDB_ACCESSOR_DEF(std::vector, SplittedBlobs); - YDB_READONLY_DEF(TVector, WriteIds); + YDB_READONLY_DEF(TVector, InsertWriteIds); + YDB_READONLY_DEF(std::shared_ptr, BlobsAction); + YDB_READONLY_DEF(NArrow::TSchemaSubset, SchemaSubset); + std::shared_ptr RecordBatch; + public: - void AddWriteId(const TWriteId& id) { - WriteIds.emplace_back(id); + const std::shared_ptr& GetRecordBatch() const { + AFL_VERIFY(RecordBatch); + return RecordBatch; + } + + const NEvWrite::TWriteMeta& GetWriteMeta() const { + return WriteMeta; + } + + NEvWrite::TWriteMeta& 
MutableWriteMeta() { + return WriteMeta; } - TWriteAggregation(const std::shared_ptr& writeData, std::vector&& splittedBlobs) - : WriteData(writeData) { + void AddInsertWriteId(const TInsertWriteId id) { + InsertWriteIds.emplace_back(id); + } + + TWriteAggregation(const NEvWrite::TWriteData& writeData, std::vector&& splittedBlobs, const std::shared_ptr& batch) + : WriteMeta(writeData.GetWriteMeta()) + , SchemaVersion(writeData.GetData()->GetSchemaVersion()) + , Size(writeData.GetSize()) + , BlobsAction(writeData.GetBlobsAction()) + , SchemaSubset(writeData.GetSchemaSubsetVerified()) + , RecordBatch(batch) + { for (auto&& s : splittedBlobs) { SplittedBlobs.emplace_back(std::move(s), *this); } + for (const auto& blob : SplittedBlobs) { /* named `blob` so it does not shadow the `batch` constructor parameter; sums rows over the stored split blobs */ + Rows += blob->GetRowsCount(); + } } - TWriteAggregation(const std::shared_ptr& writeData) - : WriteData(writeData) { + TWriteAggregation(const NEvWrite::TWriteData& writeData) + : WriteMeta(writeData.GetWriteMeta()) + , SchemaVersion(writeData.GetData()->GetSchemaVersion()) + , Size(writeData.GetSize()) + , BlobsAction(writeData.GetBlobsAction()) { + AFL_VERIFY(!writeData.GetSchemaSubset()); } }; @@ -120,7 +153,7 @@ class TWritingBuffer: public TMoveOnly { { AFL_VERIFY(BlobsAction); for (auto&& aggr : Aggregations) { - SumSize += aggr->GetWriteData()->GetSize(); + SumSize += aggr->GetSize(); } } diff --git a/ydb/core/tx/columnshard/engines/ya.make b/ydb/core/tx/columnshard/engines/ya.make index 4772008f14f1..d49a325a7832 100644 --- a/ydb/core/tx/columnshard/engines/ya.make +++ b/ydb/core/tx/columnshard/engines/ya.make @@ -13,8 +13,11 @@ SRCS( filter.cpp portion_info.cpp tier_info.cpp + defs.cpp ) +GENERATE_ENUM_SERIALIZATION(column_engine_logs.h) + PEERDIR( contrib/libs/apache/arrow ydb/core/base diff --git a/ydb/core/tx/columnshard/hooks/abstract/abstract.cpp b/ydb/core/tx/columnshard/hooks/abstract/abstract.cpp index a4d916545eac..b275e17f2fdb 100644 --- a/ydb/core/tx/columnshard/hooks/abstract/abstract.cpp +++ 
b/ydb/core/tx/columnshard/hooks/abstract/abstract.cpp @@ -1,4 +1,26 @@ #include "abstract.h" +#include + namespace NKikimr::NYDBTest { + +TDuration ICSController::GetGuaranteeIndexationInterval() const { + const TDuration defaultValue = NColumnShard::TSettings::GuaranteeIndexationInterval; + return DoGetGuaranteeIndexationInterval(defaultValue); +} + +TDuration ICSController::GetPeriodicWakeupActivationPeriod() const { + const TDuration defaultValue = NColumnShard::TSettings::DefaultPeriodicWakeupActivationPeriod; + return DoGetPeriodicWakeupActivationPeriod(defaultValue); +} + +TDuration ICSController::GetStatsReportInterval() const { + const TDuration defaultValue = NColumnShard::TSettings::DefaultStatsReportInterval; + return DoGetStatsReportInterval(defaultValue); +} + +ui64 ICSController::GetGuaranteeIndexationStartBytesLimit() const { + const ui64 defaultValue = NColumnShard::TSettings::GuaranteeIndexationStartBytesLimit; + return DoGetGuaranteeIndexationStartBytesLimit(defaultValue); +} } diff --git a/ydb/core/tx/columnshard/hooks/abstract/abstract.h b/ydb/core/tx/columnshard/hooks/abstract/abstract.h index d1ce343edf7e..c104590235d3 100644 --- a/ydb/core/tx/columnshard/hooks/abstract/abstract.h +++ b/ydb/core/tx/columnshard/hooks/abstract/abstract.h @@ -1,30 +1,33 @@ #pragma once #include +#include #include - -#include #include +#include #include -#include -#include +#include + #include +#include +#include + #include namespace NKikimr::NColumnShard { class TTiersManager; class TColumnShard; -} +} // namespace NKikimr::NColumnShard namespace NKikimr::NOlap { class TColumnEngineChanges; class IBlobsGCAction; class TPortionInfo; -namespace NStatistics { -class TOperatorContainer; -} +namespace NIndexes { +class TIndexMetaContainer; } +} // namespace NKikimr::NOlap namespace arrow { class RecordBatch; } @@ -41,7 +44,8 @@ class ILocalDBModifier { public: using TPtr = std::shared_ptr; - virtual ~ILocalDBModifier() {} + virtual ~ILocalDBModifier() { + } virtual 
void Apply(NTabletFlatExecutor::TTransactionContext& txc) const = 0; }; @@ -55,6 +59,7 @@ class ICSController { Cleanup, GC }; + protected: virtual void DoOnTabletInitCompleted(const ::NKikimr::NColumnShard::TColumnShard& /*shard*/) { return; @@ -77,10 +82,75 @@ class ICSController { } virtual void DoOnDataSharingFinished(const ui64 /*tabletId*/, const TString& /*sessionId*/) { } - virtual void DoOnDataSharingStarted(const ui64 /*tabletId*/, const TString & /*sessionId*/) { + virtual void DoOnDataSharingStarted(const ui64 /*tabletId*/, const TString& /*sessionId*/) { + } + + virtual TDuration DoGetPingCheckPeriod(const TDuration defaultValue) const { + return defaultValue; + } + virtual TDuration DoGetOverridenGCPeriod(const TDuration defaultValue) const { + return defaultValue; + } + virtual TDuration DoGetCompactionActualizationLag(const TDuration defaultValue) const { + return defaultValue; + } + virtual TDuration DoGetActualizationTasksLag(const TDuration defaultValue) const { + return defaultValue; + } + virtual ui64 DoGetReduceMemoryIntervalLimit(const ui64 defaultValue) const { + return defaultValue; + } + virtual ui64 DoGetRejectMemoryIntervalLimit(const ui64 defaultValue) const { + return defaultValue; + } + virtual ui64 DoGetReadSequentiallyBufferSize(const ui64 defaultValue) const { + return defaultValue; + } + virtual ui64 DoGetSmallPortionSizeDetector(const ui64 defaultValue) const { + return defaultValue; + } + virtual TDuration DoGetReadTimeoutClean(const TDuration defaultValue) const { + return defaultValue; + } + virtual TDuration DoGetGuaranteeIndexationInterval(const TDuration defaultValue) const { + return defaultValue; + } + virtual TDuration DoGetPeriodicWakeupActivationPeriod(const TDuration defaultValue) const { + return defaultValue; + } + virtual TDuration DoGetStatsReportInterval(const TDuration defaultValue) const { + return defaultValue; + } + virtual ui64 DoGetGuaranteeIndexationStartBytesLimit(const ui64 defaultValue) const { + return 
defaultValue; + } + virtual TDuration DoGetOptimizerFreshnessCheckDuration(const TDuration defaultValue) const { + return defaultValue; + } + virtual TDuration DoGetLagForCompactionBeforeTierings(const TDuration defaultValue) const { + return defaultValue; + } + +private: + inline static const NKikimrConfig::TColumnShardConfig DefaultConfig = {}; + + static const NKikimrConfig::TColumnShardConfig& GetConfig() { + if (HasAppData()) { + return AppDataVerified().ColumnShardConfig; + } + return DefaultConfig; } public: + virtual void OnRequestTracingChanges( + const std::set& /*snapshotsToSave*/, const std::set& /*snapshotsToRemove*/) { + } + + TDuration GetPingCheckPeriod() const { + const TDuration defaultValue = 0.6 * GetReadTimeoutClean(); + return DoGetPingCheckPeriod(defaultValue); + } + virtual bool IsBackgroundEnabled(const EBackground /*id*/) const { return true; } @@ -88,53 +158,51 @@ class ICSController { using TPtr = std::shared_ptr; virtual ~ICSController() = default; - virtual TDuration GetOverridenGCPeriod(const TDuration def) const { - return def; + TDuration GetOverridenGCPeriod() const { + const TDuration defaultValue = TDuration::MilliSeconds(GetConfig().GetGCIntervalMs()); + return DoGetOverridenGCPeriod(defaultValue); } virtual void OnSelectShardingFilter() { - } - virtual TDuration GetCompactionActualizationLag(const TDuration def) const { - return def; + TDuration GetCompactionActualizationLag() const { + const TDuration defaultValue = TDuration::MilliSeconds(GetConfig().GetCompactionActualizationLagMs()); + return DoGetCompactionActualizationLag(defaultValue); } - virtual NColumnShard::TBlobPutResult::TPtr OverrideBlobPutResultOnCompaction(const NColumnShard::TBlobPutResult::TPtr original, const NOlap::TWriteActionsCollection& /*actions*/) const { + virtual NColumnShard::TBlobPutResult::TPtr OverrideBlobPutResultOnCompaction( + const NColumnShard::TBlobPutResult::TPtr original, const NOlap::TWriteActionsCollection& /*actions*/) const { return 
original; } - virtual TDuration GetRemovedPortionLivetime(const TDuration def) const { - return def; + TDuration GetActualizationTasksLag() const { + const TDuration defaultValue = TDuration::MilliSeconds(GetConfig().GetActualizationTasksLagMs()); + return DoGetActualizationTasksLag(defaultValue); } - virtual TDuration GetActualizationTasksLag(const TDuration d) const { - return d; + ui64 GetReduceMemoryIntervalLimit() const { + const ui64 defaultValue = NOlap::TGlobalLimits::DefaultReduceMemoryIntervalLimit; + return DoGetReduceMemoryIntervalLimit(defaultValue); } - - virtual ui64 GetReduceMemoryIntervalLimit(const ui64 def) const { - return def; - } - virtual ui64 GetRejectMemoryIntervalLimit(const ui64 def) const { - return def; + ui64 GetRejectMemoryIntervalLimit() const { + const ui64 defaultValue = NOlap::TGlobalLimits::DefaultRejectMemoryIntervalLimit; + return DoGetRejectMemoryIntervalLimit(defaultValue); } virtual bool NeedForceCompactionBacketsConstruction() const { return false; } - virtual ui64 GetSmallPortionSizeDetector(const ui64 def) const { - return def; + ui64 GetSmallPortionSizeDetector() const { + const ui64 defaultValue = GetConfig().GetSmallPortionDetectSizeLimit(); + return DoGetSmallPortionSizeDetector(defaultValue); } virtual void OnExportFinished() { - } virtual void OnActualizationRefreshScheme() { - } virtual void OnActualizationRefreshTiering() { - } virtual void AddPortionForActualizer(const i32 /*portionsCount*/) { - } void OnDataSharingFinished(const ui64 tabletId, const TString& sessionId) { @@ -143,17 +211,16 @@ class ICSController { void OnDataSharingStarted(const ui64 tabletId, const TString& sessionId) { return DoOnDataSharingStarted(tabletId, sessionId); } - virtual void OnStatisticsUsage(const NOlap::NStatistics::TOperatorContainer& /*statOperator*/) { - + virtual void OnStatisticsUsage(const NOlap::NIndexes::TIndexMetaContainer& /*statOperator*/) { } virtual void OnPortionActualization(const NOlap::TPortionInfo& /*info*/) { - 
} virtual void OnMaxValueUsage() { } - virtual TDuration GetLagForCompactionBeforeTierings(const TDuration def) const { - return def; + virtual TDuration GetLagForCompactionBeforeTierings() const { + const TDuration defaultValue = TDuration::MilliSeconds(GetConfig().GetLagForCompactionBeforeTieringsMs()); + return DoGetLagForCompactionBeforeTierings(defaultValue); } void OnTabletInitCompleted(const NColumnShard::TColumnShard& shard) { @@ -182,29 +249,20 @@ class ICSController { } virtual void OnIndexSelectProcessed(const std::optional /*result*/) { } - virtual TDuration GetReadTimeoutClean(const TDuration def) { - return def; + TDuration GetReadTimeoutClean() const { + const TDuration defaultValue = TDuration::MilliSeconds(GetConfig().GetMaxReadStaleness_ms()); + return DoGetReadTimeoutClean(defaultValue); } virtual EOptimizerCompactionWeightControl GetCompactionControl() const { return EOptimizerCompactionWeightControl::Force; } - virtual TDuration GetTTLDefaultWaitingDuration(const TDuration defaultValue) const { - return defaultValue; - } - virtual TDuration GetGuaranteeIndexationInterval(const TDuration defaultValue) const { - return defaultValue; - } - virtual TDuration GetPeriodicWakeupActivationPeriod(const TDuration defaultValue) const { - return defaultValue; - } - virtual TDuration GetStatsReportInterval(const TDuration defaultValue) const { - return defaultValue; - } - virtual ui64 GetGuaranteeIndexationStartBytesLimit(const ui64 defaultValue) const { - return defaultValue; - } - virtual TDuration GetOptimizerFreshnessCheckDuration(const TDuration defaultValue) const { - return defaultValue; + TDuration GetGuaranteeIndexationInterval() const; + TDuration GetPeriodicWakeupActivationPeriod() const; + TDuration GetStatsReportInterval() const; + ui64 GetGuaranteeIndexationStartBytesLimit() const; + TDuration GetOptimizerFreshnessCheckDuration() const { + const TDuration defaultValue = 
TDuration::MilliSeconds(GetConfig().GetOptimizerFreshnessCheckDurationMs()); + return DoGetOptimizerFreshnessCheckDuration(defaultValue); } virtual void OnTieringModified(const std::shared_ptr& /*tiers*/) { @@ -215,7 +273,8 @@ class ICSController { } virtual NMetadata::NFetcher::ISnapshot::TPtr GetFallbackTiersSnapshot() const { - static std::shared_ptr result = std::make_shared(TInstant::Now()); + static std::shared_ptr result = + std::make_shared(TInstant::Now()); return result; } @@ -231,15 +290,16 @@ class ICSController { class TControllers { private: ICSController::TPtr CSController = std::make_shared(); + public: template class TGuard: TNonCopyable { private: std::shared_ptr Controller; + public: TGuard(std::shared_ptr controller) - : Controller(controller) - { + : Controller(controller) { Y_ABORT_UNLESS(Controller); } @@ -270,4 +330,4 @@ class TControllers { } }; -} +} // namespace NKikimr::NYDBTest diff --git a/ydb/core/tx/columnshard/hooks/testing/controller.cpp b/ydb/core/tx/columnshard/hooks/testing/controller.cpp index e47dc08dcd67..9cf3a7e7e9b5 100644 --- a/ydb/core/tx/columnshard/hooks/testing/controller.cpp +++ b/ydb/core/tx/columnshard/hooks/testing/controller.cpp @@ -12,10 +12,6 @@ namespace NKikimr::NYDBTest::NColumnShard { bool TController::DoOnWriteIndexComplete(const NOlap::TColumnEngineChanges& change, const ::NKikimr::NColumnShard::TColumnShard& shard) { TGuard g(Mutex); - if (SharingIds.empty()) { - TCheckContext context; - CheckInvariants(shard, context); - } return TBase::DoOnWriteIndexComplete(change, shard); } @@ -24,9 +20,6 @@ void TController::DoOnAfterGCAction(const ::NKikimr::NColumnShard::TColumnShard& for (auto d = action.GetBlobsToRemove().GetDirect().GetIterator(); d.IsValid(); ++d) { AFL_VERIFY(RemovedBlobIds[action.GetStorageId()][d.GetBlobId()].emplace(d.GetTabletId()).second); } -// if (SharingIds.empty()) { -// CheckInvariants(); -// } } void TController::CheckInvariants(const ::NKikimr::NColumnShard::TColumnShard& shard, 
TCheckContext& context) const { @@ -59,11 +52,11 @@ void TController::CheckInvariants(const ::NKikimr::NColumnShard::TColumnShard& s const NOlap::TTabletsByBlob blobs = manager->GetBlobsToDelete(); for (auto b = blobs.GetIterator(); b.IsValid(); ++b) { Cerr << shard.TabletID() << " SHARING_REMOVE_LOCAL:" << b.GetBlobId().ToStringNew() << " FROM " << b.GetTabletId() << Endl; - i.second.RemoveSharing(b.GetTabletId(), b.GetBlobId()); + Y_UNUSED(i.second.RemoveSharing(b.GetTabletId(), b.GetBlobId())); } for (auto b = blobs.GetIterator(); b.IsValid(); ++b) { Cerr << shard.TabletID() << " BORROWED_REMOVE_LOCAL:" << b.GetBlobId().ToStringNew() << " FROM " << b.GetTabletId() << Endl; - i.second.RemoveBorrowed(b.GetTabletId(), b.GetBlobId()); + Y_UNUSED(i.second.RemoveBorrowed(b.GetTabletId(), b.GetBlobId())); } } context.AddCategories(shard.TabletID(), std::move(shardBlobsCategories)); diff --git a/ydb/core/tx/columnshard/hooks/testing/controller.h b/ydb/core/tx/columnshard/hooks/testing/controller.h index c8211afb5443..a8e259877fd0 100644 --- a/ydb/core/tx/columnshard/hooks/testing/controller.h +++ b/ydb/core/tx/columnshard/hooks/testing/controller.h @@ -12,20 +12,21 @@ namespace NKikimr::NYDBTest::NColumnShard { class TController: public TReadOnlyController { private: using TBase = TReadOnlyController; - YDB_ACCESSOR_DEF(std::optional, LagForCompactionBeforeTierings); - YDB_ACCESSOR(std::optional, GuaranteeIndexationInterval, TDuration::Zero()); - YDB_ACCESSOR(std::optional, PeriodicWakeupActivationPeriod, std::nullopt); - YDB_ACCESSOR(std::optional, StatsReportInterval, std::nullopt); - YDB_ACCESSOR(std::optional, GuaranteeIndexationStartBytesLimit, 0); - YDB_ACCESSOR(std::optional, OptimizerFreshnessCheckDuration, TDuration::Zero()); - YDB_ACCESSOR_DEF(std::optional, CompactionActualizationLag); - YDB_ACCESSOR_DEF(std::optional, TasksActualizationLag); + YDB_ACCESSOR_DEF(std::optional, OverrideRequestsTracePingCheckPeriod); + YDB_ACCESSOR_DEF(std::optional, 
OverrideLagForCompactionBeforeTierings); + YDB_ACCESSOR(std::optional, OverrideGuaranteeIndexationInterval, TDuration::Zero()); + YDB_ACCESSOR(std::optional, OverridePeriodicWakeupActivationPeriod, std::nullopt); + YDB_ACCESSOR(std::optional, OverrideStatsReportInterval, std::nullopt); + YDB_ACCESSOR(std::optional, OverrideGuaranteeIndexationStartBytesLimit, 0); + YDB_ACCESSOR(std::optional, OverrideOptimizerFreshnessCheckDuration, TDuration::Zero()); + YDB_ACCESSOR_DEF(std::optional, OverrideCompactionActualizationLag); + YDB_ACCESSOR_DEF(std::optional, OverrideTasksActualizationLag); + YDB_ACCESSOR_DEF(std::optional, OverrideReadTimeoutClean); EOptimizerCompactionWeightControl CompactionControl = EOptimizerCompactionWeightControl::Force; YDB_ACCESSOR(std::optional, OverrideReduceMemoryIntervalLimit, 1024); YDB_ACCESSOR_DEF(std::optional, OverrideRejectMemoryIntervalLimit); - std::optional ReadTimeoutClean; std::optional ExpectedShardsCount; THashMap ShardActuals; @@ -129,12 +130,16 @@ class TController: public TReadOnlyController { THashSet SharingIds; protected: virtual ::NKikimr::NColumnShard::TBlobPutResult::TPtr OverrideBlobPutResultOnCompaction(const ::NKikimr::NColumnShard::TBlobPutResult::TPtr original, const NOlap::TWriteActionsCollection& actions) const override; - virtual TDuration GetLagForCompactionBeforeTierings(const TDuration def) const override { - return LagForCompactionBeforeTierings.value_or(def); + virtual TDuration DoGetLagForCompactionBeforeTierings(const TDuration def) const override { + return OverrideLagForCompactionBeforeTierings.value_or(def); } - virtual TDuration GetCompactionActualizationLag(const TDuration def) const override { - return CompactionActualizationLag.value_or(def); + virtual TDuration DoGetPingCheckPeriod(const TDuration def) const override { + return OverrideRequestsTracePingCheckPeriod.value_or(def); + } + + virtual TDuration DoGetCompactionActualizationLag(const TDuration def) const override { + return 
OverrideCompactionActualizationLag.value_or(def); } @@ -143,8 +148,8 @@ class TController: public TReadOnlyController { return !DisabledBackgrounds.contains(id); } - virtual TDuration GetActualizationTasksLag(const TDuration d) const override { - return TasksActualizationLag.value_or(d); + virtual TDuration DoGetActualizationTasksLag(const TDuration d) const override { + return OverrideTasksActualizationLag.value_or(d); } virtual void DoOnTabletInitCompleted(const ::NKikimr::NColumnShard::TColumnShard& shard) override; @@ -152,23 +157,29 @@ class TController: public TReadOnlyController { virtual void DoOnAfterGCAction(const ::NKikimr::NColumnShard::TColumnShard& shard, const NOlap::IBlobsGCAction& action) override; virtual bool DoOnWriteIndexComplete(const NOlap::TColumnEngineChanges& changes, const ::NKikimr::NColumnShard::TColumnShard& shard) override; - virtual TDuration GetGuaranteeIndexationInterval(const TDuration defaultValue) const override { - return GuaranteeIndexationInterval.value_or(defaultValue); + virtual TDuration DoGetGuaranteeIndexationInterval(const TDuration defaultValue) const override { + return OverrideGuaranteeIndexationInterval.value_or(defaultValue); } - TDuration GetPeriodicWakeupActivationPeriod(const TDuration defaultValue) const override { - return PeriodicWakeupActivationPeriod.value_or(defaultValue); + virtual TDuration DoGetPeriodicWakeupActivationPeriod(const TDuration defaultValue) const override { + return OverridePeriodicWakeupActivationPeriod.value_or(defaultValue); } - TDuration GetStatsReportInterval(const TDuration defaultValue) const override { - return StatsReportInterval.value_or(defaultValue); + virtual TDuration DoGetStatsReportInterval(const TDuration defaultValue) const override { + return OverrideStatsReportInterval.value_or(defaultValue); } - virtual ui64 GetGuaranteeIndexationStartBytesLimit(const ui64 defaultValue) const override { - return GuaranteeIndexationStartBytesLimit.value_or(defaultValue); + virtual ui64 
DoGetGuaranteeIndexationStartBytesLimit(const ui64 defaultValue) const override { + return OverrideGuaranteeIndexationStartBytesLimit.value_or(defaultValue); } - virtual TDuration GetOptimizerFreshnessCheckDuration(const TDuration defaultValue) const override { - return OptimizerFreshnessCheckDuration.value_or(defaultValue); + virtual TDuration DoGetOptimizerFreshnessCheckDuration(const TDuration defaultValue) const override { + return OverrideOptimizerFreshnessCheckDuration.value_or(defaultValue); } - virtual TDuration GetReadTimeoutClean(const TDuration def) override { - return ReadTimeoutClean.value_or(def); + virtual TDuration DoGetReadTimeoutClean(const TDuration def) const override { + return OverrideReadTimeoutClean.value_or(def); + } + virtual ui64 DoGetReduceMemoryIntervalLimit(const ui64 def) const override { + return OverrideReduceMemoryIntervalLimit.value_or(def); + } + virtual ui64 DoGetRejectMemoryIntervalLimit(const ui64 def) const override { + return OverrideRejectMemoryIntervalLimit.value_or(def); } virtual EOptimizerCompactionWeightControl GetCompactionControl() const override { return CompactionControl; @@ -177,9 +188,6 @@ class TController: public TReadOnlyController { virtual void DoOnDataSharingFinished(const ui64 /*tabletId*/, const TString& sessionId) override { TGuard g(Mutex); AFL_VERIFY(SharingIds.erase(sessionId)); - if (SharingIds.empty()) { - CheckInvariants(); - } } virtual void DoOnDataSharingStarted(const ui64 /*tabletId*/, const TString& sessionId) override { // dont check here. 
on finish only @@ -188,18 +196,9 @@ class TController: public TReadOnlyController { } public: - virtual TDuration GetRemovedPortionLivetime(const TDuration /*def*/) const override { - return TDuration::Zero(); - } const TAtomicCounter& GetIndexWriteControllerBrokeCount() const { return IndexWriteControllerBrokeCount; } - virtual ui64 GetReduceMemoryIntervalLimit(const ui64 def) const override { - return OverrideReduceMemoryIntervalLimit.value_or(def); - } - virtual ui64 GetRejectMemoryIntervalLimit(const ui64 def) const override { - return OverrideRejectMemoryIntervalLimit.value_or(def); - } bool IsTrivialLinks() const; TCheckContext CheckInvariants() const; @@ -235,9 +234,6 @@ class TController: public TReadOnlyController { void SetCompactionControl(const EOptimizerCompactionWeightControl value) { CompactionControl = value; } - void SetReadTimeoutClean(const TDuration d) { - ReadTimeoutClean = d; - } bool HasPKSortingOnly() const; diff --git a/ydb/core/tx/columnshard/hooks/testing/ro_controller.h b/ydb/core/tx/columnshard/hooks/testing/ro_controller.h index c271878ea838..c55be9455204 100644 --- a/ydb/core/tx/columnshard/hooks/testing/ro_controller.h +++ b/ydb/core/tx/columnshard/hooks/testing/ro_controller.h @@ -31,9 +31,18 @@ class TReadOnlyController: public ICSController { YDB_READONLY(TAtomicCounter, ActualizationRefreshTieringCount, 0); YDB_READONLY(TAtomicCounter, ShardingFiltersCount, 0); + YDB_READONLY(TAtomicCounter, RequestTracingSnapshotsSave, 0); + YDB_READONLY(TAtomicCounter, RequestTracingSnapshotsRemove, 0); + YDB_ACCESSOR(TAtomicCounter, CompactionsLimit, 10000000); protected: + virtual void OnRequestTracingChanges( + const std::set& snapshotsToSave, const std::set& snapshotsToRemove) override { + RequestTracingSnapshotsSave.Add(snapshotsToSave.size()); + RequestTracingSnapshotsRemove.Add(snapshotsToRemove.size()); + } + virtual void OnSelectShardingFilter() override { ShardingFiltersCount.Inc(); } @@ -62,11 +71,11 @@ class TReadOnlyController: 
public ICSController { return EOptimizerCompactionWeightControl::Force; } -public: - virtual TDuration GetOverridenGCPeriod(const TDuration /*def*/) const override { + virtual TDuration DoGetOverridenGCPeriod(const TDuration /*def*/) const override { return TDuration::Zero(); } +public: void WaitCompactions(const TDuration d) const { TInstant start = TInstant::Now(); ui32 compactionsStart = GetCompactionStartedCounter().Val(); @@ -82,10 +91,10 @@ class TReadOnlyController: public ICSController { void WaitIndexation(const TDuration d) const { TInstant start = TInstant::Now(); - ui32 compactionsStart = GetInsertStartedCounter().Val(); + ui32 insertsStart = GetInsertStartedCounter().Val(); while (Now() - start < d) { - if (compactionsStart != GetInsertStartedCounter().Val()) { - compactionsStart = GetInsertStartedCounter().Val(); + if (insertsStart != GetInsertStartedCounter().Val()) { + insertsStart = GetInsertStartedCounter().Val(); start = TInstant::Now(); } Cerr << "WAIT_INDEXATION: " << GetInsertStartedCounter().Val() << Endl; diff --git a/ydb/core/tx/columnshard/inflight_request_tracker.cpp b/ydb/core/tx/columnshard/inflight_request_tracker.cpp index 98ca6d7ab6da..6b7830b26cb0 100644 --- a/ydb/core/tx/columnshard/inflight_request_tracker.cpp +++ b/ydb/core/tx/columnshard/inflight_request_tracker.cpp @@ -1,90 +1,155 @@ +#include "columnshard_impl.h" +#include "columnshard_schema.h" #include "inflight_request_tracker.h" + +#include "data_sharing/common/transactions/tx_extension.h" #include "engines/column_engine.h" #include "engines/reader/plain_reader/constructor/read_metadata.h" +#include "hooks/abstract/abstract.h" namespace NKikimr::NColumnShard { -void TInFlightReadsTracker::RemoveInFlightRequest(ui64 cookie, const NOlap::TVersionedIndex* index) { - Y_ABORT_UNLESS(RequestsMeta.contains(cookie), "Unknown request cookie %" PRIu64, cookie); - const auto& readMetaList = RequestsMeta[cookie]; - - for (const auto& readMetaBase : readMetaList) { - 
NOlap::NReader::NPlain::TReadMetadata::TConstPtr readMeta = std::dynamic_pointer_cast(readMetaBase); - - if (!readMeta) { - continue; - } - - THashMap> portionBlobIds; - for (const auto& portion : readMeta->SelectInfo->PortionsOrderedPK) { - const ui64 portionId = portion->GetPortion(); - AFL_VERIFY(index); - portion->FillBlobIdsByStorage(portionBlobIds, *index); - auto it = PortionUseCount.find(portionId); - Y_ABORT_UNLESS(it != PortionUseCount.end(), "Portion id %" PRIu64 " not found in request %" PRIu64, portionId, cookie); - if (it->second == 1) { - PortionUseCount.erase(it); - } else { - it->second--; +NOlap::NReader::TReadMetadataBase::TConstPtr TInFlightReadsTracker::ExtractInFlightRequest( + ui64 cookie, const NOlap::TVersionedIndex* /*index*/, const TInstant now) { + auto it = RequestsMeta.find(cookie); + AFL_VERIFY(it != RequestsMeta.end())("cookie", cookie); + const NOlap::NReader::TReadMetadataBase::TConstPtr readMetaBase = it->second; + + { + { + auto it = SnapshotsLive.find(readMetaBase->GetRequestSnapshot()); + AFL_VERIFY(it != SnapshotsLive.end()); + if (it->second.DelRequest(cookie, now)) { + SnapshotsLive.erase(it); } } - for (auto&& i : portionBlobIds) { - auto storage = StoragesManager->GetOperatorVerified(i.first); - auto tracker = storage->GetBlobsTracker(); - for (auto& blobId : i.second) { - tracker->FreeBlob(blobId); + if (NOlap::NReader::NPlain::TReadMetadata::TConstPtr readMeta = + std::dynamic_pointer_cast(readMetaBase)) { + auto insertStorage = StoragesManager->GetInsertOperator(); + auto tracker = insertStorage->GetBlobsTracker(); + for (const auto& committedBlob : readMeta->CommittedBlobs) { + tracker->FreeBlob(committedBlob.GetBlobRange().GetBlobId()); } } - - auto insertStorage = StoragesManager->GetInsertOperator(); - auto tracker = insertStorage->GetBlobsTracker(); - for (const auto& committedBlob : readMeta->CommittedBlobs) { - tracker->FreeBlob(committedBlob.GetBlobRange().GetBlobId()); - } } + 
Counters->OnSnapshotsInfo(SnapshotsLive.size(), GetSnapshotToClean()); RequestsMeta.erase(cookie); + return readMetaBase; } -TConclusionStatus TInFlightReadsTracker::AddToInFlightRequest(const ui64 cookie, NOlap::NReader::TReadMetadataBase::TConstPtr readMetaBase, const NOlap::TVersionedIndex* index) { - RequestsMeta[cookie].push_back(readMetaBase); +void TInFlightReadsTracker::AddToInFlightRequest( + const ui64 cookie, NOlap::NReader::TReadMetadataBase::TConstPtr readMetaBase, const NOlap::TVersionedIndex* /*index*/) { + AFL_VERIFY(RequestsMeta.emplace(cookie, readMetaBase).second); auto readMeta = std::dynamic_pointer_cast(readMetaBase); if (!readMeta) { - return TConclusionStatus::Success(); + return; } auto selectInfo = readMeta->SelectInfo; Y_ABORT_UNLESS(selectInfo); SelectStatsDelta += selectInfo->Stats(); - THashMap> portionBlobIds; - for (const auto& portion : readMeta->SelectInfo->PortionsOrderedPK) { - const ui64 portionId = portion->GetPortion(); - PortionUseCount[portionId]++; - AFL_VERIFY(index); - portion->FillBlobIdsByStorage(portionBlobIds, *index); + auto insertStorage = StoragesManager->GetInsertOperator(); + auto tracker = insertStorage->GetBlobsTracker(); + for (const auto& committedBlob : readMeta->CommittedBlobs) { + tracker->UseBlob(committedBlob.GetBlobRange().GetBlobId()); } +} - for (auto&& i : portionBlobIds) { - auto storage = StoragesManager->GetOperatorOptional(i.first); - if (!storage) { - return TConclusionStatus::Fail("blobs storage info not ready for '" + i.first + "'"); +namespace { +class TTransactionSavePersistentSnapshots: public NOlap::NDataSharing::TExtendedTransactionBase { +private: + using TBase = NOlap::NDataSharing::TExtendedTransactionBase; + const std::set SaveSnapshots; + const std::set RemoveSnapshots; + virtual bool DoExecute(NTabletFlatExecutor::TTransactionContext& txc, const TActorContext& /*ctx*/) override { + using namespace NColumnShard; + NIceDb::TNiceDb db(txc.DB); + for (auto&& i : SaveSnapshots) { + 
db.Table().Key(i.GetPlanStep(), i.GetTxId()).Update(); } - auto tracker = storage->GetBlobsTracker(); - for (auto& blobId : i.second) { - tracker->UseBlob(blobId); + for (auto&& i : RemoveSnapshots) { + db.Table().Key(i.GetPlanStep(), i.GetTxId()).Delete(); } + return true; } - auto insertStorage = StoragesManager->GetInsertOperator(); - auto tracker = insertStorage->GetBlobsTracker(); - for (const auto& committedBlob : readMeta->CommittedBlobs) { - tracker->UseBlob(committedBlob.GetBlobRange().GetBlobId()); + virtual void DoComplete(const TActorContext& /*ctx*/) override { + } + +public: + TTransactionSavePersistentSnapshots( + NColumnShard::TColumnShard* self, std::set&& saveSnapshots, std::set&& removeSnapshots) + : TBase(self) + , SaveSnapshots(std::move(saveSnapshots)) + , RemoveSnapshots(std::move(removeSnapshots)) { + AFL_VERIFY(SaveSnapshots.size() || RemoveSnapshots.size()); + } +}; +} // namespace + +std::unique_ptr TInFlightReadsTracker::Ping( + TColumnShard* self, const TDuration critDuration, const TInstant now) { + std::set snapshotsToSave; + std::set snapshotsToFree; + for (auto&& i : SnapshotsLive) { + if (i.second.Ping(critDuration, now)) { + if (i.second.GetIsLock()) { + Counters->OnSnapshotLocked(); + snapshotsToSave.emplace(i.first); + } else { + Counters->OnSnapshotUnlocked(); + snapshotsToFree.emplace(i.first); + } + } + } + for (auto&& i : snapshotsToFree) { + SnapshotsLive.erase(i); + } + Counters->OnSnapshotsInfo(SnapshotsLive.size(), GetSnapshotToClean()); + if (snapshotsToFree.size() || snapshotsToSave.size()) { + NYDBTest::TControllers::GetColumnShardController()->OnRequestTracingChanges(snapshotsToSave, snapshotsToFree); + return std::make_unique(self, std::move(snapshotsToSave), std::move(snapshotsToFree)); + } else { + return nullptr; } - return TConclusionStatus::Success(); } +bool TInFlightReadsTracker::LoadFromDatabase(NTable::TDatabase& tableDB) { + NIceDb::TNiceDb db(tableDB); + auto rowset = db.Table().Select(); + if 
(!rowset.IsReady()) { + return false; + } + + while (!rowset.EndOfSet()) { + const NOlap::TSnapshot snapshot( + rowset.GetValue(), rowset.GetValue()); + AFL_VERIFY(SnapshotsLive.emplace(snapshot, TSnapshotLiveInfo::BuildFromDatabase(snapshot)).second); + + if (!rowset.Next()) { + return false; + } + } + Counters->OnSnapshotsInfo(SnapshotsLive.size(), GetSnapshotToClean()); + return true; } + +ui64 TInFlightReadsTracker::AddInFlightRequest( + NOlap::NReader::TReadMetadataBase::TConstPtr readMeta, const NOlap::TVersionedIndex* index) { + const ui64 cookie = NextCookie++; + auto it = SnapshotsLive.find(readMeta->GetRequestSnapshot()); + if (it == SnapshotsLive.end()) { + it = SnapshotsLive.emplace(readMeta->GetRequestSnapshot(), TSnapshotLiveInfo::BuildFromRequest(readMeta->GetRequestSnapshot())).first; + Counters->OnSnapshotsInfo(SnapshotsLive.size(), GetSnapshotToClean()); + } + it->second.AddRequest(cookie); + AddToInFlightRequest(cookie, readMeta, index); + return cookie; +} + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/inflight_request_tracker.h b/ydb/core/tx/columnshard/inflight_request_tracker.h index d530c11d7a4f..0aeec5acddbe 100644 --- a/ydb/core/tx/columnshard/inflight_request_tracker.h +++ b/ydb/core/tx/columnshard/inflight_request_tracker.h @@ -1,6 +1,9 @@ #pragma once #include "blob.h" + +#include "counters/req_tracer.h" + #include namespace NKikimr::NOlap { @@ -8,49 +11,111 @@ class TVersionedIndex; } namespace NKikimr::NColumnShard { - +class TColumnShard; using NOlap::IBlobInUseTracker; -class TInFlightReadsTracker { +class TSnapshotLiveInfo { +private: + const NOlap::TSnapshot Snapshot; + std::optional LastPingInstant; + std::optional LastRequestFinishedInstant; + THashSet Requests; + YDB_READONLY(bool, IsLock, false); + + TSnapshotLiveInfo(const NOlap::TSnapshot& snapshot) + : Snapshot(snapshot) { + } + public: - // Returns a unique cookie associated with this request - [[nodiscard]] TConclusion 
AddInFlightRequest(NOlap::NReader::TReadMetadataBase::TConstPtr readMeta, const NOlap::TVersionedIndex* index) { - const ui64 cookie = NextCookie++; - auto status = AddToInFlightRequest(cookie, readMeta, index); - if (!status) { - return status; + void AddRequest(const ui32 cookie) { + AFL_VERIFY(Requests.emplace(cookie).second); + } + + [[nodiscard]] bool DelRequest(const ui32 cookie, const TInstant now) { + AFL_VERIFY(Requests.erase(cookie)); + if (Requests.empty()) { + LastRequestFinishedInstant = now; + } + if (!IsLock && Requests.empty()) { + return true; } - return cookie; + return false; } - void RemoveInFlightRequest(ui64 cookie, const NOlap::TVersionedIndex* index); + static TSnapshotLiveInfo BuildFromRequest(const NOlap::TSnapshot& reqSnapshot) { + return TSnapshotLiveInfo(reqSnapshot); + } - // Checks if the portion is in use by any in-flight request - bool IsPortionUsed(ui64 portionId) const { - return PortionUseCount.contains(portionId); + static TSnapshotLiveInfo BuildFromDatabase(const NOlap::TSnapshot& reqSnapshot) { + TSnapshotLiveInfo result(reqSnapshot); + result.LastPingInstant = TInstant::Now(); + result.LastRequestFinishedInstant = result.LastPingInstant; + result.IsLock = true; + return result; } + bool Ping(const TDuration critDuration, const TInstant now) { + LastPingInstant = now; + if (Requests.empty()) { + AFL_VERIFY(LastRequestFinishedInstant); + if (critDuration < *LastPingInstant - *LastRequestFinishedInstant && IsLock) { + IsLock = false; + return true; + } + } else { + if (critDuration < *LastPingInstant - Snapshot.GetPlanInstant() && !IsLock) { + IsLock = true; + return true; + } + } + return false; + } +}; + +class TInFlightReadsTracker { +private: + std::map SnapshotsLive; + std::shared_ptr Counters; + +public: + std::optional GetSnapshotToClean() const { + if (SnapshotsLive.empty()) { + return std::nullopt; + } else { + return SnapshotsLive.begin()->first; + } + } + + bool LoadFromDatabase(NTable::TDatabase& db); + + 
[[nodiscard]] std::unique_ptr Ping(TColumnShard* self, const TDuration critDuration, const TInstant now); + + // Returns a unique cookie associated with this request + [[nodiscard]] ui64 AddInFlightRequest( + NOlap::NReader::TReadMetadataBase::TConstPtr readMeta, const NOlap::TVersionedIndex* index); + + [[nodiscard]] NOlap::NReader::TReadMetadataBase::TConstPtr ExtractInFlightRequest(ui64 cookie, const NOlap::TVersionedIndex* index, const TInstant now); + NOlap::TSelectInfo::TStats GetSelectStatsDelta() { auto delta = SelectStatsDelta; SelectStatsDelta = NOlap::TSelectInfo::TStats(); return delta; } - TInFlightReadsTracker(const std::shared_ptr& storagesManager) - : StoragesManager(storagesManager) - { - + TInFlightReadsTracker(const std::shared_ptr& storagesManager, const std::shared_ptr& counters) + : Counters(counters) + , StoragesManager(storagesManager) { } private: - [[nodiscard]] TConclusionStatus AddToInFlightRequest(const ui64 cookie, NOlap::NReader::TReadMetadataBase::TConstPtr readMetaBase, const NOlap::TVersionedIndex* index); + void AddToInFlightRequest( + const ui64 cookie, NOlap::NReader::TReadMetadataBase::TConstPtr readMetaBase, const NOlap::TVersionedIndex* index); private: std::shared_ptr StoragesManager; - ui64 NextCookie{1}; - THashMap> RequestsMeta; - THashMap PortionUseCount; + ui64 NextCookie = 1; + THashMap RequestsMeta; NOlap::TSelectInfo::TStats SelectStatsDelta; }; -} +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/normalizer/abstract/abstract.h b/ydb/core/tx/columnshard/normalizer/abstract/abstract.h index aaa6b6be1d87..e75099ecd9ba 100644 --- a/ydb/core/tx/columnshard/normalizer/abstract/abstract.h +++ b/ydb/core/tx/columnshard/normalizer/abstract/abstract.h @@ -56,6 +56,9 @@ enum class ENormalizerSequentialId: ui32 { TablesCleaner, PortionsMetadata, CleanGranuleId, + EmptyPortionsCleaner, + CleanInsertionDedup, + GCCountersNormalizer, MAX }; diff --git 
a/ydb/core/tx/columnshard/normalizer/insert_table/broken_dedup.cpp b/ydb/core/tx/columnshard/normalizer/insert_table/broken_dedup.cpp new file mode 100644 index 000000000000..5a0934261879 --- /dev/null +++ b/ydb/core/tx/columnshard/normalizer/insert_table/broken_dedup.cpp @@ -0,0 +1,151 @@ +#include "broken_dedup.h" + +#include +#include + +namespace NKikimr::NOlap::NInsertionDedup { + +class TNormalizerRemoveChanges: public INormalizerChanges { +private: + std::vector Insertions; +public: + virtual bool ApplyOnExecute(NTabletFlatExecutor::TTransactionContext& txc, const TNormalizationController& /*normalizationContext*/) const override { + NIceDb::TNiceDb db(txc.DB); + for (auto&& i : Insertions) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "remove_aborted_record")("write_id", i.GetInsertWriteId()); + i.Remove(db); + } + return true; + } + virtual void ApplyOnComplete(const TNormalizationController& /*normalizationContext*/) const override { + + } + + virtual ui64 GetSize() const override { + return Insertions.size(); + } + + TNormalizerRemoveChanges(const std::vector& insertions) + : Insertions(insertions) + { + + } +}; + +class TNormalizerCleanDedupChanges: public INormalizerChanges { +private: + mutable std::vector Insertions; + +public: + virtual bool ApplyOnExecute( + NTabletFlatExecutor::TTransactionContext& txc, const TNormalizationController& /*normalizationContext*/) const override { + NIceDb::TNiceDb db(txc.DB); + for (auto&& i : Insertions) { + AFL_VERIFY(i.GetDedupId()); + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "correct_record")("dedup", i.GetDedupId()); + i.Remove(db); + i.SetDedupId(""); + i.Upsert(db); + } + return true; + } + virtual void ApplyOnComplete(const TNormalizationController& /*normalizationContext*/) const override { + } + + virtual ui64 GetSize() const override { + return Insertions.size(); + } + + TNormalizerCleanDedupChanges(const std::vector& insertions) + : Insertions(insertions) { + } +}; + + +class 
TCollectionStates { +private: + YDB_READONLY_DEF(std::optional, Inserted); + YDB_READONLY_DEF(std::optional, Aborted); +public: + void SetInserted(const TInsertTableRecordLoadContext& context) { + AFL_VERIFY(!Inserted); + Inserted = context; + } + void SetAborted(const TInsertTableRecordLoadContext& context) { + AFL_VERIFY(!Aborted); + Aborted = context; + } +}; + +TConclusion> TInsertionsDedupNormalizer::DoInit( + const TNormalizationController& /*controller*/, NTabletFlatExecutor::TTransactionContext& txc) { + NIceDb::TNiceDb db(txc.DB); + + using namespace NColumnShard; + auto rowset = db.Table().Select(); + if (!rowset.IsReady()) { + return TConclusionStatus::Fail("cannot read insertion info"); + } + THashMap insertions; + while (!rowset.EndOfSet()) { + TInsertTableRecordLoadContext constructor; + constructor.ParseFromDatabase(rowset); + if (constructor.GetRecType() == NColumnShard::Schema::EInsertTableIds::Committed) { + AFL_VERIFY(constructor.GetPlanStep()); + } else { + AFL_VERIFY(!constructor.GetPlanStep()); + if (constructor.GetRecType() == NColumnShard::Schema::EInsertTableIds::Aborted) { + insertions[constructor.GetInsertWriteId()].SetAborted(constructor); + } else if (constructor.GetRecType() == NColumnShard::Schema::EInsertTableIds::Inserted) { + insertions[constructor.GetInsertWriteId()].SetInserted(constructor); + } else { + AFL_VERIFY(false); + } + } + if (!rowset.Next()) { + return TConclusionStatus::Fail("cannot read insertion info"); + } + } + + std::vector result; + std::vector toRemove; + std::vector toCleanDedup; + for (auto&& [id, i] : insertions) { + if (i.GetInserted() && i.GetAborted()) { + toRemove.emplace_back(*i.GetInserted()); + if (i.GetAborted()->GetDedupId()) { + toCleanDedup.emplace_back(*i.GetAborted()); + } + } else if (i.GetAborted()) { + if (i.GetAborted()->GetDedupId()) { + toCleanDedup.emplace_back(*i.GetAborted()); + } + } else if (i.GetInserted()) { + if (i.GetInserted()->GetDedupId()) { + 
toCleanDedup.emplace_back(*i.GetInserted()); + } + } else { + AFL_VERIFY(false); + } + if (toCleanDedup.size() == 1000) { + result.emplace_back(std::make_shared(std::make_shared(toCleanDedup))); + toCleanDedup.clear(); + } + if (toRemove.size() == 1000) { + result.emplace_back(std::make_shared(std::make_shared(toRemove))); + toRemove.clear(); + } + } + if (toCleanDedup.size()) { + result.emplace_back(std::make_shared(std::make_shared(toCleanDedup))); + toCleanDedup.clear(); + } + if (toRemove.size()) { + result.emplace_back(std::make_shared(std::make_shared(toRemove))); + toRemove.clear(); + } + + return result; +} + +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/normalizer/insert_table/broken_dedup.h b/ydb/core/tx/columnshard/normalizer/insert_table/broken_dedup.h new file mode 100644 index 000000000000..c9a935e24371 --- /dev/null +++ b/ydb/core/tx/columnshard/normalizer/insert_table/broken_dedup.h @@ -0,0 +1,35 @@ +#pragma once + +#include +#include + + +namespace NKikimr::NOlap::NInsertionDedup { + +class TInsertionsDedupNormalizer: public TNormalizationController::INormalizerComponent { +public: + static TString GetClassNameStatic() { + return "CleanInsertionDedup"; + } +private: + class TNormalizerResult; + + static const inline INormalizerComponent::TFactory::TRegistrator Registrator = + INormalizerComponent::TFactory::TRegistrator(GetClassNameStatic()); + +public: + TInsertionsDedupNormalizer(const TNormalizationController::TInitContext&) { + } + + virtual std::optional DoGetEnumSequentialId() const override { + return ENormalizerSequentialId::CleanInsertionDedup; + } + + virtual TString GetClassName() const override { + return GetClassNameStatic(); + } + + virtual TConclusion> DoInit(const TNormalizationController& controller, NTabletFlatExecutor::TTransactionContext& txc) override; +}; + +} diff --git a/ydb/core/tx/columnshard/normalizer/insert_table/ya.make b/ydb/core/tx/columnshard/normalizer/insert_table/ya.make new file mode 100644 
index 000000000000..99cdc40cfaf4 --- /dev/null +++ b/ydb/core/tx/columnshard/normalizer/insert_table/ya.make @@ -0,0 +1,11 @@ +LIBRARY() + +SRCS( + GLOBAL broken_dedup.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/normalizer/abstract +) + +END() diff --git a/ydb/core/tx/columnshard/normalizer/portion/chunks.cpp b/ydb/core/tx/columnshard/normalizer/portion/chunks.cpp index 03bd39787984..f42f38061e45 100644 --- a/ydb/core/tx/columnshard/normalizer/portion/chunks.cpp +++ b/ydb/core/tx/columnshard/normalizer/portion/chunks.cpp @@ -49,7 +49,7 @@ class TRowsAndBytesChangesTask: public NConveyor::ITask { std::vector Chunks; TNormalizationContext NormContext; protected: - virtual bool DoExecute() override { + virtual TConclusionStatus DoExecute(const std::shared_ptr& /*taskPtr*/) override { for (auto&& chunkInfo : Chunks) { const auto& blobRange = chunkInfo.GetBlobRange(); @@ -59,16 +59,17 @@ class TRowsAndBytesChangesTask: public NConveyor::ITask { Y_ABORT_UNLESS(!!columnLoader); TPortionInfo::TAssembleBlobInfo assembleBlob(blobData); + assembleBlob.SetExpectedRecordsCount(chunkInfo.GetRecordsCount()); auto batch = assembleBlob.BuildRecordBatch(*columnLoader); Y_ABORT_UNLESS(!!batch); - chunkInfo.MutableUpdate().SetNumRows(batch->num_rows()); - chunkInfo.MutableUpdate().SetRawBytes(NArrow::GetBatchDataSize(batch)); + chunkInfo.MutableUpdate().SetNumRows(batch->GetRecordsCount()); + chunkInfo.MutableUpdate().SetRawBytes(batch->GetRawSizeVerified()); } auto changes = std::make_shared(std::move(Chunks)); TActorContext::AsActorContext().Send(NormContext.GetShardActor(), std::make_unique(changes)); - return true; + return TConclusionStatus::Success(); } public: diff --git a/ydb/core/tx/columnshard/normalizer/portion/chunks.h b/ydb/core/tx/columnshard/normalizer/portion/chunks.h index 59fbfe57da0c..c8a09669c7b8 100644 --- a/ydb/core/tx/columnshard/normalizer/portion/chunks.h +++ b/ydb/core/tx/columnshard/normalizer/portion/chunks.h @@ -74,6 +74,10 @@ namespace NKikimr::NOlap { , 
CLContext(rowset, dsGroupSelector) {} + ui32 GetRecordsCount() const { + return CLContext.GetMetaProto().GetNumRows(); + } + const TBlobRange& GetBlobRange() const { return CLContext.GetBlobRange(); } diff --git a/ydb/core/tx/columnshard/normalizer/portion/clean.cpp b/ydb/core/tx/columnshard/normalizer/portion/clean.cpp index 435083000889..d1e00669f8b3 100644 --- a/ydb/core/tx/columnshard/normalizer/portion/clean.cpp +++ b/ydb/core/tx/columnshard/normalizer/portion/clean.cpp @@ -76,6 +76,7 @@ INormalizerTask::TPtr TCleanPortionsNormalizer::BuildTask(std::vector + + +namespace NKikimr::NOlap { + +namespace { +std::optional> GetColumnPortionAddresses(NTabletFlatExecutor::TTransactionContext& txc) { + using namespace NColumnShard; + NIceDb::TNiceDb db(txc.DB); + if (!Schema::Precharge(db, txc.DB.GetScheme())) { + return std::nullopt; + } + THashSet usedPortions; + auto rowset = db.Table().Select< + Schema::IndexColumns::PathId, + Schema::IndexColumns::Portion + >(); + if (!rowset.IsReady()) { + return std::nullopt; + } + while (!rowset.EndOfSet()) { + usedPortions.emplace( + rowset.GetValue(), + rowset.GetValue() + ); + if (!rowset.Next()) { + return std::nullopt; + } + } + return usedPortions; +} + +using TBatch = std::vector; + +std::optional> GetPortionsToDelete(NTabletFlatExecutor::TTransactionContext& txc) { + using namespace NColumnShard; + const auto usedPortions = GetColumnPortionAddresses(txc); + if (!usedPortions) { + return std::nullopt; + } + const size_t MaxBatchSize = 10000; + NIceDb::TNiceDb db(txc.DB); + if (!Schema::Precharge(db, txc.DB.GetScheme())) { + return std::nullopt; + } + auto rowset = db.Table().Select< + Schema::IndexPortions::PathId, + Schema::IndexPortions::PortionId + >(); + if (!rowset.IsReady()) { + return std::nullopt; + } + std::vector result; + TBatch portionsToDelete; + while (!rowset.EndOfSet()) { + TPortionAddress addr( + rowset.GetValue(), + rowset.GetValue() + ); + if (!usedPortions->contains(addr)) { + ACFL_WARN("normalizer", 
"TCleanEmptyPortionsNormalizer")("message", TStringBuilder() << addr.DebugString() << " marked for deletion"); + portionsToDelete.emplace_back(std::move(addr)); + if (portionsToDelete.size() == MaxBatchSize) { + result.emplace_back(std::move(portionsToDelete)); + portionsToDelete = TBatch{}; + } + } + if (!rowset.Next()) { + return std::nullopt; + } + } + if (!portionsToDelete.empty()) { + result.emplace_back(std::move(portionsToDelete)); + } + return result; +} + +class TChanges : public INormalizerChanges { +public: + TChanges(TBatch&& addresses) + : Addresses(addresses) + {} + bool ApplyOnExecute(NTabletFlatExecutor::TTransactionContext& txc, const TNormalizationController&) const override { + using namespace NColumnShard; + NIceDb::TNiceDb db(txc.DB); + for(const auto& a: Addresses) { + db.Table().Key( + a.GetPathId(), + a.GetPortionId() + ).Delete(); + } + ACFL_WARN("normalizer", "TCleanEmptyPortionsNormalizer")("message", TStringBuilder() << GetSize() << " portions deleted"); + return true; + } + + ui64 GetSize() const override { + return Addresses.size(); + } +private: + const TBatch Addresses; +}; + +} //namespace + +TConclusion> TCleanEmptyPortionsNormalizer::DoInit(const TNormalizationController&, NTabletFlatExecutor::TTransactionContext& txc) { + using namespace NColumnShard; + auto batchesToDelete = GetPortionsToDelete(txc); + if (!batchesToDelete) { + return TConclusionStatus::Fail("Not ready"); + } + + std::vector result; + for (auto&& b: *batchesToDelete) { + result.emplace_back(std::make_shared(std::make_shared(std::move(b)))); + } + return result; +} + +} //namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/normalizer/portion/clean_empty.h b/ydb/core/tx/columnshard/normalizer/portion/clean_empty.h new file mode 100644 index 000000000000..920b3d8c0f56 --- /dev/null +++ b/ydb/core/tx/columnshard/normalizer/portion/clean_empty.h @@ -0,0 +1,28 @@ +#pragma once + +#include + +namespace NKikimr::NOlap { + +class TCleanEmptyPortionsNormalizer : 
public TNormalizationController::INormalizerComponent { + + static TString ClassName() { + return ToString(ENormalizerSequentialId::EmptyPortionsCleaner); + } + static inline auto Registrator = INormalizerComponent::TFactory::TRegistrator(ClassName()); +public: + TCleanEmptyPortionsNormalizer(const TNormalizationController::TInitContext&) + {} + + std::optional DoGetEnumSequentialId() const override { + return ENormalizerSequentialId::EmptyPortionsCleaner; + } + + TString GetClassName() const override { + return ClassName(); + } + + TConclusion> DoInit(const TNormalizationController& controller, NTabletFlatExecutor::TTransactionContext& txc) override; +}; + +} //namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/normalizer/portion/normalizer.cpp b/ydb/core/tx/columnshard/normalizer/portion/normalizer.cpp index fffee39865b2..63cea8b19952 100644 --- a/ydb/core/tx/columnshard/normalizer/portion/normalizer.cpp +++ b/ydb/core/tx/columnshard/normalizer/portion/normalizer.cpp @@ -7,7 +7,7 @@ namespace NKikimr::NOlap { TConclusion> TPortionsNormalizerBase::DoInit(const TNormalizationController& controller, NTabletFlatExecutor::TTransactionContext& txc) { - auto initRes = DoInitImpl(controller,txc); + auto initRes = DoInitImpl(controller, txc); if (initRes.IsFail()) { return initRes; @@ -33,51 +33,24 @@ TConclusion> TPortionsNormalizerBase::DoInit( return tasks; } - auto columnsFilter = GetColumnsFilter(tablesManager.GetPrimaryIndexSafe().GetVersionedIndex().GetLastSchema()); - THashMap portions; auto schemas = std::make_shared>(); - { - auto rowset = db.Table().Select(); - if (!rowset.IsReady()) { - return TConclusionStatus::Fail("Not ready"); + auto conclusion = InitColumns(tablesManager, db, portions); + if (conclusion.IsFail()) { + return conclusion; } - - TPortionInfo::TSchemaCursor schema(tablesManager.GetPrimaryIndexSafe().GetVersionedIndex()); - auto initPortion = [&](TPortionInfoConstructor&& portion, const TColumnChunkLoadContext& loadContext) { - auto 
currentSchema = schema.GetSchema(portion); - portion.SetSchemaVersion(currentSchema->GetVersion()); - - if (!columnsFilter.empty() && !columnsFilter.contains(loadContext.GetAddress().GetColumnId())) { - return; - } - auto it = portions.find(portion.GetPortionIdVerified()); - if (it == portions.end()) { - (*schemas)[portion.GetPortionIdVerified()] = currentSchema; - const ui64 portionId = portion.GetPortionIdVerified(); - it = portions.emplace(portionId, std::move(portion)).first; - } else { - it->second.Merge(std::move(portion)); - } - it->second.LoadRecord(currentSchema->GetIndexInfo(), loadContext); - }; - - while (!rowset.EndOfSet()) { - TPortionInfoConstructor portion(rowset.GetValue(), rowset.GetValue()); - Y_ABORT_UNLESS(rowset.GetValue() == 0); - - portion.SetMinSnapshotDeprecated(NOlap::TSnapshot(rowset.GetValue(), rowset.GetValue())); - portion.SetRemoveSnapshot(rowset.GetValue(), rowset.GetValue()); - - NOlap::TColumnChunkLoadContext chunkLoadContext(rowset, &DsGroupSelector); - initPortion(std::move(portion), chunkLoadContext); - - if (!rowset.Next()) { - return TConclusionStatus::Fail("Not ready"); - } + } + { + auto conclusion = InitIndexes(db, portions); + if (conclusion.IsFail()) { + return conclusion; } } + TPortionInfo::TSchemaCursor schema(tablesManager.GetPrimaryIndexSafe().GetVersionedIndex()); + for (auto&& [_, p] : portions) { + (*schemas)[p.GetPortionIdVerified()] = schema.GetSchema(p); + } std::vector> package; package.reserve(100); @@ -110,4 +83,70 @@ TConclusion> TPortionsNormalizerBase::DoInit( return tasks; } +TConclusionStatus TPortionsNormalizerBase::InitColumns( + const NColumnShard::TTablesManager& tablesManager, NIceDb::TNiceDb& db, THashMap& portions) { + using namespace NColumnShard; + auto columnsFilter = GetColumnsFilter(tablesManager.GetPrimaryIndexSafe().GetVersionedIndex().GetLastSchema()); + auto rowset = db.Table().Select(); + if (!rowset.IsReady()) { + return TConclusionStatus::Fail("Not ready"); + } + + 
TPortionInfo::TSchemaCursor schema(tablesManager.GetPrimaryIndexSafe().GetVersionedIndex()); + auto initPortion = [&](TPortionInfoConstructor&& portion, const TColumnChunkLoadContext& loadContext) { + auto currentSchema = schema.GetSchema(portion); + portion.SetSchemaVersion(currentSchema->GetVersion()); + + if (!columnsFilter.empty() && !columnsFilter.contains(loadContext.GetAddress().GetColumnId())) { + return; + } + auto it = portions.find(portion.GetPortionIdVerified()); + if (it == portions.end()) { + const ui64 portionId = portion.GetPortionIdVerified(); + it = portions.emplace(portionId, std::move(portion)).first; + } else { + it->second.Merge(std::move(portion)); + } + it->second.LoadRecord(currentSchema->GetIndexInfo(), loadContext); + }; + + while (!rowset.EndOfSet()) { + TPortionInfoConstructor portion(rowset.GetValue(), rowset.GetValue()); + Y_ABORT_UNLESS(rowset.GetValue() == 0); + + portion.SetMinSnapshotDeprecated( + NOlap::TSnapshot(rowset.GetValue(), rowset.GetValue())); + portion.SetRemoveSnapshot(rowset.GetValue(), rowset.GetValue()); + + NOlap::TColumnChunkLoadContext chunkLoadContext(rowset, &DsGroupSelector); + initPortion(std::move(portion), chunkLoadContext); + + if (!rowset.Next()) { + return TConclusionStatus::Fail("Not ready"); + } + } + return TConclusionStatus::Success(); +} + +TConclusionStatus TPortionsNormalizerBase::InitIndexes(NIceDb::TNiceDb& db, THashMap& portions) { + using IndexIndexes = NColumnShard::Schema::IndexIndexes; + auto rowset = db.Table().Select(); + if (!rowset.IsReady()) { + return TConclusionStatus::Fail("Not ready"); + } + + while (!rowset.EndOfSet()) { + NOlap::TIndexChunkLoadContext chunkLoadContext(rowset, &DsGroupSelector); + + auto it = portions.find(rowset.GetValue()); + AFL_VERIFY(it != portions.end()); + it->second.LoadIndex(chunkLoadContext); + + if (!rowset.Next()) { + return TConclusionStatus::Fail("Not ready"); + } + } + return TConclusionStatus::Success(); +} + } diff --git 
a/ydb/core/tx/columnshard/normalizer/portion/normalizer.h b/ydb/core/tx/columnshard/normalizer/portion/normalizer.h index 574a1c212873..8c23395eba0b 100644 --- a/ydb/core/tx/columnshard/normalizer/portion/normalizer.h +++ b/ydb/core/tx/columnshard/normalizer/portion/normalizer.h @@ -85,7 +85,12 @@ class TPortionsNormalizerBase : public TNormalizationController::INormalizerComp : DsGroupSelector(info.GetStorageInfo()) {} - virtual TConclusion> DoInit(const TNormalizationController& controller, NTabletFlatExecutor::TTransactionContext& txc) override final; + TConclusionStatus InitColumns( + const NColumnShard::TTablesManager& tablesManager, NIceDb::TNiceDb& db, THashMap& portions); + TConclusionStatus InitIndexes(NIceDb::TNiceDb& db, THashMap& portions); + + virtual TConclusion> DoInit( + const TNormalizationController& controller, NTabletFlatExecutor::TTransactionContext& txc) override final; protected: virtual INormalizerTask::TPtr BuildTask(std::vector>&& portions, std::shared_ptr> schemas) const = 0; diff --git a/ydb/core/tx/columnshard/normalizer/portion/ya.make b/ydb/core/tx/columnshard/normalizer/portion/ya.make index ec31c82f7b31..ff813694a971 100644 --- a/ydb/core/tx/columnshard/normalizer/portion/ya.make +++ b/ydb/core/tx/columnshard/normalizer/portion/ya.make @@ -5,6 +5,7 @@ SRCS( GLOBAL portion.cpp GLOBAL chunks.cpp GLOBAL clean.cpp + GLOBAL clean_empty.cpp GLOBAL broken_blobs.cpp ) diff --git a/ydb/core/tx/columnshard/normalizer/tablet/broken_txs.cpp b/ydb/core/tx/columnshard/normalizer/tablet/broken_txs.cpp new file mode 100644 index 000000000000..1b41c455217b --- /dev/null +++ b/ydb/core/tx/columnshard/normalizer/tablet/broken_txs.cpp @@ -0,0 +1,31 @@ +#include "broken_txs.h" + +#include +#include + +namespace NKikimr::NOlap { + +TConclusion> TBrokenTxsNormalizer::DoInit( + const TNormalizationController& /*controller*/, NTabletFlatExecutor::TTransactionContext& txc) { + NIceDb::TNiceDb db(txc.DB); + + using namespace NColumnShard; + auto rowset = 
db.Table().GreaterOrEqual(0).Select(); + if (!rowset.IsReady()) { + return TConclusionStatus::Fail("cannot read TxInfo"); + } + while (!rowset.EndOfSet()) { + const ui64 txId = rowset.GetValue(); + if (!rowset.HaveValue()) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("tx_id", txId)("event", "removed_by_normalizer")("condition", "no_kind"); + Schema::EraseTxInfo(db, txId); + } + + if (!rowset.Next()) { + return TConclusionStatus::Fail("cannot read TxInfo"); + } + } + return std::vector(); +} + +} diff --git a/ydb/core/tx/columnshard/normalizer/tablet/broken_txs.h b/ydb/core/tx/columnshard/normalizer/tablet/broken_txs.h new file mode 100644 index 000000000000..1ff68530bf35 --- /dev/null +++ b/ydb/core/tx/columnshard/normalizer/tablet/broken_txs.h @@ -0,0 +1,35 @@ +#pragma once + +#include +#include + + +namespace NKikimr::NOlap { + +class TBrokenTxsNormalizer: public TNormalizationController::INormalizerComponent { +public: + static TString GetClassNameStatic() { + return "BrokenTxsNormalizer"; + } +private: + class TNormalizerResult; + + static const inline INormalizerComponent::TFactory::TRegistrator Registrator = + INormalizerComponent::TFactory::TRegistrator(GetClassNameStatic()); + +public: + TBrokenTxsNormalizer(const TNormalizationController::TInitContext&) { + } + + virtual std::optional DoGetEnumSequentialId() const override { + return {}; + } + + virtual TString GetClassName() const override { + return GetClassNameStatic(); + } + + virtual TConclusion> DoInit(const TNormalizationController& controller, NTabletFlatExecutor::TTransactionContext& txc) override; +}; + +} diff --git a/ydb/core/tx/columnshard/normalizer/tablet/gc_counters.h b/ydb/core/tx/columnshard/normalizer/tablet/gc_counters.h index 80b56c080071..8787da559489 100644 --- a/ydb/core/tx/columnshard/normalizer/tablet/gc_counters.h +++ b/ydb/core/tx/columnshard/normalizer/tablet/gc_counters.h @@ -21,7 +21,7 @@ class TGCCountersNormalizer: public TNormalizationController::INormalizerCompone } 
virtual std::optional DoGetEnumSequentialId() const override { - return {}; + return ENormalizerSequentialId::GCCountersNormalizer; } virtual TString GetClassName() const override { diff --git a/ydb/core/tx/columnshard/normalizer/tablet/ya.make b/ydb/core/tx/columnshard/normalizer/tablet/ya.make index 9be6336eede5..0b39efc35b5b 100644 --- a/ydb/core/tx/columnshard/normalizer/tablet/ya.make +++ b/ydb/core/tx/columnshard/normalizer/tablet/ya.make @@ -2,6 +2,7 @@ LIBRARY() SRCS( GLOBAL gc_counters.cpp + GLOBAL broken_txs.cpp ) PEERDIR( diff --git a/ydb/core/tx/columnshard/normalizer/ya.make b/ydb/core/tx/columnshard/normalizer/ya.make index 46f7baeaea98..ced78fd812af 100644 --- a/ydb/core/tx/columnshard/normalizer/ya.make +++ b/ydb/core/tx/columnshard/normalizer/ya.make @@ -6,6 +6,7 @@ PEERDIR( ydb/core/tx/columnshard/normalizer/tablet ydb/core/tx/columnshard/normalizer/tables ydb/core/tx/columnshard/normalizer/portion + ydb/core/tx/columnshard/normalizer/insert_table ) END() diff --git a/ydb/core/tx/columnshard/operations/batch_builder/builder.cpp b/ydb/core/tx/columnshard/operations/batch_builder/builder.cpp index 319379133f3d..39aa61a9a008 100644 --- a/ydb/core/tx/columnshard/operations/batch_builder/builder.cpp +++ b/ydb/core/tx/columnshard/operations/batch_builder/builder.cpp @@ -6,47 +6,54 @@ #include #include #include - #include namespace NKikimr::NOlap { -void TBuildBatchesTask::ReplyError(const TString& message) { +void TBuildBatchesTask::ReplyError(const TString& message, const NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass errorClass) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("problem", "cannot build batch for insert")("reason", message)("data", WriteData.GetWriteMeta().GetLongTxIdOptional()); auto writeDataPtr = std::make_shared(std::move(WriteData)); - TWritingBuffer buffer(writeDataPtr->GetBlobsAction(), { std::make_shared(writeDataPtr) }); - auto result = NColumnShard::TEvPrivate::TEvWriteBlobsResult::Error( - 
NKikimrProto::EReplyStatus::CORRUPTED, std::move(buffer), message); + TWritingBuffer buffer(writeDataPtr->GetBlobsAction(), { std::make_shared(*writeDataPtr) }); + auto result = + NColumnShard::TEvPrivate::TEvWriteBlobsResult::Error(NKikimrProto::EReplyStatus::CORRUPTED, std::move(buffer), message, errorClass); TActorContext::AsActorContext().Send(ParentActorId, result.release()); } -bool TBuildBatchesTask::DoExecute() { +TConclusionStatus TBuildBatchesTask::DoExecute(const std::shared_ptr& /*taskPtr*/) { TConclusion> batchConclusion = WriteData.GetData()->ExtractBatch(); if (batchConclusion.IsFail()) { - ReplyError("cannot extract incoming batch: " + batchConclusion.GetErrorMessage()); - return true; + ReplyError( + "cannot extract incoming batch: " + batchConclusion.GetErrorMessage(), NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass::Internal); + return TConclusionStatus::Fail("cannot extract incoming batch: " + batchConclusion.GetErrorMessage()); } + WritingCounters->OnIncomingData(NArrow::GetBatchDataSize(*batchConclusion)); - auto preparedConclusion = ActualSchema->PrepareForModification(batchConclusion.DetachResult(), WriteData.GetWriteMeta().GetModificationType()); + auto preparedConclusion = + ActualSchema->PrepareForModification(batchConclusion.DetachResult(), WriteData.GetWriteMeta().GetModificationType()); if (preparedConclusion.IsFail()) { - ReplyError("cannot prepare incoming batch: " + preparedConclusion.GetErrorMessage()); - return true; + ReplyError("cannot prepare incoming batch: " + preparedConclusion.GetErrorMessage(), + NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass::Internal); + return TConclusionStatus::Fail("cannot prepare incoming batch: " + preparedConclusion.GetErrorMessage()); } auto batch = preparedConclusion.DetachResult(); - const std::vector> defaultFields = ActualSchema->GetAbsentFields(batch->schema()); std::shared_ptr merger; switch (WriteData.GetWriteMeta().GetModificationType()) { case 
NEvWrite::EModificationType::Upsert: { + const std::vector> defaultFields = ActualSchema->GetAbsentFields(batch->schema()); if (defaultFields.empty()) { - std::shared_ptr task = std::make_shared( - TabletId, ParentActorId, BufferActorId, std::move(WriteData), batch, ActualSchema); + std::shared_ptr task = + std::make_shared(TabletId, ParentActorId, BufferActorId, std::move(WriteData), batch, ActualSchema); NConveyor::TInsertServiceOperator::AsyncTaskToExecute(task); - return true; + return TConclusionStatus::Success(); } else { - auto conclusion = ActualSchema->BuildDefaultBatch(ActualSchema->GetIndexInfo().ArrowSchema()->fields(), 1); + auto insertionConclusion = ActualSchema->CheckColumnsDefault(defaultFields); + auto conclusion = ActualSchema->BuildDefaultBatch(ActualSchema->GetIndexInfo().ArrowSchema()->fields(), 1, true); AFL_VERIFY(!conclusion.IsFail())("error", conclusion.GetErrorMessage()); auto batchDefault = conclusion.DetachResult(); - NArrow::NMerger::TSortableBatchPosition pos(batchDefault, 0, batchDefault->schema()->field_names(), batchDefault->schema()->field_names(), false); - merger = std::make_shared(batch, ActualSchema, pos); + NArrow::NMerger::TSortableBatchPosition pos( + batchDefault, 0, batchDefault->schema()->field_names(), batchDefault->schema()->field_names(), false); + merger = std::make_shared( + batch, ActualSchema, insertionConclusion.IsSuccess() ? 
"" : insertionConclusion.GetErrorMessage(), pos); break; } } @@ -55,22 +62,22 @@ bool TBuildBatchesTask::DoExecute() { break; } case NEvWrite::EModificationType::Update: { - merger = std::make_shared(batch, ActualSchema); + merger = std::make_shared(batch, ActualSchema, ""); break; } case NEvWrite::EModificationType::Replace: case NEvWrite::EModificationType::Delete: { - std::shared_ptr task = std::make_shared( - TabletId, ParentActorId, BufferActorId, std::move(WriteData), batch, ActualSchema); + std::shared_ptr task = + std::make_shared(TabletId, ParentActorId, BufferActorId, std::move(WriteData), batch, ActualSchema); NConveyor::TInsertServiceOperator::AsyncTaskToExecute(task); - return true; + return TConclusionStatus::Success(); } } std::shared_ptr task = std::make_shared( TabletId, ParentActorId, BufferActorId, std::move(WriteData), merger, ActualSchema, ActualSnapshot, batch); NActors::TActivationContext::AsActorContext().Register(new NDataReader::TActor(task)); - return true; + return TConclusionStatus::Success(); } -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/operations/batch_builder/builder.h b/ydb/core/tx/columnshard/operations/batch_builder/builder.h index f49648790d61..31ca0ac7ac43 100644 --- a/ydb/core/tx/columnshard/operations/batch_builder/builder.h +++ b/ydb/core/tx/columnshard/operations/batch_builder/builder.h @@ -1,5 +1,7 @@ #pragma once +#include #include +#include #include #include #include @@ -14,24 +16,27 @@ class TBuildBatchesTask: public NConveyor::ITask { const NActors::TActorId BufferActorId; const std::shared_ptr ActualSchema; const TSnapshot ActualSnapshot; - void ReplyError(const TString& message); + const std::shared_ptr WritingCounters; + void ReplyError(const TString& message, const NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass errorClass); + protected: - virtual bool DoExecute() override; + virtual TConclusionStatus DoExecute(const std::shared_ptr& taskPtr) override; + public: virtual TString 
GetTaskClassIdentifier() const override { return "Write::ConstructBatches"; } - TBuildBatchesTask(const ui64 tabletId, const NActors::TActorId parentActorId, - const NActors::TActorId bufferActorId, NEvWrite::TWriteData&& writeData, const std::shared_ptr& actualSchema, - const TSnapshot& actualSnapshot) + TBuildBatchesTask(const ui64 tabletId, const NActors::TActorId parentActorId, const NActors::TActorId bufferActorId, + NEvWrite::TWriteData&& writeData, const std::shared_ptr& actualSchema, const TSnapshot& actualSnapshot, + const std::shared_ptr& writingCounters) : WriteData(std::move(writeData)) , TabletId(tabletId) , ParentActorId(parentActorId) , BufferActorId(bufferActorId) , ActualSchema(actualSchema) , ActualSnapshot(actualSnapshot) - { + , WritingCounters(writingCounters) { } }; -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/operations/batch_builder/merger.cpp b/ydb/core/tx/columnshard/operations/batch_builder/merger.cpp index eb156e5c2451..823f6ac1cf3d 100644 --- a/ydb/core/tx/columnshard/operations/batch_builder/merger.cpp +++ b/ydb/core/tx/columnshard/operations/batch_builder/merger.cpp @@ -1,6 +1,6 @@ #include "merger.h" #include -#include +#include namespace NKikimr::NOlap { @@ -60,17 +60,19 @@ NKikimr::TConclusionStatus TUpdateMerger::OnEqualKeys(const NArrow::NMerger::TSo return TConclusionStatus::Success(); } -TUpdateMerger::TUpdateMerger(const std::shared_ptr& incoming, const std::shared_ptr& actualSchema, const std::optional& defaultExists /*= {}*/) +TUpdateMerger::TUpdateMerger(const std::shared_ptr& incoming, const std::shared_ptr& actualSchema, + const TString& insertDenyReason, const std::optional& defaultExists /*= {}*/) : TBase(incoming, actualSchema) , Builder(actualSchema->GetIndexInfo().ArrowSchema()->fields()) , DefaultExists(defaultExists) + , InsertDenyReason(insertDenyReason) { - for (auto&& i : actualSchema->GetIndexInfo().ArrowSchema()->field_names()) { - auto fIdx = IncomingData->schema()->GetFieldIndex(i); 
+ for (auto&& f : actualSchema->GetIndexInfo().ArrowSchema()->fields()) { + auto fIdx = IncomingData->schema()->GetFieldIndex(f->name()); if (fIdx == -1) { IncomingColumnRemap.emplace_back(); } else { - auto fExistsIdx = IncomingData->schema()->GetFieldIndex("$$EXISTS::" + i); + auto fExistsIdx = IncomingData->schema()->GetFieldIndex("$$EXISTS::" + f->name()); std::shared_ptr flagsArray; if (fExistsIdx != -1) { AFL_VERIFY(IncomingData->column(fExistsIdx)->type_id() == arrow::Type::BOOL); diff --git a/ydb/core/tx/columnshard/operations/batch_builder/merger.h b/ydb/core/tx/columnshard/operations/batch_builder/merger.h index c9c8a986c13e..e503f742b18f 100644 --- a/ydb/core/tx/columnshard/operations/batch_builder/merger.h +++ b/ydb/core/tx/columnshard/operations/batch_builder/merger.h @@ -79,8 +79,12 @@ class TUpdateMerger: public IMerger { std::vector> IncomingColumnRemap; std::vector> HasIncomingDataFlags; const std::optional DefaultExists; + const TString InsertDenyReason; virtual TConclusionStatus OnEqualKeys(const NArrow::NMerger::TSortableBatchPosition& exists, const NArrow::NMerger::TSortableBatchPosition& incoming) override; virtual TConclusionStatus OnIncomingOnly(const NArrow::NMerger::TSortableBatchPosition& incoming) override { + if (!!InsertDenyReason) { + return TConclusionStatus::Fail("insertion is impossible: " + InsertDenyReason); + } if (!DefaultExists) { return TConclusionStatus::Success(); } else { @@ -93,7 +97,7 @@ class TUpdateMerger: public IMerger { } TUpdateMerger(const std::shared_ptr& incoming, const std::shared_ptr& actualSchema, - const std::optional& defaultExists = {}); + const TString& insertDenyReason, const std::optional& defaultExists = {}); }; } diff --git a/ydb/core/tx/columnshard/operations/batch_builder/restore.cpp b/ydb/core/tx/columnshard/operations/batch_builder/restore.cpp index 6ad24b426dc0..e13c7fc74eaf 100644 --- a/ydb/core/tx/columnshard/operations/batch_builder/restore.cpp +++ 
b/ydb/core/tx/columnshard/operations/batch_builder/restore.cpp @@ -6,16 +6,10 @@ namespace NKikimr::NOlap { std::unique_ptr TModificationRestoreTask::DoBuildRequestInitiator() const { - auto request = std::make_unique(LocalPathId); + auto request = std::make_unique(LocalPathId, WriteData.GetWriteMeta().GetLockIdOptional()); request->ReadToSnapshot = Snapshot; - request->RangesFilter = std::make_shared(false); auto pkData = NArrow::TColumnOperator().VerifyIfAbsent().Extract(IncomingData, ActualSchema->GetPKColumnNames()); - for (ui32 i = 0; i < pkData->num_rows(); ++i) { - auto batch = pkData->Slice(i, 1); - auto pFrom = std::make_shared(NKernels::EOperation::GreaterEqual, batch); - auto pTo = std::make_shared(NKernels::EOperation::LessEqual, batch); - AFL_VERIFY(request->RangesFilter->Add(pFrom, pTo, &ActualSchema->GetIndexInfo())); - } + request->RangesFilter = TPKRangesFilter::BuildFromRecordBatchLines(pkData, false); for (auto&& i : ActualSchema->GetIndexInfo().GetColumnIds(false)) { request->AddColumn(i, ActualSchema->GetIndexInfo().GetColumnName(i)); } @@ -25,17 +19,19 @@ std::unique_ptr TModificationRestoreTa NKikimr::TConclusionStatus TModificationRestoreTask::DoOnDataChunk(const std::shared_ptr& data) { auto result = Merger->AddExistsDataOrdered(data); if (result.IsFail()) { - auto writeDataPtr = std::make_shared(std::move(WriteData)); - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "restore_data_problems") + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "merge_data_problems") ("write_id", WriteData.GetWriteMeta().GetWriteId())("tablet_id", TabletId)("message", result.GetErrorMessage()); - TWritingBuffer buffer(writeDataPtr->GetBlobsAction(), { std::make_shared(writeDataPtr) }); - auto evResult = NColumnShard::TEvPrivate::TEvWriteBlobsResult::Error(NKikimrProto::EReplyStatus::CORRUPTED, - std::move(buffer), result.GetErrorMessage()); - TActorContext::AsActorContext().Send(ParentActorId, evResult.release()); + 
SendErrorMessage(result.GetErrorMessage(), NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass::Request); } return result; } +void TModificationRestoreTask::DoOnError(const TString& errorMessage) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "restore_data_problems")("write_id", WriteData.GetWriteMeta().GetWriteId())( + "tablet_id", TabletId)("message", errorMessage); + SendErrorMessage(errorMessage, NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass::Internal); +} + NKikimr::TConclusionStatus TModificationRestoreTask::DoOnFinished() { { auto result = Merger->Finish(); @@ -65,4 +61,11 @@ TModificationRestoreTask::TModificationRestoreTask(const ui64 tabletId, const NA } +void TModificationRestoreTask::SendErrorMessage(const TString& errorMessage, const NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass errorClass) { + auto writeDataPtr = std::make_shared(std::move(WriteData)); + TWritingBuffer buffer(writeDataPtr->GetBlobsAction(), { std::make_shared(*writeDataPtr) }); + auto evResult = NColumnShard::TEvPrivate::TEvWriteBlobsResult::Error(NKikimrProto::EReplyStatus::CORRUPTED, std::move(buffer), errorMessage, errorClass); + TActorContext::AsActorContext().Send(ParentActorId, evResult.release()); +} + } diff --git a/ydb/core/tx/columnshard/operations/batch_builder/restore.h b/ydb/core/tx/columnshard/operations/batch_builder/restore.h index 7e244f1e95b4..b69a856a8a58 100644 --- a/ydb/core/tx/columnshard/operations/batch_builder/restore.h +++ b/ydb/core/tx/columnshard/operations/batch_builder/restore.h @@ -1,6 +1,7 @@ #pragma once #include "merger.h" +#include #include #include @@ -22,10 +23,13 @@ class TModificationRestoreTask: public NDataReader::IRestoreTask { virtual TConclusionStatus DoOnDataChunk(const std::shared_ptr& data) override; virtual TConclusionStatus DoOnFinished() override; + virtual void DoOnError(const TString& errorMessage) override; + void SendErrorMessage(const TString& errorMessage, const 
NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass errorClass); + public: - TModificationRestoreTask(const ui64 tabletId, const NActors::TActorId parentActorId, - const NActors::TActorId bufferActorId, NEvWrite::TWriteData&& writeData, const std::shared_ptr& merger, - const std::shared_ptr& actualSchema, const TSnapshot actualSnapshot, const std::shared_ptr& incomingData); + TModificationRestoreTask(const ui64 tabletId, const NActors::TActorId parentActorId, const NActors::TActorId bufferActorId, + NEvWrite::TWriteData&& writeData, const std::shared_ptr& merger, const std::shared_ptr& actualSchema, + const TSnapshot actualSnapshot, const std::shared_ptr& incomingData); }; -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/operations/manager.cpp b/ydb/core/tx/columnshard/operations/manager.cpp new file mode 100644 index 000000000000..1527ec5d028d --- /dev/null +++ b/ydb/core/tx/columnshard/operations/manager.cpp @@ -0,0 +1,296 @@ +#include "manager.h" + +#include + +namespace NKikimr::NColumnShard { + +bool TOperationsManager::Load(NTabletFlatExecutor::TTransactionContext& txc) { + NIceDb::TNiceDb db(txc.DB); + { + auto rowset = db.Table().Select(); + if (!rowset.IsReady()) { + return false; + } + + while (!rowset.EndOfSet()) { + const TOperationWriteId writeId = (TOperationWriteId)rowset.GetValue(); + const ui64 createdAtSec = rowset.GetValue(); + const ui64 lockId = rowset.GetValue(); + const ui64 cookie = rowset.GetValueOrDefault(0); + const TString metadata = rowset.GetValue(); + const EOperationStatus status = (EOperationStatus)rowset.GetValue(); + std::optional granuleShardingVersionId; + if (rowset.HaveValue() && + rowset.GetValue()) { + granuleShardingVersionId = rowset.GetValue(); + } + + NKikimrTxColumnShard::TInternalOperationData metaProto; + Y_ABORT_UNLESS(metaProto.ParseFromString(metadata)); + + auto operation = std::make_shared( + writeId, lockId, cookie, status, TInstant::Seconds(createdAtSec), granuleShardingVersionId, 
NEvWrite::EModificationType::Upsert); + operation->FromProto(metaProto); + LinkInsertWriteIdToOperationWriteId(operation->GetInsertWriteIds(), operation->GetWriteId()); + AFL_VERIFY(operation->GetStatus() != EOperationStatus::Draft); + + AFL_VERIFY(Operations.emplace(operation->GetWriteId(), operation).second); + auto it = LockFeatures.find(lockId); + if (it == LockFeatures.end()) { + it = LockFeatures.emplace(lockId, TLockFeatures(lockId, 0)).first; + } + it->second.MutableWriteOperations().emplace_back(operation); + LastWriteId = std::max(LastWriteId, operation->GetWriteId()); + if (!rowset.Next()) { + return false; + } + } + } + { + auto rowset = db.Table().Select(); + if (!rowset.IsReady()) { + return false; + } + + while (!rowset.EndOfSet()) { + const ui64 lockId = rowset.GetValue(); + const ui64 txId = rowset.GetValue(); + AFL_VERIFY(LockFeatures.contains(lockId))("lock_id", lockId); + AFL_VERIFY(Tx2Lock.emplace(txId, lockId).second); + if (!rowset.Next()) { + return false; + } + } + } + + return true; +} + +void TOperationsManager::CommitTransactionOnExecute( + TColumnShard& owner, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc, const NOlap::TSnapshot& snapshot) { + auto& lock = GetLockFeaturesForTxVerified(txId); + TLogContextGuard gLogging( + NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("commit_tx_id", txId)("commit_lock_id", lock.GetLockId())); + TVector commited; + for (auto&& opPtr : lock.GetWriteOperations()) { + opPtr->CommitOnExecute(owner, txc, snapshot); + commited.emplace_back(opPtr); + } + OnTransactionFinishOnExecute(commited, lock, txId, txc); +} + +void TOperationsManager::CommitTransactionOnComplete( + TColumnShard& owner, const ui64 txId, const NOlap::TSnapshot& snapshot) { + auto& lock = GetLockFeaturesForTxVerified(txId); + TLogContextGuard gLogging( + NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("commit_tx_id", txId)("commit_lock_id", lock.GetLockId())); + for (auto&& i : 
lock.GetBrokeOnCommit()) { + if (auto lockNotify = GetLockOptional(i)) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("broken_lock_id", i); + lockNotify->SetBroken(); + } + } + + for (auto&& i : lock.GetNotifyOnCommit()) { + if (auto lockNotify = GetLockOptional(i)) { + lockNotify->AddNotifyCommit(lock.GetLockId()); + } + } + + TVector commited; + for (auto&& opPtr : lock.GetWriteOperations()) { + opPtr->CommitOnComplete(owner, snapshot); + commited.emplace_back(opPtr); + } + OnTransactionFinishOnComplete(commited, lock, txId); +} + +void TOperationsManager::AbortTransactionOnExecute(TColumnShard& owner, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc) { + auto* lock = GetLockFeaturesForTxOptional(txId); + if (!lock) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "abort")("tx_id", txId)("problem", "finished"); + return; + } + TLogContextGuard gLogging(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tx_id", txId)("lock_id", lock->GetLockId())); + + TVector aborted; + for (auto&& opPtr : lock->GetWriteOperations()) { + opPtr->AbortOnExecute(owner, txc); + aborted.emplace_back(opPtr); + } + + OnTransactionFinishOnExecute(aborted, *lock, txId, txc); +} + +void TOperationsManager::AbortTransactionOnComplete(TColumnShard& owner, const ui64 txId) { + auto* lock = GetLockFeaturesForTxOptional(txId); + if (!lock) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "abort")("tx_id", txId)("problem", "finished"); + return; + } + TLogContextGuard gLogging(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tx_id", txId)("lock_id", lock->GetLockId())); + + TVector aborted; + for (auto&& opPtr : lock->GetWriteOperations()) { + opPtr->AbortOnComplete(owner); + aborted.emplace_back(opPtr); + } + + OnTransactionFinishOnComplete(aborted, *lock, txId); +} + +TWriteOperation::TPtr TOperationsManager::GetOperation(const TOperationWriteId writeId) const { + auto it = Operations.find(writeId); + if (it == Operations.end()) { 
+ return nullptr; + } + return it->second; +} + +void TOperationsManager::OnTransactionFinishOnExecute( + const TVector& operations, const TLockFeatures& lock, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc) { + for (auto&& op : operations) { + RemoveOperationOnExecute(op, txc); + } + NIceDb::TNiceDb db(txc.DB); + db.Table().Key(txId, lock.GetLockId()).Delete(); +} + +void TOperationsManager::OnTransactionFinishOnComplete( + const TVector& operations, const TLockFeatures& lock, const ui64 txId) { + { + lock.RemoveInteractions(InteractionsContext); + LockFeatures.erase(lock.GetLockId()); + } + Tx2Lock.erase(txId); + for (auto&& op : operations) { + RemoveOperationOnComplete(op); + } +} + +void TOperationsManager::RemoveOperationOnExecute(const TWriteOperation::TPtr& op, NTabletFlatExecutor::TTransactionContext& txc) { + NIceDb::TNiceDb db(txc.DB); + db.Table().Key((ui64)op->GetWriteId()).Delete(); +} + +void TOperationsManager::RemoveOperationOnComplete(const TWriteOperation::TPtr& op) { + for (auto&& i : op->GetInsertWriteIds()) { + AFL_VERIFY(InsertWriteIdToOpWriteId.erase(i)); + } + Operations.erase(op->GetWriteId()); +} + +TOperationWriteId TOperationsManager::BuildNextOperationWriteId() { + return ++LastWriteId; +} + +std::optional TOperationsManager::GetLockForTx(const ui64 txId) const { + auto lockIt = Tx2Lock.find(txId); + if (lockIt != Tx2Lock.end()) { + return lockIt->second; + } + return std::nullopt; +} + +void TOperationsManager::LinkTransactionOnExecute(const ui64 lockId, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc) { + NIceDb::TNiceDb db(txc.DB); + db.Table().Key(txId, lockId).Update(); + Tx2Lock[txId] = lockId; +} + +void TOperationsManager::LinkTransactionOnComplete(const ui64 /*lockId*/, const ui64 /*txId*/) { +} + +TWriteOperation::TPtr TOperationsManager::RegisterOperation( + const ui64 lockId, const ui64 cookie, const std::optional granuleShardingVersionId, const NEvWrite::EModificationType mType) { + auto 
writeId = BuildNextOperationWriteId(); + auto operation = std::make_shared( + writeId, lockId, cookie, EOperationStatus::Draft, AppData()->TimeProvider->Now(), granuleShardingVersionId, mType); + Y_ABORT_UNLESS(Operations.emplace(operation->GetWriteId(), operation).second); + GetLockVerified(operation->GetLockId()).MutableWriteOperations().emplace_back(operation); + GetLockVerified(operation->GetLockId()).AddWrite(); + return operation; +} + +TConclusion TOperationsManager::GetBehaviour(const NEvents::TDataEvents::TEvWrite& evWrite) { + if (evWrite.Record.HasTxId() && evWrite.Record.HasLocks()) { + if (evWrite.Record.GetLocks().GetLocks().size() < 1) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("proto", evWrite.Record.DebugString())("event", "undefined behaviour"); + return TConclusionStatus::Fail("no locks in case tx/locks"); + } + auto& baseLock = evWrite.Record.GetLocks().GetLocks()[0]; + for (auto&& i : evWrite.Record.GetLocks().GetLocks()) { + if (i.GetLockId() != baseLock.GetLockId()) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("proto", evWrite.Record.DebugString())("event", "undefined behaviour"); + return TConclusionStatus::Fail("different lock ids in operation"); + } + if (i.GetGeneration() != baseLock.GetGeneration()) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("proto", evWrite.Record.DebugString())("event", "undefined behaviour"); + return TConclusionStatus::Fail("different lock generations in operation"); + } + if (i.GetCounter() != baseLock.GetCounter()) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("proto", evWrite.Record.DebugString())("event", "undefined behaviour"); + return TConclusionStatus::Fail("different lock generation counters in operation"); + } + } + if (evWrite.Record.GetLocks().GetOp() == NKikimrDataEvents::TKqpLocks::Commit) { + return EOperationBehaviour::CommitWriteLock; + } + if (evWrite.Record.GetLocks().GetOp() == NKikimrDataEvents::TKqpLocks::Rollback) { + return EOperationBehaviour::AbortWriteLock; + } + } + + if 
(evWrite.Record.HasLockTxId() && evWrite.Record.HasLockNodeId()) { + if (evWrite.Record.GetTxMode() == NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE) { + return EOperationBehaviour::WriteWithLock; + } + + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("proto", evWrite.Record.DebugString())("event", "undefined behaviour"); + return TConclusionStatus::Fail("mode not IMMEDIATE for LockTxId + LockNodeId"); + } + + if (!evWrite.Record.HasLockTxId() && !evWrite.Record.HasLockNodeId() && + evWrite.Record.GetTxMode() == NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE) { + return EOperationBehaviour::NoTxWrite; + } + + if (evWrite.Record.HasTxId() && evWrite.Record.GetTxMode() == NKikimrDataEvents::TEvWrite::MODE_PREPARE) { + return EOperationBehaviour::InTxWrite; + } + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("proto", evWrite.Record.DebugString())("event", "undefined behaviour"); + return TConclusionStatus::Fail("undefined request for detect tx type"); +} + +TOperationsManager::TOperationsManager() { +} + +void TOperationsManager::AddEventForTx(TColumnShard& owner, const ui64 txId, const std::shared_ptr& writer) { + return AddEventForLock(owner, GetLockForTxVerified(txId), writer); +} + +void TOperationsManager::AddEventForLock( + TColumnShard& /*owner*/, const ui64 lockId, const std::shared_ptr& writer) { + AFL_VERIFY(writer); + NOlap::NTxInteractions::TTxConflicts txNotifications; + NOlap::NTxInteractions::TTxConflicts txConflicts; + auto& txLock = GetLockVerified(lockId); + writer->CheckInteraction(lockId, InteractionsContext, txConflicts, txNotifications); + for (auto&& i : txConflicts) { + if (auto lock = GetLockOptional(i.first)) { + GetLockVerified(i.first).AddBrokeOnCommit(i.second); + } else if (txLock.IsCommitted(i.first)) { + txLock.SetBroken(); + } + } + for (auto&& i : txNotifications) { + GetLockVerified(i.first).AddNotificationsOnCommit(i.second); + } + if (auto txEvent = writer->BuildEvent()) { + NOlap::NTxInteractions::TTxEventContainer container(lockId, txEvent); + 
container.AddToInteraction(InteractionsContext); + txLock.MutableEvents().emplace_back(std::move(container)); + } +} + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/operations/manager.h b/ydb/core/tx/columnshard/operations/manager.h new file mode 100644 index 000000000000..9e2651e24da0 --- /dev/null +++ b/ydb/core/tx/columnshard/operations/manager.h @@ -0,0 +1,221 @@ +#pragma once +#include "write.h" + +#include +#include + +namespace NKikimr::NOlap::NTxInteractions { +class TManager; +class TTxEventContainer; +class TInteractionsContext; +class ITxEventWriter; +} // namespace NKikimr::NOlap::NTxInteractions + +namespace NKikimr::NColumnShard { + +class TColumnShard; +class TLockFeatures; + +class TLockSharingInfo { +private: + const ui64 LockId; + const ui64 Generation; + TAtomicCounter InternalGenerationCounter = 0; + TAtomicCounter Broken = 0; + TAtomicCounter WritesCounter = 0; + friend class TLockFeatures; + +public: + ui64 GetLockId() const { + return LockId; + } + ui64 GetGeneration() const { + return Generation; + } + + TLockSharingInfo(const ui64 lockId, const ui64 generation) + : LockId(lockId) + , Generation(generation) { + } + + bool HasWrites() const { + return WritesCounter.Val(); + } + + bool IsBroken() const { + return Broken.Val(); + } + + ui64 GetCounter() const { + return InternalGenerationCounter.Val(); + } +}; + +class TLockFeatures: TMoveOnly { +private: + YDB_ACCESSOR_DEF(std::vector, WriteOperations); + YDB_ACCESSOR_DEF(std::vector, Events); + YDB_ACCESSOR(ui64, LockId, 0); + YDB_ACCESSOR(ui64, Generation, 0); + std::shared_ptr SharingInfo; + + YDB_READONLY_DEF(THashSet, BrokeOnCommit); + YDB_READONLY_DEF(THashSet, NotifyOnCommit); + YDB_READONLY_DEF(THashSet, Committed); + +public: + const std::shared_ptr& GetSharingInfo() const { + return SharingInfo; + } + + ui64 GetInternalGenerationCounter() const { + return SharingInfo->GetCounter(); + } + + void AddWrite() { + SharingInfo->WritesCounter.Inc(); + } + + void 
SetBroken() { + SharingInfo->Broken = 1; + SharingInfo->InternalGenerationCounter = (i64)TSysTables::TLocksTable::TLock::ESetErrors::ErrorBroken; + } + + bool IsBroken() const { + return SharingInfo->IsBroken(); + } + + bool IsCommitted(const ui64 lockId) const { + return Committed.contains(lockId); + } + + void AddNotifyCommit(const ui64 lockId) { + AFL_VERIFY(NotifyOnCommit.erase(lockId)); + Committed.emplace(lockId); + } + + void AddBrokeOnCommit(const THashSet& lockIds) { + BrokeOnCommit.insert(lockIds.begin(), lockIds.end()); + } + + void AddNotificationsOnCommit(const THashSet& lockIds) { + NotifyOnCommit.insert(lockIds.begin(), lockIds.end()); + } + + void RemoveInteractions(NOlap::NTxInteractions::TInteractionsContext& context) const { + for (auto&& i : Events) { + i.RemoveFromInteraction(context); + } + } + + TLockFeatures(const ui64 lockId, const ui64 gen) + : LockId(lockId) + , Generation(gen) { + SharingInfo = std::make_shared(lockId, gen); + } +}; + +class TOperationsManager { + NOlap::NTxInteractions::TInteractionsContext InteractionsContext; + + THashMap Tx2Lock; + THashMap InsertWriteIdToOpWriteId; + THashMap LockFeatures; + THashMap Operations; + TOperationWriteId LastWriteId = TOperationWriteId(0); + +public: + + TWriteOperation::TPtr GetOperationByInsertWriteIdVerified(const TInsertWriteId insertWriteId) const { + auto it = InsertWriteIdToOpWriteId.find(insertWriteId); + AFL_VERIFY(it != InsertWriteIdToOpWriteId.end()); + return GetOperationVerified(it->second); + } + + void LinkInsertWriteIdToOperationWriteId(const std::vector& insertions, const TOperationWriteId operationId) { + for (auto&& i : insertions) { + InsertWriteIdToOpWriteId.emplace(i, operationId); + } + } + bool Load(NTabletFlatExecutor::TTransactionContext& txc); + void AddEventForTx(TColumnShard& owner, const ui64 txId, const std::shared_ptr& writer); + void AddEventForLock(TColumnShard& owner, const ui64 lockId, const std::shared_ptr& writer); + + TWriteOperation::TPtr 
GetOperation(const TOperationWriteId writeId) const; + TWriteOperation::TPtr GetOperationVerified(const TOperationWriteId writeId) const { + return TValidator::CheckNotNull(GetOperationOptional(writeId)); + } + TWriteOperation::TPtr GetOperationOptional(const TOperationWriteId writeId) const { + return GetOperation(writeId); + } + void CommitTransactionOnExecute( + TColumnShard& owner, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc, const NOlap::TSnapshot& snapshot); + void CommitTransactionOnComplete( + TColumnShard& owner, const ui64 txId, const NOlap::TSnapshot& snapshot); + void AddTemporaryTxLink(const ui64 lockId) { + AFL_VERIFY(Tx2Lock.emplace(lockId, lockId).second); + } + void LinkTransactionOnExecute(const ui64 lockId, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc); + void LinkTransactionOnComplete(const ui64 lockId, const ui64 txId); + void AbortTransactionOnExecute(TColumnShard& owner, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc); + void AbortTransactionOnComplete(TColumnShard& owner, const ui64 txId); + + std::optional GetLockForTx(const ui64 txId) const; + std::optional GetLockForTxOptional(const ui64 txId) const { + return GetLockForTx(txId); + } + TLockFeatures* GetLockFeaturesForTxOptional(const ui64 txId) { + auto lockId = GetLockForTxOptional(txId); + if (!lockId) { + return nullptr; + } + return &GetLockVerified(*lockId); + } + TLockFeatures& GetLockFeaturesForTxVerified(const ui64 txId) { + auto lockId = GetLockForTxOptional(txId); + AFL_VERIFY(lockId); + return GetLockVerified(*lockId); + } + ui64 GetLockForTxVerified(const ui64 txId) const { + auto result = GetLockForTxOptional(txId); + AFL_VERIFY(result)("tx_id", txId); + return *result; + } + + TWriteOperation::TPtr RegisterOperation( + const ui64 lockId, const ui64 cookie, const std::optional granuleShardingVersionId, const NEvWrite::EModificationType mType); + bool RegisterLock(const ui64 lockId, const ui64 generationId) { + if 
(LockFeatures.contains(lockId)) { + return false; + } else { + LockFeatures.emplace(lockId, TLockFeatures(lockId, generationId)); + return true; + } + } + static TConclusion GetBehaviour(const NEvents::TDataEvents::TEvWrite& evWrite); + TLockFeatures& GetLockVerified(const ui64 lockId) { + auto result = GetLockOptional(lockId); + AFL_VERIFY(result)("lock_id", lockId); + return *result; + } + + TLockFeatures* GetLockOptional(const ui64 lockId) { + auto it = LockFeatures.find(lockId); + if (it != LockFeatures.end()) { + return &it->second; + } else { + return nullptr; + } + } + + TOperationsManager(); + +private: + TOperationWriteId BuildNextOperationWriteId(); + void RemoveOperationOnExecute(const TWriteOperation::TPtr& op, NTabletFlatExecutor::TTransactionContext& txc); + void RemoveOperationOnComplete(const TWriteOperation::TPtr& op); + void OnTransactionFinishOnExecute(const TVector& operations, const TLockFeatures& lock, const ui64 txId, + NTabletFlatExecutor::TTransactionContext& txc); + void OnTransactionFinishOnComplete(const TVector& operations, const TLockFeatures& lock, const ui64 txId); +}; +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/operations/slice_builder/builder.cpp b/ydb/core/tx/columnshard/operations/slice_builder/builder.cpp index ba8c51c63b2e..2a9d42b00283 100644 --- a/ydb/core/tx/columnshard/operations/slice_builder/builder.cpp +++ b/ydb/core/tx/columnshard/operations/slice_builder/builder.cpp @@ -27,49 +27,63 @@ std::optional> TBuildSlicesTask:: return result; } -void TBuildSlicesTask::ReplyError(const TString& message) { +void TBuildSlicesTask::ReplyError(const TString& message, const NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass errorClass) { auto writeDataPtr = std::make_shared(std::move(WriteData)); - TWritingBuffer buffer(writeDataPtr->GetBlobsAction(), { std::make_shared(writeDataPtr) }); + TWritingBuffer buffer(writeDataPtr->GetBlobsAction(), { std::make_shared(*writeDataPtr) }); auto result = 
NColumnShard::TEvPrivate::TEvWriteBlobsResult::Error( - NKikimrProto::EReplyStatus::CORRUPTED, std::move(buffer), message); + NKikimrProto::EReplyStatus::CORRUPTED, std::move(buffer), message, errorClass); TActorContext::AsActorContext().Send(ParentActorId, result.release()); } -bool TBuildSlicesTask::DoExecute() { +TConclusionStatus TBuildSlicesTask::DoExecute(const std::shared_ptr& /*taskPtr*/) { NActors::TLogContextGuard g(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletId)("parent_id", ParentActorId)); if (!OriginalBatch) { AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("event", "ev_write_bad_data")("write_id", WriteData.GetWriteMeta().GetWriteId())("table_id", WriteData.GetWriteMeta().GetTableId()); - ReplyError("no data in batch"); - return true; + ReplyError("no data in batch", NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass::Internal); + return TConclusionStatus::Fail("no data in batch"); } const auto& indexSchema = ActualSchema->GetIndexInfo().ArrowSchema(); - auto reorderConclusion = NArrow::TColumnOperator().Reorder(OriginalBatch, indexSchema->field_names()); - if (reorderConclusion.IsFail()) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "unadaptable schemas")("index", indexSchema->ToString())("problem", reorderConclusion.GetErrorMessage()); - ReplyError("cannot reorder schema: " + reorderConclusion.GetErrorMessage()); - return true; - } else { - OriginalBatch = reorderConclusion.DetachResult(); - } - if (!OriginalBatch->schema()->Equals(indexSchema)) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "unequal schemas")("batch", OriginalBatch->schema()->ToString()) - ("index", indexSchema->ToString()); - ReplyError("unequal schemas"); - return true; + auto subsetConclusion = NArrow::TColumnOperator().BuildSequentialSubset(OriginalBatch, indexSchema); + if (subsetConclusion.IsFail()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "unadaptable schemas")("index", indexSchema->ToString())( 
+ "problem", subsetConclusion.GetErrorMessage()); + ReplyError( + "unadaptable schema: " + subsetConclusion.GetErrorMessage(), + NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass::Internal); + return TConclusionStatus::Fail("cannot reorder schema: " + subsetConclusion.GetErrorMessage()); } + NArrow::TSchemaSubset subset = subsetConclusion.DetachResult(); + if (OriginalBatch->num_columns() != indexSchema->num_fields()) { + AFL_VERIFY(OriginalBatch->num_columns() < indexSchema->num_fields())("original", OriginalBatch->num_columns())( + "index", indexSchema->num_fields()); + if (HasAppData() && !AppDataVerified().FeatureFlags.GetEnableOptionalColumnsInColumnShard()) { + subset = NArrow::TSchemaSubset::AllFieldsAccepted(); + const std::vector& columnIdsVector = ActualSchema->GetIndexInfo().GetColumnIds(false); + const std::set columnIdsSet(columnIdsVector.begin(), columnIdsVector.end()); + auto normalized = + ActualSchema->NormalizeBatch(*ActualSchema, std::make_shared(OriginalBatch), columnIdsSet).DetachResult(); + OriginalBatch = NArrow::ToBatch(normalized->BuildTableVerified(), true); + } + } WriteData.MutableWriteMeta().SetWriteMiddle2StartInstant(TMonotonic::Now()); auto batches = BuildSlices(); WriteData.MutableWriteMeta().SetWriteMiddle3StartInstant(TMonotonic::Now()); if (batches) { auto writeDataPtr = std::make_shared(std::move(WriteData)); - auto result = std::make_unique(writeDataPtr, std::move(*batches)); + writeDataPtr->SetSchemaSubset(std::move(subset)); + std::shared_ptr pkBatch; + if (!writeDataPtr->GetWriteMeta().HasLongTxId()) { + pkBatch = NArrow::TColumnOperator().Extract(OriginalBatch, ActualSchema->GetIndexInfo().GetPrimaryKey()->fields()); + } + auto result = std::make_unique(writeDataPtr, std::move(*batches), pkBatch); TActorContext::AsActorContext().Send(BufferActorId, result.release()); } else { - ReplyError("Cannot slice input to batches"); + ReplyError("Cannot slice input to batches", 
NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass::Internal); + return TConclusionStatus::Fail("Cannot slice input to batches"); } - return true; + return TConclusionStatus::Success(); } } diff --git a/ydb/core/tx/columnshard/operations/slice_builder/builder.h b/ydb/core/tx/columnshard/operations/slice_builder/builder.h index f9eb29fbe9a4..a22b0c7d6ca7 100644 --- a/ydb/core/tx/columnshard/operations/slice_builder/builder.h +++ b/ydb/core/tx/columnshard/operations/slice_builder/builder.h @@ -1,8 +1,9 @@ #pragma once +#include +#include #include #include #include -#include namespace NKikimr::NOlap { @@ -15,24 +16,24 @@ class TBuildSlicesTask: public NConveyor::ITask { std::shared_ptr OriginalBatch; std::optional> BuildSlices(); const std::shared_ptr ActualSchema; - void ReplyError(const TString& message); + void ReplyError(const TString& message, const NColumnShard::TEvPrivate::TEvWriteBlobsResult::EErrorClass errorClass); + protected: - virtual bool DoExecute() override; + virtual TConclusionStatus DoExecute(const std::shared_ptr& taskPtr) override; + public: virtual TString GetTaskClassIdentifier() const override { return "Write::ConstructBlobs::Slices"; } - TBuildSlicesTask(const ui64 tabletId, const NActors::TActorId parentActorId, - const NActors::TActorId bufferActorId, NEvWrite::TWriteData&& writeData, const std::shared_ptr& batch, - const std::shared_ptr& actualSchema) + TBuildSlicesTask(const ui64 tabletId, const NActors::TActorId parentActorId, const NActors::TActorId bufferActorId, + NEvWrite::TWriteData&& writeData, const std::shared_ptr& batch, const std::shared_ptr& actualSchema) : WriteData(std::move(writeData)) , TabletId(tabletId) , ParentActorId(parentActorId) , BufferActorId(bufferActorId) , OriginalBatch(batch) - , ActualSchema(actualSchema) - { + , ActualSchema(actualSchema) { } }; -} +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/operations/write.cpp b/ydb/core/tx/columnshard/operations/write.cpp index 
4ff54d395e73..06b7701cd7bd 100644 --- a/ydb/core/tx/columnshard/operations/write.cpp +++ b/ydb/core/tx/columnshard/operations/write.cpp @@ -1,6 +1,7 @@ -#include "batch_builder/builder.h" #include "write.h" +#include "batch_builder/builder.h" + #include #include #include @@ -11,272 +12,111 @@ namespace NKikimr::NColumnShard { - TWriteOperation::TWriteOperation(const TWriteId writeId, const ui64 lockId, const ui64 cookie, const EOperationStatus& status, const TInstant createdAt, - const std::optional granuleShardingVersionId, const NEvWrite::EModificationType mType) - : Status(status) - , CreatedAt(createdAt) - , WriteId(writeId) - , LockId(lockId) - , Cookie(cookie) - , GranuleShardingVersionId(granuleShardingVersionId) - , ModificationType(mType) - { - } - - void TWriteOperation::Start(TColumnShard& owner, const ui64 tableId, const NEvWrite::IDataContainer::TPtr& data, - const NActors::TActorId& source, const std::shared_ptr& schema, const TActorContext& ctx) { - Y_ABORT_UNLESS(Status == EOperationStatus::Draft); - - NEvWrite::TWriteMeta writeMeta((ui64)WriteId, tableId, source, GranuleShardingVersionId); - writeMeta.SetModificationType(ModificationType); - std::shared_ptr task = std::make_shared(owner.TabletID(), ctx.SelfID, owner.BufferizationWriteActorId, +TWriteOperation::TWriteOperation(const TOperationWriteId writeId, const ui64 lockId, const ui64 cookie, const EOperationStatus& status, + const TInstant createdAt, const std::optional granuleShardingVersionId, const NEvWrite::EModificationType mType) + : Status(status) + , CreatedAt(createdAt) + , WriteId(writeId) + , LockId(lockId) + , Cookie(cookie) + , GranuleShardingVersionId(granuleShardingVersionId) + , ModificationType(mType) { +} + +void TWriteOperation::Start(TColumnShard& owner, const ui64 tableId, const NEvWrite::IDataContainer::TPtr& data, const NActors::TActorId& source, + const std::shared_ptr& schema, const TActorContext& ctx) { + Y_ABORT_UNLESS(Status == EOperationStatus::Draft); + + 
NEvWrite::TWriteMeta writeMeta((ui64)WriteId, tableId, source, GranuleShardingVersionId); + writeMeta.SetLockId(LockId); + writeMeta.SetModificationType(ModificationType); + std::shared_ptr task = + std::make_shared(owner.TabletID(), ctx.SelfID, owner.BufferizationWriteActorId, NEvWrite::TWriteData(writeMeta, data, owner.TablesManager.GetPrimaryIndex()->GetReplaceKey(), owner.StoragesManager->GetInsertOperator()->StartWritingAction(NOlap::NBlobOperations::EConsumer::WRITING_OPERATOR)), - schema, owner.GetLastTxSnapshot()); - NConveyor::TCompServiceOperator::SendTaskToExecute(task); - - Status = EOperationStatus::Started; - } - - void TWriteOperation::Commit(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc, const NOlap::TSnapshot& snapshot) const { - Y_ABORT_UNLESS(Status == EOperationStatus::Prepared); - - TBlobGroupSelector dsGroupSelector(owner.Info()); - NOlap::TDbWrapper dbTable(txc.DB, &dsGroupSelector); - - for (auto gWriteId : GlobalWriteIds) { - auto pathExists = [&](ui64 pathId) { - return owner.TablesManager.HasTable(pathId); - }; - - auto counters = owner.InsertTable->Commit(dbTable, snapshot.GetPlanStep(), snapshot.GetTxId(), { gWriteId }, - pathExists); - - owner.IncCounter(COUNTER_BLOBS_COMMITTED, counters.Rows); - owner.IncCounter(COUNTER_BYTES_COMMITTED, counters.Bytes); - owner.IncCounter(COUNTER_RAW_BYTES_COMMITTED, counters.RawBytes); - } - owner.UpdateInsertTableCounters(); - } - - void TWriteOperation::OnWriteFinish(NTabletFlatExecutor::TTransactionContext& txc, const TVector& globalWriteIds) { - Y_ABORT_UNLESS(Status == EOperationStatus::Started); - Status = EOperationStatus::Prepared; - GlobalWriteIds = globalWriteIds; - - NIceDb::TNiceDb db(txc.DB); - NKikimrTxColumnShard::TInternalOperationData proto; - ToProto(proto); - - TString metadata; - Y_ABORT_UNLESS(proto.SerializeToString(&metadata)); - - db.Table().Key((ui64)WriteId).Update( - NIceDb::TUpdate((ui32)Status), - NIceDb::TUpdate(CreatedAt.Seconds()), - 
NIceDb::TUpdate(metadata), - NIceDb::TUpdate(LockId), - NIceDb::TUpdate(Cookie), - NIceDb::TUpdate(GranuleShardingVersionId.value_or(0))); - } + schema, owner.GetLastTxSnapshot(), owner.Counters.GetCSCounters().WritingCounters); + NConveyor::TCompServiceOperator::SendTaskToExecute(task); - void TWriteOperation::ToProto(NKikimrTxColumnShard::TInternalOperationData& proto) const { - for (auto&& writeId : GlobalWriteIds) { - proto.AddInternalWriteIds((ui64)writeId); - } - proto.SetModificationType((ui32)ModificationType); - } + Status = EOperationStatus::Started; +} - void TWriteOperation::FromProto(const NKikimrTxColumnShard::TInternalOperationData& proto) { - for (auto&& writeId : proto.GetInternalWriteIds()) { - GlobalWriteIds.push_back(TWriteId(writeId)); - } - if (proto.HasModificationType()) { - ModificationType = (NEvWrite::EModificationType)proto.GetModificationType(); - } else { - ModificationType = NEvWrite::EModificationType::Replace; - } - } +void TWriteOperation::CommitOnExecute(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc, const NOlap::TSnapshot& snapshot) const { + Y_ABORT_UNLESS(Status == EOperationStatus::Prepared); - void TWriteOperation::Abort(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) const { - Y_ABORT_UNLESS(Status == EOperationStatus::Prepared); + TBlobGroupSelector dsGroupSelector(owner.Info()); + NOlap::TDbWrapper dbTable(txc.DB, &dsGroupSelector); - TBlobGroupSelector dsGroupSelector(owner.Info()); - NOlap::TDbWrapper dbTable(txc.DB, &dsGroupSelector); + for (auto gWriteId : InsertWriteIds) { + auto pathExists = [&](ui64 pathId) { + return owner.TablesManager.HasTable(pathId); + }; - THashSet writeIds; - writeIds.insert(GlobalWriteIds.begin(), GlobalWriteIds.end()); - owner.InsertTable->Abort(dbTable, writeIds); + const auto counters = owner.InsertTable->Commit(dbTable, snapshot.GetPlanStep(), snapshot.GetTxId(), { gWriteId }, pathExists); + 
owner.Counters.GetTabletCounters()->OnWriteCommitted(counters); } +} - bool TOperationsManager::Load(NTabletFlatExecutor::TTransactionContext& txc) { - NIceDb::TNiceDb db(txc.DB); - { - auto rowset = db.Table().Select(); - if (!rowset.IsReady()) { - return false; - } - - while (!rowset.EndOfSet()) { - const TWriteId writeId = (TWriteId)rowset.GetValue(); - const ui64 createdAtSec = rowset.GetValue(); - const ui64 lockId = rowset.GetValue(); - const ui64 cookie = rowset.GetValueOrDefault(0); - const TString metadata = rowset.GetValue(); - const EOperationStatus status = (EOperationStatus)rowset.GetValue(); - std::optional granuleShardingVersionId; - if (rowset.HaveValue() && rowset.GetValue()) { - granuleShardingVersionId = rowset.GetValue(); - } +void TWriteOperation::CommitOnComplete(TColumnShard& owner, const NOlap::TSnapshot& /*snapshot*/) const { + Y_ABORT_UNLESS(Status == EOperationStatus::Prepared); + owner.UpdateInsertTableCounters(); +} - NKikimrTxColumnShard::TInternalOperationData metaProto; - Y_ABORT_UNLESS(metaProto.ParseFromString(metadata)); +void TWriteOperation::OnWriteFinish( + NTabletFlatExecutor::TTransactionContext& txc, const std::vector& insertWriteIds, const bool ephemeralFlag) { + Y_ABORT_UNLESS(Status == EOperationStatus::Started); + Status = EOperationStatus::Prepared; + InsertWriteIds = insertWriteIds; - auto operation = std::make_shared(writeId, lockId, cookie, status, TInstant::Seconds(createdAtSec), granuleShardingVersionId, NEvWrite::EModificationType::Upsert); - operation->FromProto(metaProto); - AFL_VERIFY(operation->GetStatus() != EOperationStatus::Draft); - - auto [_, isOk] = Operations.emplace(operation->GetWriteId(), operation); - if (!isOk) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "duplicated_operation")("operation", *operation); - return false; - } - Locks[lockId].push_back(operation->GetWriteId()); - LastWriteId = std::max(LastWriteId, operation->GetWriteId()); - if (!rowset.Next()) { - return false; - } - } - 
} - { - auto rowset = db.Table().Select(); - if (!rowset.IsReady()) { - return false; - } - - while (!rowset.EndOfSet()) { - const ui64 lockId = rowset.GetValue(); - const ui64 txId = rowset.GetValue(); - AFL_VERIFY(Locks.contains(lockId))("lock_id", lockId); - Tx2Lock[txId] = lockId; - if (!rowset.Next()) { - return false; - } - } - } - return true; + if (ephemeralFlag) { + return; } - bool TOperationsManager::CommitTransaction(TColumnShard& owner, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc, const NOlap::TSnapshot& snapshot) { - TLogContextGuard gLogging(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tx_id", txId)); - auto lockId = GetLockForTx(txId); - if (!lockId) { - ACFL_ERROR("details", "unknown_transaction"); - return true; - } - auto tIt = Locks.find(*lockId); - AFL_VERIFY(tIt != Locks.end())("tx_id", txId)("lock_id", *lockId); + NIceDb::TNiceDb db(txc.DB); + NKikimrTxColumnShard::TInternalOperationData proto; + ToProto(proto); - TVector commited; - for (auto&& opId : tIt->second) { - auto opPtr = Operations.FindPtr(opId); - (*opPtr)->Commit(owner, txc, snapshot); - commited.emplace_back(*opPtr); - } - OnTransactionFinish(commited, txId, txc); - return true; - } - - bool TOperationsManager::AbortTransaction(TColumnShard& owner, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc) { - TLogContextGuard gLogging(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tx_id", txId)); - - auto lockId = GetLockForTx(txId); - if (!lockId) { - ACFL_ERROR("details", "unknown_transaction"); - return true; - } - auto tIt = Locks.find(*lockId); - AFL_VERIFY(tIt != Locks.end())("tx_id", txId)("lock_id", *lockId); - - TVector aborted; - for (auto&& opId : tIt->second) { - auto opPtr = Operations.FindPtr(opId); - (*opPtr)->Abort(owner, txc); - aborted.emplace_back(*opPtr); - } - - OnTransactionFinish(aborted, txId, txc); - return true; - } + TString metadata; + 
Y_ABORT_UNLESS(proto.SerializeToString(&metadata)); - TWriteOperation::TPtr TOperationsManager::GetOperation(const TWriteId writeId) const { - auto it = Operations.find(writeId); - if (it == Operations.end()) { - return nullptr; - } - return it->second; - } - - void TOperationsManager::OnTransactionFinish(const TVector& operations, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc) { - auto lockId = GetLockForTx(txId); - AFL_VERIFY(!!lockId)("tx_id", txId); - Locks.erase(*lockId); - Tx2Lock.erase(txId); - for (auto&& op : operations) { - RemoveOperation(op, txc); - } - NIceDb::TNiceDb db(txc.DB); - db.Table().Key(txId, *lockId).Delete(); - } - - void TOperationsManager::RemoveOperation(const TWriteOperation::TPtr& op, NTabletFlatExecutor::TTransactionContext& txc) { - Operations.erase(op->GetWriteId()); - NIceDb::TNiceDb db(txc.DB); - db.Table().Key((ui64)op->GetWriteId()).Delete(); - } + db.Table() + .Key((ui64)WriteId) + .Update(NIceDb::TUpdate((ui32)Status), NIceDb::TUpdate(CreatedAt.Seconds()), + NIceDb::TUpdate(metadata), NIceDb::TUpdate(LockId), + NIceDb::TUpdate(Cookie), + NIceDb::TUpdate(GranuleShardingVersionId.value_or(0))); +} - TWriteId TOperationsManager::BuildNextWriteId() { - return ++LastWriteId; +void TWriteOperation::ToProto(NKikimrTxColumnShard::TInternalOperationData& proto) const { + for (auto&& writeId : InsertWriteIds) { + proto.AddInternalWriteIds((ui64)writeId); } + proto.SetModificationType((ui32)ModificationType); +} - std::optional TOperationsManager::GetLockForTx(const ui64 txId) const { - auto lockIt = Tx2Lock.find(txId); - if (lockIt != Tx2Lock.end()) { - return lockIt->second; - } - return std::nullopt; +void TWriteOperation::FromProto(const NKikimrTxColumnShard::TInternalOperationData& proto) { + for (auto&& writeId : proto.GetInternalWriteIds()) { + InsertWriteIds.push_back(TInsertWriteId(writeId)); } - - void TOperationsManager::LinkTransaction(const ui64 lockId, const ui64 txId, 
NTabletFlatExecutor::TTransactionContext& txc) { - Tx2Lock[txId] = lockId; - NIceDb::TNiceDb db(txc.DB); - db.Table().Key(txId, lockId).Update(); + if (proto.HasModificationType()) { + ModificationType = (NEvWrite::EModificationType)proto.GetModificationType(); + } else { + ModificationType = NEvWrite::EModificationType::Replace; } +} - TWriteOperation::TPtr TOperationsManager::RegisterOperation(const ui64 lockId, const ui64 cookie, const std::optional granuleShardingVersionId, const NEvWrite::EModificationType mType) { - auto writeId = BuildNextWriteId(); - auto operation = std::make_shared(writeId, lockId, cookie, EOperationStatus::Draft, AppData()->TimeProvider->Now(), granuleShardingVersionId, mType); - Y_ABORT_UNLESS(Operations.emplace(operation->GetWriteId(), operation).second); - Locks[operation->GetLockId()].push_back(operation->GetWriteId()); - return operation; - } +void TWriteOperation::AbortOnExecute(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) const { + Y_ABORT_UNLESS(Status == EOperationStatus::Prepared); - EOperationBehaviour TOperationsManager::GetBehaviour(const NEvents::TDataEvents::TEvWrite& evWrite) { - if (evWrite.Record.HasTxId() && evWrite.Record.HasLocks() && evWrite.Record.GetLocks().GetOp() == NKikimrDataEvents::TKqpLocks::Commit) { - return EOperationBehaviour::CommitWriteLock; - } + TBlobGroupSelector dsGroupSelector(owner.Info()); + NOlap::TDbWrapper dbTable(txc.DB, &dsGroupSelector); - if (evWrite.Record.HasLockTxId() && evWrite.Record.HasLockNodeId()) { - if (evWrite.Record.GetTxMode() == NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE) { - return EOperationBehaviour::WriteWithLock; - } + THashSet writeIds; + writeIds.insert(InsertWriteIds.begin(), InsertWriteIds.end()); + owner.InsertTable->Abort(dbTable, writeIds); +} - return EOperationBehaviour::Undefined; - } +void TWriteOperation::AbortOnComplete(TColumnShard& /*owner*/) const { + Y_ABORT_UNLESS(Status == EOperationStatus::Prepared); +} - if 
(evWrite.Record.HasTxId() && evWrite.Record.GetTxMode() == NKikimrDataEvents::TEvWrite::MODE_PREPARE) { - return EOperationBehaviour::InTxWrite; - } - return EOperationBehaviour::Undefined; - } } // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/operations/write.h b/ydb/core/tx/columnshard/operations/write.h index b72827af6c58..ad22caa651d4 100644 --- a/ydb/core/tx/columnshard/operations/write.h +++ b/ydb/core/tx/columnshard/operations/write.h @@ -1,95 +1,83 @@ #pragma once -#include -#include +#include +#include #include -#include #include -#include +#include +#include +#include -#include #include #include -#include +#include namespace NKikimr::NTabletFlatExecutor { - class TTransactionContext; +class TTransactionContext; +} + +namespace NKikimr::NOlap::NTxInteractions { +class TManager; } namespace NKikimr::NColumnShard { - class TColumnShard; - - using TWriteId = NOlap::TWriteId; - - enum class EOperationStatus : ui32 { - Draft = 1, - Started = 2, - Prepared = 3 - }; - - enum class EOperationBehaviour : ui32 { - Undefined = 1, - InTxWrite = 2, - WriteWithLock = 3, - CommitWriteLock = 4 - }; - - class TWriteOperation { - YDB_READONLY(EOperationStatus, Status, EOperationStatus::Draft); - YDB_READONLY_DEF(TInstant, CreatedAt); - YDB_READONLY_DEF(TWriteId, WriteId); - YDB_READONLY(ui64, LockId, 0); - YDB_READONLY(ui64, Cookie, 0); - YDB_READONLY_DEF(TVector, GlobalWriteIds); - YDB_ACCESSOR(EOperationBehaviour, Behaviour, EOperationBehaviour::Undefined); - YDB_READONLY_DEF(std::optional, GranuleShardingVersionId); - YDB_READONLY(NEvWrite::EModificationType, ModificationType, NEvWrite::EModificationType::Upsert); - public: - using TPtr = std::shared_ptr; - - TWriteOperation(const TWriteId writeId, const ui64 lockId, const ui64 cookie, const EOperationStatus& status, const TInstant createdAt, const std::optional granuleShardingVersionId, const NEvWrite::EModificationType mType); - - void Start(TColumnShard& owner, const ui64 tableId, const 
NEvWrite::IDataContainer::TPtr& data, - const NActors::TActorId& source, const std::shared_ptr& schema, const TActorContext& ctx); - void OnWriteFinish(NTabletFlatExecutor::TTransactionContext& txc, const TVector& globalWriteIds); - void Commit(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc, const NOlap::TSnapshot& snapshot) const; - void Abort(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) const; - - void Out(IOutputStream& out) const { - out << "write_id=" << (ui64) WriteId << ";lock_id=" << LockId; - } - - void ToProto(NKikimrTxColumnShard::TInternalOperationData& proto) const; - void FromProto(const NKikimrTxColumnShard::TInternalOperationData& proto); - }; - - class TOperationsManager { - TMap> Locks; - TMap Tx2Lock; - TMap Operations; - TWriteId LastWriteId = TWriteId(0); - - public: - bool Load(NTabletFlatExecutor::TTransactionContext& txc); - - TWriteOperation::TPtr GetOperation(const TWriteId writeId) const; - bool CommitTransaction(TColumnShard& owner, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc, const NOlap::TSnapshot& snapshot); - bool AbortTransaction(TColumnShard& owner, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc); - void LinkTransaction(const ui64 lockId, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc); - std::optional GetLockForTx(const ui64 lockId) const; - - TWriteOperation::TPtr RegisterOperation(const ui64 lockId, const ui64 cookie, const std::optional granuleShardingVersionId, const NEvWrite::EModificationType mType); - static EOperationBehaviour GetBehaviour(const NEvents::TDataEvents::TEvWrite& evWrite); - - private: - TWriteId BuildNextWriteId(); - void RemoveOperation(const TWriteOperation::TPtr& op, NTabletFlatExecutor::TTransactionContext& txc); - void OnTransactionFinish(const TVector& operations, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc); - }; -} +class TColumnShard; + +using TOperationWriteId = NOlap::TOperationWriteId; 
+using TInsertWriteId = NOlap::TInsertWriteId; + +enum class EOperationStatus : ui32 { + Draft = 1, + Started = 2, + Prepared = 3 +}; + +enum class EOperationBehaviour : ui32 { + Undefined = 1, + InTxWrite = 2, + WriteWithLock = 3, + CommitWriteLock = 4, + AbortWriteLock = 5, + NoTxWrite = 6 +}; + +class TWriteOperation { + YDB_READONLY(EOperationStatus, Status, EOperationStatus::Draft); + YDB_READONLY_DEF(TInstant, CreatedAt); + YDB_READONLY_DEF(TOperationWriteId, WriteId); + YDB_READONLY(ui64, LockId, 0); + YDB_READONLY(ui64, Cookie, 0); + YDB_READONLY_DEF(std::vector, InsertWriteIds); + YDB_ACCESSOR(EOperationBehaviour, Behaviour, EOperationBehaviour::Undefined); + YDB_READONLY_DEF(std::optional, GranuleShardingVersionId); + YDB_READONLY(NEvWrite::EModificationType, ModificationType, NEvWrite::EModificationType::Upsert); + +public: + using TPtr = std::shared_ptr; + + TWriteOperation(const TOperationWriteId writeId, const ui64 lockId, const ui64 cookie, const EOperationStatus& status, const TInstant createdAt, + const std::optional granuleShardingVersionId, const NEvWrite::EModificationType mType); + + void Start(TColumnShard& owner, const ui64 tableId, const NEvWrite::IDataContainer::TPtr& data, const NActors::TActorId& source, + const std::shared_ptr& schema, const TActorContext& ctx); + void OnWriteFinish(NTabletFlatExecutor::TTransactionContext& txc, const std::vector& insertWriteIds, const bool ephemeralFlag); + void CommitOnExecute(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc, const NOlap::TSnapshot& snapshot) const; + void CommitOnComplete(TColumnShard& owner, const NOlap::TSnapshot& snapshot) const; + void AbortOnExecute(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) const; + void AbortOnComplete(TColumnShard& owner) const; + + void Out(IOutputStream& out) const { + out << "write_id=" << (ui64)WriteId << ";lock_id=" << LockId; + } + + void ToProto(NKikimrTxColumnShard::TInternalOperationData& proto) const; + void 
FromProto(const NKikimrTxColumnShard::TInternalOperationData& proto); +}; + +} // namespace NKikimr::NColumnShard template <> inline void Out(IOutputStream& o, const NKikimr::NColumnShard::TWriteOperation& x) { diff --git a/ydb/core/tx/columnshard/operations/write_data.cpp b/ydb/core/tx/columnshard/operations/write_data.cpp index a0cd6450f534..56a0ad5e16cb 100644 --- a/ydb/core/tx/columnshard/operations/write_data.cpp +++ b/ydb/core/tx/columnshard/operations/write_data.cpp @@ -31,7 +31,7 @@ bool TArrowData::Parse(const NKikimrDataEvents::TEvWrite_TOperation& proto, cons TConclusion> TArrowData::ExtractBatch() { Y_ABORT_UNLESS(!!IncomingData); - auto result = NArrow::DeserializeBatch(IncomingData, BatchSchema->GetSchema()); + auto result = NArrow::DeserializeBatch(IncomingData, std::make_shared(BatchSchema->GetSchema()->fields())); IncomingData = ""; return result; } diff --git a/ydb/core/tx/columnshard/operations/ya.make b/ydb/core/tx/columnshard/operations/ya.make index c626a22b508b..c0bd3f234b78 100644 --- a/ydb/core/tx/columnshard/operations/ya.make +++ b/ydb/core/tx/columnshard/operations/ya.make @@ -3,6 +3,7 @@ LIBRARY() SRCS( write.cpp write_data.cpp + manager.cpp ) PEERDIR( @@ -11,6 +12,7 @@ PEERDIR( ydb/services/metadata ydb/core/tx/columnshard/data_sharing/destination/events ydb/core/tx/columnshard/data_reader + ydb/core/tx/columnshard/transactions/locks ydb/core/tx/columnshard/operations/batch_builder ydb/core/tx/columnshard/operations/slice_builder ) diff --git a/ydb/core/tx/columnshard/resource_subscriber/counters.cpp b/ydb/core/tx/columnshard/resource_subscriber/counters.cpp index 785ca04ba8f9..45f785c10223 100644 --- a/ydb/core/tx/columnshard/resource_subscriber/counters.cpp +++ b/ydb/core/tx/columnshard/resource_subscriber/counters.cpp @@ -1,9 +1,12 @@ #include "counters.h" +#include + namespace NKikimr::NOlap::NResourceBroker::NSubscribe { std::shared_ptr TSubscriberCounters::GetTypeCounters(const TString& resourceType) { + TGuard lock(Mutex); auto 
it = ResourceTypeCounters.find(resourceType); if (it == ResourceTypeCounters.end()) { it = ResourceTypeCounters.emplace(resourceType, std::make_shared(*this, resourceType)).first; diff --git a/ydb/core/tx/columnshard/resource_subscriber/task.h b/ydb/core/tx/columnshard/resource_subscriber/task.h index df4b742f1ad9..46a1ebebd32c 100644 --- a/ydb/core/tx/columnshard/resource_subscriber/task.h +++ b/ydb/core/tx/columnshard/resource_subscriber/task.h @@ -28,6 +28,10 @@ class TResourcesGuard: public NColumnShard::TMonitoringObjectsCounter& column, const bool needMax, const bool isSortedColumn) { +TSimpleChunkMeta::TSimpleChunkMeta( + const std::shared_ptr& column, const bool needMax, const bool isSortedColumn) { Y_ABORT_UNLESS(column); - Y_ABORT_UNLESS(column->length()); - NumRows = column->length(); - RawBytes = NArrow::GetArrayDataSize(column); + Y_ABORT_UNLESS(column->GetRecordsCount()); + NumRows = column->GetRecordsCount(); + RawBytes = column->GetRawSizeVerified(); if (needMax) { - std::pair minMaxPos = {0, (column->length() - 1)}; if (!isSortedColumn) { - minMaxPos = NArrow::FindMinMaxPosition(column); - Y_ABORT_UNLESS(minMaxPos.first >= 0); - Y_ABORT_UNLESS(minMaxPos.second >= 0); + Max = column->GetMaxScalar(); + } else { + Max = column->GetScalar(column->GetRecordsCount() - 1); } - - Max = NArrow::GetScalar(column, minMaxPos.second); - - Y_ABORT_UNLESS(Max); +// AFL_VERIFY(Max); } } diff --git a/ydb/core/tx/columnshard/splitter/abstract/chunk_meta.h b/ydb/core/tx/columnshard/splitter/abstract/chunk_meta.h index 8f8f902e4095..526a2a037967 100644 --- a/ydb/core/tx/columnshard/splitter/abstract/chunk_meta.h +++ b/ydb/core/tx/columnshard/splitter/abstract/chunk_meta.h @@ -1,4 +1,6 @@ #pragma once +#include + #include #include @@ -17,7 +19,7 @@ class TSimpleChunkMeta { ui32 RawBytes = 0; TSimpleChunkMeta() = default; public: - TSimpleChunkMeta(const std::shared_ptr& column, const bool needMinMax, const bool isSortedColumn); + TSimpleChunkMeta(const std::shared_ptr& 
column, const bool needMinMax, const bool isSortedColumn); ui64 GetMetadataSize() const { return sizeof(ui32) + sizeof(ui32) + 8 * 3 * 2; diff --git a/ydb/core/tx/columnshard/splitter/abstract/chunks.h b/ydb/core/tx/columnshard/splitter/abstract/chunks.h index e3be37be2bd3..d0300915f098 100644 --- a/ydb/core/tx/columnshard/splitter/abstract/chunks.h +++ b/ydb/core/tx/columnshard/splitter/abstract/chunks.h @@ -15,14 +15,12 @@ namespace NKikimr::NOlap { class TPortionInfo; class TPortionInfoConstructor; class TSimpleColumnInfo; -class TColumnSaver; class IPortionDataChunk { private: YDB_READONLY(ui32, EntityId, 0); std::optional ChunkIdx; - protected: ui64 DoGetPackedSize() const { return GetData().size(); @@ -32,9 +30,14 @@ class IPortionDataChunk { virtual std::vector> DoInternalSplit(const TColumnSaver& saver, const std::shared_ptr& counters, const std::vector& splitSizes) const = 0; virtual bool DoIsSplittable() const = 0; virtual std::optional DoGetRecordsCount() const = 0; + virtual std::optional DoGetRawBytes() const = 0; + virtual std::shared_ptr DoGetFirstScalar() const = 0; virtual std::shared_ptr DoGetLastScalar() const = 0; virtual void DoAddIntoPortionBeforeBlob(const TBlobRangeLink16& bRange, TPortionInfoConstructor& portionInfo) const = 0; + virtual void DoAddInplaceIntoPortion(TPortionInfoConstructor& /*portionInfo*/) const { + AFL_VERIFY(false)("problem", "implemented only in index chunks"); + } virtual std::shared_ptr DoCopyWithAnotherBlob(TString&& /*data*/, const TSimpleColumnInfo& /*columnInfo*/) const { AFL_VERIFY(false); return nullptr; @@ -63,6 +66,12 @@ class IPortionDataChunk { return DoGetRecordsCount(); } + ui64 GetRawBytesVerified() const { + auto result = DoGetRawBytes(); + AFL_VERIFY(result); + return *result; + } + ui32 GetRecordsCountVerified() const { auto result = DoGetRecordsCount(); AFL_VERIFY(result); @@ -121,6 +130,10 @@ class IPortionDataChunk { AFL_VERIFY(!bRange.IsValid()); return DoAddIntoPortionBeforeBlob(bRange, 
portionInfo); } + + void AddInplaceIntoPortion(TPortionInfoConstructor& portionInfo) const { + return DoAddInplaceIntoPortion(portionInfo); + } }; } diff --git a/ydb/core/tx/columnshard/splitter/batch_slice.cpp b/ydb/core/tx/columnshard/splitter/batch_slice.cpp index 83f9f90f77f2..7f6cc05c1e7b 100644 --- a/ydb/core/tx/columnshard/splitter/batch_slice.cpp +++ b/ydb/core/tx/columnshard/splitter/batch_slice.cpp @@ -1,5 +1,4 @@ #include "batch_slice.h" -#include "simple.h" #include namespace NKikimr::NOlap { @@ -69,7 +68,6 @@ bool TGeneralSerializedSlice::GroupBlobsImpl(const NSplitter::TGroupFeatures& fe chunksInProgress.AddChunks(i.GetChunks()); } InternalSplitsCount = 0; - AFL_VERIFY(chunksInProgress.size()); std::vector result; Y_ABORT_UNLESS(features.GetSplitSettings().GetMaxBlobSize() >= 2 * features.GetSplitSettings().GetMinBlobSize()); while (!chunksInProgress.IsEmpty()) { @@ -149,7 +147,8 @@ bool TGeneralSerializedSlice::GroupBlobsImpl(const NSplitter::TGroupFeatures& fe return true; } -TGeneralSerializedSlice::TGeneralSerializedSlice(const THashMap>>& data, ISchemaDetailInfo::TPtr schema, std::shared_ptr counters) +TGeneralSerializedSlice::TGeneralSerializedSlice(const THashMap>>& data, + NArrow::NSplitter::ISchemaDetailInfo::TPtr schema, std::shared_ptr counters) : Schema(schema) , Counters(counters) { std::optional recordsCount; @@ -170,60 +169,14 @@ TGeneralSerializedSlice::TGeneralSerializedSlice(const THashMap counters) +TGeneralSerializedSlice::TGeneralSerializedSlice( + const ui32 recordsCount, NArrow::NSplitter::ISchemaDetailInfo::TPtr schema, std::shared_ptr counters) : RecordsCount(recordsCount) , Schema(schema) , Counters(counters) { } -TBatchSerializedSlice::TBatchSerializedSlice(const std::shared_ptr& batch, ISchemaDetailInfo::TPtr schema, std::shared_ptr counters, - const NSplitter::TSplitSettings& settings) - : TBase(TValidator::CheckNotNull(batch)->num_rows(), schema, counters) - , Batch(batch) -{ - Y_ABORT_UNLESS(batch); - 
Data.reserve(batch->num_columns()); - for (auto&& i : batch->schema()->fields()) { - TSplittedEntity c(schema->GetColumnId(i->name())); - Data.emplace_back(std::move(c)); - } - - ui32 idx = 0; - for (auto&& i : batch->columns()) { - auto& c = Data[idx]; - auto columnSaver = schema->GetColumnSaver(c.GetEntityId()); - auto stats = schema->GetColumnSerializationStats(c.GetEntityId()); - TSimpleSplitter splitter(columnSaver, Counters); - splitter.SetStats(stats); - std::vector> chunks; - for (auto&& i : splitter.Split(i, Schema->GetField(c.GetEntityId()), settings.GetMaxBlobSize())) { - chunks.emplace_back(std::make_shared(c.GetEntityId(), i, Schema)); - } - c.SetChunks(chunks); - Size += c.GetSize(); - ++idx; - } -} - -std::vector TBatchSerializedSlice::BuildSimpleSlices(const std::shared_ptr& batch, const NSplitter::TSplitSettings& settings, const std::shared_ptr& counters, const ISchemaDetailInfo::TPtr& schemaInfo) { - std::vector slices; - auto stats = schemaInfo->GetBatchSerializationStats(batch); - ui32 recordsCount = settings.GetMinRecordsCount(); - if (stats) { - const ui32 recordsCountForMinSize = stats->PredictOptimalPackRecordsCount(batch->num_rows(), settings.GetMinBlobSize()).value_or(recordsCount); - const ui32 recordsCountForMaxPortionSize = stats->PredictOptimalPackRecordsCount(batch->num_rows(), settings.GetMaxPortionSize()).value_or(recordsCount); - recordsCount = std::min(recordsCountForMaxPortionSize, std::max(recordsCount, recordsCountForMinSize)); - } - auto linearSplitInfo = TSimpleSplitter::GetOptimalLinearSplitting(batch->num_rows(), recordsCount); - for (auto it = linearSplitInfo.StartIterator(); it.IsValid(); it.Next()) { - std::shared_ptr current = batch->Slice(it.GetPosition(), it.GetCurrentPackSize()); - TBatchSerializedSlice slice(current, schemaInfo, counters, settings); - slices.emplace_back(std::move(slice)); - } - return slices; -} - void TGeneralSerializedSlice::MergeSlice(TGeneralSerializedSlice&& slice) { Y_ABORT_UNLESS(Data.size() 
== slice.Data.size()); RecordsCount += slice.GetRecordsCount(); diff --git a/ydb/core/tx/columnshard/splitter/batch_slice.h b/ydb/core/tx/columnshard/splitter/batch_slice.h index acbb9a0414e4..f1b019544d8c 100644 --- a/ydb/core/tx/columnshard/splitter/batch_slice.h +++ b/ydb/core/tx/columnshard/splitter/batch_slice.h @@ -1,10 +1,10 @@ #pragma once #include "chunks.h" -#include "stats.h" -#include "scheme_info.h" #include "column_info.h" #include "blob_info.h" -#include "similar_packer.h" +#include +#include +#include #include #include #include @@ -14,16 +14,17 @@ namespace NKikimr::NOlap { -class TDefaultSchemaDetails: public ISchemaDetailInfo { +class TDefaultSchemaDetails: public NArrow::NSplitter::ISchemaDetailInfo { private: ISnapshotSchema::TPtr Schema; - std::shared_ptr Stats; + std::shared_ptr Stats; + protected: virtual TColumnSaver DoGetColumnSaver(const ui32 columnId) const override { return Schema->GetColumnSaver(columnId); } public: - TDefaultSchemaDetails(ISnapshotSchema::TPtr schema, const std::shared_ptr& stats) + TDefaultSchemaDetails(ISnapshotSchema::TPtr schema, const std::shared_ptr& stats) : Schema(schema) , Stats(stats) { @@ -39,14 +40,15 @@ class TDefaultSchemaDetails: public ISchemaDetailInfo { return Schema->GetIndexInfo().IsSortedColumn(columnId); } - virtual std::optional GetColumnSerializationStats(const ui32 columnId) const override { + virtual std::optional GetColumnSerializationStats(const ui32 columnId) const override { auto stats = Stats->GetColumnInfo(columnId); if (stats && stats->GetRecordsCount() != 0) { return stats; } return std::nullopt; } - virtual std::optional GetBatchSerializationStats(const std::shared_ptr& rb) const override { + virtual std::optional GetBatchSerializationStats( + const std::shared_ptr& rb) const override { return Stats->GetStatsForRecordBatch(rb); } virtual ui32 GetColumnId(const std::string& fieldName) const override { @@ -61,17 +63,17 @@ class TGeneralSerializedSlice { protected: std::vector Data; ui64 
Size = 0; - ISchemaDetailInfo::TPtr Schema; + NArrow::NSplitter::ISchemaDetailInfo::TPtr Schema; std::shared_ptr Counters; TGeneralSerializedSlice() = default; - const TSplittedEntity& GetEntityDataVerified(const ui32& entityId) const { + const TSplittedEntity& GetEntityDataVerified(const ui32 entityId) const { for (auto&& i : Data) { if (i.GetEntityId() == entityId) { return i; } } - Y_ABORT_UNLESS(false); + AFL_VERIFY(false)("id", entityId); return Data.front(); } bool GroupBlobsImpl(const NSplitter::TGroupFeatures& features, std::vector& blobs); @@ -116,15 +118,17 @@ class TGeneralSerializedSlice { return blobs; } - explicit TGeneralSerializedSlice(TVectorView&& objects) { + explicit TGeneralSerializedSlice(NArrow::NSplitter::TVectorView&& objects) { Y_ABORT_UNLESS(objects.size()); std::swap(*this, objects.front()); for (ui32 i = 1; i < objects.size(); ++i) { MergeSlice(std::move(objects[i])); } } - TGeneralSerializedSlice(const THashMap>>& data, ISchemaDetailInfo::TPtr schema, std::shared_ptr counters); - TGeneralSerializedSlice(const ui32 recordsCount, ISchemaDetailInfo::TPtr schema, std::shared_ptr counters); + TGeneralSerializedSlice(const THashMap>>& data, + NArrow::NSplitter::ISchemaDetailInfo::TPtr schema, std::shared_ptr counters); + TGeneralSerializedSlice( + const ui32 recordsCount, NArrow::NSplitter::ISchemaDetailInfo::TPtr schema, std::shared_ptr counters); void MergeSlice(TGeneralSerializedSlice&& slice); @@ -135,28 +139,4 @@ class TGeneralSerializedSlice { } }; -class TBatchSerializedSlice: public TGeneralSerializedSlice { -private: - using TBase = TGeneralSerializedSlice; - YDB_READONLY_DEF(std::shared_ptr, Batch); -public: - TBatchSerializedSlice(const std::shared_ptr& batch, ISchemaDetailInfo::TPtr schema, std::shared_ptr counters, const NSplitter::TSplitSettings& settings); - - explicit TBatchSerializedSlice(TVectorView&& objects) { - Y_ABORT_UNLESS(objects.size()); - std::swap(*this, objects.front()); - for (ui32 i = 1; i < objects.size(); 
++i) { - MergeSlice(std::move(objects[i])); - } - } - void MergeSlice(TBatchSerializedSlice&& slice) { - Batch = NArrow::CombineBatches({Batch, slice.Batch}); - TBase::MergeSlice(std::move(slice)); - } - - static std::vector BuildSimpleSlices(const std::shared_ptr& batch, const NSplitter::TSplitSettings& settings, - const std::shared_ptr& counters, const ISchemaDetailInfo::TPtr& schemaInfo); - -}; - } diff --git a/ydb/core/tx/columnshard/splitter/chunks.h b/ydb/core/tx/columnshard/splitter/chunks.h index eae2f4d58fd0..e86806da299a 100644 --- a/ydb/core/tx/columnshard/splitter/chunks.h +++ b/ydb/core/tx/columnshard/splitter/chunks.h @@ -15,14 +15,22 @@ class IPortionColumnChunk : public IPortionDataChunk { protected: virtual TSimpleChunkMeta DoBuildSimpleChunkMeta() const = 0; virtual ui32 DoGetRecordsCountImpl() const = 0; + virtual ui64 DoGetRawBytesImpl() const = 0; + + virtual std::optional DoGetRawBytes() const final { + return DoGetRawBytesImpl(); + } + virtual std::optional DoGetRecordsCount() const override final { return DoGetRecordsCountImpl(); } virtual void DoAddIntoPortionBeforeBlob(const TBlobRangeLink16& bRange, TPortionInfoConstructor& portionInfo) const override; - virtual std::vector> DoInternalSplitImpl(const TColumnSaver& saver, const std::shared_ptr& counters, const std::vector& splitSizes) const = 0; - virtual std::vector> DoInternalSplit(const TColumnSaver& saver, const std::shared_ptr& counters, const std::vector& splitSizes) const override; + virtual std::vector> DoInternalSplitImpl(const TColumnSaver& saver, + const std::shared_ptr& counters, const std::vector& splitSizes) const = 0; + virtual std::vector> DoInternalSplit(const TColumnSaver& saver, + const std::shared_ptr& counters, const std::vector& splitSizes) const override; virtual bool DoIsSplittable() const override { return GetRecordsCount() > 1; } @@ -47,7 +55,8 @@ class TChunkedColumnReader { std::vector> Chunks; std::shared_ptr Loader; - std::shared_ptr CurrentChunk; + 
std::shared_ptr CurrentChunk; + std::optional CurrentChunkArray; ui32 CurrentChunkIndex = 0; ui32 CurrentRecordIndex = 0; public: @@ -62,37 +71,56 @@ class TChunkedColumnReader { CurrentChunkIndex = 0; CurrentRecordIndex = 0; if (Chunks.size()) { - CurrentChunk = Loader->ApplyVerifiedColumn(Chunks.front()->GetData()); + CurrentChunk = Loader->ApplyVerified(Chunks.front()->GetData(), Chunks.front()->GetRecordsCountVerified()); + CurrentChunkArray.reset(); + } + } + + const std::shared_ptr& GetCurrentChunk() { + if (!CurrentChunkArray || !CurrentChunkArray->GetAddress().Contains(CurrentRecordIndex)) { + CurrentChunkArray = CurrentChunk->GetChunk(CurrentChunkArray, CurrentRecordIndex); } + AFL_VERIFY(CurrentChunkArray); + return CurrentChunkArray->GetArray(); } - const std::shared_ptr& GetCurrentChunk() const { + const std::shared_ptr& GetCurrentAccessor() const { + AFL_VERIFY(CurrentChunk); return CurrentChunk; } - ui32 GetCurrentRecordIndex() const { - return CurrentRecordIndex; + ui32 GetCurrentRecordIndex() { + if (!CurrentChunkArray || !CurrentChunkArray->GetAddress().Contains(CurrentRecordIndex)) { + CurrentChunkArray = CurrentChunk->GetChunk(CurrentChunkArray->GetAddress(), CurrentRecordIndex); + } + return CurrentChunkArray->GetAddress().GetLocalIndex(CurrentRecordIndex); } bool IsCorrect() const { return !!CurrentChunk; } - bool ReadNext() { - AFL_VERIFY(!!CurrentChunk); - if (++CurrentRecordIndex < CurrentChunk->length()) { - return true; - } + bool ReadNextChunk() { while (++CurrentChunkIndex < Chunks.size()) { - CurrentChunk = Loader->ApplyVerifiedColumn(Chunks[CurrentChunkIndex]->GetData()); + CurrentChunk = Loader->ApplyVerified(Chunks[CurrentChunkIndex]->GetData(), Chunks[CurrentChunkIndex]->GetRecordsCountVerified()); + CurrentChunkArray.reset(); CurrentRecordIndex = 0; - if (CurrentRecordIndex < CurrentChunk->length()) { + if (CurrentRecordIndex < CurrentChunk->GetRecordsCount()) { return true; } } + CurrentChunkArray.reset(); CurrentChunk = nullptr; 
return false; } + + bool ReadNext() { + AFL_VERIFY(!!CurrentChunk); + if (++CurrentRecordIndex < CurrentChunk->GetRecordsCount()) { + return true; + } + return ReadNextChunk(); + } }; class TChunkedBatchReader { @@ -135,6 +163,10 @@ class TChunkedBatchReader { return *result; } + ui32 GetColumnsCount() const { + return Columns.size(); + } + std::vector::const_iterator begin() const { return Columns.begin(); } @@ -142,6 +174,14 @@ class TChunkedBatchReader { std::vector::const_iterator end() const { return Columns.end(); } + + std::vector::iterator begin() { + return Columns.begin(); + } + + std::vector::iterator end() { + return Columns.end(); + } }; } diff --git a/ydb/core/tx/columnshard/splitter/scheme_info.cpp b/ydb/core/tx/columnshard/splitter/scheme_info.cpp deleted file mode 100644 index fe4a65604e11..000000000000 --- a/ydb/core/tx/columnshard/splitter/scheme_info.cpp +++ /dev/null @@ -1,13 +0,0 @@ -#include "scheme_info.h" - -namespace NKikimr::NOlap { - -NKikimr::NOlap::TColumnSaver ISchemaDetailInfo::GetColumnSaver(const ui32 columnId) const { - auto saver = DoGetColumnSaver(columnId); - if (OverrideSerializer) { - saver.ResetSerializer(*OverrideSerializer); - } - return saver; -} - -} diff --git a/ydb/core/tx/columnshard/splitter/scheme_info.h b/ydb/core/tx/columnshard/splitter/scheme_info.h deleted file mode 100644 index 1e72e63e9d35..000000000000 --- a/ydb/core/tx/columnshard/splitter/scheme_info.h +++ /dev/null @@ -1,26 +0,0 @@ -#pragma once -#include "stats.h" - -#include - -#include - -namespace NKikimr::NOlap { - -class ISchemaDetailInfo { -private: - YDB_ACCESSOR_DEF(std::optional, OverrideSerializer); -protected: - virtual TColumnSaver DoGetColumnSaver(const ui32 columnId) const = 0; -public: - using TPtr = std::shared_ptr; - virtual ~ISchemaDetailInfo() = default; - virtual ui32 GetColumnId(const std::string& fieldName) const = 0; - TColumnSaver GetColumnSaver(const ui32 columnId) const; - virtual std::shared_ptr GetField(const ui32 columnId) 
const = 0; - virtual std::optional GetColumnSerializationStats(const ui32 columnId) const = 0; - virtual bool NeedMinMaxForColumn(const ui32 columnId) const = 0; - virtual bool IsSortedColumn(const ui32 columnId) const = 0; - virtual std::optional GetBatchSerializationStats(const std::shared_ptr& rb) const = 0; -}; -} diff --git a/ydb/core/tx/columnshard/splitter/settings.h b/ydb/core/tx/columnshard/splitter/settings.h index 146d1147aef2..d370a5206047 100644 --- a/ydb/core/tx/columnshard/splitter/settings.h +++ b/ydb/core/tx/columnshard/splitter/settings.h @@ -22,6 +22,7 @@ class TSplitSettings { YDB_ACCESSOR(i64, MinBlobSize, DefaultMinBlobSize); YDB_ACCESSOR(i64, MinRecordsCount, DefaultMinRecordsCount); YDB_ACCESSOR(i64, MaxPortionSize, DefaultMaxPortionSize); + public: ui64 GetExpectedRecordsCountOnPage() const { return 1.5 * MinRecordsCount; diff --git a/ydb/core/tx/columnshard/splitter/similar_packer.cpp b/ydb/core/tx/columnshard/splitter/similar_packer.cpp deleted file mode 100644 index 9d22b3a6b255..000000000000 --- a/ydb/core/tx/columnshard/splitter/similar_packer.cpp +++ /dev/null @@ -1,5 +0,0 @@ -#include "similar_packer.h" - -namespace NKikimr::NOlap { - -} diff --git a/ydb/core/tx/columnshard/splitter/simple.cpp b/ydb/core/tx/columnshard/splitter/simple.cpp deleted file mode 100644 index 7a155eb3158a..000000000000 --- a/ydb/core/tx/columnshard/splitter/simple.cpp +++ /dev/null @@ -1,224 +0,0 @@ -#include "simple.h" -#include -#include -#include -#include - -namespace NKikimr::NOlap { - -std::vector> TSplittedColumnChunk::DoInternalSplitImpl(const TColumnSaver& saver, const std::shared_ptr& counters, const std::vector& splitSizes) const { - auto chunks = TSimpleSplitter(saver, counters).SplitBySizes(Data.GetSlicedBatch(), Data.GetSerializedChunk(), splitSizes); - std::vector> newChunks; - for (auto&& i : chunks) { - newChunks.emplace_back(std::make_shared(GetColumnId(), i, SchemaInfo)); - } - return newChunks; -} - -TString 
TSplittedColumnChunk::DoDebugString() const { - return TStringBuilder() << "records_count=" << GetRecordsCount() << ";data=" << NArrow::DebugJson(Data.GetSlicedBatch(), 3, 3) << ";"; -} - -std::vector TSimpleSplitter::Split(const std::shared_ptr& data, const std::shared_ptr& field, const ui32 maxBlobSize) const { - AFL_VERIFY(data); - AFL_VERIFY(field); - auto schema = std::make_shared(arrow::FieldVector{field}); - auto batch = arrow::RecordBatch::Make(schema, data->length(), {data}); - return Split(batch, maxBlobSize); -} - -class TSplitChunk { -private: - std::shared_ptr Data; - YDB_READONLY_DEF(std::optional, Result); - ui32 SplitFactor = 0; - ui32 Iterations = 0; - ui32 MaxBlobSize = 8 * 1024 * 1024; - TColumnSaver ColumnSaver; - std::shared_ptr Counters; -public: - TSplitChunk(const ui32 baseSplitFactor, const ui32 maxBlobSize, const std::shared_ptr& data, const TColumnSaver& columnSaver, const std::shared_ptr& counters) - : Data(data) - , SplitFactor(baseSplitFactor) - , MaxBlobSize(maxBlobSize) - , ColumnSaver(columnSaver) - , Counters(counters) - { - AFL_VERIFY(Data && Data->num_rows()); - AFL_VERIFY(SplitFactor); - } - - TSplitChunk(const ui32 baseSplitFactor, const ui32 maxBlobSize, const std::shared_ptr& data, TString&& serializedData, const TColumnSaver& columnSaver, const std::shared_ptr& counters) - : Data(data) - , Result(TSaverSplittedChunk(data, std::move(serializedData))) - , SplitFactor(baseSplitFactor) - , MaxBlobSize(maxBlobSize) - , ColumnSaver(columnSaver) - , Counters(counters) - { - AFL_VERIFY(Data && Data->num_rows()); - AFL_VERIFY(SplitFactor); - } - - std::vector Split() { - while (true) { - AFL_VERIFY(!Result); - AFL_VERIFY(++Iterations < 100); - AFL_VERIFY(SplitFactor <= Data->num_rows())("factor", SplitFactor)("records", Data->num_rows())("iteration", Iterations)("size", NArrow::GetBatchDataSize(Data)); - bool found = false; - std::vector result; - if (SplitFactor == 1) { - TString blob = ColumnSaver.Apply(Data); - if (blob.size() < 
MaxBlobSize) { - Counters->SimpleSplitter.OnCorrectSerialized(blob.size()); - Result = TSaverSplittedChunk(Data, std::move(blob)); - found = true; - result.emplace_back(*this); - } else { - Counters->SimpleSplitter.OnTrashSerialized(blob.size()); - TBatchSerializationStat stats(blob.size(), Data->num_rows(), NArrow::GetBatchDataSize(Data)); - SplitFactor = stats.PredictOptimalSplitFactor(Data->num_rows(), MaxBlobSize).value_or(1); - if (SplitFactor == 1) { - SplitFactor = 2; - } - AFL_VERIFY(Data->num_rows() > 1); - } - } else { - TLinearSplitInfo linearSplitting = TSimpleSplitter::GetLinearSplittingByMax(Data->num_rows(), Data->num_rows() / SplitFactor); - TStringBuilder sb; - std::optional badStartPosition; - ui32 badBatchRecordsCount = 0; - ui64 badBatchSerializedSize = 0; - ui32 badBatchCount = 0; - for (auto it = linearSplitting.StartIterator(); it.IsValid(); it.Next()) { - auto slice = Data->Slice(it.GetPosition(), it.GetCurrentPackSize()); - TString blob = ColumnSaver.Apply(slice); - if (blob.size() >= MaxBlobSize) { - Counters->SimpleSplitter.OnTrashSerialized(blob.size()); - if (!badStartPosition) { - badStartPosition = it.GetPosition(); - } - badBatchSerializedSize += blob.size(); - badBatchRecordsCount += it.GetCurrentPackSize(); - ++badBatchCount; - Y_ABORT_UNLESS(!linearSplitting.IsMinimalGranularity()); - } else { - Counters->SimpleSplitter.OnCorrectSerialized(blob.size()); - if (badStartPosition) { - AFL_VERIFY(badBatchRecordsCount && badBatchCount)("count", badBatchCount)("records", badBatchRecordsCount); - auto badSlice = Data->Slice(*badStartPosition, badBatchRecordsCount); - TBatchSerializationStat stats(badBatchSerializedSize, badBatchRecordsCount, Max()); - result.emplace_back(std::max(stats.PredictOptimalSplitFactor(badBatchRecordsCount, MaxBlobSize).value_or(1), badBatchCount) + 1, MaxBlobSize, badSlice, ColumnSaver, Counters); - badStartPosition = {}; - badBatchRecordsCount = 0; - badBatchCount = 0; - badBatchSerializedSize = 0; - } - found 
= true; - result.emplace_back(1, MaxBlobSize, slice, std::move(blob), ColumnSaver, Counters); - } - } - if (badStartPosition) { - auto badSlice = Data->Slice(*badStartPosition, badBatchRecordsCount); - TBatchSerializationStat stats(badBatchSerializedSize, badBatchRecordsCount, Max()); - result.emplace_back(std::max(stats.PredictOptimalSplitFactor(badBatchRecordsCount, MaxBlobSize).value_or(1), badBatchCount) + 1, MaxBlobSize, badSlice, ColumnSaver, Counters); - } - ++SplitFactor; - } - if (found) { - return result; - } - } - AFL_VERIFY(false); - return {}; - } -}; - -std::vector TSimpleSplitter::Split(const std::shared_ptr& data, const ui32 maxBlobSize) const { - AFL_VERIFY(data->num_columns() == 1); - AFL_VERIFY(data->num_rows()); - TSplitChunk baseChunk(Stats ? Stats->PredictOptimalSplitFactor(data->num_rows(), maxBlobSize).value_or(1) : 1, maxBlobSize, data, ColumnSaver, Counters); - std::vector chunks = {baseChunk}; - for (auto it = chunks.begin(); it != chunks.end(); ) { - AFL_VERIFY(chunks.size() < 100); - if (!!it->GetResult()) { - ++it; - continue; - } - std::vector splitted = it->Split(); - if (splitted.size() == 1) { - *it = splitted.front(); - } else { - it = chunks.insert(it, splitted.begin(), splitted.end()); - chunks.erase(it + splitted.size()); - } - } - std::vector result; - for (auto&& i : chunks) { - AFL_VERIFY(i.GetResult()); - result.emplace_back(*i.GetResult()); - } - return result; -} - -std::vector TSimpleSplitter::SplitByRecordsCount(std::shared_ptr data, const std::vector& recordsCount) const { - std::vector result; - ui64 position = 0; - for (auto&& i : recordsCount) { - auto subData = data->Slice(position, i); - result.emplace_back(subData, ColumnSaver.Apply(subData)); - position += i; - } - Y_ABORT_UNLESS(position == (ui64)data->num_rows()); - return result; -} - -std::vector TSimpleSplitter::SplitBySizes(std::shared_ptr data, const TString& dataSerialization, const std::vector& splitPartSizesExt) const { - auto splitPartSizesLocal = 
splitPartSizesExt; - Y_ABORT_UNLESS(data); - { - ui32 sumSizes = 0; - for (auto&& i : splitPartSizesExt) { - sumSizes += i; - } - Y_ABORT_UNLESS(sumSizes <= dataSerialization.size()); - - if (sumSizes < dataSerialization.size()) { - splitPartSizesLocal.emplace_back(dataSerialization.size() - sumSizes); - } - } - std::vector recordsCount; - i64 remainedRecordsCount = data->num_rows(); - const double rowsPerByte = 1.0 * data->num_rows() / dataSerialization.size(); - i32 remainedParts = splitPartSizesLocal.size(); - for (ui32 idx = 0; idx < splitPartSizesLocal.size(); ++idx) { - AFL_VERIFY(remainedRecordsCount >= remainedParts)("remained_records_count", remainedRecordsCount) - ("remained_parts", remainedParts)("idx", idx)("size", splitPartSizesLocal.size())("sizes", JoinSeq(",", splitPartSizesLocal))("data_size", dataSerialization.size()); - --remainedParts; - i64 expectedRecordsCount = rowsPerByte * splitPartSizesLocal[idx]; - if (expectedRecordsCount < 1) { - expectedRecordsCount = 1; - } else if (remainedRecordsCount < expectedRecordsCount + remainedParts) { - expectedRecordsCount = remainedRecordsCount - remainedParts; - } - if (idx + 1 == splitPartSizesLocal.size()) { - expectedRecordsCount = remainedRecordsCount; - } - Y_ABORT_UNLESS(expectedRecordsCount); - recordsCount.emplace_back(expectedRecordsCount); - remainedRecordsCount -= expectedRecordsCount; - Y_ABORT_UNLESS(remainedRecordsCount >= 0); - } - Y_ABORT_UNLESS(remainedRecordsCount == 0); - return SplitByRecordsCount(data, recordsCount); -} - -std::shared_ptr TSaverSplittedChunk::GetFirstScalar() const { - return NArrow::TStatusValidator::GetValid(SlicedBatch->column(0)->GetScalar(0)); -} - -std::shared_ptr TSaverSplittedChunk::GetLastScalar() const { - return NArrow::TStatusValidator::GetValid(SlicedBatch->column(0)->GetScalar(GetRecordsCount() - 1)); -} - -} diff --git a/ydb/core/tx/columnshard/splitter/simple.h b/ydb/core/tx/columnshard/splitter/simple.h deleted file mode 100644 index 
48c7b9efa009..000000000000 --- a/ydb/core/tx/columnshard/splitter/simple.h +++ /dev/null @@ -1,199 +0,0 @@ -#pragma once -#include -#include -#include -#include "stats.h" -#include "chunks.h" -#include "scheme_info.h" - -namespace NKikimr::NOlap { - -class TSaverSplittedChunk { -private: - YDB_READONLY_DEF(std::shared_ptr, SlicedBatch); - YDB_READONLY_DEF(TString, SerializedChunk); -public: - std::shared_ptr GetColumn() const { - return SlicedBatch->column(0); - } - - ui32 GetRecordsCount() const { - return SlicedBatch->num_rows(); - } - - std::shared_ptr GetFirstScalar() const; - std::shared_ptr GetLastScalar() const; - - TSaverSplittedChunk(const std::shared_ptr& batch, TString&& serializedChunk) - : SlicedBatch(batch) - , SerializedChunk(std::move(serializedChunk)) { - Y_ABORT_UNLESS(SlicedBatch); - Y_ABORT_UNLESS(SlicedBatch->num_columns() == 1); - Y_ABORT_UNLESS(SlicedBatch->num_rows()); - } - - bool IsCompatibleColumn(const std::shared_ptr& f) const { - if (!SlicedBatch) { - return false; - } - if (SlicedBatch->num_columns() != 1) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "unexpected columns count")("expectation", 1)("actual", SlicedBatch->num_columns()); - return false; - } - if (!SlicedBatch->schema()->fields().front()->Equals(f)) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "unexpected column type")("expectation", f->ToString())("actual", SlicedBatch->schema()->fields().front()->ToString()); - return false; - } - return true; - } -}; - -class TLinearSplitInfo { -private: - YDB_READONLY(ui64, PacksCount, 0); - YDB_READONLY(ui64, PackSize, 0); - YDB_READONLY(ui64, ObjectsCount, 0); -public: - bool IsMinimalGranularity() const { - return PackSize == 1; - } - - TLinearSplitInfo(const ui64 packsCount, const ui64 packSize, const ui64 objectsCount) - : PacksCount(packsCount) - , PackSize(packSize) - , ObjectsCount(objectsCount) - { - AFL_VERIFY(objectsCount >= packsCount)("objects_count", objectsCount)("packs_count", packsCount); - 
AFL_VERIFY(PackSize); - AFL_VERIFY(PacksCount); - } - - class TIterator { - private: - const TLinearSplitInfo& Owner; - YDB_READONLY(ui64, Position, 0); - YDB_READONLY(ui64, CurrentPackSize, 0); - ui64 PackIdx = 0; - void InitPack() { - CurrentPackSize = (PackIdx + 1 == Owner.GetPacksCount()) ? Owner.ObjectsCount - Position : Owner.GetPackSize(); - } - public: - explicit TIterator(const TLinearSplitInfo& owner) - : Owner(owner) - { - InitPack(); - } - - bool IsValid() const { - if (Position < Owner.GetObjectsCount() && PackIdx < Owner.GetPacksCount()) { - return true; - } else { - Y_ABORT_UNLESS(Position == Owner.GetObjectsCount() && PackIdx == Owner.GetPacksCount()); - return false; - } - } - - bool Next() { - Y_ABORT_UNLESS(IsValid()); - Position += CurrentPackSize; - ++PackIdx; - InitPack(); - return IsValid(); - } - }; - - TIterator StartIterator() const { - return TIterator(*this); - } -}; - -class TSimpleSplitter { -private: - TColumnSaver ColumnSaver; - YDB_ACCESSOR_DEF(std::optional, Stats); - std::shared_ptr Counters; -public: - explicit TSimpleSplitter(const TColumnSaver& columnSaver, std::shared_ptr counters) - : ColumnSaver(columnSaver) - , Counters(counters) - { - - } - - static TLinearSplitInfo GetOptimalLinearSplitting(const ui64 objectsCount, const i64 optimalPackSizeExt) { - const i64 optimalPackSize = optimalPackSizeExt ? 
optimalPackSizeExt : 1; - const ui32 countPacksMax = std::max(1, (ui32)floor(1.0 * objectsCount / optimalPackSize)); - const ui32 countPacksMin = std::max(1, (ui32)ceil(1.0 * objectsCount / optimalPackSize)); - const ui32 stepPackMax = objectsCount / countPacksMin; - const ui32 stepPackMin = objectsCount / countPacksMax; - if (std::abs(optimalPackSize - stepPackMax) > std::abs(optimalPackSize - stepPackMin)) { - return TLinearSplitInfo(countPacksMax, stepPackMin, objectsCount); - } else { - return TLinearSplitInfo(countPacksMin, stepPackMax, objectsCount); - } - } - - static TLinearSplitInfo GetLinearSplittingByMax(const ui64 objectsCount, const ui64 maxPackSizeExt) { - const ui64 maxPackSize = maxPackSizeExt ? maxPackSizeExt : 1; - const ui32 countPacksMax = std::max(1, (ui32)floor(1.0 * objectsCount / maxPackSize)); - const ui32 stepPackMin = objectsCount / countPacksMax; - return TLinearSplitInfo(countPacksMax, stepPackMin, objectsCount); - } - - std::vector Split(const std::shared_ptr& data, const std::shared_ptr& field, const ui32 maxBlobSize) const; - std::vector Split(const std::shared_ptr& data, const ui32 maxBlobSize) const; - std::vector SplitByRecordsCount(std::shared_ptr data, const std::vector& recordsCount) const; - std::vector SplitBySizes(std::shared_ptr data, const TString& dataSerialization, const std::vector& splitPartSizesExt) const; -}; - -class TSplittedColumnChunk: public IPortionColumnChunk { -private: - using TBase = IPortionColumnChunk; - TSaverSplittedChunk Data; - ISchemaDetailInfo::TPtr SchemaInfo; -protected: - virtual std::vector> DoInternalSplitImpl(const TColumnSaver& saver, const std::shared_ptr& counters, const std::vector& splitSizes) const override; - virtual const TString& DoGetData() const override { - return Data.GetSerializedChunk(); - } - virtual ui32 DoGetRecordsCountImpl() const override { - return Data.GetRecordsCount(); - } - - virtual TString DoDebugString() const override; - - virtual TSimpleChunkMeta 
DoBuildSimpleChunkMeta() const override { - return TSimpleChunkMeta(Data.GetColumn(), SchemaInfo->NeedMinMaxForColumn(GetColumnId()), SchemaInfo->IsSortedColumn(GetColumnId())); - } - - virtual std::shared_ptr DoGetFirstScalar() const override { - return Data.GetFirstScalar(); - } - virtual std::shared_ptr DoGetLastScalar() const override { - return Data.GetLastScalar(); - } - virtual std::shared_ptr DoCopyWithAnotherBlob(TString&& /*data*/, const TSimpleColumnInfo& /*columnInfo*/) const override { - AFL_VERIFY(false); - return nullptr; - } - -public: - i64 GetSize() const { - return Data.GetSerializedChunk().size(); - } - - const TSaverSplittedChunk& GetData() const { - return Data; - } - - TSplittedColumnChunk() = default; - - TSplittedColumnChunk(const ui32 columnId, const TSaverSplittedChunk& data, ISchemaDetailInfo::TPtr schemaInfo) - : TBase(columnId) - , Data(data) - , SchemaInfo(schemaInfo) { - - } -}; - -} diff --git a/ydb/core/tx/columnshard/splitter/ut/batch_slice.cpp b/ydb/core/tx/columnshard/splitter/ut/batch_slice.cpp new file mode 100644 index 000000000000..880881238fa1 --- /dev/null +++ b/ydb/core/tx/columnshard/splitter/ut/batch_slice.cpp @@ -0,0 +1,65 @@ +#include "batch_slice.h" + +#include +#include +#include + +#include + +namespace NKikimr::NOlap { + +TBatchSerializedSlice::TBatchSerializedSlice(const std::shared_ptr& batch, NArrow::NSplitter::ISchemaDetailInfo::TPtr schema, + std::shared_ptr counters, const NSplitter::TSplitSettings& settings) + : TBase(TValidator::CheckNotNull(batch)->num_rows(), schema, counters) + , Batch(batch) { + Y_ABORT_UNLESS(batch); + Data.reserve(batch->num_columns()); + for (auto&& i : batch->schema()->fields()) { + TSplittedEntity c(schema->GetColumnId(i->name())); + Data.emplace_back(std::move(c)); + } + + ui32 idx = 0; + for (auto&& i : batch->columns()) { + auto& c = Data[idx]; + auto columnSaver = schema->GetColumnSaver(c.GetEntityId()); + auto stats = schema->GetColumnSerializationStats(c.GetEntityId()); + 
NKikimr::NArrow::NSplitter::TSimpleSplitter splitter(columnSaver); + splitter.SetStats(stats); + std::vector> chunks; + for (auto&& i : splitter.Split(i, Schema->GetField(c.GetEntityId()), settings.GetMaxBlobSize())) { + NOlap::TSimpleColumnInfo columnInfo(c.GetEntityId(), Schema->GetField(c.GetEntityId()), + Schema->GetColumnSaver(c.GetEntityId()).GetSerializer(), true, false, true, nullptr); + chunks.emplace_back(std::make_shared(i.GetSerializedChunk(), + std::make_shared(i.GetSlicedBatch()->column(0)), TChunkAddress(c.GetEntityId(), 0), + columnInfo)); + } + c.SetChunks(chunks); + Size += c.GetSize(); + ++idx; + } +} + +std::vector TBatchSerializedSlice::BuildSimpleSlices(const std::shared_ptr& batch, + const NSplitter::TSplitSettings& settings, const std::shared_ptr& counters, + const NArrow::NSplitter::ISchemaDetailInfo::TPtr& schemaInfo) { + std::vector slices; + auto stats = schemaInfo->GetBatchSerializationStats(batch); + ui32 recordsCount = settings.GetMinRecordsCount(); + if (stats) { + const ui32 recordsCountForMinSize = + stats->PredictOptimalPackRecordsCount(batch->num_rows(), settings.GetMinBlobSize()).value_or(recordsCount); + const ui32 recordsCountForMaxPortionSize = + stats->PredictOptimalPackRecordsCount(batch->num_rows(), settings.GetMaxPortionSize()).value_or(recordsCount); + recordsCount = std::min(recordsCountForMaxPortionSize, std::max(recordsCount, recordsCountForMinSize)); + } + auto linearSplitInfo = NKikimr::NArrow::NSplitter::TSimpleSplitter::GetOptimalLinearSplitting(batch->num_rows(), recordsCount); + for (auto it = linearSplitInfo.StartIterator(); it.IsValid(); it.Next()) { + std::shared_ptr current = batch->Slice(it.GetPosition(), it.GetCurrentPackSize()); + TBatchSerializedSlice slice(current, schemaInfo, counters, settings); + slices.emplace_back(std::move(slice)); + } + return slices; +} + +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/splitter/ut/batch_slice.h 
b/ydb/core/tx/columnshard/splitter/ut/batch_slice.h new file mode 100644 index 000000000000..c085eb1568ce --- /dev/null +++ b/ydb/core/tx/columnshard/splitter/ut/batch_slice.h @@ -0,0 +1,31 @@ +#pragma once +#include + +namespace NKikimr::NOlap { + +class TBatchSerializedSlice: public TGeneralSerializedSlice { +private: + using TBase = TGeneralSerializedSlice; + YDB_READONLY_DEF(std::shared_ptr, Batch); + +public: + TBatchSerializedSlice(const std::shared_ptr& batch, NArrow::NSplitter::ISchemaDetailInfo::TPtr schema, + std::shared_ptr counters, const NSplitter::TSplitSettings& settings); + + explicit TBatchSerializedSlice(NArrow::NSplitter::TVectorView&& objects) { + Y_ABORT_UNLESS(objects.size()); + std::swap(*this, objects.front()); + for (ui32 i = 1; i < objects.size(); ++i) { + MergeSlice(std::move(objects[i])); + } + } + void MergeSlice(TBatchSerializedSlice&& slice) { + Batch = NArrow::CombineBatches({ Batch, slice.Batch }); + TBase::MergeSlice(std::move(slice)); + } + + static std::vector BuildSimpleSlices(const std::shared_ptr& batch, + const NSplitter::TSplitSettings& settings, const std::shared_ptr& counters, + const NArrow::NSplitter::ISchemaDetailInfo::TPtr& schemaInfo); +}; +} diff --git a/ydb/core/tx/columnshard/splitter/ut/ut_splitter.cpp b/ydb/core/tx/columnshard/splitter/ut/ut_splitter.cpp index f2f942dbbab3..7ca04ee36933 100644 --- a/ydb/core/tx/columnshard/splitter/ut/ut_splitter.cpp +++ b/ydb/core/tx/columnshard/splitter/ut/ut_splitter.cpp @@ -1,29 +1,32 @@ -#include -#include -#include -#include -#include -#include +#include "batch_slice.h" -#include -#include +#include +#include +#include #include -#include - -#include +#include +#include +#include +#include +#include +#include +#include +#include #include +#include Y_UNIT_TEST_SUITE(Splitter) { - using namespace NKikimr::NArrow; - class TTestSnapshotSchema: public NKikimr::NOlap::ISchemaDetailInfo { + class TTestSnapshotSchema: public NKikimr::NArrow::NSplitter::ISchemaDetailInfo { private: 
mutable std::map Decoder; + protected: - virtual NKikimr::NOlap::TColumnSaver DoGetColumnSaver(const ui32 columnId) const override { - return NKikimr::NOlap::TColumnSaver(nullptr, std::make_shared(arrow::ipc::IpcOptions::Defaults())); + virtual NKikimr::NArrow::NAccessor::TColumnSaver DoGetColumnSaver(const ui32 columnId) const override { + return NKikimr::NArrow::NAccessor::TColumnSaver( + nullptr, std::make_shared(arrow::ipc::IpcOptions::Defaults())); } public: @@ -34,17 +37,18 @@ Y_UNIT_TEST_SUITE(Splitter) { return false; } - virtual std::optional GetColumnSerializationStats(const ui32 /*columnId*/) const override { + virtual std::optional GetColumnSerializationStats( + const ui32 /*columnId*/) const override { return {}; } - virtual std::optional GetBatchSerializationStats(const std::shared_ptr& /*rb*/) const override { + virtual std::optional GetBatchSerializationStats( + const std::shared_ptr& /*rb*/) const override { return {}; } - NKikimr::NOlap::TColumnLoader GetColumnLoader(const ui32 columnId) const { - arrow::FieldVector v = {std::make_shared(GetColumnName(columnId), std::make_shared())}; - auto schema = std::make_shared(v); - return NKikimr::NOlap::TColumnLoader(nullptr, NSerialization::TSerializerContainer::GetDefaultSerializer(), schema, nullptr, columnId); + NKikimr::NArrow::NAccessor::TColumnLoader GetColumnLoader(const ui32 columnId) const { + return NKikimr::NArrow::NAccessor::TColumnLoader(nullptr, NSerialization::TSerializerContainer::GetDefaultSerializer(), + NKikimr::NArrow::NAccessor::TConstructorContainer::GetDefaultConstructor(), GetField(columnId), nullptr, columnId); } virtual std::shared_ptr GetField(const ui32 columnId) const override { @@ -80,9 +84,8 @@ Y_UNIT_TEST_SUITE(Splitter) { YDB_ACCESSOR(std::optional, ExpectedInternalSplitsCount, 0); public: - void Execute(std::shared_ptr batch, - const NKikimr::NOlap::NSplitter::TSplitSettings& settings = NKikimr::NOlap::NSplitter::TSplitSettings() - ) { + void Execute(std::shared_ptr 
batch, + const NKikimr::NOlap::NSplitter::TSplitSettings& settings = NKikimr::NOlap::NSplitter::TSplitSettings()) { using namespace NKikimr::NOlap; NKikimr::NColumnShard::TIndexationCounters counters("test"); std::vector generalSlices; @@ -93,9 +96,9 @@ Y_UNIT_TEST_SUITE(Splitter) { } } - TSimilarPacker packer(settings.GetExpectedPortionSize()); + NKikimr::NArrow::NSplitter::TSimilarPacker packer(settings.GetExpectedPortionSize()); auto packs = packer.Split(generalSlices); - const NSplitter::TEntityGroups groups(settings, "default"); + const NKikimr::NOlap::NSplitter::TEntityGroups groups(settings, "default"); const ui32 portionsCount = packs.size(); ui32 blobsCount = 0; ui32 chunksCount = 0; @@ -129,9 +132,11 @@ Y_UNIT_TEST_SUITE(Splitter) { } portionSize += bSize; AFL_VERIFY(bSize < (ui64)settings.GetMaxBlobSize()); - AFL_VERIFY(bSize * 1.01 > (ui64)settings.GetMinBlobSize() || (packs.size() == 1 && blobsLocal.size() == 1))("blob_size", bSize); + AFL_VERIFY(bSize * 1.01 > (ui64)settings.GetMinBlobSize() || (packs.size() == 1 && blobsLocal.size() == 1))( + "blob_size", bSize); } - AFL_VERIFY(portionSize >= settings.GetExpectedPortionSize() || packs.size() == 1)("size", portionSize)("limit", settings.GetMaxPortionSize()); + AFL_VERIFY(portionSize >= settings.GetExpectedPortionSize() || packs.size() == 1)("size", portionSize)( + "limit", settings.GetMaxPortionSize()); THashMap> entitiesByRecordsCount; ui32 pagesRestore = 0; @@ -141,7 +146,7 @@ Y_UNIT_TEST_SUITE(Splitter) { ui32 count = 0; for (auto&& c : e.second) { auto slice = arr->Slice(count + portionShift, c->GetRecordsCountVerified()); - auto readBatch = *Schema->GetColumnLoader(e.first).Apply(c->GetData()); + auto readBatch = Schema->GetColumnLoader(e.first).ApplyRawVerified(c->GetData()); AFL_VERIFY(slice->length() == readBatch->num_rows()); Y_ABORT_UNLESS(readBatch->column(0)->RangeEquals(*slice, 0, readBatch->num_rows(), 0, arrow::EqualOptions::Defaults())); count += c->GetRecordsCountVerified(); @@ 
-161,91 +166,107 @@ Y_UNIT_TEST_SUITE(Splitter) { } AFL_VERIFY(portionShift = batch->num_rows()); AFL_VERIFY(pagesSum == generalSlices.size())("sum", pagesSum)("general_slices", generalSlices.size()); - AFL_VERIFY(internalSplitsCount == ExpectedInternalSplitsCount.value_or(internalSplitsCount))("expected", *ExpectedInternalSplitsCount)("real", internalSplitsCount); + AFL_VERIFY(internalSplitsCount == ExpectedInternalSplitsCount.value_or(internalSplitsCount))( + "expected", *ExpectedInternalSplitsCount)("real", internalSplitsCount); AFL_VERIFY(blobsCount == ExpectBlobsCount.value_or(blobsCount))("blobs_count", blobsCount)("expected", *ExpectBlobsCount); AFL_VERIFY(pagesSum == ExpectSlicesCount.value_or(pagesSum))("sum", pagesSum)("expected", *ExpectSlicesCount); - AFL_VERIFY(portionsCount == ExpectPortionsCount.value_or(portionsCount))("portions_count", portionsCount)("expected", *ExpectPortionsCount); + AFL_VERIFY(portionsCount == ExpectPortionsCount.value_or(portionsCount))("portions_count", portionsCount)( + "expected", *ExpectPortionsCount); AFL_VERIFY(chunksCount == ExpectChunksCount.value_or(chunksCount))("chunks_count", chunksCount)("expected", *ExpectChunksCount); - } }; Y_UNIT_TEST(Simple) { - NConstruction::IArrayBuilder::TPtr column = std::make_shared>( - "field", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 512)); - std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({column}).BuildBatch(80048); + NConstruction::IArrayBuilder::TPtr column = + std::make_shared>( + "field", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 512)); + std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({ column }).BuildBatch(80048); NKikimr::NColumnShard::TIndexationCounters counters("test"); TSplitTester().SetExpectBlobsCount(8).SetExpectSlicesCount(8).Execute(batch); } Y_UNIT_TEST(Small) { - NConstruction::IArrayBuilder::TPtr column = std::make_shared>( - "field", 
NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 24)); - std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({column}).BuildBatch(80048); + NConstruction::IArrayBuilder::TPtr column = + std::make_shared>( + "field", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 24)); + std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({ column }).BuildBatch(80048); NKikimr::NColumnShard::TIndexationCounters counters("test"); TSplitTester().SetExpectBlobsCount(1).SetExpectSlicesCount(8).Execute(batch); } Y_UNIT_TEST(Minimal) { - NConstruction::IArrayBuilder::TPtr column = std::make_shared>( - "field", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 512)); - std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({column}).BuildBatch(4048); + NConstruction::IArrayBuilder::TPtr column = + std::make_shared>( + "field", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 512)); + std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({ column }).BuildBatch(4048); NKikimr::NColumnShard::TIndexationCounters counters("test"); TSplitTester().SetExpectBlobsCount(1).SetExpectSlicesCount(1).Execute(batch); } Y_UNIT_TEST(Trivial) { - NConstruction::IArrayBuilder::TPtr column = std::make_shared>( - "field", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 512)); - std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({column}).BuildBatch(10048); + NConstruction::IArrayBuilder::TPtr column = + std::make_shared>( + "field", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 512)); + std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({ column }).BuildBatch(10048); TSplitTester().SetExpectBlobsCount(1).SetExpectSlicesCount(1).Execute(batch); } Y_UNIT_TEST(BigAndSmall) { - NConstruction::IArrayBuilder::TPtr columnBig = std::make_shared>( - "field1", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 512)); - 
NConstruction::IArrayBuilder::TPtr columnSmall = std::make_shared>( - "field2", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 1)); - std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({columnBig, columnSmall}).BuildBatch(80048); + NConstruction::IArrayBuilder::TPtr columnBig = + std::make_shared>( + "field1", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 512)); + NConstruction::IArrayBuilder::TPtr columnSmall = + std::make_shared>( + "field2", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 1)); + std::shared_ptr batch = + NKikimr::NArrow::NConstruction::TRecordBatchConstructor({ columnBig, columnSmall }).BuildBatch(80048); NKikimr::NColumnShard::TIndexationCounters counters("test"); TSplitTester().SetExpectBlobsCount(8).SetExpectSlicesCount(8).Execute(batch); } Y_UNIT_TEST(CritSmallPortions) { - NConstruction::IArrayBuilder::TPtr columnBig = std::make_shared>( - "field1", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 7120)); - NConstruction::IArrayBuilder::TPtr columnSmall = std::make_shared>( - "field2", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 128)); - std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({columnBig, columnSmall}).BuildBatch(80048); + NConstruction::IArrayBuilder::TPtr columnBig = + std::make_shared>( + "field1", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 7120)); + NConstruction::IArrayBuilder::TPtr columnSmall = + std::make_shared>( + "field2", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 128)); + std::shared_ptr batch = + NKikimr::NArrow::NConstruction::TRecordBatchConstructor({ columnBig, columnSmall }).BuildBatch(80048); NKikimr::NColumnShard::TIndexationCounters counters("test"); - TSplitTester().SetExpectBlobsCount(80).SetExpectSlicesCount(80).SetExpectedInternalSplitsCount(0).SetExpectPortionsCount(40) - .Execute(batch, NKikimr::NOlap::NSplitter::TSplitSettings().SetMinRecordsCount(1000).SetMaxPortionSize(8000000)); + 
TSplitTester().SetExpectBlobsCount(80).SetExpectSlicesCount(80).SetExpectedInternalSplitsCount(0).SetExpectPortionsCount(40).Execute( + batch, NKikimr::NOlap::NSplitter::TSplitSettings().SetMinRecordsCount(1000).SetMaxPortionSize(8000000)); } Y_UNIT_TEST(Crit) { - NConstruction::IArrayBuilder::TPtr columnBig = std::make_shared>( - "field1", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 7120)); - NConstruction::IArrayBuilder::TPtr columnSmall = std::make_shared>( - "field2", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 128)); - std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({columnBig, columnSmall}).BuildBatch(80048); + NConstruction::IArrayBuilder::TPtr columnBig = + std::make_shared>( + "field1", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 7120)); + NConstruction::IArrayBuilder::TPtr columnSmall = + std::make_shared>( + "field2", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 128)); + std::shared_ptr batch = + NKikimr::NArrow::NConstruction::TRecordBatchConstructor({ columnBig, columnSmall }).BuildBatch(80048); NKikimr::NColumnShard::TIndexationCounters counters("test"); - TSplitTester().SetExpectBlobsCount(80).SetExpectSlicesCount(8).SetExpectedInternalSplitsCount(8).SetExpectPortionsCount(8).Execute(batch); + TSplitTester().SetExpectBlobsCount(80).SetExpectSlicesCount(8).SetExpectedInternalSplitsCount(8).SetExpectPortionsCount(8).Execute( + batch); } Y_UNIT_TEST(CritSimple) { - NConstruction::IArrayBuilder::TPtr columnBig = std::make_shared>( - "field1", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 7120)); - std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({columnBig}).BuildBatch(80048); + NConstruction::IArrayBuilder::TPtr columnBig = + std::make_shared>( + "field1", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 7120)); + std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({ columnBig }).BuildBatch(80048); 
NKikimr::NColumnShard::TIndexationCounters counters("test"); - TSplitTester().SetExpectBlobsCount(72).SetExpectSlicesCount(8).SetExpectedInternalSplitsCount(0).SetExpectPortionsCount(8).Execute(batch); + TSplitTester().SetExpectBlobsCount(72).SetExpectSlicesCount(8).SetExpectedInternalSplitsCount(0).SetExpectPortionsCount(8).Execute( + batch); } - }; diff --git a/ydb/core/tx/columnshard/splitter/ut/ya.make b/ydb/core/tx/columnshard/splitter/ut/ya.make index 24d266bffa8e..3f49857ce07a 100644 --- a/ydb/core/tx/columnshard/splitter/ut/ya.make +++ b/ydb/core/tx/columnshard/splitter/ut/ya.make @@ -18,6 +18,7 @@ PEERDIR( ydb/core/kqp/session_actor ydb/core/tx/tx_proxy ydb/core/tx/columnshard/engines/storage/chunks + ydb/core/tx/columnshard/engines/storage/indexes/max ydb/core/tx ydb/core/mind ydb/library/yql/minikql/comp_nodes/llvm14 @@ -40,6 +41,7 @@ CFLAGS( SRCS( ut_splitter.cpp + batch_slice.cpp ) END() diff --git a/ydb/core/tx/columnshard/splitter/ya.make b/ydb/core/tx/columnshard/splitter/ya.make index 5f6c60cdf1ff..380d51bca325 100644 --- a/ydb/core/tx/columnshard/splitter/ya.make +++ b/ydb/core/tx/columnshard/splitter/ya.make @@ -3,12 +3,8 @@ LIBRARY() SRCS( batch_slice.cpp chunks.cpp - simple.cpp - similar_packer.cpp - stats.cpp column_info.cpp settings.cpp - scheme_info.cpp blob_info.cpp chunk_meta.cpp ) @@ -17,6 +13,7 @@ PEERDIR( contrib/libs/apache/arrow ydb/core/tx/columnshard/splitter/abstract ydb/core/tx/columnshard/engines/scheme + ydb/core/formats/arrow/splitter ) END() diff --git a/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.cpp b/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.cpp index bee19d729f86..31de6ffef8a5 100644 --- a/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.cpp +++ b/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.cpp @@ -1,8 +1,9 @@ #include "columnshard_ut_common.h" +#include "shard_reader.h" -#include #include #include +#include #include #include @@ -124,22 +125,25 @@ bool 
WriteDataImpl(TTestBasicRuntime& runtime, TActorId& sender, const ui64 shar } bool WriteData(TTestBasicRuntime& runtime, TActorId& sender, const ui64 shardId, const ui64 writeId, const ui64 tableId, const TString& data, - const std::vector& ydbSchema, std::vector* writeIds, const NEvWrite::EModificationType mType) { + const std::vector& ydbSchema, std::vector* writeIds, const NEvWrite::EModificationType mType) { NLongTxService::TLongTxId longTxId; UNIT_ASSERT(longTxId.ParseString("ydb://long-tx/01ezvvxjdk2hd4vdgjs68knvp8?node_id=1")); - return WriteDataImpl(runtime, sender, shardId, tableId, longTxId, writeId, data, NArrow::MakeArrowSchema(ydbSchema), writeIds, mType); - + return WriteDataImpl( + runtime, sender, shardId, tableId, longTxId, writeId, data, NArrow::MakeArrowSchema(ydbSchema), writeIds, mType); } bool WriteData(TTestBasicRuntime& runtime, TActorId& sender, const ui64 writeId, const ui64 tableId, const TString& data, - const std::vector& ydbSchema, bool waitResult, std::vector* writeIds, const NEvWrite::EModificationType mType) { + const std::vector& ydbSchema, bool waitResult, std::vector* writeIds, + const NEvWrite::EModificationType mType) { NLongTxService::TLongTxId longTxId; UNIT_ASSERT(longTxId.ParseString("ydb://long-tx/01ezvvxjdk2hd4vdgjs68knvp8?node_id=1")); if (writeIds) { - return WriteDataImpl(runtime, sender, TTestTxConfig::TxTablet0, tableId, longTxId, writeId, data, NArrow::MakeArrowSchema(ydbSchema), writeIds, mType); + return WriteDataImpl(runtime, sender, TTestTxConfig::TxTablet0, tableId, longTxId, writeId, data, + NArrow::MakeArrowSchema(ydbSchema), writeIds, mType); } std::vector ids; - return WriteDataImpl(runtime, sender, TTestTxConfig::TxTablet0, tableId, longTxId, writeId, data, NArrow::MakeArrowSchema(ydbSchema), waitResult ? &ids : nullptr, mType); + return WriteDataImpl(runtime, sender, TTestTxConfig::TxTablet0, tableId, longTxId, writeId, data, + NArrow::MakeArrowSchema(ydbSchema), waitResult ? 
&ids : nullptr, mType); } std::optional WriteData(TTestBasicRuntime& runtime, TActorId& sender, const NLongTxService::TLongTxId& longTxId, @@ -396,30 +400,54 @@ NMetadata::NFetcher::ISnapshot::TPtr TTestSchema::BuildSnapshot(const TTableSpec return cs; } +void TTestSchema::InitSchema(const std::vector& columns, const std::vector& pk, + const TTableSpecials& specials, NKikimrSchemeOp::TColumnTableSchema* schema) { + schema->SetEngine(NKikimrSchemeOp::COLUMN_ENGINE_REPLACING_TIMESERIES); + + for (ui32 i = 0; i < columns.size(); ++i) { + *schema->MutableColumns()->Add() = columns[i].CreateColumn(i + 1); + if (!specials.NeedTestStatistics()) { + continue; + } + if (NOlap::NIndexes::NMax::TIndexMeta::IsAvailableType(columns[i].GetType())) { + *schema->AddIndexes() = NOlap::NIndexes::TIndexMetaContainer( + std::make_shared(1000 + i, "MAX::INDEX::" + columns[i].GetName(), "__LOCAL_METADATA", i + 1)) + .SerializeToProto(); + } + } + + Y_ABORT_UNLESS(pk.size() > 0); + for (auto& column : ExtractNames(pk)) { + schema->AddKeyColumnNames(column); + } + + if (specials.HasCodec()) { + schema->MutableDefaultCompression()->SetCodec(specials.GetCodecId()); + } + if (specials.CompressionLevel) { + schema->MutableDefaultCompression()->SetLevel(*specials.CompressionLevel); + } +} + } namespace NKikimr::NColumnShard { NOlap::TIndexInfo BuildTableInfo(const std::vector& ydbSchema, const std::vector& key) { - NOlap::TIndexInfo indexInfo = NOlap::TIndexInfo::BuildDefault(); - + THashMap columns; for (ui32 i = 0; i < ydbSchema.size(); ++i) { ui32 id = i + 1; auto& name = ydbSchema[i].GetName(); auto& type = ydbSchema[i].GetType(); - indexInfo.Columns[id] = NTable::TColumn(name, id, type, ""); - indexInfo.ColumnNames[name] = id; + columns[id] = NTable::TColumn(name, id, type, ""); } + std::vector pkNames; for (const auto& c : key) { - indexInfo.KeyColumns.push_back(indexInfo.ColumnNames[c.GetName()]); + pkNames.push_back(c.GetName()); } - - auto storage = std::make_shared(); - 
storage->Initialize(TInstant::Now().Seconds()); - indexInfo.SetAllKeys(NOlap::TTestStoragesManager::GetInstance()); - return indexInfo; + return NOlap::TIndexInfo::BuildDefault(NOlap::TTestStoragesManager::GetInstance(), columns, pkNames); } void SetupSchema(TTestBasicRuntime& runtime, TActorId& sender, const TString& txBody, const NOlap::TSnapshot& snapshot, bool succeed) { @@ -489,7 +517,7 @@ namespace NKikimr::NColumnShard { fields.emplace_back(f.GetName()); } - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, snapshot); + NTxUT::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, snapshot); reader.SetReplyColumns(fields); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); diff --git a/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h b/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h index 12b056b6ba6a..7594be5da952 100644 --- a/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h +++ b/ydb/core/tx/columnshard/test_helper/columnshard_ut_common.h @@ -1,21 +1,21 @@ #pragma once -#include -#include -#include - #include -#include +#include #include #include #include #include -#include +#include +#include +#include +#include +#include + +#include #include #include -#include -#include namespace NKikimr::NOlap { struct TIndexInfo; @@ -168,7 +168,7 @@ struct TTestSchema { // PK firstKeyItem, TTestColumn("resource_type", TTypeInfo(NTypeIds::Utf8) ), - TTestColumn("resource_id", TTypeInfo(NTypeIds::Utf8) ), + TTestColumn("resource_id", TTypeInfo(NTypeIds::Utf8)).SetAccessorClassName("SPARSED"), TTestColumn("uid", TTypeInfo(NTypeIds::Utf8) ).SetStorageId("__MEMORY"), TTestColumn("level", TTypeInfo(NTypeIds::Int32) ), TTestColumn("message", TTypeInfo(NTypeIds::Utf8) ).SetStorageId("__MEMORY"), @@ -184,7 +184,7 @@ struct TTestSchema { std::vector schema = { // PK TTestColumn("timestamp", TTypeInfo(NTypeIds::Timestamp) ), - TTestColumn("resource_type", TTypeInfo(NTypeIds::Utf8) ), + 
TTestColumn("resource_type", TTypeInfo(NTypeIds::Utf8)).SetAccessorClassName("SPARSED"), TTestColumn("resource_id", TTypeInfo(NTypeIds::Utf8) ), TTestColumn("uid", TTypeInfo(NTypeIds::Utf8) ).SetStorageId("__MEMORY"), // @@ -193,7 +193,7 @@ struct TTestSchema { TTestColumn("json_payload", TTypeInfo(NTypeIds::JsonDocument) ), TTestColumn("ingested_at", TTypeInfo(NTypeIds::Timestamp) ), TTestColumn("saved_at", TTypeInfo(NTypeIds::Timestamp) ), - TTestColumn("request_id", TTypeInfo(NTypeIds::Yson) ) + TTestColumn("request_id", TTypeInfo(NTypeIds::Yson)).SetAccessorClassName("SPARSED") }; return schema; }; @@ -202,7 +202,7 @@ struct TTestSchema { std::vector schema = { TTestColumn("timestamp", TTypeInfo(NTypeIds::Timestamp) ), TTestColumn("resource_type", TTypeInfo(NTypeIds::Utf8) ).SetStorageId("__MEMORY"), - TTestColumn("resource_id", TTypeInfo(NTypeIds::Utf8) ), + TTestColumn("resource_id", TTypeInfo(NTypeIds::Utf8)).SetAccessorClassName("SPARSED"), TTestColumn("uid", TTypeInfo(NTypeIds::Utf8) ).SetStorageId("__MEMORY") }; return schema; @@ -244,32 +244,7 @@ struct TTestSchema { static void InitSchema(const std::vector& columns, const std::vector& pk, const TTableSpecials& specials, - NKikimrSchemeOp::TColumnTableSchema* schema) - { - schema->SetEngine(NKikimrSchemeOp::COLUMN_ENGINE_REPLACING_TIMESERIES); - - for (ui32 i = 0; i < columns.size(); ++i) { - *schema->MutableColumns()->Add() = columns[i].CreateColumn(i + 1); - if (!specials.NeedTestStatistics()) { - continue; - } - if (NOlap::NStatistics::NMax::TOperator::IsAvailableType(columns[i].GetType())) { - *schema->AddStatistics() = NOlap::NStatistics::TOperatorContainer("MAX::" + columns[i].GetName(), std::make_shared(i + 1)).SerializeToProto(); - } - } - - Y_ABORT_UNLESS(pk.size() > 0); - for (auto& column : ExtractNames(pk)) { - schema->AddKeyColumnNames(column); - } - - if (specials.HasCodec()) { - schema->MutableDefaultCompression()->SetCodec(specials.GetCodecId()); - } - if (specials.CompressionLevel) { - 
schema->MutableDefaultCompression()->SetLevel(*specials.CompressionLevel); - } - } + NKikimrSchemeOp::TColumnTableSchema* schema); static void InitTtl(const TTableSpecials& specials, NKikimrSchemeOp::TColumnDataLifeCycle::TTtl* ttl) { Y_ABORT_UNLESS(specials.HasTtl()); @@ -432,10 +407,12 @@ void PlanSchemaTx(TTestBasicRuntime& runtime, TActorId& sender, NOlap::TSnapshot void PlanWriteTx(TTestBasicRuntime& runtime, TActorId& sender, NOlap::TSnapshot snap, bool waitResult = true); bool WriteData(TTestBasicRuntime& runtime, TActorId& sender, const ui64 shardId, const ui64 writeId, const ui64 tableId, const TString& data, - const std::vector& ydbSchema, std::vector* writeIds, const NEvWrite::EModificationType mType = NEvWrite::EModificationType::Upsert); + const std::vector& ydbSchema, std::vector* writeIds, + const NEvWrite::EModificationType mType = NEvWrite::EModificationType::Upsert); bool WriteData(TTestBasicRuntime& runtime, TActorId& sender, const ui64 writeId, const ui64 tableId, const TString& data, - const std::vector& ydbSchema, bool waitResult = true, std::vector* writeIds = nullptr, const NEvWrite::EModificationType mType = NEvWrite::EModificationType::Upsert); + const std::vector& ydbSchema, bool waitResult = true, std::vector* writeIds = nullptr, + const NEvWrite::EModificationType mType = NEvWrite::EModificationType::Upsert); std::optional WriteData(TTestBasicRuntime& runtime, TActorId& sender, const NLongTxService::TLongTxId& longTxId, ui64 tableId, const ui64 writePartId, const TString& data, @@ -500,11 +477,13 @@ namespace NKikimr::NColumnShard { auto& builder = Owner.Builders[Index]; auto type = builder->type(); - NArrow::SwitchType(type->id(), [&](const auto& t) { + Y_ABORT_UNLESS(NArrow::SwitchType(type->id(), [&](const auto& t) { using TWrap = std::decay_t; using T = typename TWrap::T; using TBuilder = typename arrow::TypeTraits::BuilderType; + AFL_NOTICE(NKikimrServices::TX_COLUMNSHARD)("T", typeid(T).name()); + auto& typedBuilder = 
static_cast(*builder); if constexpr (std::is_arithmetic::value) { if constexpr (arrow::has_c_type::value) { @@ -519,9 +498,16 @@ namespace NKikimr::NColumnShard { return true; } } + + if constexpr (std::is_same::value) { + if constexpr (arrow::is_decimal128_type::value) { + Y_ABORT_UNLESS(typedBuilder.Append(arrow::Decimal128(data.Hi_, data.Low_)).ok()); + return true; + } + } Y_ABORT("Unknown type combination"); return false; - }); + })); return TRowBuilder(Index + 1, Owner); } diff --git a/ydb/core/tx/columnshard/test_helper/controllers.h b/ydb/core/tx/columnshard/test_helper/controllers.h index b18c2bc34e8d..68cd6a1dc4ed 100644 --- a/ydb/core/tx/columnshard/test_helper/controllers.h +++ b/ydb/core/tx/columnshard/test_helper/controllers.h @@ -21,31 +21,28 @@ class TWaitCompactionController: public NYDBTest::NColumnShard::TController { virtual bool NeedForceCompactionBacketsConstruction() const override { return true; } - virtual ui64 GetSmallPortionSizeDetector(const ui64 /*def*/) const override { + virtual ui64 DoGetSmallPortionSizeDetector(const ui64 /*def*/) const override { return SmallSizeDetector.value_or(0); } - virtual TDuration GetOptimizerFreshnessCheckDuration(const TDuration /*defaultValue*/) const override { + virtual TDuration DoGetOptimizerFreshnessCheckDuration(const TDuration /*defaultValue*/) const override { return TDuration::Zero(); } - virtual TDuration GetLagForCompactionBeforeTierings(const TDuration /*def*/) const override { + virtual TDuration DoGetLagForCompactionBeforeTierings(const TDuration /*def*/) const override { return TDuration::Zero(); } - virtual TDuration GetCompactionActualizationLag(const TDuration /*def*/) const override { + virtual TDuration DoGetCompactionActualizationLag(const TDuration /*def*/) const override { return TDuration::Zero(); } - virtual TDuration GetTTLDefaultWaitingDuration(const TDuration /*defaultValue*/) const override { - return TDuration::Seconds(1); - } public: TWaitCompactionController() { - 
SetPeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); } ui32 GetFinishedExportsCount() const { return ExportsFinishedCount.Val(); } - virtual void OnStatisticsUsage(const NKikimr::NOlap::NStatistics::TOperatorContainer& /*statOperator*/) override { + virtual void OnStatisticsUsage(const NKikimr::NOlap::NIndexes::TIndexMetaContainer& /*statOperator*/) override { StatisticsUsageCount.Inc(); } virtual void OnMaxValueUsage() override { diff --git a/ydb/core/tx/columnshard/test_helper/helper.cpp b/ydb/core/tx/columnshard/test_helper/helper.cpp index 8b35442b7abb..d64003c5d525 100644 --- a/ydb/core/tx/columnshard/test_helper/helper.cpp +++ b/ydb/core/tx/columnshard/test_helper/helper.cpp @@ -1,12 +1,17 @@ #include "helper.h" -#include + #include +#include #include #include #include -#include -#include +#include #include +#include + +#include + +#include #ifndef KIKIMR_DISABLE_S3_OPS #include #endif @@ -21,6 +26,9 @@ NKikimrSchemeOp::TOlapColumnDescription TTestColumn::CreateColumn(const ui32 id) col.SetStorageId(StorageId); } auto columnType = NScheme::ProtoColumnTypeFromTypeInfoMod(Type, ""); + if (AccessorClassName) { + col.MutableDataAccessorConstructor()->SetClassName(AccessorClassName); + } col.SetTypeId(columnType.TypeId); if (columnType.TypeInfo) { *col.MutableTypeInfo() = *columnType.TypeInfo; @@ -28,6 +36,16 @@ NKikimrSchemeOp::TOlapColumnDescription TTestColumn::CreateColumn(const ui32 id) return col; } +std::set TTestColumn::GetNullableSet(const std::vector& columns) { + std::set result; + for (auto&& i : columns) { + if (!i.GetNullable()) { + result.emplace(i.GetName()); + } + } + return result; +} + std::vector> TTestColumn::ConvertToPairs(const std::vector& columns) { std::vector> result; for (auto&& i : columns) { @@ -36,7 +54,8 @@ std::vector> TTestColumn::Conver return result; } -std::vector TTestColumn::BuildFromPairs(const std::vector>& columns) { +std::vector 
TTestColumn::BuildFromPairs( + const std::vector>& columns) { std::vector result; for (auto&& i : columns) { result.emplace_back(i.first, i.second); @@ -57,38 +76,44 @@ std::vector TTestColumn::CropSchema(const s return std::vector(input.begin(), input.begin() + size); } -} +} // namespace NKikimr::NArrow::NTest namespace NKikimr::NArrow { -std::vector> MakeArrowFields(const std::vector& columns, const std::set& notNullColumns /*= {}*/) { +std::vector> MakeArrowFields(const std::vector& columns) { + std::set notNullColumns = NTest::TTestColumn::GetNullableSet(columns); auto result = MakeArrowFields(NTest::TTestColumn::ConvertToPairs(columns), notNullColumns); UNIT_ASSERT_C(result.ok(), result.status().ToString()); return result.ValueUnsafe(); } -std::shared_ptr MakeArrowSchema(const std::vector& columns, const std::set& notNullColumns /*= {}*/) { +std::shared_ptr MakeArrowSchema(const std::vector& columns) { + std::set notNullColumns = NTest::TTestColumn::GetNullableSet(columns); auto result = MakeArrowSchema(NTest::TTestColumn::ConvertToPairs(columns), notNullColumns); UNIT_ASSERT_C(result.ok(), result.status().ToString()); return result.ValueUnsafe(); } -} +} // namespace NKikimr::NArrow namespace NKikimr::NOlap { std::shared_ptr TTestStoragesManager::DoBuildOperator(const TString& storageId) { if (storageId == TBase::DefaultStorageId) { - return std::make_shared(storageId, NActors::TActorId(), TabletInfo, - GetGeneration(), SharedBlobsManager->GetStorageManagerGuarantee(TBase::DefaultStorageId)); + return std::make_shared(storageId, NActors::TActorId(), TabletInfo, GetGeneration(), + SharedBlobsManager->GetStorageManagerGuarantee(TBase::DefaultStorageId)); + } else if (storageId == TBase::LocalMetadataStorageId) { + return std::make_shared( + storageId, SharedBlobsManager->GetStorageManagerGuarantee(TBase::DefaultStorageId)); } else if (storageId == TBase::MemoryStorageId) { #ifndef KIKIMR_DISABLE_S3_OPS Singleton()->SetSecretKey("fakeSecret"); - return 
std::make_shared(storageId, NActors::TActorId(), std::make_shared("fakeBucket", "fakeSecret"), + return std::make_shared(storageId, NActors::TActorId(), + std::make_shared("fakeBucket", "fakeSecret"), SharedBlobsManager->GetStorageManagerGuarantee(storageId), GetGeneration()); #endif } return nullptr; } -} \ No newline at end of file +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/test_helper/helper.h b/ydb/core/tx/columnshard/test_helper/helper.h index cf71657cf97c..98f9a09b526c 100644 --- a/ydb/core/tx/columnshard/test_helper/helper.h +++ b/ydb/core/tx/columnshard/test_helper/helper.h @@ -51,6 +51,9 @@ class TTestColumn { YDB_ACCESSOR_DEF(TString, Name); YDB_ACCESSOR_DEF(NScheme::TTypeInfo, Type); YDB_ACCESSOR_DEF(TString, StorageId); + YDB_ACCESSOR_DEF(TString, AccessorClassName); + YDB_ACCESSOR(bool, Nullable, true); + public: explicit TTestColumn(const TString& name, const NScheme::TTypeInfo& type) : Name(name) @@ -63,13 +66,14 @@ class TTestColumn { static THashMap ConvertToHash(const std::vector& columns); static std::vector BuildFromPairs(const std::vector>& columns); static std::vector CropSchema(const std::vector& input, const ui32 size); + static std::set GetNullableSet(const std::vector& columns); }; } namespace NKikimr::NArrow { -std::vector> MakeArrowFields(const std::vector& columns, const std::set& notNullColumns = {}); -std::shared_ptr MakeArrowSchema(const std::vector& columns, const std::set& notNullColumns = {}); +std::vector> MakeArrowFields(const std::vector& columns); +std::shared_ptr MakeArrowSchema(const std::vector& columns); } diff --git a/ydb/core/tx/columnshard/common/tests/shard_reader.h b/ydb/core/tx/columnshard/test_helper/shard_reader.h similarity index 99% rename from ydb/core/tx/columnshard/common/tests/shard_reader.h rename to ydb/core/tx/columnshard/test_helper/shard_reader.h index 1bb3ad353835..2beaa5a782d9 100644 --- a/ydb/core/tx/columnshard/common/tests/shard_reader.h +++ 
b/ydb/core/tx/columnshard/test_helper/shard_reader.h @@ -10,7 +10,7 @@ #include #include -namespace NKikimr::NOlap::NTests { +namespace NKikimr::NTxUT { class TShardReader { private: @@ -267,4 +267,4 @@ class TShardReader { } }; -} +} //namespace NKikimr::NTxUT diff --git a/ydb/core/tx/columnshard/test_helper/ya.make b/ydb/core/tx/columnshard/test_helper/ya.make index a900deb266db..cab4937293dd 100644 --- a/ydb/core/tx/columnshard/test_helper/ya.make +++ b/ydb/core/tx/columnshard/test_helper/ya.make @@ -2,6 +2,7 @@ LIBRARY() PEERDIR( ydb/core/protos + ydb/core/formats/arrow contrib/libs/apache/arrow ydb/library/actors/core ydb/core/tx/columnshard/blobs_action/bs diff --git a/ydb/core/tx/columnshard/transactions/locks/abstract.cpp b/ydb/core/tx/columnshard/transactions/locks/abstract.cpp new file mode 100644 index 000000000000..927b73dcdd23 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/locks/abstract.cpp @@ -0,0 +1,6 @@ +#include "abstract.h" +#include + +namespace NKikimr::NOlap::NTxInteractions { + +} diff --git a/ydb/core/tx/columnshard/transactions/locks/abstract.h b/ydb/core/tx/columnshard/transactions/locks/abstract.h new file mode 100644 index 000000000000..5699d13d9705 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/locks/abstract.h @@ -0,0 +1,105 @@ +#pragma once +#include "dependencies.h" +#include "interaction.h" + +#include + +#include + +namespace NKikimr::NColumnShard { +class TColumnShard; +} + +namespace NKikimrColumnShardTxProto { +class TEvent; +} + +namespace NKikimr::NOlap::NTxInteractions { + +class ITxEvent { +public: + using TFactory = NObjectFactory::TParametrizedObjectFactory; + using TProto = NKikimrColumnShardTxProto::TEvent; + +protected: + virtual void DoAddToInteraction(const ui64 txId, TInteractionsContext& context) const = 0; + virtual void DoRemoveFromInteraction(const ui64 txId, TInteractionsContext& context) const = 0; + virtual bool DoDeserializeFromProto(const NKikimrColumnShardTxProto::TEvent& proto) = 0; + 
virtual void DoSerializeToProto(NKikimrColumnShardTxProto::TEvent& proto) const = 0; + +public: + ITxEvent() = default; + virtual ~ITxEvent() = default; + + virtual TString GetClassName() const = 0; + + bool DeserializeFromProto(const TProto& proto) { + return DoDeserializeFromProto(proto); + } + + void SerializeToProto(TProto& proto) const { + DoSerializeToProto(proto); + } + + void AddToInteraction(const ui64 txId, TInteractionsContext& context) const { + return DoAddToInteraction(txId, context); + } + + void RemoveFromInteraction(const ui64 txId, TInteractionsContext& context) const { + return DoRemoveFromInteraction(txId, context); + } +}; + +class TTxEventContainer: public NBackgroundTasks::TInterfaceProtoContainer { +private: + using TBase = NBackgroundTasks::TInterfaceProtoContainer; + YDB_READONLY(ui64, TxId, 0); + +public: + void AddToInteraction(TInteractionsContext& context) const { + return GetObjectVerified().AddToInteraction(TxId, context); + } + + void RemoveFromInteraction(TInteractionsContext& context) const { + return GetObjectVerified().RemoveFromInteraction(TxId, context); + } + + TTxEventContainer(const ui64 txId, const std::shared_ptr& txEvent) + : TBase(txEvent) + , TxId(txId) { + } + + TTxEventContainer(const ui64 txId) + : TxId(txId) { + } + + bool operator<(const TTxEventContainer& item) const { + return TxId < item.TxId; + } +}; + +class ITxEventWriter { +protected: + virtual bool DoCheckInteraction( + const ui64 selfTxId, TInteractionsContext& context, TTxConflicts& conflicts, TTxConflicts& notifications) const = 0; + virtual std::shared_ptr DoBuildEvent() = 0; + +public: + ITxEventWriter() = default; + virtual ~ITxEventWriter() = default; + + bool CheckInteraction(const ui64 selfTxId, TInteractionsContext& context, TTxConflicts& conflicts, TTxConflicts& notifications) const { + TTxConflicts conflictsResult; + TTxConflicts notificationsResult; + const bool result = DoCheckInteraction(selfTxId, context, conflictsResult, 
notificationsResult); + std::swap(conflictsResult, conflicts); + std::swap(notificationsResult, notifications); + return result; + } + + std::shared_ptr BuildEvent() { + return DoBuildEvent(); + } +}; + +} // namespace NKikimr::NOlap::NTxInteractions diff --git a/ydb/core/tx/columnshard/transactions/locks/dependencies.cpp b/ydb/core/tx/columnshard/transactions/locks/dependencies.cpp new file mode 100644 index 000000000000..921a024ce651 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/locks/dependencies.cpp @@ -0,0 +1,48 @@ +#include "dependencies.h" +#include + +namespace NKikimr::NOlap::NTxInteractions { + +bool TTxConflicts::LoadFromDatabase(NIceDb::TNiceDb& db) { + using namespace NColumnShard; + auto rowset = db.Table().Select(); + if (!rowset.IsReady()) { + return false; + } + + THashMap> local; + while (!rowset.EndOfSet()) { + const ui64 txId = rowset.GetValue(); + local[txId].emplace(rowset.GetValue()); + if (!rowset.Next()) { + return false; + } + } + std::swap(local, TxIdsFromCommitToBroken); + return true; +} + +void TTxConflicts::AddOnExecute(NTabletFlatExecutor::TTransactionContext& txc) const { + using namespace NColumnShard; + NIceDb::TNiceDb db(txc.DB); + for (auto&& [commitTxId, brokeTxIds] : TxIdsFromCommitToBroken) { + for (auto&& brokeTxId : brokeTxIds) { + db.Table().Key(commitTxId, brokeTxId).Update(); + } + } +} + +bool TTxConflicts::RemoveOnExecute(NTabletFlatExecutor::TTransactionContext& txc, const ui64 txId) const { + using namespace NColumnShard; + NIceDb::TNiceDb db(txc.DB); + auto it = TxIdsFromCommitToBroken.find(txId); + if (it == TxIdsFromCommitToBroken.end()) { + return false; + } + for (auto&& brokeTxId : it->second) { + db.Table().Key(txId, brokeTxId).Delete(); + } + return true; +} + +} diff --git a/ydb/core/tx/columnshard/transactions/locks/dependencies.h b/ydb/core/tx/columnshard/transactions/locks/dependencies.h new file mode 100644 index 000000000000..d749ed1db87e --- /dev/null +++ 
b/ydb/core/tx/columnshard/transactions/locks/dependencies.h @@ -0,0 +1,55 @@ +#pragma once +#include +#include +#include + +namespace NKikimr::NOlap::NTxInteractions { + +class TTxConflicts { +private: + THashMap> TxIdsFromCommitToBroken; + +public: + THashMap>::const_iterator begin() const { + return TxIdsFromCommitToBroken.begin(); + } + + THashMap>::const_iterator end() const { + return TxIdsFromCommitToBroken.end(); + } + + bool Add(const ui64 commitTxId, const ui64 brokenTxId) { + return TxIdsFromCommitToBroken[commitTxId].emplace(brokenTxId).second; + } + + THashSet GetBrokenTxIds(const ui64 txId) const { + auto it = TxIdsFromCommitToBroken.find(txId); + if (it == TxIdsFromCommitToBroken.end()) { + return Default>(); + } + return it->second; + } + + bool LoadFromDatabase(NIceDb::TNiceDb& db); + + bool RemoveOnExecute(NTabletFlatExecutor::TTransactionContext& txc, const ui64 txId) const; + + [[nodiscard]] bool RemoveOnComplete(const ui64 txId) { + return TxIdsFromCommitToBroken.erase(txId); + } + + void AddOnExecute(NTabletFlatExecutor::TTransactionContext& txc) const; + + void MergeTo(TTxConflicts& dest) const { + for (auto&& i : TxIdsFromCommitToBroken) { + auto it = dest.TxIdsFromCommitToBroken.find(i.first); + if (it == dest.TxIdsFromCommitToBroken.end()) { + dest.TxIdsFromCommitToBroken.emplace(i.first, i.second); + } else { + it->second.insert(i.second.begin(), i.second.end()); + } + } + } +}; + +} // namespace NKikimr::NOlap::NTxInteractions diff --git a/ydb/core/tx/columnshard/transactions/locks/interaction.cpp b/ydb/core/tx/columnshard/transactions/locks/interaction.cpp new file mode 100644 index 000000000000..0dd52a2a79ba --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/locks/interaction.cpp @@ -0,0 +1,26 @@ +#include "interaction.h" +#include + +namespace NKikimr::NOlap::NTxInteractions { +TIntervalPoint TIntervalPoint::From( + const TPredicateContainer& container, const std::shared_ptr& pkSchema) { + i32 shift = container.IsInclude() ? 
0 : 1; + if (!container.GetReplaceKey()) { + shift = -1; + } else if (container.GetReplaceKey()->Size() < (ui32)pkSchema->num_fields()) { + shift = 1; + } + return TIntervalPoint(container.GetReplaceKey(), shift); +} + +TIntervalPoint TIntervalPoint::To( + const TPredicateContainer& container, const std::shared_ptr& pkSchema) { + i32 shift = container.IsInclude() ? 0 : -1; + if (!container.GetReplaceKey() || container.GetReplaceKey()->Size() < (ui32)pkSchema->num_fields()) { + shift = Max(); + } + + return TIntervalPoint(container.GetReplaceKey(), shift); +} + +} // namespace NKikimr::NOlap::NTxInteractions diff --git a/ydb/core/tx/columnshard/transactions/locks/interaction.h b/ydb/core/tx/columnshard/transactions/locks/interaction.h new file mode 100644 index 000000000000..abd9ef92f6d5 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/locks/interaction.h @@ -0,0 +1,459 @@ +#pragma once +#include +#include + +#include +#include + +#include + +namespace NKikimr::NOlap { +class TPredicateContainer; +} + +namespace NKikimr::NOlap::NTxInteractions { + +class TPointTxCounters { +private: + YDB_READONLY(ui32, CountIncludes, 0); + YDB_READONLY(ui32, CountNotIncludes, 0); + +public: + void Inc(const bool include) { + if (include) { + IncInclude(); + } else { + IncNotInclude(); + } + } + bool Dec(const bool include) { + if (include) { + return DecInclude(); + } else { + return DecNotInclude(); + } + } + void IncInclude() { + ++CountIncludes; + } + [[nodiscard]] bool DecInclude() { + AFL_VERIFY(CountIncludes); + return --CountIncludes == 0; + } + void IncNotInclude() { + ++CountNotIncludes; + } + [[nodiscard]] bool DecNotInclude() { + AFL_VERIFY(CountNotIncludes); + return --CountNotIncludes == 0; + } + bool IsEmpty() const { + return !CountIncludes && !CountNotIncludes; + } + NJson::TJsonValue DebugJson() const { + NJson::TJsonValue result = NJson::JSON_MAP; + if (CountIncludes) { + result.InsertValue("count_include", CountIncludes); + } + if (CountNotIncludes) { + 
result.InsertValue("count_not_include", CountNotIncludes); + } + return result; + } + ui32 GetCountSum() const { + return CountIncludes + CountNotIncludes; + } +}; + +class TIntervalTxCounters { +private: + YDB_READONLY(ui32, Count, 0); + +public: + void Inc(const ui32 count = 1) { + Count += count; + } + [[nodiscard]] bool Dec(const ui32 count = 1) { + AFL_VERIFY(Count); + Count -= count; + return Count == 0; + } + bool IsEmpty() const { + return !Count; + } + NJson::TJsonValue DebugJson() const { + NJson::TJsonValue result = NJson::JSON_MAP; + result.InsertValue("count", Count); + return result; + } + + void ProvideFrom(const TIntervalTxCounters& counters) { + Count += counters.Count; + AFL_VERIFY(counters.Count); + } +}; + +class TPointInfo { +private: + THashMap StartTxIds; + THashMap FinishTxIds; + THashMap IntervalTxIds; + +public: + void InsertCurrentTxs(THashSet& txIds, const bool includePoint) const { + for (auto&& i : IntervalTxIds) { + txIds.emplace(i.first); + } + if (includePoint) { + for (auto&& i : FinishTxIds) { + if (!i.second.GetCountIncludes()) { + continue; + } + auto it = StartTxIds.find(i.first); + if (it != StartTxIds.end() && it->second.GetCountIncludes()) { + txIds.emplace(i.first); + } + } + } + } + + NJson::TJsonValue DebugJson() const { + NJson::TJsonValue result = NJson::JSON_MAP; + auto& starts = result.InsertValue("starts", NJson::JSON_ARRAY); + for (auto&& i : StartTxIds) { + auto& data = starts.AppendValue(NJson::JSON_MAP); + data.InsertValue("id", i.first); + data.InsertValue("inc", i.second.DebugJson()); + } + auto& finish = result.InsertValue("finishes", NJson::JSON_ARRAY); + for (auto&& i : FinishTxIds) { + auto& data = finish.AppendValue(NJson::JSON_MAP); + data.InsertValue("id", i.first); + data.InsertValue("inc", i.second.DebugJson()); + } + auto& txs = result.InsertValue("txs", NJson::JSON_ARRAY); + for (auto&& i : IntervalTxIds) { + auto& data = txs.AppendValue(NJson::JSON_MAP); + data.InsertValue("id", i.first); + 
data.InsertValue("inc", i.second.DebugJson()); + } + return result; + } + + void AddStart(const ui64 txId, const bool include) { + StartTxIds[txId].Inc(include); + } + void RemoveStart(const ui64 txId, const bool include) { + if (StartTxIds[txId].Dec(include)) { + StartTxIds.erase(txId); + } + } + void AddFinish(const ui64 txId, const bool include) { + FinishTxIds[txId].Inc(include); + } + void RemoveFinish(const ui64 txId, const bool include) { + if (FinishTxIds[txId].Dec(include)) { + FinishTxIds.erase(txId); + } + } + void AddIntervalTx(const ui64 txId) { + IntervalTxIds[txId].Inc(); + } + void RemoveIntervalTx(const ui64 txId) { + if (IntervalTxIds[txId].Dec()) { + IntervalTxIds.erase(txId); + } + } + bool TryRemoveTx(const ui64 txId, const bool include) { + bool result = false; + if (StartTxIds[txId].Dec(include)) { + StartTxIds.erase(txId); + result = true; + } + if (FinishTxIds[txId].Dec(include)) { + FinishTxIds.erase(txId); + result = true; + } + if (IntervalTxIds[txId].Dec(txId)) { + IntervalTxIds.erase(txId); + result = true; + } + return result; + } + + bool IsEmpty() const { + return StartTxIds.empty() && FinishTxIds.empty() && IntervalTxIds.empty(); + } + + void ProvideTxIdsFrom(const TPointInfo& previouse) { + for (auto&& i : previouse.IntervalTxIds) { + auto provided = i.second; + { + auto it = StartTxIds.find(i.first); + if (it != StartTxIds.end()) { + provided.Inc(it->second.GetCountSum()); + } + } + { + auto it = FinishTxIds.find(i.first); + if (it != FinishTxIds.end()) { + if (provided.Dec(it->second.GetCountSum())) { + return; + } + } + } + IntervalTxIds[i.first].ProvideFrom(provided); + } + } +}; + +class TIntervalPoint { +private: + i32 IncludeState = 0; + std::optional PrimaryKey; + + TIntervalPoint(const NArrow::TReplaceKey& primaryKey, const int includeState) + : IncludeState(includeState) + , PrimaryKey(primaryKey) { + } + + TIntervalPoint(const std::shared_ptr& primaryKey, const int includeState) + : IncludeState(includeState) { + if 
(primaryKey) { + PrimaryKey = *primaryKey; + } + } + +public: + static TIntervalPoint Equal(const NArrow::TReplaceKey& replaceKey) { + return TIntervalPoint(replaceKey, 0); + } + static TIntervalPoint From(const TPredicateContainer& container, const std::shared_ptr& pkSchema); + static TIntervalPoint To(const TPredicateContainer& container, const std::shared_ptr& pkSchema); + + NJson::TJsonValue DebugJson() const { + NJson::TJsonValue result = NJson::JSON_MAP; + result.InsertValue("include", IncludeState); + if (PrimaryKey) { + result.InsertValue("pk", PrimaryKey->DebugString()); + } + return result; + } + + bool IsIncluded() const { + return IncludeState == 0; + } + + bool operator==(const TIntervalPoint& item) const { + if (!PrimaryKey && !item.PrimaryKey) { + return IncludeState == item.IncludeState; + } else if (!PrimaryKey && item.PrimaryKey) { + return false; + } else if (PrimaryKey && !item.PrimaryKey) { + return false; + } else if (IncludeState == item.IncludeState) { + if (PrimaryKey->Size() != item.PrimaryKey->Size()) { + return false; + } + return *PrimaryKey == *item.PrimaryKey; + } else { + return false; + } + } + + bool operator<=(const TIntervalPoint& point) const { + return !(point < *this); + } + + bool operator<(const TIntervalPoint& point) const { + if (!PrimaryKey && !point.PrimaryKey) { + return IncludeState < point.IncludeState; + } else if (!PrimaryKey && point.PrimaryKey) { + return IncludeState < 0; + } else if (PrimaryKey && !point.PrimaryKey) { + return 0 < point.IncludeState; + } else { + const ui32 sizeMin = std::min(PrimaryKey->Size(), point.PrimaryKey->Size()); + const std::partial_ordering compareResult = PrimaryKey->ComparePartNotNull(*point.PrimaryKey, sizeMin); + if (compareResult == std::partial_ordering::less) { + return true; + } else if (compareResult == std::partial_ordering::greater) { + return false; + } else { + AFL_VERIFY(compareResult == std::partial_ordering::equivalent); + if (PrimaryKey->Size() == 
point.PrimaryKey->Size()) { + return IncludeState < point.IncludeState; + } else if (PrimaryKey->Size() < point.PrimaryKey->Size()) { + if (IncludeState <= 1) { + return true; + } else { + return false; + } + } else { + if (point.IncludeState <= 1) { + return false; + } else { + return true; + } + } + return false; + } + } + } +}; + +class TReadIntervals { +private: + std::map IntervalsInfo; + +public: + NJson::TJsonValue DebugJson() const { + NJson::TJsonValue result = NJson::JSON_MAP; + auto& jsonIntervals = result.InsertValue("intervals", NJson::JSON_ARRAY); + for (auto&& i : IntervalsInfo) { + auto& pointInfo = jsonIntervals.AppendValue(NJson::JSON_MAP); + pointInfo.InsertValue("p", i.first.DebugJson()); + pointInfo.InsertValue("i", i.second.DebugJson()); + } + return result; + } + + bool IsEmpty() const { + return IntervalsInfo.empty(); + } + + std::map::iterator Erase(const std::map::iterator& it) { + return IntervalsInfo.erase(it); + } + + std::map::iterator GetPointIterator(const TIntervalPoint& intervalPoint) { + auto it = IntervalsInfo.find(intervalPoint); + AFL_VERIFY(it != IntervalsInfo.end()); + return it; + } + + std::map::iterator InsertPoint(const TIntervalPoint& intervalPoint) { + auto it = IntervalsInfo.lower_bound(intervalPoint); + if (it == IntervalsInfo.end() || it == IntervalsInfo.begin()) { + return IntervalsInfo.emplace(intervalPoint, TPointInfo()).first; + } else if (it->first == intervalPoint) { + return it; + } else { + --it; + auto result = IntervalsInfo.emplace(intervalPoint, TPointInfo()).first; + result->second.ProvideTxIdsFrom(it->second); + return result; + } + } + + THashSet GetAffectedTxIds(const std::shared_ptr& writtenPrimaryKeys) const { + AFL_VERIFY(writtenPrimaryKeys); + auto it = IntervalsInfo.begin(); + THashSet affectedTxIds; + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("batch", writtenPrimaryKeys->ToString())("info", DebugJson().GetStringRobust()); + for (ui32 i = 0; i < writtenPrimaryKeys->num_rows();) { + if (it == 
IntervalsInfo.end()) { + return affectedTxIds; + } + auto rKey = NArrow::TReplaceKey::FromBatch(writtenPrimaryKeys, writtenPrimaryKeys->schema(), i); + auto pkIntervalPoint = TIntervalPoint::Equal(rKey); + while (it != IntervalsInfo.end() && it->first < pkIntervalPoint) { + ++it; + } + if (it == IntervalsInfo.end()) { + return affectedTxIds; + } + auto itPred = it; + bool equal = false; + if (pkIntervalPoint < it->first) { + if (it == IntervalsInfo.begin()) { + ++i; + continue; + } + if (pkIntervalPoint < it->first) { + --itPred; + } + } else { + equal = true; + ++it; + } + + itPred->second.InsertCurrentTxs(affectedTxIds, equal); + if (it == IntervalsInfo.end()) { + return affectedTxIds; + } + while (i < writtenPrimaryKeys->num_rows()) { + auto rKey = NArrow::TReplaceKey::FromBatch(writtenPrimaryKeys, writtenPrimaryKeys->schema(), i); + if (TIntervalPoint::Equal(rKey) < it->first) { + ++i; + } else { + break; + } + } + } + return affectedTxIds; + } +}; + +class TInteractionsContext { +private: + THashMap ReadIntervalsByPathId; + +public: + NJson::TJsonValue DebugJson() const { + NJson::TJsonValue result = NJson::JSON_MAP; + for (auto&& i : ReadIntervalsByPathId) { + result.InsertValue(::ToString(i.first), i.second.DebugJson()); + } + return result; + } + + THashSet GetAffectedTxIds(const ui64 pathId, const std::shared_ptr& batch) const { + auto it = ReadIntervalsByPathId.find(pathId); + if (it == ReadIntervalsByPathId.end()) { + return {}; + } + return it->second.GetAffectedTxIds(batch); + } + + void AddInterval(const ui64 txId, const ui64 pathId, const TIntervalPoint& from, const TIntervalPoint& to) { + auto& intervals = ReadIntervalsByPathId[pathId]; + auto itFrom = intervals.InsertPoint(from); + auto itTo = intervals.InsertPoint(to); + itFrom->second.AddStart(txId, from.IsIncluded()); + for (auto it = itFrom; it != itTo; ++it) { + it->second.AddIntervalTx(txId); + } + itTo->second.AddFinish(txId, to.IsIncluded()); + 
AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "add_interval")("interactions_info", DebugJson().GetStringRobust()); + } + + void RemoveInterval(const ui64 txId, const ui64 pathId, const TIntervalPoint& from, const TIntervalPoint& to) { + auto itIntervals = ReadIntervalsByPathId.find(pathId); + AFL_VERIFY(itIntervals != ReadIntervalsByPathId.end())("path_id", pathId); + auto& intervals = itIntervals->second; + auto itFrom = intervals.GetPointIterator(from); + auto itTo = intervals.GetPointIterator(to); + itFrom->second.RemoveStart(txId, from.IsIncluded()); + for (auto it = itFrom; it != itTo; ++it) { + it->second.RemoveIntervalTx(txId); + } + itTo->second.RemoveFinish(txId, to.IsIncluded()); + for (auto&& it = itFrom; it != itTo;) { + if (it->second.IsEmpty()) { + it = intervals.Erase(it); + } else { + ++it; + } + } + if (itTo->second.IsEmpty()) { + intervals.Erase(itTo); + } + if (intervals.IsEmpty()) { + ReadIntervalsByPathId.erase(itIntervals); + } + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "remove_interval")("interactions_info", DebugJson().GetStringRobust()); + } +}; + +} // namespace NKikimr::NOlap::NTxInteractions diff --git a/ydb/core/tx/columnshard/transactions/locks/read_finished.cpp b/ydb/core/tx/columnshard/transactions/locks/read_finished.cpp new file mode 100644 index 000000000000..159b5d269533 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/locks/read_finished.cpp @@ -0,0 +1,5 @@ +#include "read_finished.h" + +namespace NKikimr::NOlap::NTxInteractions { + +} diff --git a/ydb/core/tx/columnshard/transactions/locks/read_finished.h b/ydb/core/tx/columnshard/transactions/locks/read_finished.h new file mode 100644 index 000000000000..895123e5097b --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/locks/read_finished.h @@ -0,0 +1,30 @@ +#pragma once +#include "abstract.h" + +namespace NKikimr::NOlap::NTxInteractions { + +class TEvReadFinishedWriter: public ITxEventWriter { +private: + YDB_READONLY(ui64, PathId, 0); + 
TTxConflicts Conflicts; + + virtual bool DoCheckInteraction( + const ui64 /*selfTxId*/, TInteractionsContext& /*context*/, TTxConflicts& conflicts, TTxConflicts& /*notifications*/) const override { + conflicts = Conflicts; + return true; + } + + virtual std::shared_ptr DoBuildEvent() override { + return nullptr; + } + +public: + TEvReadFinishedWriter(const ui64 pathId, const TTxConflicts& conflicts) + : PathId(pathId) + , Conflicts(conflicts) + { + AFL_VERIFY(PathId); + } +}; + +} // namespace NKikimr::NOlap::NTxInteractions diff --git a/ydb/core/tx/columnshard/transactions/locks/read_start.cpp b/ydb/core/tx/columnshard/transactions/locks/read_start.cpp new file mode 100644 index 000000000000..963c47b068a9 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/locks/read_start.cpp @@ -0,0 +1,47 @@ +#include "read_start.h" +#include + +namespace NKikimr::NOlap::NTxInteractions { + +std::shared_ptr TEvReadStartWriter::DoBuildEvent() { + return std::make_shared(PathId, Schema, Filter); +} + +bool TEvReadStart::DoDeserializeFromProto(const NKikimrColumnShardTxProto::TEvent& proto) { + if (!proto.HasRead()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("error", "cannot_parse_TEvReadStart")("reason", "have not 'read' in proto"); + return false; + } + Schema = NArrow::DeserializeSchema(proto.GetRead().GetSchema()); + if (!Schema) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("error", "cannot_parse_TEvReadStart")("reason", "cannot_parse_schema"); + return false; + } + Filter = TPKRangesFilter::BuildFromString(proto.GetRead().GetFilter(), Schema, false); + if (!Filter) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("error", "cannot_parse_TEvReadStart")("reason", "cannot_parse_filter"); + return false; + } + return true; +} + +void TEvReadStart::DoSerializeToProto(NKikimrColumnShardTxProto::TEvent& proto) const { + AFL_VERIFY(!!Filter); + AFL_VERIFY(!!Schema); + *proto.MutableRead()->MutableFilter() = Filter->SerializeToString(Schema); + 
*proto.MutableRead()->MutableSchema() = NArrow::SerializeSchema(*Schema); +} + +void TEvReadStart::DoAddToInteraction(const ui64 txId, TInteractionsContext& context) const { + for (auto&& i : *Filter) { + context.AddInterval(txId, PathId, TIntervalPoint::From(i.GetPredicateFrom(), Schema), TIntervalPoint::To(i.GetPredicateTo(), Schema)); + } +} + +void TEvReadStart::DoRemoveFromInteraction(const ui64 txId, TInteractionsContext& context) const { + for (auto&& i : *Filter) { + context.RemoveInterval(txId, PathId, TIntervalPoint::From(i.GetPredicateFrom(), Schema), TIntervalPoint::To(i.GetPredicateTo(), Schema)); + } +} + +} diff --git a/ydb/core/tx/columnshard/transactions/locks/read_start.h b/ydb/core/tx/columnshard/transactions/locks/read_start.h new file mode 100644 index 000000000000..6587dfea5bda --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/locks/read_start.h @@ -0,0 +1,71 @@ +#pragma once +#include "abstract.h" +#include + +namespace NKikimr::NOlap::NTxInteractions { + +class TEvReadStartWriter: public ITxEventWriter { +private: + YDB_READONLY(ui64, PathId, 0); + YDB_READONLY_DEF(std::shared_ptr, Schema); + YDB_READONLY_DEF(std::shared_ptr, Filter); + YDB_READONLY_DEF(THashSet, LockIdsForCheck); + + virtual bool DoCheckInteraction( + const ui64 selfTxId, TInteractionsContext& /*context*/, TTxConflicts& /*conflicts*/, TTxConflicts& notifications) const override { + for (auto&& i : LockIdsForCheck) { + notifications.Add(i, selfTxId); + } + return true; + } + + virtual std::shared_ptr DoBuildEvent() override; + +public: + TEvReadStartWriter(const ui64 pathId, const std::shared_ptr& schema, const std::shared_ptr& filter, + const THashSet& lockIdsForCheck) + : PathId(pathId) + , Schema(schema) + , Filter(filter) + , LockIdsForCheck(lockIdsForCheck) + { + AFL_VERIFY(PathId); + AFL_VERIFY(Schema); + AFL_VERIFY(Filter); + } +}; + +class TEvReadStart: public ITxEvent { +public: + static TString GetClassNameStatic() { + return "READ_START"; + } + +private: + 
YDB_READONLY(ui64, PathId, 0); + YDB_READONLY_DEF(std::shared_ptr, Schema); + YDB_READONLY_DEF(std::shared_ptr, Filter); + + virtual bool DoDeserializeFromProto(const NKikimrColumnShardTxProto::TEvent& proto) override; + virtual void DoSerializeToProto(NKikimrColumnShardTxProto::TEvent& proto) const override; + virtual void DoAddToInteraction(const ui64 txId, TInteractionsContext& context) const override; + virtual void DoRemoveFromInteraction(const ui64 txId, TInteractionsContext& context) const override; + static inline const TFactory::TRegistrator Registrator = TFactory::TRegistrator(GetClassNameStatic()); + +public: + virtual TString GetClassName() const override { + return GetClassNameStatic(); + } + + TEvReadStart() = default; + TEvReadStart(const ui64 pathId, const std::shared_ptr& schema, const std::shared_ptr& filter) + : PathId(pathId) + , Schema(schema) + , Filter(filter) { + AFL_VERIFY(PathId); + AFL_VERIFY(Schema); + AFL_VERIFY(Filter); + } +}; + +} // namespace NKikimr::NOlap::NTxInteractions diff --git a/ydb/core/tx/columnshard/transactions/locks/write.cpp b/ydb/core/tx/columnshard/transactions/locks/write.cpp new file mode 100644 index 000000000000..19ff258fe7c6 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/locks/write.cpp @@ -0,0 +1,5 @@ +#include "write.h" + +namespace NKikimr::NOlap::NTxInteractions { + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/transactions/locks/write.h b/ydb/core/tx/columnshard/transactions/locks/write.h new file mode 100644 index 000000000000..78ba8baf7e0e --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/locks/write.h @@ -0,0 +1,36 @@ +#pragma once +#include "abstract.h" + +namespace NKikimr::NOlap::NTxInteractions { + +class TEvWriteWriter: public ITxEventWriter { +private: + YDB_READONLY(ui64, PathId, 0); + YDB_READONLY_DEF(std::shared_ptr, RecordBatch); + + virtual bool DoCheckInteraction( + const ui64 selfTxId, TInteractionsContext& context, TTxConflicts& conflicts, 
TTxConflicts& /*notifications*/) const override { + THashSet txIds = context.GetAffectedTxIds(PathId, RecordBatch); + txIds.erase(selfTxId); + TTxConflicts result; + for (auto&& i : txIds) { + result.Add(selfTxId, i); + } + std::swap(result, conflicts); + return true; + } + + virtual std::shared_ptr DoBuildEvent() override { + return nullptr; + } + +public: + TEvWriteWriter(const ui64 pathId, const std::shared_ptr& batch, const std::shared_ptr& pkSchema) + : PathId(pathId) + , RecordBatch(NArrow::TColumnOperator().Extract(batch, pkSchema->field_names())) { + AFL_VERIFY(PathId); + AFL_VERIFY(RecordBatch); + } +}; + +} // namespace NKikimr::NOlap::NTxInteractions diff --git a/ydb/core/tx/columnshard/transactions/locks/ya.make b/ydb/core/tx/columnshard/transactions/locks/ya.make new file mode 100644 index 000000000000..a7ad6b27ab0e --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/locks/ya.make @@ -0,0 +1,19 @@ +LIBRARY() + +SRCS( + dependencies.cpp + interaction.cpp + abstract.cpp + GLOBAL read_start.cpp + GLOBAL read_finished.cpp + GLOBAL write.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/transactions/protos + ydb/core/tx/columnshard/engines/predicate + ydb/core/tx/columnshard/blobs_action/events + ydb/core/tx/columnshard/data_sharing/destination/events +) + +END() diff --git a/ydb/core/tx/columnshard/transactions/operators/backup.cpp b/ydb/core/tx/columnshard/transactions/operators/backup.cpp index 5ea7cdafb3da..8427cd482901 100644 --- a/ydb/core/tx/columnshard/transactions/operators/backup.cpp +++ b/ydb/core/tx/columnshard/transactions/operators/backup.cpp @@ -59,11 +59,12 @@ void TBackupTransactionOperator::DoStartProposeOnComplete(TColumnShard& /*owner* } } -bool TBackupTransactionOperator::ExecuteOnProgress(TColumnShard& /*owner*/, const NOlap::TSnapshot& /*version*/, NTabletFlatExecutor::TTransactionContext& /*txc*/) { +bool TBackupTransactionOperator::ProgressOnExecute( + TColumnShard& /*owner*/, const NOlap::TSnapshot& /*version*/, 
NTabletFlatExecutor::TTransactionContext& /*txc*/) { return true; } -bool TBackupTransactionOperator::CompleteOnProgress(TColumnShard& /*owner*/, const TActorContext& /*ctx*/) { +bool TBackupTransactionOperator::ProgressOnComplete(TColumnShard& /*owner*/, const TActorContext& /*ctx*/) { return true; } diff --git a/ydb/core/tx/columnshard/transactions/operators/backup.h b/ydb/core/tx/columnshard/transactions/operators/backup.h index c941d773acc6..cd9d5785dd9c 100644 --- a/ydb/core/tx/columnshard/transactions/operators/backup.h +++ b/ydb/core/tx/columnshard/transactions/operators/backup.h @@ -6,7 +6,7 @@ namespace NKikimr::NColumnShard { -class TBackupTransactionOperator: public IProposeTxOperator { +class TBackupTransactionOperator: public IProposeTxOperator, public TMonitoringObjectsCounter { private: using TBase = IProposeTxOperator; @@ -24,6 +24,9 @@ class TBackupTransactionOperator: public IProposeTxOperator { } virtual void DoFinishProposeOnComplete(TColumnShard& /*owner*/, const TActorContext& /*ctx*/) override { } + virtual TString DoGetOpType() const override { + return "Backup"; + } virtual bool DoIsAsync() const override { return true; } @@ -35,9 +38,9 @@ class TBackupTransactionOperator: public IProposeTxOperator { public: using TBase::TBase; - virtual bool ExecuteOnProgress(TColumnShard& owner, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc) override; + virtual bool ProgressOnExecute(TColumnShard& owner, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc) override; - virtual bool CompleteOnProgress(TColumnShard& owner, const TActorContext& ctx) override; + virtual bool ProgressOnComplete(TColumnShard& owner, const TActorContext& ctx) override; virtual bool ExecuteOnAbort(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) override; virtual bool CompleteOnAbort(TColumnShard& /*owner*/, const TActorContext& /*ctx*/) override { diff --git 
a/ydb/core/tx/columnshard/transactions/operators/ev_write.cpp b/ydb/core/tx/columnshard/transactions/operators/ev_write.cpp deleted file mode 100644 index 3aa27316237f..000000000000 --- a/ydb/core/tx/columnshard/transactions/operators/ev_write.cpp +++ /dev/null @@ -1,4 +0,0 @@ -#include "ev_write.h" - -namespace NKikimr::NColumnShard { -} diff --git a/ydb/core/tx/columnshard/transactions/operators/ev_write.h b/ydb/core/tx/columnshard/transactions/operators/ev_write.h deleted file mode 100644 index 2f33bec4ccbe..000000000000 --- a/ydb/core/tx/columnshard/transactions/operators/ev_write.h +++ /dev/null @@ -1,76 +0,0 @@ -#pragma once - -#include - -namespace NKikimr::NColumnShard { - - class TEvWriteTransactionOperator : public TTxController::ITransactionOperator { - using TBase = TTxController::ITransactionOperator; - using TProposeResult = TTxController::TProposeResult; - static inline auto Registrator = TFactory::TRegistrator(NKikimrTxColumnShard::TX_KIND_COMMIT_WRITE); - private: - virtual TProposeResult DoStartProposeOnExecute(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) override { - owner.OperationsManager->LinkTransaction(LockId, GetTxId(), txc); - return TProposeResult(); - } - virtual void DoStartProposeOnComplete(TColumnShard& /*owner*/, const TActorContext& /*ctx*/) override { - - } - virtual void DoFinishProposeOnExecute(TColumnShard& /*owner*/, NTabletFlatExecutor::TTransactionContext& /*txc*/) override { - } - virtual void DoFinishProposeOnComplete(TColumnShard& /*owner*/, const TActorContext& /*ctx*/) override { - } - virtual bool DoIsAsync() const override { - return false; - } - virtual bool DoCheckAllowUpdate(const TFullTxInfo& currentTxInfo) const override { - return (currentTxInfo.Source == GetTxInfo().Source && currentTxInfo.Cookie == GetTxInfo().Cookie); - } - virtual TString DoDebugString() const override { - return "EV_WRITE"; - } - virtual void DoSendReply(TColumnShard& owner, const TActorContext& ctx) override { - const 
auto& txInfo = GetTxInfo(); - std::unique_ptr evResult; - if (IsFail()) { - evResult = NEvents::TDataEvents::TEvWriteResult::BuildError(owner.TabletID(), txInfo.GetTxId(), NKikimrDataEvents::TEvWriteResult::STATUS_INTERNAL_ERROR, GetProposeStartInfoVerified().GetStatusMessage()); - } else { - evResult = NEvents::TDataEvents::TEvWriteResult::BuildPrepared(owner.TabletID(), txInfo.GetTxId(), owner.GetProgressTxController().BuildCoordinatorInfo(txInfo)); - } - ctx.Send(txInfo.Source, evResult.release(), 0, txInfo.Cookie); - } - - virtual bool DoParse(TColumnShard& /*owner*/, const TString& data) override { - NKikimrTxColumnShard::TCommitWriteTxBody commitTxBody; - if (!commitTxBody.ParseFromString(data)) { - return false; - } - LockId = commitTxBody.GetLockId(); - return !!LockId; - } - - public: - using TBase::TBase; - - virtual bool ExecuteOnProgress(TColumnShard& owner, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc) override { - return owner.OperationsManager->CommitTransaction(owner, GetTxId(), txc, version); - } - - virtual bool CompleteOnProgress(TColumnShard& owner, const TActorContext& ctx) override { - auto result = NEvents::TDataEvents::TEvWriteResult::BuildCompleted(owner.TabletID(), GetTxId()); - ctx.Send(TxInfo.Source, result.release(), 0, TxInfo.Cookie); - return true; - } - - virtual bool ExecuteOnAbort(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) override { - return owner.OperationsManager->AbortTransaction(owner, GetTxId(), txc); - } - virtual bool CompleteOnAbort(TColumnShard& /*owner*/, const TActorContext& /*ctx*/) override { - return true; - } - - private: - ui64 LockId = 0; - }; - -} diff --git a/ydb/core/tx/columnshard/transactions/operators/ev_write/abstract.cpp b/ydb/core/tx/columnshard/transactions/operators/ev_write/abstract.cpp new file mode 100644 index 000000000000..1217abd09701 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/operators/ev_write/abstract.cpp @@ -0,0 +1,5 @@ 
+#include "abstract.h" + +namespace NKikimr::NColumnShard { + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/transactions/operators/ev_write/abstract.h b/ydb/core/tx/columnshard/transactions/operators/ev_write/abstract.h new file mode 100644 index 000000000000..5be836f1d520 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/operators/ev_write/abstract.h @@ -0,0 +1,113 @@ +#pragma once + +#include + +namespace NKikimr::NColumnShard { + +class TBaseEvWriteTransactionOperator: public TTxController::ITransactionOperator { +private: + using TBase = TTxController::ITransactionOperator; + using TProposeResult = TTxController::TProposeResult; + +protected: + ui64 LockId = 0; + +private: + virtual bool DoParseImpl(TColumnShard& owner, const NKikimrTxColumnShard::TCommitWriteTxBody& commitTxBody) = 0; + virtual TProposeResult DoStartProposeOnExecute(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) override final { + owner.GetOperationsManager().LinkTransactionOnExecute(LockId, GetTxId(), txc); + return TProposeResult(); + } + virtual void DoStartProposeOnComplete(TColumnShard& owner, const TActorContext& /*ctx*/) override final { + owner.GetOperationsManager().LinkTransactionOnComplete(LockId, GetTxId()); + } + virtual void DoFinishProposeOnExecute(TColumnShard& /*owner*/, NTabletFlatExecutor::TTransactionContext& /*txc*/) override final { + } + virtual void DoFinishProposeOnComplete(TColumnShard& /*owner*/, const TActorContext& /*ctx*/) override final { + } + virtual bool DoCheckAllowUpdate(const TFullTxInfo& currentTxInfo) const override final { + return (currentTxInfo.Source == GetTxInfo().Source && currentTxInfo.Cookie == GetTxInfo().Cookie); + } + virtual bool DoParse(TColumnShard& owner, const TString& data) override final { + NKikimrTxColumnShard::TCommitWriteTxBody commitTxBody; + if (!commitTxBody.ParseFromString(data)) { + return false; + } + LockId = commitTxBody.GetLockId(); + return DoParseImpl(owner, 
commitTxBody); + } + + virtual bool DoIsAsync() const override final { + return false; + } + + virtual void DoSendReply(TColumnShard& owner, const TActorContext& ctx) override { + const auto& txInfo = GetTxInfo(); + std::unique_ptr evResult; + TLogContextGuard gLogging( + NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("send_reply_tx_id", GetTxId())("send_reply_lock_id", LockId)); + if (IsFail()) { + evResult = NEvents::TDataEvents::TEvWriteResult::BuildError(owner.TabletID(), txInfo.GetTxId(), + NKikimrDataEvents::TEvWriteResult::STATUS_INTERNAL_ERROR, GetProposeStartInfoVerified().GetStatusMessage()); + } else { + evResult = NEvents::TDataEvents::TEvWriteResult::BuildPrepared( + owner.TabletID(), txInfo.GetTxId(), owner.GetProgressTxController().BuildCoordinatorInfo(txInfo)); + } + ctx.Send(txInfo.Source, evResult.release(), 0, txInfo.Cookie); + } + std::optional Version; + +public: + using TBase::TBase; + TBaseEvWriteTransactionOperator(const TFullTxInfo& txInfo, const ui64 lockId) + : TBase(txInfo) + , LockId(lockId) { + } + + virtual bool IsTxBroken() const { + return false; + } + + ui64 GetLockId() const { + return LockId; + } + + virtual bool ProgressOnExecute( + TColumnShard& owner, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc) override { + Version = version; + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("progress_tx_id", GetTxId())("lock_id", LockId)("broken", IsTxBroken()); + if (IsTxBroken()) { + owner.GetOperationsManager().AbortTransactionOnExecute(owner, GetTxId(), txc); + } else { + owner.GetOperationsManager().CommitTransactionOnExecute(owner, GetTxId(), txc, version); + } + return true; + } + + virtual bool ProgressOnComplete(TColumnShard& owner, const TActorContext& ctx) override { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("progress_tx_id", GetTxId())("lock_id", LockId)("broken", IsTxBroken()); + AFL_VERIFY(Version); + if (IsTxBroken()) { + 
owner.GetOperationsManager().AbortTransactionOnComplete(owner, GetTxId()); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildError( + owner.TabletID(), GetTxId(), NKikimrDataEvents::TEvWriteResult::STATUS_LOCKS_BROKEN, "lock invalidated"); + ctx.Send(TxInfo.Source, result.release(), 0, TxInfo.Cookie); + } else { + owner.GetOperationsManager().CommitTransactionOnComplete(owner, GetTxId(), *Version); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildCompleted(owner.TabletID(), GetTxId()); + ctx.Send(TxInfo.Source, result.release(), 0, TxInfo.Cookie); + } + return true; + } + + virtual bool ExecuteOnAbort(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) override { + owner.GetOperationsManager().AbortTransactionOnExecute(owner, GetTxId(), txc); + return true; + } + virtual bool CompleteOnAbort(TColumnShard& owner, const TActorContext& /*ctx*/) override { + owner.GetOperationsManager().AbortTransactionOnComplete(owner, GetTxId()); + return true; + } +}; + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/transactions/operators/ev_write/primary.cpp b/ydb/core/tx/columnshard/transactions/operators/ev_write/primary.cpp new file mode 100644 index 000000000000..9e33c29d2053 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/operators/ev_write/primary.cpp @@ -0,0 +1,5 @@ +#include "primary.h" + +namespace NKikimr::NColumnShard { + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/transactions/operators/ev_write/primary.h b/ydb/core/tx/columnshard/transactions/operators/ev_write/primary.h new file mode 100644 index 000000000000..9073e7458ae3 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/operators/ev_write/primary.h @@ -0,0 +1,287 @@ +#pragma once + +#include "sync.h" + +#include +#include + +#include + +namespace NKikimr::NColumnShard { + +class TEvWriteCommitPrimaryTransactionOperator: public TEvWriteCommitSyncTransactionOperator, + public TMonitoringObjectsCounter { 
+private: + using TBase = TEvWriteCommitSyncTransactionOperator; + using TProposeResult = TTxController::TProposeResult; + static inline auto Registrator = + TFactory::TRegistrator(NKikimrTxColumnShard::TX_KIND_COMMIT_WRITE_PRIMARY); + +private: + std::set ReceivingShards; + std::set SendingShards; + std::set WaitShardsBrokenFlags; + std::set WaitShardsResultAck; + std::optional TxBroken; + mutable TAtomicCounter ControlCounter = 0; + + virtual NKikimrTxColumnShard::TCommitWriteTxBody SerializeToProto() const override { + NKikimrTxColumnShard::TCommitWriteTxBody result; + auto& data = *result.MutablePrimaryTabletData(); + if (TxBroken) { + data.SetTxBroken(*TxBroken); + } + for (auto&& i : ReceivingShards) { + data.AddReceivingShards(i); + } + for (auto&& i : SendingShards) { + data.AddSendingShards(i); + } + for (auto&& i : WaitShardsBrokenFlags) { + data.AddWaitShardsBrokenFlags(i); + } + for (auto&& i : WaitShardsResultAck) { + data.AddWaitShardsResultAck(i); + } + return result; + } + + virtual bool DoParseImpl(TColumnShard& /*owner*/, const NKikimrTxColumnShard::TCommitWriteTxBody& commitTxBody) override { + if (!commitTxBody.HasPrimaryTabletData()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "cannot read proto")("proto", commitTxBody.DebugString()); + return false; + } + auto& protoData = commitTxBody.GetPrimaryTabletData(); + for (auto&& i : protoData.GetReceivingShards()) { + ReceivingShards.emplace(i); + } + for (auto&& i : protoData.GetSendingShards()) { + SendingShards.emplace(i); + } + for (auto&& i : protoData.GetWaitShardsBrokenFlags()) { + WaitShardsBrokenFlags.emplace(i); + } + for (auto&& i : protoData.GetWaitShardsResultAck()) { + WaitShardsResultAck.emplace(i); + } + AFL_VERIFY(ReceivingShards.empty() == SendingShards.empty()); + if (protoData.HasTxBroken()) { + TxBroken = protoData.GetTxBroken(); + } + return true; + } + +private: + virtual TString DoGetOpType() const override { + return "EvWritePrimary"; + } + virtual TString 
DoDebugString() const override { + return "EV_WRITE_PRIMARY"; + } + class TTxWriteReceivedBrokenFlag: public NOlap::NDataSharing::TExtendedTransactionBase { + private: + using TBase = NOlap::NDataSharing::TExtendedTransactionBase; + const ui64 TxId; + const ui64 TabletId; + const bool BrokenFlag; + + virtual bool DoExecute(NTabletFlatExecutor::TTransactionContext& txc, const NActors::TActorContext& /*ctx*/) override { + auto op = Self->GetProgressTxController().GetTxOperatorVerifiedAs(TxId); + auto copy = *op; + if (copy.WaitShardsBrokenFlags.erase(TabletId)) { + copy.TxBroken = copy.TxBroken.value_or(false) || BrokenFlag; + Self->GetProgressTxController().WriteTxOperatorInfo(txc, TxId, copy.SerializeToProto().SerializeAsString()); + } else { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "repeated shard broken_flag info")("shard_id", TabletId); + } + return true; + } + virtual void DoComplete(const NActors::TActorContext& /*ctx*/) override { + auto op = Self->GetProgressTxController().GetTxOperatorVerifiedAs(TxId); + if (op->WaitShardsBrokenFlags.erase(TabletId)) { + op->TxBroken = op->TxBroken.value_or(false) || BrokenFlag; + op->SendBrokenFlagAck(*Self, TabletId); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "remove_tablet_id")("wait", JoinSeq(",", op->WaitShardsBrokenFlags))( + "receive", TabletId); + op->InitializeRequests(*Self); + } else { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "repeated shard broken_flag info")("shard_id", TabletId); + } + } + + public: + TTxWriteReceivedBrokenFlag(TColumnShard& owner, const ui64 txId, const ui64 tabletId, const bool broken) + : TBase(&owner, ::ToString(txId)) + , TxId(txId) + , TabletId(tabletId) + , BrokenFlag(broken) { + } + }; + + virtual std::unique_ptr CreateReceiveBrokenFlagTx( + TColumnShard& owner, const ui64 sendTabletId, const bool broken) const override { + return std::make_unique(owner, GetTxId(), sendTabletId, broken); + } + + class TTxWriteReceivedResultAck: public 
NOlap::NDataSharing::TExtendedTransactionBase { + private: + using TBase = NOlap::NDataSharing::TExtendedTransactionBase; + const ui64 TxId; + const ui64 TabletId; + + virtual bool DoExecute(NTabletFlatExecutor::TTransactionContext& txc, const NActors::TActorContext& /*ctx*/) override { + auto op = Self->GetProgressTxController().GetTxOperatorVerifiedAs(TxId); + auto copy = *op; + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "ack_tablet")("wait", JoinSeq(",", op->WaitShardsResultAck))("receive", TabletId); + AFL_VERIFY(copy.WaitShardsResultAck.erase(TabletId)); + Self->GetProgressTxController().WriteTxOperatorInfo(txc, TxId, copy.SerializeToProto().SerializeAsString()); + return true; + } + virtual void DoComplete(const NActors::TActorContext& /*ctx*/) override { + auto op = Self->GetProgressTxController().GetTxOperatorVerifiedAs(TxId); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "ack_tablet")("wait", JoinSeq(",", op->WaitShardsResultAck))( + "receive", TabletId); + AFL_VERIFY(op->WaitShardsResultAck.erase(TabletId)); + op->CheckFinished(*Self); + } + + public: + TTxWriteReceivedResultAck(TColumnShard& owner, const ui64 txId, const ui64 tabletId) + : TBase(&owner) + , TxId(txId) + , TabletId(tabletId) { + } + }; + + virtual bool IsTxBroken() const override { + AFL_VERIFY(TxBroken); + return *TxBroken; + } + + void InitializeRequests(TColumnShard& owner) { + if (WaitShardsBrokenFlags.empty()) { + WaitShardsResultAck.erase(owner.TabletID()); + if (WaitShardsResultAck.size()) { + SendResult(owner); + } else { + CheckFinished(owner); + } + } + } + + void CheckFinished(TColumnShard& owner) { + if (WaitShardsResultAck.empty()) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "finished"); + owner.EnqueueProgressTx(NActors::TActivationContext::AsActorContext(), GetTxId()); + } + } + + virtual std::unique_ptr CreateReceiveResultAckTx( + TColumnShard& owner, const ui64 recvTabletId) const override + { + return std::make_unique(owner, GetTxId(), 
recvTabletId); + } + + void SendBrokenFlagAck(TColumnShard& owner, const std::optional tabletId = {}) { + for (auto&& i : SendingShards) { + if (!WaitShardsBrokenFlags.contains(i)) { + if (tabletId && *tabletId != i) { + continue; + } + NActors::TActivationContext::AsActorContext().Send(MakePipePerNodeCacheID(EPipePerNodeCache::Persistent), + new TEvPipeCache::TEvForward( + new TEvTxProcessing::TEvReadSetAck(0, GetTxId(), owner.TabletID(), i, owner.TabletID(), 0), i, true), + IEventHandle::FlagTrackDelivery, GetTxId()); + } + } + } + + void SendResult(TColumnShard& owner) { + AFL_VERIFY(!!TxBroken); + NKikimrTx::TReadSetData readSetData; + readSetData.SetDecision(*TxBroken ? NKikimrTx::TReadSetData::DECISION_ABORT : NKikimrTx::TReadSetData::DECISION_COMMIT); + for (auto&& i : ReceivingShards) { + if (WaitShardsResultAck.contains(i)) { + NActors::TActivationContext::AsActorContext().Send(MakePipePerNodeCacheID(EPipePerNodeCache::Persistent), + new TEvPipeCache::TEvForward( + new TEvTxProcessing::TEvReadSet(0, GetTxId(), owner.TabletID(), i, owner.TabletID(), readSetData.SerializeAsString()), i, + true), + IEventHandle::FlagTrackDelivery, GetTxId()); + } + } + } + + virtual void DoOnTabletInit(TColumnShard& owner) override { + InitializeRequests(owner); + CheckFinished(owner); + } + + class TTxStartPreparation: public NOlap::NDataSharing::TExtendedTransactionBase { + private: + using TBase = NOlap::NDataSharing::TExtendedTransactionBase; + const ui64 TxId; + + virtual bool DoExecute(NTabletFlatExecutor::TTransactionContext& txc, const NActors::TActorContext& /*ctx*/) override { + auto& lock = Self->GetOperationsManager().GetLockVerified(Self->GetOperationsManager().GetLockForTxVerified(TxId)); + auto op = Self->GetProgressTxController().GetTxOperatorVerifiedAs(TxId); + if (op->WaitShardsBrokenFlags.contains(Self->TabletID())) { + auto copy = *op; + copy.TxBroken = lock.IsBroken(); + AFL_VERIFY(copy.WaitShardsBrokenFlags.erase(Self->TabletID())); + if 
(copy.WaitShardsBrokenFlags.empty()) { + AFL_VERIFY(copy.WaitShardsResultAck.erase(Self->TabletID())); + } + + Self->GetProgressTxController().WriteTxOperatorInfo(txc, TxId, copy.SerializeToProto().SerializeAsString()); + } + return true; + } + virtual void DoComplete(const NActors::TActorContext& /*ctx*/) override { + auto& lock = Self->GetOperationsManager().GetLockVerified(Self->GetOperationsManager().GetLockForTxVerified(TxId)); + auto op = Self->GetProgressTxController().GetTxOperatorVerifiedAs(TxId); + if (op->WaitShardsBrokenFlags.contains(Self->TabletID())) { + op->TxBroken = lock.IsBroken(); + AFL_VERIFY(op->WaitShardsBrokenFlags.erase(Self->TabletID())); + if (op->WaitShardsBrokenFlags.empty()) { + AFL_VERIFY(op->WaitShardsResultAck.erase(Self->TabletID())); + } + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "remove_tablet_id")("wait", JoinSeq(",", op->WaitShardsBrokenFlags))( + "receive", Self->TabletID()); + op->CheckFinished(*Self); + } + } + + public: + TTxStartPreparation(TColumnShard* owner, const ui64 txId) + : TBase(owner) + , TxId(txId) { + } + }; + + virtual void OnTimeout(TColumnShard& owner) override { + InitializeRequests(owner); + } + + virtual std::unique_ptr DoBuildTxPrepareForProgress(TColumnShard* owner) const override { + if (WaitShardsResultAck.empty()) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "skip_prepare_for_progress")("lock_id", LockId); + return nullptr; + } + AFL_VERIFY(ControlCounter.Inc() <= 1); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "prepare_for_progress_started")("lock_id", LockId); + return std::make_unique(owner, GetTxId()); + } + +public: + using TBase::TBase; + TEvWriteCommitPrimaryTransactionOperator( + const TFullTxInfo& txInfo, const ui64 lockId, const std::set& receivingShards, const std::set& sendingShards) + : TBase(txInfo, lockId) + , ReceivingShards(receivingShards) + , SendingShards(sendingShards) { + WaitShardsBrokenFlags = SendingShards; + WaitShardsResultAck = 
ReceivingShards; + } +}; + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/transactions/operators/ev_write/secondary.cpp b/ydb/core/tx/columnshard/transactions/operators/ev_write/secondary.cpp new file mode 100644 index 000000000000..30f6078321e9 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/operators/ev_write/secondary.cpp @@ -0,0 +1,5 @@ +#include "secondary.h" + +namespace NKikimr::NColumnShard { + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/transactions/operators/ev_write/secondary.h b/ydb/core/tx/columnshard/transactions/operators/ev_write/secondary.h new file mode 100644 index 000000000000..ae0224057b46 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/operators/ev_write/secondary.h @@ -0,0 +1,219 @@ +#pragma once + +#include "sync.h" + +#include +#include + +namespace NKikimr::NColumnShard { + +class TEvWriteCommitSecondaryTransactionOperator: public TEvWriteCommitSyncTransactionOperator, + public TMonitoringObjectsCounter { +private: + using TBase = TEvWriteCommitSyncTransactionOperator; + using TProposeResult = TTxController::TProposeResult; + static inline auto Registrator = + TFactory::TRegistrator(NKikimrTxColumnShard::TX_KIND_COMMIT_WRITE_SECONDARY); + +private: + ui64 ArbiterTabletId; + bool NeedReceiveBroken = false; + bool ReceiveAck = false; + bool SelfBroken = false; + mutable TAtomicCounter ControlCounter = 0; + std::optional TxBroken; + + virtual NKikimrTxColumnShard::TCommitWriteTxBody SerializeToProto() const override { + NKikimrTxColumnShard::TCommitWriteTxBody result; + auto& data = *result.MutableSecondaryTabletData(); + if (TxBroken) { + data.SetTxBroken(*TxBroken); + } + data.SetSelfBroken(SelfBroken); + data.SetNeedReceiveBroken(NeedReceiveBroken); + data.SetReceiveAck(ReceiveAck); + data.SetArbiterTabletId(ArbiterTabletId); + return result; + } + + virtual bool DoParseImpl(TColumnShard& /*owner*/, const NKikimrTxColumnShard::TCommitWriteTxBody& commitTxBody) 
override { + if (!commitTxBody.HasSecondaryTabletData()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "cannot read proto")("proto", commitTxBody.DebugString()); + return false; + } + auto& protoData = commitTxBody.GetSecondaryTabletData(); + SelfBroken = protoData.GetSelfBroken(); + ArbiterTabletId = protoData.GetArbiterTabletId(); + NeedReceiveBroken = protoData.GetNeedReceiveBroken(); + ReceiveAck = protoData.GetReceiveAck(); + if (protoData.HasTxBroken()) { + TxBroken = protoData.GetTxBroken(); + } + return true; + } + +private: + virtual TString DoGetOpType() const override { + return "EvWriteSecondary"; + } + virtual TString DoDebugString() const override { + return "EV_WRITE_SECONDARY"; + } + class TTxWriteReceivedAck: public NOlap::NDataSharing::TExtendedTransactionBase { + private: + using TBase = NOlap::NDataSharing::TExtendedTransactionBase; + const ui64 TxId; + + virtual bool DoExecute(NTabletFlatExecutor::TTransactionContext& txc, const NActors::TActorContext& /*ctx*/) override { + auto op = Self->GetProgressTxController().GetTxOperatorVerifiedAs(TxId); + auto copy = *op; + copy.ReceiveAck = true; + auto proto = copy.SerializeToProto(); + Self->GetProgressTxController().WriteTxOperatorInfo(txc, TxId, proto.SerializeAsString()); + return true; + } + virtual void DoComplete(const NActors::TActorContext& ctx) override { + auto op = Self->GetProgressTxController().GetTxOperatorVerifiedAs(TxId); + op->ReceiveAck = true; + if (!op->NeedReceiveBroken) { + op->TxBroken = false; + Self->EnqueueProgressTx(ctx, TxId); + } + } + + public: + TTxWriteReceivedAck(TColumnShard& owner, const ui64 txId) + : TBase(&owner) + , TxId(txId) { + } + }; + + virtual std::unique_ptr CreateReceiveResultAckTx( + TColumnShard& owner, const ui64 recvTabletId) const override { + AFL_VERIFY(recvTabletId == ArbiterTabletId)("recv", recvTabletId)("arbiter", ArbiterTabletId); + return std::make_unique(owner, GetTxId()); + } + + class TTxWriteReceivedBrokenFlag: public 
NOlap::NDataSharing::TExtendedTransactionBase { + private: + using TBase = NOlap::NDataSharing::TExtendedTransactionBase; + const ui64 TxId; + const bool BrokenFlag; + + virtual bool DoExecute(NTabletFlatExecutor::TTransactionContext& txc, const NActors::TActorContext& /*ctx*/) override { + auto op = Self->GetProgressTxController().GetTxOperatorVerifiedAs(TxId); + auto copy = *op; + copy.TxBroken = BrokenFlag; + auto proto = copy.SerializeToProto(); + Self->GetProgressTxController().WriteTxOperatorInfo(txc, TxId, proto.SerializeAsString()); + if (BrokenFlag) { + Self->GetProgressTxController().ExecuteOnCancel(TxId, txc); + } + return true; + } + virtual void DoComplete(const NActors::TActorContext& ctx) override { + auto op = Self->GetProgressTxController().GetTxOperatorVerifiedAs(TxId); + op->TxBroken = BrokenFlag; + op->SendBrokenFlagAck(*Self); + if (BrokenFlag) { + Self->GetProgressTxController().CompleteOnCancel(TxId, ctx); + } + Self->EnqueueProgressTx(ctx, TxId); + } + + public: + TTxWriteReceivedBrokenFlag(TColumnShard* owner, const ui64 txId, const bool broken) + : TBase(owner) + , TxId(txId) + , BrokenFlag(broken) { + } + }; + + virtual std::unique_ptr CreateReceiveBrokenFlagTx( + TColumnShard& owner, const ui64 sendTabletId, const bool broken) const override { + AFL_VERIFY(ArbiterTabletId == sendTabletId); + return std::make_unique(&owner, GetTxId(), broken); + } + + void SendBrokenFlagAck(TColumnShard& owner) { + NActors::TActivationContext::AsActorContext().Send(MakePipePerNodeCacheID(EPipePerNodeCache::Persistent), + new TEvPipeCache::TEvForward( + new TEvTxProcessing::TEvReadSetAck(0, GetTxId(), owner.TabletID(), ArbiterTabletId, owner.TabletID(), 0), ArbiterTabletId, true), + IEventHandle::FlagTrackDelivery, GetTxId()); + } + + void SendResult(TColumnShard& owner) { + NKikimrTx::TReadSetData readSetData; + readSetData.SetDecision(SelfBroken ? 
NKikimrTx::TReadSetData::DECISION_ABORT : NKikimrTx::TReadSetData::DECISION_COMMIT); + NActors::TActivationContext::AsActorContext().Send(MakePipePerNodeCacheID(EPipePerNodeCache::Persistent), + new TEvPipeCache::TEvForward(new TEvTxProcessing::TEvReadSet( + 0, GetTxId(), owner.TabletID(), ArbiterTabletId, owner.TabletID(), readSetData.SerializeAsString()), + ArbiterTabletId, true), + IEventHandle::FlagTrackDelivery, GetTxId()); + } + + virtual void DoOnTabletInit(TColumnShard& owner) override { + if (TxBroken || (ReceiveAck && !NeedReceiveBroken)) { + owner.EnqueueProgressTx(NActors::TActivationContext::AsActorContext(), GetTxId()); + } else if (!ReceiveAck) { + SendResult(owner); + } + } + + class TTxStartPreparation: public NOlap::NDataSharing::TExtendedTransactionBase { + private: + using TBase = NOlap::NDataSharing::TExtendedTransactionBase; + const ui64 TxId; + + virtual bool DoExecute(NTabletFlatExecutor::TTransactionContext& txc, const NActors::TActorContext& /*ctx*/) override { + auto& lock = Self->GetOperationsManager().GetLockVerified(Self->GetOperationsManager().GetLockForTxVerified(TxId)); + auto op = Self->GetProgressTxController().GetTxOperatorVerifiedAs(TxId); + auto copy = *op; + copy.SelfBroken = lock.IsBroken(); + Self->GetProgressTxController().WriteTxOperatorInfo(txc, TxId, copy.SerializeToProto().SerializeAsString()); + return true; + } + virtual void DoComplete(const NActors::TActorContext& /*ctx*/) override { + auto& lock = Self->GetOperationsManager().GetLockVerified(Self->GetOperationsManager().GetLockForTxVerified(TxId)); + auto op = Self->GetProgressTxController().GetTxOperatorVerifiedAs(TxId); + op->SelfBroken = lock.IsBroken(); + op->SendResult(*Self); + } + + public: + TTxStartPreparation(TColumnShard* owner, const ui64 txId) + : TBase(owner) + , TxId(txId) { + } + }; + + virtual std::unique_ptr DoBuildTxPrepareForProgress(TColumnShard* owner) const override { + if (TxBroken || (!NeedReceiveBroken && ReceiveAck)) { + 
AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "skip_prepare_for_progress")("lock_id", LockId); + return nullptr; + } + AFL_VERIFY(ControlCounter.Inc() <= 1); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "prepare_for_progress_started")("lock_id", LockId); + return std::make_unique(owner, GetTxId()); + } + + virtual void OnTimeout(TColumnShard& owner) override { + SendResult(owner); + } + +public: + using TBase::TBase; + virtual bool IsTxBroken() const override { + AFL_VERIFY(TxBroken); + return *TxBroken; + } + + TEvWriteCommitSecondaryTransactionOperator( + const TFullTxInfo& txInfo, const ui64 lockId, const ui64 arbiterTabletId, const bool needReceiveBroken) + : TBase(txInfo, lockId) + , ArbiterTabletId(arbiterTabletId) + , NeedReceiveBroken(needReceiveBroken) { + } +}; + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/transactions/operators/ev_write/simple.cpp b/ydb/core/tx/columnshard/transactions/operators/ev_write/simple.cpp new file mode 100644 index 000000000000..a6c51118a30e --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/operators/ev_write/simple.cpp @@ -0,0 +1,5 @@ +#include "simple.h" + +namespace NKikimr::NColumnShard { + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/transactions/operators/ev_write/simple.h b/ydb/core/tx/columnshard/transactions/operators/ev_write/simple.h new file mode 100644 index 000000000000..e3301117bb48 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/operators/ev_write/simple.h @@ -0,0 +1,28 @@ +#pragma once + +#include "abstract.h" + +#include + +namespace NKikimr::NColumnShard { + +class TEvWriteSimpleCommitTransactionOperator: public TBaseEvWriteTransactionOperator, + public TMonitoringObjectsCounter { +private: + using TBase = TBaseEvWriteTransactionOperator; + virtual bool DoParseImpl(TColumnShard& /*owner*/, const NKikimrTxColumnShard::TCommitWriteTxBody& /*commitTxBody*/) override { + return true; + } + static inline auto Registrator = 
TFactory::TRegistrator(NKikimrTxColumnShard::TX_KIND_COMMIT_WRITE); + +public: + using TBase::TBase; + virtual TString DoGetOpType() const override { + return "EvWriteSimple"; + } + virtual TString DoDebugString() const override { + return "EV_WRITE_SIMPLE"; + } +}; + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/transactions/operators/ev_write/sync.cpp b/ydb/core/tx/columnshard/transactions/operators/ev_write/sync.cpp new file mode 100644 index 000000000000..a8a75f586177 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/operators/ev_write/sync.cpp @@ -0,0 +1,5 @@ +#include "sync.h" + +namespace NKikimr::NColumnShard { + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/transactions/operators/ev_write/sync.h b/ydb/core/tx/columnshard/transactions/operators/ev_write/sync.h new file mode 100644 index 000000000000..8d149cca8100 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/operators/ev_write/sync.h @@ -0,0 +1,37 @@ +#pragma once + +#include "abstract.h" + +#include + +namespace NKikimr::NColumnShard { + +class TEvWriteCommitSyncTransactionOperator: public TBaseEvWriteTransactionOperator { +private: + using TBase = TBaseEvWriteTransactionOperator; + mutable std::optional DeadlockControlInstant; + virtual void OnTimeout(TColumnShard& owner) = 0; + + virtual bool DoPingTimeout(TColumnShard& owner, const TMonotonic now) override final { + if (!DeadlockControlInstant) { + DeadlockControlInstant = now; + } else if (now - *DeadlockControlInstant > TDuration::Seconds(2)) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "tx_timeout")("lock", LockId)("tx_id", GetTxId())( + "d", now - *DeadlockControlInstant); + DeadlockControlInstant = now; + OnTimeout(owner); + return true; + } + return false; + } + +public: + using TBase::TBase; + + virtual std::unique_ptr CreateReceiveResultAckTx(TColumnShard& owner, const ui64 recvTabletId) const = 0; + virtual std::unique_ptr CreateReceiveBrokenFlagTx( + 
TColumnShard& owner, const ui64 sendTabletId, const bool broken) const = 0; + virtual NKikimrTxColumnShard::TCommitWriteTxBody SerializeToProto() const = 0; +}; + +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/transactions/operators/ev_write/ya.make b/ydb/core/tx/columnshard/transactions/operators/ev_write/ya.make new file mode 100644 index 000000000000..c7283c74fa3d --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/operators/ev_write/ya.make @@ -0,0 +1,18 @@ +LIBRARY() + +SRCS( + GLOBAL secondary.cpp + GLOBAL simple.cpp + GLOBAL primary.cpp + abstract.cpp + sync.cpp +) + +PEERDIR( + ydb/services/metadata/abstract + ydb/core/tx/columnshard/blobs_action/events + ydb/core/tx/columnshard/data_sharing/destination/events + ydb/core/tx/columnshard/transactions/locks +) + +END() diff --git a/ydb/core/tx/columnshard/transactions/operators/long_tx_write.cpp b/ydb/core/tx/columnshard/transactions/operators/long_tx_write.cpp index 3c04dcdff64a..99298434b88b 100644 --- a/ydb/core/tx/columnshard/transactions/operators/long_tx_write.cpp +++ b/ydb/core/tx/columnshard/transactions/operators/long_tx_write.cpp @@ -20,9 +20,9 @@ TLongTxTransactionOperator::TProposeResult TLongTxTransactionOperator::DoStartPr TStringBuilder() << "Commit TxId# " << GetTxId() << " references WriteId# " << (ui64)writeId << " that is already locked by TxId# " << lw.PreparedTxId); } - auto it = owner.InsertTable->GetInserted().find(writeId); - if (it != owner.InsertTable->GetInserted().end()) { - auto granuleShardingInfo = owner.GetIndexAs().GetVersionedIndex().GetShardingInfoActual(it->second.PathId); + if (auto* inserted = owner.InsertTable->GetInserted().GetOptional(writeId)) { + auto granuleShardingInfo = + owner.GetIndexAs().GetVersionedIndex().GetShardingInfoActual(inserted->GetPathId()); if (granuleShardingInfo && lw.GranuleShardingVersionId && *lw.GranuleShardingVersionId != granuleShardingInfo->GetSnapshotVersion()) { return 
TProposeResult(NKikimrTxColumnShard::EResultStatus::ERROR, TStringBuilder() << "Commit TxId# " << GetTxId() << " references WriteId# " << (ui64)writeId << " declined through sharding deprecated"); @@ -43,9 +43,14 @@ bool TLongTxTransactionOperator::DoParse(TColumnShard& /*owner*/, const TString& } for (auto& id : commitTxBody.GetWriteIds()) { - WriteIds.insert(TWriteId{ id }); + WriteIds.insert(TInsertWriteId{ id }); } return true; } +void TLongTxTransactionOperator::DoSendReply(TColumnShard& owner, const TActorContext& ctx) { + const auto& txInfo = GetTxInfo(); + ctx.Send(txInfo.Source, BuildProposeResultEvent(owner).release()); +} + } diff --git a/ydb/core/tx/columnshard/transactions/operators/long_tx_write.h b/ydb/core/tx/columnshard/transactions/operators/long_tx_write.h index 5a56077ea470..45b642c8e98a 100644 --- a/ydb/core/tx/columnshard/transactions/operators/long_tx_write.h +++ b/ydb/core/tx/columnshard/transactions/operators/long_tx_write.h @@ -6,7 +6,7 @@ namespace NKikimr::NColumnShard { - class TLongTxTransactionOperator: public IProposeTxOperator { + class TLongTxTransactionOperator: public IProposeTxOperator, public TMonitoringObjectsCounter { using TBase = IProposeTxOperator; using TProposeResult = TTxController::TProposeResult; static inline auto Registrator = TFactory::TRegistrator(NKikimrTxColumnShard::TX_KIND_COMMIT); @@ -16,14 +16,23 @@ namespace NKikimr::NColumnShard { return "LONG_TX_WRITE"; } + bool TxWithDeadline() const override { + return true; + } + virtual TProposeResult DoStartProposeOnExecute(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) override; virtual void DoStartProposeOnComplete(TColumnShard& /*owner*/, const TActorContext& /*ctx*/) override { } + virtual void DoSendReply(TColumnShard& owner, const TActorContext& ctx) override; + virtual void DoFinishProposeOnExecute(TColumnShard& /*owner*/, NTabletFlatExecutor::TTransactionContext& /*txc*/) override { } virtual void DoFinishProposeOnComplete(TColumnShard& 
/*owner*/, const TActorContext& /*ctx*/) override { } + virtual TString DoGetOpType() const override { + return "LongTxWrite"; + } virtual bool DoIsAsync() const override { return false; } @@ -37,14 +46,14 @@ namespace NKikimr::NColumnShard { public: using TBase::TBase; - void OnTabletInit(TColumnShard& owner) override { + virtual void DoOnTabletInit(TColumnShard& owner) override { for (auto&& writeId : WriteIds) { AFL_VERIFY(owner.LongTxWrites.contains(writeId))("problem", "ltx_not_exists_for_write_id")("txId", GetTxId())("writeId", (ui64)writeId); owner.AddLongTxWrite(writeId, GetTxId()); } } - bool ExecuteOnProgress(TColumnShard& owner, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc) override { + bool ProgressOnExecute(TColumnShard& owner, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc) override { TBlobGroupSelector dsGroupSelector(owner.Info()); NOlap::TDbWrapper dbTable(txc.DB, &dsGroupSelector); @@ -54,19 +63,19 @@ namespace NKikimr::NColumnShard { auto counters = owner.InsertTable->Commit(dbTable, version.GetPlanStep(), version.GetTxId(), WriteIds, pathExists); - owner.IncCounter(COUNTER_BLOBS_COMMITTED, counters.Rows); - owner.IncCounter(COUNTER_BYTES_COMMITTED, counters.Bytes); - owner.IncCounter(COUNTER_RAW_BYTES_COMMITTED, counters.RawBytes); + owner.Counters.GetTabletCounters()->IncCounter(COUNTER_BLOBS_COMMITTED, counters.Rows); + owner.Counters.GetTabletCounters()->IncCounter(COUNTER_BYTES_COMMITTED, counters.Bytes); + owner.Counters.GetTabletCounters()->IncCounter(COUNTER_RAW_BYTES_COMMITTED, counters.RawBytes); NIceDb::TNiceDb db(txc.DB); - for (TWriteId writeId : WriteIds) { + for (TInsertWriteId writeId : WriteIds) { AFL_VERIFY(owner.RemoveLongTxWrite(db, writeId, GetTxId())); } owner.UpdateInsertTableCounters(); return true; } - bool CompleteOnProgress(TColumnShard& owner, const TActorContext& ctx) override { + bool ProgressOnComplete(TColumnShard& owner, const TActorContext& ctx) 
override { auto result = std::make_unique(owner.TabletID(), TxInfo.TxKind, GetTxId(), NKikimrTxColumnShard::SUCCESS); result->Record.SetStep(TxInfo.PlanStep); ctx.Send(TxInfo.Source, result.release(), 0, TxInfo.Cookie); @@ -75,7 +84,7 @@ namespace NKikimr::NColumnShard { virtual bool ExecuteOnAbort(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) override { NIceDb::TNiceDb db(txc.DB); - for (TWriteId writeId : WriteIds) { + for (TInsertWriteId writeId : WriteIds) { AFL_VERIFY(owner.RemoveLongTxWrite(db, writeId, GetTxId())); } TBlobGroupSelector dsGroupSelector(owner.Info()); @@ -88,7 +97,7 @@ namespace NKikimr::NColumnShard { } private: - THashSet WriteIds; + THashSet WriteIds; }; } // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/transactions/operators/propose_tx.cpp b/ydb/core/tx/columnshard/transactions/operators/propose_tx.cpp index c7d39da52740..2a48ca49a279 100644 --- a/ydb/core/tx/columnshard/transactions/operators/propose_tx.cpp +++ b/ydb/core/tx/columnshard/transactions/operators/propose_tx.cpp @@ -3,21 +3,36 @@ namespace NKikimr::NColumnShard { void IProposeTxOperator::DoSendReply(TColumnShard& owner, const TActorContext& ctx) { + if (owner.CurrentSchemeShardId) { + AFL_VERIFY(owner.CurrentSchemeShardId); + ctx.Send(MakePipePerNodeCacheID(false), + new TEvPipeCache::TEvForward(BuildProposeResultEvent(owner).release(), (ui64)owner.CurrentSchemeShardId, true)); + } else { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "scheme_shard_tablet_not_initialized")("source", GetTxInfo().Source); + ctx.Send(GetTxInfo().Source, BuildProposeResultEvent(owner).release()); + } +} + +std::unique_ptr IProposeTxOperator::BuildProposeResultEvent(const TColumnShard& owner) const { const auto& txInfo = GetTxInfo(); - std::unique_ptr evResult = std::make_unique( - owner.TabletID(), txInfo.TxKind, txInfo.TxId, GetProposeStartInfoVerified().GetStatus(), GetProposeStartInfoVerified().GetStatusMessage()); + std::unique_ptr evResult = 
+ std::make_unique(owner.TabletID(), txInfo.TxKind, txInfo.TxId, + GetProposeStartInfoVerified().GetStatus(), GetProposeStartInfoVerified().GetStatusMessage()); if (IsFail()) { - owner.IncCounter(COUNTER_PREPARE_ERROR); - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("message", GetProposeStartInfoVerified().GetStatusMessage())("tablet_id", owner.TabletID())("tx_id", txInfo.TxId); + owner.Counters.GetTabletCounters()->IncCounter(COUNTER_PREPARE_ERROR); + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("message", GetProposeStartInfoVerified().GetStatusMessage())("tablet_id", owner.TabletID())( + "tx_id", txInfo.TxId); } else { evResult->Record.SetMinStep(txInfo.MinStep); evResult->Record.SetMaxStep(txInfo.MaxStep); if (owner.ProcessingParams) { evResult->Record.MutableDomainCoordinators()->CopyFrom(owner.ProcessingParams->GetCoordinators()); } - owner.IncCounter(COUNTER_PREPARE_SUCCESS); + owner.Counters.GetTabletCounters()->IncCounter(COUNTER_PREPARE_SUCCESS); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("message", GetProposeStartInfoVerified().GetStatusMessage())("tablet_id", owner.TabletID())( + "tx_id", txInfo.TxId); } - ctx.Send(txInfo.Source, evResult.release()); + return evResult; } } diff --git a/ydb/core/tx/columnshard/transactions/operators/propose_tx.h b/ydb/core/tx/columnshard/transactions/operators/propose_tx.h index 84b2f7e8db66..d867e71bad9b 100644 --- a/ydb/core/tx/columnshard/transactions/operators/propose_tx.h +++ b/ydb/core/tx/columnshard/transactions/operators/propose_tx.h @@ -12,6 +12,7 @@ class IProposeTxOperator: public TTxController::ITransactionOperator { virtual bool DoCheckTxInfoForReply(const TFullTxInfo& originalTxInfo) const override { return GetTxInfo() == originalTxInfo; } + std::unique_ptr BuildProposeResultEvent(const TColumnShard& owner) const; virtual void DoSendReply(TColumnShard& owner, const TActorContext& ctx) override; virtual bool DoCheckAllowUpdate(const TFullTxInfo& currentTxInfo) const override { if (!currentTxInfo.SeqNo || 
!GetTxInfo().SeqNo) { diff --git a/ydb/core/tx/columnshard/transactions/operators/schema.cpp b/ydb/core/tx/columnshard/transactions/operators/schema.cpp index 13f854e277b5..d4019542bf1e 100644 --- a/ydb/core/tx/columnshard/transactions/operators/schema.cpp +++ b/ydb/core/tx/columnshard/transactions/operators/schema.cpp @@ -40,7 +40,17 @@ class TWaitEraseTablesTxSubscriber: public NSubscriber::ISubscriber { } }; -NKikimr::NColumnShard::TTxController::TProposeResult TSchemaTransactionOperator::DoStartProposeOnExecute(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) { +TTxController::TProposeResult TSchemaTransactionOperator::DoStartProposeOnExecute(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) { + auto seqNo = SeqNoFromProto(SchemaTxBody.GetSeqNo()); + auto lastSeqNo = owner.LastSchemaSeqNo; + + // Check if proposal is outdated + if (seqNo < lastSeqNo) { + auto errorMessage = TStringBuilder() << "Ignoring outdated schema tx proposal at tablet " << owner.TabletID() << " txId " << GetTxId() + << " ssId " << owner.CurrentSchemeShardId << " seqNo " << seqNo << " lastSeqNo " << lastSeqNo; + return TProposeResult(NKikimrTxColumnShard::EResultStatus::SCHEMA_CHANGED, errorMessage); + } + switch (SchemaTxBody.TxBody_case()) { case NKikimrTxColumnShard::TSchemaTxBody::kInitShard: { @@ -67,21 +77,6 @@ NKikimr::NColumnShard::TTxController::TProposeResult TSchemaTransactionOperator: break; } - auto seqNo = SeqNoFromProto(SchemaTxBody.GetSeqNo()); - auto lastSeqNo = owner.LastSchemaSeqNo; - - // Check if proposal is outdated - if (seqNo < lastSeqNo) { - auto errorMessage = TStringBuilder() - << "Ignoring outdated schema tx proposal at tablet " - << owner.TabletID() - << " txId " << GetTxId() - << " ssId " << owner.CurrentSchemeShardId - << " seqNo " << seqNo - << " lastSeqNo " << lastSeqNo; - return TProposeResult(NKikimrTxColumnShard::EResultStatus::SCHEMA_CHANGED, errorMessage); - } - owner.UpdateSchemaSeqNo(seqNo, txc); return 
TProposeResult(); } @@ -166,7 +161,7 @@ NKikimr::TConclusionStatus TSchemaTransactionOperator::ValidateTables(::google:: } return TConclusionStatus::Success(); } -bool TSchemaTransactionOperator::DoOnStartAsync(TColumnShard& owner) { +void TSchemaTransactionOperator::DoOnTabletInit(TColumnShard& owner) { AFL_VERIFY(WaitPathIdsToErase.empty()); switch (SchemaTxBody.TxBody_case()) { case NKikimrTxColumnShard::TSchemaTxBody::kInitShard: @@ -190,11 +185,9 @@ bool TSchemaTransactionOperator::DoOnStartAsync(TColumnShard& owner) { if (WaitPathIdsToErase.size()) { AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "wait_remove_path_id")("pathes", JoinSeq(",", WaitPathIdsToErase))("tx_id", GetTxId()); owner.Subscribers->RegisterSubscriber(std::make_shared(WaitPathIdsToErase, GetTxId())); - return true; } else { AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "remove_pathes_cleaned")("tx_id", GetTxId()); owner.Execute(new TTxFinishAsyncTransaction(owner, GetTxId())); - return false; } } diff --git a/ydb/core/tx/columnshard/transactions/operators/schema.h b/ydb/core/tx/columnshard/transactions/operators/schema.h index f59e92bc2d6d..8b7575cc3cc4 100644 --- a/ydb/core/tx/columnshard/transactions/operators/schema.h +++ b/ydb/core/tx/columnshard/transactions/operators/schema.h @@ -7,7 +7,7 @@ namespace NKikimr::NColumnShard { -class TSchemaTransactionOperator: public IProposeTxOperator { +class TSchemaTransactionOperator: public IProposeTxOperator, public TMonitoringObjectsCounter { private: using TBase = IProposeTxOperator; @@ -18,7 +18,7 @@ class TSchemaTransactionOperator: public IProposeTxOperator { THashSet NotifySubscribers; THashSet WaitPathIdsToErase; - virtual bool DoOnStartAsync(TColumnShard& owner) override; + virtual void DoOnTabletInit(TColumnShard& owner) override; template THashSet GetNotErasedTableIds(const TColumnShard& owner, const TInfoProto& tables) const { @@ -43,6 +43,22 @@ class TSchemaTransactionOperator: public IProposeTxOperator { } virtual void 
DoFinishProposeOnComplete(TColumnShard& /*owner*/, const TActorContext& /*ctx*/) override { } + virtual TString DoGetOpType() const override { + switch (SchemaTxBody.TxBody_case()) { + case NKikimrTxColumnShard::TSchemaTxBody::kInitShard: + return "Scheme:InitShard"; + case NKikimrTxColumnShard::TSchemaTxBody::kEnsureTables: + return "Scheme:EnsureTables"; + case NKikimrTxColumnShard::TSchemaTxBody::kAlterTable: + return "Scheme:AlterTable"; + case NKikimrTxColumnShard::TSchemaTxBody::kAlterStore: + return "Scheme:AlterStore"; + case NKikimrTxColumnShard::TSchemaTxBody::kDropTable: + return "Scheme:DropTable"; + case NKikimrTxColumnShard::TSchemaTxBody::TXBODY_NOT_SET: + return "Scheme:TXBODY_NOT_SET"; + } + } virtual bool DoIsAsync() const override { return WaitPathIdsToErase.size(); } @@ -65,7 +81,8 @@ class TSchemaTransactionOperator: public IProposeTxOperator { public: using TBase::TBase; - virtual bool ExecuteOnProgress(TColumnShard& owner, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc) override { + virtual bool ProgressOnExecute( + TColumnShard& owner, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc) override { if (!!TxAddSharding) { auto* tx = dynamic_cast(TxAddSharding.get()); AFL_VERIFY(tx); @@ -79,7 +96,7 @@ class TSchemaTransactionOperator: public IProposeTxOperator { return true; } - virtual bool CompleteOnProgress(TColumnShard& owner, const TActorContext& ctx) override { + virtual bool ProgressOnComplete(TColumnShard& owner, const TActorContext& ctx) override { if (!!TxAddSharding) { TxAddSharding->Complete(ctx); } diff --git a/ydb/core/tx/columnshard/transactions/operators/sharing.cpp b/ydb/core/tx/columnshard/transactions/operators/sharing.cpp index b7851d77b75c..ec90f07c16eb 100644 --- a/ydb/core/tx/columnshard/transactions/operators/sharing.cpp +++ b/ydb/core/tx/columnshard/transactions/operators/sharing.cpp @@ -28,9 +28,9 @@ bool TSharingTransactionOperator::DoParse(TColumnShard& 
owner, const TString& da AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "session_exists")("session_id", SharingTask->GetSessionId())("info", SharingTask->DebugString()); } else { SharingTask->Confirm(); + TxPropose = SharingSessionsManager->ProposeDestSession(&owner, SharingTask); } - TxPropose = SharingSessionsManager->ProposeDestSession(&owner, SharingTask); return true; } @@ -47,15 +47,16 @@ void TSharingTransactionOperator::DoStartProposeOnComplete(TColumnShard& /*owner if (!SessionExistsFlag) { AFL_VERIFY(!!TxPropose); TxPropose->Complete(ctx); + TxPropose.reset(); } - TxPropose.release(); } -bool TSharingTransactionOperator::ExecuteOnProgress(TColumnShard& /*owner*/, const NOlap::TSnapshot& /*version*/, NTabletFlatExecutor::TTransactionContext& /*txc*/) { +bool TSharingTransactionOperator::ProgressOnExecute( + TColumnShard& /*owner*/, const NOlap::TSnapshot& /*version*/, NTabletFlatExecutor::TTransactionContext& /*txc*/) { return true; } -bool TSharingTransactionOperator::CompleteOnProgress(TColumnShard& owner, const TActorContext& ctx) { +bool TSharingTransactionOperator::ProgressOnComplete(TColumnShard& owner, const TActorContext& ctx) { for (TActorId subscriber : NotifySubscribers) { auto event = MakeHolder(owner.TabletID(), GetTxId()); ctx.Send(subscriber, event.Release(), 0, 0); diff --git a/ydb/core/tx/columnshard/transactions/operators/sharing.h b/ydb/core/tx/columnshard/transactions/operators/sharing.h index 4173b5e11c9a..13c7df7cad0e 100644 --- a/ydb/core/tx/columnshard/transactions/operators/sharing.h +++ b/ydb/core/tx/columnshard/transactions/operators/sharing.h @@ -6,7 +6,7 @@ namespace NKikimr::NColumnShard { -class TSharingTransactionOperator: public IProposeTxOperator { +class TSharingTransactionOperator: public IProposeTxOperator, public TMonitoringObjectsCounter { private: using TBase = IProposeTxOperator; @@ -25,6 +25,9 @@ class TSharingTransactionOperator: public IProposeTxOperator { } virtual void 
DoFinishProposeOnComplete(TColumnShard& /*owner*/, const TActorContext& /*ctx*/) override { } + virtual TString DoGetOpType() const override { + return "Sharing"; + } virtual bool DoIsAsync() const override { AFL_VERIFY(SharingTask); return !SharingTask->IsFinished(); @@ -40,9 +43,9 @@ class TSharingTransactionOperator: public IProposeTxOperator { NotifySubscribers.insert(actorId); } - virtual bool ExecuteOnProgress(TColumnShard& owner, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc) override; + virtual bool ProgressOnExecute(TColumnShard& owner, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc) override; - virtual bool CompleteOnProgress(TColumnShard& owner, const TActorContext& ctx) override; + virtual bool ProgressOnComplete(TColumnShard& owner, const TActorContext& ctx) override; virtual bool ExecuteOnAbort(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) override; virtual bool CompleteOnAbort(TColumnShard& owner, const TActorContext& ctx) override; diff --git a/ydb/core/tx/columnshard/transactions/operators/ss_operation.cpp b/ydb/core/tx/columnshard/transactions/operators/ss_operation.cpp deleted file mode 100644 index 1bf60d44d373..000000000000 --- a/ydb/core/tx/columnshard/transactions/operators/ss_operation.cpp +++ /dev/null @@ -1,23 +0,0 @@ -#include "ss_operation.h" - -namespace NKikimr::NColumnShard { - -void ISSTransactionOperator::DoSendReply(TColumnShard& owner, const TActorContext& ctx) { - const auto& txInfo = GetTxInfo(); - std::unique_ptr evResult = std::make_unique( - owner.TabletID(), txInfo.TxKind, txInfo.TxId, GetProposeStartInfoVerified().GetStatus(), GetProposeStartInfoVerified().GetStatusMessage()); - if (IsFail()) { - owner.IncCounter(COUNTER_PREPARE_ERROR); - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("message", GetProposeStartInfoVerified().GetStatusMessage())("tablet_id", owner.TabletID())("tx_id", txInfo.TxId); - } else { - 
evResult->Record.SetMinStep(txInfo.MinStep); - evResult->Record.SetMaxStep(txInfo.MaxStep); - if (owner.ProcessingParams) { - evResult->Record.MutableDomainCoordinators()->CopyFrom(owner.ProcessingParams->GetCoordinators()); - } - owner.IncCounter(COUNTER_PREPARE_SUCCESS); - } - ctx.Send(txInfo.Source, evResult.release()); -} - -} diff --git a/ydb/core/tx/columnshard/transactions/operators/ss_operation.h b/ydb/core/tx/columnshard/transactions/operators/ss_operation.h deleted file mode 100644 index feff6af77225..000000000000 --- a/ydb/core/tx/columnshard/transactions/operators/ss_operation.h +++ /dev/null @@ -1,17 +0,0 @@ -#pragma once - -#include -#include - -namespace NKikimr::NColumnShard { - -class ISSTransactionOperator: public TTxController::ITransactionOperator { -private: - using TBase = TTxController::ITransactionOperator; -protected: - virtual void DoSendReply(TColumnShard& owner, const TActorContext& ctx) override; -public: - using TBase::TBase; -}; - -} diff --git a/ydb/core/tx/columnshard/transactions/operators/ya.make b/ydb/core/tx/columnshard/transactions/operators/ya.make index 579b0d07679c..e96323b51b44 100644 --- a/ydb/core/tx/columnshard/transactions/operators/ya.make +++ b/ydb/core/tx/columnshard/transactions/operators/ya.make @@ -3,7 +3,6 @@ LIBRARY() SRCS( GLOBAL schema.cpp GLOBAL long_tx_write.cpp - GLOBAL ev_write.cpp GLOBAL backup.cpp GLOBAL sharing.cpp propose_tx.cpp @@ -11,6 +10,7 @@ SRCS( PEERDIR( ydb/core/tx/columnshard/data_sharing/destination/events + ydb/core/tx/columnshard/transactions/operators/ev_write ydb/core/tx/columnshard/export/session ) diff --git a/ydb/core/tx/columnshard/transactions/protos/tx_event.proto b/ydb/core/tx/columnshard/transactions/protos/tx_event.proto new file mode 100644 index 000000000000..6e64dde46e4d --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/protos/tx_event.proto @@ -0,0 +1,19 @@ +package NKikimrColumnShardTxProto; + +message TEvent { + optional uint64 PathId = 1; + optional string ClassName 
= 2; + + message TReadEvent { + optional string Filter = 1; + optional string Schema = 2; + } + + message TWriteEvent { + } + + oneof Implementation { + TReadEvent Read = 20; + TWriteEvent Write = 21; + } +} diff --git a/ydb/core/tx/columnshard/transactions/protos/ya.make b/ydb/core/tx/columnshard/transactions/protos/ya.make new file mode 100644 index 000000000000..7a54fdc3404d --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/protos/ya.make @@ -0,0 +1,12 @@ +PROTO_LIBRARY() + +SRCS( + tx_event.proto +) + +PEERDIR( + ydb/core/tx/columnshard/common/protos + ydb/core/protos +) + +END() diff --git a/ydb/core/tx/columnshard/transactions/tx_controller.cpp b/ydb/core/tx/columnshard/transactions/tx_controller.cpp index aeebb5c78277..afb1e8a33d50 100644 --- a/ydb/core/tx/columnshard/transactions/tx_controller.cpp +++ b/ydb/core/tx/columnshard/transactions/tx_controller.cpp @@ -1,4 +1,5 @@ #include "tx_controller.h" + #include "transactions/tx_finish_async.h" #include @@ -6,7 +7,8 @@ namespace NKikimr::NColumnShard { TTxController::TTxController(TColumnShard& owner) - : Owner(owner) { + : Owner(owner) + , Counters(owner.Counters.GetCSCounters().TxProgress) { } bool TTxController::HaveOutdatedTxs() const { @@ -45,18 +47,28 @@ bool TTxController::Load(NTabletFlatExecutor::TTransactionContext& txc) { return false; } + ui32 countWithDeadline = 0; + ui32 countOverrideDeadline = 0; + ui32 countNoDeadline = 0; while (!rowset.EndOfSet()) { const ui64 txId = rowset.GetValue(); const NKikimrTxColumnShard::ETransactionKind txKind = rowset.GetValue(); ITransactionOperator::TPtr txOperator(ITransactionOperator::TFactory::Construct(txKind, TTxInfo(txKind, txId))); - Y_ABORT_UNLESS(!!txOperator); + AFL_VERIFY(!!txOperator)("kind", txKind); const TString txBody = rowset.GetValue(); - Y_ABORT_UNLESS(txOperator->Parse(Owner, txBody, true)); + AFL_VERIFY(txOperator->Parse(Owner, txBody, true)); auto& txInfo = txOperator->MutableTxInfo(); txInfo.MaxStep = rowset.GetValue(); if 
(txInfo.MaxStep != Max()) { txInfo.MinStep = txInfo.MaxStep - MaxCommitTxDelay.MilliSeconds(); + ++countWithDeadline; + } else if (txOperator->TxWithDeadline()) { + txInfo.MinStep = GetAllowedStep(); + txInfo.MaxStep = txInfo.MinStep + MaxCommitTxDelay.MilliSeconds(); + ++countOverrideDeadline; + } else { + ++countNoDeadline; } txInfo.PlanStep = rowset.GetValueOrDefault(0); txInfo.Source = rowset.GetValue(); @@ -74,25 +86,14 @@ bool TTxController::Load(NTabletFlatExecutor::TTransactionContext& txc) { return false; } } + AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("override", countOverrideDeadline)("no_dl", countNoDeadline)("dl", countWithDeadline)( + "operators", Operators.size())("plan", PlanQueue.size())("dl_queue", DeadlineQueue.size()); return true; } -TTxController::ITransactionOperator::TPtr TTxController::GetTxOperator(const ui64 txId) const { - auto it = Operators.find(txId); - if (it == Operators.end()) { - return nullptr; - } - return it->second; -} - -TTxController::ITransactionOperator::TPtr TTxController::GetVerifiedTxOperator(const ui64 txId) const { - auto it = Operators.find(txId); - AFL_VERIFY(it != Operators.end())("tx_id", txId); - return it->second; -} - -std::shared_ptr TTxController::UpdateTxSourceInfo(const TFullTxInfo& tx, NTabletFlatExecutor::TTransactionContext& txc) { - auto op = GetVerifiedTxOperator(tx.GetTxId()); +std::shared_ptr TTxController::UpdateTxSourceInfo( + const TFullTxInfo& tx, NTabletFlatExecutor::TTransactionContext& txc) { + auto op = GetTxOperatorVerified(tx.GetTxId()); op->ResetStatusOnUpdate(); auto& txInfo = op->MutableTxInfo(); txInfo.Source = tx.Source; @@ -104,17 +105,20 @@ std::shared_ptr TTxController::UpdateTxSour return op; } -TTxController::TTxInfo TTxController::RegisterTx(const std::shared_ptr& txOperator, const TString& txBody, NTabletFlatExecutor::TTransactionContext& txc) { +TTxController::TTxInfo TTxController::RegisterTx(const std::shared_ptr& txOperator, const TString& txBody, + 
NTabletFlatExecutor::TTransactionContext& txc) { NIceDb::TNiceDb db(txc.DB); auto& txInfo = txOperator->GetTxInfo(); AFL_VERIFY(txInfo.MaxStep == Max()); AFL_VERIFY(Operators.emplace(txInfo.TxId, txOperator).second); Schema::SaveTxInfo(db, txInfo, txBody); + Counters.OnRegisterTx(txOperator->GetOpType()); return txInfo; } -TTxController::TTxInfo TTxController::RegisterTxWithDeadline(const std::shared_ptr& txOperator, const TString& txBody, NTabletFlatExecutor::TTransactionContext& txc) { +TTxController::TTxInfo TTxController::RegisterTxWithDeadline(const std::shared_ptr& txOperator, + const TString& txBody, NTabletFlatExecutor::TTransactionContext& txc) { NIceDb::TNiceDb db(txc.DB); auto& txInfo = txOperator->MutableTxInfo(); @@ -125,22 +129,22 @@ TTxController::TTxInfo TTxController::RegisterTxWithDeadline(const std::shared_p Schema::SaveTxInfo(db, txInfo, txBody); DeadlineQueue.emplace(txInfo.MaxStep, txOperator->GetTxId()); + Counters.OnRegisterTx(txOperator->GetOpType()); return txInfo; } -bool TTxController::AbortTx(const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc) { - auto opIt = Operators.find(txId); +bool TTxController::AbortTx(const TPlanQueueItem planQueueItem, NTabletFlatExecutor::TTransactionContext& txc) { + auto opIt = Operators.find(planQueueItem.TxId); Y_ABORT_UNLESS(opIt != Operators.end()); Y_ABORT_UNLESS(opIt->second->GetTxInfo().PlanStep == 0); opIt->second->ExecuteOnAbort(Owner, txc); opIt->second->CompleteOnAbort(Owner, NActors::TActivationContext::AsActorContext()); + Counters.OnAbortTx(opIt->second->GetOpType()); - if (opIt->second->GetTxInfo().MaxStep != Max()) { - DeadlineQueue.erase(TPlanQueueItem(opIt->second->GetTxInfo().MaxStep, txId)); - } - Operators.erase(txId); + AFL_VERIFY(Operators.erase(planQueueItem.TxId)); + AFL_VERIFY(DeadlineQueue.erase(planQueueItem)); NIceDb::TNiceDb db(txc.DB); - Schema::EraseTxInfo(db, txId); + Schema::EraseTxInfo(db, planQueueItem.TxId); return true; } @@ -179,7 +183,14 @@ bool 
TTxController::ExecuteOnCancel(const ui64 txId, NTabletFlatExecutor::TTrans return true; } -std::optional TTxController::StartPlannedTx() { +std::optional TTxController::GetFirstPlannedTx() const { + if (!PlanQueue.empty()) { + return GetTxInfoVerified(PlanQueue.begin()->TxId); + } + return std::nullopt; +} + +std::optional TTxController::PopFirstPlannedTx() { if (!PlanQueue.empty()) { auto node = PlanQueue.extract(PlanQueue.begin()); auto& item = node.value(); @@ -190,13 +201,16 @@ std::optional TTxController::StartPlannedTx() { return std::nullopt; } -void TTxController::FinishPlannedTx(const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc) { +void TTxController::ProgressOnExecute(const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc) { NIceDb::TNiceDb db(txc.DB); + auto opIt = Operators.find(txId); + AFL_VERIFY(opIt != Operators.end())("tx_id", txId); + Counters.OnFinishPlannedTx(opIt->second->GetOpType()); + AFL_VERIFY(Operators.erase(txId)); Schema::EraseTxInfo(db, txId); } -void TTxController::CompleteRunningTx(const TPlanQueueItem& txItem) { - AFL_VERIFY(Operators.erase(txItem.TxId)); +void TTxController::ProgressOnComplete(const TPlanQueueItem& txItem) { AFL_VERIFY(RunningQueue.erase(txItem))("info", txItem.DebugString()); } @@ -240,7 +254,7 @@ size_t TTxController::CleanExpiredTxs(NTabletFlatExecutor::TTransactionContext& } ui64 txId = it->TxId; LOG_S_DEBUG(TStringBuilder() << "Removing outdated txId " << txId << " max step " << it->Step << " outdated step "); - AbortTx(txId, txc); + AbortTx(*it, txc); ++removedCount; } } @@ -263,7 +277,10 @@ TDuration TTxController::GetTxCompleteLag(ui64 timecastStep) const { TTxController::EPlanResult TTxController::PlanTx(const ui64 planStep, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc) { auto it = Operators.find(txId); if (it == Operators.end()) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "skip_plan_tx")("tx_id", txId); return EPlanResult::Skipped; + } else { + 
AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "plan_tx")("tx_id", txId)("plan_step", it->second->MutableTxInfo().PlanStep); } auto& txInfo = it->second->MutableTxInfo(); if (txInfo.PlanStep == 0) { @@ -280,6 +297,8 @@ TTxController::EPlanResult TTxController::PlanTx(const ui64 planStep, const ui64 } void TTxController::OnTabletInit() { + AFL_VERIFY(!StartedFlag); + StartedFlag = true; for (auto&& txOperator : Operators) { txOperator.second->OnTabletInit(Owner); } @@ -287,22 +306,24 @@ void TTxController::OnTabletInit() { std::shared_ptr TTxController::StartProposeOnExecute( const TTxController::TTxInfo& txInfo, const TString& txBody, NTabletFlatExecutor::TTransactionContext& txc) { - NActors::TLogContextGuard lGuard = NActors::TLogContextBuilder::Build()("method", "TTxController::StartProposeOnExecute")( - "tx_info", txInfo.DebugString())("tx_info", txInfo.DebugString()); + NActors::TLogContextGuard lGuard = + NActors::TLogContextBuilder::Build()("method", "TTxController::StartProposeOnExecute")("tx_info", txInfo.DebugString()); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "start"); - std::shared_ptr txOperator(TTxController::ITransactionOperator::TFactory::Construct(txInfo.TxKind, txInfo)); + std::shared_ptr txOperator( + TTxController::ITransactionOperator::TFactory::Construct(txInfo.TxKind, txInfo)); AFL_VERIFY(!!txOperator); if (!txOperator->Parse(Owner, txBody)) { AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("error", "cannot parse txOperator"); return txOperator; } + Counters.OnStartProposeOnExecute(txOperator->GetOpType()); auto txInfoPtr = GetTxInfo(txInfo.TxId); if (!!txInfoPtr) { if (!txOperator->CheckAllowUpdate(*txInfoPtr)) { AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("error", "incorrect duplication")("actual_tx", txInfoPtr->DebugString()); - TTxController::TProposeResult proposeResult( - NKikimrTxColumnShard::EResultStatus::ERROR, TStringBuilder() << "Another commit TxId# " << txInfo.TxId << " has already been proposed"); + 
TTxController::TProposeResult proposeResult(NKikimrTxColumnShard::EResultStatus::ERROR, + TStringBuilder() << "Another commit TxId# " << txInfo.TxId << " has already been proposed"); txOperator->SetProposeStartInfo(proposeResult); return txOperator; } else { @@ -325,52 +346,48 @@ std::shared_ptr TTxController::StartPropose } } -void TTxController::StartProposeOnComplete(const ui64 txId, const TActorContext& ctx) { - NActors::TLogContextGuard lGuard = NActors::TLogContextBuilder::Build()("method", "TTxController::StartProposeOnComplete")("tx_id", txId); - auto txOperator = GetTxOperator(txId); - if (!txOperator) { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("error", "cannot found txOperator in propose transaction base")("tx_id", txId); - } else { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "start"); - txOperator->StartProposeOnComplete(Owner, ctx); - } +void TTxController::StartProposeOnComplete(ITransactionOperator& txOperator, const TActorContext& ctx) { + NActors::TLogContextGuard lGuard = + NActors::TLogContextBuilder::Build()("method", "TTxController::StartProposeOnComplete")("tx_id", txOperator.GetTxId()); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "start"); + txOperator.StartProposeOnComplete(Owner, ctx); + Counters.OnStartProposeOnComplete(txOperator.GetOpType()); } void TTxController::FinishProposeOnExecute(const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc) { NActors::TLogContextGuard lGuard = NActors::TLogContextBuilder::Build()("method", "TTxController::FinishProposeOnExecute")("tx_id", txId); - auto txOperator = GetTxOperator(txId); - if (!txOperator) { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("error", "cannot found txOperator in propose transaction base")("tx_id", txId); - } else { + if (auto txOperator = GetTxOperatorOptional(txId)) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "start"); txOperator->FinishProposeOnExecute(Owner, txc); + Counters.OnFinishProposeOnExecute(txOperator->GetOpType()); + } else { + 
AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("error", "cannot found txOperator in propose transaction base")("tx_id", txId); } } +void TTxController::FinishProposeOnComplete(ITransactionOperator& txOperator, const TActorContext& ctx) { + NActors::TLogContextGuard lGuard = + NActors::TLogContextBuilder::Build()("method", "TTxController::FinishProposeOnComplete")("tx_id", txOperator.GetTxId()); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "start")("tx_info", txOperator.GetTxInfo().DebugString()); + TTxController::TProposeResult proposeResult = txOperator.GetProposeStartInfoVerified(); + AFL_VERIFY(!txOperator.IsFail()); + txOperator.FinishProposeOnComplete(Owner, ctx); + txOperator.SendReply(Owner, ctx); + Counters.OnFinishProposeOnComplete(txOperator.GetOpType()); +} + void TTxController::FinishProposeOnComplete(const ui64 txId, const TActorContext& ctx) { - NActors::TLogContextGuard lGuard = NActors::TLogContextBuilder::Build()("method", "TTxController::FinishProposeOnComplete")("tx_id", txId); - auto txOperator = GetTxOperator(txId); + auto txOperator = GetTxOperatorOptional(txId); if (!txOperator) { AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("error", "cannot found txOperator in propose transaction finish")("tx_id", txId); return; } - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "start")("tx_info", txOperator->GetTxInfo().DebugString()); - TTxController::TProposeResult proposeResult = txOperator->GetProposeStartInfoVerified(); - AFL_VERIFY(!txOperator->IsFail()); - txOperator->FinishProposeOnComplete(Owner, ctx); - txOperator->SendReply(Owner, ctx); -} - -void TTxController::StartOperators() { - AFL_VERIFY(!StartedFlag); - StartedFlag = true; - for (auto&& i : Operators) { - Y_UNUSED(i.second->OnStartAsync(Owner)); - } + return FinishProposeOnComplete(*txOperator, ctx); } void TTxController::ITransactionOperator::SwitchStateVerified(const EStatus from, const EStatus to) { - AFL_VERIFY(!Status || *Status == from)("error", "incorrect expected 
status")("real_state", *Status)("expected", from)("details", DebugString()); + AFL_VERIFY(!Status || *Status == from)("error", "incorrect expected status")("real_state", *Status)("expected", from)( + "details", DebugString()); Status = to; } diff --git a/ydb/core/tx/columnshard/transactions/tx_controller.h b/ydb/core/tx/columnshard/transactions/tx_controller.h index 3e9dfcfb735e..e48f10d3796d 100644 --- a/ydb/core/tx/columnshard/transactions/tx_controller.h +++ b/ydb/core/tx/columnshard/transactions/tx_controller.h @@ -1,11 +1,14 @@ #pragma once -#include - #include +#include +#include #include #include +namespace NKikimr::NOlap::NTxInteractions { +class TManager; +} namespace NKikimr::NColumnShard { @@ -14,10 +17,12 @@ class TColumnShard; struct TBasicTxInfo { const NKikimrTxColumnShard::ETransactionKind TxKind; const ui64 TxId; + public: TBasicTxInfo(const NKikimrTxColumnShard::ETransactionKind& txKind, const ui64 txId) : TxKind(txKind) , TxId(txId) { + AFL_VERIFY(txKind != NKikimrTxColumnShard::TX_KIND_NONE); } bool operator==(const TBasicTxInfo& item) const = default; @@ -42,12 +47,18 @@ struct TFullTxInfo: public TBasicTxInfo { TActorId Source; ui64 Cookie = 0; std::optional SeqNo; + public: + static TFullTxInfo BuildFake(const NKikimrTxColumnShard::ETransactionKind kind) { + return TFullTxInfo(kind, 0, NActors::TActorId(), 0, {}); + } + bool operator==(const TFullTxInfo& item) const = default; TString DebugString() const { TStringBuilder sb; - sb << TBase::DebugString() << ";min=" << MinStep << ";max=" << MaxStep << ";plan=" << PlanStep << ";src=" << Source << ";cookie=" << Cookie; + sb << TBase::DebugString() << ";min=" << MinStep << ";max=" << MaxStep << ";plan=" << PlanStep << ";src=" << Source + << ";cookie=" << Cookie; if (SeqNo) { sb << *SeqNo << ";"; } @@ -75,12 +86,12 @@ struct TFullTxInfo: public TBasicTxInfo { : TBasicTxInfo(txKind, txId) { } - TFullTxInfo(const NKikimrTxColumnShard::ETransactionKind& txKind, const ui64 txId, const TActorId& source, 
const ui64 cookie, const std::optional& seqNo) + TFullTxInfo(const NKikimrTxColumnShard::ETransactionKind& txKind, const ui64 txId, const TActorId& source, const ui64 cookie, + const std::optional& seqNo) : TBasicTxInfo(txKind, txId) , Source(source) , Cookie(cookie) - , SeqNo(seqNo) - { + , SeqNo(seqNo) { } }; @@ -89,6 +100,7 @@ class TTxProposeResult { class TProposeResult { YDB_READONLY(NKikimrTxColumnShard::EResultStatus, Status, NKikimrTxColumnShard::EResultStatus::PREPARED); YDB_READONLY_DEF(TString, StatusMessage); + public: TProposeResult() = default; TProposeResult(NKikimrTxColumnShard::EResultStatus status, const TString& statusMessage) @@ -109,16 +121,15 @@ class TTxProposeResult { std::optional BaseTxInfo; std::optional FullTxInfo; TProposeResult ProposeResult; + public: TTxProposeResult(const TBasicTxInfo& txInfo, TProposeResult&& result) : BaseTxInfo(txInfo) , ProposeResult(std::move(result)) { - } TTxProposeResult(const TFullTxInfo& txInfo, TProposeResult&& result) : FullTxInfo(txInfo) , ProposeResult(std::move(result)) { - } ui64 GetTxId() const noexcept { @@ -152,8 +163,8 @@ class TTxController { TPlanQueueItem(const ui64 step, const ui64 txId) : Step(step) - , TxId(txId) - {} + , TxId(txId) { + } inline bool operator<(const TPlanQueueItem& rhs) const { return Step < rhs.Step || (Step == rhs.Step && TxId < rhs.TxId); @@ -180,10 +191,12 @@ class TTxController { ReplySent, Failed }; + protected: TTxInfo TxInfo; YDB_READONLY_DEF(std::optional, ProposeStartInfo); std::optional Status = EStatus::Created; + private: friend class TTxController; virtual bool DoParse(TColumnShard& owner, const TString& data) = 0; @@ -191,41 +204,48 @@ class TTxController { virtual void DoStartProposeOnComplete(TColumnShard& owner, const TActorContext& ctx) = 0; virtual void DoFinishProposeOnExecute(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) = 0; virtual void DoFinishProposeOnComplete(TColumnShard& owner, const TActorContext& ctx) = 0; + virtual 
TString DoGetOpType() const = 0; virtual bool DoIsAsync() const = 0; virtual void DoSendReply(TColumnShard& owner, const TActorContext& ctx) = 0; virtual bool DoCheckAllowUpdate(const TFullTxInfo& currentTxInfo) const = 0; virtual bool DoCheckTxInfoForReply(const TFullTxInfo& /*originalTxInfo*/) const { return true; } + virtual bool DoPingTimeout(TColumnShard& /*owner*/, const TMonotonic /*now*/) { + return false; + } + + virtual std::unique_ptr DoBuildTxPrepareForProgress(TColumnShard* /*owner*/) const { + return nullptr; + } void SwitchStateVerified(const EStatus from, const EStatus to); TTxInfo& MutableTxInfo() { return TxInfo; } + virtual void DoOnTabletInit(TColumnShard& /*owner*/) { + } + void ResetStatusOnUpdate() { Status = {}; } virtual TString DoDebugString() const = 0; - virtual bool DoOnStartAsync(TColumnShard& /*owner*/) { - return false; - } std::optional StartedAsync; public: using TPtr = std::shared_ptr; using TFactory = NObjectFactory::TParametrizedObjectFactory; + using OpType = TString; - bool CheckTxInfoForReply(const TFullTxInfo& originalTxInfo) const { - return DoCheckTxInfoForReply(originalTxInfo); + bool PingTimeout(TColumnShard& owner, const TMonotonic now) { + return DoPingTimeout(owner, now); } - [[nodiscard]] bool OnStartAsync(TColumnShard& owner) { - AFL_VERIFY(!StartedAsync); - StartedAsync = DoOnStartAsync(owner); - return *StartedAsync; + bool CheckTxInfoForReply(const TFullTxInfo& originalTxInfo) const { + return DoCheckTxInfoForReply(originalTxInfo); } TString DebugString() const { @@ -236,6 +256,10 @@ class TTxController { return DoCheckAllowUpdate(currentTxInfo); } + std::unique_ptr BuildTxPrepareForProgress(TColumnShard* owner) const { + return DoBuildTxPrepareForProgress(owner); + } + bool IsFail() const { return ProposeStartInfo && ProposeStartInfo->IsFail(); } @@ -258,18 +282,23 @@ class TTxController { } ITransactionOperator(const TTxInfo& txInfo) - : TxInfo(txInfo) - {} + : TxInfo(txInfo) { + } ui64 GetTxId() const { return 
TxInfo.TxId; } + OpType GetOpType() const { + return DoGetOpType(); + } + bool IsAsync() const { return DoIsAsync() && Status != EStatus::Failed && Status != EStatus::ReplySent; } - virtual ~ITransactionOperator() {} + virtual ~ITransactionOperator() { + } virtual bool TxWithDeadline() const { return true; @@ -279,14 +308,15 @@ class TTxController { const bool result = DoParse(owner, data); if (!result) { AFL_VERIFY(!onLoad); - ProposeStartInfo = TTxController::TProposeResult(NKikimrTxColumnShard::EResultStatus::ERROR, TStringBuilder() << "Error processing commit TxId# " << TxInfo.TxId - << ". Parsing error"); + ProposeStartInfo = TTxController::TProposeResult(NKikimrTxColumnShard::EResultStatus::ERROR, + TStringBuilder() << "Error processing commit TxId# " << TxInfo.TxId << ". Parsing error"); SwitchStateVerified(EStatus::Created, EStatus::Failed); } else { SwitchStateVerified(EStatus::Created, EStatus::Parsed); } if (onLoad) { - ProposeStartInfo = TTxController::TProposeResult(NKikimrTxColumnShard::EResultStatus::PREPARED, "success on iteration before restart"); + ProposeStartInfo = + TTxController::TProposeResult(NKikimrTxColumnShard::EResultStatus::PREPARED, "success on iteration before restart"); Status = {}; } return result; @@ -336,8 +366,8 @@ class TTxController { return DoFinishProposeOnComplete(owner, ctx); } - virtual bool ExecuteOnProgress(TColumnShard& owner, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc) = 0; - virtual bool CompleteOnProgress(TColumnShard& owner, const TActorContext& ctx) = 0; + virtual bool ProgressOnExecute(TColumnShard& owner, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc) = 0; + virtual bool ProgressOnComplete(TColumnShard& owner, const TActorContext& ctx) = 0; virtual bool ExecuteOnAbort(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) = 0; virtual bool CompleteOnAbort(TColumnShard& owner, const TActorContext& ctx) = 0; @@ -345,8 +375,15 @@ class 
TTxController { virtual void RegisterSubscriber(const TActorId&) { AFL_VERIFY(false)("message", "Not implemented"); }; - virtual void OnTabletInit(TColumnShard& /*owner*/) {} + void OnTabletInit(TColumnShard& owner) { + AFL_VERIFY(!StartedAsync); + StartedAsync = true; + DoOnTabletInit(owner); + } }; + TTxProgressCounters& GetCounters() { + return Counters; + } private: const TDuration MaxCommitTxDelay = TDuration::Seconds(30); @@ -354,44 +391,75 @@ class TTxController { std::set DeadlineQueue; std::set PlanQueue; std::set RunningQueue; + TTxProgressCounters Counters; THashMap Operators; - private: ui64 GetAllowedStep() const; - bool AbortTx(const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc); + bool AbortTx(const TPlanQueueItem planQueueItem, NTabletFlatExecutor::TTransactionContext& txc); - TTxInfo RegisterTx(const std::shared_ptr& txOperator, const TString& txBody, NTabletFlatExecutor::TTransactionContext& txc); - TTxInfo RegisterTxWithDeadline(const std::shared_ptr& txOperator, const TString& txBody, NTabletFlatExecutor::TTransactionContext& txc); + TTxInfo RegisterTx(const std::shared_ptr& txOperator, const TString& txBody, + NTabletFlatExecutor::TTransactionContext& txc); + TTxInfo RegisterTxWithDeadline(const std::shared_ptr& txOperator, const TString& txBody, + NTabletFlatExecutor::TTransactionContext& txc); bool StartedFlag = false; + public: TTxController(TColumnShard& owner); - ITransactionOperator::TPtr GetTxOperator(const ui64 txId) const; - ITransactionOperator::TPtr GetVerifiedTxOperator(const ui64 txId) const; - void StartOperators(); + ITransactionOperator::TPtr GetTxOperatorOptional(const ui64 txId) const { + auto it = Operators.find(txId); + if (it == Operators.end()) { + return nullptr; + } + return it->second; + } + ITransactionOperator::TPtr GetTxOperatorVerified(const ui64 txId) const { + return TValidator::CheckNotNull(GetTxOperatorOptional(txId)); + } + template + std::shared_ptr GetTxOperatorVerifiedAs(const ui64 txId) const { 
+ auto result = GetTxOperatorOptional(txId); + AFL_VERIFY(result); + auto resultClass = dynamic_pointer_cast(result); + AFL_VERIFY(resultClass); + return resultClass; + } + + void PingTimeouts(const TMonotonic now) { + auto txInfo = GetFirstPlannedTx(); + if (!txInfo) { + return; + } + GetTxOperatorVerified(txInfo->GetTxId())->PingTimeout(Owner, now); + } ui64 GetMemoryUsage() const; bool HaveOutdatedTxs() const; bool Load(NTabletFlatExecutor::TTransactionContext& txc); - [[nodiscard]] std::shared_ptr UpdateTxSourceInfo(const TFullTxInfo& tx, NTabletFlatExecutor::TTransactionContext& txc); + [[nodiscard]] std::shared_ptr UpdateTxSourceInfo( + const TFullTxInfo& tx, NTabletFlatExecutor::TTransactionContext& txc); [[nodiscard]] std::shared_ptr StartProposeOnExecute( const TTxController::TTxInfo& txInfo, const TString& txBody, NTabletFlatExecutor::TTransactionContext& txc); - void StartProposeOnComplete(const ui64 txId, const TActorContext& ctx); - + void StartProposeOnComplete(ITransactionOperator& txOperator, const TActorContext& ctx); void FinishProposeOnExecute(const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc); - + void FinishProposeOnComplete(ITransactionOperator& txOperator, const TActorContext& ctx); void FinishProposeOnComplete(const ui64 txId, const TActorContext& ctx); + void WriteTxOperatorInfo(NTabletFlatExecutor::TTransactionContext& txc, const ui64 txId, const TString& data) { + NIceDb::TNiceDb db(txc.DB); + NColumnShard::Schema::UpdateTxInfoBody(db, txId, data); + } bool ExecuteOnCancel(const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc); bool CompleteOnCancel(const ui64 txId, const TActorContext& ctx); - std::optional StartPlannedTx(); - void FinishPlannedTx(const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc); - void CompleteRunningTx(const TPlanQueueItem& tx); + std::optional GetFirstPlannedTx() const; + std::optional PopFirstPlannedTx(); + void ProgressOnExecute(const ui64 txId, 
NTabletFlatExecutor::TTransactionContext& txc); + void ProgressOnComplete(const TPlanQueueItem& tx); std::optional GetPlannedTx() const; TPlanQueueItem GetFrontTx() const; @@ -412,5 +480,4 @@ class TTxController { void OnTabletInit(); }; -} - +} // namespace NKikimr::NColumnShard diff --git a/ydb/core/tx/columnshard/transactions/ya.make b/ydb/core/tx/columnshard/transactions/ya.make index 8479005c4d67..c6509a2a6473 100644 --- a/ydb/core/tx/columnshard/transactions/ya.make +++ b/ydb/core/tx/columnshard/transactions/ya.make @@ -11,6 +11,7 @@ PEERDIR( ydb/core/tx/columnshard/data_sharing/destination/events ydb/core/tx/columnshard/transactions/operators ydb/core/tx/columnshard/transactions/transactions + ydb/core/tx/columnshard/transactions/locks ) YQL_LAST_ABI_VERSION() diff --git a/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp b/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp index 700ed7157f15..ad5ec1f688fd 100644 --- a/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp +++ b/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp @@ -14,11 +14,11 @@ #include #include #include -#include +#include #include -#include -#include -#include +#include +#include +#include #include namespace NKikimr { @@ -35,12 +35,6 @@ using TTypeId = NScheme::TTypeId; using TTypeInfo = NScheme::TTypeInfo; using TDefaultTestsController = NKikimr::NYDBTest::NColumnShard::TController; -class TDisableCompactionController: public NKikimr::NYDBTest::NColumnShard::TController { -public: - TDisableCompactionController() { - DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); - } -}; template bool DataHas(const std::vector>& batches, std::pair range, @@ -538,7 +532,7 @@ void TestWriteReadDup(const TestTableDescription& table = {}) { // read if (planStep != initPlanStep) { - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep - 1, Max())); + TShardReader reader(runtime, 
TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep - 1, Max())); reader.SetReplyColumns({"timestamp"}); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -597,7 +591,7 @@ void TestWriteReadLongTxDup() { // read TAutoPtr handle; { - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); reader.SetReplyColumns(TTestSchema::ExtractNames(ydbSchema)); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -612,7 +606,9 @@ void TestWriteReadLongTxDup() { } void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString codec = "") { - auto csControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); + auto csControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); + csControllerGuard->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); + csControllerGuard->SetOverrideReadTimeoutClean(TDuration::Max()); TTestBasicRuntime runtime; TTester::Setup(runtime); @@ -681,7 +677,7 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString { NActors::TLogContextGuard guard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", 1); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(0, 0)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(0, 1)); reader.SetReplyColumns({"resource_type"}); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -698,7 +694,7 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString { NActors::TLogContextGuard guard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", 2); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, 
NOlap::TSnapshot(0, 0)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(0, 1)); reader.SetReplyColumns({"resource_type"}); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -708,7 +704,7 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString // read 3 (committed) { NActors::TLogContextGuard guard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", 3); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); reader.SetReplyColumns(TTestSchema::ExtractNames(ydbSchema)); auto rb = reader.ReadAll(); UNIT_ASSERT(rb); @@ -723,7 +719,7 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString // read 4 (column by id) { NActors::TLogContextGuard guard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", 4); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); reader.SetReplyColumnIds({1}); auto rb = reader.ReadAll(); UNIT_ASSERT(rb); @@ -738,7 +734,7 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString { NActors::TLogContextGuard guard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", 5); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); reader.SetReplyColumns({"timestamp", "message"}); auto rb = reader.ReadAll(); UNIT_ASSERT(rb); @@ -776,7 +772,7 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString // read 6, planstep 0 { 
NActors::TLogContextGuard guard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", 6); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(0, 0)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(0, 1)); reader.SetReplyColumns({"timestamp", "message"}); auto rb = reader.ReadAll(); UNIT_ASSERT(!rb); @@ -786,7 +782,7 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString // read 7, planstep 21 (part of index) { NActors::TLogContextGuard guard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", 7); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(21, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(21, txId)); reader.SetReplyColumns(TTestSchema::ExtractNames(ydbSchema)); auto rb = reader.ReadAll(); UNIT_ASSERT(rb); @@ -803,7 +799,7 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString // read 8, planstep 22 (full index) { NActors::TLogContextGuard guard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", 8); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(22, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(22, txId)); reader.SetReplyColumns(TTestSchema::ExtractNames(ydbSchema)); auto rb = reader.ReadAll(); UNIT_ASSERT(rb); @@ -833,7 +829,7 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString // read 9 (committed, indexed) { NActors::TLogContextGuard guard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", 9); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(23, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, 
NOlap::TSnapshot(23, txId)); reader.SetReplyColumns(TTestSchema::ExtractNames(ydbSchema)); auto rb = reader.ReadAll(); UNIT_ASSERT(rb); @@ -858,7 +854,7 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString // read 10 { NActors::TLogContextGuard guard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", 10); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(24, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(24, txId)); reader.SetReplyColumns(TTestSchema::ExtractNames(ydbSchema)); auto rb = reader.ReadAll(); UNIT_ASSERT(rb); @@ -878,8 +874,8 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString const ui64 committedBytes = reader.GetReadStat("committed_bytes"); Cerr << codec << "/" << compactedBytes << "/" << insertedBytes << "/" << committedBytes << Endl; if (insertedBytes) { - UNIT_ASSERT_GE(insertedBytes / 100000, 40); - UNIT_ASSERT_LE(insertedBytes / 100000, 50); + UNIT_ASSERT_GE(insertedBytes / 100000, 50); + UNIT_ASSERT_LE(insertedBytes / 100000, 60); } if (committedBytes) { UNIT_ASSERT_LE(committedBytes / 100000, 1); @@ -904,7 +900,7 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString // read 11 (range predicate: closed interval) { NActors::TLogContextGuard guard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", 11); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(24, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(24, txId)); reader.SetReplyColumns(TTestSchema::ExtractNames(ydbSchema)); reader.AddRange(MakeTestRange({10, 42}, true, true, testYdbPk)); auto rb = reader.ReadAll(); @@ -921,7 +917,7 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString // read 12 (range predicate: open interval) { 
NActors::TLogContextGuard guard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("TEST_STEP", 11); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(24, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(24, txId)); reader.SetReplyColumns(TTestSchema::ExtractNames(ydbSchema)); reader.AddRange(MakeTestRange({10, 42}, false, false, testYdbPk)); auto rb = reader.ReadAll(); @@ -1034,7 +1030,7 @@ void TestCompactionInGranuleImpl(bool reboots, const TestTableDescription& table --txId; for (ui32 i = 0; i < 2; ++i) { - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); reader.SetReplyColumns({"timestamp", "message"}); auto rb = reader.ReadAll(); UNIT_ASSERT(rb); @@ -1279,7 +1275,7 @@ void TestReadWithProgram(const TestTableDescription& table = {}) ui32 i = 0; for (auto& programText : programs) { - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); reader.SetProgram(programText); auto rb = reader.ReadAll(); if (i < numWrong) { @@ -1348,7 +1344,7 @@ void TestReadWithProgramLike(const TestTableDescription& table = {}) { ui32 i = 0; for (auto& ssa : ssas) { - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); reader.SetProgram(ssa); auto rb = reader.ReadAll(); @@ -1420,7 +1416,7 @@ void TestSomePrograms(const TestTableDescription& table) { // TODO: add programs with bugs here for (auto& ssaText : programs) { - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, 
NOlap::TSnapshot(planStep, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); reader.SetProgram(ssaText); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsError()); @@ -1530,7 +1526,7 @@ void TestReadAggregate(const std::vector& ydbSchema, for (auto& programText : programs) { Cerr << "-- select program: " << prog << " is filtered: " << (int)isFiltered.count(prog) << "\n"; - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); reader.SetProgram(programText); auto batch = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -1890,7 +1886,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { txIds.insert(txId); PlanCommit(runtime, sender, planStep, txIds); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); reader.SetReplyColumns({ "timestamp" }); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -1905,7 +1901,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { txIds.insert(txId); PlanCommit(runtime, sender, planStep, txIds); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); reader.SetReplyColumns({ "timestamp" }); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -1921,7 +1917,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { txIds.insert(txId); PlanCommit(runtime, sender, planStep, txIds); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, 
tableId, NOlap::TSnapshot(planStep, Max())); reader.SetReplyColumns({ "timestamp" }); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -1937,7 +1933,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { txIds.insert(txId); PlanCommit(runtime, sender, planStep, txIds); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); reader.SetReplyColumns({ "timestamp" }); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -1958,7 +1954,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { txIds.insert(txId); PlanCommit(runtime, sender, planStep, txIds); - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); reader.SetReplyColumns({ "timestamp" }); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -2239,7 +2235,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { const ui64 tableId = 1; std::set useFields = {"timestamp", "message"}; { // read with predicate (FROM) - NOlap::NTests::TShardReader reader(Owner.Runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(Owner.PlanStep, Owner.TxId)); + TShardReader reader(Owner.Runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(Owner.PlanStep, Owner.TxId)); reader.SetReplyColumns({"timestamp", "message"}); reader.AddRange(MakeRange(Owner.YdbPk)); auto rb = reader.ReadAll(); @@ -2334,7 +2330,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { for (ui32 i = 0; i < 2; ++i) { { - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, txId)); reader.SetReplyColumns({"timestamp", 
"message"}); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -2440,7 +2436,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { ui64 numRows = static_cast(*rows).Value(i); ui64 numBytes = static_cast(*bytes).Value(i); ui64 numRawBytes = static_cast(*rawBytes).Value(i); - bool activity = static_cast(*activities).Value(i); + bool activity = static_cast(*activities).Value(i); if (!activity) { continue; } @@ -2584,7 +2580,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { // Try to read snapshot that is too old { - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep - staleness.MilliSeconds(), Max())); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep - staleness.MilliSeconds(), Max())); reader.SetReplyColumns({"timestamp", "message"}); reader.ReadAll(); UNIT_ASSERT(reader.IsError()); @@ -2594,8 +2590,10 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { void TestCompactionGC() { TTestBasicRuntime runtime; - TTester::Setup(runtime); auto csDefaultControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); + csDefaultControllerGuard->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); + csDefaultControllerGuard->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + TTester::Setup(runtime); runtime.SetLogPriority(NKikimrServices::BLOB_CACHE, NActors::NLog::PRI_INFO); @@ -2753,7 +2751,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { --planStep; --txId; Cerr << compactionsHappened << Endl; - UNIT_ASSERT_GE(compactionsHappened, 3); // we catch it three times per action +// UNIT_ASSERT_GE(compactionsHappened, 3); // we catch it three times per action ui64 previousCompactionsHappened = compactionsHappened; ui64 previousCleanupsHappened = cleanupsHappened; @@ -2761,12 +2759,13 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { // Send a request that reads the latest version // This request is 
expected to read at least 1 committed blob and several index portions // These committed blob and portions must not be deleted by the BlobManager until the read request finishes - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep - 1, Max())); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep - 1, Max())); reader.SetReplyColumns({"timestamp", "message"}); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); UNIT_ASSERT(CheckOrdered(rb)); UNIT_ASSERT(reader.GetIterationsCount() < 10); + csDefaultControllerGuard->EnableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); // We captured EvReadFinished event and dropped is so the columnshard still thinks that // read request is in progress and keeps the portions @@ -2782,15 +2781,21 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { ProposeCommit(runtime, sender, txId, writeIds); PlanCommit(runtime, sender, planStep, txId); } + { + auto read = std::make_unique(); + ForwardToTablet(runtime, TTestTxConfig::TxTablet0, sender, read.release()); + } - Cerr << "Compactions happened: " << compactionsHappened << Endl; - Cerr << "Cleanups happened: " << cleanupsHappened << Endl; + Cerr << "Compactions happened: " << csDefaultControllerGuard->GetCompactionStartedCounter().Val() << Endl; + Cerr << "Indexations happened: " << csDefaultControllerGuard->GetInsertStartedCounter().Val() << Endl; + Cerr << "Cleanups happened: " << csDefaultControllerGuard->GetCleaningStartedCounter().Val() << Endl; Cerr << "Old portions: " << JoinStrings(oldPortions.begin(), oldPortions.end(), " ") << Endl; Cerr << "Cleaned up portions: " << JoinStrings(deletedPortions.begin(), deletedPortions.end(), " ") << Endl; + Cerr << "delayedBlobs: " << JoinStrings(delayedBlobs.begin(), delayedBlobs.end(), " ") << Endl; // Check that GC happened but it didn't collect some old portions UNIT_ASSERT_GT(compactionsHappened, 
previousCompactionsHappened); - UNIT_ASSERT_GT(cleanupsHappened, previousCleanupsHappened); + UNIT_ASSERT_EQUAL(cleanupsHappened, 0); UNIT_ASSERT_GT_C(oldPortions.size(), deletedPortions.size(), "Some old portions must not be deleted because the are in use by read"); UNIT_ASSERT_GT_C(delayedBlobs.size(), 0, "Read request is expected to have at least one committed blob, which deletion must be delayed"); previousCompactionsHappened = compactionsHappened; @@ -2805,9 +2810,25 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { } // Advance the time and trigger some more cleanups withno compactions - auto csControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); - planStep += 2 * delay.MilliSeconds(); - numWrites = 2; + csDefaultControllerGuard->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); + { + auto read = std::make_unique(); + ForwardToTablet(runtime, TTestTxConfig::TxTablet0, sender, read.release()); + } + planStep += (2 * delay).MilliSeconds(); + for (ui32 i = 0; i < numWrites; ++i, ++writeId, ++planStep, ++txId) { + std::vector writeIds; + UNIT_ASSERT(WriteData(runtime, sender, writeId, tableId, triggerData, ydbSchema, true, &writeIds)); + + ProposeCommit(runtime, sender, txId, writeIds); + PlanCommit(runtime, sender, planStep, txId); + } + UNIT_ASSERT_EQUAL(cleanupsHappened, 0); + csDefaultControllerGuard->SetOverrideRequestsTracePingCheckPeriod(TDuration::Zero()); + { + auto read = std::make_unique(); + ForwardToTablet(runtime, TTestTxConfig::TxTablet0, sender, read.release()); + } for (ui32 i = 0; i < numWrites; ++i, ++writeId, ++planStep, ++txId) { std::vector writeIds; UNIT_ASSERT(WriteData(runtime, sender, writeId, tableId, triggerData, ydbSchema, true, &writeIds)); @@ -2815,9 +2836,12 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { ProposeCommit(runtime, sender, txId, writeIds); PlanCommit(runtime, sender, planStep, txId); } + AFL_VERIFY(csDefaultControllerGuard->GetRequestTracingSnapshotsSave().Val() 
== 1); + AFL_VERIFY(csDefaultControllerGuard->GetRequestTracingSnapshotsRemove().Val() == 1); - Cerr << "Compactions happened: " << compactionsHappened << Endl; - Cerr << "Cleanups happened: " << cleanupsHappened << Endl; + Cerr << "Compactions happened: " << csDefaultControllerGuard->GetCompactionStartedCounter().Val() << Endl; + Cerr << "Indexations happened: " << csDefaultControllerGuard->GetInsertStartedCounter().Val() << Endl; + Cerr << "Cleanups happened: " << csDefaultControllerGuard->GetCleaningStartedCounter().Val() << Endl; Cerr << "Old portions: " << JoinStrings(oldPortions.begin(), oldPortions.end(), " ") << Endl; Cerr << "Cleaned up portions: " << JoinStrings(deletedPortions.begin(), deletedPortions.end(), " ") << Endl; @@ -2825,7 +2849,6 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { UNIT_ASSERT_GE(compactionsHappened, previousCompactionsHappened); UNIT_ASSERT_GT(cleanupsHappened, previousCleanupsHappened); UNIT_ASSERT_VALUES_EQUAL_C(oldPortions.size(), deletedPortions.size(), "All old portions must be deleted after read has finished"); - UNIT_ASSERT_VALUES_EQUAL_C(delayedBlobs.size(), 0, "All previously delayed deletions must now happen " + JoinSeq(",", delayedBlobs)); } Y_UNIT_TEST(CompactionGC) { diff --git a/ydb/core/tx/columnshard/ut_rw/ut_normalizer.cpp b/ydb/core/tx/columnshard/ut_rw/ut_normalizer.cpp index 50f305bf4fc6..734047952707 100644 --- a/ydb/core/tx/columnshard/ut_rw/ut_normalizer.cpp +++ b/ydb/core/tx/columnshard/ut_rw/ut_normalizer.cpp @@ -3,12 +3,13 @@ #include #include +#include #include -#include -#include -#include +#include +#include +#include namespace NKikimr { @@ -161,7 +162,7 @@ class TColumnChunksCleaner : public NYDBTest::ILocalDBModifier { } }; -class TPortinosCleaner : public NYDBTest::ILocalDBModifier { +class TPortionsCleaner : public NYDBTest::ILocalDBModifier { public: virtual void Apply(NTabletFlatExecutor::TTransactionContext& txc) const override { using namespace NColumnShard; @@ -185,6 +186,21 @@ class 
TPortinosCleaner : public NYDBTest::ILocalDBModifier { } }; + +class TEmptyPortionsCleaner : public NYDBTest::ILocalDBModifier { +public: + virtual void Apply(NTabletFlatExecutor::TTransactionContext& txc) const override { + using namespace NColumnShard; + NIceDb::TNiceDb db(txc.DB); + for (size_t pathId = 100; pathId != 299; ++pathId) { + for (size_t portionId = 1000; portionId != 1199; ++portionId) { + db.Table().Key(pathId, portionId).Update(); + } + } + } +}; + + class TTablesCleaner : public NYDBTest::ILocalDBModifier { public: virtual void Apply(NTabletFlatExecutor::TTransactionContext& txc) const override { @@ -317,7 +333,11 @@ Y_UNIT_TEST_SUITE(Normalizers) { } Y_UNIT_TEST(PortionsNormalizer) { - TestNormalizerImpl(); + TestNormalizerImpl(); + } + + Y_UNIT_TEST(CleanEmptyPortionsNormalizer) { + TestNormalizerImpl(); } Y_UNIT_TEST(EmptyTablesNormalizer) { diff --git a/ydb/core/tx/columnshard/ut_rw/ya.make b/ydb/core/tx/columnshard/ut_rw/ya.make index d03099069b75..7cf343aad4c6 100644 --- a/ydb/core/tx/columnshard/ut_rw/ya.make +++ b/ydb/core/tx/columnshard/ut_rw/ya.make @@ -22,8 +22,6 @@ PEERDIR( ydb/core/tx/columnshard/test_helper ydb/core/tx/columnshard/hooks/abstract ydb/core/tx/columnshard/hooks/testing - ydb/core/tx/columnshard/common/tests - ydb/core/tx/columnshard/test_helper ydb/services/metadata ydb/core/tx ydb/public/lib/yson_value diff --git a/ydb/core/tx/columnshard/ut_schema/ut_columnshard_schema.cpp b/ydb/core/tx/columnshard/ut_schema/ut_columnshard_schema.cpp index cfc9ea9526b5..deb7be3d89e9 100644 --- a/ydb/core/tx/columnshard/ut_schema/ut_columnshard_schema.cpp +++ b/ydb/core/tx/columnshard/ut_schema/ut_columnshard_schema.cpp @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include @@ -17,6 +17,9 @@ #include #include +#include + +#include namespace NKikimr { @@ -32,6 +35,16 @@ enum class EInitialEviction { namespace { +Aws::SDKOptions Options; + +Y_TEST_HOOK_BEFORE_RUN(InitAwsAPI) { + Aws::InitAPI(Options); 
+} + +Y_TEST_HOOK_AFTER_RUN(ShutdownAwsAPI) { + Aws::ShutdownAPI(Options); +} + static const std::vector testYdbSchema = TTestSchema::YdbSchema(); static const std::vector testYdbPk = TTestSchema::YdbPkSchema(); @@ -161,7 +174,7 @@ void TestTtl(bool reboots, bool internal, TTestSchema::TTableSpecials spec = {}, { auto csControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); csControllerGuard->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); - csControllerGuard->SetTasksActualizationLag(TDuration::Zero()); + csControllerGuard->SetOverrideTasksActualizationLag(TDuration::Zero()); std::vector ts = {1600000000, 1620000000}; ui32 ttlIncSeconds = 1; @@ -246,7 +259,7 @@ void TestTtl(bool reboots, bool internal, TTestSchema::TTableSpecials spec = {}, { --planStep; - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); reader.SetReplyColumns({spec.TtlColumn}); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -278,7 +291,7 @@ void TestTtl(bool reboots, bool internal, TTestSchema::TTableSpecials spec = {}, { --planStep; - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); reader.SetReplyColumns({spec.TtlColumn, NOlap::TIndexInfo::SPEC_COL_PLAN_STEP}); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -312,7 +325,7 @@ void TestTtl(bool reboots, bool internal, TTestSchema::TTableSpecials spec = {}, { --planStep; - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); 
reader.SetReplyColumns({spec.TtlColumn}); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); @@ -513,7 +526,7 @@ std::vector> TestTiers(bool reboots, const std::vector(); csControllerGuard->DisableBackground(NYDBTest::ICSController::EBackground::TTL); - csControllerGuard->SetTasksActualizationLag(TDuration::Zero()); + csControllerGuard->SetOverrideTasksActualizationLag(TDuration::Zero()); TTestBasicRuntime runtime; TTester::Setup(runtime); @@ -621,9 +634,9 @@ std::vector> TestTiers(bool reboots, const std::vector reader; + std::unique_ptr reader; if (!misconfig) { - reader = std::make_unique(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep - 1, Max())); + reader = std::make_unique(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep - 1, Max())); reader->SetReplyColumns({specs[i].TtlColumn}); counter.CaptureReadEvents = specs[i].WaitEmptyAfter ? 0 : 1; // TODO: we need affected by tiering blob here counter.WaitReadsCaptured(runtime); @@ -662,7 +675,7 @@ std::vector> TestTiers(bool reboots, const std::vector())); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep - 1, Max())); reader.SetReplyColumns({columnToRead}); auto rb = reader.ReadAll(); if (expectedReadResult == EExpectedResult::ERROR) { @@ -981,7 +994,7 @@ void TestDrop(bool reboots) { TAutoPtr handle; { --planStep; - NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); + TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); reader.SetReplyColumns({TTestSchema::DefaultTtlColumn}); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); diff --git a/ydb/core/tx/columnshard/ut_schema/ya.make b/ydb/core/tx/columnshard/ut_schema/ya.make index 35d906ee2055..d67c0d2ad5b8 100644 --- a/ydb/core/tx/columnshard/ut_schema/ya.make +++ b/ydb/core/tx/columnshard/ut_schema/ya.make @@ -18,6 +18,7 @@ 
PEERDIR( library/cpp/getopt library/cpp/regex/pcre library/cpp/svnversion + contrib/libs/aws-sdk-cpp/aws-cpp-sdk-core ydb/core/testlib/default ydb/core/tx/columnshard/hooks/abstract ydb/core/tx/columnshard/hooks/testing diff --git a/ydb/core/tx/columnshard/ya.make b/ydb/core/tx/columnshard/ya.make index 716b6eae6174..f1f4df107ffe 100644 --- a/ydb/core/tx/columnshard/ya.make +++ b/ydb/core/tx/columnshard/ya.make @@ -16,7 +16,6 @@ SRCS( columnshard__write_index.cpp columnshard.cpp columnshard_impl.cpp - columnshard_common.cpp columnshard_private_events.cpp columnshard_schema.cpp columnshard_view.cpp diff --git a/ydb/core/tx/conveyor/service/service.cpp b/ydb/core/tx/conveyor/service/service.cpp index cf95fde86778..68900c3e25ec 100644 --- a/ydb/core/tx/conveyor/service/service.cpp +++ b/ydb/core/tx/conveyor/service/service.cpp @@ -7,14 +7,13 @@ namespace NKikimr::NConveyor { TDistributor::TDistributor(const TConfig& config, const TString& conveyorName, TIntrusivePtr<::NMonitoring::TDynamicCounters> conveyorSignals) : Config(config) , ConveyorName(conveyorName) - , Counters(ConveyorName, conveyorSignals) -{ + , Counters(ConveyorName, conveyorSignals) { } void TDistributor::Bootstrap() { const ui32 workersCount = Config.GetWorkersCountForConveyor(NKqp::TStagePredictor::GetUsableThreads()); - AFL_NOTICE(NKikimrServices::TX_CONVEYOR)("action", "conveyor_registered")("actor_id", SelfId())("workers_count", workersCount)("config", Config.DebugString()); + AFL_NOTICE(NKikimrServices::TX_CONVEYOR)("name", ConveyorName)("action", "conveyor_registered")("config", Config.DebugString()); for (ui32 i = 0; i < workersCount; ++i) { const double usage = Config.GetWorkerCPUUsage(i); Workers.emplace_back(Register(new TWorker(ConveyorName, usage, SelfId()))); @@ -36,27 +35,18 @@ void TDistributor::HandleMain(TEvInternal::TEvTaskProcessedResult::TPtr& ev) { Counters.ExecuteHistogram->Collect(dExecution.MilliSeconds()); if (Waiting.size()) { auto task = Waiting.pop(); - 
Counters.WaitingHistogram->Collect((ev->Get()->GetStartInstant() - task.GetCreateInstant()).MilliSeconds()); + Counters.WaitingHistogram->Collect((now - task.GetCreateInstant()).MilliSeconds()); task.OnBeforeStart(); Send(ev->Sender, new TEvInternal::TEvNewTask(task)); } else { Workers.emplace_back(ev->Sender); } - if (ev->Get()->GetOwnerId()) { - if (ev->Get()->IsFail()) { - ALS_ERROR(NKikimrServices::TX_CONVEYOR) << "action=on_error;owner=" << *ev->Get()->GetOwnerId() << ";workers=" << Workers.size() << ";waiting=" << Waiting.size(); - Send(*ev->Get()->GetOwnerId(), new TEvExecution::TEvTaskProcessedResult(ev->Get()->GetError())); - } else { - Send(*ev->Get()->GetOwnerId(), new TEvExecution::TEvTaskProcessedResult(ev->Get()->GetResult())); - } - } Counters.WaitingQueueSize->Set(Waiting.size()); Counters.AvailableWorkersCount->Set(Workers.size()); - ALS_DEBUG(NKikimrServices::TX_CONVEYOR) << "action=processed;owner=" << ev->Get()->GetOwnerId().value_or(NActors::TActorId()) << ";workers=" << Workers.size() << ";waiting=" << Waiting.size(); } void TDistributor::HandleMain(TEvExecution::TEvNewTask::TPtr& ev) { - ALS_DEBUG(NKikimrServices::TX_CONVEYOR) << "action=add_task;owner=" << ev->Sender << ";workers=" << Workers.size() << ";waiting=" << Waiting.size(); + AFL_DEBUG(NKikimrServices::TX_CONVEYOR)("action", "add_task")("sender", ev->Sender); Counters.IncomingRate->Inc(); const TString taskClass = ev->Get()->GetTask()->GetTaskClassIdentifier(); @@ -65,7 +55,7 @@ void TDistributor::HandleMain(TEvExecution::TEvNewTask::TPtr& ev) { itSignal = Signals.emplace(taskClass, std::make_shared("Conveyor/" + ConveyorName, taskClass)).first; } - TWorkerTask wTask(ev->Get()->GetTask(), ev->Get()->GetTask()->GetOwnerId(), itSignal->second); + TWorkerTask wTask(ev->Get()->GetTask(), itSignal->second); if (Workers.size()) { Counters.WaitingHistogram->Collect(0); @@ -83,11 +73,9 @@ void TDistributor::HandleMain(TEvExecution::TEvNewTask::TPtr& ev) { Waiting.push(wTask); 
Counters.WaitWorkerRate->Inc(); } else { - ALS_ERROR(NKikimrServices::TX_CONVEYOR) << "action=overlimit;sender=" << ev->Sender << ";workers=" << Workers.size() << ";waiting=" << Waiting.size(); Counters.OverlimitRate->Inc(); - Send(ev->Sender, new TEvExecution::TEvTaskProcessedResult( - TConclusionStatus::Fail("scan conveyor overloaded (" + ::ToString(Waiting.size()) + " >= " + ::ToString(Config.GetQueueSizeLimit()) + ")") - )); + AFL_ERROR(NKikimrServices::TX_CONVEYOR)("action", "queue_overlimit")("sender", ev->Sender)("limit", Config.GetQueueSizeLimit()); + ev->Get()->GetTask()->OnCannotExecute("scan conveyor overloaded (" + ::ToString(Waiting.size()) + " >= " + ::ToString(Config.GetQueueSizeLimit()) + ")"); } Counters.WaitingQueueSize->Set(Waiting.size()); Counters.AvailableWorkersCount->Set(Workers.size()); diff --git a/ydb/core/tx/conveyor/service/service.h b/ydb/core/tx/conveyor/service/service.h index 833952090905..f13629f967ee 100644 --- a/ydb/core/tx/conveyor/service/service.h +++ b/ydb/core/tx/conveyor/service/service.h @@ -4,7 +4,10 @@ #include #include #include +#include + #include + #include namespace NKikimr::NConveyor { @@ -86,11 +89,13 @@ class TDistributor: public TActorBootstrapped { public: STATEFN(StateMain) { + NActors::TLogContextGuard lGuard = NActors::TLogContextBuilder::Build()("name", ConveyorName) + ("workers", Workers.size())("waiting", Waiting.size())("actor_id", SelfId()); switch (ev->GetTypeRewrite()) { hFunc(TEvExecution::TEvNewTask, HandleMain); hFunc(TEvInternal::TEvTaskProcessedResult, HandleMain); default: - ALS_ERROR(NKikimrServices::TX_CONVEYOR) << ConveyorName << ": unexpected event for task executor: " << ev->GetTypeRewrite(); + AFL_ERROR(NKikimrServices::TX_CONVEYOR)("problem", "unexpected event for task executor")("ev_type", ev->GetTypeName()); break; } } diff --git a/ydb/core/tx/conveyor/service/worker.cpp b/ydb/core/tx/conveyor/service/worker.cpp index 42c0b4e239d7..8861de7e7a67 100644 --- 
a/ydb/core/tx/conveyor/service/worker.cpp +++ b/ydb/core/tx/conveyor/service/worker.cpp @@ -7,11 +7,8 @@ void TWorker::ExecuteTask(const TWorkerTask& workerTask) { if (CPUUsage < 1) { start = TMonotonic::Now(); } - if (workerTask.GetTask()->Execute(workerTask.GetTaskSignals())) { - TBase::Sender(workerTask, workerTask.GetTask()).SendTo(DistributorId); - } else { - TBase::Sender(workerTask, workerTask.GetTask()->GetErrorMessage()).SendTo(DistributorId); - } + Y_UNUSED(workerTask.GetTask()->Execute(workerTask.GetTaskSignals(), workerTask.GetTask())); + TBase::Sender(workerTask).SendTo(DistributorId); if (CPUUsage < 1) { Schedule((TMonotonic::Now() - *start) * (1 - CPUUsage), new NActors::TEvents::TEvWakeup); WaitWakeUp = true; diff --git a/ydb/core/tx/conveyor/service/worker.h b/ydb/core/tx/conveyor/service/worker.h index 662a6b2e00d6..34904557a57e 100644 --- a/ydb/core/tx/conveyor/service/worker.h +++ b/ydb/core/tx/conveyor/service/worker.h @@ -14,7 +14,6 @@ namespace NKikimr::NConveyor { class TWorkerTask { private: YDB_READONLY_DEF(ITask::TPtr, Task); - YDB_READONLY_DEF(std::optional, OwnerId); YDB_READONLY(TMonotonic, CreateInstant, TMonotonic::Now()); YDB_READONLY_DEF(std::shared_ptr, TaskSignals); std::optional StartInstant; @@ -28,9 +27,8 @@ class TWorkerTask { return *StartInstant; } - TWorkerTask(ITask::TPtr task, const std::optional& ownerId, std::shared_ptr taskSignals) + TWorkerTask(ITask::TPtr task, std::shared_ptr taskSignals) : Task(task) - , OwnerId(ownerId) , TaskSignals(taskSignals) { Y_ABORT_UNLESS(task); @@ -66,23 +64,13 @@ struct TEvInternal { }; class TEvTaskProcessedResult: - public NActors::TEventLocal, - public TConclusion { + public NActors::TEventLocal { private: using TBase = TConclusion; YDB_READONLY_DEF(TMonotonic, StartInstant); - YDB_READONLY_DEF(std::optional, OwnerId); public: - TEvTaskProcessedResult(const TWorkerTask& originalTask, const TString& errorMessage) - : TBase(TConclusionStatus::Fail(errorMessage)) - , 
StartInstant(originalTask.GetStartInstant()) - , OwnerId(originalTask.GetOwnerId()) { - - } - TEvTaskProcessedResult(const TWorkerTask& originalTask, ITask::TPtr result) - : TBase(result) - , StartInstant(originalTask.GetStartInstant()) - , OwnerId(originalTask.GetOwnerId()) { + TEvTaskProcessedResult(const TWorkerTask& originalTask) + : StartInstant(originalTask.GetStartInstant()) { } }; diff --git a/ydb/core/tx/conveyor/usage/abstract.cpp b/ydb/core/tx/conveyor/usage/abstract.cpp index 4d423bed7caa..1bfa3357f530 100644 --- a/ydb/core/tx/conveyor/usage/abstract.cpp +++ b/ydb/core/tx/conveyor/usage/abstract.cpp @@ -5,41 +5,35 @@ #include namespace NKikimr::NConveyor { -bool ITask::Execute(std::shared_ptr signals) { +TConclusionStatus ITask::Execute(std::shared_ptr signals, const std::shared_ptr& taskPtr) { AFL_VERIFY(!ExecutedFlag); ExecutedFlag = true; - bool result = false; const TMonotonic start = TMonotonic::Now(); try { - result = DoExecute(); - if (!result) { + TConclusionStatus result = DoExecute(taskPtr); + if (result.IsFail()) { if (signals) { signals->Fails->Add(1); signals->FailsDuration->Add((TMonotonic::Now() - start).MicroSeconds()); } - if (!ErrorMessage) { - ErrorMessage = "cannot execute task (not specified error message)"; - } } else { if (signals) { signals->Success->Add(1); signals->SuccessDuration->Add((TMonotonic::Now() - start).MicroSeconds()); } } + return result; } catch (...) 
{ if (signals) { signals->Fails->Add(1); signals->FailsDuration->Add((TMonotonic::Now() - start).MicroSeconds()); } - TStringBuilder sbLocalMessage; - sbLocalMessage << "exception: " << CurrentExceptionMessage(); - if (!ErrorMessage) { - ErrorMessage = sbLocalMessage; - } else { - ErrorMessage += sbLocalMessage; - } + return TConclusionStatus::Fail("exception: " + CurrentExceptionMessage()); } - return result; +} + +void ITask::DoOnCannotExecute(const TString& reason) { + AFL_VERIFY(false)("problem", "cannot execute conveyor task")("reason", reason); } } diff --git a/ydb/core/tx/conveyor/usage/abstract.h b/ydb/core/tx/conveyor/usage/abstract.h index fb80a80b02e6..26f95d16cacf 100644 --- a/ydb/core/tx/conveyor/usage/abstract.h +++ b/ydb/core/tx/conveyor/usage/abstract.h @@ -3,8 +3,9 @@ #include #include - #include +#include + #include namespace NKikimr::NConveyor { @@ -19,8 +20,7 @@ class TTaskSignals: public NColumnShard::TCommonCountersOwner { NMonitoring::TDynamicCounters::TCounterPtr SuccessDuration; TTaskSignals(const TString& moduleId, const TString& taskClassIdentifier, TIntrusivePtr<::NMonitoring::TDynamicCounters> baseSignals = nullptr) - : TBase(moduleId, baseSignals) - { + : TBase(moduleId, baseSignals) { DeepSubGroup("task_class", taskClassIdentifier); Fails = TBase::GetDeriviative("Fails"); FailsDuration = TBase::GetDeriviative("FailsDuration"); @@ -37,32 +37,21 @@ class ITask { Low = 0 }; private: - YDB_READONLY_DEF(TString, ErrorMessage); YDB_ACCESSOR(EPriority, Priority, EPriority::Normal); - YDB_READONLY_DEF(std::optional, OwnerId); bool ExecutedFlag = false; protected: - ITask& SetErrorMessage(const TString& message) { - ErrorMessage = message; - return *this; - } - virtual bool DoExecute() = 0; + virtual TConclusionStatus DoExecute(const std::shared_ptr& taskPtr) = 0; + virtual void DoOnCannotExecute(const TString& reason); public: - ITask(const std::optional& ownerId = {}) - : OwnerId(ownerId) - { - - } using TPtr = std::shared_ptr; virtual 
~ITask() = default; virtual TString GetTaskClassIdentifier() const = 0; - bool HasError() const { - return !!ErrorMessage; + void OnCannotExecute(const TString& reason) { + return DoOnCannotExecute(reason); } - - bool Execute(std::shared_ptr signals); + TConclusionStatus Execute(std::shared_ptr signals, const std::shared_ptr& taskPtr); }; } diff --git a/ydb/core/tx/conveyor/usage/events.cpp b/ydb/core/tx/conveyor/usage/events.cpp index beb3b35860d0..3a3387fcd90f 100644 --- a/ydb/core/tx/conveyor/usage/events.cpp +++ b/ydb/core/tx/conveyor/usage/events.cpp @@ -1,5 +1,12 @@ #include "events.h" +#include namespace NKikimr::NConveyor { +TEvExecution::TEvNewTask::TEvNewTask(ITask::TPtr task) + : Task(task) +{ + AFL_VERIFY(Task); +} + } diff --git a/ydb/core/tx/conveyor/usage/events.h b/ydb/core/tx/conveyor/usage/events.h index 9d4075d35fa9..c392d179b306 100644 --- a/ydb/core/tx/conveyor/usage/events.h +++ b/ydb/core/tx/conveyor/usage/events.h @@ -10,7 +10,6 @@ namespace NKikimr::NConveyor { struct TEvExecution { enum EEv { EvNewTask = EventSpaceBegin(TKikimrEvents::ES_CONVEYOR), - EvTaskProcessedResult, EvEnd }; @@ -22,18 +21,7 @@ struct TEvExecution { public: TEvNewTask() = default; - explicit TEvNewTask(ITask::TPtr task) - : Task(task) { - } - }; - - class TEvTaskProcessedResult: - public NActors::TEventLocal, - public TConclusion { - private: - using TBase = TConclusion; - public: - using TBase::TBase; + explicit TEvNewTask(ITask::TPtr task); }; }; diff --git a/ydb/core/tx/conveyor/usage/service.h b/ydb/core/tx/conveyor/usage/service.h index 097f9944bc7b..6ba3c3320fde 100644 --- a/ydb/core/tx/conveyor/usage/service.h +++ b/ydb/core/tx/conveyor/usage/service.h @@ -19,7 +19,7 @@ class TAsyncTaskExecutor: public TActorBootstrapped { void Bootstrap() { auto gAway = PassAwayGuard(); - Task->Execute(nullptr); + Task->Execute(nullptr, Task); } }; @@ -47,10 +47,7 @@ class TServiceOperatorImpl { context.Send(MakeServiceId(selfId.NodeId()), new 
NConveyor::TEvExecution::TEvNewTask(task)); return true; } else { - task->Execute(nullptr); - if (task->GetOwnerId()) { - context.Send(*task->GetOwnerId(), new NConveyor::TEvExecution::TEvTaskProcessedResult(task)); - } + task->Execute(nullptr, task); return false; } } diff --git a/ydb/core/tx/coordinator/coordinator__acquire_read_step.cpp b/ydb/core/tx/coordinator/coordinator__acquire_read_step.cpp index d34df2ed3f30..d74c67376705 100644 --- a/ydb/core/tx/coordinator/coordinator__acquire_read_step.cpp +++ b/ydb/core/tx/coordinator/coordinator__acquire_read_step.cpp @@ -103,7 +103,12 @@ void TTxCoordinator::Handle(TEvTxProxy::TEvAcquireReadStep::TPtr& ev, const TAct return; } - if (ReadOnlyLeaseEnabled()) { + // Note: when volatile state is preserved we don't want to update the last + // acquired step, because the new generation might miss that and invariants + // of read-step not going back would be violated. Run the code below using + // the normal tx, which will almost certainly fail (the storage is supposed + // to be blocked already), or successfully persist the new read step. + if (ReadOnlyLeaseEnabled() && !VolatileState.Preserved) { // We acquire read step using a read-only lease from executor // It is guaranteed that any future generation was not running at // the time ConfirmReadOnlyLease was called. 
diff --git a/ydb/core/tx/coordinator/coordinator__plan_step.cpp b/ydb/core/tx/coordinator/coordinator__plan_step.cpp index 2954ff4faafb..c5ce361b4b52 100644 --- a/ydb/core/tx/coordinator/coordinator__plan_step.cpp +++ b/ydb/core/tx/coordinator/coordinator__plan_step.cpp @@ -1,4 +1,5 @@ #include "coordinator_impl.h" +#include "coordinator_hooks.h" #include @@ -42,7 +43,29 @@ struct TTxCoordinator::TTxPlanStep : public TTransactionBase { } void Plan(TTransactionContext &txc, const TActorContext &ctx) { - Y_UNUSED(txc); + if (Self->VolatileState.Preserved) { + // A preserved state indicates a newer generation has been started + // already, and this coordinator will stop eventually. Decline + // all pending transactions. + for (auto& slot : Slots) { + for (auto& proposal : slot) { + Self->MonCounters.StepPlannedDeclinedTx->Inc(); + ProxyPlanConfirmations.Queue.emplace_back( + proposal.TxId, + proposal.Proxy, + TEvTxProxy::TEvProposeTransactionStatus::EStatus::StatusRestarting, + 0); + ++DeclinedCounter; + } + } + Self->SendStepConfirmations(ProxyPlanConfirmations, ctx); + return; + } + + if (auto* hooks = ICoordinatorHooks::Get(); Y_UNLIKELY(hooks)) { + hooks->BeginPlanStep(Self->TabletID(), Self->Executor()->Generation(), PlanOnStep); + } + NIceDb::TNiceDb db(txc.DB); ExecStartMoment = ctx.Now(); const bool lowDiskSpace = Self->Executor()->GetStats().IsAnyChannelYellowStop; diff --git a/ydb/core/tx/coordinator/coordinator__restore_transaction.cpp b/ydb/core/tx/coordinator/coordinator__restore_transaction.cpp index af411d349ed5..f3269e362cab 100644 --- a/ydb/core/tx/coordinator/coordinator__restore_transaction.cpp +++ b/ydb/core/tx/coordinator/coordinator__restore_transaction.cpp @@ -125,10 +125,12 @@ struct TTxCoordinator::TTxRestoreTransactions : public TTransactionBaseVolatileTransactions) { auto txId = pr.first; auto &tx = pr.second; + maxStep = Max(maxStep, tx.PlanOnStep); for (auto &prmed : tx.UnconfirmedAffectedSet) { auto medId = prmed.first; auto &medTx = 
GetMediatorTx(medId, tx.PlanOnStep, txId); @@ -137,6 +139,7 @@ struct TTxCoordinator::TTxRestoreTransactions : public TTransactionBaseVolatileTransactions.size(); Self->Transactions.swap(transactions); *Self->MonCounters.TxInFly += txCounter; Self->MonCounters.CurrentTxInFly = txCounter; - if (Self->PrevStateActorId) { - NIceDb::TNiceDb db(txc.DB); + NIceDb::TNiceDb db(txc.DB); + // Previous coordinator might have had transactions that were after + // its persistent blocked range, but before LastPlanned was updated. + // Since we pick them up as planned and send to mediators we also need + // to make sure LastPlanned reflects that. + if (Self->VolatileState.LastPlanned < maxVolatileStep) { + Self->VolatileState.LastPlanned = maxVolatileStep; + Schema::SaveState(db, Schema::State::KeyLastPlanned, maxVolatileStep); + } + + if (Self->PrevStateActorId) { ui64 volatileLeaseMs = Self->VolatilePlanLeaseMs; if (volatileLeaseMs > 0) { // Make sure we start and persist new state actor before allowing clients to acquire new read steps diff --git a/ydb/core/tx/coordinator/coordinator_hooks.cpp b/ydb/core/tx/coordinator/coordinator_hooks.cpp index 742ed2bdc4b6..225ed046ac1e 100644 --- a/ydb/core/tx/coordinator/coordinator_hooks.cpp +++ b/ydb/core/tx/coordinator/coordinator_hooks.cpp @@ -14,6 +14,12 @@ namespace NKikimr::NFlatTxCoordinator { return true; } + void ICoordinatorHooks::BeginPlanStep(ui64 tabletId, ui64 generation, ui64 planStep) { + Y_UNUSED(tabletId); + Y_UNUSED(generation); + Y_UNUSED(planStep); + } + ICoordinatorHooks* ICoordinatorHooks::Get() { return CoordinatorHooks.load(std::memory_order_acquire); } diff --git a/ydb/core/tx/coordinator/coordinator_hooks.h b/ydb/core/tx/coordinator/coordinator_hooks.h index 4f96fd25d146..e38dbc72cc96 100644 --- a/ydb/core/tx/coordinator/coordinator_hooks.h +++ b/ydb/core/tx/coordinator/coordinator_hooks.h @@ -10,6 +10,7 @@ namespace NKikimr::NFlatTxCoordinator { public: virtual bool PersistConfig(ui64 tabletId, const 
NKikimrSubDomains::TProcessingParams& config); + virtual void BeginPlanStep(ui64 tabletId, ui64 generation, ui64 planStep); public: static ICoordinatorHooks* Get(); diff --git a/ydb/core/tx/coordinator/coordinator_impl.cpp b/ydb/core/tx/coordinator/coordinator_impl.cpp index cfdc7bb55fc5..9a130616231b 100644 --- a/ydb/core/tx/coordinator/coordinator_impl.cpp +++ b/ydb/core/tx/coordinator/coordinator_impl.cpp @@ -328,6 +328,11 @@ ui64 TTxCoordinator::AlignPlanStep(ui64 step) { void TTxCoordinator::Handle(TEvPrivate::TEvPlanTick::TPtr &ev, const TActorContext &ctx) { //LOG_DEBUG_S(ctx, NKikimrServices::TX_COORDINATOR, "tablet# " << TabletID() << " HANDLE EvPlanTick LastPlanned " << VolatileState.LastPlanned); + if (VolatileState.Preserved) { + // Avoid planning any new transactions, wait until we are stopped + return; + } + ui64 next = ev->Get()->Step; while (!PendingPlanTicks.empty() && PendingPlanTicks.front() <= next) { PendingPlanTicks.pop_front(); @@ -556,8 +561,14 @@ void TTxCoordinator::TryInitMonCounters(const TActorContext &ctx) { } void TTxCoordinator::SendMediatorStep(TMediator &mediator, const TActorContext &ctx) { + if (VolatileState.Preserved) { + // We don't want to send new steps when state has been preserved and + // potentially sent to newer generations. 
+ return; + } + if (!mediator.Active) { - // We don't want to update LastSentStep when mediators are not empty + // We don't want to update LastSentStep when mediators are not connected return; } diff --git a/ydb/core/tx/coordinator/coordinator_impl.h b/ydb/core/tx/coordinator/coordinator_impl.h index 83272721ee4c..8369e8418f6e 100644 --- a/ydb/core/tx/coordinator/coordinator_impl.h +++ b/ydb/core/tx/coordinator/coordinator_impl.h @@ -433,6 +433,10 @@ class TTxCoordinator : public TActor, public TTabletExecutedFlat TVector AcquireReadStepPending; bool AcquireReadStepFlushing = false; bool AcquireReadStepStarting = false; + + // When true the state has been preserved by the state actor + // Any changes will not be migrated to newer generations + bool Preserved = false; }; public: diff --git a/ydb/core/tx/coordinator/coordinator_state.cpp b/ydb/core/tx/coordinator/coordinator_state.cpp index fa1351c54770..70f91a736244 100644 --- a/ydb/core/tx/coordinator/coordinator_state.cpp +++ b/ydb/core/tx/coordinator/coordinator_state.cpp @@ -80,6 +80,7 @@ void TCoordinatorStateActor::PreserveState() { Y_ABORT_UNLESS(ok); } + Owner->VolatileState.Preserved = true; } STFUNC(TCoordinatorStateActor::StateWork) { diff --git a/ydb/core/tx/coordinator/coordinator_volatile_ut.cpp b/ydb/core/tx/coordinator/coordinator_volatile_ut.cpp index 8d75974813aa..cb57683785b4 100644 --- a/ydb/core/tx/coordinator/coordinator_volatile_ut.cpp +++ b/ydb/core/tx/coordinator/coordinator_volatile_ut.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -357,6 +358,539 @@ namespace NKikimr::NFlatTxCoordinator::NTest { UNIT_ASSERT_VALUES_EQUAL(observedSteps.size(), 1u); } + /** + * Tests a scenario where coordinator's volatile lease expires, which + * causes coordinator to update the lease during volatile planning. + * That transaction is migrated to a newer instance, but commit updating + * last known step fails. 
A bug caused new instances to reach a confused + * state, which could cause it to attempt planning more transactions in + * the same step, violating invariants. + */ + Y_UNIT_TEST(CoordinatorMigrateUncommittedVolatileTx) { + struct TCoordinatorHooks : public ICoordinatorHooks { + std::vector PlannedSteps; + + void BeginPlanStep(ui64 tabletId, ui64 generation, ui64 planStep) override { + Cerr << "... coordinator " << tabletId << " gen " << generation << " is planning step " << planStep << Endl; + PlannedSteps.push_back(planStep); + } + } hooks; + TCoordinatorHooksGuard hooksGuard(hooks); + + TPortManager pm; + TServerSettings serverSettings(pm.GetPort(2134)); + serverSettings.SetDomainName("Root") + .SetNodeCount(1) + .SetUseRealThreads(false) + .SetDomainPlanResolution(50); + + Tests::TServer::TPtr server = new TServer(serverSettings); + + auto &runtime = *server->GetRuntime(); + runtime.SetLogPriority(NKikimrServices::TX_COORDINATOR, NActors::NLog::PRI_DEBUG); + runtime.SetLogPriority(NKikimrServices::BOOTSTRAPPER, NActors::NLog::PRI_DEBUG); + // runtime.SetLogPriority(NKikimrServices::TX_MEDIATOR, NActors::NLog::PRI_DEBUG); + // runtime.SetLogPriority(NKikimrServices::TX_MEDIATOR_EXEC_QUEUE, NActors::NLog::PRI_DEBUG); + // runtime.SetLogPriority(NKikimrServices::TX_MEDIATOR_TABLETQUEUE, NActors::NLog::PRI_DEBUG); + + auto sender = runtime.AllocateEdgeActor(); + ui64 coordinatorId = ChangeStateStorage(Coordinator, server->GetSettings().Domain); + ui64 tabletId = ChangeStateStorage(TTestTxConfig::TxTablet0, server->GetSettings().Domain); + + CreateTestBootstrapper(runtime, + CreateTestTabletInfo(tabletId, TTabletTypes::Dummy), + [](const TActorId& tablet, TTabletStorageInfo* info) { + return new TPlanTargetTablet(tablet, info); + }); + + { + TDispatchOptions options; + options.FinalEvents.push_back(TDispatchOptions::TFinalEventCondition(TEvTablet::EvBoot, 1)); + runtime.DispatchEvents(options); + } + + auto waitFor = [&](const auto& condition, const TString& 
description) { + for (int i = 0; i < 5 && !condition(); ++i) { + Cerr << "... waiting for " << description << Endl; + TDispatchOptions options; + options.CustomFinalCondition = [&]() { + return condition(); + }; + runtime.DispatchEvents(options); + } + UNIT_ASSERT_C(condition(), "... failed to wait for " << description); + }; + + // Wait for the first idle mediator step + waitFor([&]{ return hooks.PlannedSteps.size() >= 2; }, "the first two planned steps"); + + auto oldTimestamp = runtime.GetCurrentTime(); + auto oldCoordinatorSysActorId = ResolveTablet(runtime, coordinatorId, 0, /* sysTablet */ true); + auto oldCoordinatorUserActorId = ResolveTablet(runtime, coordinatorId, 0, /* sysTablet */ false); + + // Sleep for 500ms, so the default volatile lease of 250ms will expire + runtime.SimulateSleep(TDuration::MilliSeconds(500)); + + hooks.PlannedSteps.clear(); + + // Start blocking EvPut responses for the new plan (but allowing data to commit) + std::vector> blockedPutResponses; + auto blockPutResponses = runtime.AddObserver( + [&](TEvBlobStorage::TEvPutResult::TPtr& ev) { + auto* msg = ev->Get(); + if (hooks.PlannedSteps.size() > 0 && msg->Id.TabletID() == coordinatorId) { + // Block commits from coordinator + Cerr << "... blocking put " << msg->Id << " response" << Endl; + blockedPutResponses.emplace_back(ev.Release()); + } + }); + + // Block target tablet's accept messages to keep transactions in mediator + std::vector> blockedPlanStepAccepted; + auto blockPlanStepAccepted = runtime.AddObserver( + [&](TEvTxProcessing::TEvPlanStepAccepted::TPtr& ev) { + auto* msg = ev->Get(); + if (msg->Record.GetTabletId() == tabletId) { + Cerr << "... 
blocked accept from " << tabletId << Endl; + blockedPlanStepAccepted.emplace_back(ev.Release()); + } + }); + + // Plan a persistent transaction + ui64 persistentTxId = 10000000; + if (auto propose = std::make_unique(coordinatorId, persistentTxId, 0, Min(), Max())) { + auto* tx = propose->Record.MutableTransaction(); + auto* affected = tx->AddAffectedSet(); + affected->SetTabletId(tabletId); + affected->SetFlags(TEvTxProxy::TEvProposeTransaction::AffectedWrite); + + runtime.SendToPipe(coordinatorId, sender, propose.release()); + } + + // Wait until we have some commit responses blocked + // This will ensure planned tx is persisted, but coordinator will not act on it yet + waitFor([&]{ return blockedPutResponses.size() > 0; }, "blocked put responses"); + + UNIT_ASSERT_VALUES_EQUAL(hooks.PlannedSteps.size(), 1u); + ui64 persistentPlanStep = hooks.PlannedSteps.at(0); + hooks.PlannedSteps.clear(); + + // Stop blocking put responses + blockPutResponses.Remove(); + + // Start blocking EvPut requests (not allowing data to commit) + std::vector> blockedPutRequests; + auto blockPutRequests = runtime.AddObserver( + [&](TEvBlobStorage::TEvPut::TPtr& ev) { + auto* msg = ev->Get(); + if (msg->Id.TabletID() == coordinatorId) { + // Block commits from coordinator + Cerr << "... 
blocking put " << msg->Id << " request" << Endl; + blockedPutRequests.emplace_back(ev.Release()); + } + }); + + // Plan a volatile transaction, expected to be planned for Step+1 + ui64 volatileTxId1 = 10000010; + if (auto propose = std::make_unique(coordinatorId, volatileTxId1, 0, Min(), Max())) { + auto* tx = propose->Record.MutableTransaction(); + tx->SetFlags(TEvTxProxy::TEvProposeTransaction::FlagVolatile); + auto* affected = tx->AddAffectedSet(); + affected->SetTabletId(tabletId); + affected->SetFlags(TEvTxProxy::TEvProposeTransaction::AffectedWrite); + + runtime.SendToPipe(coordinatorId, sender, propose.release()); + } + + // Wait until it's actually planned + waitFor([&]{ return hooks.PlannedSteps.size() >= 1; }, "planned volatile tx"); + + UNIT_ASSERT_VALUES_EQUAL(hooks.PlannedSteps.size(), 1u); + ui64 volatilePlanStep = hooks.PlannedSteps.at(0); + hooks.PlannedSteps.clear(); + + UNIT_ASSERT_C(volatilePlanStep > persistentPlanStep, + "Volatile plan step " << volatilePlanStep << " should be after persistent plan step " << persistentPlanStep); + + // Make sure everything settles + runtime.SimulateSleep(TDuration::MilliSeconds(1)); + + // We expect there to be a commit attempt (extending the lease and updating last planned) + UNIT_ASSERT_C(blockedPutRequests.size() > 0, "expected to have put requests blocked by now"); + blockPutRequests.Remove(); + + // We want to start a new instance in parallel + // Block the old bootstrapper from starting unwanted instances when current tablet dies + bool oldTabletStopped = false; + auto blockOldTabletDead = runtime.AddObserver( + [&](TEvTablet::TEvTabletDead::TPtr& ev) { + if (ev->Sender == oldCoordinatorSysActorId) { + if (ev->GetRecipientRewrite() == oldCoordinatorUserActorId) { + oldTabletStopped = true; + } else { + ev.Reset(); + } + } + }); + + // New instance will migrate the in-memory state, block it from reaching the new instance temporarily + std::vector> blockedStateResponses; + auto blockStateResponses = 
runtime.AddObserver( + [&](TEvTxCoordinator::TEvCoordinatorStateResponse::TPtr& ev) { + Cerr << "... blocking state response from " << ev->Sender << " to " << ev->GetRecipientRewrite() << Endl; + Cerr << ev->Get()->Record.DebugString(); + blockedStateResponses.emplace_back(ev.Release()); + }); + + // Rewind to some older time + runtime.UpdateCurrentTime(oldTimestamp, /* rewind */ true); + + // Start a new bootstrapper, which will boot a new instance in parallel + Cerr << "... starting a new coordinator instance" << Endl; + CreateTestBootstrapper(runtime, CreateTestTabletInfo(coordinatorId, TTabletTypes::Coordinator), &CreateFlatTxCoordinator); + + // Wait until new coordinator almost receives the in-memory state + waitFor([&]{ return blockedStateResponses.size() >= 1; }, "migrated state"); + + // Unblock previously blocked blobstorage messages + // Since new coordinator has started the storage is already blocked + Cerr << "... unblocking put responses and requests" << Endl; + for (auto& ev : blockedPutResponses) { + runtime.Send(ev.release(), 0, true); + } + blockedPutResponses.clear(); + for (auto& ev : blockedPutRequests) { + runtime.Send(ev.release(), 0, true); + } + blockedPutRequests.clear(); + + // Sleep a little, so everything settles (e.g. 
committed plan is sent to mediator) + runtime.SimulateSleep(TDuration::MilliSeconds(1)); + + waitFor([&]{ return oldTabletStopped; }, "old tablet stopped"); + hooks.PlannedSteps.clear(); + + // Unblock the in-memory state transfer + blockStateResponses.Remove(); + for (auto& ev : blockedStateResponses) { + runtime.Send(ev.release(), 0, true); + } + blockedStateResponses.clear(); + + // Make sure new requests go to the new instance + InvalidateTabletResolverCache(runtime, coordinatorId); + + // Plan another volatile transaction, with a smaller TxId + ui64 volatileTxId2 = 10000005; + if (auto propose = std::make_unique(coordinatorId, volatileTxId2, 0, Min(), Max())) { + auto* tx = propose->Record.MutableTransaction(); + tx->SetFlags(TEvTxProxy::TEvProposeTransaction::FlagVolatile); + auto* affected = tx->AddAffectedSet(); + affected->SetTabletId(tabletId); + affected->SetFlags(TEvTxProxy::TEvProposeTransaction::AffectedWrite); + + runtime.SendToPipe(coordinatorId, sender, propose.release()); + } + + // Wait until it's actually planned + waitFor([&]{ return hooks.PlannedSteps.size() >= 1; }, "planned volatile tx"); + + UNIT_ASSERT_VALUES_EQUAL(hooks.PlannedSteps.size(), 1u); + ui64 volatilePlanStep2 = hooks.PlannedSteps.at(0); + hooks.PlannedSteps.clear(); + + // Wait until everything settles (e.g. mediators receive all pending transactions) + runtime.SimulateSleep(TDuration::MilliSeconds(1)); + + // Reboot the target tablet to trigger the original assertion + Cerr << "... 
rebooting target tablet" << Endl; + RebootTablet(runtime, tabletId, sender); + + // Wait until everything settles + runtime.SimulateSleep(TDuration::MilliSeconds(1)); + + // Validate the new plan does not go back in time + UNIT_ASSERT_C(volatilePlanStep2 > volatilePlanStep, + "New volatile plan step " << volatilePlanStep2 << " is expected to be after " << volatilePlanStep); + } + + /** + * This scenario tests an empty volatile plan that is scheduled behind + * a persistent plan, which finishes committing after an in-memory + * state has been snapshotted and migrated. There was a bug where this + * empty plan step would not be considered as confirmed, and could be + * erroneously considered as unused by a previous generation. + */ + Y_UNIT_TEST(CoordinatorRestartWithEnqueuedVolatileStep) { + struct TCoordinatorHooks : public ICoordinatorHooks { + std::vector PlannedSteps; + + void BeginPlanStep(ui64 tabletId, ui64 generation, ui64 planStep) override { + Cerr << "... coordinator " << tabletId << " gen " << generation << " is planning step " << planStep << Endl; + PlannedSteps.push_back(planStep); + } + } hooks; + TCoordinatorHooksGuard hooksGuard(hooks); + + TPortManager pm; + TServerSettings serverSettings(pm.GetPort(2134)); + serverSettings.SetDomainName("Root") + .SetNodeCount(1) + .SetUseRealThreads(false) + .SetDomainPlanResolution(50); + + Tests::TServer::TPtr server = new TServer(serverSettings); + + auto &runtime = *server->GetRuntime(); + runtime.SetLogPriority(NKikimrServices::TX_COORDINATOR, NActors::NLog::PRI_DEBUG); + runtime.SetLogPriority(NKikimrServices::BOOTSTRAPPER, NActors::NLog::PRI_DEBUG); + runtime.SetLogPriority(NKikimrServices::TX_MEDIATOR, NActors::NLog::PRI_DEBUG); + runtime.SetLogPriority(NKikimrServices::TX_MEDIATOR_EXEC_QUEUE, NActors::NLog::PRI_DEBUG); + runtime.SetLogPriority(NKikimrServices::TX_MEDIATOR_TABLETQUEUE, NActors::NLog::PRI_DEBUG); + + auto sender = runtime.AllocateEdgeActor(); + ui64 coordinatorId = 
ChangeStateStorage(Coordinator, server->GetSettings().Domain); + ui64 mediatorId = ChangeStateStorage(Mediator, server->GetSettings().Domain); + ui64 tabletId = ChangeStateStorage(TTestTxConfig::TxTablet0, server->GetSettings().Domain); + + CreateTestBootstrapper(runtime, + CreateTestTabletInfo(tabletId, TTabletTypes::Dummy), + [](const TActorId& tablet, TTabletStorageInfo* info) { + return new TPlanTargetTablet(tablet, info); + }); + + { + TDispatchOptions options; + options.FinalEvents.push_back(TDispatchOptions::TFinalEventCondition(TEvTablet::EvBoot, 1)); + runtime.DispatchEvents(options); + } + + auto waitFor = [&](const auto& condition, const TString& description) { + for (int i = 0; i < 5 && !condition(); ++i) { + Cerr << "... waiting for " << description << Endl; + TDispatchOptions options; + options.CustomFinalCondition = [&]() { + return condition(); + }; + runtime.DispatchEvents(options); + } + UNIT_ASSERT_C(condition(), "... failed to wait for " << description); + }; + + // Wait for the first idle mediator step + waitFor([&]{ return hooks.PlannedSteps.size() >= 2; }, "the first two planned steps"); + runtime.SimulateSleep(TDuration::MilliSeconds(1)); + + hooks.PlannedSteps.clear(); + + auto oldTimestamp = runtime.GetCurrentTime(); + auto oldCoordinatorSysActorId = ResolveTablet(runtime, coordinatorId, 0, /* sysTablet */ true); + auto oldCoordinatorUserActorId = ResolveTablet(runtime, coordinatorId, 0, /* sysTablet */ false); + + // Start blocking EvPut responses for the new plan (but allowing data to commit) + std::vector> blockedPutResponses; + auto blockPutResponses = runtime.AddObserver( + [&](TEvBlobStorage::TEvPutResult::TPtr& ev) { + auto* msg = ev->Get(); + if (hooks.PlannedSteps.size() > 0 && msg->Id.TabletID() == coordinatorId) { + // Block commits from coordinator + Cerr << "... 
blocking put " << msg->Id << " response" << Endl; + blockedPutResponses.emplace_back(ev.Release()); + } + }); + + // Block target tablet's accept messages to keep transactions in mediator + std::vector> blockedPlanStepAccepted; + auto blockPlanStepAccepted = runtime.AddObserver( + [&](TEvTxProcessing::TEvPlanStepAccepted::TPtr& ev) { + auto* msg = ev->Get(); + if (msg->Record.GetTabletId() == tabletId) { + Cerr << "... blocked accept from " << tabletId << Endl; + blockedPlanStepAccepted.emplace_back(ev.Release()); + } + }); + + // step -> list of transactions + std::map> observedSteps; + auto observeSteps = runtime.AddObserver( + [&](TEvTxCoordinator::TEvCoordinatorStep::TPtr& ev) { + auto* msg = ev->Get(); + Cerr << "... observed step:" << Endl; + Cerr << msg->Record.DebugString(); + if (msg->Record.GetCoordinatorID() != coordinatorId) { + return; + } + ui64 step = msg->Record.GetStep(); + std::vector txIds; + for (const auto& tx : msg->Record.GetTransactions()) { + txIds.push_back(tx.GetTxId()); + } + std::sort(txIds.begin(), txIds.end()); + auto it = observedSteps.find(step); + if (it == observedSteps.end()) { + observedSteps[step] = std::move(txIds); + } else { + auto dumpTxIds = [](const std::vector& txIds) -> TString { + TStringBuilder sb; + sb << "{"; + bool first = true; + for (ui64 txId : txIds) { + if (first) { + first = false; + } else { + sb << ", "; + } + sb << txId; + } + sb << "}"; + return std::move(sb); + }; + UNIT_ASSERT_C(it->second == txIds, + "Step " << step << " changed transactions list " + << dumpTxIds(it->second) << " -> " << dumpTxIds(txIds)); + } + }); + + // txId -> step + std::map observedTabletTxs; + auto observeTabletTxs = runtime.AddObserver( + [&](TEvTxProcessing::TEvPlanStep::TPtr& ev) { + auto* msg = ev->Get(); + Cerr << "... 
observed tablet step:" << Endl; + Cerr << msg->Record.DebugString(); + ui64 step = msg->Record.GetStep(); + for (auto& tx : msg->Record.GetTransactions()) { + observedTabletTxs[tx.GetTxId()] = step; + } + }); + + // Plan a persistent transaction + ui64 persistentTxId = 10000000; + if (auto propose = std::make_unique(coordinatorId, persistentTxId, 0, Min(), Max())) { + auto* tx = propose->Record.MutableTransaction(); + auto* affected = tx->AddAffectedSet(); + affected->SetTabletId(tabletId); + affected->SetFlags(TEvTxProxy::TEvProposeTransaction::AffectedWrite); + + runtime.SendToPipe(coordinatorId, sender, propose.release()); + } + + // Wait until we have some commit responses blocked + waitFor([&]{ return blockedPutResponses.size() > 0; }, "blocked put responses"); + + UNIT_ASSERT_VALUES_EQUAL(hooks.PlannedSteps.size(), 1u); + ui64 persistentPlanStep = hooks.PlannedSteps.at(0); + hooks.PlannedSteps.clear(); + + // Require an empty step, it should be divisible by plan resolution + runtime.SendToPipe(coordinatorId, sender, new TEvTxProxy::TEvRequirePlanSteps(coordinatorId, persistentPlanStep + 50)); + + // Wait until it is also planned + waitFor([&]{ return hooks.PlannedSteps.size() >= 1; }, "planning for the required step"); + + UNIT_ASSERT_VALUES_EQUAL(hooks.PlannedSteps.size(), 1u); + ui64 volatileEmptyPlanStep = hooks.PlannedSteps.at(0); + hooks.PlannedSteps.clear(); + + // Stop blocking newer put responses + blockPutResponses.Remove(); + + // Make sure everything settles + runtime.SimulateSleep(TDuration::MilliSeconds(1)); + + // We want to start a new instance in parallel + // Block the old bootstrapper from starting unwanted instances when current tablet dies + bool oldTabletStopped = false; + auto blockOldTabletDead = runtime.AddObserver( + [&](TEvTablet::TEvTabletDead::TPtr& ev) { + if (ev->Sender == oldCoordinatorSysActorId) { + if (ev->GetRecipientRewrite() == oldCoordinatorUserActorId) { + oldTabletStopped = true; + } else { + ev.Reset(); + } + } + }); + 
+ // New instance will migrate the in-memory state, block it from reaching the new instance temporarily + std::vector> blockedStateResponses; + auto blockStateResponses = runtime.AddObserver( + [&](TEvTxCoordinator::TEvCoordinatorStateResponse::TPtr& ev) { + Cerr << "... blocking state response from " << ev->Sender << " to " << ev->GetRecipientRewrite() << Endl; + Cerr << ev->Get()->Record.DebugString(); + blockedStateResponses.emplace_back(ev.Release()); + }); + + // Rewind to some older time + runtime.UpdateCurrentTime(oldTimestamp, /* rewind */ true); + + // Start a new bootstrapper, which will boot a new instance in parallel + Cerr << "... starting a new coordinator instance" << Endl; + CreateTestBootstrapper(runtime, CreateTestTabletInfo(coordinatorId, TTabletTypes::Coordinator), &CreateFlatTxCoordinator); + + // Wait until new coordinator almost receives the in-memory state + waitFor([&]{ return blockedStateResponses.size() >= 1; }, "migrated state"); + + // Unblock previously blocked blobstorage messages + // Since new coordinator has started the storage is already blocked + Cerr << "... unblocking put responses and requests" << Endl; + for (auto& ev : blockedPutResponses) { + runtime.Send(ev.release(), 0, true); + } + blockedPutResponses.clear(); + + // Sleep a little, so everything settles (e.g. committed plan is sent to mediator) + runtime.SimulateSleep(TDuration::MilliSeconds(1)); + + waitFor([&]{ return oldTabletStopped; }, "old tablet stopped"); + hooks.PlannedSteps.clear(); + + // Unblock the in-memory state transfer + blockStateResponses.Remove(); + for (auto& ev : blockedStateResponses) { + runtime.Send(ev.release(), 0, true); + } + blockedStateResponses.clear(); + + size_t oldObservedSteps = observedSteps.size(); + + // Make sure new requests go to the new instance + InvalidateTabletResolverCache(runtime, coordinatorId); + + // Plan another persistent transaction + ui64 persistentTxId2 = 10000011; + Cerr << "... 
trying to plan tx " << persistentTxId2 << Endl; + if (auto propose = std::make_unique(coordinatorId, persistentTxId2, 0, Min(), Max())) { + auto* tx = propose->Record.MutableTransaction(); + auto* affected = tx->AddAffectedSet(); + affected->SetTabletId(tabletId); + affected->SetFlags(TEvTxProxy::TEvProposeTransaction::AffectedWrite); + + runtime.SendToPipe(coordinatorId, sender, propose.release()); + } + + // Wait until it's actually planned + waitFor([&]{ return hooks.PlannedSteps.size() >= 1; }, "planned another persistent tx"); + + // Wait until mediator observes it + waitFor([&]{ return observedSteps.size() > oldObservedSteps; }, "new step reaches mediator"); + + runtime.SimulateSleep(TDuration::MilliSeconds(50)); + UNIT_ASSERT_C(observedTabletTxs.contains(persistentTxId2), + "Tablet did not observe a persistent tx " << persistentTxId2); + + Y_UNUSED(sender); + Y_UNUSED(coordinatorId); + Y_UNUSED(mediatorId); + + Y_UNUSED(oldTimestamp); + Y_UNUSED(oldCoordinatorSysActorId); + Y_UNUSED(oldCoordinatorUserActorId); + + Y_UNUSED(persistentPlanStep); + Y_UNUSED(volatileEmptyPlanStep); + } + } // Y_UNIT_TEST_SUITE(CoordinatorVolatile) } // namespace NKikimr::NFlatTxCoordinator::NTest diff --git a/ydb/core/tx/data_events/columnshard_splitter.cpp b/ydb/core/tx/data_events/columnshard_splitter.cpp index 5b435a0f9198..19a787167270 100644 --- a/ydb/core/tx/data_events/columnshard_splitter.cpp +++ b/ydb/core/tx/data_events/columnshard_splitter.cpp @@ -66,9 +66,10 @@ NKikimr::NEvWrite::IShardsSplitter::TYdbConclusionStatus TColumnShardShardsSplit } TFullSplitData result(sharding->GetShardsCount()); + const TString schemaString = NArrow::SerializeSchema(*batch->schema()); for (auto&& [shardId, chunks] : split.GetResult()) { for (auto&& c : chunks) { - result.AddShardInfo(shardId, std::make_shared(c.GetSchemaData(), c.GetData(), c.GetRowsCount(), sharding->GetShardInfoVerified(shardId).GetShardingVersion())); + result.AddShardInfo(shardId, std::make_shared(schemaString, 
c.GetData(), c.GetRowsCount(), sharding->GetShardInfoVerified(shardId).GetShardingVersion())); } } diff --git a/ydb/core/tx/data_events/events.h b/ydb/core/tx/data_events/events.h index d9f17beceec4..bd4f06284e9d 100644 --- a/ydb/core/tx/data_events/events.h +++ b/ydb/core/tx/data_events/events.h @@ -103,6 +103,13 @@ struct TDataEvents { return result; } + static std::unique_ptr BuildCompleted(const ui64 origin) { + auto result = std::make_unique(); + result->Record.SetOrigin(origin); + result->Record.SetStatus(NKikimrDataEvents::TEvWriteResult::STATUS_COMPLETED); + return result; + } + static std::unique_ptr BuildCompleted(const ui64 origin, const ui64 txId) { auto result = std::make_unique(); result->Record.SetOrigin(origin); @@ -116,7 +123,9 @@ struct TDataEvents { result->Record.SetOrigin(origin); result->Record.SetTxId(txId); result->Record.SetStatus(NKikimrDataEvents::TEvWriteResult::STATUS_COMPLETED); - *result->Record.AddTxLocks() = lock; + auto& lockResult = *result->Record.AddTxLocks(); + lockResult = lock; + lockResult.SetHasWrites(true); return result; } diff --git a/ydb/core/tx/data_events/shard_writer.cpp b/ydb/core/tx/data_events/shard_writer.cpp index 0158527fee49..7f935f5270cc 100644 --- a/ydb/core/tx/data_events/shard_writer.cpp +++ b/ydb/core/tx/data_events/shard_writer.cpp @@ -18,19 +18,25 @@ namespace NKikimr::NEvWrite { void TWritersController::OnSuccess(const ui64 shardId, const ui64 writeId, const ui32 writePartId) { WriteIds[WritesIndex.Inc() - 1] = TWriteIdForShard(shardId, writeId, writePartId); + Counters->OnCSReply(TMonotonic::Now() - StartInstant); if (!WritesCount.Dec()) { - auto req = MakeHolder(LongTxId); - for (auto&& i : WriteIds) { - req->AddWrite(i.GetShardId(), i.GetWriteId()); - } - LongTxActorId.Send(NLongTxService::MakeLongTxServiceID(LongTxActorId.NodeId()), req.Release()); + SendReply(); } } void TWritersController::OnFail(const Ydb::StatusIds::StatusCode code, const TString& message) { - NYql::TIssues issues; - 
issues.AddIssue(message); - LongTxActorId.Send(LongTxActorId, new TEvPrivate::TEvShardsWriteResult(code, issues)); + Counters->OnCSFailed(code); + FailsCount.Inc(); + if (!Code) { + TGuard g(Mutex); + if (!Code) { + Issues.AddIssue(message); + Code = code; + } + } + if (!WritesCount.Dec()) { + SendReply(); + } } TShardWriter::TShardWriter(const ui64 shardId, const ui64 tableId, const TString& dedupId, const IShardInfo::TPtr& data, @@ -60,7 +66,7 @@ namespace NKikimr::NEvWrite { const auto ydbStatus = msg->GetYdbStatus(); if (ydbStatus == Ydb::StatusIds::OVERLOADED) { - if (RetryWriteRequest()) { + if (RetryWriteRequest(true)) { return; } } @@ -81,7 +87,7 @@ namespace NKikimr::NEvWrite { const auto* msg = ev->Get(); Y_ABORT_UNLESS(msg->TabletId == ShardId); - if (RetryWriteRequest()) { + if (RetryWriteRequest(true)) { return; } @@ -99,7 +105,7 @@ namespace NKikimr::NEvWrite { RetryWriteRequest(false); } - bool TShardWriter::RetryWriteRequest(bool delayed) { + bool TShardWriter::RetryWriteRequest(const bool delayed) { if (NumRetries >= MaxRetriesPerShard) { return false; } diff --git a/ydb/core/tx/data_events/shard_writer.h b/ydb/core/tx/data_events/shard_writer.h index d7abe2e2a3ed..0a649a4dd3b9 100644 --- a/ydb/core/tx/data_events/shard_writer.h +++ b/ydb/core/tx/data_events/shard_writer.h @@ -8,6 +8,7 @@ #include #include #include +#include namespace NKikimr::NEvWrite { @@ -22,19 +23,88 @@ class TWriteIdForShard { TWriteIdForShard(const ui64 shardId, const ui64 writeId, const ui32 writePartId) : ShardId(shardId) , WriteId(writeId) - , WritePartId(writePartId) - { + , WritePartId(writePartId) { + } +}; + +class TCSUploadCounters: public NColumnShard::TCommonCountersOwner { +private: + using TBase = NColumnShard::TCommonCountersOwner; + NMonitoring::TDynamicCounters::TCounterPtr RequestsCount; + NMonitoring::THistogramPtr CSReplyDuration; + NMonitoring::THistogramPtr SucceedFullReplyDuration; + NMonitoring::THistogramPtr FailedFullReplyDuration; + 
NMonitoring::THistogramPtr BytesDistribution; + NMonitoring::THistogramPtr RowsDistribution; + NMonitoring::TDynamicCounters::TCounterPtr RowsCount; + NMonitoring::TDynamicCounters::TCounterPtr BytesCount; + NMonitoring::TDynamicCounters::TCounterPtr FailsCount; +public: + TCSUploadCounters() + : TBase("CSUpload") + , RequestsCount(TBase::GetDeriviative("Requests")) + , CSReplyDuration(TBase::GetHistogram("Replies/Shard/DurationMs", NMonitoring::ExponentialHistogram(15, 2, 10))) + , SucceedFullReplyDuration(TBase::GetHistogram("Replies/Success/Full/DurationMs", NMonitoring::ExponentialHistogram(15, 2, 10))) + , FailedFullReplyDuration(TBase::GetHistogram("Replies/Failed/Full/DurationMs", NMonitoring::ExponentialHistogram(15, 2, 10))) + , BytesDistribution(TBase::GetHistogram("Requests/Bytes", NMonitoring::ExponentialHistogram(15, 2, 1024))) + , RowsDistribution(TBase::GetHistogram("Requests/Rows", NMonitoring::ExponentialHistogram(15, 2, 16))) + , RowsCount(TBase::GetDeriviative("Rows")) + , BytesCount(TBase::GetDeriviative("Bytes")) + , FailsCount(TBase::GetDeriviative("Fails")) { + } -}; + void OnRequest(const ui64 rows, const ui64 bytes) const { + BytesDistribution->Collect(bytes); + RowsDistribution->Collect(rows); + BytesCount->Add(bytes); + RowsCount->Add(rows); + } + + void OnCSFailed(const Ydb::StatusIds::StatusCode /*code*/) { + FailsCount->Add(1); + } + + void OnCSReply(const TDuration d) const { + CSReplyDuration->Collect(d.MilliSeconds()); + } + + void OnSucceedFullReply(const TDuration d) const { + SucceedFullReplyDuration->Collect(d.MilliSeconds()); + } + + void OnFailedFullReply(const TDuration d) const { + FailedFullReplyDuration->Collect(d.MilliSeconds()); + } +}; // External transaction controller class class TWritersController { private: TAtomicCounter WritesCount = 0; TAtomicCounter WritesIndex = 0; + TAtomicCounter FailsCount = 0; + TMutex Mutex; + NYql::TIssues Issues; + std::optional Code; NActors::TActorIdentity LongTxActorId; std::vector 
WriteIds; + const TMonotonic StartInstant = TMonotonic::Now(); YDB_READONLY_DEF(NLongTxService::TLongTxId, LongTxId); + YDB_READONLY(std::shared_ptr, Counters, std::make_shared()); + void SendReply() { + if (FailsCount.Val()) { + Counters->OnFailedFullReply(TMonotonic::Now() - StartInstant); + AFL_VERIFY(Code); + LongTxActorId.Send(LongTxActorId, new TEvPrivate::TEvShardsWriteResult(*Code, Issues)); + } else { + Counters->OnSucceedFullReply(TMonotonic::Now() - StartInstant); + auto req = MakeHolder(LongTxId); + for (auto&& i : WriteIds) { + req->AddWrite(i.GetShardId(), i.GetWriteId()); + } + LongTxActorId.Send(NLongTxService::MakeLongTxServiceID(LongTxActorId.NodeId()), req.Release()); + } + } public: using TPtr = std::shared_ptr; @@ -94,10 +164,6 @@ class TShardWriter: public NActors::TActorBootstrapped { TBase::PassAway(); } public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::GRPC_REQ_SHARD_WRITER; - } - TShardWriter(const ui64 shardId, const ui64 tableId, const TString& dedupId, const IShardInfo::TPtr& data, const NWilson::TProfileSpan& parentSpan, TWritersController::TPtr externalController, const ui32 writePartIdx, const EModificationType mType); @@ -115,6 +181,6 @@ class TShardWriter: public NActors::TActorBootstrapped { void Handle(TEvPipeCache::TEvDeliveryProblem::TPtr& ev); void HandleTimeout(const TActorContext& ctx); private: - bool RetryWriteRequest(bool delayed = true); + bool RetryWriteRequest(const bool delayed = true); }; } diff --git a/ydb/core/tx/data_events/shards_splitter.h b/ydb/core/tx/data_events/shards_splitter.h index bed95ce29d03..77c04e1160e9 100644 --- a/ydb/core/tx/data_events/shards_splitter.h +++ b/ydb/core/tx/data_events/shards_splitter.h @@ -3,7 +3,6 @@ #include #include - #include #include @@ -21,6 +20,8 @@ class IShardsSplitter { using TYdbConclusionStatus = TConclusionSpecialStatus; class IEvWriteDataAccessor { + private: + YDB_READONLY(ui64, Size, 0); public: 
using TPtr = std::shared_ptr; @@ -29,6 +30,11 @@ class IShardsSplitter { } virtual std::shared_ptr GetDeserializedBatch() const = 0; virtual TString GetSerializedData() const = 0; + IEvWriteDataAccessor(const ui64 size) + : Size(size) + { + + } virtual ~IEvWriteDataAccessor() {} }; diff --git a/ydb/core/tx/data_events/write_data.cpp b/ydb/core/tx/data_events/write_data.cpp index 0457f72d86a7..390667624dda 100644 --- a/ydb/core/tx/data_events/write_data.cpp +++ b/ydb/core/tx/data_events/write_data.cpp @@ -17,9 +17,4 @@ TWriteData::TWriteData(const TWriteMeta& writeMeta, IDataContainer::TPtr data, c Y_ABORT_UNLESS(BlobsAction); } -const NKikimr::NEvWrite::IDataContainer& TWriteData::GetDataVerified() const { - AFL_VERIFY(Data); - return *Data; -} - } diff --git a/ydb/core/tx/data_events/write_data.h b/ydb/core/tx/data_events/write_data.h index 8ed419bb0f0b..0acbec1bcf98 100644 --- a/ydb/core/tx/data_events/write_data.h +++ b/ydb/core/tx/data_events/write_data.h @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -37,7 +38,7 @@ class TWriteMeta { YDB_ACCESSOR_DEF(TString, DedupId); YDB_READONLY(TString, Id, TGUID::CreateTimebased().AsUuidString()); - YDB_ACCESSOR(EModificationType, ModificationType, EModificationType::Upsert); + YDB_ACCESSOR(EModificationType, ModificationType, EModificationType::Replace); YDB_READONLY(TMonotonic, WriteStartInstant, TMonotonic::Now()); YDB_ACCESSOR(TMonotonic, WriteMiddle1StartInstant, TMonotonic::Now()); YDB_ACCESSOR(TMonotonic, WriteMiddle2StartInstant, TMonotonic::Now()); @@ -45,7 +46,21 @@ class TWriteMeta { YDB_ACCESSOR(TMonotonic, WriteMiddle4StartInstant, TMonotonic::Now()); YDB_ACCESSOR(TMonotonic, WriteMiddle5StartInstant, TMonotonic::Now()); YDB_ACCESSOR(TMonotonic, WriteMiddle6StartInstant, TMonotonic::Now()); + std::optional LockId; public: + void SetLockId(const ui64 lockId) { + LockId = lockId; + } + + ui64 GetLockIdVerified() const { + AFL_VERIFY(LockId); + return *LockId; + } + + std::optional 
GetLockIdOptional() const { + return LockId; + } + bool IsGuaranteeWriter() const { switch (ModificationType) { case EModificationType::Delete: @@ -72,10 +87,14 @@ class TWriteData { YDB_READONLY_DEF(IDataContainer::TPtr, Data); YDB_READONLY_DEF(std::shared_ptr, PrimaryKeySchema); YDB_READONLY_DEF(std::shared_ptr, BlobsAction); + YDB_ACCESSOR_DEF(std::optional, SchemaSubset); public: TWriteData(const TWriteMeta& writeMeta, IDataContainer::TPtr data, const std::shared_ptr& primaryKeySchema, const std::shared_ptr& blobsAction); - const IDataContainer& GetDataVerified() const; + const NArrow::TSchemaSubset& GetSchemaSubsetVerified() const { + AFL_VERIFY(SchemaSubset); + return *SchemaSubset; + } const TWriteMeta& GetWriteMeta() const { return WriteMeta; diff --git a/ydb/core/tx/datashard/build_kqp_data_tx_out_rs_unit.cpp b/ydb/core/tx/datashard/build_kqp_data_tx_out_rs_unit.cpp index 1e661132460d..5aafd5a20ada 100644 --- a/ydb/core/tx/datashard/build_kqp_data_tx_out_rs_unit.cpp +++ b/ydb/core/tx/datashard/build_kqp_data_tx_out_rs_unit.cpp @@ -83,15 +83,9 @@ EExecutionStatus TBuildKqpDataTxOutRSUnit::Execute(TOperation::TPtr op, TTransac auto allocGuard = tasksRunner.BindAllocator(txc.GetMemoryLimit() - dataTx->GetTxSize()); - NKqp::NRm::TKqpResourcesRequest req; - req.MemoryPool = NKqp::NRm::EKqpMemoryPool::DataQuery; - req.ExternalMemory = txc.GetMemoryLimit(); - ui64 taskId = dataTx->GetFirstKqpTaskId(); - - NKqp::GetKqpResourceManager()->NotifyExternalResourcesAllocated(tx->GetTxId(), taskId, req); - + NKqp::GetKqpResourceManager()->GetCounters()->RmExternalMemory->Add(txc.GetMemoryLimit()); Y_DEFER { - NKqp::GetKqpResourceManager()->FreeResources(tx->GetTxId(), taskId); + NKqp::GetKqpResourceManager()->GetCounters()->RmExternalMemory->Sub(txc.GetMemoryLimit()); }; LOG_T("Operation " << *op << " (build_kqp_data_tx_out_rs) at " << tabletId diff --git a/ydb/core/tx/datashard/cdc_stream_heartbeat.cpp b/ydb/core/tx/datashard/cdc_stream_heartbeat.cpp index 
6473bc62ba66..0c228730c13a 100644 --- a/ydb/core/tx/datashard/cdc_stream_heartbeat.cpp +++ b/ydb/core/tx/datashard/cdc_stream_heartbeat.cpp @@ -33,6 +33,10 @@ class TDataShard::TTxCdcStreamEmitHeartbeats: public NTabletFlatExecutor::TTrans TTxType GetTxType() const override { return TXTYPE_CDC_STREAM_EMIT_HEARTBEATS; } bool Execute(TTransactionContext& txc, const TActorContext&) override { + if (Self->State != TShardState::Ready) { + return true; + } + LOG_I("Emit change records" << ": edge# " << Edge << ", at tablet# " << Self->TabletID()); @@ -91,27 +95,27 @@ void TDataShard::EmitHeartbeats() { return; } + // We may possibly have more writes at this version + TRowVersion edge = GetMvccTxVersion(EMvccTxMode::ReadWrite); + bool wait = true; + if (const auto& plan = TransQueue.GetPlan()) { - const auto version = Min(plan.begin()->ToRowVersion(), VolatileTxManager.GetMinUncertainVersion()); - if (CdcStreamHeartbeatManager.ShouldEmitHeartbeat(version)) { - return Execute(new TTxCdcStreamEmitHeartbeats(this, version)); - } - return; + edge = Min(edge, plan.begin()->ToRowVersion()); + wait = false; } if (auto version = VolatileTxManager.GetMinUncertainVersion(); !version.IsMax()) { - if (CdcStreamHeartbeatManager.ShouldEmitHeartbeat(version)) { - return Execute(new TTxCdcStreamEmitHeartbeats(this, version)); - } - return; + edge = Min(edge, version); + wait = false; } - const TRowVersion nextWrite = GetMvccTxVersion(EMvccTxMode::ReadWrite); - if (CdcStreamHeartbeatManager.ShouldEmitHeartbeat(nextWrite)) { - return Execute(new TTxCdcStreamEmitHeartbeats(this, nextWrite)); + if (CdcStreamHeartbeatManager.ShouldEmitHeartbeat(edge)) { + return Execute(new TTxCdcStreamEmitHeartbeats(this, edge)); } - WaitPlanStep(lowest.Next().Step); + if (wait) { + WaitPlanStep(lowest.Next().Step); + } } void TCdcStreamHeartbeatManager::Reset() { @@ -211,7 +215,7 @@ bool TCdcStreamHeartbeatManager::ShouldEmitHeartbeat(const TRowVersion& edge) co return false; } - if (Schedule.top().Version 
> edge) { + if (Schedule.top().Version >= edge) { return false; } @@ -221,7 +225,7 @@ bool TCdcStreamHeartbeatManager::ShouldEmitHeartbeat(const TRowVersion& edge) co THashMap TCdcStreamHeartbeatManager::EmitHeartbeats( NTable::TDatabase& db, const TRowVersion& edge) { - if (Schedule.empty() || Schedule.top().Version > edge) { + if (!ShouldEmitHeartbeat(edge)) { return {}; } @@ -230,7 +234,7 @@ THashMap TCdcStreamHeartbea while (true) { const auto& top = Schedule.top(); - if (top.Version > edge) { + if (top.Version >= edge) { break; } diff --git a/ydb/core/tx/datashard/cdc_stream_scan.cpp b/ydb/core/tx/datashard/cdc_stream_scan.cpp index 2f67fe5a79ac..321147e0ddb4 100644 --- a/ydb/core/tx/datashard/cdc_stream_scan.cpp +++ b/ydb/core/tx/datashard/cdc_stream_scan.cpp @@ -240,7 +240,8 @@ class TDataShard::TTxCdcStreamScanProgress const auto& valueTags = ev.ValueTags; LOG_D("Progress" - << ": streamPathId# " << streamPathId); + << ": streamPathId# " << streamPathId + << ", rows# " << ev.Rows.size()); if (!Self->GetUserTables().contains(tablePathId.LocalPathId)) { LOG_W("Cannot progress on unknown table" diff --git a/ydb/core/tx/datashard/change_record_cdc_serializer.cpp b/ydb/core/tx/datashard/change_record_cdc_serializer.cpp index 6e6eb098d28a..aa7d0e0a0d9c 100644 --- a/ydb/core/tx/datashard/change_record_cdc_serializer.cpp +++ b/ydb/core/tx/datashard/change_record_cdc_serializer.cpp @@ -91,10 +91,16 @@ class TJsonSerializer: public TBaseSerializer { friend class TChangeRecord; // used in GetPartitionKey() static NJson::TJsonWriterConfig DefaultJsonConfig() { - NJson::TJsonWriterConfig jsonConfig; - jsonConfig.ValidateUtf8 = false; - jsonConfig.WriteNanAsString = true; - return jsonConfig; + constexpr ui32 doubleNDigits = std::numeric_limits::max_digits10; + constexpr ui32 floatNDigits = std::numeric_limits::max_digits10; + constexpr EFloatToStringMode floatMode = EFloatToStringMode::PREC_NDIGITS; + return NJson::TJsonWriterConfig { + .DoubleNDigits = doubleNDigits, + 
.FloatNDigits = floatNDigits, + .FloatToStringMode = floatMode, + .ValidateUtf8 = false, + .WriteNanAsString = true, + }; } protected: diff --git a/ydb/core/tx/datashard/change_sender_async_index.cpp b/ydb/core/tx/datashard/change_sender_async_index.cpp index 38492b20728e..106d03406b58 100644 --- a/ydb/core/tx/datashard/change_sender_async_index.cpp +++ b/ydb/core/tx/datashard/change_sender_async_index.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -435,16 +436,6 @@ class TAsyncIndexChangeSenderMain return Check(&TSchemeCacheHelpers::CheckEntryKind, &TThis::LogWarnAndRetry, entry, expected); } - static TVector MakePartitionIds(const TVector& partitions) { - TVector result(Reserve(partitions.size())); - - for (const auto& partition : partitions) { - result.push_back(partition.ShardId); // partition = shard - } - - return result; - } - /// ResolveUserTable void ResolveUserTable() { @@ -611,6 +602,11 @@ class TAsyncIndexChangeSenderMain return; } + if (IndexTableVersion && IndexTableVersion == entry.Self->Info.GetVersion().GetGeneralVersion()) { + CreateSenders(); + return Become(&TThis::StateMain); + } + TagMap.clear(); TVector keyColumnTypes; @@ -692,11 +688,9 @@ class TAsyncIndexChangeSenderMain return Retry(); } - const bool versionChanged = !IndexTableVersion || IndexTableVersion != entry.GeneralVersion; IndexTableVersion = entry.GeneralVersion; - KeyDesc = std::move(entry.KeyDescription); - CreateSenders(MakePartitionIds(KeyDesc->GetPartitions()), versionChanged); + CreateSenders(NChangeExchange::MakePartitionIds(KeyDesc->GetPartitions())); Become(&TThis::StateMain); } diff --git a/ydb/core/tx/datashard/change_sender_cdc_stream.cpp b/ydb/core/tx/datashard/change_sender_cdc_stream.cpp index 5300357c24cd..ecf916263b6d 100644 --- a/ydb/core/tx/datashard/change_sender_cdc_stream.cpp +++ b/ydb/core/tx/datashard/change_sender_cdc_stream.cpp @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include #include @@ 
-300,45 +300,6 @@ class TCdcChangeSenderMain , public NChangeExchange::ISenderFactory , private NSchemeCache::TSchemeCacheHelpers { - struct TPQPartitionInfo { - ui32 PartitionId; - ui64 ShardId; - TPartitionKeyRange KeyRange; - - struct TLess { - TConstArrayRef Schema; - - TLess(const TVector& schema) - : Schema(schema) - { - } - - bool operator()(const TPQPartitionInfo& lhs, const TPQPartitionInfo& rhs) const { - Y_ABORT_UNLESS(lhs.KeyRange.ToBound || rhs.KeyRange.ToBound); - - if (!lhs.KeyRange.ToBound) { - return false; - } - - if (!rhs.KeyRange.ToBound) { - return true; - } - - Y_ABORT_UNLESS(lhs.KeyRange.ToBound && rhs.KeyRange.ToBound); - - const int compares = CompareTypedCellVectors( - lhs.KeyRange.ToBound->GetCells().data(), - rhs.KeyRange.ToBound->GetCells().data(), - Schema.data(), Schema.size() - ); - - return (compares < 0); - } - - }; // TLess - - }; // TPQPartitionInfo - TStringBuf GetLogPrefix() const { if (!LogPrefix) { LogPrefix = TStringBuilder() @@ -430,16 +391,6 @@ class TCdcChangeSenderMain return false; } - static TVector MakePartitionIds(const TVector& partitions) { - TVector result(Reserve(partitions.size())); - - for (const auto& partition : partitions) { - result.push_back(partition.ShardId); - } - - return result; - } - /// ResolveCdcStream void ResolveCdcStream() { @@ -561,77 +512,27 @@ class TCdcChangeSenderMain return; } - const auto& pqDesc = entry.PQGroupInfo->Description; - const auto& pqConfig = pqDesc.GetPQTabletConfig(); - - TVector schema; - PartitionToShard.clear(); - - schema.reserve(pqConfig.PartitionKeySchemaSize()); - for (const auto& keySchema : pqConfig.GetPartitionKeySchema()) { - // TODO: support pg types - schema.push_back(NScheme::TTypeInfo(keySchema.GetTypeId())); + const auto topicVersion = entry.Self->Info.GetVersion().GetGeneralVersion(); + if (TopicVersion && TopicVersion == topicVersion) { + CreateSenders(); + return Become(&TThis::StateMain); } - TSet partitions(schema); - THashSet shards; - - for (const 
auto& partition : pqDesc.GetPartitions()) { - const auto partitionId = partition.GetPartitionId(); - const auto shardId = partition.GetTabletId(); - - PartitionToShard.emplace(partitionId, shardId); - - auto keyRange = TPartitionKeyRange::Parse(partition.GetKeyRange()); - Y_ABORT_UNLESS(!keyRange.FromBound || keyRange.FromBound->GetCells().size() == schema.size()); - Y_ABORT_UNLESS(!keyRange.ToBound || keyRange.ToBound->GetCells().size() == schema.size()); - - partitions.insert({partitionId, shardId, std::move(keyRange)}); - shards.insert(shardId); - } - - // used to validate - bool isFirst = true; - const TPQPartitionInfo* prev = nullptr; - - TVector partitioning; - partitioning.reserve(partitions.size()); - for (const auto& cur : partitions) { - if (isFirst) { - isFirst = false; - Y_ABORT_UNLESS(!cur.KeyRange.FromBound.Defined()); - } else { - Y_ABORT_UNLESS(cur.KeyRange.FromBound.Defined()); - Y_ABORT_UNLESS(prev); - Y_ABORT_UNLESS(prev->KeyRange.ToBound.Defined()); - // TODO: compare cells - } - - auto& part = partitioning.emplace_back(cur.PartitionId); // TODO: double-check that it is right partitioning - - if (cur.KeyRange.ToBound) { - part.Range = NKikimr::TKeyDesc::TPartitionRangeInfo{ - .EndKeyPrefix = *cur.KeyRange.ToBound, - }; - } else { - part.Range = NKikimr::TKeyDesc::TPartitionRangeInfo{}; - } + TopicVersion = topicVersion; - prev = &cur; - } + const auto& pqDesc = entry.PQGroupInfo->Description; - if (prev) { - Y_ABORT_UNLESS(!prev->KeyRange.ToBound.Defined()); + PartitionToShard.clear(); + for (const auto& partition : pqDesc.GetPartitions()) { + PartitionToShard.emplace(partition.GetPartitionId(), partition.GetTabletId()); } - const auto topicVersion = entry.Self->Info.GetVersion().GetGeneralVersion(); - const bool versionChanged = !TopicVersion || TopicVersion != topicVersion; - TopicVersion = topicVersion; - - KeyDesc = NKikimr::TKeyDesc::CreateMiniKeyDesc(schema); - KeyDesc->Partitioning = std::make_shared>(std::move(partitioning)); + 
Y_ABORT_UNLESS(entry.PQGroupInfo->Schema); + KeyDesc = NKikimr::TKeyDesc::CreateMiniKeyDesc(entry.PQGroupInfo->Schema); + Y_ABORT_UNLESS(entry.PQGroupInfo->Partitioning); + KeyDesc->Partitioning = std::make_shared>(entry.PQGroupInfo->Partitioning); - CreateSenders(MakePartitionIds(*KeyDesc->Partitioning), versionChanged); + CreateSenders(NChangeExchange::MakePartitionIds(*KeyDesc->Partitioning)); Become(&TThis::StateMain); } diff --git a/ydb/core/tx/datashard/datashard.cpp b/ydb/core/tx/datashard/datashard.cpp index ba07749ec285..363e8b1658d1 100644 --- a/ydb/core/tx/datashard/datashard.cpp +++ b/ydb/core/tx/datashard/datashard.cpp @@ -855,6 +855,39 @@ void TDataShard::PersistChangeRecord(NIceDb::TNiceDb& db, const TChangeRecord& r NIceDb::TUpdate(record.GetKind()), NIceDb::TUpdate(record.GetBody()), NIceDb::TUpdate(record.GetSource())); + + auto res = ChangesQueue.emplace(record.GetOrder(), record); + Y_VERIFY_S(res.second, "Duplicate change record: " << record.GetOrder()); + + if (res.first->second.SchemaVersion) { + res.first->second.SchemaSnapshotAcquired = SchemaSnapshotManager.AcquireReference( + TSchemaSnapshotKey(res.first->second.TableId, res.first->second.SchemaVersion)); + } + + if (CommittingChangeRecords.empty()) { + db.GetDatabase().OnCommit([this] { + CommittingChangeRecords.clear(); + }); + db.GetDatabase().OnRollback([this] { + for (const auto order : CommittingChangeRecords) { + auto cIt = ChangesQueue.find(order); + Y_VERIFY_S(cIt != ChangesQueue.end(), "Cannot find change record: " << order); + + if (cIt->second.SchemaSnapshotAcquired) { + const auto snapshotKey = TSchemaSnapshotKey(cIt->second.TableId, cIt->second.SchemaVersion); + if (const auto last = SchemaSnapshotManager.ReleaseReference(snapshotKey)) { + ScheduleRemoveSchemaSnapshot(snapshotKey); + } + } + + ChangesQueue.erase(cIt); + } + + CommittingChangeRecords.clear(); + }); + } + + CommittingChangeRecords.push_back(record.GetOrder()); } else { auto& state = LockChangeRecords[lockId]; 
Y_ABORT_UNLESS(state.Changes.empty() || state.Changes.back().LockOffset < record.GetLockOffset(), @@ -934,6 +967,14 @@ void TDataShard::CommitLockChangeRecords(NIceDb::TNiceDb& db, ui64 lockId, ui64 committed.Step = rowVersion.Step; committed.TxId = rowVersion.TxId; collected.push_back(committed); + + auto res = ChangesQueue.emplace(committed.Order, committed); + Y_VERIFY_S(res.second, "Duplicate change record: " << committed.Order); + + if (res.first->second.SchemaVersion) { + res.first->second.SchemaSnapshotAcquired = SchemaSnapshotManager.AcquireReference( + TSchemaSnapshotKey(res.first->second.TableId, res.first->second.SchemaVersion)); + } } Y_VERIFY_S(!CommittedLockChangeRecords.contains(lockId), "Cannot commit lock " << lockId << " more than once"); @@ -960,7 +1001,26 @@ void TDataShard::CommitLockChangeRecords(NIceDb::TNiceDb& db, ui64 lockId, ui64 LockChangeRecords.erase(it); }); db.GetDatabase().OnRollback([this, lockId]() { - CommittedLockChangeRecords.erase(lockId); + auto it = CommittedLockChangeRecords.find(lockId); + Y_VERIFY_S(it != CommittedLockChangeRecords.end(), "Unexpected failure to find lockId# " << lockId); + + for (size_t i = 0; i < it->second.Count; ++i) { + const ui64 order = it->second.Order + i; + + auto cIt = ChangesQueue.find(order); + Y_VERIFY_S(cIt != ChangesQueue.end(), "Cannot find change record: " << order); + + if (cIt->second.SchemaSnapshotAcquired) { + const auto snapshotKey = TSchemaSnapshotKey(cIt->second.TableId, cIt->second.SchemaVersion); + if (const auto last = SchemaSnapshotManager.ReleaseReference(snapshotKey)) { + ScheduleRemoveSchemaSnapshot(snapshotKey); + } + } + + ChangesQueue.erase(cIt); + } + + CommittedLockChangeRecords.erase(it); }); } @@ -994,7 +1054,6 @@ void TDataShard::RemoveChangeRecord(NIceDb::TNiceDb& db, ui64 order) { auto it = ChangesQueue.find(order); if (it == ChangesQueue.end()) { - Y_VERIFY_DEBUG_S(false, "Trying to remove non-enqueud record: " << order); return; } @@ -1022,23 +1081,9 @@ void 
TDataShard::RemoveChangeRecord(NIceDb::TNiceDb& db, ui64 order) { ChangesQueueBytes -= record.BodySize; if (record.SchemaSnapshotAcquired) { - Y_ABORT_UNLESS(record.TableId); - auto tableIt = TableInfos.find(record.TableId.LocalPathId); - - if (tableIt != TableInfos.end()) { - const auto snapshotKey = TSchemaSnapshotKey(record.TableId, record.SchemaVersion); - const bool last = SchemaSnapshotManager.ReleaseReference(snapshotKey); - - if (last) { - const auto* snapshot = SchemaSnapshotManager.FindSnapshot(snapshotKey); - Y_ABORT_UNLESS(snapshot); - - if (snapshot->Schema->GetTableSchemaVersion() < tableIt->second->GetTableSchemaVersion()) { - SchemaSnapshotManager.RemoveShapshot(db, snapshotKey); - } - } - } else { - Y_DEBUG_ABORT_UNLESS(State == TShardState::PreOffline); + const auto snapshotKey = TSchemaSnapshotKey(record.TableId, record.SchemaVersion); + if (const bool last = SchemaSnapshotManager.ReleaseReference(snapshotKey)) { + ScheduleRemoveSchemaSnapshot(snapshotKey); } } @@ -1047,6 +1092,8 @@ void TDataShard::RemoveChangeRecord(NIceDb::TNiceDb& db, ui64 order) { if (!--rIt->second) { ChangeQueueReservations.erase(rIt); } + + SetCounter(COUNTER_CHANGE_QUEUE_RESERVED_CAPACITY, ChangeQueueReservedCapacity); } UpdateChangeExchangeLag(AppData()->TimeProvider->Now()); @@ -1054,12 +1101,24 @@ void TDataShard::RemoveChangeRecord(NIceDb::TNiceDb& db, ui64 order) { IncCounter(COUNTER_CHANGE_RECORDS_REMOVED); SetCounter(COUNTER_CHANGE_QUEUE_SIZE, ChangesQueue.size()); - SetCounter(COUNTER_CHANGE_QUEUE_RESERVED_CAPACITY, ChangeQueueReservedCapacity); CheckChangesQueueNoOverflow(); } -void TDataShard::EnqueueChangeRecords(TVector&& records, ui64 cookie) { +void TDataShard::EnqueueChangeRecords(TVector&& records, ui64 cookie, bool afterMove) { + if (auto it = ChangeQueueReservations.find(cookie); it != ChangeQueueReservations.end()) { + Y_ABORT_UNLESS(!afterMove); + + ChangeQueueReservedCapacity -= it->second; + it->second = records.size(); + ChangeQueueReservedCapacity 
+= it->second; + if (!it->second) { + ChangeQueueReservations.erase(it); + } + + SetCounter(COUNTER_CHANGE_QUEUE_RESERVED_CAPACITY, ChangeQueueReservedCapacity); + } + if (!records) { return; } @@ -1079,41 +1138,31 @@ void TDataShard::EnqueueChangeRecords(TVectorTimeProvider->Now(); TVector forward(Reserve(records.size())); for (const auto& record : records) { - forward.emplace_back(record.Order, record.PathId, record.BodySize); + auto it = ChangesQueue.find(record.Order); + if (it == ChangesQueue.end()) { + Y_ABORT_UNLESS(afterMove); + continue; + } - auto res = ChangesQueue.emplace( - std::piecewise_construct, - std::forward_as_tuple(record.Order), - std::forward_as_tuple(record, now, cookie) - ); - if (res.second) { - ChangesList.PushBack(&res.first->second); + forward.emplace_back(record.Order, record.PathId, record.BodySize); - Y_ABORT_UNLESS(ChangesQueueBytes <= (Max() - record.BodySize)); - ChangesQueueBytes += record.BodySize; + it->second.EnqueuedAt = now; + it->second.ReservationCookie = cookie; + ChangesList.PushBack(&it->second); - if (record.SchemaVersion) { - res.first->second.SchemaSnapshotAcquired = SchemaSnapshotManager.AcquireReference( - TSchemaSnapshotKey(record.TableId, record.SchemaVersion)); - } - } - } - - if (auto it = ChangeQueueReservations.find(cookie); it != ChangeQueueReservations.end()) { - ChangeQueueReservedCapacity -= it->second; - ChangeQueueReservedCapacity += records.size(); + Y_ABORT_UNLESS(ChangesQueueBytes <= (Max() - record.BodySize)); + ChangesQueueBytes += record.BodySize; } UpdateChangeExchangeLag(now); IncCounter(COUNTER_CHANGE_RECORDS_ENQUEUED, forward.size()); SetCounter(COUNTER_CHANGE_QUEUE_SIZE, ChangesQueue.size()); - SetCounter(COUNTER_CHANGE_QUEUE_RESERVED_CAPACITY, ChangeQueueReservedCapacity); Y_ABORT_UNLESS(OutChangeSender); Send(OutChangeSender, new NChangeExchange::TEvChangeExchange::TEvEnqueueRecords(std::move(forward))); } -ui32 TDataShard::GetFreeChangeQueueCapacity(ui64 cookie) { +ui32 
TDataShard::GetFreeChangeQueueCapacity(ui64 cookie) const { const ui64 sizeLimit = AppData()->DataShardConfig.GetChangesQueueItemsLimit(); if (sizeLimit < ChangesQueue.size()) { return 0; @@ -1265,6 +1314,14 @@ bool TDataShard::LoadChangeRecords(NIceDb::TNiceDb& db, TVectorsecond.SchemaVersion) { + res.first->second.SchemaSnapshotAcquired = SchemaSnapshotManager.AcquireReference( + TSchemaSnapshotKey(res.first->second.TableId, res.first->second.SchemaVersion)); + } + if (!rowset.Next()) { return false; } @@ -1363,6 +1420,14 @@ bool TDataShard::LoadChangeRecordCommits(NIceDb::TNiceDb& db, TVectorsecond.SchemaVersion) { + res.first->second.SchemaSnapshotAcquired = SchemaSnapshotManager.AcquireReference( + TSchemaSnapshotKey(res.first->second.TableId, res.first->second.SchemaVersion)); + } } LockChangeRecords.erase(lockId); @@ -1421,6 +1486,51 @@ void TDataShard::ScheduleRemoveAbandonedLockChanges() { } } +void TDataShard::ScheduleRemoveSchemaSnapshot(const TSchemaSnapshotKey& key) { + Y_ABORT_UNLESS(!SchemaSnapshotManager.HasReference(key)); + + const auto* snapshot = SchemaSnapshotManager.FindSnapshot(key); + Y_ABORT_UNLESS(snapshot); + + auto it = TableInfos.find(key.PathId); + if (it == TableInfos.end()) { + Y_DEBUG_ABORT_UNLESS(State == TShardState::PreOffline); + return; + } + + if (snapshot->Schema->GetTableSchemaVersion() < it->second->GetTableSchemaVersion()) { + bool wasEmpty = PendingSchemaSnapshotsToGc.empty(); + PendingSchemaSnapshotsToGc.push_back(key); + if (wasEmpty) { + Send(SelfId(), new TEvPrivate::TEvRemoveSchemaSnapshots); + } + } +} + +void TDataShard::ScheduleRemoveAbandonedSchemaSnapshots() { + bool wasEmpty = PendingSchemaSnapshotsToGc.empty(); + + for (const auto& [key, snapshot] : SchemaSnapshotManager.GetSnapshots()) { + auto it = TableInfos.find(key.PathId); + if (it == TableInfos.end()) { + Y_DEBUG_ABORT_UNLESS(State == TShardState::PreOffline); + break; + } + if (SchemaSnapshotManager.HasReference(key)) { + continue; + } + if 
(snapshot.Schema->GetTableSchemaVersion() >= it->second->GetTableSchemaVersion()) { + continue; + } + + PendingSchemaSnapshotsToGc.push_back(key); + } + + if (wasEmpty && !PendingSchemaSnapshotsToGc.empty()) { + Send(SelfId(), new TEvPrivate::TEvRemoveSchemaSnapshots); + } +} + void TDataShard::PersistSchemeTxResult(NIceDb::TNiceDb &db, const TSchemaOperation &op) { db.Table().Key(op.TxId).Update( NIceDb::TUpdate(op.Success), @@ -1649,8 +1759,18 @@ void TDataShard::AddSchemaSnapshot(const TPathId& pathId, ui64 tableSchemaVersio Y_ABORT_UNLESS(TableInfos.contains(pathId.LocalPathId)); auto tableInfo = TableInfos[pathId.LocalPathId]; - const auto key = TSchemaSnapshotKey(pathId.OwnerId, pathId.LocalPathId, tableSchemaVersion); + const auto key = TSchemaSnapshotKey(pathId, tableSchemaVersion); SchemaSnapshotManager.AddSnapshot(txc.DB, key, TSchemaSnapshot(tableInfo, step, txId)); + + const auto& snapshots = SchemaSnapshotManager.GetSnapshots(); + for (auto it = snapshots.lower_bound(TSchemaSnapshotKey(pathId, 1)); it != snapshots.end(); ++it) { + if (it->first == key) { + break; + } + if (!SchemaSnapshotManager.HasReference(it->first)) { + ScheduleRemoveSchemaSnapshot(it->first); + } + } } void TDataShard::PersistLastLoanTableTid(NIceDb::TNiceDb& db, ui32 localTid) { diff --git a/ydb/core/tx/datashard/datashard__engine_host.cpp b/ydb/core/tx/datashard/datashard__engine_host.cpp index 2155bcf91a43..71a46e61854a 100644 --- a/ydb/core/tx/datashard/datashard__engine_host.cpp +++ b/ydb/core/tx/datashard/datashard__engine_host.cpp @@ -370,11 +370,11 @@ class TDataShardEngineHost final TSmallVec ops; ConvertTableValues(Scheme, tableInfo, commands, ops, nullptr); - UserDb.UpdateRow(tableId, key, ops); + UserDb.UpsertRow(tableId, key, ops); } - void UpdateRow(const TTableId& tableId, const TArrayRef key, const TArrayRef ops) override { - UserDb.UpdateRow(tableId, key, ops); + void UpsertRow(const TTableId& tableId, const TArrayRef key, const TArrayRef ops) override { + 
UserDb.UpsertRow(tableId, key, ops); } void ReplaceRow(const TTableId& tableId, const TArrayRef key, const TArrayRef ops) override { @@ -385,6 +385,10 @@ class TDataShardEngineHost final UserDb.InsertRow(tableId, key, ops); } + void UpdateRow(const TTableId& tableId, const TArrayRef key, const TArrayRef ops) override { + UserDb.UpdateRow(tableId, key, ops); + } + void EraseRow(const TTableId& tableId, const TArrayRef& row) override { if (TSysTables::IsSystemTable(tableId)) { DataShardSysTable(tableId).EraseRow(row); diff --git a/ydb/core/tx/datashard/datashard__init.cpp b/ydb/core/tx/datashard/datashard__init.cpp index 89981068248d..a0561a9c998f 100644 --- a/ydb/core/tx/datashard/datashard__init.cpp +++ b/ydb/core/tx/datashard/datashard__init.cpp @@ -425,6 +425,12 @@ bool TDataShard::TTxInit::ReadEverything(TTransactionContext &txc) { return false; } + if (Self->State != TShardState::Offline && txc.DB.GetScheme().GetTableInfo(Schema::SchemaSnapshots::TableId)) { + if (!Self->SchemaSnapshotManager.Load(db)) { + return false; + } + } + if (Self->State != TShardState::Offline && txc.DB.GetScheme().GetTableInfo(Schema::ChangeRecords::TableId)) { if (!Self->LoadChangeRecords(db, ChangeRecords)) { return false; @@ -512,12 +518,6 @@ bool TDataShard::TTxInit::ReadEverything(TTransactionContext &txc) { } } - if (Self->State != TShardState::Offline && txc.DB.GetScheme().GetTableInfo(Schema::SchemaSnapshots::TableId)) { - if (!Self->SchemaSnapshotManager.Load(db)) { - return false; - } - } - if (Self->State != TShardState::Offline && txc.DB.GetScheme().GetTableInfo(Schema::Locks::TableId)) { TDataShardLocksDb locksDb(*Self, txc); if (!Self->SysLocks.Load(locksDb)) { @@ -547,6 +547,7 @@ bool TDataShard::TTxInit::ReadEverything(TTransactionContext &txc) { Self->SubscribeNewLocks(); Self->ScheduleRemoveAbandonedLockChanges(); + Self->ScheduleRemoveAbandonedSchemaSnapshots(); return true; } diff --git a/ydb/core/tx/datashard/datashard__read_iterator.cpp 
b/ydb/core/tx/datashard/datashard__read_iterator.cpp index 6e385ebf13e6..6be261d99bee 100644 --- a/ydb/core/tx/datashard/datashard__read_iterator.cpp +++ b/ydb/core/tx/datashard/datashard__read_iterator.cpp @@ -5,6 +5,7 @@ #include "datashard_locks_db.h" #include "probes.h" +#include #include #include @@ -315,6 +316,8 @@ class TReader { , Self(self) , TableId(state.PathId.OwnerId, state.PathId.LocalPathId, state.SchemaVersion) , FirstUnprocessedQuery(State.FirstUnprocessedQuery) + , LastProcessedKey(State.LastProcessedKey) + , LastProcessedKeyErased(State.LastProcessedKeyErased) { GetTimeFast(&StartTime); EndTime = StartTime; @@ -329,10 +332,10 @@ class TReader { bool toInclusive; TSerializedCellVec keyFromCells; TSerializedCellVec keyToCells; - if (Y_UNLIKELY(FirstUnprocessedQuery == State.FirstUnprocessedQuery && State.LastProcessedKey)) { + if (LastProcessedKey) { if (!State.Reverse) { - keyFromCells = TSerializedCellVec(State.LastProcessedKey); - fromInclusive = State.LastProcessedKeyErased; + keyFromCells = TSerializedCellVec(LastProcessedKey); + fromInclusive = LastProcessedKeyErased; keyToCells = range.To; toInclusive = range.ToInclusive; @@ -341,8 +344,8 @@ class TReader { keyFromCells = range.From; fromInclusive = range.FromInclusive; - keyToCells = TSerializedCellVec(State.LastProcessedKey); - toInclusive = State.LastProcessedKeyErased; + keyToCells = TSerializedCellVec(LastProcessedKey); + toInclusive = LastProcessedKeyErased; } } else { keyFromCells = range.From; @@ -500,6 +503,7 @@ class TReader { while (FirstUnprocessedQuery < State.Request->Ranges.size()) { if (ReachedTotalRowsLimit()) { FirstUnprocessedQuery = -1; + LastProcessedKey.clear(); return true; } @@ -526,6 +530,7 @@ class TReader { FirstUnprocessedQuery++; else FirstUnprocessedQuery--; + LastProcessedKey.clear(); } return true; @@ -537,6 +542,7 @@ class TReader { while (FirstUnprocessedQuery < State.Request->Keys.size()) { if (ReachedTotalRowsLimit()) { FirstUnprocessedQuery = -1; + 
LastProcessedKey.clear(); return true; } @@ -562,6 +568,7 @@ class TReader { FirstUnprocessedQuery++; else FirstUnprocessedQuery--; + LastProcessedKey.clear(); } return true; @@ -727,6 +734,28 @@ class TReader { } void UpdateState(TReadIteratorState& state, bool sentResult) { + if (state.FirstUnprocessedQuery == FirstUnprocessedQuery && + state.LastProcessedKey && !LastProcessedKey) + { + LOG_CRIT_S(*TlsActivationContext, NKikimrServices::TX_DATASHARD, + "DataShard " << Self->TabletID() << " detected unexpected reset of LastProcessedKey:" + << " ReadId# " << State.ReadId + << " LastSeqNo# " << State.SeqNo + << " LastQuery# " << State.FirstUnprocessedQuery + << " RowsRead# " << RowsRead + << " RowsProcessed# " << RowsProcessed + << " RowsSinceLastCheck# " << RowsSinceLastCheck + << " BytesInResult# " << BytesInResult + << " DeletedRowSkips# " << DeletedRowSkips + << " InvisibleRowSkips# " << InvisibleRowSkips + << " Quota.Rows# " << State.Quota.Rows + << " Quota.Bytes# " << State.Quota.Bytes + << " State.TotalRows# " << State.TotalRows + << " State.TotalRowsLimit# " << State.TotalRowsLimit + << " State.MaxRowsInResult# " << State.MaxRowsInResult); + Self->IncCounterReadIteratorLastKeyReset(); + } + state.TotalRows += RowsRead; state.FirstUnprocessedQuery = FirstUnprocessedQuery; state.LastProcessedKey = LastProcessedKey; @@ -1632,6 +1661,7 @@ class TDataShard::TReadOperation : public TOperation, public IReadOperation { if (Reader->HasUnreadQueries()) { Reader->UpdateState(state, ResultSent); if (!state.IsExhausted()) { + state.ReadContinuePending = true; ctx.Send( Self->SelfId(), new TEvDataShard::TEvReadContinue(ReadId.Sender, ReadId.ReadId)); @@ -2282,6 +2312,15 @@ class TDataShard::TTxReadContinue : public NTabletFlatExecutor::TTransactionBase Y_ASSERT(it->second); auto& state = *it->second; + if (state.IsExhausted()) { + // iterator quota reduced and exhausted while ReadContinue was inflight + LOG_TRACE_S(ctx, NKikimrServices::TX_DATASHARD, Self->TabletID() << " 
ReadContinue for iterator# " << ReadId + << ", quota exhausted while rescheduling"); + state.ReadContinuePending = false; + Result.reset(); + return true; + } + LOG_TRACE_S(ctx, NKikimrServices::TX_DATASHARD, Self->TabletID() << " ReadContinue for iterator# " << ReadId << ", firstUnprocessedQuery# " << state.FirstUnprocessedQuery); @@ -2394,6 +2433,7 @@ class TDataShard::TTxReadContinue : public NTabletFlatExecutor::TTransactionBase if (Reader->Read(txc, ctx)) { // Retry later when dependencies are resolved if (!Reader->GetVolatileReadDependencies().empty()) { + state.ReadContinuePending = true; Self->WaitVolatileDependenciesThenSend( Reader->GetVolatileReadDependencies(), Self->SelfId(), @@ -2480,6 +2520,8 @@ class TDataShard::TTxReadContinue : public NTabletFlatExecutor::TTransactionBase Y_ABORT_UNLESS(it->second); auto& state = *it->second; + state.ReadContinuePending = false; + if (!Result) { LOG_DEBUG_S(ctx, NKikimrServices::TX_DATASHARD, Self->TabletID() << " read iterator# " << ReadId << " TTxReadContinue::Execute() finished without Result, aborting"); @@ -2527,14 +2569,14 @@ class TDataShard::TTxReadContinue : public NTabletFlatExecutor::TTransactionBase } if (Reader->HasUnreadQueries()) { - Y_ASSERT(it->second); - auto& state = *it->second; + bool wasExhausted = state.IsExhausted(); Reader->UpdateState(state, useful); if (!state.IsExhausted()) { + state.ReadContinuePending = true; ctx.Send( Self->SelfId(), new TEvDataShard::TEvReadContinue(ReadId.Sender, ReadId.ReadId)); - } else { + } else if (!wasExhausted) { Self->IncCounter(COUNTER_READ_ITERATORS_EXHAUSTED_COUNT); LOG_DEBUG_S(ctx, NKikimrServices::TX_DATASHARD, Self->TabletID() << " read iterator# " << ReadId << " exhausted"); @@ -2807,14 +2849,19 @@ void TDataShard::Handle(TEvDataShard::TEvReadAck::TPtr& ev, const TActorContext& bool wasExhausted = state.IsExhausted(); state.UpQuota( record.GetSeqNo(), - record.GetMaxRows(), - record.GetMaxBytes()); + record.HasMaxRows() ? 
record.GetMaxRows() : Max(), + record.HasMaxBytes() ? record.GetMaxBytes() : Max()); if (wasExhausted && !state.IsExhausted()) { DecCounter(COUNTER_READ_ITERATORS_EXHAUSTED_COUNT); - ctx.Send( - SelfId(), - new TEvDataShard::TEvReadContinue(ev->Sender, record.GetReadId())); + if (!state.ReadContinuePending) { + state.ReadContinuePending = true; + ctx.Send( + SelfId(), + new TEvDataShard::TEvReadContinue(ev->Sender, record.GetReadId())); + } + } else if (!wasExhausted && state.IsExhausted()) { + IncCounter(COUNTER_READ_ITERATORS_EXHAUSTED_COUNT); } LOG_TRACE_S(ctx, NKikimrServices::TX_DATASHARD, TabletID() << " ReadAck for read iterator# " << readId @@ -2943,6 +2990,16 @@ void TDataShard::UnsubscribeReadIteratorSessions(const TActorContext& ctx) { ReadIteratorSessions.clear(); } +void TDataShard::IncCounterReadIteratorLastKeyReset() { + if (!CounterReadIteratorLastKeyReset) { + CounterReadIteratorLastKeyReset = GetServiceCounters(AppData()->Counters, "tablets") + ->GetSubgroup("type", "DataShard") + ->GetSubgroup("category", "app") + ->GetCounter("DataShard/ReadIteratorLastKeyReset", true); + } + ++*CounterReadIteratorLastKeyReset; +} + } // NKikimr::NDataShard template<> diff --git a/ydb/core/tx/datashard/datashard__stats.cpp b/ydb/core/tx/datashard/datashard__stats.cpp index 9087ecd9bd4b..67d5c62db373 100644 --- a/ydb/core/tx/datashard/datashard__stats.cpp +++ b/ydb/core/tx/datashard/datashard__stats.cpp @@ -167,7 +167,7 @@ class TTableStatsCoroBuilder : public TActorCoroImpl, private IPages { LOG_DEBUG_S(GetActorContext(), NKikimrServices::TX_DATASHARD, "BuildStats result at datashard " << TabletId << ", for tableId " << TableId << ": RowCount " << ev->Stats.RowCount << ", DataSize " << ev->Stats.DataSize.Size << ", IndexSize " << ev->Stats.IndexSize.Size << ", PartCount " << ev->PartCount << (ev->PartOwners.size() > 1 || ev->PartOwners.size() == 1 && *ev->PartOwners.begin() != TabletId ? 
", with borrowed parts" : "") - << ", LoadedSize " << PagesSize << ", " << NFmt::Do(*Spent)); + << ", LoadedSize " << PagesSize << ", " << NFmt::Do(*Spent) << ", HistogramKeys " << ev->Stats.DataSizeHistogram.size()); Send(ReplyTo, ev.Release()); diff --git a/ydb/core/tx/datashard/datashard_change_sending.cpp b/ydb/core/tx/datashard/datashard_change_sending.cpp index 181f3fdf8d23..9023b29d7b9f 100644 --- a/ydb/core/tx/datashard/datashard_change_sending.cpp +++ b/ydb/core/tx/datashard/datashard_change_sending.cpp @@ -286,7 +286,7 @@ class TDataShard::TTxRemoveChangeRecords: public TTransactionBase { ChangeExchangeSplit = true; } else { for (const auto dstTabletId : Self->ChangeSenderActivator.GetDstSet()) { - if (Self->SplitSrcSnapshotSender.Acked(dstTabletId)) { + if (Self->SplitSrcSnapshotSender.Acked(dstTabletId) && !Self->ChangeSenderActivator.Acked(dstTabletId)) { ActivationList.insert(dstTabletId); } } @@ -346,9 +346,7 @@ class TDataShard::TTxRemoveChangeRecords: public TTransactionBase { } for (const auto dstTabletId : ActivationList) { - if (!Self->ChangeSenderActivator.Acked(dstTabletId)) { - Self->ChangeSenderActivator.DoSend(dstTabletId, ctx); - } + Self->ChangeSenderActivator.DoSend(dstTabletId, ctx); } Self->CheckStateChange(ctx); @@ -383,7 +381,7 @@ class TDataShard::TTxChangeExchangeSplitAck: public TTransactionBase Y_ABORT_UNLESS(Self->ChangeExchangeSplitter.Done()); for (const auto dstTabletId : Self->ChangeSenderActivator.GetDstSet()) { - if (Self->SplitSrcSnapshotSender.Acked(dstTabletId)) { + if (Self->SplitSrcSnapshotSender.Acked(dstTabletId) && !Self->ChangeSenderActivator.Acked(dstTabletId)) { ActivationList.insert(dstTabletId); } } @@ -396,9 +394,7 @@ class TDataShard::TTxChangeExchangeSplitAck: public TTransactionBase << ", at tablet# " << Self->TabletID()); for (const auto dstTabletId : ActivationList) { - if (!Self->ChangeSenderActivator.Acked(dstTabletId)) { - Self->ChangeSenderActivator.DoSend(dstTabletId, ctx); - } + 
Self->ChangeSenderActivator.DoSend(dstTabletId, ctx); } } diff --git a/ydb/core/tx/datashard/datashard_impl.h b/ydb/core/tx/datashard/datashard_impl.h index b770044dedb1..2cb3a91a6861 100644 --- a/ydb/core/tx/datashard/datashard_impl.h +++ b/ydb/core/tx/datashard/datashard_impl.h @@ -241,6 +241,7 @@ class TDataShard class TTxCdcStreamScanProgress; class TTxCdcStreamEmitHeartbeats; class TTxUpdateFollowerReadEdge; + class TTxRemoveSchemaSnapshots; template friend class TTxDirectBase; class TTxUploadRows; @@ -374,6 +375,7 @@ class TDataShard EvPlanPredictedTxs, EvStatisticsScanFinished, EvTableStatsError, + EvRemoveSchemaSnapshots, EvEnd }; @@ -595,6 +597,8 @@ class TDataShard struct TEvPlanPredictedTxs : public TEventLocal {}; struct TEvStatisticsScanFinished : public TEventLocal {}; + + struct TEvRemoveSchemaSnapshots : public TEventLocal {}; }; struct Schema : NIceDb::Schema { @@ -1383,6 +1387,8 @@ class TDataShard void Handle(TEvPrivate::TEvPlanPredictedTxs::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPrivate::TEvRemoveSchemaSnapshots::TPtr& ev, const TActorContext& ctx); + void HandleByReplicationSourceOffsetsServer(STATEFN_SIG); void DoPeriodicTasks(const TActorContext &ctx); @@ -1906,8 +1912,9 @@ class TDataShard void MoveChangeRecord(NIceDb::TNiceDb& db, ui64 order, const TPathId& pathId); void MoveChangeRecord(NIceDb::TNiceDb& db, ui64 lockId, ui64 lockOffset, const TPathId& pathId); void RemoveChangeRecord(NIceDb::TNiceDb& db, ui64 order); - void EnqueueChangeRecords(TVector&& records, ui64 cookie = 0); - ui32 GetFreeChangeQueueCapacity(ui64 cookie); + // TODO(ilnaz): remove 'afterMove' after #6541 + void EnqueueChangeRecords(TVector&& records, ui64 cookie = 0, bool afterMove = false); + ui32 GetFreeChangeQueueCapacity(ui64 cookie) const; ui64 ReserveChangeQueueCapacity(ui32 capacity); void UpdateChangeExchangeLag(TInstant now); void CreateChangeSender(const TActorContext& ctx); @@ -1920,6 +1927,8 @@ class TDataShard bool 
LoadChangeRecordCommits(NIceDb::TNiceDb& db, TVector& records); void ScheduleRemoveLockChanges(ui64 lockId); void ScheduleRemoveAbandonedLockChanges(); + void ScheduleRemoveSchemaSnapshot(const TSchemaSnapshotKey& key); + void ScheduleRemoveAbandonedSchemaSnapshots(); static void PersistCdcStreamScanLastKey(NIceDb::TNiceDb& db, const TSerializedCellVec& value, const TPathId& tablePathId, const TPathId& streamPathId); @@ -2804,24 +2813,29 @@ class TDataShard ui64 LockOffset; ui64 ReservationCookie; - explicit TEnqueuedRecord(ui64 bodySize, const TPathId& tableId, - ui64 schemaVersion, TInstant created, TInstant enqueued, - ui64 lockId = 0, ui64 lockOffset = 0, ui64 cookie = 0) + explicit TEnqueuedRecord(ui64 bodySize, const TPathId& tableId, ui64 schemaVersion, + TInstant created, ui64 lockId = 0, ui64 lockOffset = 0) : BodySize(bodySize) , TableId(tableId) , SchemaVersion(schemaVersion) , SchemaSnapshotAcquired(false) , CreatedAt(created) - , EnqueuedAt(enqueued) + , EnqueuedAt(TInstant::Zero()) , LockId(lockId) , LockOffset(lockOffset) - , ReservationCookie(cookie) + , ReservationCookie(0) + { + } + + explicit TEnqueuedRecord(const IDataShardChangeCollector::TChange& record) + : TEnqueuedRecord(record.BodySize, record.TableId, record.SchemaVersion, + record.CreatedAt(), record.LockId, record.LockOffset) { } - explicit TEnqueuedRecord(const IDataShardChangeCollector::TChange& record, TInstant now, ui64 cookie) - : TEnqueuedRecord(record.BodySize, record.TableId, record.SchemaVersion, record.CreatedAt(), now, - record.LockId, record.LockOffset, cookie) + explicit TEnqueuedRecord(const TChangeRecord& record) + : TEnqueuedRecord(record.GetBody().size(), record.GetTableId(), record.GetSchemaVersion(), + record.GetApproximateCreationDateTime(), record.GetLockId(), record.GetLockOffset()) { } }; @@ -2863,9 +2877,11 @@ class TDataShard size_t Count = 0; }; + TVector CommittingChangeRecords; THashMap LockChangeRecords; // ui64 is lock id THashMap 
CommittedLockChangeRecords; // ui64 is lock id TVector PendingLockChangeRecordsToRemove; + TVector PendingSchemaSnapshotsToGc; // in THashMap InChangeSenders; // ui64 is shard id @@ -2965,6 +2981,16 @@ class TDataShard CommittedLockChangeRecords = std::move(committedLockChangeRecords); } + auto TakeChangesQueue() { + auto result = std::move(ChangesQueue); + ChangesQueue.clear(); + return result; + } + + void SetChangesQueue(THashMap&& changesQueue) { + ChangesQueue = std::move(changesQueue); + } + protected: // Redundant init state required by flat executor implementation void StateInit(TAutoPtr &ev) { @@ -2986,6 +3012,7 @@ class TDataShard HFuncTraced(TEvMediatorTimecast::TEvNotifyPlanStep, Handle); HFuncTraced(TEvPrivate::TEvMediatorRestoreBackup, Handle); HFuncTraced(TEvPrivate::TEvRemoveLockChangeRecords, Handle); + HFuncTraced(TEvPrivate::TEvRemoveSchemaSnapshots, Handle); default: if (!HandleDefaultEvents(ev, SelfId())) { ALOG_WARN(NKikimrServices::TX_DATASHARD, "TDataShard::StateInactive unhandled event type: " << ev->GetTypeRewrite() @@ -3114,6 +3141,7 @@ class TDataShard HFunc(TEvPrivate::TEvPlanPredictedTxs, Handle); HFunc(NStat::TEvStatistics::TEvStatisticsRequest, Handle); HFunc(TEvPrivate::TEvStatisticsScanFinished, Handle); + HFuncTraced(TEvPrivate::TEvRemoveSchemaSnapshots, Handle); default: if (!HandleDefaultEvents(ev, SelfId())) { ALOG_WARN(NKikimrServices::TX_DATASHARD, "TDataShard::StateWork unhandled event type: " << ev->GetTypeRewrite() << " event: " << ev->ToString()); @@ -3291,6 +3319,10 @@ class TDataShard bool AllowCancelROwithReadsets() const; void ResolveTablePath(const TActorContext &ctx); + +public: + NMonitoring::TDynamicCounters::TCounterPtr CounterReadIteratorLastKeyReset; + void IncCounterReadIteratorLastKeyReset(); }; NKikimrTxDataShard::TError::EKind ConvertErrCode(NMiniKQL::IEngineFlat::EResult code); diff --git a/ydb/core/tx/datashard/datashard_pipeline.cpp b/ydb/core/tx/datashard/datashard_pipeline.cpp index 
8744e937d257..d7afb3621bdb 100644 --- a/ydb/core/tx/datashard/datashard_pipeline.cpp +++ b/ydb/core/tx/datashard/datashard_pipeline.cpp @@ -2285,11 +2285,15 @@ void TPipeline::AddCommittingOp(const TOperation::TPtr& op) { if (!Self->IsMvccEnabled() || op->IsReadOnly()) return; + Y_VERIFY_S(!op->GetCommittingOpsVersion(), + "Trying to AddCommittingOp " << *op << " more than once"); + TRowVersion version = Self->GetReadWriteVersions(op.Get()).WriteVersion; if (op->IsImmediate()) CommittingOps.Add(op->GetTxId(), version); else CommittingOps.Add(version); + op->SetCommittingOpsVersion(version); } void TPipeline::RemoveCommittingOp(const TRowVersion& version) { @@ -2299,13 +2303,13 @@ void TPipeline::RemoveCommittingOp(const TRowVersion& version) { } void TPipeline::RemoveCommittingOp(const TOperation::TPtr& op) { - if (!Self->IsMvccEnabled() || op->IsReadOnly()) - return; - - if (op->IsImmediate()) - CommittingOps.Remove(op->GetTxId()); - else - CommittingOps.Remove(TRowVersion(op->GetStep(), op->GetTxId())); + if (const auto& version = op->GetCommittingOpsVersion()) { + if (op->IsImmediate()) + CommittingOps.Remove(op->GetTxId(), *version); + else + CommittingOps.Remove(*version); + op->ResetCommittingOpsVersion(); + } } bool TPipeline::WaitCompletion(const TOperation::TPtr& op) const { diff --git a/ydb/core/tx/datashard/datashard_pipeline.h b/ydb/core/tx/datashard/datashard_pipeline.h index 10b523a194ab..705fc765966e 100644 --- a/ydb/core/tx/datashard/datashard_pipeline.h +++ b/ydb/core/tx/datashard/datashard_pipeline.h @@ -424,11 +424,13 @@ class TPipeline : TNonCopyable { ui64 Step; ui64 TxId; mutable ui32 Counter; + mutable ui32 TxCounter; TItem(const TRowVersion& from) : Step(from.Step) , TxId(from.TxId) , Counter(1u) + , TxCounter(0u) {} friend constexpr bool operator<(const TItem& a, const TItem& b) { @@ -442,6 +444,7 @@ class TPipeline : TNonCopyable { using TItemsSet = TSet; using TTxIdMap = THashMap; + public: inline void Add(ui64 txId, TRowVersion version) 
{ auto res = ItemsSet.emplace(version); @@ -450,6 +453,7 @@ class TPipeline : TNonCopyable { auto res2 = TxIdMap.emplace(txId, res.first); Y_VERIFY_S(res2.second, "Unexpected duplicate immediate tx " << txId << " committing at " << version); + res.first->TxCounter += 1; } inline void Add(TRowVersion version) { @@ -458,17 +462,29 @@ class TPipeline : TNonCopyable { res.first->Counter += 1; } - inline void Remove(ui64 txId) { - if (auto it = TxIdMap.find(txId); it != TxIdMap.end()) { - if (--it->second->Counter == 0) - ItemsSet.erase(it->second); - TxIdMap.erase(it); - } + inline void Remove(ui64 txId, TRowVersion version) { + auto it = TxIdMap.find(txId); + Y_VERIFY_S(it != TxIdMap.end(), "Removing immediate tx " << txId << " " << version + << " does not match a previous Add"); + Y_VERIFY_S(TRowVersion(it->second->Step, it->second->TxId) == version, "Removing immediate tx " << txId << " " << version + << " does not match a previous Add " << TRowVersion(it->second->Step, it->second->TxId)); + Y_VERIFY_S(it->second->TxCounter > 0, "Removing immediate tx " << txId << " " << version + << " with a mismatching TxCounter"); + --it->second->TxCounter; + if (--it->second->Counter == 0) + ItemsSet.erase(it->second); + TxIdMap.erase(it); } inline void Remove(TRowVersion version) { - if (auto it = ItemsSet.find(version); it != ItemsSet.end() && --it->Counter == 0) + auto it = ItemsSet.find(version); + Y_VERIFY_S(it != ItemsSet.end(), "Removing version " << version + << " does not match a previous Add"); + if (--it->Counter == 0) { + Y_VERIFY_S(it->TxCounter == 0, "Removing version " << version + << " while TxCounter has active references, possible Add/Remove mismatch"); ItemsSet.erase(it); + } } inline bool HasOpsBelow(TRowVersion upperBound) const { diff --git a/ydb/core/tx/datashard/datashard_repl_apply.cpp b/ydb/core/tx/datashard/datashard_repl_apply.cpp index 92533d7befde..718328eb63f2 100644 --- a/ydb/core/tx/datashard/datashard_repl_apply.cpp +++ 
b/ydb/core/tx/datashard/datashard_repl_apply.cpp @@ -1,4 +1,6 @@ #include "datashard_impl.h" +#include "datashard_locks_db.h" +#include "setup_sys_locks.h" #include @@ -24,6 +26,9 @@ class TDataShard::TTxApplyReplicationChanges : public TTransactionBaseState != TShardState::Ready) { Result = MakeHolder( NKikimrTxDataShard::TEvApplyReplicationChangesResult::STATUS_REJECTED, @@ -80,6 +85,7 @@ class TDataShard::TTxApplyReplicationChanges : public TTransactionBasePromoteImmediatePostExecuteEdges(*MvccReadWriteVersion, TDataShard::EPromotePostExecuteEdges::ReadWrite, txc); Pipeline.AddCommittingOp(*MvccReadWriteVersion); } @@ -88,6 +94,7 @@ class TDataShard::TTxApplyReplicationChanges : public TTransactionBaseSysLocksTable().ApplyLocks(); return true; } diff --git a/ydb/core/tx/datashard/datashard_schema_snapshots.cpp b/ydb/core/tx/datashard/datashard_schema_snapshots.cpp index 7b603ac4eca2..9c56e4ad7381 100644 --- a/ydb/core/tx/datashard/datashard_schema_snapshots.cpp +++ b/ydb/core/tx/datashard/datashard_schema_snapshots.cpp @@ -20,6 +20,7 @@ TSchemaSnapshotManager::TSchemaSnapshotManager(const TDataShard* self) void TSchemaSnapshotManager::Reset() { Snapshots.clear(); + References.clear(); } bool TSchemaSnapshotManager::Load(NIceDb::TNiceDb& db) { @@ -79,14 +80,16 @@ const TSchemaSnapshot* TSchemaSnapshotManager::FindSnapshot(const TSchemaSnapsho return Snapshots.FindPtr(key); } -void TSchemaSnapshotManager::RemoveShapshot(NIceDb::TNiceDb& db, const TSchemaSnapshotKey& key) { +void TSchemaSnapshotManager::RemoveShapshot(NTable::TDatabase& db, const TSchemaSnapshotKey& key) { auto it = Snapshots.find(key); if (it == Snapshots.end()) { return; } Snapshots.erase(it); - PersistRemoveSnapshot(db, key); + + NIceDb::TNiceDb nicedb(db); + PersistRemoveSnapshot(nicedb, key); } void TSchemaSnapshotManager::RenameSnapshots(NTable::TDatabase& db, @@ -119,6 +122,10 @@ void TSchemaSnapshotManager::RenameSnapshots(NTable::TDatabase& db, } } +const 
TSchemaSnapshotManager::TSnapshots& TSchemaSnapshotManager::GetSnapshots() const { + return Snapshots; +} + bool TSchemaSnapshotManager::AcquireReference(const TSchemaSnapshotKey& key) { auto it = Snapshots.find(key); if (it == Snapshots.end()) { @@ -152,6 +159,15 @@ bool TSchemaSnapshotManager::ReleaseReference(const TSchemaSnapshotKey& key) { return true; } +bool TSchemaSnapshotManager::HasReference(const TSchemaSnapshotKey& key) const { + auto refIt = References.find(key); + if (refIt != References.end()) { + return refIt->second; + } else { + return false; + } +} + void TSchemaSnapshotManager::PersistAddSnapshot(NIceDb::TNiceDb& db, const TSchemaSnapshotKey& key, const TSchemaSnapshot& snapshot) { using Schema = TDataShard::Schema; db.Table() diff --git a/ydb/core/tx/datashard/datashard_schema_snapshots.h b/ydb/core/tx/datashard/datashard_schema_snapshots.h index db0d3b655b34..0bc80a628e2e 100644 --- a/ydb/core/tx/datashard/datashard_schema_snapshots.h +++ b/ydb/core/tx/datashard/datashard_schema_snapshots.h @@ -23,6 +23,8 @@ struct TSchemaSnapshot { }; class TSchemaSnapshotManager { + using TSnapshots = TMap>; + public: explicit TSchemaSnapshotManager(const TDataShard* self); @@ -31,11 +33,13 @@ class TSchemaSnapshotManager { bool AddSnapshot(NTable::TDatabase& db, const TSchemaSnapshotKey& key, const TSchemaSnapshot& snapshot); const TSchemaSnapshot* FindSnapshot(const TSchemaSnapshotKey& key) const; - void RemoveShapshot(NIceDb::TNiceDb& db, const TSchemaSnapshotKey& key); + void RemoveShapshot(NTable::TDatabase& db, const TSchemaSnapshotKey& key); void RenameSnapshots(NTable::TDatabase& db, const TPathId& prevTableId, const TPathId& newTableId); + const TSnapshots& GetSnapshots() const; bool AcquireReference(const TSchemaSnapshotKey& key); bool ReleaseReference(const TSchemaSnapshotKey& key); + bool HasReference(const TSchemaSnapshotKey& key) const; private: void PersistAddSnapshot(NIceDb::TNiceDb& db, const TSchemaSnapshotKey& key, const TSchemaSnapshot& 
snapshot); @@ -43,7 +47,7 @@ class TSchemaSnapshotManager { private: const TDataShard* Self; - TMap> Snapshots; + TSnapshots Snapshots; THashMap References; }; // TSchemaSnapshotManager diff --git a/ydb/core/tx/datashard/datashard_split_src.cpp b/ydb/core/tx/datashard/datashard_split_src.cpp index b5e303fb5ba1..1e88d7495c63 100644 --- a/ydb/core/tx/datashard/datashard_split_src.cpp +++ b/ydb/core/tx/datashard/datashard_split_src.cpp @@ -244,6 +244,8 @@ class TDataShard::TTxSplitSnapshotComplete : public NTabletFlatExecutor::TTransa private: TIntrusivePtr SnapContext; bool ChangeExchangeSplit; + THashSet ActivationList; + THashSet SplitList; public: TTxSplitSnapshotComplete(TDataShard* ds, TIntrusivePtr snapContext) @@ -378,13 +380,11 @@ class TDataShard::TTxSplitSnapshotComplete : public NTabletFlatExecutor::TTransa proto->SetTimeoutMs(kv.second.Timeout.MilliSeconds()); } - if (Self->ChangesQueue || tableInfo.HasCdcStreams()) { + if (tableInfo.HasAsyncIndexes() || tableInfo.HasCdcStreams()) { snapshot->SetWaitForActivation(true); - Self->ChangeSenderActivator.AddDst(dstTablet); - db.Table().Key(dstTablet).Update(); - + ActivationList.insert(dstTablet); if (tableInfo.HasCdcStreams()) { - Self->ChangeExchangeSplitter.AddDst(dstTablet); + SplitList.insert(dstTablet); } } @@ -403,14 +403,23 @@ class TDataShard::TTxSplitSnapshotComplete : public NTabletFlatExecutor::TTransa } } - ChangeExchangeSplit = !Self->ChangesQueue && !Self->ChangeExchangeSplitter.Done(); - if (needToReadPages) { LOG_DEBUG_S(ctx, NKikimrServices::TX_DATASHARD, Self->TabletID() << " BorrowSnapshot is restarting for split OpId " << opId); return false; } else { txc.Env.DropSnapshot(SnapContext); + for (ui64 dstTabletId : ActivationList) { + Self->ChangeSenderActivator.AddDst(dstTabletId); + db.Table().Key(dstTabletId).Update(); + } + + for (ui64 dstTabletId : SplitList) { + Self->ChangeExchangeSplitter.AddDst(dstTabletId); + } + + ChangeExchangeSplit = !Self->ChangesQueue && 
!Self->ChangeExchangeSplitter.Done(); + Self->State = TShardState::SplitSrcSendingSnapshot; Self->PersistSys(db, Schema::Sys_State, Self->State); @@ -438,14 +447,14 @@ class TDataShard::TTxSplitTransferSnapshotAck : public NTabletFlatExecutor::TTra private: TEvDataShard::TEvSplitTransferSnapshotAck::TPtr Ev; bool AllDstAcksReceived; - bool Activate; + ui64 ActivateTabletId; public: TTxSplitTransferSnapshotAck(TDataShard* ds, TEvDataShard::TEvSplitTransferSnapshotAck::TPtr& ev) : NTabletFlatExecutor::TTransactionBase(ds) , Ev(ev) , AllDstAcksReceived(false) - , Activate(false) + , ActivateTabletId(0) {} TTxType GetTxType() const override { return TXTYPE_SPLIT_TRANSFER_SNAPSHOT_ACK; } @@ -469,8 +478,8 @@ class TDataShard::TTxSplitTransferSnapshotAck : public NTabletFlatExecutor::TTra // Remove the row for acked snapshot db.Table().Key(dstTabletId).Delete(); - if (!Self->ChangesQueue && Self->ChangeExchangeSplitter.Done()) { - Activate = !Self->ChangeSenderActivator.Acked(dstTabletId); + if (!Self->ChangesQueue && Self->ChangeExchangeSplitter.Done() && !Self->ChangeSenderActivator.Acked(dstTabletId)) { + ActivateTabletId = dstTabletId; } return true; @@ -485,11 +494,8 @@ class TDataShard::TTxSplitTransferSnapshotAck : public NTabletFlatExecutor::TTra } } - if (Activate) { - const ui64 dstTabletId = Ev->Get()->Record.GetTabletId(); - if (!Self->ChangeSenderActivator.Acked(dstTabletId)) { - Self->ChangeSenderActivator.DoSend(dstTabletId, ctx); - } + if (ActivateTabletId) { + Self->ChangeSenderActivator.DoSend(ActivateTabletId, ctx); } } }; diff --git a/ydb/core/tx/datashard/datashard_user_db.cpp b/ydb/core/tx/datashard/datashard_user_db.cpp index 4f711124043f..c25492ad2c82 100644 --- a/ydb/core/tx/datashard/datashard_user_db.cpp +++ b/ydb/core/tx/datashard/datashard_user_db.cpp @@ -62,7 +62,7 @@ ui64 CalculateValueBytes(const TArrayRef ops) { return bytes; }; -void TDataShardUserDb::UpdateRow( +void TDataShardUserDb::UpsertRow( const TTableId& tableId, const TArrayRef 
key, const TArrayRef ops) @@ -108,11 +108,11 @@ void TDataShardUserDb::UpdateRow( if (specUpdates.ColIdUpdateNo != Max()) { addExtendedOp(specUpdates.ColIdUpdateNo, specUpdates.UpdateNo); } - UpdateRowInt(NTable::ERowOp::Upsert, tableId, localTableId, key, extendedOps); + UpsertRowInt(NTable::ERowOp::Upsert, tableId, localTableId, key, extendedOps); IncreaseUpdateCounters(key, extendedOps); } else { - UpdateRowInt(NTable::ERowOp::Upsert, tableId, localTableId, key, ops); + UpsertRowInt(NTable::ERowOp::Upsert, tableId, localTableId, key, ops); IncreaseUpdateCounters(key, ops); } @@ -126,7 +126,7 @@ void TDataShardUserDb::ReplaceRow( auto localTableId = Self.GetLocalTableId(tableId); Y_ABORT_UNLESS(localTableId != 0, "Unexpected ReplaceRow for an unknown table"); - UpdateRowInt(NTable::ERowOp::Reset, tableId, localTableId, key, ops); + UpsertRowInt(NTable::ERowOp::Reset, tableId, localTableId, key, ops); IncreaseUpdateCounters(key, ops); } @@ -139,9 +139,26 @@ void TDataShardUserDb::InsertRow( auto localTableId = Self.GetLocalTableId(tableId); Y_ABORT_UNLESS(localTableId != 0, "Unexpected InsertRow for an unknown table"); - EnsureMissingRow(tableId, key); + if (RowExists(tableId, key)) + throw TUniqueConstrainException(); + + UpsertRowInt(NTable::ERowOp::Upsert, tableId, localTableId, key, ops); + + IncreaseUpdateCounters(key, ops); +} + +void TDataShardUserDb::UpdateRow( + const TTableId& tableId, + const TArrayRef key, + const TArrayRef ops) +{ + auto localTableId = Self.GetLocalTableId(tableId); + Y_ABORT_UNLESS(localTableId != 0, "Unexpected UpdateRow for an unknown table"); + + if (!RowExists(tableId, key)) + return; - UpdateRowInt(NTable::ERowOp::Upsert, tableId, localTableId, key, ops); + UpsertRowInt(NTable::ERowOp::Upsert, tableId, localTableId, key, ops); IncreaseUpdateCounters(key, ops); } @@ -153,7 +170,7 @@ void TDataShardUserDb::EraseRow( auto localTableId = Self.GetLocalTableId(tableId); Y_ABORT_UNLESS(localTableId != 0, "Unexpected UpdateRow for an 
unknown table"); - UpdateRowInt(NTable::ERowOp::Erase, tableId, localTableId, key, {}); + UpsertRowInt(NTable::ERowOp::Erase, tableId, localTableId, key, {}); ui64 keyBytes = CalculateKeyBytes(key); @@ -172,7 +189,7 @@ void TDataShardUserDb::IncreaseUpdateCounters( Counters.UpdateRowBytes += keyBytes + valueBytes; } -void TDataShardUserDb::UpdateRowInt( +void TDataShardUserDb::UpsertRowInt( NTable::ERowOp rowOp, const TTableId& tableId, ui64 localTableId, @@ -216,7 +233,7 @@ void TDataShardUserDb::UpdateRowInt( Self.GetKeyAccessSampler()->AddSample(tableId, keyCells); } -void TDataShardUserDb::EnsureMissingRow ( +bool TDataShardUserDb::RowExists ( const TTableId& tableId, const TArrayRef key) { @@ -227,12 +244,10 @@ void TDataShardUserDb::EnsureMissingRow ( throw TNotReadyTabletException(); } case NTable::EReady::Data: { - if (rowState == NTable::ERowOp::Upsert) - throw TUniqueConstrainException(); - break; + return true; } case NTable::EReady::Gone: { - break; + return false; } } } diff --git a/ydb/core/tx/datashard/datashard_user_db.h b/ydb/core/tx/datashard/datashard_user_db.h index af6fdb235549..64be2116c284 100644 --- a/ydb/core/tx/datashard/datashard_user_db.h +++ b/ydb/core/tx/datashard/datashard_user_db.h @@ -37,7 +37,7 @@ class IDataShardUserDb { NTable::TRowState& row, const TMaybe& readVersion = {}) = 0; - virtual void UpdateRow( + virtual void UpsertRow( const TTableId& tableId, const TArrayRef key, const TArrayRef ops) = 0; @@ -52,6 +52,11 @@ class IDataShardUserDb { const TArrayRef key, const TArrayRef ops) = 0; + virtual void UpdateRow( + const TTableId& tableId, + const TArrayRef key, + const TArrayRef ops) = 0; + virtual void EraseRow( const TTableId& tableId, const TArrayRef key) = 0; @@ -107,7 +112,7 @@ class TDataShardUserDb final NTable::TRowState& row, const TMaybe& readVersion = {}) override; - void UpdateRow( + void UpsertRow( const TTableId& tableId, const TArrayRef key, const TArrayRef ops) override; @@ -122,6 +127,11 @@ class 
TDataShardUserDb final const TArrayRef key, const TArrayRef ops) override; + void UpdateRow( + const TTableId& tableId, + const TArrayRef key, + const TArrayRef ops) override; + void EraseRow( const TTableId& tableId, const TArrayRef key) override; @@ -169,8 +179,8 @@ class TDataShardUserDb final private: static TSmallVec ConvertTableKeys(const TArrayRef key); - void UpdateRowInt(NTable::ERowOp rowOp, const TTableId& tableId, ui64 localTableId, const TArrayRef key, const TArrayRef ops); - void EnsureMissingRow(const TTableId& tableId, const TArrayRef key); + void UpsertRowInt(NTable::ERowOp rowOp, const TTableId& tableId, ui64 localTableId, const TArrayRef key, const TArrayRef ops); + bool RowExists(const TTableId& tableId, const TArrayRef key); void IncreaseUpdateCounters(const TArrayRef key, const TArrayRef ops); private: diff --git a/ydb/core/tx/datashard/datashard_user_table.cpp b/ydb/core/tx/datashard/datashard_user_table.cpp index 4e3f58065826..3e95d901b9b8 100644 --- a/ydb/core/tx/datashard/datashard_user_table.cpp +++ b/ydb/core/tx/datashard/datashard_user_table.cpp @@ -392,6 +392,8 @@ void TUserTable::AlterSchema() { schema.SetPartitionRangeEnd(Range.To.GetBuffer()); schema.SetPartitionRangeEndIsInclusive(Range.ToInclusive); + ReplicationConfig.Serialize(*schema.MutableReplicationConfig()); + schema.SetName(Name); schema.SetPath(Path); diff --git a/ydb/core/tx/datashard/datashard_user_table.h b/ydb/core/tx/datashard/datashard_user_table.h index 0f60e80569dc..af7c9ab2a6d8 100644 --- a/ydb/core/tx/datashard/datashard_user_table.h +++ b/ydb/core/tx/datashard/datashard_user_table.h @@ -339,6 +339,11 @@ struct TUserTable : public TThrRefBase { bool HasStrongConsistency() const { return Consistency == NKikimrSchemeOp::TTableReplicationConfig::CONSISTENCY_STRONG; } + + void Serialize(NKikimrSchemeOp::TTableReplicationConfig& proto) const { + proto.SetMode(Mode); + proto.SetConsistency(Consistency); + } }; struct TStats { diff --git 
a/ydb/core/tx/datashard/datashard_ut_build_index.cpp b/ydb/core/tx/datashard/datashard_ut_build_index.cpp index 35cb7139e6d6..a15064bb875d 100644 --- a/ydb/core/tx/datashard/datashard_ut_build_index.cpp +++ b/ydb/core/tx/datashard/datashard_ut_build_index.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include @@ -167,11 +168,8 @@ Y_UNIT_TEST_SUITE(TTxDataShardBuildIndexScan) { CreateShardedTableForIndex(server, sender, "/Root", "table-2", 1, false); - auto observer = runtime.AddObserver([&](TEvDataShard::TEvCompactBorrowed::TPtr& event) { - Cerr << "Captured TEvDataShard::TEvCompactBorrowed from " << runtime.FindActorName(event->Sender) << " to " << runtime.FindActorName(event->GetRecipientRewrite()) << Endl; - if (runtime.FindActorName(event->Sender) == "FLAT_SCHEMESHARD_ACTOR") { - event.Reset(); - } + TBlockEvents block(runtime, [&](const TEvDataShard::TEvCompactBorrowed::TPtr& event) { + return runtime.FindActorName(event->Sender) == "FLAT_SCHEMESHARD_ACTOR"; }); auto snapshot = CreateVolatileSnapshot(server, { "/Root/table-1" }); diff --git a/ydb/core/tx/datashard/datashard_ut_change_exchange.cpp b/ydb/core/tx/datashard/datashard_ut_change_exchange.cpp index de473a467412..2accef1eb0ae 100644 --- a/ydb/core/tx/datashard/datashard_ut_change_exchange.cpp +++ b/ydb/core/tx/datashard/datashard_ut_change_exchange.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -1246,6 +1247,8 @@ Y_UNIT_TEST_SUITE(Cdc) { // get records { + WaitForDataRecords(client, shardIt); + auto res = client.GetRecords(shardIt).ExtractValueSync(); UNIT_ASSERT_C(res.IsSuccess(), res.GetIssues().ToString()); UNIT_ASSERT_VALUES_EQUAL(res.GetResult().records().size(), records.size()); @@ -1267,6 +1270,19 @@ Y_UNIT_TEST_SUITE(Cdc) { } } + static void WaitForDataRecords(TDataStreamsClient& client, const TString& shardIt) { + int n = 0; + for (; n < 100; ++n) { + auto res = client.GetRecords(shardIt).ExtractValueSync(); + UNIT_ASSERT_C(res.IsSuccess(), 
res.GetIssues().ToString()); + if (res.GetResult().records().size()) { + break; + } + Sleep(TDuration::MilliSeconds(100)); + } + UNIT_ASSERT_VALUES_UNEQUAL(n, 100); + } + static void Write(const TShardedTableOptions& tableDesc, const TCdcStream& streamDesc) { TTestYdsEnv env(tableDesc, streamDesc); @@ -1825,18 +1841,13 @@ Y_UNIT_TEST_SUITE(Cdc) { UNIT_ASSERT_VALUES_EQUAL(res.GetStatus(), NYdb::EStatus::BAD_REQUEST); } - // try to update retention period - { - auto res = client.AlterTopic("/Root/Table/Stream", NYdb::NTopic::TAlterTopicSettings() - .SetRetentionPeriod(TDuration::Hours(48))).ExtractValueSync(); - UNIT_ASSERT_VALUES_EQUAL(res.GetStatus(), NYdb::EStatus::BAD_REQUEST); - } // try to update supported codecs { auto res = client.AlterTopic("/Root/Table/Stream", NYdb::NTopic::TAlterTopicSettings() .AppendSetSupportedCodecs(NYdb::NTopic::ECodec(5))).ExtractValueSync(); UNIT_ASSERT_VALUES_EQUAL(res.GetStatus(), NYdb::EStatus::BAD_REQUEST); } + // try to update retention storage { auto res = client.AlterTopic("/Root/Table/Stream", NYdb::NTopic::TAlterTopicSettings() @@ -1990,7 +2001,7 @@ Y_UNIT_TEST_SUITE(Cdc) { return result; } - void WaitForContent(TServer::TPtr server, const TActorId& sender, const TString& path, const TVector& expected) { + TVector WaitForContent(TServer::TPtr server, const TActorId& sender, const TString& path, const TVector& expected) { while (true) { const auto records = GetRecords(*server->GetRuntime(), sender, path, 0); for (ui32 i = 0; i < std::min(records.size(), expected.size()); ++i) { @@ -2000,7 +2011,12 @@ Y_UNIT_TEST_SUITE(Cdc) { if (records.size() >= expected.size()) { UNIT_ASSERT_VALUES_EQUAL_C(records.size(), expected.size(), "Unexpected record: " << records.at(expected.size()).second); - break; + TVector values; + for (const auto& pr : records) { + bool ok = NJson::ReadJsonTree(pr.second, &values.emplace_back()); + Y_ABORT_UNLESS(ok); + } + return values; } SimulateSleep(server, TDuration::Seconds(1)); @@ -2659,12 +2675,13 
@@ Y_UNIT_TEST_SUITE(Cdc) { } } - Y_UNIT_TEST(InitialScan) { + void InitialScanTest(bool withTopicSchemeTx) { TPortManager portManager; TServer::TPtr server = new TServer(TServerSettings(portManager.GetPort(2134), {}, DefaultPQConfig()) .SetUseRealThreads(false) .SetDomainName("Root") .SetEnableChangefeedInitialScan(true) + .SetEnablePQConfigTransactionsAtSchemeShard(withTopicSchemeTx) ); auto& runtime = *server->GetRuntime(); @@ -2707,6 +2724,14 @@ Y_UNIT_TEST_SUITE(Cdc) { }); } + Y_UNIT_TEST(InitialScan) { + InitialScanTest(false); + } + + Y_UNIT_TEST(InitialScan_WithTopicSchemeTx) { + InitialScanTest(true); + } + Y_UNIT_TEST(InitialScanDebezium) { TTestTopicEnv env(SimpleTable(), KeysOnly(NKikimrSchemeOp::ECdcStreamFormatDebeziumJson, "UnusedStream")); auto& client = env.GetClient(); @@ -3016,6 +3041,63 @@ Y_UNIT_TEST_SUITE(Cdc) { }); } + Y_UNIT_TEST(InitialScanEnqueuesZeroRecords) { + TPortManager portManager; + TServer::TPtr server = new TServer(TServerSettings(portManager.GetPort(2134), {}, DefaultPQConfig()) + .SetUseRealThreads(false) + .SetDomainName("Root") + .SetEnableChangefeedInitialScan(true) + .SetChangesQueueItemsLimit(2) + ); + + auto& runtime = *server->GetRuntime(); + const auto edgeActor = runtime.AllocateEdgeActor(); + + SetupLogging(runtime); + InitRoot(server, edgeActor); + CreateShardedTable(server, edgeActor, "/Root", "Table", SimpleTable()); + + ExecSQL(server, edgeActor, R"( + UPSERT INTO `/Root/Table` (key, value) VALUES + (1, 10), + (2, 20), + (3, 30), + (4, 40); + )"); + + TBlockEvents blockScanRequest(runtime, [&](auto& ev) { + ev->Get()->Record.MutableLimits()->SetBatchMaxRows(1); + return true; + }); + + WaitTxNotification(server, edgeActor, AsyncAlterAddStream(server, "/Root", "Table", + WithInitialScan(Updates(NKikimrSchemeOp::ECdcStreamFormatJson)))); + + runtime.WaitFor("Scan request", [&]{ return blockScanRequest.size(); }); + runtime.AddObserver([&](auto& ev) { + ev->Get()->Record.MutableLimits()->SetBatchMaxRows(1); + }); + + 
ExecSQL(server, edgeActor, R"( + UPSERT INTO `/Root/Table` (key, value) VALUES + (1, 100), + (2, 200), + (3, 300); + )"); + + blockScanRequest.Unblock().Stop(); + + WaitForContent(server, edgeActor, "/Root/Table/Stream", { + R"({"update":{"value":10},"key":[1]})", + R"({"update":{"value":100},"key":[1]})", + R"({"update":{"value":20},"key":[2]})", + R"({"update":{"value":200},"key":[2]})", + R"({"update":{"value":30},"key":[3]})", + R"({"update":{"value":300},"key":[3]})", + R"({"update":{"value":40},"key":[4]})", + }); + } + Y_UNIT_TEST(InitialScanRacyProgressAndDrop) { TPortManager portManager; TServer::TPtr server = new TServer(TServerSettings(portManager.GetPort(2134), {}, DefaultPQConfig()) @@ -3500,6 +3582,324 @@ Y_UNIT_TEST_SUITE(Cdc) { }); } + void MustNotLoseSchemaSnapshot(bool enableVolatileTx) { + TPortManager portManager; + TServer::TPtr server = new TServer(TServerSettings(portManager.GetPort(2134), {}, DefaultPQConfig()) + .SetUseRealThreads(false) + .SetDomainName("Root") + .SetEnableDataShardVolatileTransactions(enableVolatileTx) + ); + + auto& runtime = *server->GetRuntime(); + const auto edgeActor = runtime.AllocateEdgeActor(); + + SetupLogging(runtime); + InitRoot(server, edgeActor); + CreateShardedTable(server, edgeActor, "/Root", "Table", SimpleTable()); + + WaitTxNotification(server, edgeActor, AsyncAlterAddStream(server, "/Root", "Table", + Updates(NKikimrSchemeOp::ECdcStreamFormatJson))); + + auto tabletIds = GetTableShards(server, edgeActor, "/Root/Table"); + UNIT_ASSERT_VALUES_EQUAL(tabletIds.size(), 1); + + std::vector> blockedRemoveRecords; + auto blockRemoveRecords = runtime.AddObserver([&](auto& ev) { + Cerr << "... blocked remove record" << Endl; + blockedRemoveRecords.emplace_back(ev.Release()); + }); + + Cerr << "... 
execute first query" << Endl; + ExecSQL(server, edgeActor, R"( + UPSERT INTO `/Root/Table` (key, value) VALUES (1, 10); + )"); + + WaitFor(runtime, [&]{ return blockedRemoveRecords.size() == 1; }, "blocked remove records"); + blockRemoveRecords.Remove(); + + std::vector> blockedPlans; + auto blockPlans = runtime.AddObserver([&](auto& ev) { + blockedPlans.emplace_back(ev.Release()); + }); + + Cerr << "... execute scheme query" << Endl; + const auto alterTxId = AsyncAlterAddExtraColumn(server, "/Root", "Table"); + + WaitFor(runtime, [&]{ return blockedPlans.size() > 0; }, "blocked plans"); + blockPlans.Remove(); + + std::vector> blockedPutResponses; + auto blockPutResponses = runtime.AddObserver([&](auto& ev) { + auto* msg = ev->Get(); + if (msg->Id.TabletID() == tabletIds[0]) { + Cerr << "... blocked put response:" << msg->Id << Endl; + blockedPutResponses.emplace_back(ev.Release()); + } + }); + + Cerr << "... execute second query" << Endl; + SendSQL(server, edgeActor, R"( + UPSERT INTO `/Root/Table` (key, value) VALUES (2, 20); + )"); + + WaitFor(runtime, [&]{ return blockedPutResponses.size() > 0; }, "blocked put responses"); + auto wasBlockedPutResponses = blockedPutResponses.size(); + + Cerr << "... release blocked plans" << Endl; + for (auto& ev : std::exchange(blockedPlans, {})) { + runtime.Send(ev.release(), 0, true); + } + + WaitFor(runtime, [&]{ return blockedPutResponses.size() > wasBlockedPutResponses; }, "blocked put responses"); + wasBlockedPutResponses = blockedPutResponses.size(); + + Cerr << "... release blocked remove records" << Endl; + for (auto& ev : std::exchange(blockedRemoveRecords, {})) { + runtime.Send(ev.release(), 0, true); + } + + WaitFor(runtime, [&]{ return blockedPutResponses.size() > wasBlockedPutResponses; }, "blocked put responses"); + blockPutResponses.Remove(); + + Cerr << "... 
release blocked put responses" << Endl; + for (auto& ev : std::exchange(blockedPutResponses, {})) { + runtime.Send(ev.release(), 0, true); + } + + Cerr << "... finalize" << Endl; + WaitTxNotification(server, edgeActor, alterTxId); + WaitForContent(server, edgeActor, "/Root/Table/Stream", { + R"({"update":{"value":10},"key":[1]})", + R"({"update":{"value":20},"key":[2]})", + }); + } + + Y_UNIT_TEST(MustNotLoseSchemaSnapshot) { + MustNotLoseSchemaSnapshot(false); + } + + Y_UNIT_TEST(MustNotLoseSchemaSnapshotWithVolatileTx) { + MustNotLoseSchemaSnapshot(true); + } + + Y_UNIT_TEST(ResolvedTimestampsContinueAfterMerge) { + TPortManager portManager; + TServer::TPtr server = new TServer(TServerSettings(portManager.GetPort(2134), {}, DefaultPQConfig()) + .SetUseRealThreads(false) + .SetDomainName("Root") + ); + + auto& runtime = *server->GetRuntime(); + const auto edgeActor = runtime.AllocateEdgeActor(); + + SetupLogging(runtime); + InitRoot(server, edgeActor); + SetSplitMergePartCountLimit(&runtime, -1); + CreateShardedTable(server, edgeActor, "/Root", "Table", SimpleTable()); + + WaitTxNotification(server, edgeActor, AsyncAlterAddStream(server, "/Root", "Table", + WithResolvedTimestamps(TDuration::Seconds(3), Updates(NKikimrSchemeOp::ECdcStreamFormatJson)))); + + Cerr << "... 
prepare" << Endl; + { + WaitForContent(server, edgeActor, "/Root/Table/Stream", { + R"({"resolved":"***"})", + }); + + auto tabletIds = GetTableShards(server, edgeActor, "/Root/Table"); + UNIT_ASSERT_VALUES_EQUAL(tabletIds.size(), 1); + + WaitTxNotification(server, edgeActor, AsyncSplitTable(server, edgeActor, "/Root/Table", tabletIds.at(0), 2)); + WaitForContent(server, edgeActor, "/Root/Table/Stream", { + R"({"resolved":"***"})", + R"({"resolved":"***"})", + }); + } + + auto initialTabletIds = GetTableShards(server, edgeActor, "/Root/Table"); + UNIT_ASSERT_VALUES_EQUAL(initialTabletIds.size(), 2); + + std::vector> blockedSplitRequests; + auto blockSplitRequests = runtime.AddObserver([&](auto& ev) { + if (ev->Get()->Record.GetPartitionRequest().HasCmdSplitMessageGroup()) { + blockedSplitRequests.emplace_back(ev.Release()); + } + }); + + Cerr << "... merge table" << Endl; + const auto mergeTxId = AsyncMergeTable(server, edgeActor, "/Root/Table", initialTabletIds); + WaitFor(runtime, [&]{ return blockedSplitRequests.size() == initialTabletIds.size(); }, "blocked split requests"); + blockSplitRequests.Remove(); + + std::vector> blockedRegisterRequests; + auto blockRegisterRequests = runtime.AddObserver([&](auto& ev) { + if (ev->Get()->Record.GetPartitionRequest().HasCmdRegisterMessageGroup()) { + blockedRegisterRequests.emplace_back(ev.Release()); + } + }); + + ui32 splitResponses = 0; + auto countSplitResponses = runtime.AddObserver([&](auto&) { + ++splitResponses; + }); + + Cerr << "... release split requests" << Endl; + for (auto& ev : std::exchange(blockedSplitRequests, {})) { + runtime.Send(ev.release(), 0, true); + WaitFor(runtime, [prev = splitResponses, &splitResponses]{ return splitResponses > prev; }, "split response"); + } + + Cerr << "... reboot pq tablet" << Endl; + RebootTablet(runtime, ResolvePqTablet(runtime, edgeActor, "/Root/Table/Stream", 0), edgeActor); + countSplitResponses.Remove(); + + Cerr << "... 
release register requests" << Endl; + blockRegisterRequests.Remove(); + for (auto& ev : std::exchange(blockedRegisterRequests, {})) { + runtime.Send(ev.release(), 0, true); + } + + Cerr << "... wait for merge tx notification" << Endl; + WaitTxNotification(server, edgeActor, mergeTxId); + + Cerr << "... wait for final heartbeat" << Endl; + WaitForContent(server, edgeActor, "/Root/Table/Stream", { + R"({"resolved":"***"})", + R"({"resolved":"***"})", + R"({"resolved":"***"})", + }); + } + + Y_UNIT_TEST(ResolvedTimestampForDisplacedUpsert) { + TPortManager portManager; + TServer::TPtr server = new TServer(TServerSettings(portManager.GetPort(2134), {}, DefaultPQConfig()) + .SetUseRealThreads(false) + .SetDomainName("Root") + ); + + TDisableDataShardLogBatching disableDataShardLogBatching; + + auto& runtime = *server->GetRuntime(); + const auto edgeActor = runtime.AllocateEdgeActor(); + + SetupLogging(runtime); + InitRoot(server, edgeActor); + SetSplitMergePartCountLimit(&runtime, -1); + CreateShardedTable(server, edgeActor, "/Root", "Table", SimpleTable()); + + WaitTxNotification(server, edgeActor, AsyncAlterAddStream(server, "/Root", "Table", + WithVirtualTimestamps(WithResolvedTimestamps( + TDuration::Seconds(3), Updates(NKikimrSchemeOp::ECdcStreamFormatJson))))); + + Cerr << "... prepare" << Endl; + WaitForContent(server, edgeActor, "/Root/Table/Stream", { + R"({"resolved":"***"})", + }); + + KqpSimpleExec(runtime, R"( + UPSERT INTO `/Root/Table` (key, value) VALUES (1, 10); + )"); + + auto records = WaitForContent(server, edgeActor, "/Root/Table/Stream", { + R"({"resolved":"***"})", + R"({"update":{"value":10},"key":[1],"ts":"***"})", + R"({"resolved":"***"})", + }); + + // Take the final step + ui64 lastStep = records.back()["resolved"][0].GetUInteger(); + Cerr << "... 
last heartbeat at " << lastStep << Endl; + + const auto tableId = ResolveTableId(server, edgeActor, "/Root/Table"); + const auto shards = GetTableShards(server, edgeActor, "/Root/Table"); + UNIT_ASSERT_VALUES_EQUAL(shards.size(), 1u); + + ui64 coordinator = ChangeStateStorage(Coordinator, server->GetSettings().Domain); + ui64 snapshotStep = lastStep + 3000 - 1; + ForwardToTablet(runtime, coordinator, edgeActor, new TEvTxProxy::TEvRequirePlanSteps(coordinator, snapshotStep)); + + TBlockEvents blockedUpdates(runtime, + [&](auto& ev) { + return ev->Get()->Record.GetTimeBarrier() > snapshotStep; + }); + + Cerr << "... performing a read from snapshot just before the next heartbeat" << Endl; + { + auto req = std::make_unique(); + { + auto& record = req->Record; + record.SetReadId(1); + record.MutableTableId()->SetOwnerId(tableId.PathId.OwnerId); + record.MutableTableId()->SetTableId(tableId.PathId.LocalPathId); + record.AddColumns(1); + record.AddColumns(2); + record.SetResultFormat(NKikimrDataEvents::FORMAT_CELLVEC); + ui32 key = 1; + TVector keys; + keys.push_back(TCell::Make(key)); + req->Keys.push_back(TSerializedCellVec(TSerializedCellVec::Serialize(keys))); + record.MutableSnapshot()->SetStep(snapshotStep); + record.MutableSnapshot()->SetTxId(Max()); + } + ForwardToTablet(runtime, shards.at(0), edgeActor, req.release()); + auto ev = runtime.GrabEdgeEventRethrow(edgeActor); + auto* res = ev->Get(); + UNIT_ASSERT_VALUES_EQUAL(res->Record.GetStatus().GetCode(), Ydb::StatusIds::SUCCESS); + UNIT_ASSERT_VALUES_EQUAL(res->Record.GetFinished(), true); + Cerr << "... read finished" << Endl; + } + for (int i = 0; i < 10; ++i) { + runtime.SimulateSleep(TDuration::MilliSeconds(1)); + } + + Cerr << "... starting upsert 1 (expected to displace)" << Endl; + auto upsert1 = KqpSimpleSend(runtime, R"( + UPSERT INTO `/Root/Table` (key, value) VALUES (2, 20); + )"); + for (int i = 0; i < 10; ++i) { + runtime.SimulateSleep(TDuration::MilliSeconds(1)); + } + + Cerr << "... 
starting upsert 2 (expected to displace)" << Endl; + auto upsert2 = KqpSimpleSend(runtime, R"( + UPSERT INTO `/Root/Table` (key, value) VALUES (3, 30); + )"); + for (int i = 0; i < 10; ++i) { + runtime.SimulateSleep(TDuration::MilliSeconds(1)); + } + + Cerr << "... unblocking updates" << Endl; + blockedUpdates.Unblock().Stop(); + for (int i = 0; i < 10; ++i) { + runtime.SimulateSleep(TDuration::MilliSeconds(1)); + } + + Cerr << "... checking the update is logged before the new resolved timestamp" << Endl; + records = WaitForContent(server, edgeActor, "/Root/Table/Stream", { + R"({"resolved":"***"})", + R"({"update":{"value":10},"key":[1],"ts":"***"})", + R"({"resolved":"***"})", + R"({"update":{"value":20},"key":[2],"ts":"***"})", + R"({"update":{"value":30},"key":[3],"ts":"***"})", + R"({"resolved":"***"})", + }); + + TRowVersion resolved(0, 0); + for (auto& record : records) { + if (record.Has("resolved")) { + resolved.Step = record["resolved"][0].GetUInteger(); + resolved.TxId = record["resolved"][1].GetUInteger(); + } + if (record.Has("ts")) { + TRowVersion ts( + record["ts"][0].GetUInteger(), + record["ts"][1].GetUInteger()); + UNIT_ASSERT_C(resolved < ts, + "Record with ts " << ts << " after resolved " << resolved); + } + } + } + } // Cdc } // NKikimr diff --git a/ydb/core/tx/datashard/datashard_ut_followers.cpp b/ydb/core/tx/datashard/datashard_ut_followers.cpp index f32604f21b8b..49b738f97dc4 100644 --- a/ydb/core/tx/datashard/datashard_ut_followers.cpp +++ b/ydb/core/tx/datashard/datashard_ut_followers.cpp @@ -426,7 +426,8 @@ Y_UNIT_TEST_SUITE(DataShardFollowers) { Cerr << "Captured pages request" << Endl; for (auto pageId : msg->Fetch->Pages) { auto type = NTable::NPage::EPage(msg->Fetch->PageCollection->Page(pageId).Type); - UNIT_ASSERT_C(type != NTable::EPage::BTreeIndex && type != NTable::EPage::FlatIndex, "Index pages should be preload during a part switch"); + // Note: FlatIndex pages also have been preloaded, but don't stick in private cache (see 
TLoaderEnv) + UNIT_ASSERT_C(type != NTable::EPage::BTreeIndex, "Index pages should be preload during a part switch"); } }); diff --git a/ydb/core/tx/datashard/datashard_ut_read_iterator.cpp b/ydb/core/tx/datashard/datashard_ut_read_iterator.cpp index f0768d37cc6e..14153b3e8912 100644 --- a/ydb/core/tx/datashard/datashard_ut_read_iterator.cpp +++ b/ydb/core/tx/datashard/datashard_ut_read_iterator.cpp @@ -4,6 +4,7 @@ #include "read_iterator.h" #include +#include #include #include #include @@ -4670,6 +4671,88 @@ Y_UNIT_TEST_SUITE(DataShardReadIteratorConsistency) { "result2: " << result2); } + Y_UNIT_TEST(Bug_7674_IteratorDuplicateRows) { + TPortManager pm; + TServerSettings serverSettings(pm.GetPort(2134)); + serverSettings.SetDomainName("Root") + .SetUseRealThreads(false); + TServer::TPtr server = new TServer(serverSettings); + + auto& runtime = *server->GetRuntime(); + auto sender = runtime.AllocateEdgeActor(); + + runtime.SetLogPriority(NKikimrServices::TX_DATASHARD, NLog::PRI_TRACE); + + InitRoot(server, sender); + + TDisableDataShardLogBatching disableDataShardLogBatching; + + CreateShardedTable(server, sender, "/Root", "table-1", 1); + + ExecSQL(server, sender, "UPSERT INTO `/Root/table-1` (key, value) VALUES (1, 10), (2, 20), (3, 30), (4, 40), (5, 50);"); + ExecSQL(server, sender, "UPSERT INTO `/Root/table-1` (key, value) VALUES (6, 60), (7, 70), (8, 80), (9, 90), (10, 100);"); + runtime.SimulateSleep(TDuration::Seconds(1)); + + auto forceSmallChunks = runtime.AddObserver( + [&](TEvDataShard::TEvRead::TPtr& ev) { + auto* msg = ev->Get(); + // Force chunks of at most 3 rows + msg->Record.SetMaxRowsInResult(3); + }); + + TBlockEvents blockedAcks(runtime); + TBlockEvents blockedResults(runtime); + TBlockEvents blockedContinue(runtime); + + auto waitFor = [&](const TString& description, const auto& condition, size_t count = 1) { + while (!condition()) { + UNIT_ASSERT_C(count > 0, "... failed to wait for " << description); + Cerr << "... 
waiting for " << description << Endl; + TDispatchOptions options; + options.CustomFinalCondition = [&]() { + return condition(); + }; + runtime.DispatchEvents(options); + --count; + } + }; + + auto readFuture = KqpSimpleSend(runtime, "SELECT key, value FROM `/Root/table-1` ORDER BY key LIMIT 7"); + waitFor("first TEvReadContinue", [&]{ return blockedContinue.size() >= 1; }); + waitFor("first TEvReadResult", [&]{ return blockedResults.size() >= 1; }); + + blockedContinue.Unblock(1); + waitFor("second TEvReadContinue", [&]{ return blockedContinue.size() >= 1; }); + waitFor("second TEvReadResult", [&]{ return blockedResults.size() >= 2; }); + + // We need both results to arrive without pauses + blockedResults.Unblock(); + + waitFor("both TEvReadAcks", [&]{ return blockedAcks.size() >= 2; }); + + // Unblock the first TEvReadAck and then pending TEvReadContinue + blockedAcks.Unblock(1); + blockedContinue.Unblock(1); + + // Give it some time to trigger the bug + runtime.SimulateSleep(TDuration::MilliSeconds(1)); + + // Stop blocking everything + blockedAcks.Unblock().Stop(); + blockedResults.Unblock().Stop(); + blockedContinue.Unblock().Stop(); + + UNIT_ASSERT_VALUES_EQUAL( + FormatResult(AwaitResponse(runtime, std::move(readFuture))), + "{ items { uint32_value: 1 } items { uint32_value: 10 } }, " + "{ items { uint32_value: 2 } items { uint32_value: 20 } }, " + "{ items { uint32_value: 3 } items { uint32_value: 30 } }, " + "{ items { uint32_value: 4 } items { uint32_value: 40 } }, " + "{ items { uint32_value: 5 } items { uint32_value: 50 } }, " + "{ items { uint32_value: 6 } items { uint32_value: 60 } }, " + "{ items { uint32_value: 7 } items { uint32_value: 70 } }"); + } + } } // namespace NKikimr diff --git a/ydb/core/tx/datashard/datashard_ut_replication.cpp b/ydb/core/tx/datashard/datashard_ut_replication.cpp index b0395077e915..8d5df4025d1c 100644 --- a/ydb/core/tx/datashard/datashard_ut_replication.cpp +++ b/ydb/core/tx/datashard/datashard_ut_replication.cpp @@ 
-1,11 +1,13 @@ #include #include "datashard_active_transaction.h" +#include "datashard_ut_common_kqp.h" #include namespace NKikimr { using namespace NKikimr::NDataShard; +using namespace NKikimr::NDataShard::NKqpHelpers; using namespace NSchemeShard; using namespace Tests; @@ -244,6 +246,9 @@ Y_UNIT_TEST_SUITE(DataShardReplication) { ExecSQL(server, sender, "SELECT * FROM `/Root/table-1`"); ExecSQL(server, sender, "INSERT INTO `/Root/table-1` (key, value) VALUES (1, 10);", true, Ydb::StatusIds::GENERIC_ERROR); + + WaitTxNotification(server, sender, AsyncAlterDropReplicationConfig(server, "/Root", "table-1")); + ExecSQL(server, sender, "INSERT INTO `/Root/table-1` (key, value) VALUES (1, 10);"); } Y_UNIT_TEST(ApplyChangesToReplicatedTable) { @@ -304,6 +309,46 @@ Y_UNIT_TEST_SUITE(DataShardReplication) { }, NKikimrTxDataShard::TEvApplyReplicationChangesResult::STATUS_REJECTED); } + Y_UNIT_TEST(ApplyChangesWithConcurrentTx) { + TPortManager pm; + TServerSettings serverSettings(pm.GetPort(2134)); + serverSettings.SetDomainName("Root") + .SetUseRealThreads(false); + + Tests::TServer::TPtr server = new TServer(serverSettings); + auto &runtime = *server->GetRuntime(); + auto sender = runtime.AllocateEdgeActor(); + + runtime.SetLogPriority(NKikimrServices::TX_DATASHARD, NLog::PRI_TRACE); + + InitRoot(server, sender); + CreateShardedTable(server, sender, "/Root", "table-1", TShardedTableOptions() + .Replicated(true) + .ReplicationConsistency(EReplicationConsistency::Weak) + ); + + auto shards = GetTableShards(server, sender, "/Root/table-1"); + auto tableId = ResolveTableId(server, sender, "/Root/table-1"); + + ApplyChanges(server, shards.at(0), tableId, "my-source", { + TChange{ .Offset = 0, .WriteTxId = 0, .Key = 1, .Value = 11 }, + }); + + TString sessionId; + TString txId; + UNIT_ASSERT_VALUES_EQUAL( + KqpSimpleBegin(runtime, sessionId, txId, "SELECT key, value FROM `/Root/table-1`;"), + "{ items { uint32_value: 1 } items { uint32_value: 11 } }"); + + 
ApplyChanges(server, shards.at(0), tableId, "my-source", { + TChange{ .Offset = 1, .WriteTxId = 0, .Key = 1, .Value = 21 }, + }); + + UNIT_ASSERT_VALUES_EQUAL( + KqpSimpleCommit(runtime, sessionId, txId, "SELECT key, value FROM `/Root/table-1`;"), + "{ items { uint32_value: 1 } items { uint32_value: 11 } }"); + } + } } // namespace NKikimr diff --git a/ydb/core/tx/datashard/datashard_ut_stats.cpp b/ydb/core/tx/datashard/datashard_ut_stats.cpp index 18f19e6b6bcc..5b4c8311cba9 100644 --- a/ydb/core/tx/datashard/datashard_ut_stats.cpp +++ b/ydb/core/tx/datashard/datashard_ut_stats.cpp @@ -1,6 +1,7 @@ #include #include "ydb/core/tablet_flat/shared_sausagecache.h" #include +#include namespace NKikimr { @@ -441,28 +442,16 @@ Y_UNIT_TEST_SUITE(DataShardStats) { const auto shard1 = GetTableShards(server, sender, "/Root/table-1").at(0); UpsertRows(server, sender); - - bool captured = false; - auto observer = runtime.AddObserver([&](NSharedCache::TEvResult::TPtr& event) { - Cerr << "Captured NSharedCache::TEvResult from " << runtime.FindActorName(event->Sender) << " to " << runtime.FindActorName(event->GetRecipientRewrite()) << Endl; - if (runtime.FindActorName(event->GetRecipientRewrite()) == "DATASHARD_STATS_BUILDER") { - auto& message = *event->Get(); - event.Reset(static_cast *>( - new IEventHandle(event->Recipient, event->Sender, - new NSharedCache::TEvResult(message.Origin, message.Cookie, NKikimrProto::NODATA)))); - captured = true; - } + + TBlockEvents block(runtime, [&](const NSharedCache::TEvResult::TPtr& event) { + return runtime.FindActorName(event->GetRecipientRewrite()) == "DATASHARD_STATS_BUILDER"; }); CompactTable(runtime, shard1, tableId1, false); - for (int i = 0; i < 5 && !captured; ++i) { - TDispatchOptions options; - options.CustomFinalCondition = [&]() { return captured; }; - runtime.DispatchEvents(options, TDuration::Seconds(5)); - } - UNIT_ASSERT(captured); - observer.Remove(); + runtime.WaitFor("blocked read", [&]{ return block.size(); }); + + 
block.Stop().Unblock(); { Cerr << "Waiting stats.." << Endl; diff --git a/ydb/core/tx/datashard/datashard_ut_volatile.cpp b/ydb/core/tx/datashard/datashard_ut_volatile.cpp index 1ca101657cb6..a3986c61f20f 100644 --- a/ydb/core/tx/datashard/datashard_ut_volatile.cpp +++ b/ydb/core/tx/datashard/datashard_ut_volatile.cpp @@ -5,6 +5,7 @@ #include #include +#include namespace NKikimr { @@ -2988,6 +2989,94 @@ Y_UNIT_TEST_SUITE(DataShardVolatile) { "ERROR: ABORTED"); } + Y_UNIT_TEST(UpsertDependenciesShardsRestart) { + TPortManager pm; + TServerSettings serverSettings(pm.GetPort(2134)); + serverSettings.SetDomainName("Root") + .SetUseRealThreads(false) + .SetEnableDataShardVolatileTransactions(true); + + Tests::TServer::TPtr server = new TServer(serverSettings); + auto &runtime = *server->GetRuntime(); + auto sender = runtime.AllocateEdgeActor(); + + runtime.SetLogPriority(NKikimrServices::TX_DATASHARD, NLog::PRI_TRACE); + runtime.SetLogPriority(NKikimrServices::PIPE_CLIENT, NLog::PRI_TRACE); + + InitRoot(server, sender); + + UNIT_ASSERT_VALUES_EQUAL( + KqpSchemeExec(runtime, R"( + CREATE TABLE `/Root/table` (key uint32, value uint32, PRIMARY KEY (key)) + WITH (PARTITION_AT_KEYS = (10)); + )"), + "SUCCESS"); + + const auto shards = GetTableShards(server, sender, "/Root/table"); + UNIT_ASSERT_VALUES_EQUAL(shards.size(), 2u); + + // We need to fill table with some data + Cerr << "========= Upserting initial values =========" << Endl; + KqpSimpleExec(runtime, R"( + UPSERT INTO `/Root/table` (key, subkey, value) + VALUES (1, 1), (11, 11) + )"); + + TForceVolatileProposeArbiter forceArbiter(runtime, shards.at(0)); + TBlockEvents blockedPlan(runtime, + [actor = ResolveTablet(runtime, shards.at(0))](const auto& ev) { + return ev->GetRecipientRewrite() == actor; + }); + + Cerr << "========= Starting upsert 1 =========" << Endl; + auto upsertFuture1 = KqpSimpleSend(runtime, R"( + UPSERT INTO `/Root/table` (key, value) + VALUES (2, 2), (12, 12); + )"); + 
runtime.SimulateSleep(TDuration::Seconds(1)); + + Cerr << "========= Starting upsert 2 =========" << Endl; + auto upsertFuture2 = KqpSimpleSend(runtime, R"( + UPSERT INTO `/Root/table` (key, value) + VALUES (2, 1002), (12, 1012); + )"); + runtime.SimulateSleep(TDuration::Seconds(1)); + + UNIT_ASSERT_VALUES_EQUAL(blockedPlan.size(), 2u); + + // We expect transaction to execute at shards[1] + // However at shards[0] it didn't even start due to blocked plans + // Now we need to restart both shards, without giving them a chance to communicate + std::vector shardActors{ + ResolveTablet(runtime, shards.at(0)), + ResolveTablet(runtime, shards.at(1)), + }; + for (auto& shardActor : shardActors) { + Cerr << "... killing actor " << shardActor << Endl; + // Perform a synchronous send, this makes sure both shards handle TEvPoison before anything else + runtime.Send(new IEventHandle(shardActor, TActorId(), new TEvents::TEvPoison), 0, /* viaActorSystem */ false); + } + + blockedPlan.Stop().clear(); + + // Both queries should abort with UNDETERMINED + Cerr << "... waiting for query results" << Endl; + UNIT_ASSERT_VALUES_EQUAL( + FormatResult(runtime.WaitFuture(std::move(upsertFuture1))), + "ERROR: UNDETERMINED"); + UNIT_ASSERT_VALUES_EQUAL( + FormatResult(runtime.WaitFuture(std::move(upsertFuture2))), + "ERROR: UNDETERMINED"); + + // Split the second shard, which makes sure it's not stuck + Cerr << "========= Splitting shard 2 =========" << Endl; + SetSplitMergePartCountLimit(server->GetRuntime(), -1); + ui64 txId = AsyncSplitTable(server, sender, "/Root/table", shards.at(1), 15); + Cerr << "... split txId# " << txId << " started" << Endl; + WaitTxNotification(server, sender, txId); + Cerr << "... 
split finished" << Endl; + } + } // Y_UNIT_TEST_SUITE(DataShardVolatile) } // namespace NKikimr diff --git a/ydb/core/tx/datashard/datashard_ut_write.cpp b/ydb/core/tx/datashard/datashard_ut_write.cpp index 50bfc995f6fb..03c6b3d79132 100644 --- a/ydb/core/tx/datashard/datashard_ut_write.cpp +++ b/ydb/core/tx/datashard/datashard_ut_write.cpp @@ -3,6 +3,8 @@ #include #include #include +#include +#include #include "datashard_ut_common_kqp.h" namespace NKikimr { @@ -402,6 +404,69 @@ Y_UNIT_TEST_SUITE(DataShardWrite) { } } + Y_UNIT_TEST(UpdateImmediate) { + auto [runtime, server, sender] = TestCreateServer(); + + TShardedTableOptions opts; + auto [shards, tableId] = CreateShardedTable(server, sender, "/Root", "table-1", opts); + const ui64 shard = shards[0]; + const ui32 rowCount = 3; + + ui64 txId = 100; + + Cout << "========= Send immediate update to empty table, it should be no op =========\n"; + { + Update(runtime, sender, shard, tableId, opts.Columns_, rowCount, txId, NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE); + } + + Cout << "========= Read table =========\n"; + { + auto tableState = TReadTableState(server, MakeReadTableSettings("/Root/table-1")).All(); + UNIT_ASSERT_VALUES_EQUAL(tableState, ""); + } + + Cout << "========= Send immediate insert =========\n"; + { + Insert(runtime, sender, shard, tableId, opts.Columns_, rowCount, txId, NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE); + } + + Cout << "========= Read table =========\n"; + { + auto tableState = TReadTableState(server, MakeReadTableSettings("/Root/table-1")).All(); + UNIT_ASSERT_VALUES_EQUAL(tableState, expectedTableState); + } + + Cout << "========= Send immediate upsert, change one row =========\n"; + { + UpsertOneKeyValue(runtime, sender, shard, tableId, opts.Columns_, 0, 555, txId, NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE); + } + + Cout << "========= Read table =========\n"; + { + auto tableState = TReadTableState(server, MakeReadTableSettings("/Root/table-1")).All(); + 
UNIT_ASSERT_VALUES_EQUAL(tableState, "key = 0, value = 555\nkey = 2, value = 3\nkey = 4, value = 5\n"); + } + + Cout << "========= Send immediate update, it should override all the rows =========\n"; + { + const auto writeResult = Update(runtime, sender, shard, tableId, opts.Columns_, rowCount, txId, NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE); + + UNIT_ASSERT_VALUES_EQUAL(writeResult.GetOrigin(), shard); + UNIT_ASSERT_VALUES_EQUAL(writeResult.GetStep(), 0); + UNIT_ASSERT_VALUES_EQUAL(writeResult.GetOrderId(), txId); + UNIT_ASSERT_VALUES_EQUAL(writeResult.GetTxId(), txId); + + const auto& tableAccessStats = writeResult.GetTxStats().GetTableAccessStats(0); + UNIT_ASSERT_VALUES_EQUAL(tableAccessStats.GetTableInfo().GetName(), "/Root/table-1"); + UNIT_ASSERT_VALUES_EQUAL(tableAccessStats.GetUpdateRow().GetCount(), rowCount); + } + + Cout << "========= Read table =========\n"; + { + auto tableState = TReadTableState(server, MakeReadTableSettings("/Root/table-1")).All(); + UNIT_ASSERT_VALUES_EQUAL(tableState, expectedTableState); + } + } Y_UNIT_TEST_TWIN(UpsertPrepared, Volatile) { auto [runtime, server, sender] = TestCreateServer(); @@ -1190,5 +1255,447 @@ Y_UNIT_TEST_SUITE(DataShardWrite) { ""); } + Y_UNIT_TEST(ImmediateAndPlannedCommittedOpsRace) { + TPortManager pm; + TServerSettings serverSettings(pm.GetPort(2134)); + serverSettings.SetDomainName("Root") + .SetUseRealThreads(false) + // It's easier to reproduce without volatile transactions, since + // then we can block pipeline by blocking readsets + .SetEnableDataShardVolatileTransactions(false); + + auto [runtime, server, sender] = TestCreateServer(serverSettings); + + TDisableDataShardLogBatching disableDataShardLogBatching; + UNIT_ASSERT_VALUES_EQUAL( + KqpSchemeExec(runtime, R"( + CREATE TABLE `/Root/table` (key int, value int, PRIMARY KEY (key)) + WITH (PARTITION_AT_KEYS = (10)); + )"), + "SUCCESS"); + + const auto tableId = ResolveTableId(server, sender, "/Root/table"); + const auto shards = 
GetTableShards(server, sender, "/Root/table"); + UNIT_ASSERT_VALUES_EQUAL(shards.size(), 2u); + + TVector columns{ + {"key", "Int32", true, false}, + {"value", "Int32", false, false}, + }; + + const ui64 coordinator = ChangeStateStorage(Coordinator, server->GetSettings().Domain); + + const ui64 lockTxId1 = 1234567890001; + const ui64 lockTxId2 = 1234567890002; + const ui64 lockTxId3 = 1234567890003; + const ui64 lockNodeId = runtime.GetNodeId(0); + NLongTxService::TLockHandle lockHandle1(lockTxId1, runtime.GetActorSystem(0)); + NLongTxService::TLockHandle lockHandle2(lockTxId2, runtime.GetActorSystem(0)); + NLongTxService::TLockHandle lockHandle3(lockTxId3, runtime.GetActorSystem(0)); + + auto shard1 = shards.at(0); + auto shard1actor = ResolveTablet(runtime, shard1); + auto shard2 = shards.at(1); + + NKikimrDataEvents::TLock lock1shard1; + NKikimrDataEvents::TLock lock1shard2; + NKikimrDataEvents::TLock lock2; + + // 1. Make a read (lock1 shard1) + auto read1sender = runtime.AllocateEdgeActor(); + { + Cerr << "... 
making a read from " << shard1 << Endl; + auto req = std::make_unique(); + { + auto& record = req->Record; + record.SetReadId(1); + record.MutableTableId()->SetOwnerId(tableId.PathId.OwnerId); + record.MutableTableId()->SetTableId(tableId.PathId.LocalPathId); + record.AddColumns(1); + record.AddColumns(2); + record.SetLockTxId(lockTxId1); + record.SetLockNodeId(lockNodeId); + record.SetResultFormat(NKikimrDataEvents::FORMAT_CELLVEC); + i32 key = 1; + TVector keys; + keys.push_back(TCell::Make(key)); + req->Keys.push_back(TSerializedCellVec(TSerializedCellVec::Serialize(keys))); + } + ForwardToTablet(runtime, shard1, read1sender, req.release()); + auto ev = runtime.GrabEdgeEventRethrow(read1sender); + auto* res = ev->Get(); + UNIT_ASSERT_VALUES_EQUAL(res->Record.GetStatus().GetCode(), Ydb::StatusIds::SUCCESS); + UNIT_ASSERT_VALUES_EQUAL(res->Record.GetFinished(), true); + UNIT_ASSERT_VALUES_EQUAL(res->Record.GetTxLocks().size(), 1u); + lock1shard1 = res->Record.GetTxLocks().at(0); + UNIT_ASSERT_C(lock1shard1.GetCounter() < 1000, "Unexpected lock in the result: " << lock1shard1.ShortDebugString()); + } + + // 2. Make an uncommitted write (lock1 shard2) + { + Cerr << "... making an uncommmited write to " << shard2 << Endl; + auto req = MakeWriteRequestOneKeyValue( + std::nullopt, + NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE, + NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT, + tableId, + columns, + 11, 1101); + req->SetLockId(lockTxId1, lockNodeId); + auto result = Write(runtime, sender, shard2, std::move(req)); + UNIT_ASSERT_VALUES_EQUAL(result.GetTxLocks().size(), 1u); + lock1shard2 = result.GetTxLocks().at(0); + UNIT_ASSERT_C(lock1shard2.GetCounter() < 1000, "Unexpected lock in the result: " << lock1shard2.ShortDebugString()); + } + + // 3. Make an uncommitted write (lock2 shard1) + { + Cerr << "... 
making an uncommmited write to " << shard1 << Endl; + auto req = MakeWriteRequestOneKeyValue( + std::nullopt, + NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE, + NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT, + tableId, + columns, + 2, 202); + req->SetLockId(lockTxId2, lockNodeId); + auto result = Write(runtime, sender, shard1, std::move(req)); + UNIT_ASSERT_VALUES_EQUAL(result.GetTxLocks().size(), 1u); + lock2 = result.GetTxLocks().at(0); + UNIT_ASSERT_C(lock2.GetCounter() < 1000, "Unexpected lock in the result: " << lock2.ShortDebugString()); + } + + // 4. Break lock2 so later we could make an aborted distributed commit + { + Cerr << "... making an immediate write to " << shard1 << Endl; + auto req = MakeWriteRequestOneKeyValue( + std::nullopt, + NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE, + NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT, + tableId, + columns, + 2, 203); + Write(runtime, sender, shard1, std::move(req)); + } + + // Start blocking readsets + TBlockEvents blockedReadSets(runtime); + + // Prepare an upsert (readsets flow between shards) + ui64 txId1 = 1234567890011; + auto tx1sender = runtime.AllocateEdgeActor(); + { + auto req1 = MakeWriteRequestOneKeyValue( + txId1, + NKikimrDataEvents::TEvWrite::MODE_PREPARE, + NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT, + tableId, + columns, + 3, 304); + req1->Record.MutableLocks()->SetOp(NKikimrDataEvents::TKqpLocks::Commit); + req1->Record.MutableLocks()->AddSendingShards(shard1); + req1->Record.MutableLocks()->AddSendingShards(shard2); + req1->Record.MutableLocks()->AddReceivingShards(shard1); + req1->Record.MutableLocks()->AddReceivingShards(shard2); + *req1->Record.MutableLocks()->AddLocks() = lock1shard1; + + auto req2 = MakeWriteRequestOneKeyValue( + txId1, + NKikimrDataEvents::TEvWrite::MODE_PREPARE, + NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT, + tableId, + columns, + 13, 1304); + req2->Record.MutableLocks()->SetOp(NKikimrDataEvents::TKqpLocks::Commit); 
+ req2->Record.MutableLocks()->AddSendingShards(shard1); + req2->Record.MutableLocks()->AddSendingShards(shard2); + req2->Record.MutableLocks()->AddReceivingShards(shard1); + req2->Record.MutableLocks()->AddReceivingShards(shard2); + *req2->Record.MutableLocks()->AddLocks() = lock1shard2; + + Cerr << "... preparing tx1 at " << shard1 << Endl; + auto res1 = Write(runtime, tx1sender, shard1, std::move(req1)); + Cerr << "... preparing tx1 at " << shard2 << Endl; + auto res2 = Write(runtime, tx1sender, shard2, std::move(req2)); + + ui64 minStep = Max(res1.GetMinStep(), res2.GetMinStep()); + ui64 maxStep = Min(res1.GetMaxStep(), res2.GetMaxStep()); + + Cerr << "... planning tx1 at " << coordinator << Endl; + SendProposeToCoordinator( + runtime, tx1sender, shards, { + .TxId = txId1, + .Coordinator = coordinator, + .MinStep = minStep, + .MaxStep = maxStep, + }); + } + + runtime.WaitFor("blocked readsets", [&]{ return blockedReadSets.size() >= 2; }); + UNIT_ASSERT_VALUES_EQUAL(blockedReadSets.size(), 2u); + + // Start blocking new plan steps + TBlockEvents blockedPlanSteps(runtime); + + // Prepare an upsert (readset flows from shard 1 to shard 2, already broken) + // Must not conflict with other transactions + ui64 txId2 = 1234567890012; + auto tx2sender = runtime.AllocateEdgeActor(); + { + auto req1 = MakeWriteRequestOneKeyValue( + txId2, + NKikimrDataEvents::TEvWrite::MODE_PREPARE, + NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT, + tableId, + columns, + 5, 505); + req1->Record.MutableLocks()->SetOp(NKikimrDataEvents::TKqpLocks::Commit); + req1->Record.MutableLocks()->AddSendingShards(shard1); + req1->Record.MutableLocks()->AddReceivingShards(shard2); + *req1->Record.MutableLocks()->AddLocks() = lock2; + + auto req2 = MakeWriteRequestOneKeyValue( + txId2, + NKikimrDataEvents::TEvWrite::MODE_PREPARE, + NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT, + tableId, + columns, + 15, 1505); + 
req2->Record.MutableLocks()->SetOp(NKikimrDataEvents::TKqpLocks::Commit); + req2->Record.MutableLocks()->AddSendingShards(shard1); + req2->Record.MutableLocks()->AddReceivingShards(shard2); + + Cerr << "... preparing tx2 at " << shard1 << Endl; + auto res1 = Write(runtime, tx2sender, shard1, std::move(req1)); + Cerr << "... preparing tx2 at " << shard2 << Endl; + auto res2 = Write(runtime, tx2sender, shard2, std::move(req2)); + + ui64 minStep = Max(res1.GetMinStep(), res2.GetMinStep()); + ui64 maxStep = Min(res1.GetMaxStep(), res2.GetMaxStep()); + + Cerr << "... planning tx2 at " << coordinator << Endl; + SendProposeToCoordinator( + runtime, tx2sender, shards, { + .TxId = txId2, + .Coordinator = coordinator, + .MinStep = minStep, + .MaxStep = maxStep, + }); + } + + runtime.WaitFor("blocked plan steps", [&]{ return blockedPlanSteps.size() >= 2; }); + UNIT_ASSERT_VALUES_EQUAL(blockedPlanSteps.size(), 2u); + + // Block TEvPrivate::TEvProgressTransaction for shard1 + TBlockEvents blockedProgress(runtime, + [&](const TAutoPtr& ev) { + return ev->GetRecipientRewrite() == shard1actor && + ev->GetTypeRewrite() == EventSpaceBegin(TKikimrEvents::ES_PRIVATE) + 0; + }); + + blockedPlanSteps.Unblock(); + runtime.WaitFor("blocked progress", [&]{ return blockedProgress.size() >= 1; }); + runtime.SimulateSleep(TDuration::MilliSeconds(1)); // let it commit + UNIT_ASSERT_VALUES_EQUAL(blockedProgress.size(), 1u); + + // Make an unrelated immediate write, this will pin write (and future snapshot) version to tx2 + { + Cerr << "... 
making an immediate write to " << shard1 << Endl; + auto req = MakeWriteRequestOneKeyValue( + std::nullopt, + NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE, + NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT, + tableId, + columns, + 4, 406); + Write(runtime, sender, shard1, std::move(req)); + } + + // Block commit attempts at shard1 + TBlockEvents blockedCommits(runtime, + [&](const TEvBlobStorage::TEvPut::TPtr& ev) { + auto* msg = ev->Get(); + return msg->Id.TabletID() == shard1 && msg->Id.Channel() == 0; + }); + + // Make an uncommitted write to a key overlapping with tx1 + // Since tx1 has been validated, and reads are pinned at tx2, tx3 will + // be after tx1 and blocked by a read dependency. Since tx2 has not + // entered the pipeline yet, version will not be above tx2. + auto tx3sender = runtime.AllocateEdgeActor(); + { + Cerr << "... starting uncommitted upsert at " << shard1 << Endl; + auto req = MakeWriteRequestOneKeyValue( + std::nullopt, + NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE, + NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT, + tableId, + columns, + 3, 307); + req->SetLockId(lockTxId3, lockNodeId); + runtime.SendToPipe(shard1, tx3sender, req.release()); + } + + // Wait for some time and make sure there have been no unexpected + // commits, which would indicate the upsert is blocked by tx1. + runtime.SimulateSleep(TDuration::MilliSeconds(1)); + UNIT_ASSERT_VALUES_EQUAL_C(blockedCommits.size(), 0u, + "The uncommitted upsert didn't block. Something may have changed and the test needs to be revised."); + + // Now, while blocking commits, unblock progress and let tx2 to execute, + // which will abort due to broken locks. + blockedProgress.Unblock(); + blockedProgress.Stop(); + + runtime.SimulateSleep(TDuration::MilliSeconds(1)); + size_t commitsAfterTx2 = blockedCommits.size(); + Cerr << "... 
observed " << commitsAfterTx2 << " commits after tx2 unblock" << Endl; + UNIT_ASSERT_C(commitsAfterTx2 >= 2, + "Expected tx2 to produce at least 2 commits (store out rs + abort tx)" + << ", observed " << commitsAfterTx2 << ". Something may have changed."); + + // Now, while still blocking commits, unblock readsets + // Everything will unblock and execute tx1 then tx3 + blockedReadSets.Unblock(); + blockedReadSets.Stop(); + + runtime.SimulateSleep(TDuration::MilliSeconds(1)); + size_t commitsAfterTx3 = blockedCommits.size() - commitsAfterTx2; + Cerr << "... observed " << commitsAfterTx3 << " more commits after readset unblock" << Endl; + UNIT_ASSERT_C(commitsAfterTx3 >= 2, + "Expected at least 2 commits after readset unblock (tx1, tx3), but only " + << commitsAfterTx3 << " have been observed."); + + // Finally, stop blocking commits + // We expect completion handlers to run in tx3, tx1, tx2 order, triggering the bug + blockedCommits.Unblock(); + blockedCommits.Stop(); + + runtime.SimulateSleep(TDuration::MilliSeconds(1)); + + // Check tx3 reply + { + auto ev = runtime.GrabEdgeEventRethrow(tx3sender); + UNIT_ASSERT_VALUES_EQUAL(ev->Get()->Record.GetStatus(), NKikimrDataEvents::TEvWriteResult::STATUS_COMPLETED); + } + + // Check tx1 reply + { + auto ev1 = runtime.GrabEdgeEventRethrow(tx1sender); + UNIT_ASSERT_VALUES_EQUAL(ev1->Get()->Record.GetStatus(), NKikimrDataEvents::TEvWriteResult::STATUS_COMPLETED); + auto ev2 = runtime.GrabEdgeEventRethrow(tx1sender); + UNIT_ASSERT_VALUES_EQUAL(ev2->Get()->Record.GetStatus(), NKikimrDataEvents::TEvWriteResult::STATUS_COMPLETED); + } + + // Check tx2 reply + { + auto ev1 = runtime.GrabEdgeEventRethrow(tx2sender); + UNIT_ASSERT_VALUES_EQUAL(ev1->Get()->Record.GetStatus(), NKikimrDataEvents::TEvWriteResult::STATUS_LOCKS_BROKEN); + auto ev2 = runtime.GrabEdgeEventRethrow(tx2sender); + UNIT_ASSERT_VALUES_EQUAL(ev2->Get()->Record.GetStatus(), NKikimrDataEvents::TEvWriteResult::STATUS_LOCKS_BROKEN); + } + } + + 
Y_UNIT_TEST(PreparedDistributedWritePageFault) { + TPortManager pm; + TServerSettings serverSettings(pm.GetPort(2134)); + serverSettings.SetDomainName("Root") + .SetUseRealThreads(false) + .SetEnableDataShardVolatileTransactions(false); + + auto [runtime, server, sender] = TestCreateServer(serverSettings); + + TDisableDataShardLogBatching disableDataShardLogBatching; + + // Use a policy without levels and very small page sizes, effectively making each row on its own page + NLocalDb::TCompactionPolicyPtr policy = NLocalDb::CreateDefaultTablePolicy(); + policy->MinDataPageSize = 1; + + auto opts = TShardedTableOptions() + .Columns({{"key", "Int32", true, false}, + {"value", "Int32", false, false}}) + .Policy(policy.Get()); + const auto& columns = opts.Columns_; + auto [shards, tableId] = CreateShardedTable(server, sender, "/Root", "table", opts); + UNIT_ASSERT_VALUES_EQUAL(shards.size(), 1u); + + const ui64 coordinator = ChangeStateStorage(Coordinator, server->GetSettings().Domain); + + const ui64 lockTxId1 = 1234567890001; + const ui64 lockNodeId = runtime.GetNodeId(0); + NLongTxService::TLockHandle lockHandle1(lockTxId1, runtime.GetActorSystem(0)); + + auto shard1 = shards.at(0); + NKikimrDataEvents::TLock lock1shard1; + + // 1. Make an uncommitted write (lock1 shard1) + { + Cerr << "... making an uncommmited write to " << shard1 << Endl; + auto req = MakeWriteRequestOneKeyValue( + std::nullopt, + NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE, + NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT, + tableId, + columns, + 1, 11); + req->SetLockId(lockTxId1, lockNodeId); + auto result = Write(runtime, sender, shard1, std::move(req)); + UNIT_ASSERT_VALUES_EQUAL(result.GetTxLocks().size(), 1u); + lock1shard1 = result.GetTxLocks().at(0); + UNIT_ASSERT_C(lock1shard1.GetCounter() < 1000, "Unexpected lock in the result: " << lock1shard1.ShortDebugString()); + } + + // 2. Compact and reboot the tablet + Cerr << "... 
compacting shard " << shard1 << Endl; + CompactTable(runtime, shard1, tableId, false); + Cerr << "... rebooting shard " << shard1 << Endl; + RebootTablet(runtime, shard1, sender); + runtime.SimulateSleep(TDuration::Seconds(1)); + + // 3. Prepare a distributed write (single shard for simplicity) + ui64 txId1 = 1234567890011; + auto tx1sender = runtime.AllocateEdgeActor(); + { + auto req1 = MakeWriteRequestOneKeyValue( + txId1, + NKikimrDataEvents::TEvWrite::MODE_PREPARE, + NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT, + tableId, + columns, + 1, 22); + req1->Record.MutableLocks()->SetOp(NKikimrDataEvents::TKqpLocks::Commit); + + Cerr << "... preparing tx1 at " << shard1 << Endl; + auto res1 = Write(runtime, tx1sender, shard1, std::move(req1)); + + // Reboot, making sure tx is only loaded after it's planned + // This causes tx to skip conflicts cache and go to execution + // The first attempt to execute will page fault looking for conflicts + // Tx will be released, and will trigger the bug on restore + Cerr << "... rebooting shard " << shard1 << Endl; + RebootTablet(runtime, shard1, sender); + runtime.SimulateSleep(TDuration::Seconds(1)); + + ui64 minStep = res1.GetMinStep(); + ui64 maxStep = res1.GetMaxStep(); + + Cerr << "... planning tx1 at " << coordinator << Endl; + SendProposeToCoordinator( + runtime, tx1sender, { shard1 }, { + .TxId = txId1, + .Coordinator = coordinator, + .MinStep = minStep, + .MaxStep = maxStep, + }); + } + + // 4. Check tx1 reply (it must succeed) + { + Cerr << "... 
waiting for tx1 result" << Endl; + auto ev = runtime.GrabEdgeEventRethrow(tx1sender); + UNIT_ASSERT_VALUES_EQUAL(ev->Get()->Record.GetStatus(), NKikimrDataEvents::TEvWriteResult::STATUS_COMPLETED); + } + } + } // Y_UNIT_TEST_SUITE } // namespace NKikimr diff --git a/ydb/core/tx/datashard/datashard_write_operation.cpp b/ydb/core/tx/datashard/datashard_write_operation.cpp index 5670a3abca88..0d67c7f02350 100644 --- a/ydb/core/tx/datashard/datashard_write_operation.cpp +++ b/ydb/core/tx/datashard/datashard_write_operation.cpp @@ -95,6 +95,7 @@ std::tuple TValidatedWriteTxOperatio case NKikimrDataEvents::TEvWrite::TOperation::OPERATION_DELETE: case NKikimrDataEvents::TEvWrite::TOperation::OPERATION_REPLACE: case NKikimrDataEvents::TEvWrite::TOperation::OPERATION_INSERT: + case NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPDATE: break; default: return {NKikimrTxDataShard::TError::BAD_ARGUMENT, TStringBuilder() << OperationType << " operation is not supported now"}; @@ -415,8 +416,9 @@ TValidatedWriteTx::TPtr TWriteOperation::BuildWriteTx(TDataShard* self) void TWriteOperation::ReleaseTxData(NTabletFlatExecutor::TTxMemoryProviderBase& provider) { ReleasedTxDataSize = provider.GetMemoryLimit() + provider.GetRequestedMemory(); - if (!WriteTx || IsTxDataReleased()) + if (!WriteTx || WriteTx->GetIsReleased()) { return; + } WriteTx->ReleaseTxData(); // Immediate transactions have no body stored. 
diff --git a/ydb/core/tx/datashard/execute_kqp_data_tx_unit.cpp b/ydb/core/tx/datashard/execute_kqp_data_tx_unit.cpp index 74c0d4e740a5..b61b78d7f180 100644 --- a/ydb/core/tx/datashard/execute_kqp_data_tx_unit.cpp +++ b/ydb/core/tx/datashard/execute_kqp_data_tx_unit.cpp @@ -217,14 +217,9 @@ EExecutionStatus TExecuteKqpDataTxUnit::Execute(TOperation::TPtr op, TTransactio auto allocGuard = tasksRunner.BindAllocator(txc.GetMemoryLimit() - dataTx->GetTxSize()); - NKqp::NRm::TKqpResourcesRequest req; - req.MemoryPool = NKqp::NRm::EKqpMemoryPool::DataQuery; - req.ExternalMemory = txc.GetMemoryLimit(); - ui64 taskId = dataTx->GetFirstKqpTaskId(); - NKqp::GetKqpResourceManager()->NotifyExternalResourcesAllocated(txId, taskId, req); - + NKqp::GetKqpResourceManager()->GetCounters()->RmExternalMemory->Add(txc.GetMemoryLimit()); Y_DEFER { - NKqp::GetKqpResourceManager()->FreeResources(txId, taskId); + NKqp::GetKqpResourceManager()->GetCounters()->RmExternalMemory->Sub(txc.GetMemoryLimit()); }; LOG_T("Operation " << *op << " (execute_kqp_data_tx) at " << tabletId diff --git a/ydb/core/tx/datashard/execute_write_unit.cpp b/ydb/core/tx/datashard/execute_write_unit.cpp index 57111e235be5..b5ba1432cecb 100644 --- a/ydb/core/tx/datashard/execute_write_unit.cpp +++ b/ydb/core/tx/datashard/execute_write_unit.cpp @@ -147,7 +147,7 @@ class TExecuteWriteUnit : public TExecutionUnit { switch (operationType) { case NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT: { fillOps(rowIdx); - userDb.UpdateRow(fullTableId, key, ops); + userDb.UpsertRow(fullTableId, key, ops); break; } case NKikimrDataEvents::TEvWrite::TOperation::OPERATION_REPLACE: { @@ -164,6 +164,11 @@ class TExecuteWriteUnit : public TExecutionUnit { userDb.InsertRow(fullTableId, key, ops); break; } + case NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPDATE: { + fillOps(rowIdx); + userDb.UpdateRow(fullTableId, key, ops); + break; + } default: // Checked before in TWriteOperation Y_FAIL_S(operationType << " 
operation is not supported now"); @@ -173,7 +178,8 @@ class TExecuteWriteUnit : public TExecutionUnit { switch (operationType) { case NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT: case NKikimrDataEvents::TEvWrite::TOperation::OPERATION_REPLACE: - case NKikimrDataEvents::TEvWrite::TOperation::OPERATION_INSERT: { + case NKikimrDataEvents::TEvWrite::TOperation::OPERATION_INSERT: + case NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPDATE: { DataShard.IncCounter(COUNTER_WRITE_ROWS, matrix.GetRowCount()); DataShard.IncCounter(COUNTER_WRITE_BYTES, matrix.GetBuffer().size()); break; diff --git a/ydb/core/tx/datashard/export_common.cpp b/ydb/core/tx/datashard/export_common.cpp index ee801ebe4c52..cf999f1f76a7 100644 --- a/ydb/core/tx/datashard/export_common.cpp +++ b/ydb/core/tx/datashard/export_common.cpp @@ -55,18 +55,23 @@ TMaybe GenYdbScheme( try { FillTableBoundary(scheme, tableDesc, mkqlKeyType); + FillIndexDescription(scheme, tableDesc); } catch (const yexception&) { return Nothing(); } - FillIndexDescription(scheme, tableDesc, mkqlKeyType); FillStorageSettings(scheme, tableDesc); FillColumnFamilies(scheme, tableDesc); FillAttributes(scheme, pathDesc); FillPartitioningSettings(scheme, tableDesc); FillKeyBloomFilter(scheme, tableDesc); FillReadReplicasSettings(scheme, tableDesc); - FillSequenceDescription(scheme, tableDesc); + + TString error; + Ydb::StatusIds::StatusCode status; + if (!FillSequenceDescription(scheme, tableDesc, status, error)) { + return Nothing(); + } return scheme; } diff --git a/ydb/core/tx/datashard/import_s3.cpp b/ydb/core/tx/datashard/import_s3.cpp index ba7227a7a74c..655ee80172ab 100644 --- a/ydb/core/tx/datashard/import_s3.cpp +++ b/ydb/core/tx/datashard/import_s3.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/ydb/core/tx/datashard/move_index_unit.cpp b/ydb/core/tx/datashard/move_index_unit.cpp index 6b3a30be457a..73fa338d35e7 100644 --- a/ydb/core/tx/datashard/move_index_unit.cpp 
+++ b/ydb/core/tx/datashard/move_index_unit.cpp @@ -60,20 +60,27 @@ class TMoveIndexUnit : public TExecutionUnit { NIceDb::TNiceDb db(txc.DB); ChangeRecords.clear(); - if (!DataShard.LoadChangeRecords(db, ChangeRecords)) { - return EExecutionStatus::Restart; - } + auto changesQueue = DataShard.TakeChangesQueue(); auto lockChangeRecords = DataShard.TakeLockChangeRecords(); auto committedLockChangeRecords = DataShard.TakeCommittedLockChangeRecords(); + if (!DataShard.LoadChangeRecords(db, ChangeRecords)) { + DataShard.SetChangesQueue(std::move(changesQueue)); + DataShard.SetLockChangeRecords(std::move(lockChangeRecords)); + DataShard.SetCommittedLockChangeRecords(std::move(committedLockChangeRecords)); + return EExecutionStatus::Restart; + } + if (!DataShard.LoadLockChangeRecords(db)) { + DataShard.SetChangesQueue(std::move(changesQueue)); DataShard.SetLockChangeRecords(std::move(lockChangeRecords)); DataShard.SetCommittedLockChangeRecords(std::move(committedLockChangeRecords)); return EExecutionStatus::Restart; } if (!DataShard.LoadChangeRecordCommits(db, ChangeRecords)) { + DataShard.SetChangesQueue(std::move(changesQueue)); DataShard.SetLockChangeRecords(std::move(lockChangeRecords)); DataShard.SetCommittedLockChangeRecords(std::move(committedLockChangeRecords)); return EExecutionStatus::Restart; @@ -99,7 +106,7 @@ class TMoveIndexUnit : public TExecutionUnit { void Complete(TOperation::TPtr, const TActorContext& ctx) override { DataShard.CreateChangeSender(ctx); DataShard.MaybeActivateChangeSender(ctx); - DataShard.EnqueueChangeRecords(std::move(ChangeRecords)); + DataShard.EnqueueChangeRecords(std::move(ChangeRecords), 0, true); } }; diff --git a/ydb/core/tx/datashard/move_table_unit.cpp b/ydb/core/tx/datashard/move_table_unit.cpp index 846f517ee10a..3e34394e15d6 100644 --- a/ydb/core/tx/datashard/move_table_unit.cpp +++ b/ydb/core/tx/datashard/move_table_unit.cpp @@ -60,20 +60,27 @@ class TMoveTableUnit : public TExecutionUnit { NIceDb::TNiceDb db(txc.DB); 
ChangeRecords.clear(); - if (!DataShard.LoadChangeRecords(db, ChangeRecords)) { - return EExecutionStatus::Restart; - } + auto changesQueue = DataShard.TakeChangesQueue(); auto lockChangeRecords = DataShard.TakeLockChangeRecords(); auto committedLockChangeRecords = DataShard.TakeCommittedLockChangeRecords(); + if (!DataShard.LoadChangeRecords(db, ChangeRecords)) { + DataShard.SetChangesQueue(std::move(changesQueue)); + DataShard.SetLockChangeRecords(std::move(lockChangeRecords)); + DataShard.SetCommittedLockChangeRecords(std::move(committedLockChangeRecords)); + return EExecutionStatus::Restart; + } + if (!DataShard.LoadLockChangeRecords(db)) { + DataShard.SetChangesQueue(std::move(changesQueue)); DataShard.SetLockChangeRecords(std::move(lockChangeRecords)); DataShard.SetCommittedLockChangeRecords(std::move(committedLockChangeRecords)); return EExecutionStatus::Restart; } if (!DataShard.LoadChangeRecordCommits(db, ChangeRecords)) { + DataShard.SetChangesQueue(std::move(changesQueue)); DataShard.SetLockChangeRecords(std::move(lockChangeRecords)); DataShard.SetCommittedLockChangeRecords(std::move(committedLockChangeRecords)); return EExecutionStatus::Restart; @@ -99,7 +106,7 @@ class TMoveTableUnit : public TExecutionUnit { void Complete(TOperation::TPtr, const TActorContext& ctx) override { DataShard.CreateChangeSender(ctx); DataShard.MaybeActivateChangeSender(ctx); - DataShard.EnqueueChangeRecords(std::move(ChangeRecords)); + DataShard.EnqueueChangeRecords(std::move(ChangeRecords), 0, true); } }; diff --git a/ydb/core/tx/datashard/operation.h b/ydb/core/tx/datashard/operation.h index e530fe6fc254..df0687d57488 100644 --- a/ydb/core/tx/datashard/operation.h +++ b/ydb/core/tx/datashard/operation.h @@ -883,6 +883,12 @@ class TOperation */ virtual void OnCleanup(TDataShard& self, std::vector>& replies); + + // CommittingOps book keeping + const std::optional& GetCommittingOpsVersion() const { return CommittingOpsVersion; } + void SetCommittingOpsVersion(const 
TRowVersion& version) { CommittingOpsVersion = version; } + void ResetCommittingOpsVersion() { CommittingOpsVersion.reset(); } + protected: TOperation() : TOperation(TBasicOpInfo()) @@ -956,6 +962,8 @@ class TOperation static NMiniKQL::IEngineFlat::TValidationInfo EmptyKeysInfo; + std::optional CommittingOpsVersion; + public: std::optional MvccReadWriteVersion; diff --git a/ydb/core/tx/datashard/read_iterator.h b/ydb/core/tx/datashard/read_iterator.h index 8b2a4a2b7e5e..6648388a6138 100644 --- a/ydb/core/tx/datashard/read_iterator.h +++ b/ydb/core/tx/datashard/read_iterator.h @@ -205,6 +205,7 @@ struct TReadIteratorState { TActorId SessionId; TMonotonic StartTs; bool IsFinished = false; + bool ReadContinuePending = false; // note that we send SeqNo's starting from 1 ui64 SeqNo = 0; diff --git a/ydb/core/tx/datashard/remove_schema_snapshots.cpp b/ydb/core/tx/datashard/remove_schema_snapshots.cpp new file mode 100644 index 000000000000..fe63f30be61d --- /dev/null +++ b/ydb/core/tx/datashard/remove_schema_snapshots.cpp @@ -0,0 +1,54 @@ +#include "datashard_impl.h" + +namespace NKikimr::NDataShard { + +class TDataShard::TTxRemoveSchemaSnapshots: public NTabletFlatExecutor::TTransactionBase { +public: + TTxRemoveSchemaSnapshots(TDataShard* self) + : TBase(self) + { } + + TTxType GetTxType() const override { return TXTYPE_REMOVE_SCHEMA_SNAPSHOTS; } + + bool Execute(TTransactionContext& txc, const TActorContext&) override { + while (!Self->PendingSchemaSnapshotsToGc.empty()) { + const auto key = Self->PendingSchemaSnapshotsToGc.back(); + const auto* snapshot = Self->GetSchemaSnapshotManager().FindSnapshot(key); + + if (!snapshot) { + Self->PendingSchemaSnapshotsToGc.pop_back(); + continue; + } + + if (Self->GetSchemaSnapshotManager().HasReference(key)) { + Self->PendingSchemaSnapshotsToGc.pop_back(); + continue; + } + + auto table = Self->FindUserTable(TPathId(key.OwnerId, key.PathId)); + if (!table) { + Self->PendingSchemaSnapshotsToGc.pop_back(); + continue; + } + + if 
(snapshot->Schema->GetTableSchemaVersion() >= table->GetTableSchemaVersion()) { + Self->PendingSchemaSnapshotsToGc.pop_back(); + continue; + } + + Self->GetSchemaSnapshotManager().RemoveShapshot(txc.DB, key); + Self->PendingSchemaSnapshotsToGc.pop_back(); + } + + return true; + } + + void Complete(const TActorContext&) override { + } +}; + +void TDataShard::Handle(TEvPrivate::TEvRemoveSchemaSnapshots::TPtr&, const TActorContext& ctx) { + Execute(new TTxRemoveSchemaSnapshots(this), ctx); +} + +} // namespace NKikimr::NDataShard diff --git a/ydb/core/tx/datashard/ut_common/datashard_ut_common.cpp b/ydb/core/tx/datashard/ut_common/datashard_ut_common.cpp index 1421e3afa670..0a4d50965dd8 100644 --- a/ydb/core/tx/datashard/ut_common/datashard_ut_common.cpp +++ b/ydb/core/tx/datashard/ut_common/datashard_ut_common.cpp @@ -1730,6 +1730,22 @@ ui64 AsyncAlterDropStream( return RunSchemeTx(*server->GetRuntime(), std::move(request)); } +ui64 AsyncAlterDropReplicationConfig( + Tests::TServer::TPtr server, + const TString& workingDir, + const TString& tableName) +{ + auto request = SchemeTxTemplate(NKikimrSchemeOp::ESchemeOpAlterTable, workingDir); + auto& tx = *request->Record.MutableTransaction()->MutableModifyScheme(); + tx.SetInternal(true); + + auto& desc = *tx.MutableAlterTable(); + desc.SetName(tableName); + desc.MutableReplicationConfig()->SetMode(NKikimrSchemeOp::TTableReplicationConfig::REPLICATION_MODE_NONE); + + return RunSchemeTx(*server->GetRuntime(), std::move(request)); +} + ui64 AsyncCreateContinuousBackup( Tests::TServer::TPtr server, const TString& workingDir, @@ -1890,6 +1906,24 @@ TRowVersion AcquireReadSnapshot(TTestActorRuntime& runtime, const TString& datab return TRowVersion(record.GetSnapshotStep(), record.GetSnapshotTxId()); } +void AddValueToCells(ui64 value, const TString& columnType, TVector& cells, TVector& stringValues) { + if (columnType == "Uint64") { + cells.emplace_back(TCell((const char*)&value, sizeof(ui64))); + } else if (columnType == 
"Uint32") { + ui32 value32 = (ui32)value; + cells.emplace_back(TCell((const char*)&value32, sizeof(ui32))); + } else if (columnType == "Int32") { + i32 value32 = (i32)value; + cells.push_back(TCell::Make(value32)); + } else if (columnType == "Utf8") { + stringValues.emplace_back(Sprintf("String_%" PRIu64, value)); + cells.emplace_back(TCell(stringValues.back().c_str(), stringValues.back().size())); + } else { + Y_ABORT("Unsupported column type"); + } +} + + std::unique_ptr MakeWriteRequest(std::optional txId, NKikimrDataEvents::TEvWrite::ETxMode txMode, NKikimrDataEvents::TEvWrite_TOperation::EOperationType operationType, const TTableId& tableId, const TVector& columns, ui32 rowCount, ui64 seed) { std::vector columnIds; for (ui32 col = 0; col < columns.size(); ++col) { @@ -1906,19 +1940,8 @@ std::unique_ptr MakeWriteRequest(std::optional MakeWriteRequest(std::optional MakeWriteRequestOneKeyValue(std::optional txId, NKikimrDataEvents::TEvWrite::ETxMode txMode, NKikimrDataEvents::TEvWrite_TOperation::EOperationType operationType, const TTableId& tableId, const TVector& columns, ui64 key, ui64 value) { + UNIT_ASSERT_VALUES_EQUAL(columns.size(), 2); + + std::vector columnIds = {1, 2}; + + TVector stringValues; + TVector cells; + + AddValueToCells(key, columns[0].Type, cells, stringValues); + AddValueToCells(value, columns[1].Type, cells, stringValues); + + TSerializedCellMatrix matrix(cells, 1, 2); + TString blobData = matrix.ReleaseBuffer(); + + std::unique_ptr evWrite = txId ? 
std::make_unique(*txId, txMode) : std::make_unique(txMode); + ui64 payloadIndex = NKikimr::NEvWrite::TPayloadWriter(*evWrite).AddDataToPayload(std::move(blobData)); + evWrite->AddOperation(operationType, tableId, columnIds, payloadIndex, NKikimrDataEvents::FORMAT_CELLVEC); + + return evWrite; +} + NKikimrDataEvents::TEvWriteResult Write(TTestActorRuntime& runtime, TActorId sender, ui64 shardId, std::unique_ptr&& request, NKikimrDataEvents::TEvWriteResult::EStatus expectedStatus) { auto txMode = request->Record.GetTxMode(); @@ -1968,6 +2012,13 @@ NKikimrDataEvents::TEvWriteResult Upsert(TTestActorRuntime& runtime, TActorId se return Write(runtime, sender, shardId, std::move(request), expectedStatus); } +NKikimrDataEvents::TEvWriteResult UpsertOneKeyValue(TTestActorRuntime& runtime, TActorId sender, ui64 shardId, const TTableId& tableId, const TVector& columns, ui64 key, ui64 value, std::optional txId, NKikimrDataEvents::TEvWrite::ETxMode txMode, NKikimrDataEvents::TEvWriteResult::EStatus expectedStatus) +{ + auto request = MakeWriteRequestOneKeyValue(txId, txMode, NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT, tableId, columns, key, value); + return Write(runtime, sender, shardId, std::move(request), expectedStatus); +} + + NKikimrDataEvents::TEvWriteResult Replace(TTestActorRuntime& runtime, TActorId sender, ui64 shardId, const TTableId& tableId, const TVector& columns, ui32 rowCount, std::optional txId, NKikimrDataEvents::TEvWrite::ETxMode txMode, NKikimrDataEvents::TEvWriteResult::EStatus expectedStatus) { auto request = MakeWriteRequest(txId, txMode, NKikimrDataEvents::TEvWrite::TOperation::OPERATION_REPLACE, tableId, columns, rowCount); @@ -1986,6 +2037,12 @@ NKikimrDataEvents::TEvWriteResult Insert(TTestActorRuntime& runtime, TActorId se return Write(runtime, sender, shardId, std::move(request), expectedStatus); } +NKikimrDataEvents::TEvWriteResult Update(TTestActorRuntime& runtime, TActorId sender, ui64 shardId, const TTableId& tableId, const 
TVector& columns, ui32 rowCount, std::optional txId, NKikimrDataEvents::TEvWrite::ETxMode txMode, NKikimrDataEvents::TEvWriteResult::EStatus expectedStatus) +{ + auto request = MakeWriteRequest(txId, txMode, NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPDATE, tableId, columns, rowCount); + return Write(runtime, sender, shardId, std::move(request), expectedStatus); +} + TTestActorRuntimeBase::TEventObserverHolderPair ReplaceEvProposeTransactionWithEvWrite(TTestActorRuntime& runtime, TEvWriteRows& rows) { if (rows.empty()) return {}; diff --git a/ydb/core/tx/datashard/ut_common/datashard_ut_common.h b/ydb/core/tx/datashard/ut_common/datashard_ut_common.h index 9257505c43a9..cc348b3c0f24 100644 --- a/ydb/core/tx/datashard/ut_common/datashard_ut_common.h +++ b/ydb/core/tx/datashard/ut_common/datashard_ut_common.h @@ -670,6 +670,11 @@ ui64 AsyncAlterDropStream( const TString& tableName, const TString& streamName); +ui64 AsyncAlterDropReplicationConfig( + Tests::TServer::TPtr server, + const TString& workingDir, + const TString& tableName); + ui64 AsyncCreateContinuousBackup( Tests::TServer::TPtr server, const TString& workingDir, @@ -734,11 +739,15 @@ void ExecSQL(Tests::TServer::TPtr server, TRowVersion AcquireReadSnapshot(TTestActorRuntime& runtime, const TString& databaseName, ui32 nodeIndex = 0); std::unique_ptr MakeWriteRequest(std::optional txId, NKikimrDataEvents::TEvWrite::ETxMode txMode, NKikimrDataEvents::TEvWrite_TOperation::EOperationType operationType, const TTableId& tableId, const TVector& columns, ui32 rowCount, ui64 seed = 0); +std::unique_ptr MakeWriteRequestOneKeyValue(std::optional txId, NKikimrDataEvents::TEvWrite::ETxMode txMode, NKikimrDataEvents::TEvWrite_TOperation::EOperationType operationType, const TTableId& tableId, const TVector& columns, ui64 key, ui64 value); + NKikimrDataEvents::TEvWriteResult Write(TTestActorRuntime& runtime, TActorId sender, ui64 shardId, std::unique_ptr&& request, NKikimrDataEvents::TEvWriteResult::EStatus 
expectedStatus = NKikimrDataEvents::TEvWriteResult::STATUS_UNSPECIFIED); NKikimrDataEvents::TEvWriteResult Upsert(TTestActorRuntime& runtime, TActorId sender, ui64 shardId, const TTableId& tableId, const TVector& columns, ui32 rowCount, std::optional txId, NKikimrDataEvents::TEvWrite::ETxMode txMode, NKikimrDataEvents::TEvWriteResult::EStatus expectedStatus = NKikimrDataEvents::TEvWriteResult::STATUS_UNSPECIFIED); +NKikimrDataEvents::TEvWriteResult UpsertOneKeyValue(TTestActorRuntime& runtime, TActorId sender, ui64 shardId, const TTableId& tableId, const TVector& columns, ui64 key, ui64 value, std::optional txId, NKikimrDataEvents::TEvWrite::ETxMode txMode, NKikimrDataEvents::TEvWriteResult::EStatus expectedStatus = NKikimrDataEvents::TEvWriteResult::STATUS_UNSPECIFIED); NKikimrDataEvents::TEvWriteResult Replace(TTestActorRuntime& runtime, TActorId sender, ui64 shardId, const TTableId& tableId, const TVector& columns, ui32 rowCount, std::optional txId, NKikimrDataEvents::TEvWrite::ETxMode txMode, NKikimrDataEvents::TEvWriteResult::EStatus expectedStatus = NKikimrDataEvents::TEvWriteResult::STATUS_UNSPECIFIED); NKikimrDataEvents::TEvWriteResult Delete(TTestActorRuntime& runtime, TActorId sender, ui64 shardId, const TTableId& tableId, const TVector& columns, ui32 rowCount, std::optional txId, NKikimrDataEvents::TEvWrite::ETxMode txMode, NKikimrDataEvents::TEvWriteResult::EStatus expectedStatus = NKikimrDataEvents::TEvWriteResult::STATUS_UNSPECIFIED); NKikimrDataEvents::TEvWriteResult Insert(TTestActorRuntime& runtime, TActorId sender, ui64 shardId, const TTableId& tableId, const TVector& columns, ui32 rowCount, std::optional txId, NKikimrDataEvents::TEvWrite::ETxMode txMode, NKikimrDataEvents::TEvWriteResult::EStatus expectedStatus = NKikimrDataEvents::TEvWriteResult::STATUS_UNSPECIFIED); +NKikimrDataEvents::TEvWriteResult Update(TTestActorRuntime& runtime, TActorId sender, ui64 shardId, const TTableId& tableId, const TVector& columns, ui32 rowCount, std::optional 
txId, NKikimrDataEvents::TEvWrite::ETxMode txMode, NKikimrDataEvents::TEvWriteResult::EStatus expectedStatus = NKikimrDataEvents::TEvWriteResult::STATUS_UNSPECIFIED); NKikimrDataEvents::TEvWriteResult WaitForWriteCompleted(TTestActorRuntime& runtime, TActorId sender, NKikimrDataEvents::TEvWriteResult::EStatus expectedStatus = NKikimrDataEvents::TEvWriteResult::STATUS_COMPLETED); struct TEvWriteRow { diff --git a/ydb/core/tx/datashard/volatile_tx.cpp b/ydb/core/tx/datashard/volatile_tx.cpp index 1a72b7c10783..93a28c836295 100644 --- a/ydb/core/tx/datashard/volatile_tx.cpp +++ b/ydb/core/tx/datashard/volatile_tx.cpp @@ -271,9 +271,6 @@ namespace NKikimr::NDataShard { void TVolatileTxManager::Start(const TActorContext& ctx) { for (auto& pr : VolatileTxs) { - if (!pr.second->Dependencies.empty()) { - continue; - } switch (pr.second->State) { case EVolatileTxState::Waiting: for (ui64 target : pr.second->Participants) { @@ -347,7 +344,7 @@ namespace NKikimr::NDataShard { std::vector byCommitOrder; byCommitOrder.reserve(VolatileTxs.size()); - auto postProcessTxInfo = [this, &byCommitOrder](TVolatileTxInfo* info) { + auto postProcessTxInfo = [&](TVolatileTxInfo* info) { switch (info->State) { case EVolatileTxState::Waiting: case EVolatileTxState::Committed: { @@ -402,6 +399,28 @@ namespace NKikimr::NDataShard { VolatileTxByCommitOrder.PushBack(info); } + ui64 numWaiting = 0; + ui64 numCommitted = 0; + ui64 numAborting = 0; + for (auto& pr : VolatileTxs) { + switch (pr.second->State) { + case EVolatileTxState::Waiting: + ++numWaiting; + break; + case EVolatileTxState::Committed: + ++numCommitted; + break; + case EVolatileTxState::Aborting: + ++numAborting; + break; + } + } + + Self->SetCounter(COUNTER_VOLATILE_TX_INFLIGHT, VolatileTxs.size()); + Self->SetCounter(COUNTER_VOLATILE_TX_WAITING_COUNT, numWaiting); + Self->SetCounter(COUNTER_VOLATILE_TX_COMMITTED_COUNT, numCommitted); + Self->SetCounter(COUNTER_VOLATILE_TX_ABORTING_COUNT, numAborting); + return true; } @@ -557,6 
+576,8 @@ namespace NKikimr::NDataShard { db.Table().Key(info->TxId, shardId).Update(); } + UpdateCountersAdd(info); + txc.DB.OnRollback([this, txId]() { RollbackAddVolatileTx(txId); }); @@ -596,7 +617,10 @@ namespace NKikimr::NDataShard { // FIXME: do we need to handle WaitingSnapshotEvents somehow? + // Note: not counting latency (this is a rollback) + // This will also unlink from linked lists + UpdateCountersRemove(info); VolatileTxs.erase(txId); } @@ -635,6 +659,10 @@ namespace NKikimr::NDataShard { VolatileTxByCommitTxId.erase(commitTxId); } VolatileTxByVersion.erase(info); + + Self->IncCounter(COUNTER_VOLATILE_TX_TOTAL_LATENCY_MS, info->LatencyTimer.Passed() * 1000); + + UpdateCountersRemove(info); VolatileTxs.erase(txId); if (prevUncertain < GetMinUncertainVersion()) { @@ -731,7 +759,7 @@ namespace NKikimr::NDataShard { ui64 txId = info->TxId; // Move tx to aborting, but don't persist yet, we need a separate transaction for that - info->State = EVolatileTxState::Aborting; + ChangeState(info, EVolatileTxState::Aborting); // Aborted transactions don't have dependencies for (ui64 dependencyTxId : info->Dependencies) { @@ -845,7 +873,7 @@ namespace NKikimr::NDataShard { // Move tx to committed. // Note that we don't need to wait until the new state is committed (it's repeatable), // but we need to wait until the initial effects are committed and persisted. 
- info->State = EVolatileTxState::Committed; + ChangeState(info, EVolatileTxState::Committed); db.Table().Key(txId).Update( NIceDb::TUpdate(info->State)); @@ -875,7 +903,7 @@ namespace NKikimr::NDataShard { if (info->AddCommitted) { RunCommitCallbacks(info); } - if (info->Dependencies.empty() && ReadyToDbCommit(info)) { + if (ReadyToDbCommit(info)) { AddPendingCommit(txId); } } @@ -926,7 +954,9 @@ namespace NKikimr::NDataShard { case EVolatileTxState::Waiting: break; case EVolatileTxState::Committed: - AddPendingCommit(dependentTxId); + if (ReadyToDbCommit(dependent)) { + AddPendingCommit(dependentTxId); + } break; case EVolatileTxState::Aborting: Y_ABORT("FIXME: unexpected dependency removed from aborting tx"); @@ -1031,4 +1061,43 @@ namespace NKikimr::NDataShard { return false; } + void TVolatileTxManager::UpdateCountersAdd(TVolatileTxInfo* info) { + Self->IncCounter(COUNTER_VOLATILE_TX_INFLIGHT); + switch (info->State) { + case EVolatileTxState::Waiting: + Self->IncCounter(COUNTER_VOLATILE_TX_WAITING_COUNT); + break; + case EVolatileTxState::Committed: + Self->IncCounter(COUNTER_VOLATILE_TX_COMMITTED_COUNT); + break; + case EVolatileTxState::Aborting: + Self->IncCounter(COUNTER_VOLATILE_TX_ABORTING_COUNT); + break; + } + } + + void TVolatileTxManager::UpdateCountersRemove(TVolatileTxInfo* info) { + Self->DecCounter(COUNTER_VOLATILE_TX_INFLIGHT); + switch (info->State) { + case EVolatileTxState::Waiting: + Self->DecCounter(COUNTER_VOLATILE_TX_WAITING_COUNT); + break; + case EVolatileTxState::Committed: + Self->DecCounter(COUNTER_VOLATILE_TX_COMMITTED_COUNT); + break; + case EVolatileTxState::Aborting: + Self->DecCounter(COUNTER_VOLATILE_TX_ABORTING_COUNT); + break; + } + } + + void TVolatileTxManager::ChangeState(TVolatileTxInfo* info, EVolatileTxState state) { + if (info->State == EVolatileTxState::Waiting) { + Self->IncCounter(COUNTER_VOLATILE_TX_WAIT_LATENCY_MS, info->LatencyTimer.Passed() * 1000); + } + UpdateCountersRemove(info); + info->State = state; + 
UpdateCountersAdd(info); + } + } // namespace NKikimr::NDataShard diff --git a/ydb/core/tx/datashard/volatile_tx.h b/ydb/core/tx/datashard/volatile_tx.h index a7f40b3178bf..010a21ab3988 100644 --- a/ydb/core/tx/datashard/volatile_tx.h +++ b/ydb/core/tx/datashard/volatile_tx.h @@ -6,6 +6,7 @@ #include #include #include +#include namespace NKikimr::NTabletFlatExecutor { @@ -75,6 +76,9 @@ namespace NKikimr::NDataShard { // DECISION_ABORT on abort. std::vector ArbiterReadSets; + // Calculates Waiting and Total latency + THPTimer LatencyTimer; + template bool IsInList() const { using TItem = TIntrusiveListItem; @@ -276,6 +280,10 @@ namespace NKikimr::NDataShard { void RemoveFromCommitOrder(TVolatileTxInfo* info); bool ReadyToDbCommit(TVolatileTxInfo* info) const; + void UpdateCountersAdd(TVolatileTxInfo* info); + void UpdateCountersRemove(TVolatileTxInfo* info); + void ChangeState(TVolatileTxInfo* info, EVolatileTxState state); + private: TDataShard* const Self; absl::flat_hash_map> VolatileTxs; // TxId -> Info diff --git a/ydb/core/tx/datashard/ya.make b/ydb/core/tx/datashard/ya.make index 7bcfa4c2af15..6cfc69901adb 100644 --- a/ydb/core/tx/datashard/ya.make +++ b/ydb/core/tx/datashard/ya.make @@ -189,6 +189,7 @@ SRCS( receive_snapshot_unit.cpp remove_lock_change_records.cpp remove_locks.cpp + remove_schema_snapshots.cpp range_ops.cpp read_iterator.h restore_unit.cpp @@ -243,7 +244,6 @@ PEERDIR( ydb/core/formats ydb/core/io_formats/ydb_dump ydb/core/kqp/runtime - ydb/core/persqueue/partition_key_range ydb/core/persqueue/writer ydb/core/protos ydb/core/tablet diff --git a/ydb/core/tx/limiter/grouped_memory/service/actor.cpp b/ydb/core/tx/limiter/grouped_memory/service/actor.cpp new file mode 100644 index 000000000000..e7573c23612e --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/actor.cpp @@ -0,0 +1,50 @@ +#include "actor.h" + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +void TMemoryLimiterActor::Bootstrap() { + Manager = 
std::make_shared(SelfId(), Config, Name, Signals, DefaultStage); + Become(&TThis::StateWait); +} + +void TMemoryLimiterActor::Handle(NEvents::TEvExternal::TEvStartTask::TPtr& ev) { + for (auto&& i : ev->Get()->GetAllocations()) { + Manager->RegisterAllocation(ev->Get()->GetExternalProcessId(), ev->Get()->GetExternalScopeId(), ev->Get()->GetExternalGroupId(), i, + ev->Get()->GetStageFeaturesIdx()); + } +} + +void TMemoryLimiterActor::Handle(NEvents::TEvExternal::TEvFinishTask::TPtr& ev) { + Manager->UnregisterAllocation(ev->Get()->GetExternalProcessId(), ev->Get()->GetExternalScopeId(), ev->Get()->GetAllocationId()); +} + +void TMemoryLimiterActor::Handle(NEvents::TEvExternal::TEvUpdateTask::TPtr& ev) { + Manager->UpdateAllocation( + ev->Get()->GetExternalProcessId(), ev->Get()->GetExternalScopeId(), ev->Get()->GetAllocationId(), ev->Get()->GetVolume()); +} + +void TMemoryLimiterActor::Handle(NEvents::TEvExternal::TEvFinishGroup::TPtr& ev) { + Manager->UnregisterGroup(ev->Get()->GetExternalProcessId(), ev->Get()->GetExternalScopeId(), ev->Get()->GetExternalGroupId()); +} + +void TMemoryLimiterActor::Handle(NEvents::TEvExternal::TEvStartGroup::TPtr& ev) { + Manager->RegisterGroup(ev->Get()->GetExternalProcessId(), ev->Get()->GetExternalScopeId(), ev->Get()->GetExternalGroupId()); +} + +void TMemoryLimiterActor::Handle(NEvents::TEvExternal::TEvFinishProcess::TPtr& ev) { + Manager->UnregisterProcess(ev->Get()->GetExternalProcessId()); +} + +void TMemoryLimiterActor::Handle(NEvents::TEvExternal::TEvStartProcess::TPtr& ev) { + Manager->RegisterProcess(ev->Get()->GetExternalProcessId(), ev->Get()->GetStages()); +} + +void TMemoryLimiterActor::Handle(NEvents::TEvExternal::TEvFinishProcessScope::TPtr& ev) { + Manager->UnregisterProcessScope(ev->Get()->GetExternalProcessId(), ev->Get()->GetExternalScopeId()); +} + +void TMemoryLimiterActor::Handle(NEvents::TEvExternal::TEvStartProcessScope::TPtr& ev) { + Manager->RegisterProcessScope(ev->Get()->GetExternalProcessId(), 
ev->Get()->GetExternalScopeId()); +} + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/service/actor.h b/ydb/core/tx/limiter/grouped_memory/service/actor.h new file mode 100644 index 000000000000..4b4506ba5b99 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/actor.h @@ -0,0 +1,59 @@ +#pragma once +#include "counters.h" +#include "manager.h" + +#include +#include + +#include +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { +class TManager; +class TMemoryLimiterActor: public NActors::TActorBootstrapped { +private: + std::shared_ptr Manager; + const TConfig Config; + const TString Name; + const std::shared_ptr Signals; + const std::shared_ptr DefaultStage; + +public: + TMemoryLimiterActor(const TConfig& config, const TString& name, const std::shared_ptr& signals, + const std::shared_ptr& defaultStage) + : Config(config) + , Name(name) + , Signals(signals) + , DefaultStage(defaultStage) { + } + + void Handle(NEvents::TEvExternal::TEvStartTask::TPtr& ev); + void Handle(NEvents::TEvExternal::TEvFinishTask::TPtr& ev); + void Handle(NEvents::TEvExternal::TEvUpdateTask::TPtr& ev); + void Handle(NEvents::TEvExternal::TEvStartGroup::TPtr& ev); + void Handle(NEvents::TEvExternal::TEvFinishGroup::TPtr& ev); + void Handle(NEvents::TEvExternal::TEvStartProcess::TPtr& ev); + void Handle(NEvents::TEvExternal::TEvFinishProcess::TPtr& ev); + void Handle(NEvents::TEvExternal::TEvStartProcessScope::TPtr& ev); + void Handle(NEvents::TEvExternal::TEvFinishProcessScope::TPtr& ev); + + void Bootstrap(); + + STFUNC(StateWait) { + switch (ev->GetTypeRewrite()) { + hFunc(NEvents::TEvExternal::TEvStartTask, Handle); + hFunc(NEvents::TEvExternal::TEvFinishTask, Handle); + hFunc(NEvents::TEvExternal::TEvUpdateTask, Handle); + hFunc(NEvents::TEvExternal::TEvStartGroup, Handle); + hFunc(NEvents::TEvExternal::TEvFinishGroup, Handle); + hFunc(NEvents::TEvExternal::TEvStartProcess, Handle); + 
hFunc(NEvents::TEvExternal::TEvFinishProcess, Handle); + hFunc(NEvents::TEvExternal::TEvStartProcessScope, Handle); + hFunc(NEvents::TEvExternal::TEvFinishProcessScope, Handle); + default: + AFL_VERIFY(false)("ev_type", ev->GetTypeName()); + } + } +}; + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/service/allocation.cpp b/ydb/core/tx/limiter/grouped_memory/service/allocation.cpp new file mode 100644 index 000000000000..2d04be2c9cef --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/allocation.cpp @@ -0,0 +1,26 @@ +#include "allocation.h" +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +TAllocationInfo::TAllocationInfo(const ui64 processId, const ui64 scopeId, const ui64 allocationInternalGroupId, + const std::shared_ptr& allocation, + const std::shared_ptr& stage) + : Allocation(allocation) + , AllocationInternalGroupId(allocationInternalGroupId) + , Identifier(TValidator::CheckNotNull(Allocation)->GetIdentifier()) + , ProcessId(processId) + , ScopeId(scopeId) + , Stage(stage) { + AFL_VERIFY(Stage); + AFL_VERIFY(Allocation); + AFL_INFO(NKikimrServices::GROUPED_MEMORY_LIMITER)("event", "add")("id", Allocation->GetIdentifier())("stage", Stage->GetName()); + AllocatedVolume = Allocation->GetMemory(); + Stage->Add(AllocatedVolume, Allocation->IsAllocated()); + if (allocation->IsAllocated()) { + AFL_INFO(NKikimrServices::GROUPED_MEMORY_LIMITER)("event", "allocated_on_add")("allocation_id", Identifier)("stage", Stage->GetName()); + Allocation = nullptr; + } +} + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/service/allocation.h b/ydb/core/tx/limiter/grouped_memory/service/allocation.h new file mode 100644 index 000000000000..dcbf2971367c --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/allocation.h @@ -0,0 +1,77 @@ +#pragma once +#include +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +enum class 
EAllocationStatus { + Allocated, + Waiting, + Failed +}; + +class TAllocationInfo: public NColumnShard::TMonitoringObjectsCounter { +private: + std::shared_ptr Allocation; + YDB_READONLY(ui64, AllocationInternalGroupId, 0); + ui64 AllocatedVolume = 0; + YDB_READONLY(ui64, Identifier, 0); + YDB_READONLY(ui64, ProcessId, 0); + YDB_READONLY(ui64, ScopeId, 0); + const std::shared_ptr Stage; + bool AllocationFailed = false; + +public: + ~TAllocationInfo() { + if (GetAllocationStatus() != EAllocationStatus::Failed) { + Stage->Free(AllocatedVolume, GetAllocationStatus() == EAllocationStatus::Allocated); + } + + AFL_TRACE(NKikimrServices::GROUPED_MEMORY_LIMITER)("event", "destroy")("allocation_id", Identifier)("stage", Stage->GetName()); + } + + bool IsAllocatable(const ui64 additional) const { + return Stage->IsAllocatable(AllocatedVolume, additional); + } + + void SetAllocatedVolume(const ui64 value) { + AFL_VERIFY(GetAllocationStatus() != EAllocationStatus::Failed); + Stage->UpdateVolume(AllocatedVolume, value, GetAllocationStatus() == EAllocationStatus::Allocated); + AllocatedVolume = value; + } + + ui64 GetAllocatedVolume() const { + return AllocatedVolume; + } + + [[nodiscard]] bool Allocate(const NActors::TActorId& ownerId) { + AFL_TRACE(NKikimrServices::GROUPED_MEMORY_LIMITER)("event", "allocated")("allocation_id", Identifier)("stage", Stage->GetName()); + AFL_VERIFY(Allocation)("status", GetAllocationStatus())("volume", AllocatedVolume)("id", Identifier)("stage", Stage->GetName())( + "allocation_internal_group_id", AllocationInternalGroupId); + const bool result = Allocation->OnAllocated( + std::make_shared(ProcessId, ScopeId, Allocation->GetIdentifier(), ownerId, Allocation->GetMemory()), Allocation); + if (result) { + Stage->Allocate(AllocatedVolume); + } else { + Stage->Free(AllocatedVolume, false); + AllocationFailed = true; + } + Allocation = nullptr; + return result; + } + + EAllocationStatus GetAllocationStatus() const { + if (AllocationFailed) { + return 
EAllocationStatus::Failed; + } else if (Allocation) { + return EAllocationStatus::Waiting; + } else { + return EAllocationStatus::Allocated; + } + } + + TAllocationInfo(const ui64 processId, const ui64 scopeId, const ui64 allocationInternalGroupId, + const std::shared_ptr& allocation, const std::shared_ptr& stage); +}; + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/service/counters.cpp b/ydb/core/tx/limiter/grouped_memory/service/counters.cpp new file mode 100644 index 000000000000..d780bfd499d2 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/counters.cpp @@ -0,0 +1,5 @@ +#include "counters.h" + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +} diff --git a/ydb/core/tx/limiter/grouped_memory/service/counters.h b/ydb/core/tx/limiter/grouped_memory/service/counters.h new file mode 100644 index 000000000000..3c96b3b8b9a4 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/counters.h @@ -0,0 +1,62 @@ +#pragma once +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +class TStageCounters: public NColumnShard::TCommonCountersOwner { +private: + using TBase = NColumnShard::TCommonCountersOwner; + NMonitoring::TDynamicCounters::TCounterPtr AllocatedBytes; + NMonitoring::TDynamicCounters::TCounterPtr AllocatedChunks; + NMonitoring::TDynamicCounters::TCounterPtr WaitingBytes; + NMonitoring::TDynamicCounters::TCounterPtr WaitingChunks; + +public: + TStageCounters(const TCommonCountersOwner& owner, const TString& name) + : TBase(owner, "stage", name) + , AllocatedBytes(TBase::GetValue("Allocated/Bytes")) + , AllocatedChunks(TBase::GetValue("Allocated/Count")) + , WaitingBytes(TBase::GetValue("Waiting/Bytes")) + , WaitingChunks(TBase::GetValue("Waiting/Count")) { + } + + void Add(const ui64 volume, const bool allocated) { + if (allocated) { + AllocatedBytes->Add(volume); + AllocatedChunks->Add(1); + } else { + WaitingBytes->Add(volume); + WaitingChunks->Add(1); + } + } + + void 
Sub(const ui64 volume, const bool allocated) { + if (allocated) { + AllocatedBytes->Sub(volume); + AllocatedChunks->Sub(1); + } else { + WaitingBytes->Sub(volume); + WaitingChunks->Sub(1); + } + } +}; + +class TCounters: public NColumnShard::TCommonCountersOwner { +private: + using TBase = NColumnShard::TCommonCountersOwner; + +public: + NMonitoring::TDynamicCounters::TCounterPtr GroupsCount; + NMonitoring::TDynamicCounters::TCounterPtr ProcessesCount; + TCounters(const TIntrusivePtr<::NMonitoring::TDynamicCounters>& counters, const TString& name) + : TBase(NColumnShard::TCommonCountersOwner("grouped_memory_limiter", counters), "limiter_name", name) + , GroupsCount(TBase::GetValue("Groups/Count")) + , ProcessesCount(TBase::GetValue("Processes/Count")) { + } + + std::shared_ptr BuildStageCounters(const TString& stageName) const { + return std::make_shared(*this, stageName); + } +}; + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/service/group.cpp b/ydb/core/tx/limiter/grouped_memory/service/group.cpp new file mode 100644 index 000000000000..3bf671ff76b9 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/group.cpp @@ -0,0 +1,68 @@ +#include "group.h" +#include "process.h" + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +std::vector> TGrouppedAllocations::AllocatePossible(const ui32 allocationsLimit) { + std::vector> result; + ui64 allocationMemory = 0; + ui32 allocationsCount = 0; + for (auto&& [_, allocation] : Allocations) { + if (allocation->IsAllocatable(allocationMemory)) { + allocationMemory += allocation->GetAllocatedVolume(); + result.emplace_back(allocation); + if (++allocationsCount == allocationsLimit) { + return result; + } + } + } + return result; +} + +bool TAllocationGroups::Allocate(const bool isPriorityProcess, TProcessMemoryScope& process, const ui32 allocationsLimit) { + AFL_DEBUG(NKikimrServices::GROUPED_MEMORY_LIMITER)("event", "try_allocation")("limit", allocationsLimit)( 
+ "external_process_id", process.ExternalProcessId)("forced_internal_group_id", process.GroupIds.GetMinInternalIdOptional())( + "external_scope_id", process.ExternalScopeId)("forced_external_group_id", process.GroupIds.GetMinExternalIdOptional()); + ui32 allocationsCount = 0; + while (true) { + std::vector toRemove; + for (auto it = Groups.begin(); it != Groups.end();) { + const ui64 internalGroupId = it->first; + const bool forced = isPriorityProcess && internalGroupId == process.GroupIds.GetMinInternalIdVerified(); + std::vector> allocated; + if (forced) { + allocated = it->second.ExtractAllocationsToVector(); + } else if (allocationsLimit) { + allocated = it->second.AllocatePossible(allocationsLimit - allocationsCount); + } else { + break; + } + for (auto&& i : allocated) { + if (!i->Allocate(process.OwnerActorId)) { + toRemove.emplace_back(i->GetIdentifier()); + } else if (!forced) { + AFL_VERIFY(++allocationsCount <= allocationsLimit)("count", allocationsCount)("limit", allocationsLimit); + } + if (!forced) { + AFL_VERIFY(it->second.Remove(i)); + } + } + if (!it->second.IsEmpty()) { + break; + } + it = Groups.erase(it); + if (!forced && allocationsCount == allocationsLimit) { + break; + } + } + for (auto&& i : toRemove) { + process.UnregisterAllocation(i); + } + if (toRemove.empty() || allocationsCount == allocationsLimit) { + break; + } + } + return allocationsCount; +} + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/service/group.h b/ydb/core/tx/limiter/grouped_memory/service/group.h new file mode 100644 index 000000000000..1c988081b02e --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/group.h @@ -0,0 +1,92 @@ +#pragma once +#include "allocation.h" + +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +class TProcessMemoryScope; + +class TGrouppedAllocations: public NColumnShard::TMonitoringObjectsCounter { +private: + THashMap> Allocations; + +public: + std::vector> 
ExtractAllocationsToVector() { + std::vector> result; + result.reserve(Allocations.size()); + for (auto&& i : Allocations) { + result.emplace_back(std::move(i.second)); + } + Allocations.clear(); + return result; + } + + const THashMap>& GetAllocations() const { + return Allocations; + } + + bool IsEmpty() const { + return Allocations.empty(); + } + + void AddAllocation(const std::shared_ptr& allocation) { + AFL_VERIFY(Allocations.emplace(allocation->GetIdentifier(), allocation).second); + } + + [[nodiscard]] bool Remove(const std::shared_ptr& allocation) { + return Allocations.erase(allocation->GetIdentifier()); + } + + std::vector> AllocatePossible(const ui32 allocationsLimit); +}; + +class TAllocationGroups { +private: + std::map Groups; + +public: + bool IsEmpty() const { + return Groups.empty(); + } + + [[nodiscard]] bool Allocate(const bool isPriorityProcess, TProcessMemoryScope& process, const ui32 allocationsLimit); + + [[nodiscard]] std::vector> ExtractGroup(const ui64 id) { + auto it = Groups.find(id); + if (it == Groups.end()) { + return {}; + } + auto result = it->second.ExtractAllocationsToVector(); + Groups.erase(it); + return result; + } + + std::optional GetMinGroupId() const { + if (Groups.size()) { + return Groups.begin()->first; + } else { + return std::nullopt; + } + } + + [[nodiscard]] bool RemoveAllocation(const ui64 internalGroupId, const std::shared_ptr& allocation) { + auto groupIt = Groups.find(internalGroupId); + if (groupIt == Groups.end()) { + return false; + } + if (!groupIt->second.Remove(allocation)) { + return false; + } + if (groupIt->second.IsEmpty()) { + Groups.erase(groupIt); + } + return true; + } + + void AddAllocation(const ui64 internalGroupId, const std::shared_ptr& allocation) { + Groups[internalGroupId].AddAllocation(allocation); + } +}; + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/service/ids.cpp b/ydb/core/tx/limiter/grouped_memory/service/ids.cpp new file mode 
100644 index 000000000000..d550e6374ef3 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/ids.cpp @@ -0,0 +1,77 @@ +#include "ids.h" +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +ui64 TIdsControl::ExtractInternalIdVerified(const ui64 externalId) { + auto it = ExternalIdIntoInternalId.find(externalId); + AFL_VERIFY(it != ExternalIdIntoInternalId.end())("external_id", externalId); + const ui64 result = it->second; + InternalIdIntoExternalId.erase(result); + ExternalIdIntoInternalId.erase(it); + return result; +} + +std::optional TIdsControl::ExtractInternalIdOptional(const ui64 externalId) { + auto it = ExternalIdIntoInternalId.find(externalId); + if (it == ExternalIdIntoInternalId.end()) { + return std::nullopt; + } + const ui64 result = it->second; + InternalIdIntoExternalId.erase(result); + ExternalIdIntoInternalId.erase(it); + return result; +} + +std::optional TIdsControl::GetInternalIdOptional(const ui64 externalId) const { + auto it = ExternalIdIntoInternalId.find(externalId); + if (it != ExternalIdIntoInternalId.end()) { + return it->second; + } + return std::nullopt; +} + +ui64 TIdsControl::GetMinInternalIdVerified() const { + AFL_VERIFY(InternalIdIntoExternalId.size()); + return InternalIdIntoExternalId.begin()->first; +} + +ui64 TIdsControl::GetInternalIdVerified(const ui64 externalId) const { + auto it = ExternalIdIntoInternalId.find(externalId); + AFL_VERIFY(it != ExternalIdIntoInternalId.end())("external_id", externalId); + return it->second; +} + +ui64 TIdsControl::RegisterExternalId(const ui64 externalId) { + AFL_VERIFY(ExternalIdIntoInternalId.emplace(externalId, ++CurrentInternalId).second); + InternalIdIntoExternalId.emplace(CurrentInternalId, externalId); + return CurrentInternalId; +} + +ui64 TIdsControl::RegisterExternalIdOrGet(const ui64 externalId) { + auto it = ExternalIdIntoInternalId.find(externalId); + if (it != ExternalIdIntoInternalId.end()) { + return it->second; + } + 
AFL_VERIFY(ExternalIdIntoInternalId.emplace(externalId, ++CurrentInternalId).second); + InternalIdIntoExternalId.emplace(CurrentInternalId, externalId); + return CurrentInternalId; +} + +bool TIdsControl::UnregisterExternalId(const ui64 externalId) { + auto it = ExternalIdIntoInternalId.find(externalId); + if (it == ExternalIdIntoInternalId.end()) { + return false; + } + AFL_VERIFY(InternalIdIntoExternalId.erase(it->second)); + ExternalIdIntoInternalId.erase(it); + return true; +} + +ui64 TIdsControl::GetExternalIdVerified(const ui64 internalId) const { + auto it = InternalIdIntoExternalId.find(internalId); + AFL_VERIFY(it != InternalIdIntoExternalId.end()); + return it->second; +} + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/service/ids.h b/ydb/core/tx/limiter/grouped_memory/service/ids.h new file mode 100644 index 000000000000..93a0eaf120f3 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/ids.h @@ -0,0 +1,71 @@ +#pragma once +#include + +#include + +#include +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +class TIdsControl { +private: + THashMap ExternalIdIntoInternalId; + std::map InternalIdIntoExternalId; + ui64 CurrentInternalId = 0; + +public: + void Clear() { + ExternalIdIntoInternalId.clear(); + InternalIdIntoExternalId.clear(); + } + + const std::map& GetInternalIdToExternalIds() const { + return InternalIdIntoExternalId; + } + + ui64 GetSize() const { + return InternalIdIntoExternalId.size(); + } + + [[nodiscard]] ui64 ExtractInternalIdVerified(const ui64 externalId); + [[nodiscard]] std::optional ExtractInternalIdOptional(const ui64 externalId); + + ui64 GetMinInternalIdVerified() const; + ui64 GetExternalIdVerified(const ui64 internalId) const; + + std::optional GetInternalIdOptional(const ui64 externalId) const; + + ui64 GetInternalIdVerified(const ui64 externalId) const; + + [[nodiscard]] ui64 RegisterExternalId(const ui64 externalId); + [[nodiscard]] ui64 
RegisterExternalIdOrGet(const ui64 externalId); + + [[nodiscard]] bool UnregisterExternalId(const ui64 externalId); + + std::optional GetMinInternalIdOptional() const { + if (InternalIdIntoExternalId.size()) { + return InternalIdIntoExternalId.begin()->first; + } else { + return std::nullopt; + } + } + + std::optional GetMinExternalIdOptional() const { + if (InternalIdIntoExternalId.size()) { + return InternalIdIntoExternalId.begin()->second; + } else { + return std::nullopt; + } + } + + ui64 GetMinInternalIdDef(const ui64 def) const { + if (InternalIdIntoExternalId.size()) { + return InternalIdIntoExternalId.begin()->first; + } else { + return def; + } + } +}; + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/service/manager.cpp b/ydb/core/tx/limiter/grouped_memory/service/manager.cpp new file mode 100644 index 000000000000..96fe8bcefc17 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/manager.cpp @@ -0,0 +1,122 @@ +#include "manager.h" + +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +TProcessMemory* TManager::GetProcessMemoryByExternalIdOptional(const ui64 externalProcessId) { + auto internalId = ProcessIds.GetInternalIdOptional(externalProcessId); + if (!internalId) { + return nullptr; + } + return GetProcessMemoryOptional(*internalId); +} + +void TManager::RegisterGroup(const ui64 externalProcessId, const ui64 externalScopeId, const ui64 externalGroupId) { + AFL_DEBUG(NKikimrServices::GROUPED_MEMORY_LIMITER)("event", "register_group")("external_process_id", externalProcessId)( + "external_group_id", externalGroupId)("size", ProcessIds.GetSize())("external_scope_id", externalScopeId); + if (auto* process = GetProcessMemoryByExternalIdOptional(externalProcessId)) { + process->RegisterGroup(externalScopeId, externalGroupId); + } + RefreshSignals(); +} + +void TManager::UnregisterGroup(const ui64 externalProcessId, const ui64 externalScopeId, const ui64 externalGroupId) { + 
AFL_DEBUG(NKikimrServices::GROUPED_MEMORY_LIMITER)("event", "unregister_group")("external_process_id", externalProcessId)( + "external_group_id", externalGroupId)("size", ProcessIds.GetSize()); + if (auto* process = GetProcessMemoryByExternalIdOptional(externalProcessId)) { + process->UnregisterGroup(externalScopeId, externalGroupId); + } + RefreshSignals(); +} + +void TManager::UpdateAllocation(const ui64 externalProcessId, const ui64 externalScopeId, const ui64 allocationId, const ui64 volume) { + TProcessMemory& process = GetProcessMemoryVerified(ProcessIds.GetInternalIdVerified(externalProcessId)); + if (process.UpdateAllocation(externalScopeId, allocationId, volume)) { + TryAllocateWaiting(); + } + + RefreshSignals(); +} + +void TManager::TryAllocateWaiting() { + if (Processes.size()) { + auto it = Processes.find(ProcessIds.GetMinInternalIdVerified()); + AFL_VERIFY(it != Processes.end()); + AFL_VERIFY(it->second.IsPriorityProcess()); + it->second.TryAllocateWaiting(0); + } + while (true) { + bool found = false; + for (auto&& i : Processes) { + if (i.second.TryAllocateWaiting(1)) { + found = true; + } + } + if (!found) { + break; + } + } + RefreshSignals(); +} + +void TManager::UnregisterAllocation(const ui64 externalProcessId, const ui64 externalScopeId, const ui64 allocationId) { + if (auto* process = GetProcessMemoryByExternalIdOptional(externalProcessId)) { + if (process->UnregisterAllocation(externalScopeId, allocationId)) { + TryAllocateWaiting(); + } + } + RefreshSignals(); +} + +void TManager::RegisterAllocation(const ui64 externalProcessId, const ui64 externalScopeId, const ui64 externalGroupId, + const std::shared_ptr& task, const std::optional& stageIdx) { + if (auto* process = GetProcessMemoryByExternalIdOptional(externalProcessId)) { + process->RegisterAllocation(externalScopeId, externalGroupId, task, stageIdx); + } else { + AFL_VERIFY(!task->OnAllocated(std::make_shared(externalProcessId, externalScopeId, task->GetIdentifier(), OwnerActorId, 
task->GetMemory()), task))( + "ext_group", externalGroupId)("stage_idx", stageIdx); + } + RefreshSignals(); +} + +void TManager::RegisterProcess(const ui64 externalProcessId, const std::vector>& stages) { + auto internalId = ProcessIds.GetInternalIdOptional(externalProcessId); + if (!internalId) { + const ui64 internalProcessId = ProcessIds.RegisterExternalIdOrGet(externalProcessId); + AFL_VERIFY(Processes.emplace(internalProcessId, TProcessMemory(externalProcessId, OwnerActorId, Processes.empty(), stages, DefaultStage)).second); + } else { + ++Processes.find(*internalId)->second.MutableLinksCount(); + } + RefreshSignals(); +} + +void TManager::UnregisterProcess(const ui64 externalProcessId) { + const ui64 internalProcessId = ProcessIds.GetInternalIdVerified(externalProcessId); + auto it = Processes.find(internalProcessId); + AFL_VERIFY(it != Processes.end()); + if (--it->second.MutableLinksCount()) { + return; + } + Y_UNUSED(ProcessIds.ExtractInternalIdVerified(externalProcessId)); + it->second.Unregister(); + Processes.erase(it); + const ui64 nextInternalProcessId = ProcessIds.GetMinInternalIdDef(internalProcessId); + if (internalProcessId < nextInternalProcessId) { + GetProcessMemoryVerified(nextInternalProcessId).SetPriorityProcess(); + TryAllocateWaiting(); + } + RefreshSignals(); +} + +void TManager::RegisterProcessScope(const ui64 externalProcessId, const ui64 externalProcessScopeId) { + GetProcessMemoryVerified(ProcessIds.GetInternalIdVerified(externalProcessId)).RegisterScope(externalProcessScopeId); + RefreshSignals(); +} + +void TManager::UnregisterProcessScope(const ui64 externalProcessId, const ui64 externalProcessScopeId) { + GetProcessMemoryVerified(ProcessIds.GetInternalIdVerified(externalProcessId)).UnregisterScope(externalProcessScopeId); + RefreshSignals(); +} + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/service/manager.h b/ydb/core/tx/limiter/grouped_memory/service/manager.h new file 
mode 100644 index 000000000000..fd641a3f69b1 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/manager.h @@ -0,0 +1,79 @@ +#pragma once +#include "counters.h" +#include "process.h" + +#include +#include + +#include +#include + +#include +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +class TManager { +private: + const TConfig Config; + const TString Name; + const std::shared_ptr Signals; + const NActors::TActorId OwnerActorId; + THashMap Processes; + std::shared_ptr DefaultStage; + TIdsControl ProcessIds; + + void TryAllocateWaiting(); + void RefreshSignals() const { + Signals->ProcessesCount->Set(Processes.size()); + } + + TProcessMemory& GetProcessMemoryVerified(const ui64 internalProcessId) { + auto it = Processes.find(internalProcessId); + AFL_VERIFY(it != Processes.end()); + return it->second; + } + + TProcessMemory* GetProcessMemoryByExternalIdOptional(const ui64 externalProcessId); + + TProcessMemory* GetProcessMemoryOptional(const ui64 internalProcessId) { + auto it = Processes.find(internalProcessId); + if (it != Processes.end()) { + return &it->second; + } else { + return nullptr; + } + } + +public: + TManager(const NActors::TActorId& ownerActorId, const TConfig& config, const TString& name, const std::shared_ptr& signals, + const std::shared_ptr& defaultStage) + : Config(config) + , Name(name) + , Signals(signals) + , OwnerActorId(ownerActorId) + , DefaultStage(defaultStage) + { + } + + void RegisterGroup(const ui64 externalProcessId, const ui64 externalScopeId, const ui64 externalGroupId); + void UnregisterGroup(const ui64 externalProcessId, const ui64 externalScopeId, const ui64 externalGroupId); + + void RegisterProcessScope(const ui64 externalProcessId, const ui64 externalScopeId); + void UnregisterProcessScope(const ui64 externalProcessId, const ui64 externalScopeId); + + void RegisterProcess(const ui64 externalProcessId, const std::vector>& stages); + void UnregisterProcess(const ui64 externalProcessId); + + void 
RegisterAllocation(const ui64 externalProcessId, const ui64 externalScopeId, const ui64 externalGroupId, + const std::shared_ptr& task, + const std::optional& stageIdx); + void UnregisterAllocation(const ui64 externalProcessId, const ui64 externalScopeId, const ui64 allocationId); + void UpdateAllocation(const ui64 externalProcessId, const ui64 externalScopeId, const ui64 allocationId, const ui64 volume); + + bool IsEmpty() const { + return Processes.empty(); + } +}; + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/service/process.cpp b/ydb/core/tx/limiter/grouped_memory/service/process.cpp new file mode 100644 index 000000000000..bcde6532e797 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/process.cpp @@ -0,0 +1,5 @@ +#include "process.h" + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/service/process.h b/ydb/core/tx/limiter/grouped_memory/service/process.h new file mode 100644 index 000000000000..3a53ff542750 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/process.h @@ -0,0 +1,271 @@ +#pragma once +#include "group.h" +#include "ids.h" + +#include + +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +class TProcessMemoryScope: public NColumnShard::TMonitoringObjectsCounter { +private: + const ui64 ExternalProcessId; + const ui64 ExternalScopeId; + TAllocationGroups WaitAllocations; + THashMap> AllocationInfo; + TIdsControl GroupIds; + ui32 Links = 1; + const NActors::TActorId OwnerActorId; + + TAllocationInfo& GetAllocationInfoVerified(const ui64 allocationId) const { + auto it = AllocationInfo.find(allocationId); + AFL_VERIFY(it != AllocationInfo.end()); + return *it->second; + } + + void UnregisterGroupImpl(const ui64 internalGroupId) { + auto data = WaitAllocations.ExtractGroup(internalGroupId); + for (auto&& allocation : data) { + 
AFL_VERIFY(!allocation->Allocate(OwnerActorId)); + } + } + + const std::shared_ptr& RegisterAllocationImpl( + const ui64 internalGroupId, const std::shared_ptr& task, const std::shared_ptr& stage) { + auto it = AllocationInfo.find(task->GetIdentifier()); + if (it == AllocationInfo.end()) { + it = AllocationInfo + .emplace(task->GetIdentifier(), + std::make_shared(ExternalProcessId, ExternalScopeId, internalGroupId, task, stage)) + .first; + } + return it->second; + } + + friend class TAllocationGroups; + +public: + TProcessMemoryScope(const ui64 externalProcessId, const ui64 externalScopeId, const NActors::TActorId& ownerActorId) + : ExternalProcessId(externalProcessId) + , ExternalScopeId(externalScopeId) + , OwnerActorId(ownerActorId) { + } + + void Register() { + ++Links; + } + + [[nodiscard]] bool Unregister() { + if (--Links) { + return false; + } + for (auto&& [i, _] : GroupIds.GetInternalIdToExternalIds()) { + UnregisterGroupImpl(i); + } + GroupIds.Clear(); + AllocationInfo.clear(); + AFL_INFO(NKikimrServices::GROUPED_MEMORY_LIMITER)("event", "scope_cleaned")("process_id", ExternalProcessId)( + "external_scope_id", ExternalScopeId); + return true; + } + + void RegisterAllocation(const bool isPriorityProcess, const ui64 externalGroupId, const std::shared_ptr& task, + const std::shared_ptr& stage) { + AFL_VERIFY(task); + AFL_VERIFY(stage); + const std::optional internalGroupIdOptional = GroupIds.GetInternalIdOptional(externalGroupId); + if (!internalGroupIdOptional) { + AFL_VERIFY(!task->OnAllocated(std::make_shared(ExternalProcessId, ExternalScopeId, task->GetIdentifier(), OwnerActorId, task->GetMemory()), task))( + "ext_group", externalGroupId)( + "min_group", GroupIds.GetMinInternalIdOptional())("stage", stage->GetName()); + AFL_VERIFY(!AllocationInfo.contains(task->GetIdentifier())); + } else { + const ui64 internalGroupId = *internalGroupIdOptional; + auto allocationInfo = RegisterAllocationImpl(internalGroupId, task, stage); + + if 
(allocationInfo->GetAllocationStatus() != EAllocationStatus::Waiting) { + } else if (WaitAllocations.GetMinGroupId().value_or(internalGroupId) < internalGroupId) { + WaitAllocations.AddAllocation(internalGroupId, allocationInfo); + } else if (allocationInfo->IsAllocatable(0) || (isPriorityProcess && internalGroupId == GroupIds.GetMinInternalIdVerified())) { + Y_UNUSED(WaitAllocations.RemoveAllocation(internalGroupId, allocationInfo)); + if (!allocationInfo->Allocate(OwnerActorId)) { + UnregisterAllocation(allocationInfo->GetIdentifier()); + } + } else { + WaitAllocations.AddAllocation(internalGroupId, allocationInfo); + } + } + } + + bool UpdateAllocation(const ui64 allocationId, const ui64 volume) { + GetAllocationInfoVerified(allocationId).SetAllocatedVolume(volume); + return true; + } + + bool TryAllocateWaiting(const bool isPriorityProcess, const ui32 allocationsCountLimit) { + return WaitAllocations.Allocate(isPriorityProcess, *this, allocationsCountLimit); + } + + bool UnregisterAllocation(const ui64 allocationId) { + ui64 memoryAllocated = 0; + auto it = AllocationInfo.find(allocationId); + if (it == AllocationInfo.end()) { + AFL_WARN(NKikimrServices::GROUPED_MEMORY_LIMITER)("reason", "allocation_cleaned_in_previous_scope_id_live")( + "allocation_id", allocationId)("process_id", ExternalProcessId)("external_scope_id", ExternalScopeId); + return true; + } + bool waitFlag = false; + const ui64 internalGroupId = it->second->GetAllocationInternalGroupId(); + switch (it->second->GetAllocationStatus()) { + case EAllocationStatus::Allocated: + case EAllocationStatus::Failed: + AFL_VERIFY(!WaitAllocations.RemoveAllocation(internalGroupId, it->second)); + break; + case EAllocationStatus::Waiting: + AFL_VERIFY(WaitAllocations.RemoveAllocation(internalGroupId, it->second)); + waitFlag = true; + break; + } + AFL_DEBUG(NKikimrServices::GROUPED_MEMORY_LIMITER)("event", "allocation_unregister")("allocation_id", allocationId)("wait", waitFlag)( + "internal_group_id", 
internalGroupId)("allocation_status", it->second->GetAllocationStatus()); + memoryAllocated = it->second->GetAllocatedVolume(); + AllocationInfo.erase(it); + return !!memoryAllocated; + } + + void UnregisterGroup(const bool isPriorityProcess, const ui64 externalGroupId) { + if (auto internalGroupId = GroupIds.ExtractInternalIdOptional(externalGroupId)) { + AFL_INFO(NKikimrServices::GROUPED_MEMORY_LIMITER)("event", "remove_group")("external_group_id", externalGroupId)( + "internal_group_id", internalGroupId); + UnregisterGroupImpl(*internalGroupId); + if (isPriorityProcess && (*internalGroupId < GroupIds.GetMinInternalIdDef(*internalGroupId))) { + Y_UNUSED(TryAllocateWaiting(isPriorityProcess, 0)); + } + } else { + AFL_WARN(NKikimrServices::GROUPED_MEMORY_LIMITER)("event", "remove_absent_group")("external_group_id", externalGroupId); + } + } + + void RegisterGroup(const ui64 externalGroupId) { + Y_UNUSED(GroupIds.RegisterExternalId(externalGroupId)); + } +}; + +class TProcessMemory: public NColumnShard::TMonitoringObjectsCounter { +private: + const ui64 ExternalProcessId; + + const NActors::TActorId OwnerActorId; + bool PriorityProcessFlag = false; + + YDB_ACCESSOR(ui32, LinksCount, 1); + YDB_READONLY_DEF(std::vector>, Stages); + const std::shared_ptr DefaultStage; + THashMap> AllocationScopes; + + TProcessMemoryScope* GetAllocationScopeOptional(const ui64 externalScopeId) const { + auto it = AllocationScopes.find(externalScopeId); + if (it == AllocationScopes.end()) { + return nullptr; + } + return it->second.get(); + } + + TProcessMemoryScope& GetAllocationScopeVerified(const ui64 externalScopeId) const { + return *TValidator::CheckNotNull(GetAllocationScopeOptional(externalScopeId)); + } + +public: + bool IsPriorityProcess() const { + return PriorityProcessFlag; + } + + bool UpdateAllocation(const ui64 externalScopeId, const ui64 allocationId, const ui64 volume) { + return GetAllocationScopeVerified(externalScopeId).UpdateAllocation(allocationId, volume); + } + + 
void RegisterAllocation( + const ui64 externalScopeId, const ui64 externalGroupId, const std::shared_ptr& task, const std::optional& stageIdx) { + AFL_VERIFY(task); + std::shared_ptr stage; + if (Stages.empty()) { + AFL_VERIFY(!stageIdx); + stage = DefaultStage; + } else { + AFL_VERIFY(stageIdx); + AFL_VERIFY(*stageIdx < Stages.size()); + stage = Stages[*stageIdx]; + } + AFL_VERIFY(stage); + auto& scope = GetAllocationScopeVerified(externalScopeId); + scope.RegisterAllocation(IsPriorityProcess(), externalGroupId, task, stage); + } + + bool UnregisterAllocation(const ui64 externalScopeId, const ui64 allocationId) { + if (auto* scope = GetAllocationScopeOptional(externalScopeId)) { + return scope->UnregisterAllocation(allocationId); + } + return false; + } + + void UnregisterGroup(const ui64 externalScopeId, const ui64 externalGroupId) { + if (auto* scope = GetAllocationScopeOptional(externalScopeId)) { + scope->UnregisterGroup(IsPriorityProcess(), externalGroupId); + } + } + + void RegisterGroup(const ui64 externalScopeId, const ui64 externalGroupId) { + GetAllocationScopeVerified(externalScopeId).RegisterGroup(externalGroupId); + } + + void UnregisterScope(const ui64 externalScopeId) { + auto it = AllocationScopes.find(externalScopeId); + AFL_VERIFY(it != AllocationScopes.end()); + if (it->second->Unregister()) { + AllocationScopes.erase(it); + } + } + + void RegisterScope(const ui64 externalScopeId) { + auto it = AllocationScopes.find(externalScopeId); + if (it == AllocationScopes.end()) { + AFL_VERIFY(AllocationScopes.emplace(externalScopeId, std::make_shared(ExternalProcessId, externalScopeId, OwnerActorId)).second); + } else { + it->second->Register(); + } + } + + void SetPriorityProcess() { + AFL_VERIFY(!PriorityProcessFlag); + PriorityProcessFlag = true; + } + + TProcessMemory(const ui64 externalProcessId, const NActors::TActorId& ownerActorId, const bool isPriority, + const std::vector>& stages, const std::shared_ptr& defaultStage) + : 
ExternalProcessId(externalProcessId) + , OwnerActorId(ownerActorId) + , PriorityProcessFlag(isPriority) + , Stages(stages) + , DefaultStage(defaultStage) { + } + + bool TryAllocateWaiting(const ui32 allocationsCountLimit) { + bool allocated = false; + for (auto&& i : AllocationScopes) { + if (i.second->TryAllocateWaiting(IsPriorityProcess(), allocationsCountLimit)) { + allocated = true; + } + } + return allocated; + } + + void Unregister() { + for (auto&& i : AllocationScopes) { + Y_UNUSED(i.second->Unregister()); + } + AllocationScopes.clear(); + } +}; + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/service/ya.make b/ydb/core/tx/limiter/grouped_memory/service/ya.make new file mode 100644 index 000000000000..d67332688426 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/service/ya.make @@ -0,0 +1,20 @@ +LIBRARY() + +SRCS( + actor.cpp + manager.cpp + counters.cpp + group.cpp + process.cpp + allocation.cpp + ids.cpp +) + +PEERDIR( + ydb/core/protos + ydb/core/tx/columnshard/counters/common +) + +GENERATE_ENUM_SERIALIZATION(allocation.h) + +END() diff --git a/ydb/core/tx/limiter/grouped_memory/usage/abstract.cpp b/ydb/core/tx/limiter/grouped_memory/usage/abstract.cpp new file mode 100644 index 000000000000..2d72f0039846 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/usage/abstract.cpp @@ -0,0 +1,79 @@ +#include "abstract.h" +#include "events.h" + +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +TAllocationGuard::~TAllocationGuard() { + if (TlsActivationContext && !Released) { + NActors::TActivationContext::AsActorContext().Send( + ActorId, std::make_unique(ProcessId, ScopeId, AllocationId)); + } +} + +void TAllocationGuard::Update(const ui64 newVolume) { + AFL_VERIFY(!Released); + Memory = newVolume; + if (TlsActivationContext) { + NActors::TActivationContext::AsActorContext().Send( + ActorId, std::make_unique(ProcessId, ScopeId, AllocationId, newVolume)); + } +} + +bool 
IAllocation::OnAllocated(std::shared_ptr&& guard, const std::shared_ptr& allocation) { + AFL_VERIFY(!Allocated); + Allocated = true; + AFL_VERIFY(allocation); + AFL_VERIFY(guard); + return DoOnAllocated(std::move(guard), allocation); +} + +TGroupGuard::~TGroupGuard() { + if (TlsActivationContext) { + NActors::TActivationContext::AsActorContext().Send( + ActorId, std::make_unique(ProcessId, ExternalScopeId, GroupId)); + } +} + +TGroupGuard::TGroupGuard(const NActors::TActorId& actorId, const ui64 processId, const ui64 externalScopeId, const ui64 groupId) + : ActorId(actorId) + , ProcessId(processId) + , ExternalScopeId(externalScopeId) + , GroupId(groupId) { + if (TlsActivationContext) { + NActors::TActivationContext::AsActorContext().Send( + ActorId, std::make_unique(ProcessId, ExternalScopeId, GroupId)); + } +} + +TProcessGuard::~TProcessGuard() { + if (TlsActivationContext) { + NActors::TActivationContext::AsActorContext().Send(ActorId, std::make_unique(ProcessId)); + } +} + +TProcessGuard::TProcessGuard(const NActors::TActorId& actorId, const ui64 processId, const std::vector>& stages) + : ActorId(actorId) + , ProcessId(processId) { + if (TlsActivationContext) { + NActors::TActivationContext::AsActorContext().Send(ActorId, std::make_unique(ProcessId, stages)); + } +} + +TScopeGuard::~TScopeGuard() { + if (TlsActivationContext) { + NActors::TActivationContext::AsActorContext().Send(ActorId, std::make_unique(ProcessId, ScopeId)); + } +} + +TScopeGuard::TScopeGuard(const NActors::TActorId& actorId, const ui64 processId, const ui64 scopeId) + : ActorId(actorId) + , ProcessId(processId) + , ScopeId(scopeId) { + if (TlsActivationContext) { + NActors::TActivationContext::AsActorContext().Send(ActorId, std::make_unique(ProcessId, ScopeId)); + } +} + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/usage/abstract.h b/ydb/core/tx/limiter/grouped_memory/usage/abstract.h new file mode 100644 index 
000000000000..d92120f46fb6 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/usage/abstract.h @@ -0,0 +1,223 @@ +#pragma once +#include + +#include +#include +#include +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +class TStageFeatures; + +class TGroupGuard { +private: + const NActors::TActorId ActorId; + YDB_READONLY(ui64, ProcessId, 0); + YDB_READONLY(ui64, ExternalScopeId, 0); + YDB_READONLY(ui64, GroupId, 0); + +public: + TGroupGuard(const NActors::TActorId& actorId, const ui64 processId, const ui64 externalScopeId, const ui64 groupId); + + ~TGroupGuard(); +}; + +class TProcessGuard { +private: + const NActors::TActorId ActorId; + YDB_READONLY(ui64, ProcessId, 0); + +public: + TProcessGuard(const NActors::TActorId& actorId, const ui64 processId, const std::vector>& stages); + + ~TProcessGuard(); +}; + +class TScopeGuard { +private: + const NActors::TActorId ActorId; + YDB_READONLY(ui64, ProcessId, 0); + YDB_READONLY(ui64, ScopeId, 0); + +public: + TScopeGuard(const NActors::TActorId& actorId, const ui64 processId, const ui64 scopeId); + + ~TScopeGuard(); +}; + +class TAllocationGuard { +private: + const NActors::TActorId ActorId; + YDB_READONLY(ui64, ProcessId, 0) + YDB_READONLY(ui64, ScopeId, 0) + YDB_READONLY(ui64, AllocationId, 0) + YDB_READONLY(ui64, Memory, 0) + bool Released = false; + +public: + TAllocationGuard(const ui64 processId, const ui64 scopeId, const ui64 allocationId, const NActors::TActorId actorId, const ui64 memory) + : ActorId(actorId) + , ProcessId(processId) + , ScopeId(scopeId) + , AllocationId(allocationId) + , Memory(memory) { + } + + void Release() { + AFL_VERIFY(!Released); + Released = true; + } + + void Update(const ui64 newVolume); + + ~TAllocationGuard(); +}; + +class TPositiveControlInteger { +private: + ui64 Value = 0; + +public: + void Add(const ui64 value) { + Value += value; + } + void Sub(const ui64 value) { + AFL_VERIFY(value <= Value); + Value -= value; + } + ui64 Val() const { + return Value; + } 
+}; + +class TStageFeatures { +private: + YDB_READONLY_DEF(TString, Name); + YDB_READONLY(ui64, Limit, 0); + YDB_ACCESSOR_DEF(TPositiveControlInteger, Usage); + YDB_ACCESSOR_DEF(TPositiveControlInteger, Waiting); + std::shared_ptr Owner; + std::shared_ptr Counters; + +public: + TString DebugString() const { + TStringBuilder result; + result << "name=" << Name << ";limit=" << Limit << ";"; + if (Owner) { + result << "owner=" << Owner->DebugString() << ";"; + } + return result; + } + + ui64 GetFullMemory() const { + return Usage.Val() + Waiting.Val(); + } + + TStageFeatures( + const TString& name, const ui64 limit, const std::shared_ptr& owner, const std::shared_ptr& counters) + : Name(name) + , Limit(limit) + , Owner(owner) + , Counters(counters) { + } + + void Allocate(const ui64 volume) { + Waiting.Sub(volume); + Usage.Add(volume); + if (Counters) { + Counters->Add(volume, true); + Counters->Sub(volume, false); + } + if (Owner) { + Owner->Allocate(volume); + } + } + + void Free(const ui64 volume, const bool allocated) { + if (Counters) { + Counters->Sub(volume, allocated); + } + if (allocated) { + Usage.Sub(volume); + } else { + Waiting.Sub(volume); + } + + if (Owner) { + Owner->Free(volume, allocated); + } + } + + void UpdateVolume(const ui64 from, const ui64 to, const bool allocated) { + if (Counters) { + Counters->Sub(from, allocated); + Counters->Add(to, allocated); + } + if (allocated) { + Usage.Sub(from); + Usage.Add(to); + } else { + Waiting.Sub(from); + Waiting.Add(to); + } + + if (Owner) { + Owner->UpdateVolume(from, to, allocated); + } + } + + bool IsAllocatable(const ui64 volume, const ui64 additional) const { + if (Limit < additional + Usage.Val() + volume) { + return false; + } + if (Owner) { + return Owner->IsAllocatable(volume, additional); + } + return true; + } + + void Add(const ui64 volume, const bool allocated) { + if (Counters) { + Counters->Add(volume, allocated); + } + if (allocated) { + Usage.Add(volume); + } else { + Waiting.Add(volume); + 
} + + if (Owner) { + Owner->Add(volume, allocated); + } + } +}; + +class IAllocation { +private: + static inline TAtomicCounter Counter = 0; + YDB_READONLY(ui64, Identifier, Counter.Inc()); + YDB_READONLY(ui64, Memory, 0); + bool Allocated = false; + virtual bool DoOnAllocated( + std::shared_ptr&& guard, const std::shared_ptr& allocation) = 0; + +public: + virtual ~IAllocation() = default; + IAllocation(const ui64 mem) + : Memory(mem) { + } + + void ResetAllocation() { + Allocated = false; + } + + bool IsAllocated() const { + return Allocated; + } + + [[nodiscard]] bool OnAllocated( + std::shared_ptr&& guard, const std::shared_ptr& allocation); +}; + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/usage/config.cpp b/ydb/core/tx/limiter/grouped_memory/usage/config.cpp new file mode 100644 index 000000000000..17fe55975744 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/usage/config.cpp @@ -0,0 +1,20 @@ +#include "config.h" +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +bool TConfig::DeserializeFromProto(const NKikimrConfig::TGroupedMemoryLimiterConfig& config) { + if (config.HasMemoryLimit()) { + MemoryLimit = config.GetMemoryLimit(); + } + Enabled = config.GetEnabled(); + return true; +} + +TString TConfig::DebugString() const { + TStringBuilder sb; + sb << "MemoryLimit=" << MemoryLimit << ";Enabled=" << Enabled << ";"; + return sb; +} + +} diff --git a/ydb/core/tx/limiter/grouped_memory/usage/config.h b/ydb/core/tx/limiter/grouped_memory/usage/config.h new file mode 100644 index 000000000000..91a9b5bc7afe --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/usage/config.h @@ -0,0 +1,27 @@ +#pragma once +#include +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +class TConfig { +private: + YDB_READONLY(bool, Enabled, true); + YDB_READONLY(ui64, MemoryLimit, ui64(3) << 30); + +public: + + static TConfig BuildDisabledConfig() { + TConfig result; + result.Enabled = false; 
+ return result; + } + + bool IsEnabled() const { + return Enabled; + } + bool DeserializeFromProto(const NKikimrConfig::TGroupedMemoryLimiterConfig& config); + TString DebugString() const; +}; + +} diff --git a/ydb/core/tx/limiter/grouped_memory/usage/events.cpp b/ydb/core/tx/limiter/grouped_memory/usage/events.cpp new file mode 100644 index 000000000000..cafd00f2bd82 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/usage/events.cpp @@ -0,0 +1,5 @@ +#include "events.h" + +namespace NKikimr::NOlap::NGroupedMemoryManager::NEvents { + +} diff --git a/ydb/core/tx/limiter/grouped_memory/usage/events.h b/ydb/core/tx/limiter/grouped_memory/usage/events.h new file mode 100644 index 000000000000..d3a8200c584c --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/usage/events.h @@ -0,0 +1,150 @@ +#pragma once +#include "abstract.h" + +#include + +#include +#include +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager::NEvents { +struct TEvExternal { + enum EEv { + EvStartAllocationTask = EventSpaceBegin(TKikimrEvents::ES_GROUPED_ALLOCATIONS_MANAGER), + EvFinishAllocationTask, + EvStartAllocationGroup, + EvFinishAllocationGroup, + EvUpdateAllocationTask, + EvStartAllocationProcess, + EvFinishAllocationProcess, + EvStartAllocationProcessScope, + EvFinishAllocationProcessScope, + EvEnd + }; + + class TEvStartTask: public NActors::TEventLocal { + private: + YDB_READONLY_DEF(std::vector>, Allocations); + YDB_READONLY_DEF(std::optional, StageFeaturesIdx); + YDB_READONLY(ui64, ExternalProcessId, 0); + YDB_READONLY(ui64, ExternalScopeId, 0); + YDB_READONLY(ui64, ExternalGroupId, 0); + + public: + explicit TEvStartTask(const ui64 externalProcessId, const ui64 externalScopeId, + const ui64 externalGroupId, const std::vector>& allocations, + const std::optional& stageFeaturesIdx) + : Allocations(allocations) + , StageFeaturesIdx(stageFeaturesIdx) + , ExternalProcessId(externalProcessId) + , ExternalScopeId(externalScopeId) + , ExternalGroupId(externalGroupId) { + 
AFL_VERIFY(Allocations.size()); + } + }; + + class TEvFinishTask: public NActors::TEventLocal { + private: + YDB_READONLY(ui64, ExternalProcessId, 0); + YDB_READONLY(ui64, ExternalScopeId, 0); + YDB_READONLY(ui64, AllocationId, 0); + + public: + explicit TEvFinishTask(const ui64 externalProcessId, const ui64 externalScopeId, const ui64 allocationId) + : ExternalProcessId(externalProcessId) + , ExternalScopeId(externalScopeId) + , AllocationId(allocationId) { + } + }; + + class TEvUpdateTask: public NActors::TEventLocal { + private: + YDB_READONLY(ui64, ExternalProcessId, 0); + YDB_READONLY(ui64, ExternalScopeId, 0); + YDB_READONLY(ui64, AllocationId, 0); + YDB_READONLY(ui64, Volume, 0); + + public: + explicit TEvUpdateTask(const ui64 externalProcessId, const ui64 externalScopeId, const ui64 allocationId, const ui64 volume) + : ExternalProcessId(externalProcessId) + , ExternalScopeId(externalScopeId) + , AllocationId(allocationId) + , Volume(volume) { + } + }; + + class TEvFinishGroup: public NActors::TEventLocal { + private: + YDB_READONLY(ui64, ExternalProcessId, 0); + YDB_READONLY(ui32, ExternalScopeId, 0); + YDB_READONLY(ui64, ExternalGroupId, 0); + + public: + explicit TEvFinishGroup(const ui64 externalProcessId, const ui32 externalScopeId, const ui64 externalGroupId) + : ExternalProcessId(externalProcessId) + , ExternalScopeId(externalScopeId) + , ExternalGroupId(externalGroupId) { + } + }; + + class TEvStartGroup: public NActors::TEventLocal { + private: + YDB_READONLY(ui64, ExternalProcessId, 0); + YDB_READONLY(ui32, ExternalScopeId, 0); + YDB_READONLY(ui64, ExternalGroupId, 0); + + public: + explicit TEvStartGroup(const ui64 externalProcessId, const ui32 externalScopeId, const ui64 externalGroupId) + : ExternalProcessId(externalProcessId) + , ExternalScopeId(externalScopeId) + , ExternalGroupId(externalGroupId) { + } + }; + + class TEvFinishProcess: public NActors::TEventLocal { + private: + YDB_READONLY(ui64, ExternalProcessId, 0); + + public: + explicit 
TEvFinishProcess(const ui64 externalProcessId) + : ExternalProcessId(externalProcessId) { + } + }; + + class TEvStartProcess: public NActors::TEventLocal { + private: + YDB_READONLY(ui64, ExternalProcessId, 0); + YDB_READONLY_DEF(std::vector>, Stages); + public: + explicit TEvStartProcess(const ui64 externalProcessId, const std::vector>& stages) + : ExternalProcessId(externalProcessId) + , Stages(stages) { + } + }; + + class TEvFinishProcessScope: public NActors::TEventLocal { + private: + YDB_READONLY(ui64, ExternalProcessId, 0); + YDB_READONLY(ui64, ExternalScopeId, 0); + + public: + explicit TEvFinishProcessScope(const ui64 externalProcessId, const ui64 externalScopeId) + : ExternalProcessId(externalProcessId) + , ExternalScopeId(externalScopeId) + { + } + }; + + class TEvStartProcessScope: public NActors::TEventLocal { + private: + YDB_READONLY(ui64, ExternalProcessId, 0); + YDB_READONLY(ui64, ExternalScopeId, 0); + + public: + explicit TEvStartProcessScope(const ui64 externalProcessId, const ui64 externalScopeId) + : ExternalProcessId(externalProcessId) + , ExternalScopeId(externalScopeId) { + } + }; +}; +} // namespace NKikimr::NOlap::NGroupedMemoryManager::NEvents diff --git a/ydb/core/tx/limiter/grouped_memory/usage/service.cpp b/ydb/core/tx/limiter/grouped_memory/usage/service.cpp new file mode 100644 index 000000000000..affe243f11bb --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/usage/service.cpp @@ -0,0 +1,5 @@ +#include "service.h" + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +} diff --git a/ydb/core/tx/limiter/grouped_memory/usage/service.h b/ydb/core/tx/limiter/grouped_memory/usage/service.h new file mode 100644 index 000000000000..8192743218b1 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/usage/service.h @@ -0,0 +1,102 @@ +#pragma once +#include "abstract.h" +#include "config.h" +#include "events.h" + +#include + +#include +#include + +namespace NKikimr::NOlap::NGroupedMemoryManager { + +template +class TServiceOperatorImpl 
{ +private: + TConfig ServiceConfig = TConfig::BuildDisabledConfig(); + std::shared_ptr Counters; + std::shared_ptr DefaultStageFeatures = std::make_shared("DEFAULT", ((ui64)3) << 30, nullptr, nullptr); + using TSelf = TServiceOperatorImpl; + static void Register(const TConfig& serviceConfig, TIntrusivePtr<::NMonitoring::TDynamicCounters> counters) { + Singleton()->Counters = std::make_shared(counters, TMemoryLimiterPolicy::Name); + Singleton()->ServiceConfig = serviceConfig; + Singleton()->DefaultStageFeatures = std::make_shared( + "GLOBAL", serviceConfig.GetMemoryLimit(), nullptr, Singleton()->Counters->BuildStageCounters("general")); + } + static const TString& GetMemoryLimiterName() { + Y_ABORT_UNLESS(TMemoryLimiterPolicy::Name.size() == 4); + return TMemoryLimiterPolicy::Name; + } + +public: + static std::shared_ptr BuildStageFeatures(const TString& name, const ui64 limit) { + if (!IsEnabled()) { + return Singleton()->DefaultStageFeatures; + } else { + AFL_VERIFY(Singleton()->DefaultStageFeatures); + return std::make_shared( + name, limit, Singleton()->DefaultStageFeatures, Singleton()->Counters->BuildStageCounters(name)); + } + } + + static std::shared_ptr GetDefaultStageFeatures() { + AFL_VERIFY(Singleton()->DefaultStageFeatures); + return Singleton()->DefaultStageFeatures; + } + + static std::shared_ptr BuildGroupGuard(const ui64 processId, const ui32 scopeId) { + static TAtomicCounter counter = 0; + auto& context = NActors::TActorContext::AsActorContext(); + const NActors::TActorId& selfId = context.SelfID; + return std::make_shared(MakeServiceId(selfId.NodeId()), processId, scopeId, counter.Inc()); + } + + static std::shared_ptr BuildScopeGuard(const ui64 processId, const ui32 scopeId) { + auto& context = NActors::TActorContext::AsActorContext(); + const NActors::TActorId& selfId = context.SelfID; + return std::make_shared(MakeServiceId(selfId.NodeId()), processId, scopeId); + } + + static std::shared_ptr BuildProcessGuard(const ui64 processId, const 
std::vector>& stages) { + auto& context = NActors::TActorContext::AsActorContext(); + const NActors::TActorId& selfId = context.SelfID; + return std::make_shared(MakeServiceId(selfId.NodeId()), processId, stages); + } + + static bool SendToAllocation(const ui64 processId, const ui64 scopeId, const ui64 groupId, + const std::vector>& tasks, + const std::optional& stageIdx) { + auto& context = NActors::TActorContext::AsActorContext(); + const NActors::TActorId& selfId = context.SelfID; + if (TSelf::IsEnabled()) { + context.Send(MakeServiceId(selfId.NodeId()), new NEvents::TEvExternal::TEvStartTask(processId, scopeId, groupId, tasks, stageIdx)); + return true; + } else { + for (auto&& i : tasks) { + if (!i->IsAllocated()) { + AFL_VERIFY(i->OnAllocated(std::make_shared(0, 0, 0, NActors::TActorId(), i->GetMemory()), i)); + } + } + return false; + } + } + static bool IsEnabled() { + return Singleton()->ServiceConfig.IsEnabled(); + } + static NActors::TActorId MakeServiceId(const ui32 nodeId) { + return NActors::TActorId(nodeId, "SrvcMlmt" + GetMemoryLimiterName()); + } + static NActors::IActor* CreateService(const TConfig& config, TIntrusivePtr<::NMonitoring::TDynamicCounters> signals) { + Register(config, signals); + return new TMemoryLimiterActor(config, GetMemoryLimiterName(), Singleton()->Counters, Singleton()->DefaultStageFeatures); + } +}; + +class TScanMemoryLimiterPolicy { +public: + static const inline TString Name = "Scan"; +}; + +using TScanMemoryLimiterOperator = TServiceOperatorImpl; + +} // namespace NKikimr::NOlap::NGroupedMemoryManager diff --git a/ydb/core/tx/limiter/grouped_memory/usage/ya.make b/ydb/core/tx/limiter/grouped_memory/usage/ya.make new file mode 100644 index 000000000000..4295b9f8cafe --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/usage/ya.make @@ -0,0 +1,16 @@ +LIBRARY() + +SRCS( + events.cpp + config.cpp + abstract.cpp + service.cpp +) + +PEERDIR( + ydb/library/actors/core + ydb/services/metadata/request + 
ydb/core/tx/limiter/grouped_memory/service +) + +END() diff --git a/ydb/core/tx/limiter/grouped_memory/ut/ut_manager.cpp b/ydb/core/tx/limiter/grouped_memory/ut/ut_manager.cpp new file mode 100644 index 000000000000..277d62903205 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/ut/ut_manager.cpp @@ -0,0 +1,222 @@ +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +Y_UNIT_TEST_SUITE(GroupedMemoryLimiter) { + using namespace NKikimr; + + class TAllocation: public NOlap::NGroupedMemoryManager::IAllocation, public TObjectCounter { + private: + using TBase = NOlap::NGroupedMemoryManager::IAllocation; + virtual bool DoOnAllocated(std::shared_ptr&& /*guard*/, + const std::shared_ptr& /*allocation*/) override { + return true; + } + + public: + TAllocation(const ui64 mem) + : TBase(mem) { + } + }; + + Y_UNIT_TEST(Simplest) { + auto counters = std::make_shared(MakeIntrusive(), "test"); + NOlap::NGroupedMemoryManager::TConfig config; + { + NKikimrConfig::TGroupedMemoryLimiterConfig protoConfig; + protoConfig.SetMemoryLimit(100); + AFL_VERIFY(config.DeserializeFromProto(protoConfig)); + } + std::unique_ptr actor( + NOlap::NGroupedMemoryManager::TScanMemoryLimiterOperator::CreateService(config, MakeIntrusive())); + auto stage = NOlap::NGroupedMemoryManager::TScanMemoryLimiterOperator::GetDefaultStageFeatures(); + auto manager = std::make_shared(NActors::TActorId(), config, "test", counters, stage); + { + auto alloc1 = std::make_shared(50); + manager->RegisterProcess(0, {}); + manager->RegisterProcessScope(0, 0); + manager->RegisterGroup(0, 0, 1); + manager->RegisterAllocation(0, 0, 1, alloc1, {}); + AFL_VERIFY(alloc1->IsAllocated()); + auto alloc1_1 = std::make_shared(50); + manager->RegisterAllocation(0, 0, 1, alloc1_1, {}); + AFL_VERIFY(alloc1_1->IsAllocated()); + + manager->RegisterGroup(0, 0, 2); + auto alloc2 = std::make_shared(50); + manager->RegisterAllocation(0, 0, 2, alloc2, {}); + AFL_VERIFY(!alloc2->IsAllocated()); + 
+ manager->UnregisterAllocation(0, 0, alloc1->GetIdentifier()); + AFL_VERIFY(alloc2->IsAllocated()); + manager->UnregisterAllocation(0, 0, alloc2->GetIdentifier()); + manager->UnregisterAllocation(0, 0, alloc1_1->GetIdentifier()); + manager->UnregisterGroup(0, 0, 1); + manager->UnregisterGroup(0, 0, 2); + manager->UnregisterProcessScope(0, 0); + manager->UnregisterProcess(0); + } + AFL_VERIFY(!stage->GetUsage().Val()); + AFL_VERIFY(manager->IsEmpty()); + AFL_VERIFY(!TObjectCounter::ObjectCount()); + } + + Y_UNIT_TEST(Simple) { + auto counters = std::make_shared(MakeIntrusive(), "test"); + NOlap::NGroupedMemoryManager::TConfig config; + { + NKikimrConfig::TGroupedMemoryLimiterConfig protoConfig; + protoConfig.SetMemoryLimit(100); + AFL_VERIFY(config.DeserializeFromProto(protoConfig)); + } + std::unique_ptr actor(NOlap::NGroupedMemoryManager::TScanMemoryLimiterOperator::CreateService(config, MakeIntrusive())); + auto stage = NOlap::NGroupedMemoryManager::TScanMemoryLimiterOperator::GetDefaultStageFeatures(); + auto manager = std::make_shared(NActors::TActorId(), config, "test", counters, stage); + { + manager->RegisterProcess(0, {}); + manager->RegisterProcessScope(0, 0); + auto alloc1 = std::make_shared(10); + manager->RegisterGroup(0, 0, 1); + manager->RegisterAllocation(0, 0, 1, alloc1, {}); + AFL_VERIFY(alloc1->IsAllocated()); + auto alloc2 = std::make_shared(1000); + manager->RegisterGroup(0, 0, 2); + manager->RegisterAllocation(0, 0, 2, alloc2, {}); + AFL_VERIFY(!alloc2->IsAllocated()); + auto alloc3 = std::make_shared(1000); + manager->RegisterGroup(0, 0, 3); + manager->RegisterAllocation(0, 0, 3, alloc3, {}); + AFL_VERIFY(alloc1->IsAllocated()); + AFL_VERIFY(!alloc2->IsAllocated()); + AFL_VERIFY(!alloc3->IsAllocated()); + auto alloc1_1 = std::make_shared(1000); + manager->RegisterAllocation(0, 0, 1, alloc1_1, {}); + AFL_VERIFY(alloc1_1->IsAllocated()); + AFL_VERIFY(!alloc2->IsAllocated()); + manager->UnregisterAllocation(0, 0, alloc1_1->GetIdentifier()); + 
AFL_VERIFY(!alloc2->IsAllocated()); + manager->UnregisterGroup(0, 0, 1); + AFL_VERIFY(alloc2->IsAllocated()); + + manager->UnregisterAllocation(0, 0, alloc1->GetIdentifier()); + AFL_VERIFY(!alloc3->IsAllocated()); + manager->UnregisterGroup(0, 0, 2); + manager->UnregisterAllocation(0, 0, alloc2->GetIdentifier()); + AFL_VERIFY(alloc3->IsAllocated()); + manager->UnregisterGroup(0, 0, 3); + manager->UnregisterAllocation(0, 0, alloc3->GetIdentifier()); + manager->UnregisterProcessScope(0, 0); + manager->UnregisterProcess(0); + } + AFL_VERIFY(!stage->GetUsage().Val()); + AFL_VERIFY(manager->IsEmpty()); + AFL_VERIFY(!TObjectCounter::ObjectCount()); + } + + Y_UNIT_TEST(CommonUsage) { + auto counters = std::make_shared(MakeIntrusive(), "test"); + NOlap::NGroupedMemoryManager::TConfig config; + { + NKikimrConfig::TGroupedMemoryLimiterConfig protoConfig; + protoConfig.SetMemoryLimit(100); + AFL_VERIFY(config.DeserializeFromProto(protoConfig)); + } + std::unique_ptr actor( + NOlap::NGroupedMemoryManager::TScanMemoryLimiterOperator::CreateService(config, MakeIntrusive())); + auto stage = NOlap::NGroupedMemoryManager::TScanMemoryLimiterOperator::GetDefaultStageFeatures(); + auto manager = std::make_shared(NActors::TActorId(), config, "test", counters, stage); + { + manager->RegisterProcess(0, {}); + manager->RegisterProcessScope(0, 0); + manager->RegisterGroup(0, 0, 1); + auto alloc0 = std::make_shared(1000); + manager->RegisterAllocation(0, 0, 1, alloc0, {}); + auto alloc1 = std::make_shared(1000); + manager->RegisterAllocation(0, 0, 1, alloc1, {}); + AFL_VERIFY(alloc0->IsAllocated()); + AFL_VERIFY(alloc1->IsAllocated()); + + manager->RegisterGroup(0, 0, 2); + auto alloc2 = std::make_shared(1000); + manager->RegisterAllocation(0, 0, 2, alloc0, {}); + manager->RegisterAllocation(0, 0, 2, alloc2, {}); + AFL_VERIFY(alloc0->IsAllocated()); + AFL_VERIFY(!alloc2->IsAllocated()); + + auto alloc3 = std::make_shared(1000); + manager->RegisterGroup(0, 0, 3); + 
manager->RegisterAllocation(0, 0, 3, alloc0, {}); + manager->RegisterAllocation(0, 0, 3, alloc3, {}); + AFL_VERIFY(alloc0->IsAllocated()); + AFL_VERIFY(alloc1->IsAllocated()); + AFL_VERIFY(!alloc2->IsAllocated()); + AFL_VERIFY(!alloc3->IsAllocated()); + + manager->UnregisterGroup(0, 0, 1); + manager->UnregisterAllocation(0, 0, alloc1->GetIdentifier()); + + AFL_VERIFY(alloc0->IsAllocated()); + AFL_VERIFY(alloc2->IsAllocated()); + AFL_VERIFY(!alloc3->IsAllocated()); + manager->UnregisterGroup(0, 0, 2); + manager->UnregisterAllocation(0, 0, alloc2->GetIdentifier()); + AFL_VERIFY(alloc0->IsAllocated()); + AFL_VERIFY(alloc3->IsAllocated()); + + manager->UnregisterGroup(0, 0, 3); + manager->UnregisterAllocation(0, 0, alloc3->GetIdentifier()); + manager->UnregisterAllocation(0, 0, alloc0->GetIdentifier()); + manager->UnregisterProcess(0); + } + AFL_VERIFY(!stage->GetUsage().Val()); + AFL_VERIFY(manager->IsEmpty()); + AFL_VERIFY(!TObjectCounter::ObjectCount()); + } + + Y_UNIT_TEST(Update) { + auto counters = std::make_shared(MakeIntrusive(), "test"); + NOlap::NGroupedMemoryManager::TConfig config; + { + NKikimrConfig::TGroupedMemoryLimiterConfig protoConfig; + protoConfig.SetMemoryLimit(100); + AFL_VERIFY(config.DeserializeFromProto(protoConfig)); + } + std::unique_ptr actor( + NOlap::NGroupedMemoryManager::TScanMemoryLimiterOperator::CreateService(config, MakeIntrusive())); + auto stage = NOlap::NGroupedMemoryManager::TScanMemoryLimiterOperator::GetDefaultStageFeatures(); + auto manager = std::make_shared(NActors::TActorId(), config, "test", counters, stage); + { + manager->RegisterProcess(0, {}); + manager->RegisterProcessScope(0, 0); + auto alloc1 = std::make_shared(1000); + manager->RegisterGroup(0, 0, 1); + manager->RegisterAllocation(0, 0, 1, alloc1, {}); + AFL_VERIFY(alloc1->IsAllocated()); + auto alloc2 = std::make_shared(10); + manager->RegisterGroup(0, 0, 3); + manager->RegisterAllocation(0, 0, 3, alloc2, {}); + AFL_VERIFY(!alloc2->IsAllocated()); + + 
manager->UpdateAllocation(0, 0, alloc1->GetIdentifier(), 10); + AFL_VERIFY(alloc2->IsAllocated()); + + manager->UnregisterGroup(0, 0, 3); + manager->UnregisterAllocation(0, 0, alloc2->GetIdentifier()); + + manager->UnregisterGroup(0, 0, 1); + manager->UnregisterAllocation(0, 0, alloc1->GetIdentifier()); + manager->UnregisterProcess(0); + } + AFL_VERIFY(!stage->GetUsage().Val()); + AFL_VERIFY(manager->IsEmpty()); + AFL_VERIFY(!TObjectCounter::ObjectCount()); + } +}; diff --git a/ydb/core/tx/limiter/grouped_memory/ut/ya.make b/ydb/core/tx/limiter/grouped_memory/ut/ya.make new file mode 100644 index 000000000000..eeae3952e358 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/ut/ya.make @@ -0,0 +1,17 @@ +UNITTEST_FOR(ydb/core/formats/arrow) + +SIZE(SMALL) + +PEERDIR( + ydb/core/tx/limiter/grouped_memory/usage + ydb/library/yql/public/udf/service/stub + ydb/library/yql/parser/pg_wrapper +) + +SRCS( + ut_manager.cpp +) + +YQL_LAST_ABI_VERSION() + +END() diff --git a/ydb/core/tx/limiter/grouped_memory/ya.make b/ydb/core/tx/limiter/grouped_memory/ya.make new file mode 100644 index 000000000000..2e071158a966 --- /dev/null +++ b/ydb/core/tx/limiter/grouped_memory/ya.make @@ -0,0 +1,5 @@ +RECURSE( + ut + service + usage +) diff --git a/ydb/core/tx/limiter/service/service.cpp b/ydb/core/tx/limiter/service/service.cpp index edbce04b7256..e31ea0f253c7 100644 --- a/ydb/core/tx/limiter/service/service.cpp +++ b/ydb/core/tx/limiter/service/service.cpp @@ -12,7 +12,7 @@ TLimiterActor::TLimiterActor(const TConfig& config, const TString& limiterName, void TLimiterActor::HandleMain(TEvExternal::TEvAskResource::TPtr& ev) { const auto now = TMonotonic::Now(); - if (RequestsInFlight.empty() || VolumeInFlight + ev->Get()->GetRequest()->GetVolume() <= Config.GetLimit()) { + if (RequestsInFlight.empty() || (RequestsQueue.empty() && VolumeInFlight + ev->Get()->GetRequest()->GetVolume() <= Config.GetLimit())) { VolumeInFlight += ev->Get()->GetRequest()->GetVolume(); 
RequestsInFlight.emplace_back(now, ev->Get()->GetRequest()->GetVolume()); if (RequestsInFlight.size() == 1) { diff --git a/ydb/core/tx/limiter/ya.make b/ydb/core/tx/limiter/ya.make new file mode 100644 index 000000000000..c27ad41aa02b --- /dev/null +++ b/ydb/core/tx/limiter/ya.make @@ -0,0 +1,5 @@ +RECURSE( + grouped_memory + service + usage +) diff --git a/ydb/core/tx/program/program.cpp b/ydb/core/tx/program/program.cpp index a6dbce7fed75..a4ada441cd74 100644 --- a/ydb/core/tx/program/program.cpp +++ b/ydb/core/tx/program/program.cpp @@ -330,7 +330,7 @@ NSsa::TAggregateAssign TProgramBuilder::MakeAggregate(const NSsa::TColumnInfo& n } } else if (func.ArgumentsSize() == 0 && func.GetId() == TId::AGG_COUNT) { // COUNT(*) case - return TAggregateAssign(name, EAggregate::Count); + return TAggregateAssign(name, EAggregate::NumRows); } return TAggregateAssign(name); // !ok() } @@ -483,7 +483,7 @@ bool TProgramContainer::Init(const IColumnResolver& columnResolver, const NKikim if (IS_DEBUG_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD)) { TString out; ::google::protobuf::TextFormat::PrintToString(programProto, &out); - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("program", out); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "parse_program")("program", out); } if (programProto.HasKernels()) { @@ -496,6 +496,7 @@ bool TProgramContainer::Init(const IColumnResolver& columnResolver, const NKikim } return false; } + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "program_parsed")("result", DebugString()); return true; } diff --git a/ydb/core/tx/program/program.h b/ydb/core/tx/program/program.h index 76fbe8702488..3ab18eccc9d1 100644 --- a/ydb/core/tx/program/program.h +++ b/ydb/core/tx/program/program.h @@ -2,7 +2,7 @@ #include "registry.h" #include -#include +#include #include #include #include @@ -20,7 +20,6 @@ class IColumnResolver { virtual ~IColumnResolver() = default; virtual TString GetColumnName(ui32 id, bool required = true) const = 0; virtual std::optional 
GetColumnIdOptional(const TString& name) const = 0; - virtual const NTable::TScheme::TTableSchema& GetSchema() const = 0; virtual NSsa::TColumnInfo GetDefaultColumn() const = 0; }; @@ -35,10 +34,6 @@ class TSchemaResolverColumnsOnly: public IColumnResolver { virtual TString GetColumnName(ui32 id, bool required = true) const override; virtual std::optional GetColumnIdOptional(const TString& name) const override; - virtual const NTable::TScheme::TTableSchema& GetSchema() const override { - AFL_VERIFY(false); - return Default(); - } virtual NSsa::TColumnInfo GetDefaultColumn() const override { return NSsa::TColumnInfo::Original((ui32)NOlap::NPortion::TSpecialColumns::SPEC_COL_PLAN_STEP_INDEX, NOlap::NPortion::TSpecialColumns::SPEC_COL_PLAN_STEP); } diff --git a/ydb/core/tx/program/ya.make b/ydb/core/tx/program/ya.make index 4e69528543a3..d8ef7ed60696 100644 --- a/ydb/core/tx/program/ya.make +++ b/ydb/core/tx/program/ya.make @@ -8,7 +8,7 @@ SRCS( PEERDIR( ydb/core/formats/arrow ydb/core/protos - ydb/core/formats/arrow/protos + ydb/library/formats/arrow/protos ydb/core/tablet_flat ydb/library/yql/minikql/comp_nodes ydb/library/yql/core/arrow_kernels/registry diff --git a/ydb/core/tx/replication/controller/controller.cpp b/ydb/core/tx/replication/controller/controller.cpp index 4f73d84d68b0..46e8c3dfbb0c 100644 --- a/ydb/core/tx/replication/controller/controller.cpp +++ b/ydb/core/tx/replication/controller/controller.cpp @@ -1,6 +1,7 @@ #include "controller.h" #include "controller_impl.h" +#include #include #include @@ -60,6 +61,8 @@ STFUNC(TController::StateWork) { HFunc(TEvPrivate::TEvProcessQueues, Handle); HFunc(TEvPrivate::TEvRemoveWorker, Handle); HFunc(TEvPrivate::TEvDescribeTargetsResult, Handle); + HFunc(TEvPrivate::TEvRequestCreateStream, Handle); + HFunc(TEvPrivate::TEvRequestDropStream, Handle); HFunc(TEvDiscovery::TEvDiscoveryData, Handle); HFunc(TEvDiscovery::TEvError, Handle); HFunc(TEvService::TEvStatus, Handle); @@ -148,13 +151,53 @@ void 
TController::Handle(TEvPrivate::TEvAssignStreamName::TPtr& ev, const TActor RunTxAssignStreamName(ev, ctx); } +template +void ProcessLimiterQueue(TDeque& requested, THashSet& inflight, ui32 limit, const TActorContext& ctx) { + while (!requested.empty() && inflight.size() < limit) { + const auto& actorId = requested.front(); + ctx.Send(actorId, new TEvent()); + inflight.insert(actorId); + requested.pop_front(); + } +} + +void TController::ProcessCreateStreamQueue(const TActorContext& ctx) { + const auto& limits = AppData()->ReplicationConfig.GetSchemeOperationLimits(); + ProcessLimiterQueue(RequestedCreateStream, InflightCreateStream, limits.GetInflightCreateStreamLimit(), ctx); +} + +void TController::ProcessDropStreamQueue(const TActorContext& ctx) { + const auto& limits = AppData()->ReplicationConfig.GetSchemeOperationLimits(); + ProcessLimiterQueue(RequestedDropStream, InflightDropStream, limits.GetInflightDropStreamLimit(), ctx); +} + +void TController::Handle(TEvPrivate::TEvRequestCreateStream::TPtr& ev, const TActorContext& ctx) { + CLOG_T(ctx, "Handle " << ev->Get()->ToString()); + + RequestedCreateStream.push_back(ev->Sender); + ProcessCreateStreamQueue(ctx); +} + void TController::Handle(TEvPrivate::TEvCreateStreamResult::TPtr& ev, const TActorContext& ctx) { CLOG_T(ctx, "Handle " << ev->Get()->ToString()); + + InflightCreateStream.erase(ev->Sender); + ProcessCreateStreamQueue(ctx); RunTxCreateStreamResult(ev, ctx); } +void TController::Handle(TEvPrivate::TEvRequestDropStream::TPtr& ev, const TActorContext& ctx) { + CLOG_T(ctx, "Handle " << ev->Get()->ToString()); + + RequestedDropStream.push_back(ev->Sender); + ProcessDropStreamQueue(ctx); +} + void TController::Handle(TEvPrivate::TEvDropStreamResult::TPtr& ev, const TActorContext& ctx) { CLOG_T(ctx, "Handle " << ev->Get()->ToString()); + + InflightDropStream.erase(ev->Sender); + ProcessDropStreamQueue(ctx); RunTxDropStreamResult(ev, ctx); } diff --git 
a/ydb/core/tx/replication/controller/controller_impl.h b/ydb/core/tx/replication/controller/controller_impl.h index 927d2d5bf528..b7a365501a46 100644 --- a/ydb/core/tx/replication/controller/controller_impl.h +++ b/ydb/core/tx/replication/controller/controller_impl.h @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -83,6 +84,8 @@ class TController void Handle(TEvPrivate::TEvProcessQueues::TPtr& ev, const TActorContext& ctx); void Handle(TEvPrivate::TEvRemoveWorker::TPtr& ev, const TActorContext& ctx); void Handle(TEvPrivate::TEvDescribeTargetsResult::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPrivate::TEvRequestCreateStream::TPtr& ev, const TActorContext& ctx); + void Handle(TEvPrivate::TEvRequestDropStream::TPtr& ev, const TActorContext& ctx); void Handle(TEvDiscovery::TEvDiscoveryData::TPtr& ev, const TActorContext& ctx); void Handle(TEvDiscovery::TEvError::TPtr& ev, const TActorContext& ctx); void Handle(TEvService::TEvStatus::TPtr& ev, const TActorContext& ctx); @@ -103,6 +106,8 @@ class TController void RemoveWorker(const TWorkerId& id, const TActorContext& ctx); bool MaybeRemoveWorker(const TWorkerId& id, const TActorContext& ctx); void UpdateLag(const TWorkerId& id, TDuration lag); + void ProcessCreateStreamQueue(const TActorContext& ctx); + void ProcessDropStreamQueue(const TActorContext& ctx); // local transactions class TTxInitSchema; @@ -178,6 +183,13 @@ class TController bool ProcessQueuesScheduled = false; static constexpr ui32 ProcessBatchLimit = 100; + // create stream limiter + TDeque RequestedCreateStream; + THashSet InflightCreateStream; + // drop stream limiter + TDeque RequestedDropStream; + THashSet InflightDropStream; + }; // TController } diff --git a/ydb/core/tx/replication/controller/dst_alterer.cpp b/ydb/core/tx/replication/controller/dst_alterer.cpp index cb436259fae2..c03314f1d59e 100644 --- a/ydb/core/tx/replication/controller/dst_alterer.cpp +++ b/ydb/core/tx/replication/controller/dst_alterer.cpp @@ -41,6 
+41,7 @@ class TDstAlterer: public TActorBootstrapped { switch (Kind) { case TReplication::ETargetKind::Table: + case TReplication::ETargetKind::IndexTable: tx.SetOperationType(NKikimrSchemeOp::ESchemeOpAlterTable); PathIdFromPathId(DstPathId, tx.MutableAlterTable()->MutablePathId()); tx.MutableAlterTable()->MutableReplicationConfig()->SetMode( diff --git a/ydb/core/tx/replication/controller/dst_creator.cpp b/ydb/core/tx/replication/controller/dst_creator.cpp index 4b43cbad4fd5..cabb8c757334 100644 --- a/ydb/core/tx/replication/controller/dst_creator.cpp +++ b/ydb/core/tx/replication/controller/dst_creator.cpp @@ -8,6 +8,8 @@ #include #include #include +#include +#include #include #include #include @@ -116,6 +118,8 @@ class TDstCreator: public TActorBootstrapped { .WithKeyShardBoundary(true))); } break; + case TReplication::ETargetKind::IndexTable: + Y_ABORT("unreachable"); } } @@ -128,7 +132,7 @@ class TDstCreator: public TActorBootstrapped { } } - NKikimrScheme::EStatus ConvertStatus(NYdb::EStatus status) { + static NKikimrScheme::EStatus ConvertStatus(NYdb::EStatus status) { switch (status) { case NYdb::EStatus::SUCCESS: return NKikimrScheme::StatusSuccess; @@ -165,8 +169,20 @@ class TDstCreator: public TActorBootstrapped { Ydb::Table::CreateTableRequest scheme; result.GetTableDescription().SerializeTo(scheme); - // Disable index support until other replicator code be ready to process index replication - scheme.mutable_indexes()->Clear(); + + // filter out unsupported index types + auto& indexes = *scheme.mutable_indexes(); + for (auto it = indexes.begin(); it != indexes.end();) { + switch (it->type_case()) { + case Ydb::Table::TableIndex::kGlobalIndex: + case Ydb::Table::TableIndex::kGlobalUniqueIndex: + ++it; + continue; + default: + it = indexes.erase(it); + break; + } + } Ydb::StatusIds::StatusCode status; TString error; @@ -182,30 +198,37 @@ class TDstCreator: public TActorBootstrapped { TxBody.SetWorkingDir(pathPair.first); - 
NKikimrSchemeOp::TTableDescription* tableDesc = nullptr; + NKikimrSchemeOp::TTableDescription* desc = nullptr; if (scheme.indexes_size()) { + NeedToCheck = true; TxBody.SetOperationType(NKikimrSchemeOp::ESchemeOpCreateIndexedTable); - tableDesc = TxBody.MutableCreateIndexedTable()->MutableTableDescription(); TxBody.SetInternal(true); + desc = TxBody.MutableCreateIndexedTable()->MutableTableDescription(); + if (!FillIndexDescription(*TxBody.MutableCreateIndexedTable(), scheme, status, error)) { + return Error(NKikimrScheme::StatusSchemeError, error); + } } else { TxBody.SetOperationType(NKikimrSchemeOp::ESchemeOpCreateTable); - tableDesc = TxBody.MutableCreateTable(); + desc = TxBody.MutableCreateTable(); } - Ydb::StatusIds::StatusCode dummyCode; + Y_ABORT_UNLESS(desc); + desc->SetName(pathPair.second); - if (!FillIndexDescription(*TxBody.MutableCreateIndexedTable(), scheme, dummyCode, error)) { - return Error(NKikimrScheme::StatusSchemeError, error); + FillReplicationConfig(*desc->MutableReplicationConfig()); + if (scheme.indexes_size()) { + for (auto& index : *TxBody.MutableCreateIndexedTable()->MutableIndexDescription()) { + FillReplicationConfig(*index.MutableIndexImplTableDescription()->MutableReplicationConfig()); + } } - tableDesc->SetName(pathPair.second); + AllocateTxId(); + } + static void FillReplicationConfig(NKikimrSchemeOp::TTableReplicationConfig& replicationConfig) { // TODO: support other modes - auto& replicationConfig = *tableDesc->MutableReplicationConfig(); replicationConfig.SetMode(NKikimrSchemeOp::TTableReplicationConfig::REPLICATION_MODE_READ_ONLY); replicationConfig.SetConsistency(NKikimrSchemeOp::TTableReplicationConfig::CONSISTENCY_WEAK); - - AllocateTxId(); } void AllocateTxId() { @@ -257,7 +280,9 @@ class TDstCreator: public TActorBootstrapped { switch (record.GetStatus()) { case NKikimrScheme::StatusAccepted: - DstPathId = TPathId(SchemeShardId, record.GetPathId()); + if (!NeedToCheck) { + DstPathId = TPathId(SchemeShardId, 
record.GetPathId()); + } Y_DEBUG_ABORT_UNLESS(TxId == record.GetTxId()); return SubscribeTx(record.GetTxId()); case NKikimrScheme::StatusMultipleModifications: @@ -338,6 +363,8 @@ class TDstCreator: public TActorBootstrapped { switch (Kind) { case TReplication::ETargetKind::Table: return CheckTableScheme(desc.GetTable(), error); + case TReplication::ETargetKind::IndexTable: + Y_ABORT("unreachable"); } } @@ -366,21 +393,30 @@ class TDstCreator: public TActorBootstrapped { return false; } - const auto& expected = TxBody.GetCreateTable(); + const NKikimrSchemeOp::TIndexedTableCreationConfig* indexedDesc = nullptr; + const NKikimrSchemeOp::TTableDescription* tableDesc = nullptr; + if (TxBody.GetOperationType() == NKikimrSchemeOp::ESchemeOpCreateIndexedTable) { + indexedDesc = &TxBody.GetCreateIndexedTable(); + tableDesc = &indexedDesc->GetTableDescription(); + } else { + tableDesc = &TxBody.GetCreateTable(); + } + + Y_ABORT_UNLESS(tableDesc); // check key - if (expected.KeyColumnNamesSize() != got.KeyColumnNamesSize()) { + if (tableDesc->KeyColumnNamesSize() != got.KeyColumnNamesSize()) { error = TStringBuilder() << "Key columns size mismatch" - << ": expected: " << expected.KeyColumnNamesSize() + << ": expected: " << tableDesc->KeyColumnNamesSize() << ", got: " << got.KeyColumnNamesSize(); return false; } - for (ui32 i = 0; i < expected.KeyColumnNamesSize(); ++i) { - if (expected.GetKeyColumnNames(i) != got.GetKeyColumnNames(i)) { + for (ui32 i = 0; i < tableDesc->KeyColumnNamesSize(); ++i) { + if (tableDesc->GetKeyColumnNames(i) != got.GetKeyColumnNames(i)) { error = TStringBuilder() << "Key column name mismatch" << ": position: " << i - << ", expected: " << expected.GetKeyColumnNames(i) + << ", expected: " << tableDesc->GetKeyColumnNames(i) << ", got: " << got.GetKeyColumnNames(i); return false; } @@ -392,14 +428,14 @@ class TDstCreator: public TActorBootstrapped { columns.emplace(column.GetName(), column.GetType()); } - if (expected.ColumnsSize() != columns.size()) 
{ + if (tableDesc->ColumnsSize() != columns.size()) { error = TStringBuilder() << "Columns size mismatch" - << ": expected: " << expected.ColumnsSize() + << ": expected: " << tableDesc->ColumnsSize() << ", got: " << columns.size(); return false; } - for (const auto& column : expected.GetColumns()) { + for (const auto& column : tableDesc->GetColumns()) { auto it = columns.find(column.GetName()); if (it == columns.end()) { error = TStringBuilder() << "Cannot find column" @@ -422,14 +458,25 @@ class TDstCreator: public TActorBootstrapped { indexes.emplace(index.GetName(), &index); } - if (expected.TableIndexesSize() != indexes.size()) { + if (!indexedDesc) { + if (!indexes.empty()) { + error = TStringBuilder() << "Indexes size mismatch" + << ": expected: " << 0 + << ", got: " << indexes.size(); + return false; + } + + return true; + } + + if (indexedDesc->IndexDescriptionSize() != indexes.size()) { error = TStringBuilder() << "Indexes size mismatch" - << ": expected: " << expected.TableIndexesSize() + << ": expected: " << indexedDesc->IndexDescriptionSize() << ", got: " << indexes.size(); return false; } - for (const auto& index : expected.GetTableIndexes()) { + for (const auto& index : indexedDesc->GetIndexDescription()) { auto it = indexes.find(index.GetName()); if (it == indexes.end()) { error = TStringBuilder() << "Cannot find index" @@ -487,6 +534,36 @@ class TDstCreator: public TActorBootstrapped { return true; } + void SubscribeDstPath() { + Subscriber = Register(CreateSchemeBoardSubscriber(SelfId(), DstPath)); + Become(&TThis::StateSubscribeDstPath); + } + + STATEFN(StateSubscribeDstPath) { + switch (ev->GetTypeRewrite()) { + hFunc(TSchemeBoardEvents::TEvNotifyUpdate, Handle); + default: + return StateBase(ev); + } + } + + void Handle(TSchemeBoardEvents::TEvNotifyUpdate::TPtr& ev) { + LOG_T("Handle " << ev->Get()->ToString()); + + const auto& desc = ev->Get()->DescribeSchemeResult; + if (desc.GetStatus() != NKikimrScheme::StatusSuccess) { + return; + } + + 
const auto& entryDesc = desc.GetPathDescription().GetSelf(); + if (!entryDesc.HasCreateFinished() || !entryDesc.GetCreateFinished()) { + return; + } + + DstPathId = ev->Get()->PathId; + return Success(); + } + void Handle(TEvPipeCache::TEvDeliveryProblem::TPtr& ev) { LOG_T("Handle " << ev->Get()->ToString()); @@ -525,6 +602,12 @@ class TDstCreator: public TActorBootstrapped { Schedule(TDuration::Seconds(10), new TEvents::TEvWakeup); } + void PassAway() override { + if (const auto& actorId = std::exchange(Subscriber, {})) { + Send(actorId, new TEvents::TEvPoison()); + } + } + public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::REPLICATION_CONTROLLER_DST_CREATOR; @@ -554,7 +637,13 @@ class TDstCreator: public TActorBootstrapped { } void Bootstrap() { - Resolve(PathId); + switch (Kind) { + case TReplication::ETargetKind::Table: + return Resolve(PathId); + case TReplication::ETargetKind::IndexTable: + // indexed table will be created along with its indexes + return SubscribeDstPath(); + } } STATEFN(StateBase) { @@ -586,6 +675,7 @@ class TDstCreator: public TActorBootstrapped { TActorId PipeCache; bool NeedToCheck = false; TPathId DstPathId; + TActorId Subscriber; }; // TDstCreator diff --git a/ydb/core/tx/replication/controller/dst_creator_ut.cpp b/ydb/core/tx/replication/controller/dst_creator_ut.cpp index d7fab275eb28..805cd2b1e3b3 100644 --- a/ydb/core/tx/replication/controller/dst_creator_ut.cpp +++ b/ydb/core/tx/replication/controller/dst_creator_ut.cpp @@ -62,8 +62,16 @@ Y_UNIT_TEST_SUITE(DstCreator) { CheckTableReplica(tableDesc, replicatedDesc); } - void WithSyncIndex(const TString& replicatedPath) { - TEnv env; + Y_UNIT_TEST(Basic) { + Basic("/Root/Replicated"); + } + + Y_UNIT_TEST(WithIntermediateDir) { + Basic("/Root/Dir/Replicated"); + } + + void WithIndex(const TString& replicatedPath, NKikimrSchemeOp::EIndexType indexType) { + TEnv env(TFeatureFlags().SetEnableChangefeedsOnIndexTables(true)); 
env.GetRuntime().SetLogPriority(NKikimrServices::REPLICATION_CONTROLLER, NLog::PRI_TRACE); const auto tableDesc = TTestTableDescription{ @@ -79,25 +87,45 @@ Y_UNIT_TEST_SUITE(DstCreator) { const TString indexName = "index_by_value"; env.CreateTableWithIndex("/Root", *MakeTableDescription(tableDesc), - indexName, TVector{"value"}, NKikimrSchemeOp::EIndexTypeGlobal, - TVector{}, TDuration::Seconds(5000)); + indexName, TVector{"value"}, indexType); env.GetRuntime().Register(CreateDstCreator( env.GetSender(), env.GetSchemeshardId("/Root/Table"), env.GetYdbProxy(), env.GetPathId("/Root"), 1 /* rid */, 1 /* tid */, TReplication::ETargetKind::Table, "/Root/Table", replicatedPath )); - - auto ev = env.GetRuntime().GrabEdgeEvent(env.GetSender()); - UNIT_ASSERT_VALUES_EQUAL(ev->Get()->Status, NKikimrScheme::StatusSuccess); + { + auto ev = env.GetRuntime().GrabEdgeEvent(env.GetSender()); + UNIT_ASSERT_VALUES_EQUAL(ev->Get()->Status, NKikimrScheme::StatusSuccess); + } auto desc = env.GetDescription(replicatedPath); const auto& replicatedDesc = desc.GetPathDescription().GetTable(); CheckTableReplica(tableDesc, replicatedDesc); + switch (indexType) { + case NKikimrSchemeOp::EIndexTypeGlobal: + case NKikimrSchemeOp::EIndexTypeGlobalUnique: + UNIT_ASSERT_VALUES_EQUAL(replicatedDesc.TableIndexesSize(), 1); + break; + default: + UNIT_ASSERT_VALUES_EQUAL(replicatedDesc.TableIndexesSize(), 0); + return; + } + + env.GetRuntime().Register(CreateDstCreator( + env.GetSender(), env.GetSchemeshardId("/Root/Table"), env.GetYdbProxy(), env.GetPathId("/Root"), + 1 /* rid */, 2 /* tid */, TReplication::ETargetKind::IndexTable, + "/Root/Table/" + indexName + "/indexImplTable", replicatedPath + "/" + indexName + "/indexImplTable" + )); + { + auto ev = env.GetRuntime().GrabEdgeEvent(env.GetSender()); + UNIT_ASSERT_VALUES_EQUAL(ev->Get()->Status, NKikimrScheme::StatusSuccess); + } + { auto desc = env.GetDescription(replicatedPath + "/" + indexName); 
UNIT_ASSERT_VALUES_EQUAL(desc.GetPathDescription().GetTableIndex().GetName(), indexName); - UNIT_ASSERT_VALUES_EQUAL(desc.GetPathDescription().GetTableIndex().GetType(), NKikimrSchemeOp::EIndexType::EIndexTypeGlobal); + UNIT_ASSERT_VALUES_EQUAL(desc.GetPathDescription().GetTableIndex().GetType(), indexType); } { @@ -106,25 +134,19 @@ Y_UNIT_TEST_SUITE(DstCreator) { const auto& indexTableDesc = desc.GetPathDescription().GetTable(); UNIT_ASSERT_VALUES_EQUAL(indexTableDesc.KeyColumnNamesSize(), 2); } - } - - - Y_UNIT_TEST(Basic) { - Basic("/Root/Replicated"); } - Y_UNIT_TEST(WithIntermediateDir) { - Basic("/Root/Dir/Replicated"); - } -/* Y_UNIT_TEST(WithSyncIndex) { - WithSyncIndex("/Root/Replicated"); + WithIndex("/Root/Replicated", NKikimrSchemeOp::EIndexTypeGlobal); + } + + Y_UNIT_TEST(WithSyncIndexAndIntermediateDir) { + WithIndex("/Root/Dir/Replicated", NKikimrSchemeOp::EIndexTypeGlobal); } - Y_UNIT_TEST(WithSyncIndexWithIntermediateDir) { - WithSyncIndex("/Root/Dir/Replicated"); + Y_UNIT_TEST(WithAsyncIndex) { + WithIndex("/Root/Replicated", NKikimrSchemeOp::EIndexTypeGlobalAsync); } -*/ Y_UNIT_TEST(SameOwner) { TEnv env; diff --git a/ydb/core/tx/replication/controller/dst_remover.cpp b/ydb/core/tx/replication/controller/dst_remover.cpp index 2d0ec2d3b72f..538eddd812ff 100644 --- a/ydb/core/tx/replication/controller/dst_remover.cpp +++ b/ydb/core/tx/replication/controller/dst_remover.cpp @@ -43,6 +43,8 @@ class TDstRemover: public TActorBootstrapped { case TReplication::ETargetKind::Table: tx.SetOperationType(NKikimrSchemeOp::ESchemeOpDropTable); break; + case TReplication::ETargetKind::IndexTable: + Y_ABORT("unreachable"); } Send(PipeCache, new TEvPipeCache::TEvForward(ev.Release(), SchemeShardId, true)); @@ -156,7 +158,13 @@ class TDstRemover: public TActorBootstrapped { if (!DstPathId) { Success(); } else { - AllocateTxId(); + switch (Kind) { + case TReplication::ETargetKind::Table: + return AllocateTxId(); + case TReplication::ETargetKind::IndexTable: + // 
indexed table will be removed along with its indexes + return Success(); + } } } diff --git a/ydb/core/tx/replication/controller/private_events.cpp b/ydb/core/tx/replication/controller/private_events.cpp index f562331cc26d..12807487c856 100644 --- a/ydb/core/tx/replication/controller/private_events.cpp +++ b/ydb/core/tx/replication/controller/private_events.cpp @@ -4,6 +4,20 @@ namespace NKikimr::NReplication::NController { +TEvPrivate::TEvDiscoveryTargetsResult::TAddEntry::TAddEntry( + const TString& srcPath, const TString& dstPath, TReplication::ETargetKind kind) + : SrcPath(srcPath) + , DstPath(dstPath) + , Kind(kind) +{ +} + +TEvPrivate::TEvDiscoveryTargetsResult::TFailedEntry::TFailedEntry(const TString& srcPath, const NYdb::TStatus& error) + : SrcPath(srcPath) + , Error(error) +{ +} + TEvPrivate::TEvDiscoveryTargetsResult::TEvDiscoveryTargetsResult(ui64 rid, TVector&& toAdd, TVector&& toDel) : ReplicationId(rid) , ToAdd(std::move(toAdd)) @@ -179,11 +193,11 @@ TString TEvPrivate::TEvDescribeTargetsResult::ToString() const { } Y_DECLARE_OUT_SPEC(, NKikimr::NReplication::NController::TEvPrivate::TEvDiscoveryTargetsResult::TAddEntry, stream, value) { - stream << value.first.Name << " (" << value.first.Type << ")"; + stream << value.SrcPath << " (" << value.Kind << ")"; } Y_DECLARE_OUT_SPEC(, NKikimr::NReplication::NController::TEvPrivate::TEvDiscoveryTargetsResult::TFailedEntry, stream, value) { - stream << value.first << ": " << value.second.GetStatus() << " ("; - value.second.GetIssues().PrintTo(stream, true); + stream << value.SrcPath << ": " << value.Error.GetStatus() << " ("; + value.Error.GetIssues().PrintTo(stream, true); stream << ")"; } diff --git a/ydb/core/tx/replication/controller/private_events.h b/ydb/core/tx/replication/controller/private_events.h index 7383d6f7ffc0..ebf083f97b0b 100644 --- a/ydb/core/tx/replication/controller/private_events.h +++ b/ydb/core/tx/replication/controller/private_events.h @@ -1,6 +1,7 @@ #pragma once -#include +#include 
"replication.h" + #include #include @@ -31,6 +32,10 @@ struct TEvPrivate { EvAlterDstResult, EvRemoveWorker, EvDescribeTargetsResult, + EvRequestCreateStream, + EvAllowCreateStream, + EvRequestDropStream, + EvAllowDropStream, EvEnd, }; @@ -38,8 +43,20 @@ struct TEvPrivate { static_assert(EvEnd < EventSpaceEnd(TKikimrEvents::ES_PRIVATE), "expect EvEnd < EventSpaceEnd(TKikimrEvents::ES_PRIVATE)"); struct TEvDiscoveryTargetsResult: public TEventLocal { - using TAddEntry = std::pair; // src, dst - using TFailedEntry = std::pair; // src, error + struct TAddEntry { + TString SrcPath; + TString DstPath; + TReplication::ETargetKind Kind; + + explicit TAddEntry(const TString& srcPath, const TString& dstPath, TReplication::ETargetKind kind); + }; + + struct TFailedEntry { + TString SrcPath; + NYdb::TStatus Error; + + explicit TFailedEntry(const TString& srcPath, const NYdb::TStatus& error); + }; const ui64 ReplicationId; TVector ToAdd; @@ -208,6 +225,18 @@ struct TEvPrivate { TString ToString() const override; }; + struct TEvRequestCreateStream: public TEventLocal { + }; + + struct TEvAllowCreateStream: public TEventLocal { + }; + + struct TEvRequestDropStream: public TEventLocal { + }; + + struct TEvAllowDropStream: public TEventLocal { + }; + }; // TEvPrivate } diff --git a/ydb/core/tx/replication/controller/replication.cpp b/ydb/core/tx/replication/controller/replication.cpp index 983f89fa4eb2..9acc0f7f9b7a 100644 --- a/ydb/core/tx/replication/controller/replication.cpp +++ b/ydb/core/tx/replication/controller/replication.cpp @@ -42,7 +42,9 @@ class TReplication::TImpl: public TLagProvider { ITarget* CreateTarget(TReplication* self, ui64 id, ETargetKind kind, Args&&... 
args) const { switch (kind) { case ETargetKind::Table: - return new TTableTarget(self, id, std::forward(args)...); + return new TTargetTable(self, id, std::forward(args)...); + case ETargetKind::IndexTable: + return new TTargetIndexTable(self, id, std::forward(args)...); } } @@ -113,19 +115,22 @@ class TReplication::TImpl: public TLagProvider { if (!YdbProxy && !(State == EState::Removing && !Targets)) { THolder ydbProxy; const auto& params = Config.GetSrcConnectionParams(); + const auto& endpoint = params.GetEndpoint(); + const auto& database = params.GetDatabase(); + const bool ssl = params.GetEnableSsl(); switch (params.GetCredentialsCase()) { case NKikimrReplication::TConnectionParams::kStaticCredentials: if (!params.GetStaticCredentials().HasPassword()) { return ResolveSecret(params.GetStaticCredentials().GetPasswordSecretName(), ctx); } - ydbProxy.Reset(CreateYdbProxy(params.GetEndpoint(), params.GetDatabase(), params.GetStaticCredentials())); + ydbProxy.Reset(CreateYdbProxy(endpoint, database, ssl, params.GetStaticCredentials())); break; case NKikimrReplication::TConnectionParams::kOAuthToken: if (!params.GetOAuthToken().HasToken()) { return ResolveSecret(params.GetOAuthToken().GetTokenSecretName(), ctx); } - ydbProxy.Reset(CreateYdbProxy(params.GetEndpoint(), params.GetDatabase(), params.GetOAuthToken().GetToken())); + ydbProxy.Reset(CreateYdbProxy(endpoint, database, ssl, params.GetOAuthToken().GetToken())); break; default: ErrorState(TStringBuilder() << "Unexpected credentials: " << params.GetCredentialsCase()); diff --git a/ydb/core/tx/replication/controller/replication.h b/ydb/core/tx/replication/controller/replication.h index afdcfccebe16..c4e64436bb6d 100644 --- a/ydb/core/tx/replication/controller/replication.h +++ b/ydb/core/tx/replication/controller/replication.h @@ -32,6 +32,7 @@ class TReplication: public TSimpleRefCount { enum class ETargetKind: ui8 { Table, + IndexTable, }; enum class EDstState: ui8 { diff --git 
a/ydb/core/tx/replication/controller/stream_creator.cpp b/ydb/core/tx/replication/controller/stream_creator.cpp index 2e295fab12ed..cd8cb06d8e7c 100644 --- a/ydb/core/tx/replication/controller/stream_creator.cpp +++ b/ydb/core/tx/replication/controller/stream_creator.cpp @@ -4,7 +4,9 @@ #include "target_with_stream.h" #include "util.h" +#include #include +#include #include #include #include @@ -16,16 +18,38 @@ namespace NKikimr::NReplication::NController { class TStreamCreator: public TActorBootstrapped { - static NYdb::NTable::TChangefeedDescription MakeChangefeed(const TString& name, const NJson::TJsonMap& attrs) { + static NYdb::NTable::TChangefeedDescription MakeChangefeed( + const TString& name, const TDuration& retentionPeriod, const NJson::TJsonMap& attrs) + { using namespace NYdb::NTable; return TChangefeedDescription(name, EChangefeedMode::Updates, EChangefeedFormat::Json) + .WithRetentionPeriod(retentionPeriod) .WithInitialScan() .AddAttribute("__async_replication", NJson::WriteJson(attrs, false)); } + void RequestPermission() { + Send(Parent, new TEvPrivate::TEvRequestCreateStream()); + Become(&TThis::StateRequestPermission); + } + + STATEFN(StateRequestPermission) { + switch (ev->GetTypeRewrite()) { + hFunc(TEvPrivate::TEvAllowCreateStream, Handle); + default: + return StateBase(ev); + } + } + + void Handle(TEvPrivate::TEvAllowCreateStream::TPtr& ev) { + LOG_T("Handle " << ev->Get()->ToString()); + CreateStream(); + } + void CreateStream() { switch (Kind) { case TReplication::ETargetKind::Table: + case TReplication::ETargetKind::IndexTable: Send(YdbProxy, new TEvYdbProxy::TEvAlterTableRequest(SrcPath, NYdb::NTable::TAlterTableSettings() .AppendAddChangefeeds(Changefeed))); break; @@ -64,8 +88,17 @@ class TStreamCreator: public TActorBootstrapped { } } + TString BuildStreamPath() const { + switch (Kind) { + case TReplication::ETargetKind::Table: + return CanonizePath(ChildPath(SplitPath(SrcPath), Changefeed.GetName())); + case 
TReplication::ETargetKind::IndexTable: + return CanonizePath(ChildPath(SplitPath(SrcPath), {"indexImplTable", Changefeed.GetName()})); + } + } + void CreateConsumer() { - const auto streamPath = CanonizePath(ChildPath(SplitPath(SrcPath), Changefeed.GetName())); + const auto streamPath = BuildStreamPath(); const auto settings = NYdb::NTopic::TAlterTopicSettings() .BeginAddConsumer() .ConsumerName(ReplicationConsumerName) @@ -88,6 +121,10 @@ class TStreamCreator: public TActorBootstrapped { LOG_T("Handle " << ev->Get()->ToString()); auto& result = ev->Get()->Result; + if (result.GetStatus() == NYdb::EStatus::ALREADY_EXISTS) { + return Reply(NYdb::TStatus(NYdb::EStatus::SUCCESS, NYql::TIssues())); + } + if (!result.IsSuccess()) { if (IsRetryableError(result)) { LOG_D("Retry CreateConsumer"); @@ -123,14 +160,15 @@ class TStreamCreator: public TActorBootstrapped { TReplication::ETargetKind kind, const TString& srcPath, const TString& dstPath, - const TString& streamName) + const TString& streamName, + const TDuration& streamRetentionPeriod) : Parent(parent) , YdbProxy(proxy) , ReplicationId(rid) , TargetId(tid) , Kind(kind) , SrcPath(srcPath) - , Changefeed(MakeChangefeed(streamName, NJson::TJsonMap{ + , Changefeed(MakeChangefeed(streamName, streamRetentionPeriod, NJson::TJsonMap{ {"path", dstPath}, {"id", ToString(rid)}, })) @@ -139,7 +177,7 @@ class TStreamCreator: public TActorBootstrapped { } void Bootstrap() { - CreateStream(); + RequestPermission(); } STATEFN(StateBase) { @@ -165,13 +203,15 @@ IActor* CreateStreamCreator(TReplication* replication, ui64 targetId, const TAct Y_ABORT_UNLESS(target); return CreateStreamCreator(ctx.SelfID, replication->GetYdbProxy(), replication->GetId(), target->GetId(), target->GetKind(), - target->GetSrcPath(), target->GetDstPath(), target->GetStreamName()); + target->GetSrcPath(), target->GetDstPath(), target->GetStreamName(), + TDuration::Seconds(AppData()->ReplicationConfig.GetRetentionPeriodSeconds())); } IActor* 
CreateStreamCreator(const TActorId& parent, const TActorId& proxy, ui64 rid, ui64 tid, - TReplication::ETargetKind kind, const TString& srcPath, const TString& dstPath, const TString& streamName) + TReplication::ETargetKind kind, const TString& srcPath, const TString& dstPath, + const TString& streamName, const TDuration& streamRetentionPeriod) { - return new TStreamCreator(parent, proxy, rid, tid, kind, srcPath, dstPath, streamName); + return new TStreamCreator(parent, proxy, rid, tid, kind, srcPath, dstPath, streamName, streamRetentionPeriod); } } diff --git a/ydb/core/tx/replication/controller/stream_creator.h b/ydb/core/tx/replication/controller/stream_creator.h index 94eca13b4554..1eca930efb6b 100644 --- a/ydb/core/tx/replication/controller/stream_creator.h +++ b/ydb/core/tx/replication/controller/stream_creator.h @@ -6,6 +6,7 @@ namespace NKikimr::NReplication::NController { IActor* CreateStreamCreator(TReplication* replication, ui64 targetId, const TActorContext& ctx); IActor* CreateStreamCreator(const TActorId& parent, const TActorId& proxy, ui64 rid, ui64 tid, - TReplication::ETargetKind kind, const TString& srcPath, const TString& dstPath, const TString& streamName); + TReplication::ETargetKind kind, const TString& srcPath, const TString& dstPath, + const TString& streamName, const TDuration& streamRetentionPeriod); } diff --git a/ydb/core/tx/replication/controller/stream_remover.cpp b/ydb/core/tx/replication/controller/stream_remover.cpp index 6a30ac851801..4f75dc27a69a 100644 --- a/ydb/core/tx/replication/controller/stream_remover.cpp +++ b/ydb/core/tx/replication/controller/stream_remover.cpp @@ -10,9 +10,28 @@ namespace NKikimr::NReplication::NController { class TStreamRemover: public TActorBootstrapped { + void RequestPermission() { + Send(Parent, new TEvPrivate::TEvRequestDropStream()); + Become(&TThis::StateRequestPermission); + } + + STATEFN(StateRequestPermission) { + switch (ev->GetTypeRewrite()) { + hFunc(TEvPrivate::TEvAllowDropStream, 
Handle); + default: + return StateBase(ev); + } + } + + void Handle(TEvPrivate::TEvAllowDropStream::TPtr& ev) { + LOG_T("Handle " << ev->Get()->ToString()); + DropStream(); + } + void DropStream() { switch (Kind) { case TReplication::ETargetKind::Table: + case TReplication::ETargetKind::IndexTable: Send(YdbProxy, new TEvYdbProxy::TEvAlterTableRequest(SrcPath, NYdb::NTable::TAlterTableSettings() .AppendDropChangefeeds(StreamName))); break; @@ -25,7 +44,8 @@ class TStreamRemover: public TActorBootstrapped { switch (ev->GetTypeRewrite()) { hFunc(TEvYdbProxy::TEvAlterTableResponse, Handle); sFunc(TEvents::TEvWakeup, DropStream); - sFunc(TEvents::TEvPoison, PassAway); + default: + return StateBase(ev); } } @@ -76,7 +96,13 @@ class TStreamRemover: public TActorBootstrapped { } void Bootstrap() { - DropStream(); + RequestPermission(); + } + + STATEFN(StateBase) { + switch (ev->GetTypeRewrite()) { + sFunc(TEvents::TEvPoison, PassAway); + } } private: diff --git a/ydb/core/tx/replication/controller/target_discoverer.cpp b/ydb/core/tx/replication/controller/target_discoverer.cpp index c5f956555cac..a29fafea1ffd 100644 --- a/ydb/core/tx/replication/controller/target_discoverer.cpp +++ b/ydb/core/tx/replication/controller/target_discoverer.cpp @@ -3,11 +3,11 @@ #include "target_discoverer.h" #include "util.h" +#include +#include #include #include -#include - #include #include @@ -25,7 +25,7 @@ class TTargetDiscoverer: public TActorBootstrapped { auto it = Pending.find(ev->Cookie); if (it == Pending.end()) { - LOG_W("Unknown describe response" + LOG_W("Unknown describe path response" << ": cookie# " << ev->Cookie); return; } @@ -35,37 +35,95 @@ class TTargetDiscoverer: public TActorBootstrapped { const auto& result = ev->Get()->Result; if (result.IsSuccess()) { - LOG_D("Describe succeeded" + LOG_D("Describe path succeeded" << ": path# " << path.first); - auto entry = result.GetEntry(); + const auto& entry = result.GetEntry(); switch (entry.Type) { case 
NYdb::NScheme::ESchemeEntryType::SubDomain: case NYdb::NScheme::ESchemeEntryType::Directory: Pending.erase(it); return ListDirectory(path); + case NYdb::NScheme::ESchemeEntryType::Table: + return DescribeTable(ev->Cookie); default: break; } - entry.Name = path.first; // replace by full path + LOG_W("Unsupported entry type" + << ": path# " << path.first + << ", type# " << entry.Type); - if (const auto kind = TryTargetKindFromEntryType(entry.Type)) { - LOG_I("Add target" - << ": path# " << path.first - << ", kind# " << kind); - ToAdd.emplace_back(std::move(entry), path.second); + NYql::TIssues issues; + issues.AddIssue(TStringBuilder() << "Unsupported entry type: " << entry.Type); + Failed.emplace_back(path.first, NYdb::TStatus(NYdb::EStatus::UNSUPPORTED, std::move(issues))); + } else { + LOG_E("Describe path failed" + << ": path# " << path.first + << ", status# " << result.GetStatus() + << ", issues# " << result.GetIssues().ToOneLineString()); + + if (IsRetryableError(result)) { + return RetryDescribe(*it); } else { - LOG_W("Unsupported entry type" - << ": path# " << path.first - << ", type# " << entry.Type); + Failed.emplace_back(path.first, result); + } + } + + Pending.erase(it); + MaybeReply(); + } + + void DescribeTable(ui32 idx) { + Y_ABORT_UNLESS(idx < Paths.size()); + Send(YdbProxy, new TEvYdbProxy::TEvDescribeTableRequest(Paths.at(idx).first, {}), 0, idx); + Pending.insert(idx); + } - NYql::TIssues issues; - issues.AddIssue(TStringBuilder() << "Unsupported entry type: " << entry.Type); - Failed.emplace_back(path.first, NYdb::TStatus(NYdb::EStatus::UNSUPPORTED, std::move(issues))); + void Handle(TEvYdbProxy::TEvDescribeTableResponse::TPtr& ev) { + LOG_T("Handle " << ev->Get()->ToString()); + + auto it = Pending.find(ev->Cookie); + if (it == Pending.end()) { + LOG_W("Unknown describe table response" + << ": cookie# " << ev->Cookie); + return; + } + + Y_ABORT_UNLESS(*it < Paths.size()); + const auto& path = Paths.at(*it); + + const auto& result = 
ev->Get()->Result; + if (result.IsSuccess()) { + LOG_D("Describe table succeeded" + << ": path# " << path.first); + + const auto& target = ToAdd.emplace_back(path.first, path.second, TReplication::ETargetKind::Table); + LOG_I("Add target" + << ": srcPath# " << target.SrcPath + << ", dstPath# " << target.DstPath + << ", kind# " << target.Kind); + + for (const auto& index : result.GetTableDescription().GetIndexDescriptions()) { + switch (index.GetIndexType()) { + case NYdb::NTable::EIndexType::GlobalSync: + case NYdb::NTable::EIndexType::GlobalUnique: + break; + default: + continue; + } + + const auto& target = ToAdd.emplace_back( + CanonizePath(ChildPath(SplitPath(path.first), index.GetIndexName())), + CanonizePath(ChildPath(SplitPath(path.second), {index.GetIndexName(), "indexImplTable"})), + TReplication::ETargetKind::IndexTable); + LOG_I("Add target" + << ": srcPath# " << target.SrcPath + << ", dstPath# " << target.DstPath + << ", kind# " << target.Kind); } } else { - LOG_E("Describe failed" + LOG_E("Describe table failed" << ": path# " << path.first << ", status# " << result.GetStatus() << ", issues# " << result.GetIssues().ToOneLineString()); @@ -114,7 +172,8 @@ class TTargetDiscoverer: public TActorBootstrapped { return entry.Name.StartsWith("~") || entry.Name.StartsWith(".sys") - || entry.Name.StartsWith(".metadata"); + || entry.Name.StartsWith(".metadata") + || entry.Name.StartsWith("export-"); } void Handle(TEvYdbProxy::TEvListDirectoryResponse::TPtr& ev) { @@ -143,13 +202,13 @@ class TTargetDiscoverer: public TActorBootstrapped { path.second + '/' + child.Name)); } break; + case NYdb::NScheme::ESchemeEntryType::Table: + Paths.emplace_back( + path.first + '/' + child.Name, + path.second + '/' + child.Name); + DescribeTable(Paths.size() - 1); + break; default: - if (TryTargetKindFromEntryType(child.Type)) { - Paths.emplace_back( - path.first + '/' + child.Name, - path.second + '/' + child.Name); - DescribePath(Paths.size() - 1); - } break; } } @@ -225,6 
+284,7 @@ class TTargetDiscoverer: public TActorBootstrapped { switch (ev->GetTypeRewrite()) { hFunc(TEvYdbProxy::TEvDescribePathResponse, Handle); hFunc(TEvYdbProxy::TEvListDirectoryResponse, Handle); + hFunc(TEvYdbProxy::TEvDescribeTableResponse, Handle); sFunc(TEvents::TEvWakeup, Retry); sFunc(TEvents::TEvPoison, PassAway); } diff --git a/ydb/core/tx/replication/controller/target_discoverer_ut.cpp b/ydb/core/tx/replication/controller/target_discoverer_ut.cpp new file mode 100644 index 000000000000..70bd0f863c26 --- /dev/null +++ b/ydb/core/tx/replication/controller/target_discoverer_ut.cpp @@ -0,0 +1,135 @@ +#include "private_events.h" +#include "target_discoverer.h" + +#include +#include + +#include + +namespace NKikimr::NReplication::NController { + +Y_UNIT_TEST_SUITE(TargetDiscoverer) { + using namespace NTestHelpers; + + TTestTableDescription DummyTable() { + return TTestTableDescription{ + .Name = "Table", + .KeyColumns = {"key"}, + .Columns = { + {.Name = "key", .Type = "Uint32"}, + {.Name = "value", .Type = "Uint32"}, + }, + .ReplicationConfig = Nothing(), + }; + } + + Y_UNIT_TEST(Basic) { + TEnv env; + env.GetRuntime().SetLogPriority(NKikimrServices::REPLICATION_CONTROLLER, NLog::PRI_TRACE); + + env.CreateTable("/Root", *MakeTableDescription(DummyTable())); + + env.GetRuntime().Register(CreateTargetDiscoverer(env.GetSender(), 1, env.GetYdbProxy(), + TVector>{ + {"/Root", "/Root/Replicated"}, + } + )); + + auto ev = env.GetRuntime().GrabEdgeEvent(env.GetSender()); + UNIT_ASSERT(ev->Get()->IsSuccess()); + + const auto& toAdd = ev->Get()->ToAdd; + UNIT_ASSERT_VALUES_EQUAL(toAdd.size(), 1); + UNIT_ASSERT_VALUES_EQUAL(toAdd.at(0).SrcPath, "/Root/Table"); + UNIT_ASSERT_VALUES_EQUAL(toAdd.at(0).DstPath, "/Root/Replicated/Table"); + UNIT_ASSERT_VALUES_EQUAL(toAdd.at(0).Kind, TReplication::ETargetKind::Table); + } + + Y_UNIT_TEST(IndexedTable) { + TEnv env; + env.GetRuntime().SetLogPriority(NKikimrServices::REPLICATION_CONTROLLER, NLog::PRI_TRACE); + + 
env.CreateTableWithIndex("/Root", *MakeTableDescription(DummyTable()), + "Index", TVector{"value"}, NKikimrSchemeOp::EIndexTypeGlobal); + + env.GetRuntime().Register(CreateTargetDiscoverer(env.GetSender(), 1, env.GetYdbProxy(), + TVector>{ + {"/Root", "/Root/Replicated"}, + } + )); + + auto ev = env.GetRuntime().GrabEdgeEvent(env.GetSender()); + UNIT_ASSERT(ev->Get()->IsSuccess()); + + const auto& toAdd = ev->Get()->ToAdd; + UNIT_ASSERT_VALUES_EQUAL(toAdd.size(), 2); + UNIT_ASSERT_VALUES_EQUAL(toAdd.at(1).SrcPath, "/Root/Table/Index"); + UNIT_ASSERT_VALUES_EQUAL(toAdd.at(1).DstPath, "/Root/Replicated/Table/Index/indexImplTable"); + UNIT_ASSERT_VALUES_EQUAL(toAdd.at(1).Kind, TReplication::ETargetKind::IndexTable); + } + + Y_UNIT_TEST(Negative) { + TEnv env; + env.GetRuntime().SetLogPriority(NKikimrServices::REPLICATION_CONTROLLER, NLog::PRI_TRACE); + + env.GetRuntime().Register(CreateTargetDiscoverer(env.GetSender(), 1, env.GetYdbProxy(), + TVector>{ + {"/Root/Table", "/Root/ReplicatedTable"}, + } + )); + + auto ev = env.GetRuntime().GrabEdgeEvent(env.GetSender()); + UNIT_ASSERT(!ev->Get()->IsSuccess()); + + const auto& failed = ev->Get()->Failed; + UNIT_ASSERT_VALUES_EQUAL(failed.size(), 1); + UNIT_ASSERT_VALUES_EQUAL(failed.at(0).SrcPath, "/Root/Table"); + UNIT_ASSERT_VALUES_EQUAL(failed.at(0).Error.GetStatus(), NYdb::EStatus::SCHEME_ERROR); + } + + Y_UNIT_TEST(Dirs) { + TEnv env; + env.GetRuntime().SetLogPriority(NKikimrServices::REPLICATION_CONTROLLER, NLog::PRI_TRACE); + + env.MkDir("/Root", "Dir"); + env.CreateTable("/Root/Dir", *MakeTableDescription(DummyTable())); + + env.GetRuntime().Register(CreateTargetDiscoverer(env.GetSender(), 1, env.GetYdbProxy(), + TVector>{ + {"/Root", "/Root/Replicated"}, + } + )); + + auto ev = env.GetRuntime().GrabEdgeEvent(env.GetSender()); + UNIT_ASSERT(ev->Get()->IsSuccess()); + + const auto& toAdd = ev->Get()->ToAdd; + UNIT_ASSERT_VALUES_EQUAL(toAdd.size(), 1); + UNIT_ASSERT_VALUES_EQUAL(toAdd.at(0).SrcPath, 
"/Root/Dir/Table"); + UNIT_ASSERT_VALUES_EQUAL(toAdd.at(0).DstPath, "/Root/Replicated/Dir/Table"); + } + + Y_UNIT_TEST(SystemObjects) { + TEnv env; + env.GetRuntime().SetLogPriority(NKikimrServices::REPLICATION_CONTROLLER, NLog::PRI_TRACE); + + env.CreateTable("/Root", *MakeTableDescription(DummyTable())); + env.MkDir("/Root", "export-100500"); + env.CreateTable("/Root/export-100500", *MakeTableDescription(DummyTable())); + + env.GetRuntime().Register(CreateTargetDiscoverer(env.GetSender(), 1, env.GetYdbProxy(), + TVector>{ + {"/Root", "/Root/Replicated"}, + } + )); + + auto ev = env.GetRuntime().GrabEdgeEvent(env.GetSender()); + UNIT_ASSERT(ev->Get()->IsSuccess()); + + const auto& toAdd = ev->Get()->ToAdd; + UNIT_ASSERT_VALUES_EQUAL(toAdd.size(), 1); + UNIT_ASSERT_VALUES_EQUAL(toAdd.at(0).SrcPath, "/Root/Table"); + } +} + +} diff --git a/ydb/core/tx/replication/controller/target_table.cpp b/ydb/core/tx/replication/controller/target_table.cpp index d44b699bf0d0..6201bc29fbfa 100644 --- a/ydb/core/tx/replication/controller/target_table.cpp +++ b/ydb/core/tx/replication/controller/target_table.cpp @@ -102,16 +102,35 @@ class TTableWorkerRegistar: public TActorBootstrapped { }; // TTableWorkerRegistar -TTableTarget::TTableTarget(TReplication* replication, ui64 id, const TString& srcPath, const TString& dstPath) - : TTargetWithStream(replication, ETargetKind::Table, id, srcPath, dstPath) +TTargetTableBase::TTargetTableBase(TReplication* replication, ETargetKind finalKind, + ui64 id, const TString& srcPath, const TString& dstPath) + : TTargetWithStream(replication, finalKind, id, srcPath, dstPath) { } -IActor* TTableTarget::CreateWorkerRegistar(const TActorContext& ctx) const { +IActor* TTargetTableBase::CreateWorkerRegistar(const TActorContext& ctx) const { auto replication = GetReplication(); return new TTableWorkerRegistar(ctx.SelfID, replication->GetYdbProxy(), replication->GetConfig().GetSrcConnectionParams(), replication->GetId(), GetId(), - 
CanonizePath(ChildPath(SplitPath(GetSrcPath()), GetStreamName())), GetDstPathId()); + BuildStreamPath(), GetDstPathId()); +} + +TTargetTable::TTargetTable(TReplication* replication, ui64 id, const TString& srcPath, const TString& dstPath) + : TTargetTableBase(replication, ETargetKind::Table, id, srcPath, dstPath) +{ +} + +TString TTargetTable::BuildStreamPath() const { + return CanonizePath(ChildPath(SplitPath(GetSrcPath()), GetStreamName())); +} + +TTargetIndexTable::TTargetIndexTable(TReplication* replication, ui64 id, const TString& srcPath, const TString& dstPath) + : TTargetTableBase(replication, ETargetKind::IndexTable, id, srcPath, dstPath) +{ +} + +TString TTargetIndexTable::BuildStreamPath() const { + return CanonizePath(ChildPath(SplitPath(GetSrcPath()), {"indexImplTable", GetStreamName()})); } } diff --git a/ydb/core/tx/replication/controller/target_table.h b/ydb/core/tx/replication/controller/target_table.h index 08b45d7b0153..30f57d5db31a 100644 --- a/ydb/core/tx/replication/controller/target_table.h +++ b/ydb/core/tx/replication/controller/target_table.h @@ -4,14 +4,32 @@ namespace NKikimr::NReplication::NController { -class TTableTarget: public TTargetWithStream { +class TTargetTableBase: public TTargetWithStream { public: - explicit TTableTarget(TReplication* replication, + explicit TTargetTableBase(TReplication* replication, ETargetKind finalKind, ui64 id, const TString& srcPath, const TString& dstPath); protected: IActor* CreateWorkerRegistar(const TActorContext& ctx) const override; + virtual TString BuildStreamPath() const = 0; +}; -}; // TTableTarget +class TTargetTable: public TTargetTableBase { +public: + explicit TTargetTable(TReplication* replication, + ui64 id, const TString& srcPath, const TString& dstPath); + +protected: + TString BuildStreamPath() const override; +}; + +class TTargetIndexTable: public TTargetTableBase { +public: + explicit TTargetIndexTable(TReplication* replication, + ui64 id, const TString& srcPath, const TString& 
dstPath); + +protected: + TString BuildStreamPath() const override; +}; } diff --git a/ydb/core/tx/replication/controller/tx_discovery_targets_result.cpp b/ydb/core/tx/replication/controller/tx_discovery_targets_result.cpp index 686de8fb261e..7b75428e2c71 100644 --- a/ydb/core/tx/replication/controller/tx_discovery_targets_result.cpp +++ b/ydb/core/tx/replication/controller/tx_discovery_targets_result.cpp @@ -43,23 +43,19 @@ class TController::TTxDiscoveryTargetsResult: public TTxBase { if (Ev->Get()->IsSuccess()) { for (const auto& target : Ev->Get()->ToAdd) { - const auto kind = TargetKindFromEntryType(target.first.Type); - const auto& srcPath = target.first.Name; - const auto& dstPath = target.second; - - const auto tid = Replication->AddTarget(kind, srcPath, dstPath); + const auto tid = Replication->AddTarget(target.Kind, target.SrcPath, target.DstPath); db.Table().Key(rid, tid).Update( - NIceDb::TUpdate(kind), - NIceDb::TUpdate(srcPath), - NIceDb::TUpdate(dstPath) + NIceDb::TUpdate(target.Kind), + NIceDb::TUpdate(target.SrcPath), + NIceDb::TUpdate(target.DstPath) ); CLOG_N(ctx, "Add target" << ": rid# " << rid << ", tid# " << tid - << ", kind# " << kind - << ", srcPath# " << srcPath - << ", dstPath# " << dstPath); + << ", kind# " << target.Kind + << ", srcPath# " << target.SrcPath + << ", dstPath# " << target.DstPath); } } else { const auto error = JoinSeq(", ", Ev->Get()->Failed); diff --git a/ydb/core/tx/replication/controller/ut_target_discoverer/ya.make b/ydb/core/tx/replication/controller/ut_target_discoverer/ya.make new file mode 100644 index 000000000000..08eec946a5b4 --- /dev/null +++ b/ydb/core/tx/replication/controller/ut_target_discoverer/ya.make @@ -0,0 +1,20 @@ +UNITTEST_FOR(ydb/core/tx/replication/controller) + +FORK_SUBTESTS() + +SIZE(MEDIUM) + +TIMEOUT(600) + +PEERDIR( + ydb/core/tx/replication/ut_helpers + library/cpp/testing/unittest +) + +SRCS( + target_discoverer_ut.cpp +) + +YQL_LAST_ABI_VERSION() + +END() diff --git 
a/ydb/core/tx/replication/controller/util.h b/ydb/core/tx/replication/controller/util.h index 0963fa19d044..d0b3afba3199 100644 --- a/ydb/core/tx/replication/controller/util.h +++ b/ydb/core/tx/replication/controller/util.h @@ -1,47 +1,12 @@ #pragma once -#include "replication.h" - -#include #include -#include - #include -#include #include namespace NKikimr::NReplication::NController { -inline TMaybe TryTargetKindFromEntryType(NYdb::NScheme::ESchemeEntryType type) { - switch (type) { - case NYdb::NScheme::ESchemeEntryType::Table: - return TReplication::ETargetKind::Table; - case NYdb::NScheme::ESchemeEntryType::Unknown: - case NYdb::NScheme::ESchemeEntryType::Directory: - case NYdb::NScheme::ESchemeEntryType::PqGroup: - case NYdb::NScheme::ESchemeEntryType::SubDomain: - case NYdb::NScheme::ESchemeEntryType::RtmrVolume: - case NYdb::NScheme::ESchemeEntryType::BlockStoreVolume: - case NYdb::NScheme::ESchemeEntryType::CoordinationNode: - case NYdb::NScheme::ESchemeEntryType::Sequence: - case NYdb::NScheme::ESchemeEntryType::Replication: - case NYdb::NScheme::ESchemeEntryType::ColumnTable: - case NYdb::NScheme::ESchemeEntryType::ColumnStore: - case NYdb::NScheme::ESchemeEntryType::Topic: - case NYdb::NScheme::ESchemeEntryType::ExternalTable: - case NYdb::NScheme::ESchemeEntryType::ExternalDataSource: - case NYdb::NScheme::ESchemeEntryType::View: - return Nothing(); - } -} - -inline TReplication::ETargetKind TargetKindFromEntryType(NYdb::NScheme::ESchemeEntryType type) { - auto res = TryTargetKindFromEntryType(type); - Y_VERIFY_S(res, "Unexpected entry type: " << static_cast(type)); - return *res; -} - inline TString& TruncatedIssue(TString& issue) { static constexpr ui32 sizeLimit = 2_KB; static constexpr TStringBuf ellipsis = "..."; diff --git a/ydb/core/tx/replication/controller/ya.make b/ydb/core/tx/replication/controller/ya.make index 52bdfbbabdcb..bc742cbefccc 100644 --- a/ydb/core/tx/replication/controller/ya.make +++ b/ydb/core/tx/replication/controller/ya.make 
@@ -8,6 +8,7 @@ PEERDIR( ydb/core/tablet_flat ydb/core/tx/replication/common ydb/core/tx/replication/ydb_proxy + ydb/core/tx/scheme_board ydb/core/util ydb/core/ydb_convert ydb/services/metadata @@ -59,4 +60,5 @@ END() RECURSE_FOR_TESTS( ut_dst_creator + ut_target_discoverer ) diff --git a/ydb/core/tx/replication/service/service.cpp b/ydb/core/tx/replication/service/service.cpp index a8ea5d64ef5f..0e399990b268 100644 --- a/ydb/core/tx/replication/service/service.cpp +++ b/ydb/core/tx/replication/service/service.cpp @@ -125,9 +125,9 @@ class TSessionInfo { }; // TSessionInfo -struct TCredentialsKey: std::tuple { - explicit TCredentialsKey(const TString& endpoint, const TString& database, const TString& user) - : std::tuple(endpoint, database, user) +struct TCredentialsKey: std::tuple { + explicit TCredentialsKey(const TString& endpoint, const TString& database, bool ssl, const TString& user) + : std::tuple(endpoint, database, ssl, user) { } @@ -139,12 +139,20 @@ struct TCredentialsKey: std::tuple { return std::get<1>(*this); } + bool EnableSsl() const { + return std::get<2>(*this); + } + static TCredentialsKey FromParams(const NKikimrReplication::TConnectionParams& params) { + const auto& endpoint = params.GetEndpoint(); + const auto& database = params.GetDatabase(); + const bool ssl = params.GetEnableSsl(); + switch (params.GetCredentialsCase()) { case NKikimrReplication::TConnectionParams::kStaticCredentials: - return TCredentialsKey(params.GetEndpoint(), params.GetDatabase(), params.GetStaticCredentials().GetUser()); + return TCredentialsKey(endpoint, database, ssl, params.GetStaticCredentials().GetUser()); case NKikimrReplication::TConnectionParams::kOAuthToken: - return TCredentialsKey(params.GetEndpoint(), params.GetDatabase(), params.GetOAuthToken().GetToken() /* TODO */); + return TCredentialsKey(endpoint, database, ssl, params.GetOAuthToken().GetToken()); default: Y_ABORT("Unexpected credentials"); } @@ -155,7 +163,7 @@ struct TCredentialsKey: std::tuple { 
} // NKikimr::NReplication::NService template <> -struct THash : THash> {}; +struct THash : THash> {}; namespace NKikimr::NReplication { @@ -212,7 +220,7 @@ class TReplicationService: public TActorBootstrapped { const TActorId& GetOrCreateYdbProxy(TCredentialsKey&& key, Args&&... args) { auto it = YdbProxies.find(key); if (it == YdbProxies.end()) { - auto ydbProxy = Register(CreateYdbProxy(key.Endpoint(), key.Database(), std::forward(args)...)); + auto ydbProxy = Register(CreateYdbProxy(key.Endpoint(), key.Database(), key.EnableSsl(), std::forward(args)...)); auto res = YdbProxies.emplace(std::move(key), std::move(ydbProxy)); Y_ABORT_UNLESS(res.second); it = res.first; diff --git a/ydb/core/tx/replication/service/table_writer_impl.h b/ydb/core/tx/replication/service/table_writer_impl.h index 1fd77232fb2d..475aa3c351a2 100644 --- a/ydb/core/tx/replication/service/table_writer_impl.h +++ b/ydb/core/tx/replication/service/table_writer_impl.h @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -278,16 +279,6 @@ class TLocalTableWriter return Check(&TSchemeCacheHelpers::CheckEntryKind, &TThis::LogCritAndLeave, entry, expected); } - static TVector MakePartitionIds(const TVector& partitions) { - TVector result(::Reserve(partitions.size())); - - for (const auto& partition : partitions) { - result.push_back(partition.ShardId); - } - - return result; - } - void Registered(TActorSystem*, const TActorId&) override { this->ChangeServer = this->SelfId(); } @@ -348,6 +339,12 @@ class TLocalTableWriter return; } + if (TableVersion && TableVersion == entry.Self->Info.GetVersion().GetGeneralVersion()) { + Y_ABORT_UNLESS(Initialized); + Resolving = false; + return this->CreateSenders(); + } + auto schema = MakeIntrusive(); if (entry.Self && entry.Self->Info.HasVersion()) { schema->Version = entry.Self->Info.GetVersion().GetTableSchemaVersion(); @@ -415,11 +412,9 @@ class TLocalTableWriter return LogWarnAndRetry("Empty partitions"); } - const bool versionChanged 
= !TableVersion || TableVersion != entry.GeneralVersion; TableVersion = entry.GeneralVersion; - KeyDesc = std::move(entry.KeyDescription); - this->CreateSenders(MakePartitionIds(KeyDesc->GetPartitions()), versionChanged); + this->CreateSenders(NChangeExchange::MakePartitionIds(KeyDesc->GetPartitions())); if (!Initialized) { this->Send(Worker, new TEvWorker::TEvHandshake()); diff --git a/ydb/core/tx/replication/service/table_writer_ut.cpp b/ydb/core/tx/replication/service/table_writer_ut.cpp index 65c2c710e2a9..5b1b3de7b169 100644 --- a/ydb/core/tx/replication/service/table_writer_ut.cpp +++ b/ydb/core/tx/replication/service/table_writer_ut.cpp @@ -39,10 +39,7 @@ Y_UNIT_TEST_SUITE(LocalTableWriter) { } Y_UNIT_TEST(SupportedTypes) { - auto featureFlags = TFeatureFlags(); - featureFlags.SetEnableTableDatetime64(true); - - TEnv env(featureFlags); + TEnv env(TFeatureFlags().SetEnableTableDatetime64(true)); env.GetRuntime().SetLogPriority(NKikimrServices::REPLICATION_SERVICE, NLog::PRI_DEBUG); env.CreateTable("/Root", *MakeTableDescription(TTestTableDescription{ diff --git a/ydb/core/tx/replication/ut_helpers/test_env.h b/ydb/core/tx/replication/ut_helpers/test_env.h index f0aec854575f..e51542c99513 100644 --- a/ydb/core/tx/replication/ut_helpers/test_env.h +++ b/ydb/core/tx/replication/ut_helpers/test_env.h @@ -10,6 +10,9 @@ namespace NKikimr::NReplication::NTestHelpers { +class TFeatureFlags: public TTestFeatureFlagsHolder { +}; + template class TEnv { static constexpr char DomainName[] = "Root"; @@ -32,7 +35,7 @@ class TEnv { Database = "/" + ToString(DomainName); YdbProxy = Server.GetRuntime()->Register(CreateYdbProxy( - Endpoint, UseDatabase ? Database : "", std::forward(args)...)); + Endpoint, UseDatabase ? 
Database : "", false /* ssl */, std::forward(args)...)); Sender = Server.GetRuntime()->AllocateEdgeActor(); } @@ -63,7 +66,7 @@ class TEnv { TEnv(const TFeatureFlags& featureFlags, bool init = true) : Settings(Tests::TServerSettings(PortManager.GetPort(), {}, MakePqConfig()) .SetDomainName(DomainName) - .SetFeatureFlags(featureFlags) + .SetFeatureFlags(featureFlags.FeatureFlags) ) , Server(Settings) , Client(Settings) @@ -160,7 +163,12 @@ class TEnv { auto CreateTableWithIndex(Args&&... args) { return Client.CreateTableWithUniformShardedIndex(std::forward(args)...); } - + + template + auto MkDir(Args&&... args) { + return Client.MkDir(std::forward(args)...); + } + void SendAsync(const TActorId& recipient, IEventBase* ev) { Server.GetRuntime()->Send(new IEventHandle(recipient, Sender, ev)); } diff --git a/ydb/core/tx/replication/ydb_proxy/ydb_proxy.cpp b/ydb/core/tx/replication/ydb_proxy/ydb_proxy.cpp index ccd65e9cc62e..a92aa13aec37 100644 --- a/ydb/core/tx/replication/ydb_proxy/ydb_proxy.cpp +++ b/ydb/core/tx/replication/ydb_proxy/ydb_proxy.cpp @@ -419,20 +419,21 @@ class TYdbProxy: public TBaseProxyActor { Call(ev, &TTopicClient::CommitOffset); } - static TCommonClientSettings MakeSettings(const TString& endpoint, const TString& database) { + static TCommonClientSettings MakeSettings(const TString& endpoint, const TString& database, bool ssl) { return TCommonClientSettings() .DiscoveryEndpoint(endpoint) .DiscoveryMode(EDiscoveryMode::Async) - .Database(database); + .Database(database) + .SslCredentials(ssl); } - static TCommonClientSettings MakeSettings(const TString& endpoint, const TString& database, const TString& token) { - return MakeSettings(endpoint, database) + static TCommonClientSettings MakeSettings(const TString& endpoint, const TString& database, bool ssl, const TString& token) { + return MakeSettings(endpoint, database, ssl) .AuthToken(token); } - static TCommonClientSettings MakeSettings(const TString& endpoint, const TString& database, const 
TStaticCredentials& credentials) { - return MakeSettings(endpoint, database) + static TCommonClientSettings MakeSettings(const TString& endpoint, const TString& database, bool ssl, const TStaticCredentials& credentials) { + return MakeSettings(endpoint, database, ssl) .CredentialsProviderFactory(CreateLoginCredentialsProviderFactory({ .User = credentials.GetUser(), .Password = credentials.GetPassword(), @@ -485,16 +486,16 @@ class TYdbProxy: public TBaseProxyActor { }; // TYdbProxy -IActor* CreateYdbProxy(const TString& endpoint, const TString& database) { - return new TYdbProxy(endpoint, database); +IActor* CreateYdbProxy(const TString& endpoint, const TString& database, bool ssl) { + return new TYdbProxy(endpoint, database, ssl); } -IActor* CreateYdbProxy(const TString& endpoint, const TString& database, const TString& token) { - return new TYdbProxy(endpoint, database, token); +IActor* CreateYdbProxy(const TString& endpoint, const TString& database, bool ssl, const TString& token) { + return new TYdbProxy(endpoint, database, ssl, token); } -IActor* CreateYdbProxy(const TString& endpoint, const TString& database, const TStaticCredentials& credentials) { - return new TYdbProxy(endpoint, database, credentials); +IActor* CreateYdbProxy(const TString& endpoint, const TString& database, bool ssl, const TStaticCredentials& credentials) { + return new TYdbProxy(endpoint, database, ssl, credentials); } } diff --git a/ydb/core/tx/replication/ydb_proxy/ydb_proxy.h b/ydb/core/tx/replication/ydb_proxy/ydb_proxy.h index 8eba5e9a00a2..9b9c13244679 100644 --- a/ydb/core/tx/replication/ydb_proxy/ydb_proxy.h +++ b/ydb/core/tx/replication/ydb_proxy/ydb_proxy.h @@ -255,9 +255,9 @@ struct TEvYdbProxy { #pragma pop_macro("RemoveDirectory") -IActor* CreateYdbProxy(const TString& endpoint, const TString& database); -IActor* CreateYdbProxy(const TString& endpoint, const TString& database, const TString& token); -IActor* CreateYdbProxy(const TString& endpoint, const TString& database, 
+IActor* CreateYdbProxy(const TString& endpoint, const TString& database, bool ssl); +IActor* CreateYdbProxy(const TString& endpoint, const TString& database, bool ssl, const TString& token); +IActor* CreateYdbProxy(const TString& endpoint, const TString& database, bool ssl, const NKikimrReplication::TStaticCredentials& credentials); } diff --git a/ydb/core/tx/scheme_board/cache.cpp b/ydb/core/tx/scheme_board/cache.cpp index 07062b4ce553..0c0c2061a3c3 100644 --- a/ydb/core/tx/scheme_board/cache.cpp +++ b/ydb/core/tx/scheme_board/cache.cpp @@ -13,9 +13,9 @@ #include #include #include +#include #include #include -#include #include #include #include @@ -26,6 +26,8 @@ #include #include #include +#include + #include #include @@ -898,8 +900,6 @@ class TSchemeCache: public TMonitorableActor { default: return false; } - case NKikimrSchemeOp::EPathTypeTableIndex: - return true; default: return false; } @@ -978,6 +978,62 @@ class TSchemeCache: public TMonitorableActor { return partitions; } + static void FillTopicPartitioning( + const NKikimrSchemeOp::TPersQueueGroupDescription& pqDesc, + TVector& schema, + TVector& partitioning) + { + const auto& pqConfig = pqDesc.GetPQTabletConfig(); + if (pqConfig.GetPartitionKeySchema().empty()) { + return; + } + + schema.reserve(pqConfig.PartitionKeySchemaSize()); + for (const auto& keySchema : pqConfig.GetPartitionKeySchema()) { + // TODO: support pg types + schema.push_back(NScheme::TTypeInfo(keySchema.GetTypeId())); + } + + partitioning.reserve(pqDesc.PartitionsSize()); + for (const auto& partition : pqDesc.GetPartitions()) { + auto keyRange = NPQ::TPartitionKeyRange::Parse(partition.GetKeyRange()); + Y_ABORT_UNLESS(!keyRange.FromBound || keyRange.FromBound->GetCells().size() == schema.size()); + Y_ABORT_UNLESS(!keyRange.ToBound || keyRange.ToBound->GetCells().size() == schema.size()); + + auto& info = partitioning.emplace_back(partition.GetPartitionId()); + if (keyRange.ToBound) { + info.Range = 
NKikimr::TKeyDesc::TPartitionRangeInfo{ + .EndKeyPrefix = *keyRange.ToBound, + }; + } else { + info.Range = NKikimr::TKeyDesc::TPartitionRangeInfo{}; + } + } + + Sort(partitioning.begin(), partitioning.end(), [&schema](const auto& lhs, const auto& rhs) { + Y_ABORT_UNLESS(lhs.Range && rhs.Range); + Y_ABORT_UNLESS(lhs.Range->EndKeyPrefix || rhs.Range->EndKeyPrefix); + + if (!lhs.Range->EndKeyPrefix) { + return false; + } + + if (!rhs.Range->EndKeyPrefix) { + return true; + } + + Y_ABORT_UNLESS(lhs.Range->EndKeyPrefix && rhs.Range->EndKeyPrefix); + + const int compares = CompareTypedCellVectors( + lhs.Range->EndKeyPrefix.GetCells().data(), + rhs.Range->EndKeyPrefix.GetCells().data(), + schema.data(), schema.size() + ); + + return (compares < 0); + }); + } + bool IsSysTable() const { return Kind == TNavigate::KindTable && PathId.OwnerId == TSysTables::SysSchemeShard; } @@ -1486,6 +1542,7 @@ class TSchemeCache: public TMonitorableActor { if (Created) { NPQ::Migrate(*pathDesc.MutablePersQueueGroup()->MutablePQTabletConfig()); FillInfo(Kind, PQGroupInfo, std::move(*pathDesc.MutablePersQueueGroup())); + FillTopicPartitioning(PQGroupInfo->Description, PQGroupInfo->Schema, PQGroupInfo->Partitioning); } break; case NKikimrSchemeOp::EPathTypeCdcStream: @@ -2570,14 +2627,12 @@ class TSchemeCache: public TMonitorableActor { if (entry.RequestType == TNavigate::TEntry::ERequestType::ByPath) { auto pathExtractor = [this](TNavigate::TEntry& entry) { + NSysView::ISystemViewResolver::TSystemViewPath sysViewPath; if (AppData()->FeatureFlags.GetEnableSystemViews() - && (entry.Operation == TNavigate::OpPath || entry.Operation == TNavigate::OpTable)) + && SystemViewResolver->IsSystemViewPath(entry.Path, sysViewPath)) { - NSysView::ISystemViewResolver::TSystemViewPath sysViewPath; - if (SystemViewResolver->IsSystemViewPath(entry.Path, sysViewPath)) { - entry.TableId.SysViewInfo = sysViewPath.ViewName; - return CanonizePath(sysViewPath.Parent); - } + entry.TableId.SysViewInfo = 
sysViewPath.ViewName; + return CanonizePath(sysViewPath.Parent); } TString path = CanonizePath(entry.Path); diff --git a/ydb/core/tx/scheme_board/ya.make b/ydb/core/tx/scheme_board/ya.make index 22f66c217f09..ffb2121ca4c9 100644 --- a/ydb/core/tx/scheme_board/ya.make +++ b/ydb/core/tx/scheme_board/ya.make @@ -4,6 +4,7 @@ PEERDIR( ydb/library/actors/core ydb/core/base ydb/core/mon + ydb/core/persqueue/partition_key_range ydb/core/protos ydb/core/sys_view/common ydb/core/tx/scheme_cache diff --git a/ydb/core/tx/scheme_cache/scheme_cache.h b/ydb/core/tx/scheme_cache/scheme_cache.h index b81736613daa..99c120e99112 100644 --- a/ydb/core/tx/scheme_cache/scheme_cache.h +++ b/ydb/core/tx/scheme_cache/scheme_cache.h @@ -177,6 +177,8 @@ struct TSchemeCacheNavigate { struct TPQGroupInfo : public TAtomicRefCount { EKind Kind = KindUnknown; NKikimrSchemeOp::TPersQueueGroupDescription Description; + TVector Schema; + TVector Partitioning; }; struct TRtmrVolumeInfo : public TAtomicRefCount { diff --git a/ydb/core/tx/schemeshard/olap/bg_tasks/tx_chain/session.h b/ydb/core/tx/schemeshard/olap/bg_tasks/tx_chain/session.h index 1a56abe419e1..06c2b8939d22 100644 --- a/ydb/core/tx/schemeshard/olap/bg_tasks/tx_chain/session.h +++ b/ydb/core/tx/schemeshard/olap/bg_tasks/tx_chain/session.h @@ -21,6 +21,9 @@ class TTxChainSession: public NKikimr::NOlap::NBackground::TSessionProtoAdapter< YDB_READONLY_DEF(TTxChainData, TxData); YDB_READONLY(ui32, StepForExecute, 0); std::optional CurrentTxId; + + static const inline TFactory::TRegistrator Registrator = TFactory::TRegistrator(GetStaticClassName()); + protected: virtual TConclusion> DoCreateActor(const NKikimr::NOlap::NBackground::TStartContext& context) const override; virtual TConclusionStatus DoDeserializeFromProto(const TProtoLogic& proto) override { @@ -48,6 +51,8 @@ class TTxChainSession: public NKikimr::NOlap::NBackground::TSessionProtoAdapter< return result; } public: + TTxChainSession() = default; + TTxChainSession(const 
TTxChainData& data) : TxData(data) { diff --git a/ydb/core/tx/schemeshard/olap/columns/update.cpp b/ydb/core/tx/schemeshard/olap/columns/update.cpp index 83e300176007..c66da237c712 100644 --- a/ydb/core/tx/schemeshard/olap/columns/update.cpp +++ b/ydb/core/tx/schemeshard/olap/columns/update.cpp @@ -119,6 +119,11 @@ namespace NKikimr::NSchemeShard { serializer.DeserializeFromProto(columnSchema.GetCompression()).Validate(); Serializer = serializer; } + if (columnSchema.HasDataAccessorConstructor()) { + NArrow::NAccessor::TConstructorContainer container; + AFL_VERIFY(container.DeserializeFromProto(columnSchema.GetDataAccessorConstructor())); + AccessorConstructor = container; + } if (columnSchema.HasDictionaryEncoding()) { auto settings = NArrow::NDictionary::TEncodingSettings::BuildFromProto(columnSchema.GetDictionaryEncoding()); Y_ABORT_UNLESS(settings.IsSuccess()); @@ -140,6 +145,9 @@ namespace NKikimr::NSchemeShard { if (Serializer) { Serializer->SerializeToProto(*columnSchema.MutableSerializer()); } + if (AccessorConstructor) { + *columnSchema.MutableDataAccessorConstructor() = AccessorConstructor.SerializeToProto(); + } if (DictionaryEncoding) { *columnSchema.MutableDictionaryEncoding() = DictionaryEncoding->SerializeToProto(); } @@ -160,6 +168,14 @@ namespace NKikimr::NSchemeShard { return false; } } + if (!!diffColumn.GetAccessorConstructor()) { + auto conclusion = diffColumn.GetAccessorConstructor()->BuildConstructor(); + if (conclusion.IsFail()) { + errors.AddError(conclusion.GetErrorMessage()); + return false; + } + AccessorConstructor = conclusion.DetachResult(); + } if (diffColumn.GetStorageId()) { StorageId = *diffColumn.GetStorageId(); } diff --git a/ydb/core/tx/schemeshard/olap/columns/update.h b/ydb/core/tx/schemeshard/olap/columns/update.h index ec463a69c7c2..84a728829d6e 100644 --- a/ydb/core/tx/schemeshard/olap/columns/update.h +++ b/ydb/core/tx/schemeshard/olap/columns/update.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include 
#include @@ -17,6 +18,7 @@ class TOlapColumnDiff { YDB_READONLY_DEF(NArrow::NDictionary::TEncodingDiff, DictionaryEncoding); YDB_READONLY_DEF(std::optional, StorageId); YDB_READONLY_DEF(std::optional, DefaultValue); + YDB_READONLY_DEF(NArrow::NAccessor::TRequestedConstructorContainer, AccessorConstructor); public: bool ParseFromRequest(const NKikimrSchemeOp::TOlapColumnDiff& columnSchema, IErrorCollector& errors) { Name = columnSchema.GetName(); @@ -30,6 +32,12 @@ class TOlapColumnDiff { if (columnSchema.HasDefaultValue()) { DefaultValue = columnSchema.GetDefaultValue(); } + if (columnSchema.HasDataAccessorConstructor()) { + if (!AccessorConstructor.DeserializeFromProto(columnSchema.GetDataAccessorConstructor())) { + errors.AddError("cannot parse accessor constructor from proto"); + return false; + } + } if (columnSchema.HasSerializer()) { if (!Serializer.DeserializeFromProto(columnSchema.GetSerializer())) { errors.AddError("cannot parse serializer diff from proto"); @@ -55,6 +63,7 @@ class TOlapColumnAdd { YDB_READONLY_DEF(std::optional, Serializer); YDB_READONLY_DEF(std::optional, DictionaryEncoding); YDB_READONLY_DEF(NOlap::TColumnDefaultScalarValue, DefaultValue); + YDB_READONLY_DEF(NArrow::NAccessor::TConstructorContainer, AccessorConstructor); public: TOlapColumnAdd(const std::optional& keyOrder) : KeyOrder(keyOrder) { diff --git a/ydb/core/tx/schemeshard/olap/indexes/schema.cpp b/ydb/core/tx/schemeshard/olap/indexes/schema.cpp index 0f31bf0e2ede..8b00178a459c 100644 --- a/ydb/core/tx/schemeshard/olap/indexes/schema.cpp +++ b/ydb/core/tx/schemeshard/olap/indexes/schema.cpp @@ -1,19 +1,18 @@ #include "schema.h" #include +#include namespace NKikimr::NSchemeShard { void TOlapIndexSchema::SerializeToProto(NKikimrSchemeOp::TOlapIndexDescription& indexSchema) const { indexSchema.SetId(Id); indexSchema.SetName(Name); - indexSchema.SetStorageId(StorageId); IndexMeta.SerializeToProto(indexSchema); } void TOlapIndexSchema::DeserializeFromProto(const 
NKikimrSchemeOp::TOlapIndexDescription& indexSchema) { Id = indexSchema.GetId(); Name = indexSchema.GetName(); - StorageId = indexSchema.GetStorageId(); AFL_VERIFY(IndexMeta.DeserializeFromProto(indexSchema))("incorrect_proto", indexSchema.DebugString()); } @@ -24,9 +23,6 @@ bool TOlapIndexSchema::ApplyUpdate(const TOlapSchema& currentSchema, const TOlap errors.AddError("different index classes: " + upsert.GetIndexConstructor().GetClassName() + " vs " + IndexMeta.GetClassName()); return false; } - if (upsert.GetStorageId()) { - StorageId = *upsert.GetStorageId(); - } auto object = upsert.GetIndexConstructor()->CreateIndexMeta(GetId(), GetName(), currentSchema, errors); if (!object) { return false; diff --git a/ydb/core/tx/schemeshard/olap/indexes/schema.h b/ydb/core/tx/schemeshard/olap/indexes/schema.h index 1aa302ecb826..630016fe96a5 100644 --- a/ydb/core/tx/schemeshard/olap/indexes/schema.h +++ b/ydb/core/tx/schemeshard/olap/indexes/schema.h @@ -10,7 +10,6 @@ class TOlapIndexSchema { using TBase = TOlapIndexUpsert; YDB_READONLY(ui32, Id, Max()); YDB_READONLY_DEF(TString, Name); - YDB_READONLY_DEF(TString, StorageId); YDB_READONLY_DEF(NBackgroundTasks::TInterfaceProtoContainer, IndexMeta); public: TOlapIndexSchema() = default; diff --git a/ydb/core/tx/schemeshard/olap/layout/layout.h b/ydb/core/tx/schemeshard/olap/layout/layout.h index b3e338af1014..087194ceb6c2 100644 --- a/ydb/core/tx/schemeshard/olap/layout/layout.h +++ b/ydb/core/tx/schemeshard/olap/layout/layout.h @@ -15,48 +15,19 @@ template class TLayoutIdSet { private: ui64 Hash = 0; - std::set Elements; + THashSet Elements; public: TLayoutIdSet() = default; TLayoutIdSet(const TSetElement elem) { AddId(elem); } - typename std::set::const_iterator begin() const { - return Elements.begin(); - } - - typename std::set::const_iterator end() const { - return Elements.end(); - } - size_t Size() const { return Elements.size(); } - std::vector GetIdsVector() const { - return std::vector(Elements.begin(), 
Elements.end()); - } - - const std::set& GetIds() const { - return Elements; - } - - std::set GetIds(const ui32 count) const { - std::set result; - ui32 idx = 0; - for (auto&& i : Elements) { - if (++idx > count) { - return result; - } - result.emplace(i); - } - return result; - } - - std::vector GetIdsVector(const ui32 count) const { - std::set result = GetIds(count); - return std::vector(result.begin(), result.end()); + bool HasId(const TSetElement& id) const { + return Elements.contains(id); } bool AddId(const TSetElement& id) { @@ -131,7 +102,7 @@ class TColumnTablesLayout { void RemoveGroupsWithPathId(const TPathId& pathId) { const auto pred = [&](const TTablesGroup& item) { - return item.GetTableIds().GetIds().contains(pathId); + return item.GetTableIds().HasId(pathId); }; Groups.erase(std::remove_if(Groups.begin(), Groups.end(), pred), Groups.end()); } diff --git a/ydb/core/tx/schemeshard/olap/manager/manager.cpp b/ydb/core/tx/schemeshard/olap/manager/manager.cpp index 0f8b13cd0104..72a8c93c5f1c 100644 --- a/ydb/core/tx/schemeshard/olap/manager/manager.cpp +++ b/ydb/core/tx/schemeshard/olap/manager/manager.cpp @@ -29,12 +29,12 @@ void TTablesStorage::OnRemoveObject(const TPathId& pathId, TColumnTableInfo::TPt } } -const std::set& TTablesStorage::GetTablesWithTiering(const TString& tieringId) const { +const THashSet& TTablesStorage::GetTablesWithTiering(const TString& tieringId) const { auto it = PathsByTieringId.find(tieringId); if (it != PathsByTieringId.end()) { return it->second; } else { - return Default>(); + return Default>(); } } diff --git a/ydb/core/tx/schemeshard/olap/manager/manager.h b/ydb/core/tx/schemeshard/olap/manager/manager.h index d4ff264ec874..0873a12da22d 100644 --- a/ydb/core/tx/schemeshard/olap/manager/manager.h +++ b/ydb/core/tx/schemeshard/olap/manager/manager.h @@ -9,7 +9,7 @@ namespace NKikimr::NSchemeShard { class TTablesStorage { private: THashMap Tables; - THashMap> PathsByTieringId; + THashMap> PathsByTieringId; THashMap 
TablesByShard; void OnAddObject(const TPathId& pathId, TColumnTableInfo::TPtr object); @@ -20,7 +20,7 @@ class TTablesStorage { TColumnTablesLayout GetTablesLayout(const std::vector& tabletIds) const; - const std::set& GetTablesWithTiering(const TString& tieringId) const; + const THashSet& GetTablesWithTiering(const TString& tieringId) const; class TTableReadGuard { protected: @@ -104,7 +104,9 @@ class TTablesStorage { TTableCreatedGuard BuildNew(const TPathId& id, TColumnTableInfo::TPtr object); TTableExtractedGuard TakeVerified(const TPathId& id); TTableExtractedGuard TakeAlterVerified(const TPathId& id); - + bool empty() const { + return Tables.empty(); + } bool contains(const TPathId& id) const { return Tables.contains(id); } diff --git a/ydb/core/tx/schemeshard/olap/operations/alter/abstract/ya.make b/ydb/core/tx/schemeshard/olap/operations/alter/abstract/ya.make index 66a40695a604..bb4459a412fd 100644 --- a/ydb/core/tx/schemeshard/olap/operations/alter/abstract/ya.make +++ b/ydb/core/tx/schemeshard/olap/operations/alter/abstract/ya.make @@ -12,6 +12,7 @@ PEERDIR( ydb/library/accessor ydb/core/protos ydb/library/actors/wilson + ydb/library/formats/arrow ) YQL_LAST_ABI_VERSION() diff --git a/ydb/core/tx/schemeshard/olap/operations/alter/common/update.h b/ydb/core/tx/schemeshard/olap/operations/alter/common/update.h index c33f18eb031b..fd10245bc284 100644 --- a/ydb/core/tx/schemeshard/olap/operations/alter/common/update.h +++ b/ydb/core/tx/schemeshard/olap/operations/alter/common/update.h @@ -2,6 +2,7 @@ #include #include #include +#include namespace NKikimr::NSchemeShard::NOlap::NAlter { @@ -19,6 +20,16 @@ class TColumnTableUpdate: public ISSEntityUpdate { return NKikimrTxColumnShard::ETransactionKind::TX_KIND_SCHEMA; } virtual TConclusionStatus DoInitializeImpl(const TUpdateInitializationContext& context) = 0; + + bool IsAlterCompression(const TUpdateInitializationContext& context) const { + for (const auto& alterColumn : 
context.GetModification()->GetAlterColumnTable().GetAlterSchema().GetAlterColumns()) { + if (alterColumn.HasSerializer()) { + return true; + } + } + return false; + } + protected: virtual TConclusionStatus DoStartImpl(const TUpdateStartContext& /*context*/) { return TConclusionStatus::Success(); @@ -27,6 +38,9 @@ class TColumnTableUpdate: public ISSEntityUpdate { return TConclusionStatus::Success(); } virtual TConclusionStatus DoInitialize(const TUpdateInitializationContext& context) override final { + if (!AppData()->FeatureFlags.GetEnableOlapCompression() && IsAlterCompression(context)) { + return TConclusionStatus::Fail("Compression is disabled for OLAP tables"); + } if (!context.GetModification()->HasAlterColumnTable() && !context.GetModification()->HasAlterTable()) { return TConclusionStatus::Fail("no update data"); } @@ -52,6 +66,17 @@ class TColumnTableUpdate: public ISSEntityUpdate { return result; } + bool CheckTargetSchema(const TOlapSchema& targetSchema) { + if (!AppData()->FeatureFlags.GetEnableSparsedColumns()) { + for (auto& [_, column]: targetSchema.GetColumns().GetColumns()) { + if (column.GetDefaultValue().GetValue() || (column.GetAccessorConstructor().GetClassName() == NKikimr::NArrow::NAccessor::TGlobalConst::SparsedDataAccessorName)) { + return false; + } + } + } + return true; + } + public: }; diff --git a/ydb/core/tx/schemeshard/olap/operations/alter/in_store/transfer/update.cpp b/ydb/core/tx/schemeshard/olap/operations/alter/in_store/transfer/update.cpp index 119dc04e00c4..1173535cb0fe 100644 --- a/ydb/core/tx/schemeshard/olap/operations/alter/in_store/transfer/update.cpp +++ b/ydb/core/tx/schemeshard/olap/operations/alter/in_store/transfer/update.cpp @@ -30,7 +30,7 @@ NKikimr::TConclusionStatus TInStoreShardsTransfer::DoInitializeImpl(const TUpdat for (auto&& i : alter.GetSourceTabletIds()) { destinationSession.MutableTransferContext()->AddSourceTabletIds(i); } - DestinationSessions.emplace_back(destinationSession); + 
AFL_VERIFY(DestinationSessions.emplace(destinationSession.GetTransferContext().GetDestinationTabletId(), destinationSession).second); AFL_VERIFY(ShardIdsUsage.emplace(alter.GetDestinationTabletId()).second); } const auto& inStoreOriginal = context.GetOriginalEntityAsVerified(); diff --git a/ydb/core/tx/schemeshard/olap/operations/alter/in_store/transfer/update.h b/ydb/core/tx/schemeshard/olap/operations/alter/in_store/transfer/update.h index 5b19aa69fb99..0207e3528dc9 100644 --- a/ydb/core/tx/schemeshard/olap/operations/alter/in_store/transfer/update.h +++ b/ydb/core/tx/schemeshard/olap/operations/alter/in_store/transfer/update.h @@ -10,7 +10,7 @@ namespace NKikimr::NSchemeShard::NOlap::NAlter { class TInStoreShardsTransfer: public TInStoreTableUpdate { private: using TBase = TInStoreTableUpdate; - std::vector DestinationSessions; + THashMap DestinationSessions; std::shared_ptr TargetInStoreTable; std::set ShardIdsUsage; @@ -29,13 +29,9 @@ class TInStoreShardsTransfer: public TInStoreTableUpdate { virtual TConclusionStatus DoInitializeImpl(const TUpdateInitializationContext& context) override; virtual TString DoGetShardTxBodyString(const ui64 tabletId, const TMessageSeqNo& /*seqNo*/) const override { - for (auto&& i : DestinationSessions) { - if (i.GetTransferContext().GetDestinationTabletId() == tabletId) { - return i.SerializeAsString(); - } - } - AFL_VERIFY(false); - return ""; + auto it = DestinationSessions.find(tabletId); + AFL_VERIFY(it != DestinationSessions.end()); + return it->second.SerializeAsString(); } virtual std::set DoGetShardIds() const override { diff --git a/ydb/core/tx/schemeshard/olap/operations/alter/standalone/update.cpp b/ydb/core/tx/schemeshard/olap/operations/alter/standalone/update.cpp index b94ff1888af2..2902534fbb24 100644 --- a/ydb/core/tx/schemeshard/olap/operations/alter/standalone/update.cpp +++ b/ydb/core/tx/schemeshard/olap/operations/alter/standalone/update.cpp @@ -1,6 +1,7 @@ #include "update.h" #include #include +#include 
namespace NKikimr::NSchemeShard::NOlap::NAlter { @@ -36,6 +37,24 @@ NKikimr::TConclusionStatus TStandaloneSchemaUpdate::DoInitializeImpl(const TUpda return TConclusionStatus::Fail("schema update error: " + collector->GetErrorMessage() + ". in alter constructor STANDALONE_UPDATE"); } } + + const TString& parentPathStr = context.GetModification()->GetWorkingDir(); + if (parentPathStr) { // Not empty only if called from Propose, not from ProgressState + NSchemeShard::TPath parentPath = NSchemeShard::TPath::Resolve(parentPathStr, context.GetSSOperationContext()->SS); + auto domainInfo = parentPath.DomainInfo(); + const TSchemeLimits& limits = domainInfo->GetSchemeLimits(); + if (targetSchema.GetColumns().GetColumns().size() > limits.MaxColumnTableColumns) { + TString errStr = TStringBuilder() + << "Too many columns" + << ": new: " << targetSchema.GetColumns().GetColumns().size() + << ". Limit: " << limits.MaxColumnTableColumns; + return TConclusionStatus::Fail(errStr); + } + } + + if (!CheckTargetSchema(targetSchema)) { + return TConclusionStatus::Fail("schema update error: sparsed columns are disabled"); + } auto description = originalTable.GetTableInfoVerified().Description; targetSchema.Serialize(*description.MutableSchema()); auto ttl = originalTable.GetTableTTLOptional() ? 
*originalTable.GetTableTTLOptional() : TOlapTTL(); diff --git a/ydb/core/tx/schemeshard/olap/operations/alter_store.cpp b/ydb/core/tx/schemeshard/olap/operations/alter_store.cpp index 446b43017821..57f05068b162 100644 --- a/ydb/core/tx/schemeshard/olap/operations/alter_store.cpp +++ b/ydb/core/tx/schemeshard/olap/operations/alter_store.cpp @@ -1,6 +1,9 @@ #include #include #include +#include + +#include "checks.h" namespace { @@ -434,6 +437,18 @@ class TAlterOlapStore: public TSubOperation { } } + bool IsAlterCompression() const { + const auto& alter = Transaction.GetAlterColumnStore(); + for (const auto& alterSchema : alter.GetAlterSchemaPresets()) { + for (const auto& alterColumn : alterSchema.GetAlterSchema().GetAlterColumns()) { + if (alterColumn.HasSerializer()) { + return true; + } + } + } + return false; + } + public: using TSubOperation::TSubOperation; @@ -458,7 +473,13 @@ class TAlterOlapStore: public TSubOperation { return result; } - TPath path = TPath::Resolve(parentPathStr, context.SS).Dive(name); + if (!AppData()->FeatureFlags.GetEnableOlapCompression() && IsAlterCompression()) { + result->SetError(NKikimrScheme::StatusPreconditionFailed, "Compression is disabled for OLAP tables"); + return result; + } + + TPath parentPath = TPath::Resolve(parentPathStr, context.SS); + TPath path = parentPath.Dive(name); { TPath::TChecker checks = path.Check(); checks @@ -504,6 +525,26 @@ class TAlterOlapStore: public TSubOperation { if (!alterData) { return result; } + + if (!AppData()->FeatureFlags.GetEnableSparsedColumns()) { + for (auto& [_, preset]: alterData->SchemaPresets) { + for (auto& [_, column]: preset.GetColumns().GetColumns()) { + if (column.GetDefaultValue().GetValue() || (column.GetAccessorConstructor().GetClassName() == NKikimr::NArrow::NAccessor::TGlobalConst::SparsedDataAccessorName)) { + result->SetError(NKikimrScheme::StatusSchemeError,"schema update error: sparsed columns are disabled"); + return result; + } + } + } + } + + auto domainInfo = 
parentPath.DomainInfo(); + const TSchemeLimits& limits = domainInfo->GetSchemeLimits(); + + if (!NKikimr::NSchemeShard::NOlap::CheckLimits(limits, alterData, errStr)) { + result->SetError(NKikimrScheme::StatusSchemeError, errStr); + return result; + } + storeInfo->AlterData = alterData; NIceDb::TNiceDb db(context.GetDB()); diff --git a/ydb/core/tx/schemeshard/olap/operations/alter_table.cpp b/ydb/core/tx/schemeshard/olap/operations/alter_table.cpp index 6dff78961fff..4fb76b4a75a0 100644 --- a/ydb/core/tx/schemeshard/olap/operations/alter_table.cpp +++ b/ydb/core/tx/schemeshard/olap/operations/alter_table.cpp @@ -265,6 +265,19 @@ class TAlterColumnTable: public TSubOperation { auto result = MakeHolder(NKikimrScheme::StatusAccepted, ui64(OperationId.GetTxId()), ui64(ssId)); + const bool isAlterSharding = Transaction.HasAlterColumnTable() && Transaction.GetAlterColumnTable().HasReshardColumnTable(); + if (isAlterSharding && !AppData()->FeatureFlags.GetEnableAlterShardingInColumnShard()) { + result->SetError(NKikimrScheme::StatusPreconditionFailed, "Alter sharding is disabled for OLAP tables"); + return result; + } + + const bool hasTiering = Transaction.HasAlterColumnTable() && Transaction.GetAlterColumnTable().HasAlterTtlSettings() && + Transaction.GetAlterColumnTable().GetAlterTtlSettings().HasUseTiering(); + if (hasTiering && HasAppData() && !AppDataVerified().FeatureFlags.GetEnableTieringInColumnShard()) { + result->SetError(NKikimrScheme::StatusPreconditionFailed, "Tiering functionality is disabled for OLAP tables"); + return result; + } + const TString& parentPathStr = Transaction.GetWorkingDir(); const TString& name = Transaction.HasAlterColumnTable() ? 
Transaction.GetAlterColumnTable().GetName() : Transaction.GetAlterTable().GetName(); LOG_NOTICE_S(context.Ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, diff --git a/ydb/core/tx/schemeshard/olap/operations/checks.h b/ydb/core/tx/schemeshard/olap/operations/checks.h new file mode 100644 index 000000000000..7a2e58fa807d --- /dev/null +++ b/ydb/core/tx/schemeshard/olap/operations/checks.h @@ -0,0 +1,19 @@ +#pragma once + +namespace NKikimr::NSchemeShard::NOlap { + inline bool CheckLimits(const TSchemeLimits& limits, TOlapStoreInfo::TPtr alterData, TString& errStr) { + for (auto& [_, preset]: alterData->SchemaPresets) { + ui64 columnCount = preset.GetColumns().GetColumns().size(); + if (columnCount > limits.MaxColumnTableColumns) { + errStr = TStringBuilder() + << "Too many columns" + << ". new: " << columnCount + << ". Limit: " << limits.MaxColumnTableColumns; + return false; + } + } + return true; + } +} + + diff --git a/ydb/core/tx/schemeshard/olap/operations/create_store.cpp b/ydb/core/tx/schemeshard/olap/operations/create_store.cpp index 137c42d8fd4c..d66f3bed7d44 100644 --- a/ydb/core/tx/schemeshard/olap/operations/create_store.cpp +++ b/ydb/core/tx/schemeshard/olap/operations/create_store.cpp @@ -7,6 +7,8 @@ #include #include +#include "checks.h" + using namespace NKikimr; using namespace NKikimr::NSchemeShard; @@ -327,12 +329,10 @@ class TCreateOlapStore: public TSubOperation { TEvSchemeShard::EStatus status = NKikimrScheme::StatusAccepted; auto result = MakeHolder(status, ui64(OperationId.GetTxId()), ui64(ssId)); - if (context.SS->IsServerlessDomain(TPath::Init(context.SS->RootPathId(), context.SS))) { - if (AppData()->ColumnShardConfig.GetDisabledOnSchemeShard()) { - result->SetError(NKikimrScheme::StatusPreconditionFailed, - "OLAP schema operations are not supported"); - return result; - } + if (AppData()->ColumnShardConfig.GetDisabledOnSchemeShard() && context.SS->OlapStores.empty()) { + result->SetError(NKikimrScheme::StatusPreconditionFailed, + "OLAP schema 
operations are not supported"); + return result; } NSchemeShard::TPath parentPath = NSchemeShard::TPath::Resolve(parentPathStr, context.SS); @@ -396,12 +396,20 @@ class TCreateOlapStore: public TSubOperation { return result; } + auto domainInfo = parentPath.DomainInfo(); + const TSchemeLimits& limits = domainInfo->GetSchemeLimits(); + TProposeErrorCollector errors(*result); TOlapStoreInfo::TPtr storeInfo = std::make_shared(); if (!storeInfo->ParseFromRequest(createDescription, errors)) { return result; } + if (!NKikimr::NSchemeShard::NOlap::CheckLimits(limits, storeInfo, errStr)) { + result->SetError(NKikimrScheme::StatusSchemeError, errStr); + return result; + } + // Construct channels bindings for columnshards TChannelsBindings channelsBindings; if (!context.SS->GetOlapChannelsBindings(dstPath.GetPathIdForDomain(), storeInfo->GetStorageConfig(), channelsBindings, errStr)) { diff --git a/ydb/core/tx/schemeshard/olap/operations/create_table.cpp b/ydb/core/tx/schemeshard/olap/operations/create_table.cpp index 0f4c491b76f1..0c155d68d761 100644 --- a/ydb/core/tx/schemeshard/olap/operations/create_table.cpp +++ b/ydb/core/tx/schemeshard/olap/operations/create_table.cpp @@ -582,12 +582,10 @@ class TCreateColumnTable: public TSubOperation { TEvSchemeShard::EStatus status = NKikimrScheme::StatusAccepted; auto result = MakeHolder(status, ui64(opTxId), ui64(ssId)); - if (context.SS->IsServerlessDomain(TPath::Init(context.SS->RootPathId(), context.SS))) { - if (AppData()->ColumnShardConfig.GetDisabledOnSchemeShard()) { - result->SetError(NKikimrScheme::StatusPreconditionFailed, - "OLAP schema operations are not supported"); - return result; - } + if (AppData()->ColumnShardConfig.GetDisabledOnSchemeShard() && context.SS->ColumnTables.empty()) { + result->SetError(NKikimrScheme::StatusPreconditionFailed, + "OLAP schema operations are not supported"); + return result; } if (createDescription.GetSharding().GetColumnShards().size()) { @@ -683,11 +681,23 @@ class 
TCreateColumnTable: public TSubOperation { TProposeErrorCollector errors(*result); TColumnTableInfo::TPtr tableInfo; bool needUpdateObject = false; + auto domainInfo = parentPath.DomainInfo(); + const TSchemeLimits& limits = domainInfo->GetSchemeLimits(); + if (storeInfo) { TOlapPresetConstructor tableConstructor(*storeInfo); tableInfo = tableConstructor.BuildTableInfo(createDescription, context, errors); needUpdateObject = tableConstructor.GetNeedUpdateObject(); } else { + ui64 columnCount = createDescription.schema().columns().size(); + if (columnCount > limits.MaxColumnTableColumns) { + TString errStr = TStringBuilder() + << "Too many columns" + << ". new: " << columnCount + << ". Limit: " << limits.MaxColumnTableColumns; + result->SetError(NKikimrScheme::StatusSchemeError, errStr); + return result; + } TOlapTableConstructor tableConstructor; tableInfo = tableConstructor.BuildTableInfo(createDescription, context, errors); } diff --git a/ydb/core/tx/schemeshard/olap/schema/schema.cpp b/ydb/core/tx/schemeshard/olap/schema/schema.cpp index 3f6749641ec3..dd1889779c1e 100644 --- a/ydb/core/tx/schemeshard/olap/schema/schema.cpp +++ b/ydb/core/tx/schemeshard/olap/schema/schema.cpp @@ -1,6 +1,5 @@ #include "schema.h" #include -#include namespace NKikimr::NSchemeShard { @@ -83,14 +82,6 @@ bool TOlapSchema::ValidateTtlSettings(const NKikimrSchemeOp::TColumnDataLifeCycl errors.AddError("Incorrect ttl column - not found in scheme"); return false; } - if (!Statistics.GetByIdOptional(NOlap::NStatistics::EType::Max, {column->GetId()})) { - TOlapStatisticsModification modification; - NOlap::NStatistics::TConstructorContainer container(std::make_shared(column->GetName())); - modification.AddUpsert("__TTL_PROVIDER::" + TGUID::CreateTimebased().AsUuidString(), container); - if (!Statistics.ApplyUpdate(*this, modification, errors)) { - return false; - } - } return ValidateColumnTableTtl(ttl.GetEnabled(), {}, Columns.GetColumns(), Columns.GetColumnsByName(), errors); } case 
TTtlProto::kDisabled: @@ -110,10 +101,6 @@ bool TOlapSchema::Update(const TOlapSchemaUpdate& schemaUpdate, IErrorCollector& return false; } - if (!Statistics.ApplyUpdate(*this, schemaUpdate.GetStatistics(), errors)) { - return false; - } - if (!Options.ApplyUpdate(schemaUpdate.GetOptions(), errors)) { return false; } @@ -140,7 +127,6 @@ void TOlapSchema::ParseFromLocalDB(const NKikimrSchemeOp::TColumnTableSchema& ta Columns.Parse(tableSchema); Indexes.Parse(tableSchema); Options.Parse(tableSchema); - Statistics.Parse(tableSchema); } void TOlapSchema::Serialize(NKikimrSchemeOp::TColumnTableSchema& tableSchemaExt) const { @@ -154,7 +140,6 @@ void TOlapSchema::Serialize(NKikimrSchemeOp::TColumnTableSchema& tableSchemaExt) Columns.Serialize(resultLocal); Indexes.Serialize(resultLocal); Options.Serialize(resultLocal); - Statistics.Serialize(resultLocal); std::swap(resultLocal, tableSchemaExt); } @@ -171,10 +156,6 @@ bool TOlapSchema::Validate(const NKikimrSchemeOp::TColumnTableSchema& opSchema, return false; } - if (!Statistics.Validate(opSchema, errors)) { - return false; - } - if (opSchema.GetEngine() != Engine) { errors.AddError("Specified schema engine does not match schema preset"); return false; diff --git a/ydb/core/tx/schemeshard/olap/schema/schema.h b/ydb/core/tx/schemeshard/olap/schema/schema.h index b840f97ca616..f800750341fa 100644 --- a/ydb/core/tx/schemeshard/olap/schema/schema.h +++ b/ydb/core/tx/schemeshard/olap/schema/schema.h @@ -1,11 +1,9 @@ #pragma once #include #include -#include #include #include #include -#include #include "update.h" namespace NKikimr::NSchemeShard { @@ -16,16 +14,11 @@ namespace NKikimr::NSchemeShard { YDB_READONLY_DEF(TOlapColumnsDescription, Columns); YDB_READONLY_DEF(TOlapIndexesDescription, Indexes); YDB_READONLY_DEF(TOlapOptionsDescription, Options); - mutable TOlapStatisticsDescription Statistics; YDB_READONLY(ui32, NextColumnId, 1); YDB_READONLY(ui32, Version, 0); public: - const TOlapStatisticsDescription& GetStatistics() 
const { - return Statistics; - } - bool Update(const TOlapSchemaUpdate& schemaUpdate, IErrorCollector& errors); void ParseFromLocalDB(const NKikimrSchemeOp::TColumnTableSchema& tableSchema); diff --git a/ydb/core/tx/schemeshard/olap/schema/update.cpp b/ydb/core/tx/schemeshard/olap/schema/update.cpp index b78161394b78..3b0087e3b756 100644 --- a/ydb/core/tx/schemeshard/olap/schema/update.cpp +++ b/ydb/core/tx/schemeshard/olap/schema/update.cpp @@ -23,10 +23,6 @@ namespace NKikimr::NSchemeShard { return false; } - if (!Statistics.Parse(alterRequest, errors)) { - return false; - } - if (!Options.Parse(alterRequest, errors)) { return false; } diff --git a/ydb/core/tx/schemeshard/olap/schema/update.h b/ydb/core/tx/schemeshard/olap/schema/update.h index d61b97749a5d..0cd98c09b3c1 100644 --- a/ydb/core/tx/schemeshard/olap/schema/update.h +++ b/ydb/core/tx/schemeshard/olap/schema/update.h @@ -1,6 +1,5 @@ #pragma once #include -#include #include #include #include @@ -11,7 +10,6 @@ namespace NKikimr::NSchemeShard { YDB_READONLY_DEF(TOlapColumnsUpdate, Columns); YDB_READONLY_DEF(TOlapIndexesUpdate, Indexes); YDB_READONLY_DEF(TOlapOptionsUpdate, Options); - YDB_READONLY_DEF(TOlapStatisticsModification, Statistics); YDB_READONLY_OPT(NKikimrSchemeOp::EColumnTableEngine, Engine); public: bool Parse(const NKikimrSchemeOp::TColumnTableSchema& tableSchema, IErrorCollector& errors, bool allowNullKeys = false); diff --git a/ydb/core/tx/schemeshard/olap/schema/ya.make b/ydb/core/tx/schemeshard/olap/schema/ya.make index 03fae68d790d..76b2d2d1c801 100644 --- a/ydb/core/tx/schemeshard/olap/schema/ya.make +++ b/ydb/core/tx/schemeshard/olap/schema/ya.make @@ -9,8 +9,6 @@ PEERDIR( ydb/core/tx/schemeshard/olap/columns ydb/core/tx/schemeshard/olap/indexes ydb/core/tx/schemeshard/olap/options - ydb/core/tx/schemeshard/olap/statistics - ydb/core/tx/columnshard/engines/scheme/statistics/max ydb/core/tx/schemeshard/common ) diff --git a/ydb/core/tx/schemeshard/olap/statistics/schema.cpp 
b/ydb/core/tx/schemeshard/olap/statistics/schema.cpp deleted file mode 100644 index af6f9e711d05..000000000000 --- a/ydb/core/tx/schemeshard/olap/statistics/schema.cpp +++ /dev/null @@ -1,92 +0,0 @@ -#include "schema.h" -#include - -namespace NKikimr::NSchemeShard { - -void TOlapStatisticsSchema::SerializeToProto(NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) const { - Operator.SerializeToProto(proto); -} - -bool TOlapStatisticsSchema::DeserializeFromProto(const NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) { - AFL_VERIFY(Operator.DeserializeFromProto(proto))("incorrect_proto", proto.DebugString()); - return true; -} - -bool TOlapStatisticsSchema::ApplyUpdate(const TOlapSchema& /*currentSchema*/, const TOlapStatisticsUpsert& upsert, IErrorCollector& errors) { - AFL_VERIFY(upsert.GetName() == Operator.GetName()); - AFL_VERIFY(!!upsert.GetConstructor()); - if (upsert.GetConstructor().GetClassName() != Operator.GetClassName()) { - errors.AddError("different index classes: " + upsert.GetConstructor().GetClassName() + " vs " + Operator.GetClassName()); - return false; - } - errors.AddError("cannot modify statistics calculation for " + Operator.GetName() + ". 
not implemented currently."); - return false; -} - -bool TOlapStatisticsDescription::ApplyUpdate(const TOlapSchema& currentSchema, const TOlapStatisticsModification& schemaUpdate, IErrorCollector& errors) { - for (auto&& stat : schemaUpdate.GetUpsert()) { - auto* current = MutableByNameOptional(stat.GetName()); - if (current) { - if (!current->ApplyUpdate(currentSchema, stat, errors)) { - return false; - } - } else { - auto meta = stat.GetConstructor()->CreateOperator(stat.GetName(), currentSchema); - if (!meta) { - errors.AddError(meta.GetErrorMessage()); - return false; - } - TOlapStatisticsSchema object(meta.DetachResult()); - Y_ABORT_UNLESS(ObjectsByName.emplace(stat.GetName(), std::move(object)).second); - } - } - - for (const auto& name : schemaUpdate.GetDrop()) { - auto info = GetByNameOptional(name); - if (!info) { - errors.AddError(NKikimrScheme::StatusSchemeError, TStringBuilder() << "Unknown stat for drop: " << name); - return false; - } - AFL_VERIFY(ObjectsByName.erase(name)); - } - - return true; -} - -void TOlapStatisticsDescription::Parse(const NKikimrSchemeOp::TColumnTableSchema& tableSchema) { - for (const auto& proto : tableSchema.GetStatistics()) { - TOlapStatisticsSchema object; - AFL_VERIFY(object.DeserializeFromProto(proto)); - AFL_VERIFY(ObjectsByName.emplace(proto.GetName(), std::move(object)).second); - } -} - -void TOlapStatisticsDescription::Serialize(NKikimrSchemeOp::TColumnTableSchema& tableSchema) const { - for (const auto& object : ObjectsByName) { - object.second.SerializeToProto(*tableSchema.AddStatistics()); - } -} - -bool TOlapStatisticsDescription::Validate(const NKikimrSchemeOp::TColumnTableSchema& opSchema, IErrorCollector& errors) const { - THashSet usedObjects; - for (const auto& proto : opSchema.GetStatistics()) { - if (proto.GetName().empty()) { - errors.AddError("Statistic cannot have an empty name"); - return false; - } - - const TString& name = proto.GetName(); - if (!GetByNameOptional(name)) { - errors.AddError("Stat '" 
+ name + "' does not match schema preset"); - return false; - } - - if (!usedObjects.emplace(proto.GetName()).second) { - errors.AddError("Column '" + name + "' is specified multiple times"); - return false; - } - } - return true; -} - -} diff --git a/ydb/core/tx/schemeshard/olap/statistics/schema.h b/ydb/core/tx/schemeshard/olap/statistics/schema.h deleted file mode 100644 index 37a79fc17fdd..000000000000 --- a/ydb/core/tx/schemeshard/olap/statistics/schema.h +++ /dev/null @@ -1,80 +0,0 @@ -#pragma once -#include "update.h" - -namespace NKikimr::NSchemeShard { - -class TOlapSchema; - -class TOlapStatisticsSchema { -private: - YDB_READONLY_DEF(NOlap::NStatistics::TOperatorContainer, Operator); -public: - TOlapStatisticsSchema() = default; - - TOlapStatisticsSchema(const NOlap::NStatistics::TOperatorContainer& container) - : Operator(container) - { - AFL_VERIFY(container.GetName()); - } - - bool ApplyUpdate(const TOlapSchema& currentSchema, const TOlapStatisticsUpsert& upsert, IErrorCollector& errors); - - void SerializeToProto(NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) const; - bool DeserializeFromProto(const NKikimrColumnShardStatisticsProto::TOperatorContainer& proto); -}; - -class TOlapStatisticsDescription { -public: - using TObjectsByName = THashMap; - -private: - YDB_READONLY_DEF(TObjectsByName, ObjectsByName); -public: - const TOlapStatisticsSchema* GetByIdOptional(const NOlap::NStatistics::EType type, const std::vector& entityIds) const noexcept { - for (auto&& i : ObjectsByName) { - if (!i.second.GetOperator()) { - continue; - } - if (i.second.GetOperator()->GetIdentifier() != NOlap::NStatistics::TIdentifier(type, entityIds)) { - continue; - } - return &i.second; - } - return nullptr; - } - - const TOlapStatisticsSchema* GetByNameOptional(const TString& name) const noexcept { - auto it = ObjectsByName.find(name); - if (it != ObjectsByName.end()) { - return &it->second; - } - return nullptr; - } - - const TOlapStatisticsSchema& 
GetByNameVerified(const TString& name) const noexcept { - auto object = GetByNameOptional(name); - AFL_VERIFY(object); - return *object; - } - - TOlapStatisticsSchema* MutableByNameOptional(const TString& name) noexcept { - auto it = ObjectsByName.find(name); - if (it != ObjectsByName.end()) { - return &it->second; - } - return nullptr; - } - - TOlapStatisticsSchema& MutableByNameVerified(const TString& name) noexcept { - auto* object = MutableByNameOptional(name); - AFL_VERIFY(object); - return *object; - } - - bool ApplyUpdate(const TOlapSchema& currentSchema, const TOlapStatisticsModification& schemaUpdate, IErrorCollector& errors); - - void Parse(const NKikimrSchemeOp::TColumnTableSchema& tableSchema); - void Serialize(NKikimrSchemeOp::TColumnTableSchema& tableSchema) const; - bool Validate(const NKikimrSchemeOp::TColumnTableSchema& opSchema, IErrorCollector& errors) const; -}; -} diff --git a/ydb/core/tx/schemeshard/olap/statistics/update.cpp b/ydb/core/tx/schemeshard/olap/statistics/update.cpp deleted file mode 100644 index 1c82c07c300c..000000000000 --- a/ydb/core/tx/schemeshard/olap/statistics/update.cpp +++ /dev/null @@ -1,35 +0,0 @@ -#include "update.h" - -namespace NKikimr::NSchemeShard { - -void TOlapStatisticsUpsert::SerializeToProto(NKikimrColumnShardStatisticsProto::TConstructorContainer& requestedProto) const { - requestedProto.SetName(Name); - Constructor.SerializeToProto(requestedProto); -} - -bool TOlapStatisticsUpsert::DeserializeFromProto(const NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) { - Name = proto.GetName(); - AFL_VERIFY(Constructor.DeserializeFromProto(proto))("incorrect_proto", proto.DebugString()); - return true; -} - -bool TOlapStatisticsModification::Parse(const NKikimrSchemeOp::TAlterColumnTableSchema& alterRequest, IErrorCollector& errors) { - for (const auto& name : alterRequest.GetDropStatistics()) { - if (!Drop.emplace(name).second) { - errors.AddError(NKikimrScheme::StatusInvalidParameter, "Duplicated 
statistics for drop"); - return false; - } - } - TSet upsertNames; - for (auto& schema : alterRequest.GetUpsertStatistics()) { - TOlapStatisticsUpsert stat; - AFL_VERIFY(stat.DeserializeFromProto(schema)); - if (!upsertNames.emplace(stat.GetName()).second) { - errors.AddError(NKikimrScheme::StatusAlreadyExists, TStringBuilder() << "stat '" << stat.GetName() << "' duplication for add"); - return false; - } - Upsert.emplace_back(std::move(stat)); - } - return true; -} -} diff --git a/ydb/core/tx/schemeshard/olap/statistics/update.h b/ydb/core/tx/schemeshard/olap/statistics/update.h deleted file mode 100644 index 96558928acf3..000000000000 --- a/ydb/core/tx/schemeshard/olap/statistics/update.h +++ /dev/null @@ -1,43 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include - -namespace NKikimr::NSchemeShard { - - class TOlapStatisticsUpsert { - private: - YDB_READONLY_DEF(TString, Name); - protected: - NOlap::NStatistics::TConstructorContainer Constructor; - public: - TOlapStatisticsUpsert() = default; - TOlapStatisticsUpsert(const TString& name, const NOlap::NStatistics::TConstructorContainer& constructor) - : Name(name) - , Constructor(constructor) - { - - } - - const NOlap::NStatistics::TConstructorContainer& GetConstructor() const { - return Constructor; - } - - bool DeserializeFromProto(const NKikimrColumnShardStatisticsProto::TConstructorContainer& requestedProto); - void SerializeToProto(NKikimrColumnShardStatisticsProto::TConstructorContainer& requestedProto) const; - }; - - class TOlapStatisticsModification { - private: - YDB_READONLY_DEF(TVector, Upsert); - YDB_READONLY_DEF(TSet, Drop); - public: - void AddUpsert(const TString& name, const NOlap::NStatistics::TConstructorContainer container) { - Upsert.emplace_back(TOlapStatisticsUpsert(name, container)); - } - - bool Parse(const NKikimrSchemeOp::TAlterColumnTableSchema& alterRequest, IErrorCollector& errors); - }; -} diff --git a/ydb/core/tx/schemeshard/olap/statistics/ya.make 
b/ydb/core/tx/schemeshard/olap/statistics/ya.make deleted file mode 100644 index 3f4902454ef1..000000000000 --- a/ydb/core/tx/schemeshard/olap/statistics/ya.make +++ /dev/null @@ -1,15 +0,0 @@ -LIBRARY() - -SRCS( - schema.cpp - update.cpp -) - -PEERDIR( - ydb/services/bg_tasks/abstract - ydb/core/tx/schemeshard/olap/common - ydb/core/tx/columnshard/engines/scheme/statistics/abstract - ydb/core/protos -) - -END() diff --git a/ydb/core/tx/schemeshard/olap/table/table.h b/ydb/core/tx/schemeshard/olap/table/table.h index e37ede6c3eab..a092e175e25d 100644 --- a/ydb/core/tx/schemeshard/olap/table/table.h +++ b/ydb/core/tx/schemeshard/olap/table/table.h @@ -99,8 +99,9 @@ struct TColumnTableInfo { Stats.UpdateShardStats(shardIdx, newStats); } - void UpdateTableStats(const TPathId& pathId, const TPartitionStats& newStats) { - Stats.UpdateTableStats(pathId, newStats); + void UpdateTableStats(const TShardIdx shardIdx, const TPathId& pathId, const TPartitionStats& newStats) { + Stats.TableStats[pathId].Aggregated.PartCount = GetColumnShards().size(); + Stats.UpdateTableStats(shardIdx, pathId, newStats); } TConclusion> BuildEntity(const TPathId& pathId, const NOlap::NAlter::TEntityInitializationContext& iContext) const; diff --git a/ydb/core/tx/schemeshard/olap/ttl/ya.make b/ydb/core/tx/schemeshard/olap/ttl/ya.make index 0eb0e83c9a22..8aea246ebddf 100644 --- a/ydb/core/tx/schemeshard/olap/ttl/ya.make +++ b/ydb/core/tx/schemeshard/olap/ttl/ya.make @@ -6,6 +6,7 @@ SRCS( ) PEERDIR( + ydb/core/base ydb/core/protos ) diff --git a/ydb/core/tx/schemeshard/olap/ya.make b/ydb/core/tx/schemeshard/olap/ya.make index d41824702a58..4fde54f9fbd0 100644 --- a/ydb/core/tx/schemeshard/olap/ya.make +++ b/ydb/core/tx/schemeshard/olap/ya.make @@ -7,7 +7,6 @@ PEERDIR( ydb/core/tx/schemeshard/olap/schema ydb/core/tx/schemeshard/olap/common ydb/core/tx/schemeshard/olap/operations - ydb/core/tx/schemeshard/olap/statistics ydb/core/tx/schemeshard/olap/options ydb/core/tx/schemeshard/olap/layout 
ydb/core/tx/schemeshard/olap/manager diff --git a/ydb/core/tx/schemeshard/schemeshard__init.cpp b/ydb/core/tx/schemeshard/schemeshard__init.cpp index 0b77b71a9656..2226cc2576c3 100644 --- a/ydb/core/tx/schemeshard/schemeshard__init.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__init.cpp @@ -1242,6 +1242,7 @@ struct TSchemeShard::TTxInit : public TTransactionBase { .MaxPathElementLength = rowSet.template GetValueOrDefault(defaults.MaxPathElementLength), .ExtraPathSymbolsAllowed = rowSet.template GetValueOrDefault(defaults.ExtraPathSymbolsAllowed), .MaxTableColumns = rowSet.template GetValueOrDefault(defaults.MaxTableColumns), + .MaxColumnTableColumns = rowSet.template GetValueOrDefault(defaults.MaxColumnTableColumns), .MaxTableColumnNameLength = rowSet.template GetValueOrDefault(defaults.MaxTableColumnNameLength), .MaxTableKeyColumns = rowSet.template GetValueOrDefault(defaults.MaxTableKeyColumns), .MaxTableIndices = rowSet.template GetValueOrDefault(defaults.MaxTableIndices), @@ -1825,7 +1826,7 @@ struct TSchemeShard::TTxInit : public TTransactionBase { Y_ABORT_UNLESS(parseOk); if (tableInfo->IsAsyncReplica()) { - Self->PathsById.at(pathId)->SetAsyncReplica(); + Self->PathsById.at(pathId)->SetAsyncReplica(true); } } @@ -1905,6 +1906,9 @@ struct TSchemeShard::TTxInit : public TTransactionBase { auto& view = Self->Views[pathId] = new TViewInfo(); view->AlterVersion = rowset.GetValue(); view->QueryText = rowset.GetValue(); + Y_PROTOBUF_SUPPRESS_NODISCARD view->CapturedContext.ParseFromString( + rowset.GetValue() + ); Self->IncrementPathDbRefCount(pathId); if (!rowset.Next()) { @@ -2475,8 +2479,12 @@ struct TSchemeShard::TTxInit : public TTransactionBase { Y_ABORT_UNLESS(it != Self->Topics.end()); Y_ABORT_UNLESS(it->second); TTopicInfo::TPtr pqGroup = it->second; - if (pqInfo->AlterVersion <= pqGroup->AlterVersion) + if (pqInfo->AlterVersion <= pqGroup->AlterVersion) { ++pqGroup->TotalPartitionCount; + if (pqInfo->Status == NKikimrPQ::ETopicPartitionStatus::Active) { + 
++pqGroup->ActivePartitionCount; + } + } if (pqInfo->PqId >= pqGroup->NextPartitionId) { pqGroup->NextPartitionId = pqInfo->PqId + 1; pqGroup->TotalGroupCount = pqInfo->PqId + 1; @@ -4058,7 +4066,9 @@ struct TSchemeShard::TTxInit : public TTransactionBase { if (path->IsPQGroup()) { auto pqGroup = Self->Topics.at(path->PathId); - auto delta = pqGroup->AlterData ? pqGroup->AlterData->TotalPartitionCount : pqGroup->TotalPartitionCount; + auto partitionDelta = pqGroup->AlterData ? pqGroup->AlterData->TotalPartitionCount : pqGroup->TotalPartitionCount; + auto activePartitionDelta = pqGroup->AlterData ? pqGroup->AlterData->ActivePartitionCount : pqGroup->ActivePartitionCount; + auto tabletConfig = pqGroup->AlterData ? (pqGroup->AlterData->TabletConfig.empty() ? pqGroup->TabletConfig : pqGroup->AlterData->TabletConfig) : pqGroup->TabletConfig; NKikimrPQ::TPQTabletConfig config; @@ -4066,12 +4076,12 @@ struct TSchemeShard::TTxInit : public TTransactionBase { bool parseOk = ParseFromStringNoSizeLimit(config, tabletConfig); Y_ABORT_UNLESS(parseOk); - const PQGroupReserve reserve(config, delta); + const PQGroupReserve reserve(config, activePartitionDelta); - inclusiveDomainInfo->IncPQPartitionsInside(delta); + inclusiveDomainInfo->IncPQPartitionsInside(partitionDelta); inclusiveDomainInfo->IncPQReservedStorage(reserve.Storage); - Self->TabletCounters->Simple()[COUNTER_STREAM_SHARDS_COUNT].Add(delta); + Self->TabletCounters->Simple()[COUNTER_STREAM_SHARDS_COUNT].Add(partitionDelta); Self->TabletCounters->Simple()[COUNTER_STREAM_RESERVED_THROUGHPUT].Add(reserve.Throughput); Self->TabletCounters->Simple()[COUNTER_STREAM_RESERVED_STORAGE].Add(reserve.Storage); } @@ -4193,8 +4203,9 @@ struct TSchemeShard::TTxInit : public TTransactionBase { TString settings = rowset.GetValue(); auto domainPathId = TPathId(rowset.GetValueOrDefault(selfId), rowset.GetValue()); + TString peerName = rowset.GetValueOrDefault(); - TExportInfo::TPtr exportInfo = new TExportInfo(id, uid, kind, settings, 
domainPathId); + TExportInfo::TPtr exportInfo = new TExportInfo(id, uid, kind, settings, domainPathId, peerName); if (rowset.HaveValue()) { exportInfo->UserSID = rowset.GetValue(); @@ -4291,11 +4302,12 @@ struct TSchemeShard::TTxInit : public TTransactionBase { TImportInfo::EKind kind = static_cast(rowset.GetValue()); auto domainPathId = TPathId(rowset.GetValue(), rowset.GetValue()); + TString peerName = rowset.GetValueOrDefault(); Ydb::Import::ImportFromS3Settings settings; Y_ABORT_UNLESS(ParseFromStringNoSizeLimit(settings, rowset.GetValue())); - TImportInfo::TPtr importInfo = new TImportInfo(id, uid, kind, settings, domainPathId); + TImportInfo::TPtr importInfo = new TImportInfo(id, uid, kind, settings, domainPathId, peerName); if (rowset.HaveValue()) { importInfo->UserSID = rowset.GetValue(); diff --git a/ydb/core/tx/schemeshard/schemeshard__login.cpp b/ydb/core/tx/schemeshard/schemeshard__login.cpp index 85d2f54a4892..0fa53652e545 100644 --- a/ydb/core/tx/schemeshard/schemeshard__login.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__login.cpp @@ -1,5 +1,8 @@ -#include "schemeshard_impl.h" #include +#include + +#include "schemeshard_audit_log.h" +#include "schemeshard_impl.h" namespace NKikimr { namespace NSchemeShard { @@ -19,10 +22,16 @@ struct TSchemeShard::TTxLogin : TSchemeShard::TRwTxBase { TTxType GetTxType() const override { return TXTYPE_LOGIN; } NLogin::TLoginProvider::TLoginUserRequest GetLoginRequest() const { + const auto& record(Request->Get()->Record); return { - .User = Request->Get()->Record.GetUser(), - .Password = Request->Get()->Record.GetPassword(), - .ExternalAuth = Request->Get()->Record.GetExternalAuth() + .User = record.GetUser(), + .Password = record.GetPassword(), + .Options = { + .ExpiresAfter = record.HasExpiresAfterMs() + ? 
std::chrono::milliseconds(record.GetExpiresAfterMs()) + : std::chrono::system_clock::duration::zero() + }, + .ExternalAuth = record.GetExternalAuth(), }; } @@ -66,15 +75,22 @@ struct TSchemeShard::TTxLogin : TSchemeShard::TRwTxBase { Self->PublishToSchemeBoard(TTxId(), {SubDomainPathId}, ctx); } - NLogin::TLoginProvider::TLoginUserResponse LoginResponse = Self->LoginProvider.LoginUser(GetLoginRequest()); THolder result = MakeHolder(); - if (LoginResponse.Error) { - result->Record.SetError(LoginResponse.Error); - } - if (LoginResponse.Token) { - result->Record.SetToken(LoginResponse.Token); + const auto& loginRequest = GetLoginRequest(); + if (loginRequest.ExternalAuth || AppData(ctx)->AuthConfig.GetEnableLoginAuthentication()) { + NLogin::TLoginProvider::TLoginUserResponse LoginResponse = Self->LoginProvider.LoginUser(loginRequest); + if (LoginResponse.Error) { + result->Record.SetError(LoginResponse.Error); + } + if (LoginResponse.Token) { + result->Record.SetToken(LoginResponse.Token); + } + } else { + result->Record.SetError("Login authentication is disabled"); } + AuditLogLogin(Request->Get()->Record, result->Record, Self); + LOG_DEBUG_S(ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, "TTxLogin DoComplete" << ", result: " << result->Record.ShortDebugString() diff --git a/ydb/core/tx/schemeshard/schemeshard__operation.cpp b/ydb/core/tx/schemeshard/schemeshard__operation.cpp index ee7de50fa0a1..0888afa2706d 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation.cpp @@ -104,14 +104,12 @@ THolder TSchemeShard::IgniteOperation(TProposeRequest& request } TOperation::TPtr operation = new TOperation(txId); - Operations[txId] = operation; //record is erased at ApplyOnExecute if all parts are done at propose for (const auto& transaction : record.GetTransaction()) { auto quotaResult = operation->ConsumeQuota(transaction, context); if (quotaResult.Status != NKikimrScheme::StatusSuccess) { response.Reset(new 
TProposeResponse(quotaResult.Status, ui64(txId), ui64(selfId))); response->SetError(quotaResult.Status, quotaResult.Reason); - Operations.erase(txId); return std::move(response); } } @@ -131,7 +129,6 @@ THolder TSchemeShard::IgniteOperation(TProposeRequest& request if (splitResult.Status != NKikimrScheme::StatusSuccess) { response.Reset(new TProposeResponse(splitResult.Status, ui64(txId), ui64(selfId))); response->SetError(splitResult.Status, splitResult.Reason); - Operations.erase(txId); return std::move(response); } @@ -140,11 +137,15 @@ THolder TSchemeShard::IgniteOperation(TProposeRequest& request const TString owner = record.HasOwner() ? record.GetOwner() : BUILTIN_ACL_ROOT; + bool prevProposeUndoSafe = true; + + Operations[txId] = operation; //record is erased at ApplyOnExecute if all parts are done at propose + for (const auto& transaction : transactions) { auto parts = operation->ConstructParts(transaction, context); if (parts.size() > 1) { - // les't allow altering impl index tables as part of consistent operation + // allow altering impl index tables as part of consistent operation context.IsAllowedPrivateTables = true; } @@ -198,18 +199,21 @@ THolder TSchemeShard::IgniteOperation(TProposeRequest& request << ", with reason: " << response->Record.GetReason() << ", tx message: " << SecureDebugString(record)); - context.OnComplete = {}; // recreate - context.DbChanges = {}; + AbortOperationPropose(txId, context); - for (auto& toAbort : operation->Parts) { - toAbort->AbortPropose(context); - } + return std::move(response); + } - context.MemChanges.UnDo(context.SS); - context.OnComplete.ApplyOnExecute(context.SS, context.GetTxc(), context.Ctx); - Operations.erase(txId); + // Check suboperations for undo safety. Log first unsafe suboperation in the schema transaction. 
+ if (prevProposeUndoSafe && !context.IsUndoChangesSafe()) { + prevProposeUndoSafe = false; - return std::move(response); + LOG_WARN_S(context.Ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, + "Operation part proposed ok, but propose itself is undo unsafe" + << ", suboperation type: " << NKikimrSchemeOp::EOperationType_Name(part->GetTransaction().GetOperationType()) + << ", opId: " << part->GetOperationId() + << ", at schemeshard: " << selfId + ); } } } @@ -217,6 +221,55 @@ THolder TSchemeShard::IgniteOperation(TProposeRequest& request return std::move(response); } +void TSchemeShard::AbortOperationPropose(const TTxId txId, TOperationContext& context) { + Y_ABORT_UNLESS(Operations.contains(txId)); + TOperation::TPtr operation = Operations.at(txId); + + // Drop operation side effects, undo memory changes + // (Local db changes were already applied) + context.OnComplete = {}; + context.DbChanges = {}; + + for (auto& i : operation->Parts) { + i->AbortPropose(context); + } + + context.MemChanges.UnDo(context.SS); + + // And remove aborted operation from existence + Operations.erase(txId); +} + +void AbortOperation(TOperationContext& context, const TTxId txId, const TString& reason) { + LOG_ERROR_S(context.Ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, "TTxOperationPropose Execute" + << ", txId: " << txId + << ", operation is rejected and all changes reverted" + << ", " << reason + << ", at schemeshard: " << context.SS->SelfTabletId() + ); + + context.GetTxc().DB.RollbackChanges(); + context.SS->AbortOperationPropose(txId, context); +} + +bool IsCommitRedoSizeOverLimit(TString* reason, TOperationContext& context) { + // MaxCommitRedoMB is the ICB control shared with NTabletFlatExecutor::TExecutor. 
+ // We subtract from MaxCommitRedoMB additional 1MB for anything extra + // that executor/tablet may (or may not) add under the hood + const ui64 limitBytes = (context.SS->MaxCommitRedoMB - 1) << 20; // MB to bytes + const ui64 commitRedoBytes = context.GetTxc().DB.GetCommitRedoBytes(); + if (commitRedoBytes >= limitBytes) { + *reason = TStringBuilder() + << "local tx commit redo size generated by IgniteOperation() is more than allowed limit: " + << "commit redo size " << commitRedoBytes + << ", limit " << limitBytes + << ", excess " << (commitRedoBytes - limitBytes) + ; + return true; + } + return false; +} + struct TSchemeShard::TTxOperationPropose: public NTabletFlatExecutor::TTransactionBase { using TBase = NTabletFlatExecutor::TTransactionBase; @@ -236,6 +289,7 @@ struct TSchemeShard::TTxOperationPropose: public NTabletFlatExecutor::TTransacti bool Execute(NTabletFlatExecutor::TTransactionContext& txc, const TActorContext& ctx) override { TTabletId selfId = Self->SelfTabletId(); + auto txId = TTxId(Request->Get()->Record.GetTxId()); LOG_DEBUG_S(ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, "TTxOperationPropose Execute" @@ -246,7 +300,6 @@ struct TSchemeShard::TTxOperationPropose: public NTabletFlatExecutor::TTransacti auto [userToken, tokenParseError] = ParseUserToken(Request->Get()->Record.GetUserToken()); if (tokenParseError) { - auto txId = Request->Get()->Record.GetTxId(); Response = MakeHolder(NKikimrScheme::StatusInvalidParameter, ui64(txId), ui64(selfId), "Failed to parse user token"); return true; } @@ -258,10 +311,52 @@ struct TSchemeShard::TTxOperationPropose: public NTabletFlatExecutor::TTransacti TStorageChanges dbChanges; TOperationContext context{Self, txc, ctx, OnComplete, memChanges, dbChanges, std::move(userToken)}; + //NOTE: Successful IgniteOperation will leave created operation in Self->Operations and accumulated changes in the context. + // Unsuccessful IgniteOperation will leave no operation and context will also be clean. 
Response = Self->IgniteOperation(*Request->Get(), context); - OnComplete.ApplyOnExecute(Self, txc, ctx); + //NOTE: Successfully created operation also must be checked for the size of this local tx. + // + // Limitation on a commit redo size of local transactions is imposed at the tablet executor level + // (See ydb/core/tablet_flat/flat_executor.cpp, NTabletFlatExecutor::TExecutor::ExecuteTransaction()). + // And a tablet violating that limit is considered broken and will be stopped unconditionally and immediately. + // + // So even if operation was ignited successfully, it's local tx size still must be checked + // as a precaution measure to avoid infinite loop of schemeshard restarting, attempting to propose + // persisted operation again, hitting commit redo size limit and restarting again. + // + // On unsuccessful check, local tx should be rolled back, operation should be rejected and + // all accumulated changes dropped or reverted. + // + + // Actually build commit redo (dbChanges could be empty) dbChanges.Apply(Self, txc, ctx); + + if (Self->Operations.contains(txId)) { + Y_ABORT_UNLESS(Response->IsDone() || Response->IsAccepted() || Response->IsConditionalAccepted()); + + // Check local tx commit redo size + TString reason; + if (IsCommitRedoSizeOverLimit(&reason, context)) { + Response = MakeHolder(NKikimrScheme::StatusSchemeError, ui64(txId), ui64(selfId), reason); + + AbortOperation(context, txId, reason); + + if (!context.IsUndoChangesSafe()) { + LOG_ERROR_S(context.Ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, "TTxOperationPropose Execute" + << ", opId: " << txId + << ", operation should be rejected and all changes be reverted" + << ", but context.IsUndoChangesSafe is false, which means some direct writes have been done" + << ", message: " << SecureDebugString(Request->Get()->Record) + << ", at schemeshard: " << context.SS->SelfTabletId() + ); + } + } + } + + // Apply accumulated changes (changes could be empty) + OnComplete.ApplyOnExecute(Self, txc, 
ctx); + return true; } @@ -1168,7 +1263,7 @@ TVector TOperation::ConstructParts(const TTxTransaction& tx case NKikimrSchemeOp::EOperationType::ESchemeOpCreateSubDomain: return {CreateSubDomain(NextPartId(), tx)}; case NKikimrSchemeOp::EOperationType::ESchemeOpAlterSubDomain: - return {CreateCompatibleSubdomainAlter(context.SS, NextPartId(), tx)}; + return CreateCompatibleSubdomainAlter(NextPartId(), tx, context); case NKikimrSchemeOp::EOperationType::ESchemeOpDropSubDomain: return {CreateDropSubdomain(NextPartId(), tx)}; case NKikimrSchemeOp::EOperationType::ESchemeOpForceDropSubDomain: diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_alter_cdc_stream.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_alter_cdc_stream.cpp index 4efa03b17955..9113d5d8edc9 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_alter_cdc_stream.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_alter_cdc_stream.cpp @@ -61,7 +61,7 @@ class TPropose: public TSubOperationState { NIceDb::TNiceDb db(context.GetDB()); context.SS->PersistCdcStream(db, pathId); - context.SS->CdcStreams[pathId] = stream->AlterData; + context.SS->CdcStreams[pathId]->FinishAlter(); context.SS->ClearDescribePathCaches(path); context.OnComplete.PublishToSchemeBoard(OperationId, pathId); @@ -143,9 +143,12 @@ class TAlterCdcStream: public TSubOperation { .NotDeleted() .IsTable() .NotAsyncReplicaTable() - .IsCommonSensePath() .NotUnderOperation(); + if (checks && !tablePath.IsInsideTableIndexPath()) { + checks.IsCommonSensePath(); + } + if (!checks) { result->SetError(checks.GetStatus(), checks.GetError()); return result; @@ -370,10 +373,13 @@ class TAlterCdcStreamAtTable: public TSubOperation { .NotDeleted() .IsTable() .NotAsyncReplicaTable() - .IsCommonSensePath() .NotUnderDeleting() .NotUnderOperation(); + if (checks && !tablePath.IsInsideTableIndexPath()) { + checks.IsCommonSensePath(); + } + if (!checks) { result->SetError(checks.GetStatus(), checks.GetError()); return result; @@ -476,10 
+482,10 @@ class TAlterCdcStreamAtTable: public TSubOperation { } // anonymous std::variant DoAlterStreamPathChecks( - const TOperationId& opId, - const TPath& workingDirPath, - const TString& tableName, - const TString& streamName) + const TOperationId& opId, + const TPath& workingDirPath, + const TString& tableName, + const TString& streamName) { const auto tablePath = workingDirPath.Child(tableName); { @@ -492,9 +498,12 @@ std::variant DoAlterStreamPathChecks( .NotDeleted() .IsTable() .NotAsyncReplicaTable() - .IsCommonSensePath() .NotUnderOperation(); + if (checks && !tablePath.IsInsideTableIndexPath()) { + checks.IsCommonSensePath(); + } + if (!checks) { return CreateReject(opId, checks.GetStatus(), checks.GetError()); } @@ -521,27 +530,24 @@ std::variant DoAlterStreamPathChecks( } void DoAlterStream( - const NKikimrSchemeOp::TAlterCdcStream& op, - const TOperationId& opId, - const TPath& workingDirPath, - const TPath& tablePath, - TVector& result) + TVector& result, + const NKikimrSchemeOp::TAlterCdcStream& op, + const TOperationId& opId, + const TPath& workingDirPath, + const TPath& tablePath) { { auto outTx = TransactionTemplate(tablePath.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpAlterCdcStreamImpl); outTx.MutableAlterCdcStream()->CopyFrom(op); - if (op.HasGetReady()) { outTx.MutableLockGuard()->SetOwnerTxId(op.GetGetReady().GetLockTxId()); } result.push_back(CreateAlterCdcStreamImpl(NextPartId(opId, result), outTx)); } - { auto outTx = TransactionTemplate(workingDirPath.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpAlterCdcStreamAtTable); outTx.MutableAlterCdcStream()->CopyFrom(op); - if (op.HasGetReady()) { outTx.MutableLockGuard()->SetOwnerTxId(op.GetGetReady().GetLockTxId()); } @@ -601,7 +607,7 @@ TVector CreateAlterCdcStream(TOperationId opId, const TTxTr TVector result; - DoAlterStream(op, opId, workingDirPath, tablePath, result); + DoAlterStream(result, op, opId, workingDirPath, tablePath); if (op.HasGetReady()) { auto outTx = 
TransactionTemplate(workingDirPath.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpDropLock); @@ -613,6 +619,14 @@ TVector CreateAlterCdcStream(TOperationId opId, const TTxTr result.push_back(DropLock(NextPartId(opId, result), outTx)); } + if (workingDirPath.IsTableIndex()) { + auto outTx = TransactionTemplate(workingDirPath.Parent().PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpAlterTableIndex); + outTx.MutableAlterTableIndex()->SetName(workingDirPath.LeafName()); + outTx.MutableAlterTableIndex()->SetState(NKikimrSchemeOp::EIndexState::EIndexStateReady); + + result.push_back(CreateAlterTableIndex(NextPartId(opId, result), outTx)); + } + return result; } diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_alter_cdc_stream.h b/ydb/core/tx/schemeshard/schemeshard__operation_alter_cdc_stream.h index 198d5ae35cc9..6154ee05ed17 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_alter_cdc_stream.h +++ b/ydb/core/tx/schemeshard/schemeshard__operation_alter_cdc_stream.h @@ -1,7 +1,7 @@ #pragma once -#include "schemeshard__operation_create_cdc_stream.h" // for TStreamPaths #include "schemeshard__operation_common.h" +#include "schemeshard__operation_create_cdc_stream.h" // for TStreamPaths #include "schemeshard__operation_part.h" #include "schemeshard_impl.h" @@ -17,10 +17,10 @@ std::variant DoAlterStreamPathChecks( const TString& streamName); void DoAlterStream( + TVector& result, const NKikimrSchemeOp::TAlterCdcStream& op, const TOperationId& opId, const TPath& workingDirPath, - const TPath& tablePath, - TVector& result); + const TPath& tablePath); } // namespace NKikimr::NSchemesShard::NCdc diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_alter_continuous_backup.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_alter_continuous_backup.cpp index 24f9c7c0d0f8..69883a91a28a 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_alter_continuous_backup.cpp +++ 
b/ydb/core/tx/schemeshard/schemeshard__operation_alter_continuous_backup.cpp @@ -1,9 +1,8 @@ -#include "schemeshard__operation_part.h" +#include "schemeshard__operation_alter_cdc_stream.h" #include "schemeshard__operation_common.h" +#include "schemeshard__operation_part.h" #include "schemeshard_impl.h" -#include "schemeshard__operation_alter_cdc_stream.h" - #include #include @@ -83,7 +82,7 @@ TVector CreateAlterContinuousBackup(TOperationId opId, cons const NScheme::TTypeRegistry* typeRegistry = AppData(context.Ctx)->TypeRegistry; NKikimrSchemeOp::TTableDescription schema; - context.SS->DescribeTable(table, typeRegistry, true, false, &schema); + context.SS->DescribeTable(table, typeRegistry, true, &schema); schema.MutablePartitionConfig()->CopyFrom(table->TableDescription.GetPartitionConfig()); TString errStr; @@ -111,7 +110,7 @@ TVector CreateAlterContinuousBackup(TOperationId opId, cons TVector result; - NCdc::DoAlterStream(alterCdcStreamOp, opId, workingDirPath, tablePath, result); + NCdc::DoAlterStream(result, alterCdcStreamOp, opId, workingDirPath, tablePath); if (cbOp.GetActionCase() == NKikimrSchemeOp::TAlterContinuousBackup::kTakeIncrementalBackup) { DoCreateIncBackupTable(opId, backupTablePath, schema, result); diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_alter_external_data_source.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_alter_external_data_source.cpp index c67f26b3cea2..1009c5dff470 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_alter_external_data_source.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_alter_external_data_source.cpp @@ -213,6 +213,13 @@ class TAlterExternalDataSource : public TSubOperation { static_cast(OperationId.GetTxId()), static_cast(ssId)); + if (context.SS->IsServerlessDomain(TPath::Init(context.SS->RootPathId(), context.SS))) { + if (!context.SS->EnableExternalDataSourcesOnServerless) { + result->SetError(NKikimrScheme::StatusPreconditionFailed, "External data sources are disabled 
for serverless domains. Please contact your system administrator to enable it"); + return result; + } + } + const TPath parentPath = TPath::Resolve(parentPathStr, context.SS); RETURN_RESULT_UNLESS(NExternalDataSource::IsParentPathValid( result, parentPath, Transaction, /* isCreate */ false)); diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_alter_external_table.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_alter_external_table.cpp index ad563b01c900..54f63ca0db57 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_alter_external_table.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_alter_external_table.cpp @@ -308,6 +308,13 @@ class TAlterExternalTable: public TSubOperation { static_cast(OperationId.GetTxId()), static_cast(ssId)); + if (context.SS->IsServerlessDomain(TPath::Init(context.SS->RootPathId(), context.SS))) { + if (!context.SS->EnableExternalDataSourcesOnServerless) { + result->SetError(NKikimrScheme::StatusPreconditionFailed, "External data sources are disabled for serverless domains. 
Please contact your system administrator to enable it"); + return result; + } + } + const auto parentPath = TPath::Resolve(parentPathStr, context.SS); RETURN_RESULT_UNLESS(NExternalTable::IsParentPathValid(result, parentPath)); diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_alter_extsubdomain.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_alter_extsubdomain.cpp index 5b67ca14030c..6b330b329df0 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_alter_extsubdomain.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_alter_extsubdomain.cpp @@ -814,7 +814,7 @@ class TSyncHive: public TSubOperationState { } bool HandleReply(TEvHive::TEvUpdateDomainReply::TPtr& ev, TOperationContext& context) override { - const TTabletId hive = TTabletId(ev->Get()->Record.GetOrigin()); + const TTabletId hive = TTabletId(ev->Get()->Record.GetOrigin()); LOG_I(DebugHint() << "HandleReply TEvUpdateDomainReply" << ", from hive: " << hive); @@ -936,21 +936,23 @@ class TAlterExtSubDomain: public TSubOperation { // Create or derive alter. // (We could have always created new alter from a current subdomainInfo but // we need to take into account possible version increase from CreateHive suboperation.) 
- auto createAlterFrom = [&inputSettings, &delta](auto prototype) { + auto createAlterFrom = [&inputSettings](auto prototype, const TStoragePools& additionalPools) { return MakeIntrusive( *prototype, inputSettings.GetPlanResolution(), inputSettings.GetTimeCastBucketsPerMediator(), - delta.StoragePoolsAdded + additionalPools ); }; TSubDomainInfo::TPtr alter = [&delta, &subdomainInfo, &createAlterFrom, &context]() { if (delta.AddExternalHive && context.SS->EnableAlterDatabaseCreateHiveFirst) { Y_ABORT_UNLESS(subdomainInfo->GetAlter()); - return createAlterFrom(subdomainInfo->GetAlter()); + //NOTE: existing alter already has all storage pools that combined operation wanted to add, + // should not add them second time when deriving alter from alter + return createAlterFrom(subdomainInfo->GetAlter(), {}); } else { Y_ABORT_UNLESS(!subdomainInfo->GetAlter()); - return createAlterFrom(subdomainInfo); + return createAlterFrom(subdomainInfo, delta.StoragePoolsAdded); } }(); @@ -1084,7 +1086,13 @@ ISubOperation::TPtr CreateAlterExtSubDomain(TOperationId id, TTxState::ETxState } TVector CreateCompatibleAlterExtSubDomain(TOperationId id, const TTxTransaction& tx, TOperationContext& context) { - Y_ABORT_UNLESS(tx.GetOperationType() == NKikimrSchemeOp::ESchemeOpAlterExtSubDomain); + //NOTE: Accepting ESchemeOpAlterSubDomain operation for an ExtSubDomain is a special compatibility case + // for those old subdomains that at the time went through migration to a separate tenants. + // Console tablet holds records about types of the subdomains but they hadn't been updated + // at the migration time. So Console still thinks that old subdomains are plain subdomains + // whereas they had been migrated to the extsubdomains. + // This compatibility case should be upholded until Console records would be updated. 
+ Y_ABORT_UNLESS(tx.GetOperationType() == NKikimrSchemeOp::ESchemeOpAlterExtSubDomain || tx.GetOperationType() == NKikimrSchemeOp::ESchemeOpAlterSubDomain); LOG_I("CreateCompatibleAlterExtSubDomain, opId " << id << ", feature flag EnableAlterDatabaseCreateHiveFirst " << context.SS->EnableAlterDatabaseCreateHiveFirst diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_alter_index.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_alter_index.cpp index 9e4bf359c59c..0415c18c18b0 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_alter_index.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_alter_index.cpp @@ -140,8 +140,11 @@ class TAlterTableIndex: public TSubOperation { .NotDeleted() .NotUnderDeleting() .IsCommonSensePath() - .IsTable() - .NotAsyncReplicaTable(); + .IsTable(); + + if (!Transaction.GetInternal()) { + checks.NotAsyncReplicaTable(); + } if (!checks) { result->SetError(checks.GetStatus(), checks.GetError()); diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_alter_login.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_alter_login.cpp index a4a5fd6d2533..48f9f400bdb2 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_alter_login.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_alter_login.cpp @@ -1,6 +1,7 @@ #include "schemeshard__operation_part.h" #include "schemeshard__operation_common.h" #include "schemeshard_impl.h" +#include namespace { @@ -15,7 +16,9 @@ class TAlterLogin: public TSubOperationBase { NIceDb::TNiceDb db(context.GetTxc().DB); // do not track is there are direct writes happen TTabletId ssId = context.SS->SelfTabletId(); auto result = MakeHolder(OperationId.GetTxId(), ssId); - if (Transaction.GetWorkingDir() != context.SS->LoginProvider.Audience) { + if (!AppData()->AuthConfig.GetEnableLoginAuthentication()) { + result->SetStatus(NKikimrScheme::StatusPreconditionFailed, "Login authentication is disabled"); + } else if (Transaction.GetWorkingDir() != 
context.SS->LoginProvider.Audience) { result->SetStatus(NKikimrScheme::StatusPreconditionFailed, "Wrong working dir"); } else { const NKikimrConfig::TDomainsConfig::TSecurityConfig& securityConfig = context.SS->GetDomainsConfig().GetSecurityConfig(); diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_alter_pq.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_alter_pq.cpp index dac4b58742ce..7e07965f5def 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_alter_pq.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_alter_pq.cpp @@ -136,6 +136,10 @@ class TAlterPQ: public TSubOperation { return nullptr; } + if (!alterConfig.HasPartitionStrategy() && tabletConfig->HasPartitionStrategy()) { + alterConfig.MutablePartitionStrategy()->CopyFrom(tabletConfig->GetPartitionStrategy()); + } + if (alterConfig.GetPartitionConfig().HasLifetimeSeconds()) { const auto lifetimeSeconds = alterConfig.GetPartitionConfig().GetLifetimeSeconds(); if (lifetimeSeconds <= 0 || (ui32)lifetimeSeconds > TSchemeShard::MaxPQLifetimeSeconds) { @@ -177,12 +181,8 @@ class TAlterPQ: public TSubOperation { if (alterConfig.HasPartitionStrategy() && !NPQ::SplitMergeEnabled(alterConfig) && tabletConfig->HasPartitionStrategy() && NPQ::SplitMergeEnabled(*tabletConfig)) { - if (!alterConfig.GetPartitionStrategy().HasMaxPartitionCount() || 0 != alterConfig.GetPartitionStrategy().GetMaxPartitionCount()) { - errStr = TStringBuilder() << "Can`t disable auto partitioning. Disabling auto partitioning is a destructive operation, " - << "after which all partitions will become active and the message order guarantee will be violated. 
" - << "If you are sure of this, then set max_active_partitions to 0."; - return nullptr; - } + errStr = TStringBuilder() << "Can`t disable auto partitioning."; + return nullptr; } if (!alterConfig.HasPartitionStrategy() && tabletConfig->HasPartitionStrategy()) { @@ -577,10 +577,13 @@ class TAlterPQ: public TSubOperation { return result; } + alterData->ActivePartitionCount = topic->ActivePartitionCount; + bool splitMergeEnabled = AppData()->FeatureFlags.GetEnableTopicSplitMerge() && NKikimr::NPQ::SplitMergeEnabled(tabletConfig) && NKikimr::NPQ::SplitMergeEnabled(newTabletConfig); + THashSet involvedPartitions; if (splitMergeEnabled) { auto Hex = [](const auto& value) { return HexText(TBasicStringBuf(value)); @@ -589,10 +592,9 @@ class TAlterPQ: public TSubOperation { ui32 nextId = topic->NextPartitionId; ui32 nextGroupId = topic->TotalGroupCount; - THashSet involvedPartitions; - for (const auto& split : alter.GetSplit()) { alterData->TotalGroupCount += 2; + ++alterData->ActivePartitionCount; const auto splittedPartitionId = split.GetPartition(); if (!topic->Partitions.contains(splittedPartitionId)) { @@ -654,6 +656,7 @@ class TAlterPQ: public TSubOperation { } for (const auto& merge : alter.GetMerge()) { alterData->TotalGroupCount += 1; + --alterData->ActivePartitionCount; const auto partitionId = merge.GetPartition(); if (!topic->Partitions.contains(partitionId)) { @@ -737,6 +740,10 @@ class TAlterPQ: public TSubOperation { } alterData->TotalPartitionCount = topic->TotalPartitionCount + alterData->PartitionsToAdd.size(); + if (!splitMergeEnabled) { + alterData->ActivePartitionCount = alterData->TotalPartitionCount; + } + alterData->NextPartitionId = topic->NextPartitionId; for (const auto& p : alterData->PartitionsToAdd) { if (p.GroupId == 0 || p.GroupId > alterData->TotalGroupCount) { @@ -780,10 +787,11 @@ class TAlterPQ: public TSubOperation { return result; } - const PQGroupReserve reserve(newTabletConfig, alterData->TotalPartitionCount); - const PQGroupReserve 
oldReserve(tabletConfig, topic->TotalPartitionCount); + const PQGroupReserve reserve(newTabletConfig, alterData->ActivePartitionCount); + const PQGroupReserve reserveForCheckLimit(newTabletConfig, alterData->ActivePartitionCount + involvedPartitions.size()); + const PQGroupReserve oldReserve(tabletConfig, topic->ActivePartitionCount); - const ui64 storageToReserve = reserve.Storage > oldReserve.Storage ? reserve.Storage - oldReserve.Storage : 0; + const ui64 storageToReserve = reserveForCheckLimit.Storage > oldReserve.Storage ? reserveForCheckLimit.Storage - oldReserve.Storage : 0; { TPath::TChecker checks = path.Check(); diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_alter_resource_pool.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_alter_resource_pool.cpp index 863533fe7aa2..25ce9ad0c08e 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_alter_resource_pool.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_alter_resource_pool.cpp @@ -129,6 +129,13 @@ class TAlterResourcePool : public TSubOperation { static_cast(OperationId.GetTxId()), static_cast(context.SS->SelfTabletId())); + if (context.SS->IsServerlessDomain(TPath::Init(context.SS->RootPathId(), context.SS))) { + if (!context.SS->EnableResourcePoolsOnServerless) { + result->SetError(NKikimrScheme::StatusPreconditionFailed, "Resource pools are disabled for serverless domains. 
Please contact your system administrator to enable it"); + return result; + } + } + const TPath& parentPath = TPath::Resolve(parentPathStr, context.SS); RETURN_RESULT_UNLESS(NResourcePool::IsParentPathValid(result, parentPath)); diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_alter_table.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_alter_table.cpp index f512165507a5..3ce17bb6b85d 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_alter_table.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_alter_table.cpp @@ -365,6 +365,10 @@ class TPropose: public TSubOperationState { TTableInfo::TPtr table = context.SS->Tables.at(pathId); table->FinishAlter(); + if (!table->IsAsyncReplica()) { + path->SetAsyncReplica(false); + } + auto ttlIt = context.SS->TTLEnabledTables.find(pathId); if (table->IsTTLEnabled() && ttlIt == context.SS->TTLEnabledTables.end()) { context.SS->TTLEnabledTables[pathId] = table; @@ -519,8 +523,10 @@ class TAlterTable: public TSubOperation { .IsTable() .NotUnderOperation(); - if (!Transaction.GetInternal()) { - checks.NotAsyncReplicaTable(); + if (checks && !Transaction.GetInternal()) { + checks + .NotAsyncReplicaTable() + .NotBackupTable(); } if (!context.IsAllowedPrivateTables) { @@ -722,6 +728,10 @@ TVector CreateConsistentAlterTable(TOperationId id, const T return {CreateAlterTable(id, tx)}; } + if (path.IsBackupTable()) { + return {CreateAlterTable(id, tx)}; + } + TPath parent = path.Parent(); if (!parent.IsTableIndex()) { @@ -731,7 +741,7 @@ TVector CreateConsistentAlterTable(TOperationId id, const T // Admins can alter indexImplTable unconditionally. // Regular users can only alter allowed fields. 
if (!IsSuperUser(context.UserToken.Get()) - && (!CheckAllowedFields(alter, {"Name", "PartitionConfig"}) + && (!CheckAllowedFields(alter, {"Name", "PathId", "PartitionConfig", "ReplicationConfig"}) || (alter.HasPartitionConfig() && !CheckAllowedFields(alter.GetPartitionConfig(), {"PartitioningPolicy"}) ) @@ -744,6 +754,7 @@ TVector CreateConsistentAlterTable(TOperationId id, const T { auto tableIndexAltering = TransactionTemplate(parent.Parent().PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpAlterTableIndex); + tableIndexAltering.SetInternal(tx.GetInternal()); auto alterIndex = tableIndexAltering.MutableAlterTableIndex(); alterIndex->SetName(parent.LeafName()); alterIndex->SetState(NKikimrSchemeOp::EIndexState::EIndexStateReady); diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_common.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_common.cpp index 371e5acaf297..bdfd80bc99a7 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_common.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_common.cpp @@ -695,7 +695,7 @@ THolder TConfigureParts::MakeEvProposeTrans const TTopicTabletInfo& pqShard, const TString& topicName, const TString& topicPath, - const std::optional& bootstrapConfig, + const std::optional& bootstrapConfig, const TString& cloudId, const TString& folderId, const TString& databaseId, @@ -703,7 +703,7 @@ THolder TConfigureParts::MakeEvProposeTrans TTxState::ETxType txType, const TOperationContext& context) { - auto event = MakeHolder(); + auto event = MakeHolder(); event->Record.SetTxId(ui64(txId)); ActorIdToProto(context.SS->SelfId(), event->Record.MutableSourceActor()); @@ -719,7 +719,7 @@ THolder TConfigureParts::MakeEvProposeTrans databasePath); if (bootstrapConfig) { Y_ABORT_UNLESS(txType == TTxState::TxCreatePQGroup); - event->Record.MutableConfig()->MutableBootstrapConfig()->CopyFrom(*bootstrapConfig); + event->PreSerializedData += bootstrapConfig->GetPreSerializedProposeTransaction(); } LOG_DEBUG_S(context.Ctx, 
NKikimrServices::FLAT_TX_SCHEMESHARD, @@ -734,7 +734,7 @@ THolder TConfigureParts::MakeEvUpdateConfig(TTxId const TTopicTabletInfo& pqShard, const TString& topicName, const TString& topicPath, - const std::optional& bootstrapConfig, + const std::optional& bootstrapConfig, const TString& cloudId, const TString& folderId, const TString& databaseId, @@ -742,7 +742,7 @@ THolder TConfigureParts::MakeEvUpdateConfig(TTxId TTxState::ETxType txType, const TOperationContext& context) { - auto event = MakeHolder(); + auto event = MakeHolder(); event->Record.SetTxId(ui64(txId)); MakePQTabletConfig(context, @@ -757,7 +757,7 @@ THolder TConfigureParts::MakeEvUpdateConfig(TTxId databasePath); if (bootstrapConfig) { Y_ABORT_UNLESS(txType == TTxState::TxCreatePQGroup); - event->Record.MutableBootstrapConfig()->CopyFrom(*bootstrapConfig); + event->PreSerializedData += bootstrapConfig->GetPreSerializedUpdateConfig(); } LOG_DEBUG_S(context.Ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_common.h b/ydb/core/tx/schemeshard/schemeshard__operation_common.h index 14ec42f43168..33813a3f9b10 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_common.h +++ b/ydb/core/tx/schemeshard/schemeshard__operation_common.h @@ -552,6 +552,54 @@ class TDone: public TSubOperationState { namespace NPQState { +class TBootstrapConfigWrapper: public NKikimrPQ::TBootstrapConfig { + struct TSerializedProposeTransaction { + TString Value; + + static TSerializedProposeTransaction Serialize(const NKikimrPQ::TBootstrapConfig& value) { + NKikimrPQ::TEvProposeTransaction record; + record.MutableConfig()->MutableBootstrapConfig()->CopyFrom(value); + return {record.SerializeAsString()}; + } + }; + + struct TSerializedUpdateConfig { + TString Value; + + static TSerializedUpdateConfig Serialize(const NKikimrPQ::TBootstrapConfig& value) { + NKikimrPQ::TUpdateConfig record; + record.MutableBootstrapConfig()->CopyFrom(value); + return 
{record.SerializeAsString()}; + } + }; + + mutable std::optional> PreSerialized; + + template + const TString& Get() const { + if (!PreSerialized) { + PreSerialized.emplace(T::Serialize(*this)); + } + + const auto* value = std::get_if(&PreSerialized.value()); + Y_ABORT_UNLESS(value); + + return value->Value; + } + +public: + const TString& GetPreSerializedProposeTransaction() const { + return Get(); + } + + const TString& GetPreSerializedUpdateConfig() const { + return Get(); + } +}; + class TConfigureParts: public TSubOperationState { private: TOperationId OperationId; @@ -627,7 +675,6 @@ class TConfigureParts: public TSubOperationState { return false; } - bool ProgressState(TOperationContext& context) override { TTabletId ssId = context.SS->SelfTabletId(); @@ -669,7 +716,7 @@ class TConfigureParts: public TSubOperationState { TString databasePath = TPath::Init(context.SS->RootPathId(), context.SS).PathString(); auto topicPath = TPath::Init(txState->TargetPathId, context.SS); - std::optional bootstrapConfig; + std::optional bootstrapConfig; if (txState->TxType == TTxState::TxCreatePQGroup && topicPath.Parent().IsCdcStream()) { bootstrapConfig.emplace(); @@ -881,20 +928,37 @@ class TConfigureParts: public TSubOperationState { config.SetVersion(pqGroup.AlterData->AlterVersion); } + THashSet linkedPartitions; + for(const auto& pq : pqShard.Partitions) { config.AddPartitionIds(pq->PqId); auto& partition = *config.AddPartitions(); FillPartition(partition, pq.Get(), 0); + + linkedPartitions.insert(pq->PqId); + linkedPartitions.insert(pq->ParentPartitionIds.begin(), pq->ParentPartitionIds.end()); + linkedPartitions.insert(pq->ChildPartitionIds.begin(), pq->ChildPartitionIds.end()); + for (auto c : pq->ChildPartitionIds) { + auto it = pqGroup.Partitions.find(c); + if (it == pqGroup.Partitions.end()) { + continue; + } + linkedPartitions.insert(it->second->ParentPartitionIds.begin(), it->second->ParentPartitionIds.end()); + } } - for(const auto& p : pqGroup.Shards) { - 
const auto& pqShard = p.second; - const auto& tabletId = context.SS->ShardInfos[p.first].TabletID; - for (const auto& pq : pqShard->Partitions) { - auto& partition = *config.AddAllPartitions(); - FillPartition(partition, pq.Get(), ui64(tabletId)); + for(auto lp : linkedPartitions) { + auto it = pqGroup.Partitions.find(lp); + if (it == pqGroup.Partitions.end()) { + continue; } + + auto* partitionInfo = it->second; + const auto& tabletId = context.SS->ShardInfos[partitionInfo->ShardIdx].TabletID; + + auto& partition = *config.AddAllPartitions(); + FillPartition(partition, partitionInfo, ui64(tabletId)); } } @@ -918,7 +982,7 @@ class TConfigureParts: public TSubOperationState { const TTopicTabletInfo& pqShard, const TString& topicName, const TString& topicPath, - const std::optional& bootstrapConfig, + const std::optional& bootstrapConfig, const TString& cloudId, const TString& folderId, const TString& databaseId, @@ -931,7 +995,7 @@ class TConfigureParts: public TSubOperationState { const TTopicTabletInfo& pqShard, const TString& topicName, const TString& topicPath, - const std::optional& bootstrapConfig, + const std::optional& bootstrapConfig, const TString& cloudId, const TString& folderId, const TString& databaseId, diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_copy_table.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_copy_table.cpp index 417e52cc18da..2bf68f59488c 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_copy_table.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_copy_table.cpp @@ -14,7 +14,7 @@ void PrepareScheme(NKikimrSchemeOp::TTableDescription* schema, const TString& na const NScheme::TTypeRegistry* typeRegistry = AppData(context.Ctx)->TypeRegistry; NKikimrSchemeOp::TTableDescription completedSchema; - context.SS->DescribeTable(srcTableInfo, typeRegistry, true, false, &completedSchema); + context.SS->DescribeTable(srcTableInfo, typeRegistry, true, &completedSchema); completedSchema.SetName(name); //inherit all 
from Src except PartitionConfig, PartitionConfig could be altered diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_cdc_stream.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_cdc_stream.cpp index a615d6b68a4a..55994586f433 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_cdc_stream.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_cdc_stream.cpp @@ -105,38 +105,22 @@ class TNewCdcStream: public TSubOperation { } } - TString BuildWorkingDir() const { - if (Transaction.GetCreateCdcStream().HasIndexName()) { - return Transaction.GetWorkingDir() + "/" - + Transaction.GetCreateCdcStream().GetIndexName() + "/indexImplTable"; - } else { - return Transaction.GetWorkingDir(); - } - } - public: using TSubOperation::TSubOperation; THolder Propose(const TString& owner, TOperationContext& context) override { + const auto& workingDir = Transaction.GetWorkingDir(); const auto& op = Transaction.GetCreateCdcStream(); const auto& streamDesc = op.GetStreamDescription(); const auto& streamName = streamDesc.GetName(); const auto acceptExisted = !Transaction.GetFailOnExist(); - auto result = MakeHolder(NKikimrScheme::StatusAccepted, ui64(OperationId.GetTxId()), context.SS->TabletID()); - - if (op.HasAllIndexes()) { - result->SetError(NKikimrScheme::StatusInvalidParameter, - "Illigal part operation with all indexes flag"); - return result; - } - - const auto& workingDir = BuildWorkingDir(); - LOG_N("TNewCdcStream Propose" << ": opId# " << OperationId << ", stream# " << workingDir << "/" << streamName); + auto result = MakeHolder(NKikimrScheme::StatusAccepted, ui64(OperationId.GetTxId()), context.SS->TabletID()); + const auto tablePath = TPath::Resolve(workingDir, context.SS); { const auto checks = tablePath.Check(); @@ -146,15 +130,13 @@ class TNewCdcStream: public TSubOperation { .IsAtLocalSchemeShard() .IsResolved() .NotDeleted() + .IsTable() + .NotBackupTable() .NotAsyncReplicaTable() .NotUnderDeleting(); - if 
(op.HasIndexName() && op.GetIndexName()) { - checks.IsInsideTableIndexPath(); - } else { - checks - .IsTable() - .IsCommonSensePath(); + if (checks && !tablePath.IsInsideTableIndexPath()) { + checks.IsCommonSensePath(); } if (!checks) { @@ -529,35 +511,17 @@ class TNewCdcStreamAtTable: public TSubOperation { } THolder Propose(const TString&, TOperationContext& context) override { - auto workingDir = Transaction.GetWorkingDir(); + const auto& workingDir = Transaction.GetWorkingDir(); const auto& op = Transaction.GetCreateCdcStream(); - auto tableName = op.GetTableName(); + const auto& tableName = op.GetTableName(); const auto& streamName = op.GetStreamDescription().GetName(); - auto result = MakeHolder(NKikimrScheme::StatusAccepted, ui64(OperationId.GetTxId()), context.SS->TabletID()); - bool isIndexTable = false; - - if (op.HasAllIndexes()) { - result->SetError(NKikimrScheme::StatusInvalidParameter, - "Illigal part operation with all indexes flag"); - return result; - } - - if (op.HasIndexName()) { - if (!op.GetIndexName()) { - result->SetError(NKikimrScheme::StatusInvalidParameter, - "Unexpected empty index name"); - return result; - } - isIndexTable = true; - workingDir += ("/" + tableName + "/" + op.GetIndexName()); - tableName = "indexImplTable"; - } - LOG_N("TNewCdcStreamAtTable Propose" << ": opId# " << OperationId << ", stream# " << workingDir << "/" << tableName << "/" << streamName); + auto result = MakeHolder(NKikimrScheme::StatusAccepted, ui64(OperationId.GetTxId()), context.SS->TabletID()); + const auto workingDirPath = TPath::Resolve(workingDir, context.SS); { const auto checks = workingDirPath.Check(); @@ -569,9 +533,7 @@ class TNewCdcStreamAtTable: public TSubOperation { .IsLikeDirectory() .NotUnderDeleting(); - if (isIndexTable) { - checks.IsInsideTableIndexPath(); - } else { + if (checks && !workingDirPath.IsTableIndex()) { checks.IsCommonSensePath(); } @@ -595,7 +557,7 @@ class TNewCdcStreamAtTable: public TSubOperation { .NotUnderDeleting(); if 
(checks) { - if (!isIndexTable) { + if (!tablePath.IsInsideTableIndexPath()) { checks.IsCommonSensePath(); } if (InitialScan) { @@ -679,27 +641,34 @@ class TNewCdcStreamAtTable: public TSubOperation { private: const bool InitialScan; + }; // TNewCdcStreamAtTable -void DoCreateLock(const TOperationId opId, const TPath& workingDirPath, const TPath& tablePath, bool allowIndexImplLock, - TVector& result) +void DoCreateLock( + TVector& result, + const TOperationId opId, + const TPath& workingDirPath, + const TPath& tablePath) { - auto outTx = TransactionTemplate(workingDirPath.PathString(), - NKikimrSchemeOp::EOperationType::ESchemeOpCreateLock); + auto outTx = TransactionTemplate(workingDirPath.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpCreateLock); outTx.SetFailOnExist(false); outTx.SetInternal(true); - auto cfg = outTx.MutableLockConfig(); - cfg->SetName(tablePath.LeafName()); - cfg->SetAllowIndexImplLock(allowIndexImplLock); + outTx.MutableLockConfig()->SetName(tablePath.LeafName()); result.push_back(CreateLock(NextPartId(opId, result), outTx)); } } // anonymous -void DoCreatePqPart(const TOperationId& opId, const TPath& streamPath, const TString& streamName, - const TIntrusivePtr table, const NKikimrSchemeOp::TCreateCdcStream& op, - const TVector& boundaries, const bool acceptExisted, TVector& result) +void DoCreatePqPart( + TVector& result, + const NKikimrSchemeOp::TCreateCdcStream& op, + const TOperationId& opId, + const TPath& streamPath, + const TString& streamName, + TTableInfo::TCPtr table, + const TVector& boundaries, + const bool acceptExisted) { auto outTx = TransactionTemplate(streamPath.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpCreatePersQueueGroup); outTx.SetFailOnExist(!acceptExisted); @@ -752,34 +721,37 @@ void DoCreatePqPart(const TOperationId& opId, const TPath& streamPath, const TSt result.push_back(CreateNewPQ(NextPartId(opId, result), outTx)); } -void FillModifySchemaForCdc(NKikimrSchemeOp::TModifyScheme& outTx, const 
NKikimrSchemeOp::TCreateCdcStream& op, - const TOperationId& opId, const TString& indexName, bool acceptExisted, bool initialScan) +static void FillModifySchemaForCdc( + NKikimrSchemeOp::TModifyScheme& outTx, + const NKikimrSchemeOp::TCreateCdcStream& op, + const TOperationId& opId, + bool acceptExisted, + bool initialScan) { outTx.SetFailOnExist(!acceptExisted); outTx.MutableCreateCdcStream()->CopyFrom(op); - if (indexName) { - outTx.MutableCreateCdcStream()->SetIndexName(indexName); - } else { - outTx.MutableCreateCdcStream()->ClearIndexMode(); - } - if (initialScan) { outTx.MutableLockGuard()->SetOwnerTxId(ui64(opId.GetTxId())); } } -void DoCreateStream(const NKikimrSchemeOp::TCreateCdcStream& op, const TOperationId& opId, const TPath& workingDirPath, const TPath& tablePath, - const bool acceptExisted, const bool initialScan, const TString& indexName, TVector& result) +void DoCreateStream( + TVector& result, + const NKikimrSchemeOp::TCreateCdcStream& op, + const TOperationId& opId, + const TPath& workingDirPath, + const TPath& tablePath, + const bool acceptExisted, + const bool initialScan) { { auto outTx = TransactionTemplate(tablePath.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpCreateCdcStreamImpl); - FillModifySchemaForCdc(outTx, op, opId, indexName, acceptExisted, initialScan); + FillModifySchemaForCdc(outTx, op, opId, acceptExisted, initialScan); result.push_back(CreateNewCdcStreamImpl(NextPartId(opId, result), outTx)); } - { auto outTx = TransactionTemplate(workingDirPath.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpCreateCdcStreamAtTable); - FillModifySchemaForCdc(outTx, op, opId, indexName, acceptExisted, initialScan); + FillModifySchemaForCdc(outTx, op, opId, acceptExisted, initialScan); result.push_back(CreateNewCdcStreamAtTable(NextPartId(opId, result), outTx, initialScan)); } } @@ -826,10 +798,24 @@ ISubOperation::TPtr RejectOnTablePathChecks(const TOperationId& opId, const TPat .NotDeleted() .IsTable() .NotAsyncReplicaTable() 
- .IsCommonSensePath() .NotUnderDeleting() .NotUnderOperation(); + if (checks) { + if (!tablePath.IsInsideTableIndexPath()) { + checks.IsCommonSensePath(); + } else { + if (!tablePath.Parent().IsTableIndex(NKikimrSchemeOp::EIndexTypeGlobal)) { + return CreateReject(opId, NKikimrScheme::StatusPreconditionFailed, + "Cannot add changefeed to index table"); + } + if (!AppData()->FeatureFlags.GetEnableChangefeedsOnIndexTables()) { + return CreateReject(opId, NKikimrScheme::StatusPreconditionFailed, + "Changefeed on index table is not supported yet"); + } + } + } + if (!checks) { return CreateReject(opId, checks.GetStatus(), checks.GetError()); } @@ -837,19 +823,7 @@ ISubOperation::TPtr RejectOnTablePathChecks(const TOperationId& opId, const TPat return nullptr; } -void CalcBoundaries(const TTableInfo& table, TVector& boundaries) { - const auto& partitions = table.GetPartitions(); - boundaries.reserve(partitions.size() - 1); - - for (ui32 i = 0; i < partitions.size(); ++i) { - const auto& partition = partitions.at(i); - if (i != partitions.size() - 1) { - boundaries.push_back(partition.EndOfRange); - } - } -} - -bool FillBoundaries(const TTableInfo& table, const ::NKikimrSchemeOp::TCreateCdcStream& op, TVector& boundaries, TString& errStr) { +bool FillBoundaries(const TTableInfo& table, const NKikimrSchemeOp::TCreateCdcStream& op, TVector& boundaries, TString& errStr) { if (op.HasTopicPartitions()) { const auto& keyColumns = table.KeyColumnIds; const auto& columns = table.Columns; @@ -862,8 +836,17 @@ bool FillBoundaries(const TTableInfo& table, const ::NKikimrSchemeOp::TCreateCdc return false; } } else { - CalcBoundaries(table, boundaries); + const auto& partitions = table.GetPartitions(); + boundaries.reserve(partitions.size() - 1); + + for (ui32 i = 0; i < partitions.size(); ++i) { + const auto& partition = partitions.at(i); + if (i != partitions.size() - 1) { + boundaries.push_back(partition.EndOfRange); + } + } } + return true; } @@ -921,7 +904,6 @@ TVector 
CreateNewCdcStream(TOperationId opId, const TTxTran const auto& tableName = op.GetTableName(); const auto& streamDesc = op.GetStreamDescription(); const auto& streamName = streamDesc.GetName(); - const auto workingDirPath = TPath::Resolve(tx.GetWorkingDir(), context.SS); const auto checksResult = DoNewStreamPathChecks(opId, workingDirPath, tableName, streamName, acceptExisted); @@ -971,76 +953,35 @@ TVector CreateNewCdcStream(TOperationId opId, const TTxTran << "Initial scan is not supported yet")}; } - if (op.HasTopicPartitions()) { - if (op.GetTopicPartitions() <= 0) { - return {CreateReject(opId, NKikimrScheme::StatusInvalidParameter, "Topic partitions count must be greater than 0")}; - } - } - - std::vector candidates; - - if (op.GetIndexModeCase() == NKikimrSchemeOp::TCreateCdcStream::kAllIndexes) { - candidates.reserve(tablePath->GetChildren().size()); - for (const auto& child : tablePath->GetChildren()) { - candidates.emplace_back(child.first); - } - } else if (op.GetIndexModeCase() == NKikimrSchemeOp::TCreateCdcStream::kIndexName) { - auto it = tablePath->GetChildren().find(op.GetIndexName()); - if (it == tablePath->GetChildren().end()) { - return {CreateReject(opId, NKikimrScheme::StatusSchemeError, - "requested particular path hasn't been found")}; - } - candidates.emplace_back(it->first); + if (op.HasTopicPartitions() && op.GetTopicPartitions() <= 0) { + return {CreateReject(opId, NKikimrScheme::StatusInvalidParameter, "Topic partitions count must be greater than 0")}; } TVector result; - for (const auto& name : candidates) { - const TPath indexPath = tablePath.Child(name); - if (!indexPath.IsTableIndex() || indexPath.IsDeleted()) { - continue; - } - - const TPath indexImplPath = indexPath.Child("indexImplTable"); - if (!indexImplPath) { - return {CreateReject(opId, NKikimrScheme::StatusSchemeError, - "indexImplTable hasn't been found")}; - } - - Y_ABORT_UNLESS(context.SS->Tables.contains(tablePath.Base()->PathId)); - auto indexImplTable = 
context.SS->Tables.at(indexImplPath.Base()->PathId); - - const TPath indexStreamPath = indexImplPath.Child(streamName); - if (auto reject = RejectOnCdcChecks(opId, indexStreamPath, acceptExisted)) { - return {reject}; - } - - if (initialScan) { - DoCreateLock(opId, indexPath, indexImplPath, true, result); - } - - TVector boundaries; - if (!FillBoundaries(*indexImplTable, op, boundaries, errStr)) { - return {CreateReject(opId, NKikimrScheme::StatusInvalidParameter, errStr)}; - } - - DoCreateStream(op, opId, workingDirPath, tablePath, acceptExisted, initialScan, name, result); - DoCreatePqPart(opId, indexStreamPath, streamName, indexImplTable, op, boundaries, acceptExisted, result); + if (initialScan) { + DoCreateLock(result, opId, workingDirPath, tablePath); } - if (initialScan) { - DoCreateLock(opId, workingDirPath, tablePath, false, result); + if (workingDirPath.IsTableIndex()) { + auto outTx = TransactionTemplate(workingDirPath.Parent().PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpAlterTableIndex); + outTx.MutableAlterTableIndex()->SetName(workingDirPath.LeafName()); + outTx.MutableAlterTableIndex()->SetState(NKikimrSchemeOp::EIndexState::EIndexStateReady); + + result.push_back(CreateAlterTableIndex(NextPartId(opId, result), outTx)); } Y_ABORT_UNLESS(context.SS->Tables.contains(tablePath.Base()->PathId)); auto table = context.SS->Tables.at(tablePath.Base()->PathId); + TVector boundaries; if (!FillBoundaries(*table, op, boundaries, errStr)) { return {CreateReject(opId, NKikimrScheme::StatusInvalidParameter, errStr)}; } - DoCreateStream(op, opId, workingDirPath, tablePath, acceptExisted, initialScan, {}, result); - DoCreatePqPart(opId, streamPath, streamName, table, op, boundaries, acceptExisted, result); + DoCreateStream(result, op, opId, workingDirPath, tablePath, acceptExisted, initialScan); + DoCreatePqPart(result, op, opId, streamPath, streamName, table, boundaries, acceptExisted); + return result; } diff --git 
a/ydb/core/tx/schemeshard/schemeshard__operation_create_cdc_stream.h b/ydb/core/tx/schemeshard/schemeshard__operation_create_cdc_stream.h index 11a921d84168..635e57a28b63 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_cdc_stream.h +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_cdc_stream.h @@ -1,7 +1,7 @@ #pragma once -#include "schemeshard__operation_part.h" #include "schemeshard__operation_common.h" +#include "schemeshard__operation_part.h" #include "schemeshard_impl.h" #include @@ -22,23 +22,22 @@ std::variant DoNewStreamPathChecks( bool acceptExisted); void DoCreateStream( + TVector& result, const NKikimrSchemeOp::TCreateCdcStream& op, const TOperationId& opId, const TPath& workingDirPath, const TPath& tablePath, const bool acceptExisted, - const bool initialScan, - const TString& indexName, - TVector& result); + const bool initialScan); void DoCreatePqPart( + TVector& result, + const NKikimrSchemeOp::TCreateCdcStream& op, const TOperationId& opId, const TPath& streamPath, const TString& streamName, - const TIntrusivePtr table, - const NKikimrSchemeOp::TCreateCdcStream& op, + TTableInfo::TCPtr table, const TVector& boundaries, - const bool acceptExisted, - TVector& result); + const bool acceptExisted); } // namespace NKikimr::NSchemesShard::NCdc diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_continuous_backup.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_continuous_backup.cpp index 6bb280316138..ab2c187d24bf 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_continuous_backup.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_continuous_backup.cpp @@ -68,8 +68,8 @@ TVector CreateNewContinuousBackup(TOperationId opId, const TVector result; - NCdc::DoCreateStream(createCdcStreamOp, opId, workingDirPath, tablePath, acceptExisted, false, {}, result); - NCdc::DoCreatePqPart(opId, streamPath, NBackup::CB_CDC_STREAM_NAME, table, createCdcStreamOp, boundaries, acceptExisted, 
result); + NCdc::DoCreateStream(result, createCdcStreamOp, opId, workingDirPath, tablePath, acceptExisted, false); + NCdc::DoCreatePqPart(result, createCdcStreamOp, opId, streamPath, NBackup::CB_CDC_STREAM_NAME, table, boundaries, acceptExisted); return result; } diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_external_data_source.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_external_data_source.cpp index 6ecbfd3c4b85..2c52d5d486d7 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_external_data_source.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_external_data_source.cpp @@ -239,6 +239,13 @@ class TCreateExternalDataSource : public TSubOperation { static_cast(OperationId.GetTxId()), static_cast(ssId)); + if (context.SS->IsServerlessDomain(TPath::Init(context.SS->RootPathId(), context.SS))) { + if (!context.SS->EnableExternalDataSourcesOnServerless) { + result->SetError(NKikimrScheme::StatusPreconditionFailed, "External data sources are disabled for serverless domains. 
Please contact your system administrator to enable it"); + return result; + } + } + const TPath parentPath = TPath::Resolve(parentPathStr, context.SS); RETURN_RESULT_UNLESS(NExternalDataSource::IsParentPathValid( result, parentPath, Transaction, /* isCreate */ true)); diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_external_table.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_external_table.cpp index 1268f15956d9..c4e0a5e11a4b 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_external_table.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_external_table.cpp @@ -308,6 +308,13 @@ class TCreateExternalTable: public TSubOperation { static_cast(OperationId.GetTxId()), static_cast(ssId)); + if (context.SS->IsServerlessDomain(TPath::Init(context.SS->RootPathId(), context.SS))) { + if (!context.SS->EnableExternalDataSourcesOnServerless) { + result->SetError(NKikimrScheme::StatusPreconditionFailed, "External data sources are disabled for serverless domains. 
Please contact your system administrator to enable it"); + return result; + } + } + const auto parentPath = TPath::Resolve(parentPathStr, context.SS); RETURN_RESULT_UNLESS(NExternalTable::IsParentPathValid(result, parentPath)); diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_index.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_index.cpp index f5c2d464031b..3e37d408681e 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_index.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_index.cpp @@ -139,7 +139,8 @@ class TCreateTableIndex: public TSubOperation { .NotDeleted() .NotUnderDeleting() .IsCommonSensePath() - .IsTable(); + .IsTable() + .NotBackupTable(); if (!internal) { checks.NotAsyncReplicaTable(); diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_lock.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_lock.cpp index bd7ad540099d..c119f253f47f 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_lock.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_lock.cpp @@ -127,9 +127,7 @@ class TCreateLock: public TSubOperation { .IsLikeDirectory() .FailOnRestrictedCreateInTempZone(); - if (op.GetAllowIndexImplLock()) { - checks.IsInsideTableIndexPath(); - } else { + if (checks && !parentPath.IsTableIndex()) { checks.IsCommonSensePath(); } @@ -151,7 +149,7 @@ class TCreateLock: public TSubOperation { .IsTable() .NotAsyncReplicaTable(); - if (!op.GetAllowIndexImplLock()) { + if (checks && !parentPath.IsTableIndex()) { checks.IsCommonSensePath(); } diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_pq.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_pq.cpp index a83c459588bd..4cd1303a1fa3 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_pq.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_pq.cpp @@ -155,6 +155,7 @@ TTopicInfo::TPtr CreatePersQueueGroup(TOperationContext& context, 
pqGroupInfo->TotalGroupCount = partitionCount; pqGroupInfo->TotalPartitionCount = partitionCount; + pqGroupInfo->ActivePartitionCount = partitionCount; ui32 tabletCount = pqGroupInfo->ExpectedShardCount(); if (tabletCount > TSchemeShard::MaxPQGroupTabletsCount) { diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_resource_pool.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_resource_pool.cpp index 29c5aa399e72..a765216aa5b5 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_resource_pool.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_resource_pool.cpp @@ -155,6 +155,13 @@ class TCreateResourcePool : public TSubOperation { static_cast(OperationId.GetTxId()), static_cast(context.SS->SelfTabletId())); + if (context.SS->IsServerlessDomain(TPath::Init(context.SS->RootPathId(), context.SS))) { + if (!context.SS->EnableResourcePoolsOnServerless) { + result->SetError(NKikimrScheme::StatusPreconditionFailed, "Resource pools are disabled for serverless domains. 
Please contact your system administrator to enable it"); + return result; + } + } + const TPath& parentPath = TPath::Resolve(parentPathStr, context.SS); RETURN_RESULT_UNLESS(NResourcePool::IsParentPathValid(result, parentPath)); diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_sequence.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_sequence.cpp index 499e54b4fe15..3e42ac8cf6ff 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_sequence.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_sequence.cpp @@ -376,6 +376,7 @@ class TCreateSequence : public TSubOperation { if (checks) { if (parentPath->IsTable()) { + checks.NotBackupTable(); // allow immediately inside a normal table if (parentPath.IsUnderOperation()) { checks.IsUnderTheSameOperation(OperationId.GetTxId()); // allowed only as part of consistent operations diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_table.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_table.cpp index 029e72fe4c3f..545ef3642c6e 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_table.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_table.cpp @@ -641,7 +641,7 @@ class TCreateTable: public TSubOperation { Y_ABORT_UNLESS(tableInfo->GetPartitions().back().EndOfRange.empty(), "End of last range must be +INF"); if (tableInfo->IsAsyncReplica()) { - newTable->SetAsyncReplica(); + newTable->SetAsyncReplica(true); } context.SS->Tables[newTable->PathId] = tableInfo; diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_create_view.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_create_view.cpp index e4f6e69922ba..0e172572f778 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_create_view.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_create_view.cpp @@ -48,7 +48,7 @@ class TPropose: public TSubOperationState { Y_ABORT_UNLESS(txState->TxType == TTxState::TxCreateView); 
context.SS->TabletCounters->Simple()[COUNTER_VIEW_COUNT].Add(1); - + const auto pathId = txState->TargetPathId; auto path = TPath::Init(pathId, context.SS); @@ -68,6 +68,7 @@ TViewInfo::TPtr CreateView(const NKikimrSchemeOp::TViewDescription& desc) { TViewInfo::TPtr viewInfo = new TViewInfo; viewInfo->AlterVersion = 1; viewInfo->QueryText = desc.GetQueryText(); + viewInfo->CapturedContext = desc.GetCapturedContext(); return viewInfo; } @@ -109,7 +110,7 @@ class TCreateView: public TSubOperation { const auto& viewDescription = Transaction.GetCreateView(); const TString& name = viewDescription.GetName(); - + LOG_N("TCreateView Propose" << ", path: " << parentPathStr << "/" << name << ", opId: " << OperationId diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_drop_cdc_stream.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_drop_cdc_stream.cpp index 07d9bd17e0f7..1654ca58bdba 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_drop_cdc_stream.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_drop_cdc_stream.cpp @@ -150,10 +150,13 @@ class TDropCdcStream: public TSubOperation { .NotDeleted() .IsTable() .NotAsyncReplicaTable() - .IsCommonSensePath() .IsUnderOperation() .IsUnderTheSameOperation(OperationId.GetTxId()); + if (checks && !tablePath.IsInsideTableIndexPath()) { + checks.IsCommonSensePath(); + } + if (!checks) { result->SetError(checks.GetStatus(), checks.GetError()); return result; @@ -332,10 +335,13 @@ class TDropCdcStreamAtTable: public TSubOperation { .NotDeleted() .IsTable() .NotAsyncReplicaTable() - .IsCommonSensePath() .NotUnderDeleting() .NotUnderOperation(); + if (checks && !tablePath.IsInsideTableIndexPath()) { + checks.IsCommonSensePath(); + } + if (!checks) { result->SetError(checks.GetStatus(), checks.GetError()); return result; @@ -438,10 +444,10 @@ class TDropCdcStreamAtTable: public TSubOperation { } // anonymous std::variant DoDropStreamPathChecks( - const TOperationId& opId, - const TPath& workingDirPath, - const 
TString& tableName, - const TString& streamName) + const TOperationId& opId, + const TPath& workingDirPath, + const TString& tableName, + const TString& streamName) { const auto tablePath = workingDirPath.Child(tableName); { @@ -454,10 +460,13 @@ std::variant DoDropStreamPathChecks( .NotDeleted() .IsTable() .NotAsyncReplicaTable() - .IsCommonSensePath() .NotUnderDeleting() .NotUnderOperation(); + if (checks && !tablePath.IsInsideTableIndexPath()) { + checks.IsCommonSensePath(); + } + if (!checks) { return CreateReject(opId, checks.GetStatus(), checks.GetError()); } @@ -485,10 +494,11 @@ std::variant DoDropStreamPathChecks( } ISubOperation::TPtr DoDropStreamChecks( - const TOperationId& opId, - const TPath& tablePath, - const TTxId lockTxId, - TOperationContext& context) { + const TOperationId& opId, + const TPath& tablePath, + const TTxId lockTxId, + TOperationContext& context) +{ TString errStr; if (!context.SS->CheckLocks(tablePath.Base()->PathId, lockTxId, errStr)) { @@ -499,14 +509,14 @@ ISubOperation::TPtr DoDropStreamChecks( } void DoDropStream( - const NKikimrSchemeOp::TDropCdcStream& op, - const TOperationId& opId, - const TPath& workingDirPath, - const TPath& tablePath, - const TPath& streamPath, - const TTxId lockTxId, - TOperationContext& context, - TVector& result) + TVector& result, + const NKikimrSchemeOp::TDropCdcStream& op, + const TOperationId& opId, + const TPath& workingDirPath, + const TPath& tablePath, + const TPath& streamPath, + const TTxId lockTxId, + TOperationContext& context) { { auto outTx = TransactionTemplate(workingDirPath.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpDropCdcStreamAtTable); @@ -529,6 +539,14 @@ void DoDropStream( result.push_back(DropLock(NextPartId(opId, result), outTx)); } + if (workingDirPath.IsTableIndex()) { + auto outTx = TransactionTemplate(workingDirPath.Parent().PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpAlterTableIndex); + 
outTx.MutableAlterTableIndex()->SetName(workingDirPath.LeafName()); + outTx.MutableAlterTableIndex()->SetState(NKikimrSchemeOp::EIndexState::EIndexStateReady); + + result.push_back(CreateAlterTableIndex(NextPartId(opId, result), outTx)); + } + { auto outTx = TransactionTemplate(tablePath.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpDropCdcStreamImpl); outTx.MutableDrop()->SetName(streamPath.Base()->Name); @@ -615,7 +633,7 @@ TVector CreateDropCdcStream(TOperationId opId, const TTxTra TVector result; - DoDropStream(op, opId, workingDirPath, tablePath, streamPath, lockTxId, context, result); + DoDropStream(result, op, opId, workingDirPath, tablePath, streamPath, lockTxId, context); return result; } diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_drop_cdc_stream.h b/ydb/core/tx/schemeshard/schemeshard__operation_drop_cdc_stream.h index ec4720da71c0..12be7102684c 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_drop_cdc_stream.h +++ b/ydb/core/tx/schemeshard/schemeshard__operation_drop_cdc_stream.h @@ -1,8 +1,8 @@ #pragma once +#include "schemeshard__operation_common.h" #include "schemeshard__operation_create_cdc_stream.h" // for TStreamPaths #include "schemeshard__operation_part.h" -#include "schemeshard__operation_common.h" #include "schemeshard_impl.h" #include @@ -23,13 +23,13 @@ ISubOperation::TPtr DoDropStreamChecks( TOperationContext& context); void DoDropStream( + TVector& result, const NKikimrSchemeOp::TDropCdcStream& op, const TOperationId& opId, const TPath& workingDirPath, const TPath& tablePath, const TPath& streamPath, const TTxId lockTxId, - TOperationContext& context, - TVector& result); + TOperationContext& context); } // namespace NKikimr::NSchemesShard::NCdc diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_drop_continuous_backup.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_drop_continuous_backup.cpp index 9e7ec8ac7e43..e0e882b84d67 100644 --- 
a/ydb/core/tx/schemeshard/schemeshard__operation_drop_continuous_backup.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_drop_continuous_backup.cpp @@ -1,8 +1,7 @@ -#include "schemeshard__operation_part.h" #include "schemeshard__operation_common.h" -#include "schemeshard_impl.h" - #include "schemeshard__operation_drop_cdc_stream.h" +#include "schemeshard__operation_part.h" +#include "schemeshard_impl.h" #include @@ -40,7 +39,7 @@ TVector CreateDropContinuousBackup(TOperationId opId, const TVector result; - NCdc::DoDropStream(dropCdcStreamOp, opId, workingDirPath, tablePath, streamPath, InvalidTxId, context, result); + NCdc::DoDropStream(result, dropCdcStreamOp, opId, workingDirPath, tablePath, streamPath, InvalidTxId, context); return result; } diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_drop_index.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_drop_index.cpp index fb268a8b0619..a0d6ff35513f 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_drop_index.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_drop_index.cpp @@ -473,21 +473,22 @@ TVector CreateDropIndex(TOperationId nextId, const TTxTrans result.push_back(CreateDropTableIndex(NextPartId(nextId, result), indexDropping)); } - for (const auto& items: indexPath.Base()->GetChildren()) { - Y_ABORT_UNLESS(context.SS->PathsById.contains(items.second)); - auto implPath = context.SS->PathsById.at(items.second); - if (implPath->Dropped()) { + for (const auto& [childName, childPathId] : indexPath.Base()->GetChildren()) { + TPath child = indexPath.Child(childName); + if (child.IsDeleted()) { continue; } - auto implTable = context.SS->PathsById.at(items.second); - Y_ABORT_UNLESS(implTable->IsTable()); + Y_ABORT_UNLESS(child.Base()->IsTable()); auto implTableDropping = TransactionTemplate(indexPath.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpDropTable); auto operation = implTableDropping.MutableDrop(); - operation->SetName(items.first); + 
operation->SetName(child.LeafName()); result.push_back(CreateDropTable(NextPartId(nextId, result), implTableDropping)); + if (auto reject = CascadeDropTableChildren(result, nextId, child)) { + return {reject}; + } } return result; diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_drop_indexed_table.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_drop_indexed_table.cpp index 5f32acec8c03..c0ef94bbda58 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_drop_indexed_table.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_drop_indexed_table.cpp @@ -443,102 +443,8 @@ TVector CreateDropIndexedTable(TOperationId nextId, const T TVector result; result.push_back(CreateDropTable(NextPartId(nextId, result), tx)); - - for (const auto& [childName, childPathId] : table.Base()->GetChildren()) { - TPath child = table.Child(childName); - { - TPath::TChecker checks = child.Check(); - checks - .NotEmpty() - .IsResolved(); - - if (checks) { - if (child.IsDeleted()) { - continue; - } - } - - if (child.IsTableIndex()) { - checks.IsTableIndex(); - } else if (child.IsCdcStream()) { - checks.IsCdcStream(); - } else if (child.IsSequence()) { - checks.IsSequence(); - } - - checks.NotDeleted() - .NotUnderDeleting() - .NotUnderOperation(); - - if (!checks) { - return {CreateReject(nextId, checks.GetStatus(), checks.GetError())}; - } - } - Y_ABORT_UNLESS(child.Base()->PathId == childPathId); - - if (child.IsSequence()) { - auto dropSequence = TransactionTemplate(table.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpDropSequence); - dropSequence.MutableDrop()->SetName(ToString(child->Name)); - - result.push_back(CreateDropSequence(NextPartId(nextId, result), dropSequence)); - continue; - } else if (child.IsTableIndex()) { - auto dropIndex = TransactionTemplate(table.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpDropTableIndex); - dropIndex.MutableDrop()->SetName(ToString(child.Base()->Name)); - - 
result.push_back(CreateDropTableIndex(NextPartId(nextId, result), dropIndex)); - } else if (child.IsCdcStream()) { - auto dropStream = TransactionTemplate(table.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpDropCdcStreamImpl); - dropStream.MutableDrop()->SetName(ToString(child.Base()->Name)); - - result.push_back(CreateDropCdcStreamImpl(NextPartId(nextId, result), dropStream)); - } - - Y_ABORT_UNLESS(child.Base()->GetChildren().size() == 1); - for (auto& [implName, implPathId] : child.Base()->GetChildren()) { - Y_ABORT_UNLESS(implName == "indexImplTable" || implName == "streamImpl", - "unexpected name %s", implName.c_str()); - - TPath implPath = child.Child(implName); - { - TPath::TChecker checks = implPath.Check(); - checks - .NotEmpty() - .IsResolved() - .NotDeleted() - .NotUnderDeleting() - .NotUnderOperation(); - - if (checks) { - if (implPath.Base()->IsTable()) { - checks - .IsTable() - .IsInsideTableIndexPath(); - } else if (implPath.Base()->IsPQGroup()) { - checks - .IsPQGroup() - .IsInsideCdcStreamPath(); - } - } - - if (!checks) { - return {CreateReject(nextId, checks.GetStatus(), checks.GetError())}; - } - } - Y_ABORT_UNLESS(implPath.Base()->PathId == implPathId); - - if (implPath.Base()->IsTable()) { - auto dropIndexTable = TransactionTemplate(child.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpDropTable); - dropIndexTable.MutableDrop()->SetName(ToString(implPath.Base()->Name)); - - result.push_back(CreateDropTable(NextPartId(nextId, result), dropIndexTable)); - } else if (implPath.Base()->IsPQGroup()) { - auto dropPQGroup = TransactionTemplate(child.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpDropPersQueueGroup); - dropPQGroup.MutableDrop()->SetName(ToString(implPath.Base()->Name)); - - result.push_back(CreateDropPQ(NextPartId(nextId, result), dropPQGroup)); - } - } + if (auto reject = CascadeDropTableChildren(result, nextId, table)) { + return {reject}; } return result; diff --git 
a/ydb/core/tx/schemeshard/schemeshard__operation_drop_lock.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_drop_lock.cpp index c5026592a0ec..8dc9cf01b553 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_drop_lock.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_drop_lock.cpp @@ -119,9 +119,12 @@ class TDropLock: public TSubOperation { .IsResolved() .NotDeleted() .NotUnderDeleting() - .IsCommonSensePath() .IsLikeDirectory(); + if (checks && !parentPath.IsTableIndex()) { + checks.IsCommonSensePath(); + } + if (!checks) { result->SetError(checks.GetStatus(), checks.GetError()); return result; @@ -134,10 +137,12 @@ class TDropLock: public TSubOperation { checks .IsAtLocalSchemeShard() .IsResolved() - .NotUnderDeleting() - .IsCommonSensePath(); + .NotUnderDeleting(); if (checks) { + if (!parentPath.IsTableIndex()) { + checks.IsCommonSensePath(); + } if (dstPath.IsUnderOperation()) { // may be part of a consistent operation checks.IsUnderTheSameOperation(OperationId.GetTxId()); } else { diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_drop_pq.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_drop_pq.cpp index 70d663c713eb..5e4f9ca67177 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_drop_pq.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_drop_pq.cpp @@ -183,7 +183,7 @@ class TPropose: public TSubOperationState { bool parseOk = ParseFromStringNoSizeLimit(config, tabletConfig); Y_ABORT_UNLESS(parseOk); - const PQGroupReserve reserve(config, pqGroup->TotalPartitionCount); + const PQGroupReserve reserve(config, pqGroup->ActivePartitionCount); auto domainInfo = context.SS->ResolveDomainInfo(pathId); domainInfo->DecPathsInside(); diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_initiate_build_index.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_initiate_build_index.cpp index 7fb9a651574f..a347d5841b90 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_initiate_build_index.cpp +++ 
b/ydb/core/tx/schemeshard/schemeshard__operation_initiate_build_index.cpp @@ -88,7 +88,9 @@ class TConfigureParts: public TSubOperationState { found = true; Y_ABORT_UNLESS(index->AlterData); - context.SS->DescribeTableIndex(childPathId, childName, index->AlterData, *initiate->MutableIndexDescription()); + context.SS->DescribeTableIndex(childPathId, childName, index->AlterData, false, false, + *initiate->MutableIndexDescription() + ); } txState->ClearShardsInProgress(); diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_move_tables.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_move_tables.cpp index 32dadccbd563..9a7da1e828fb 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_move_tables.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_move_tables.cpp @@ -65,6 +65,10 @@ TVector CreateConsistentMoveTable(TOperationId nextId, cons return {CreateReject(nextId, NKikimrScheme::StatusPreconditionFailed, "Cannot move table with cdc streams")}; } + if (srcChildPath.IsSequence()) { + return {CreateReject(nextId, NKikimrScheme::StatusPreconditionFailed, "Cannot move table with sequences")}; + } + TPath dstIndexPath = dstPath.Child(name); Y_ABORT_UNLESS(srcChildPath.Base()->PathId == child.second); diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_part.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_part.cpp index e06c7c1cca8a..a854295c8da1 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_part.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_part.cpp @@ -1,5 +1,6 @@ #include "schemeshard__operation_part.h" #include "schemeshard_impl.h" +#include "schemeshard_path.h" namespace NKikimr::NSchemeShard { @@ -99,4 +100,108 @@ void TSubOperationState::IgnoreMessages(TString debugHint, TSet mgsIds) { MsgToIgnore.swap(mgsIds); } +ISubOperation::TPtr CascadeDropTableChildren(TVector& result, const TOperationId& id, const TPath& table) { + for (const auto& [childName, childPathId] : table.Base()->GetChildren()) { + TPath 
child = table.Child(childName); + { + TPath::TChecker checks = child.Check(); + checks + .NotEmpty() + .IsResolved(); + + if (checks) { + if (child.IsDeleted()) { + continue; + } + } + + if (child.IsTableIndex()) { + checks.IsTableIndex(); + } else if (child.IsCdcStream()) { + checks.IsCdcStream(); + } else if (child.IsSequence()) { + checks.IsSequence(); + } + + checks.NotDeleted() + .NotUnderDeleting() + .NotUnderOperation(); + + if (!checks) { + return CreateReject(id, checks.GetStatus(), checks.GetError()); + } + } + Y_ABORT_UNLESS(child.Base()->PathId == childPathId); + + if (child.IsSequence()) { + auto dropSequence = TransactionTemplate(table.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpDropSequence); + dropSequence.MutableDrop()->SetName(ToString(child->Name)); + + result.push_back(CreateDropSequence(NextPartId(id, result), dropSequence)); + continue; + } else if (child.IsTableIndex()) { + auto dropIndex = TransactionTemplate(table.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpDropTableIndex); + dropIndex.MutableDrop()->SetName(ToString(child.Base()->Name)); + + result.push_back(CreateDropTableIndex(NextPartId(id, result), dropIndex)); + } else if (child.IsCdcStream()) { + auto dropStream = TransactionTemplate(table.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpDropCdcStreamImpl); + dropStream.MutableDrop()->SetName(ToString(child.Base()->Name)); + + result.push_back(CreateDropCdcStreamImpl(NextPartId(id, result), dropStream)); + } + + Y_ABORT_UNLESS(child.Base()->GetChildren().size() == 1); + for (auto& [implName, implPathId] : child.Base()->GetChildren()) { + Y_ABORT_UNLESS(implName == "indexImplTable" || implName == "streamImpl", + "unexpected name %s", implName.c_str()); + + TPath implPath = child.Child(implName); + { + TPath::TChecker checks = implPath.Check(); + checks + .NotEmpty() + .IsResolved() + .NotDeleted() + .NotUnderDeleting() + .NotUnderOperation(); + + if (checks) { + if (implPath.Base()->IsTable()) { + checks 
+ .IsTable() + .IsInsideTableIndexPath(); + } else if (implPath.Base()->IsPQGroup()) { + checks + .IsPQGroup() + .IsInsideCdcStreamPath(); + } + } + + if (!checks) { + return CreateReject(id, checks.GetStatus(), checks.GetError()); + } + } + Y_ABORT_UNLESS(implPath.Base()->PathId == implPathId); + + if (implPath.Base()->IsTable()) { + auto dropIndexTable = TransactionTemplate(child.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpDropTable); + dropIndexTable.MutableDrop()->SetName(ToString(implPath.Base()->Name)); + + result.push_back(CreateDropTable(NextPartId(id, result), dropIndexTable)); + if (auto reject = CascadeDropTableChildren(result, id, implPath)) { + return reject; + } + } else if (implPath.Base()->IsPQGroup()) { + auto dropPQGroup = TransactionTemplate(child.PathString(), NKikimrSchemeOp::EOperationType::ESchemeOpDropPersQueueGroup); + dropPQGroup.MutableDrop()->SetName(ToString(implPath.Base()->Name)); + + result.push_back(CreateDropPQ(NextPartId(id, result), dropPQGroup)); + } + } + } + + return nullptr; +} + } diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_part.h b/ydb/core/tx/schemeshard/schemeshard__operation_part.h index 386dcb20768a..b0e9a3589503 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_part.h +++ b/ydb/core/tx/schemeshard/schemeshard__operation_part.h @@ -84,6 +84,7 @@ namespace NKikimr { namespace NSchemeShard { class TSchemeShard; +class TPath; struct TOperationContext { public: @@ -492,7 +493,7 @@ ISubOperation::TPtr CreateAlterSubDomain(TOperationId id, const TTxTransaction& ISubOperation::TPtr CreateAlterSubDomain(TOperationId id, TTxState::ETxState state); ISubOperation::TPtr CreateCompatibleSubdomainDrop(TSchemeShard* ss, TOperationId id, const TTxTransaction& tx); -ISubOperation::TPtr CreateCompatibleSubdomainAlter(TSchemeShard* ss, TOperationId id, const TTxTransaction& tx); +TVector CreateCompatibleSubdomainAlter(TOperationId id, const TTxTransaction& tx, TOperationContext& context); 
ISubOperation::TPtr CreateUpgradeSubDomain(TOperationId id, const TTxTransaction& tx); ISubOperation::TPtr CreateUpgradeSubDomain(TOperationId id, TTxState::ETxState state); @@ -513,10 +514,10 @@ ISubOperation::TPtr CreateExtSubDomain(TOperationId id, TTxState::ETxState state // Alter TVector CreateCompatibleAlterExtSubDomain(TOperationId nextId, const TTxTransaction& tx, TOperationContext& context); -ISubOperation::TPtr CreateAlterExtSubDomain(TOperationId id, const TTxTransaction& tx); ISubOperation::TPtr CreateAlterExtSubDomain(TOperationId id, TTxState::ETxState state); -ISubOperation::TPtr CreateAlterExtSubDomainCreateHive(TOperationId id, const TTxTransaction& tx); ISubOperation::TPtr CreateAlterExtSubDomainCreateHive(TOperationId id, TTxState::ETxState state); +//NOTE: no variants to construct individual suboperations directly from TTxTransaction -- +// -- it should be possible only through CreateCompatibleAlterExtSubDomain // Drop ISubOperation::TPtr CreateForceDropExtSubDomain(TOperationId id, const TTxTransaction& tx); @@ -620,5 +621,8 @@ ISubOperation::TPtr CreateAlterResourcePool(TOperationId id, TTxState::ETxState ISubOperation::TPtr CreateDropResourcePool(TOperationId id, const TTxTransaction& tx); ISubOperation::TPtr CreateDropResourcePool(TOperationId id, TTxState::ETxState state); +// returns Reject in case of error, nullptr otherwise +ISubOperation::TPtr CascadeDropTableChildren(TVector& result, const TOperationId& id, const TPath& table); + } } diff --git a/ydb/core/tx/schemeshard/schemeshard__operation_upgrade_subdomain.cpp b/ydb/core/tx/schemeshard/schemeshard__operation_upgrade_subdomain.cpp index 6f6714d4ba1d..fcf9bbb4ef40 100644 --- a/ydb/core/tx/schemeshard/schemeshard__operation_upgrade_subdomain.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__operation_upgrade_subdomain.cpp @@ -1530,13 +1530,13 @@ ISubOperation::TPtr CreateCompatibleSubdomainDrop(TSchemeShard* ss, TOperationId return CreateForceDropSubDomain(id, tx); } -ISubOperation::TPtr 
CreateCompatibleSubdomainAlter(TSchemeShard* ss, TOperationId id, const TTxTransaction& tx) { +TVector CreateCompatibleSubdomainAlter(TOperationId id, const TTxTransaction& tx, TOperationContext& context) { const auto& info = tx.GetSubDomain(); const TString& parentPathStr = tx.GetWorkingDir(); const TString& name = info.GetName(); - TPath path = TPath::Resolve(parentPathStr, ss).Dive(name); + TPath path = TPath::Resolve(parentPathStr, context.SS).Dive(name); { TPath::TChecker checks = path.Check(); @@ -1546,15 +1546,16 @@ ISubOperation::TPtr CreateCompatibleSubdomainAlter(TSchemeShard* ss, TOperationI .NotDeleted(); if (!checks) { - return CreateAlterSubDomain(id, tx); + return {CreateAlterSubDomain(id, tx)}; } } if (path.Base()->IsExternalSubDomainRoot()) { - return CreateAlterExtSubDomain(id, tx); + // plain subdomains don't have subdomain/tenant hives so only single operation should be returned here + return CreateCompatibleAlterExtSubDomain(id, tx, context); } - return CreateAlterSubDomain(id, tx); + return {CreateAlterSubDomain(id, tx)}; } } diff --git a/ydb/core/tx/schemeshard/schemeshard__table_stats.cpp b/ydb/core/tx/schemeshard/schemeshard__table_stats.cpp index 72e7b4a80379..bad7dd18885f 100644 --- a/ydb/core/tx/schemeshard/schemeshard__table_stats.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__table_stats.cpp @@ -246,6 +246,9 @@ bool TTxStoreTableStats::PersistSingleStats(const TPathId& pathId, } TShardIdx shardIdx = Self->TabletIdToShardIdx[datashardId]; + LOG_DEBUG_S(ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, + "PersistSingleStats for pathId " << pathId.LocalPathId << " shard idx " << shardIdx << " data size " << dataSize << " row count " << rowCount + ); const auto* shardInfo = Self->ShardInfos.FindPtr(shardIdx); if (!shardInfo) { LOG_DEBUG_S(ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, @@ -259,18 +262,19 @@ bool TTxStoreTableStats::PersistSingleStats(const TPathId& pathId, subDomainInfo->EffectiveStoragePools(), shardInfo->BindedChannels); + const 
auto pathElement = Self->PathsById[pathId]; LOG_DEBUG_S(ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, "TTxStoreTableStats.PersistSingleStats: main stats from" << " datashardId(TabletID)=" << datashardId << " maps to shardIdx: " << shardIdx - << ", pathId: " << pathId << ", pathId map=" << Self->PathsById[pathId]->Name + << ", pathId: " << pathId << ", pathId map=" << pathElement->Name << ", is column=" << isColumnTable << ", is olap=" << isOlapStore); const TPartitionStats newStats = PrepareStats(ctx, rec, channelsMapping); LOG_INFO_S(ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, - "Add stats from shard with datashardId(TabletID)=" << datashardId + "Add stats from shard with datashardId(TabletID)=" << datashardId << ", pathId " << pathId.LocalPathId - << ": RowCount " << newStats.RowCount + << ": RowCount " << newStats.RowCount << ", DataSize " << newStats.DataSize << (newStats.HasBorrowedData ? ", with borrowed parts" : "")); @@ -321,11 +325,10 @@ bool TTxStoreTableStats::PersistSingleStats(const TPathId& pathId, const TPathId tablePathId = TPathId(TOwnerId(pathId.OwnerId), TLocalPathId(table.GetTableLocalId())); if (Self->ColumnTables.contains(tablePathId)) { - LOG_DEBUG_S(ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, + LOG_TRACE_S(ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, "add stats for exists table with pathId=" << tablePathId); - auto columnTable = Self->ColumnTables.TakeVerified(tablePathId); - columnTable->UpdateTableStats(tablePathId, newTableStats); + Self->ColumnTables.GetVerifiedPtr(tablePathId)->UpdateTableStats(shardIdx, tablePathId, newTableStats); } else { LOG_WARN_S(ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, "failed add stats for table with pathId=" << tablePathId); @@ -336,7 +339,7 @@ bool TTxStoreTableStats::PersistSingleStats(const TPathId& pathId, LOG_INFO_S(ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, "PersistSingleStats: ColumnTable rec.GetColumnTables() size=" << rec.GetTables().size()); - auto columnTable = Self->ColumnTables.TakeVerified(pathId); + 
auto columnTable = Self->ColumnTables.GetVerifiedPtr(pathId); oldAggrStats = columnTable->GetStats().Aggregated; columnTable->UpdateShardStats(shardIdx, newStats); newAggrStats = columnTable->GetStats().Aggregated; @@ -400,11 +403,14 @@ bool TTxStoreTableStats::PersistSingleStats(const TPathId& pathId, Self->TabletCounters->Percentile()[COUNTER_NUM_SHARDS_BY_TTL_LAG].IncrementFor(lag->Seconds()); } + const TTableIndexInfo* index = Self->Indexes.Value(pathElement->ParentPathId, nullptr).Get(); const TTableInfo* mainTableForIndex = Self->GetMainTableForIndex(pathId); const auto forceShardSplitSettings = Self->SplitSettings.GetForceShardSplitSettings(); TVector shardsToMerge; - if (table->CheckCanMergePartitions(Self->SplitSettings, forceShardSplitSettings, shardIdx, shardsToMerge, mainTableForIndex)) { + if ((!index || index->State == NKikimrSchemeOp::EIndexStateReady) + && table->CheckCanMergePartitions(Self->SplitSettings, forceShardSplitSettings, shardIdx, shardsToMerge, mainTableForIndex) + ) { TTxId txId = Self->GetCachedTxId(ctx); if (!txId) { diff --git a/ydb/core/tx/schemeshard/schemeshard__table_stats_histogram.cpp b/ydb/core/tx/schemeshard/schemeshard__table_stats_histogram.cpp index 091e60fdc26f..edd640d02ccd 100644 --- a/ydb/core/tx/schemeshard/schemeshard__table_stats_histogram.cpp +++ b/ydb/core/tx/schemeshard/schemeshard__table_stats_histogram.cpp @@ -34,11 +34,41 @@ static bool IsIntegerType(NScheme::TTypeInfo type) { } } -TSerializedCellVec ChooseSplitKeyByHistogram(const NKikimrTableStats::THistogram& histogram, const TConstArrayRef &keyColumnTypes) { - ui64 bucketsCount = histogram.BucketsSize(); - ui64 idxLo = bucketsCount * 0.33; - ui64 idxMed = bucketsCount * 0.5; - ui64 idxHi = bucketsCount * 0.66; +TSerializedCellVec ChooseSplitKeyByHistogram(const NKikimrTableStats::THistogram& histogram, ui64 total, const TConstArrayRef &keyColumnTypes) { + if (histogram.GetBuckets().empty()) { + return {}; + } + + ui64 idxLo = Max(), idxMed = Max(), idxHi = 
Max(); + { // search for median and acceptable bounds range so that after the split smallest size is >= 25% + ui64 idxMedDiff = Max(), idx = 0; + for (const auto& point : histogram.GetBuckets()) { + ui64 leftSize = Min(point.GetValue(), total); + ui64 rightSize = total - leftSize; + + // search for a median point at which abs(leftSize - rightSize) is minimum + ui64 sizesDiff = Max(leftSize, rightSize) - Min(leftSize, rightSize); + if (idxMedDiff > sizesDiff) { + idxMed = idx; + idxMedDiff = sizesDiff; + } + + if (leftSize * 4 >= total && idxLo == Max()) { + idxLo = idx; // first point at which leftSize >= 25% + } + if (rightSize * 4 >= total) { + idxHi = idx; // last point at which rightSize >= 25% + } + + idx++; + } + + bool canSplit = idxLo != Max() && idxLo <= idxMed && idxMed <= idxHi && idxHi != Max(); + + if (!canSplit) { + return {}; + } + } TSerializedCellVec keyLo(histogram.GetBuckets(idxLo).GetKey()); TSerializedCellVec keyMed(histogram.GetBuckets(idxMed).GetKey()); @@ -302,7 +332,8 @@ bool TTxPartitionHistogram::Execute(TTransactionContext& txc, const TActorContex << " for pathId " << tableId << " state '" << DatashardStateName(rec.GetShardState()).data() << "'" << " dataSize " << dataSize - << " rowCount " << rowCount); + << " rowCount " << rowCount + << " dataSizeHistogram buckets " << rec.GetTableStats().GetDataSizeHistogram().BucketsSize()); if (!Self->Tables.contains(tableId)) return true; @@ -353,12 +384,15 @@ bool TTxPartitionHistogram::Execute(TTransactionContext& txc, const TActorContex } else { // Choose number of parts and split boundaries const auto& histogram = rec.GetTableStats().GetDataSizeHistogram(); - if (histogram.BucketsSize() < 2) { + + splitKey = ChooseSplitKeyByHistogram(histogram, dataSize, keyColumnTypes); + if (splitKey.GetBuffer().empty()) { + LOG_WARN(ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, + "Failed to find proper split key (initially) for '%s' of datashard %" PRIu64, + ToString(splitReason), datashardId); return true; } - 
splitKey = ChooseSplitKeyByHistogram(histogram, keyColumnTypes); - // Split key must not be less than the first key TSerializedCellVec lowestKey(histogram.GetBuckets(0).GetKey()); if (0 < CompareTypedCellVectors(lowestKey.GetCells().data(), splitKey.GetCells().data(), @@ -366,7 +400,7 @@ bool TTxPartitionHistogram::Execute(TTransactionContext& txc, const TActorContex lowestKey.GetCells().size(), splitKey.GetCells().size())) { LOG_WARN(ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, - "Failed to find proper split key for '%s' of datashard %" PRIu64, + "Failed to find proper split key (less than first) for '%s' of datashard %" PRIu64, ToString(splitReason), datashardId); return true; } diff --git a/ydb/core/tx/schemeshard/schemeshard_audit_log.cpp b/ydb/core/tx/schemeshard/schemeshard_audit_log.cpp index 8bfe30debba0..a5b5c7e8272d 100644 --- a/ydb/core/tx/schemeshard/schemeshard_audit_log.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_audit_log.cpp @@ -1,14 +1,32 @@ -#include "schemeshard_audit_log.h" -#include "schemeshard_path.h" -#include "schemeshard_audit_log_fragment.h" +#include + +#include +#include + +#include -#include #include +#include +#include + #include -#include +#include + +#include "schemeshard_path.h" +#include "schemeshard_impl.h" +#include "schemeshard_xxport__helpers.h" +#include "schemeshard_audit_log_fragment.h" +#include "schemeshard_audit_log.h" namespace NKikimr::NSchemeShard { +namespace { + +const TString SchemeshardComponentName = "schemeshard"; + +//NOTE: EmptyValue couldn't be an empty string as AUDIT_PART() skips parts with an empty values +const TString EmptyValue = "{none}"; + TString GeneralStatus(NKikimrScheme::EStatus actualStatus) { switch(actualStatus) { case NKikimrScheme::EStatus::StatusAlreadyExists: @@ -61,12 +79,9 @@ TPath DatabasePathFromWorkingDir(TSchemeShard* SS, const TString &opWorkingDir) return databasePath; } -void AuditLogModifySchemeTransaction(const NKikimrScheme::TEvModifySchemeTransaction& request, const 
NKikimrScheme::TEvModifySchemeTransactionResult& response, TSchemeShard* SS, const TString& userSID) { - static const TString SchemeshardComponentName = "schemeshard"; - - //NOTE: EmptyValue couldn't be an empty string as AUDIT_PART() skips parts with an empty values - static const TString EmptyValue = "{none}"; +} // anonymous namespace +void AuditLogModifySchemeTransaction(const NKikimrScheme::TEvModifySchemeTransaction& request, const NKikimrScheme::TEvModifySchemeTransactionResult& response, TSchemeShard* SS, const TString& userSID) { // Each TEvModifySchemeTransaction.Transaction is a self sufficient operation and should be logged independently // (even if it was packed into a single TxProxy transaction with some other operations). for (const auto& operation : request.GetTransaction()) { @@ -79,7 +94,7 @@ void AuditLogModifySchemeTransaction(const NKikimrScheme::TEvModifySchemeTransac AUDIT_LOG( AUDIT_PART("component", SchemeshardComponentName) AUDIT_PART("tx_id", std::to_string(request.GetTxId())) - AUDIT_PART("remote_address", (!peerName.empty() ? peerName : EmptyValue) ) + AUDIT_PART("remote_address", (!peerName.empty() ? peerName : EmptyValue)) AUDIT_PART("subject", (!userSID.empty() ? userSID : EmptyValue)) AUDIT_PART("database", (!databasePath.IsEmpty() ? 
databasePath.GetDomainPathString() : EmptyValue)) AUDIT_PART("operation", logEntry.Operation) @@ -165,4 +180,224 @@ void AuditLogModifySchemeTransactionDeprecated(const NKikimrScheme::TEvModifySch } } +namespace { + +struct TXxportRecord { + TString OperationName; + ui64 Id; + TString Uid; + TString RemoteAddress; + TString UserSID; + TString DatabasePath; + TString Status; + Ydb::StatusIds::StatusCode DetailedStatus; + TString Reason; + TVector> AdditionalParts; + TString StartTime; + TString EndTime; + TString CloudId; + TString FolderId; + TString ResourceId; +}; + +void AuditLogXxport(TXxportRecord&& record) { + AUDIT_LOG( + AUDIT_PART("component", SchemeshardComponentName) + + AUDIT_PART("id", std::to_string(record.Id)) + AUDIT_PART("uid", record.Uid); + AUDIT_PART("remote_address", (!record.RemoteAddress.empty() ? record.RemoteAddress : EmptyValue)) + AUDIT_PART("subject", (!record.UserSID.empty() ? record.UserSID : EmptyValue)) + AUDIT_PART("database", (!record.DatabasePath.empty() ? record.DatabasePath : EmptyValue)) + AUDIT_PART("operation", record.OperationName) + AUDIT_PART("status", record.Status) + AUDIT_PART("detailed_status", Ydb::StatusIds::StatusCode_Name(record.DetailedStatus)) + AUDIT_PART("reason", record.Reason) + + // all parts are considered required, so all empty values are replaced with a special stub + for (const auto& [name, value] : record.AdditionalParts) { + AUDIT_PART(name, (!value.empty() ? 
value : EmptyValue)) + } + + AUDIT_PART("start_time", record.StartTime) + AUDIT_PART("end_time", record.EndTime) + + AUDIT_PART("cloud_id", record.CloudId); + AUDIT_PART("folder_id", record.FolderId); + AUDIT_PART("resource_id", record.ResourceId); + ); +} + +using TParts = decltype(TXxportRecord::AdditionalParts); + +template +TParts ExportKindSpecificParts(const Proto& proto) { + //NOTE: intentional switch -- that will help to detect (by breaking the compilation) + // the moment when and if oneof Settings will be extended + switch (proto.GetSettingsCase()) { + case Proto::kExportToYtSettings: + return ExportKindSpecificParts(proto.GetExportToYtSettings()); + case Proto::kExportToS3Settings: + return ExportKindSpecificParts(proto.GetExportToS3Settings()); + case Proto::SETTINGS_NOT_SET: + return {}; + } +} +template <> TParts ExportKindSpecificParts(const Ydb::Export::ExportToYtSettings& proto) { + return { + {"export_type", "yt"}, + {"export_item_count", ToString(proto.items().size())}, + {"export_yt_prefix", ((proto.items().size() > 0) ? proto.items(0).destination_path() : "")}, + }; +} +template <> TParts ExportKindSpecificParts(const Ydb::Export::ExportToS3Settings& proto) { + return { + {"export_type", "s3"}, + {"export_item_count", ToString(proto.items().size())}, + {"export_s3_bucket", proto.bucket()}, + //NOTE: take first item's destination_prefix as a "good enough approximation" + // (each item has its own destination_prefix, but in practice they are all the same) + {"export_s3_prefix", ((proto.items().size() > 0) ? 
proto.items(0).destination_prefix() : "")}, + }; +} + +template +TParts ImportKindSpecificParts(const Proto& proto) { + //NOTE: intentional switch -- that will help to detect (by breaking the compilation) + // the moment when and if oneof Settings will be extended + switch (proto.GetSettingsCase()) { + case Proto::kImportFromS3Settings: + return ImportKindSpecificParts(proto.GetImportFromS3Settings()); + case Proto::SETTINGS_NOT_SET: + return {}; + } +} +template <> TParts ImportKindSpecificParts(const Ydb::Import::ImportFromS3Settings& proto) { + return { + {"import_type", "s3"}, + {"export_item_count", ToString(proto.items().size())}, + {"import_s3_bucket", proto.bucket()}, + //NOTE: take first item's source_prefix as a "good enough approximation" + // (each item has its own source_prefix, but in practice they are all the same) + {"import_s3_prefix", ((proto.items().size() > 0) ? proto.items(0).source_prefix() : "")}, + }; +} + +} // anonymous namespace + +template +void _AuditLogXxportStart(const Request& request, const Response& response, const TString& operationName, TParts&& additionalParts, TSchemeShard* SS) { + TPath databasePath = DatabasePathFromWorkingDir(SS, request.GetDatabaseName()); + auto [cloud_id, folder_id, database_id] = GetDatabaseCloudIds(databasePath); + auto peerName = NKikimr::NAddressClassifier::ExtractAddress(request.GetPeerName()); + const auto& entry = response.GetResponse().GetEntry(); + + AuditLogXxport({ + .OperationName = operationName, + //NOTE: original request's tx-id is used as an operation id + .Id = request.GetTxId(), + .Uid = GetUid(request.GetRequest().GetOperationParams()), + .RemoteAddress = peerName, + .UserSID = request.GetUserSID(), + .DatabasePath = databasePath.PathString(), + .Status = (entry.GetStatus() == Ydb::StatusIds::SUCCESS ? "SUCCESS" : "ERROR"), + .DetailedStatus = entry.GetStatus(), + //NOTE: use main issue (on {ex,im}port itself), ignore issues on individual items + .Reason = ((entry.IssuesSize() > 0) ? 
entry.GetIssues(0).message() : ""), + + .AdditionalParts = std::move(additionalParts), + + // no start or end times + + .CloudId = cloud_id, + .FolderId = folder_id, + .ResourceId = database_id, + }); +} + +void AuditLogExportStart(const NKikimrExport::TEvCreateExportRequest& request, const NKikimrExport::TEvCreateExportResponse& response, TSchemeShard* SS) { + _AuditLogXxportStart(request, response, "EXPORT START", ExportKindSpecificParts(request.GetRequest()), SS); +} + +void AuditLogImportStart(const NKikimrImport::TEvCreateImportRequest& request, const NKikimrImport::TEvCreateImportResponse& response, TSchemeShard* SS) { + _AuditLogXxportStart(request, response, "IMPORT START", ImportKindSpecificParts(request.GetRequest()), SS); +} + +template +void _AuditLogXxportEnd(const Info& info, const TString& operationName, TParts&& additionalParts, TSchemeShard* SS) { + const TPath databasePath = TPath::Init(info.DomainPathId, SS); + auto [cloud_id, folder_id, database_id] = GetDatabaseCloudIds(databasePath); + auto peerName = NKikimr::NAddressClassifier::ExtractAddress(info.PeerName); + TString userSID = *info.UserSID.OrElse(EmptyValue); + TString startTime = (info.StartTime != TInstant::Zero() ? info.StartTime.ToString() : TString()); + TString endTime = (info.EndTime != TInstant::Zero() ? info.EndTime.ToString() : TString()); + + // Info.State can't be anything but Done or Cancelled here + Y_ABORT_UNLESS(info.State == Info::EState::Done || info.State == Info::EState::Cancelled); + TString status = TString(info.State == Info::EState::Done ? "SUCCESS" : "ERROR"); + Ydb::StatusIds::StatusCode detailedStatus = (info.State == Info::EState::Done ? 
Ydb::StatusIds::SUCCESS : Ydb::StatusIds::CANCELLED); + + AuditLogXxport({ + .OperationName = operationName, + .Id = info.Id, + .Uid = info.Uid, + .RemoteAddress = peerName, + .UserSID = userSID, + .DatabasePath = databasePath.PathString(), + .Status = status, + .DetailedStatus = detailedStatus, + .Reason = info.Issue, + + .AdditionalParts = std::move(additionalParts), + + .StartTime = startTime, + .EndTime = endTime, + + .CloudId = cloud_id, + .FolderId = folder_id, + .ResourceId = database_id, + }); +} + +void AuditLogExportEnd(const TExportInfo& info, TSchemeShard* SS) { + NKikimrExport::TCreateExportRequest proto; + // TSchemeShard::FromXxportInfo() can not be used here + switch (info.Kind) { + case TExportInfo::EKind::YT: + Y_ABORT_UNLESS(proto.MutableExportToYtSettings()->ParseFromString(info.Settings)); + proto.MutableExportToYtSettings()->clear_token(); + break; + case TExportInfo::EKind::S3: + Y_ABORT_UNLESS(proto.MutableExportToS3Settings()->ParseFromString(info.Settings)); + proto.MutableExportToS3Settings()->clear_access_key(); + proto.MutableExportToS3Settings()->clear_secret_key(); + break; + } + _AuditLogXxportEnd(info, "EXPORT END", ExportKindSpecificParts(proto), SS); +} +void AuditLogImportEnd(const TImportInfo& info, TSchemeShard* SS) { + _AuditLogXxportEnd(info, "IMPORT END", ImportKindSpecificParts(info.Settings), SS); +} + +void AuditLogLogin(const NKikimrScheme::TEvLogin& request, const NKikimrScheme::TEvLoginResult& response, TSchemeShard* SS) { + static const TString LoginOperationName = "LOGIN"; + + TPath databasePath = TPath::Root(SS); + auto peerName = NKikimr::NAddressClassifier::ExtractAddress(request.GetPeerName()); + + // NOTE: audit field set here must be in sync with ydb/core/security/audit_log.h, AuditLogWebUILogout() + AUDIT_LOG( + AUDIT_PART("component", SchemeshardComponentName) + AUDIT_PART("remote_address", (!peerName.empty() ? peerName : EmptyValue)) + AUDIT_PART("database", (!databasePath.PathString().empty() ? 
databasePath.PathString() : EmptyValue)) + AUDIT_PART("operation", LoginOperationName) + AUDIT_PART("status", TString(response.GetError().empty() ? "SUCCESS" : "ERROR")) + AUDIT_PART("reason", response.GetError(), response.HasError()) + + // Login + AUDIT_PART("login_user", (request.HasUser() ? request.GetUser() : EmptyValue)) + AUDIT_PART("login_auth_domain", (!request.GetExternalAuth().empty() ? request.GetExternalAuth() : EmptyValue)) + ); +} + } diff --git a/ydb/core/tx/schemeshard/schemeshard_audit_log.h b/ydb/core/tx/schemeshard/schemeshard_audit_log.h index 559099131b86..7d095db2122d 100644 --- a/ydb/core/tx/schemeshard/schemeshard_audit_log.h +++ b/ydb/core/tx/schemeshard/schemeshard_audit_log.h @@ -5,13 +5,41 @@ namespace NKikimrScheme { class TEvModifySchemeTransaction; class TEvModifySchemeTransactionResult; + +class TEvLogin; +class TEvLoginResult; +} + +namespace NKikimrExport { +class TEvCreateExportRequest; +class TEvCreateExportResponse; +} + +namespace NKikimrImport { +class TEvCreateImportRequest; +class TEvCreateImportResponse; +} + +namespace NHttp { +class THttpIncomingRequest; } namespace NKikimr::NSchemeShard { class TSchemeShard; +struct TExportInfo; +struct TImportInfo; void AuditLogModifySchemeTransaction(const NKikimrScheme::TEvModifySchemeTransaction& request, const NKikimrScheme::TEvModifySchemeTransactionResult& response, TSchemeShard* SS, const TString& userSID); void AuditLogModifySchemeTransactionDeprecated(const NKikimrScheme::TEvModifySchemeTransaction& request, const NKikimrScheme::TEvModifySchemeTransactionResult& response, TSchemeShard* SS, const TString& userSID); +void AuditLogExportStart(const NKikimrExport::TEvCreateExportRequest& request, const NKikimrExport::TEvCreateExportResponse& response, TSchemeShard* SS); +void AuditLogExportEnd(const TExportInfo& exportInfo, TSchemeShard* SS); + +void AuditLogImportStart(const NKikimrImport::TEvCreateImportRequest& request, const NKikimrImport::TEvCreateImportResponse& response, 
TSchemeShard* SS); +void AuditLogImportEnd(const TImportInfo& importInfo, TSchemeShard* SS); + +void AuditLogLogin(const NKikimrScheme::TEvLogin& request, const NKikimrScheme::TEvLoginResult& response, TSchemeShard* SS); +void AuditLogWebUILogout(const NHttp::THttpIncomingRequest& request, const TString& userSID); + } diff --git a/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp b/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp index 6021cbe67056..450db67f8e46 100644 --- a/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_build_index__create.cpp @@ -1,4 +1,5 @@ #include "schemeshard_build_index.h" +#include "schemeshard_xxport__helpers.h" #include "schemeshard_build_index_helpers.h" #include "schemeshard_build_index_tx_base.h" #include "schemeshard_impl.h" @@ -29,7 +30,7 @@ class TSchemeShard::TIndexBuilder::TTxCreate: public TSchemeShard::TIndexBuilder << "Index build with id '" << id << "' already exists"); } - const TString& uid = GetUid(request.GetOperationParams().labels()); + const TString& uid = GetUid(request.GetOperationParams()); if (uid && Self->IndexBuildsByUid.contains(uid)) { return Reply(Ydb::StatusIds::ALREADY_EXISTS, TStringBuilder() << "Index build with uid '" << uid << "' already exists"); @@ -212,7 +213,7 @@ class TSchemeShard::TIndexBuilder::TTxCreate: public TSchemeShard::TIndexBuilder if (settings.has_index() && settings.has_column_build_operation()) { explain = "unable to build index and column in the single operation"; - return false; + return false; } if (settings.has_index()) { @@ -240,19 +241,10 @@ class TSchemeShard::TIndexBuilder::TTxCreate: public TSchemeShard::TIndexBuilder Ydb::StatusIds::StatusCode status; if (!FillIndexTablePartitioning(buildInfo->ImplTableDescription, index, status, explain)) { return false; - } + } } return true; } - - static TString GetUid(const google::protobuf::Map& labels) { - auto it = labels.find("uid"); - if (it == labels.end()) { 
- return TString(); - } - - return it->second; - } }; ITransaction* TSchemeShard::CreateTxCreate(TEvIndexBuilder::TEvCreateRequest::TPtr& ev) { diff --git a/ydb/core/tx/schemeshard/schemeshard_export.cpp b/ydb/core/tx/schemeshard/schemeshard_export.cpp index 1a201de7edf4..a8ae8e67d9a6 100644 --- a/ydb/core/tx/schemeshard/schemeshard_export.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_export.cpp @@ -83,7 +83,7 @@ namespace { void TSchemeShard::FromXxportInfo(NKikimrExport::TExport& exprt, const TExportInfo::TPtr exportInfo) { exprt.SetId(exportInfo->Id); exprt.SetStatus(Ydb::StatusIds::SUCCESS); - + if (exportInfo->StartTime != TInstant::Zero()) { *exprt.MutableStartTime() = SecondsToProtoTimeStamp(exportInfo->StartTime.Seconds()); } @@ -91,6 +91,10 @@ void TSchemeShard::FromXxportInfo(NKikimrExport::TExport& exprt, const TExportIn *exprt.MutableEndTime() = SecondsToProtoTimeStamp(exportInfo->EndTime.Seconds()); } + if (exportInfo->UserSID) { + exprt.SetUserSID(*exportInfo->UserSID); + } + switch (exportInfo->State) { case TExportInfo::EState::CreateExportDir: case TExportInfo::EState::CopyTables: diff --git a/ydb/core/tx/schemeshard/schemeshard_export__cancel.cpp b/ydb/core/tx/schemeshard/schemeshard_export__cancel.cpp index 531168da9275..ccc6146e269d 100644 --- a/ydb/core/tx/schemeshard/schemeshard_export__cancel.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_export__cancel.cpp @@ -1,6 +1,7 @@ #include "schemeshard_xxport__tx_base.h" #include "schemeshard_export_flow_proposals.h" #include "schemeshard_export.h" +#include "schemeshard_audit_log.h" #include "schemeshard_impl.h" #include @@ -88,6 +89,11 @@ struct TSchemeShard::TExport::TTxCancel: public TSchemeShard::TXxport::TTxBase { Send(Request->Sender, std::move(response), 0, Request->Cookie); SendNotificationsIfFinished(exportInfo); + + if (exportInfo->IsFinished()) { + AuditLogExportEnd(*exportInfo.Get(), Self); + } + return true; } @@ -167,6 +173,11 @@ struct TSchemeShard::TExport::TTxCancelAck: public 
TSchemeShard::TXxport::TTxBas } SendNotificationsIfFinished(exportInfo); + + if (exportInfo->IsFinished()) { + AuditLogExportEnd(*exportInfo.Get(), Self); + } + return true; } diff --git a/ydb/core/tx/schemeshard/schemeshard_export__create.cpp b/ydb/core/tx/schemeshard/schemeshard_export__create.cpp index dc822ca86d5b..f65eabb43f89 100644 --- a/ydb/core/tx/schemeshard/schemeshard_export__create.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_export__create.cpp @@ -1,7 +1,9 @@ #include "schemeshard_xxport__tx_base.h" +#include "schemeshard_xxport__helpers.h" #include "schemeshard_export_flow_proposals.h" #include "schemeshard_export_helpers.h" #include "schemeshard_export.h" +#include "schemeshard_audit_log.h" #include "schemeshard_impl.h" #include @@ -50,7 +52,7 @@ struct TSchemeShard::TExport::TTxCreate: public TSchemeShard::TXxport::TTxBase { ); } - const TString& uid = GetUid(request.GetRequest().GetOperationParams().labels()); + const TString& uid = GetUid(request.GetRequest().GetOperationParams()); if (uid) { if (auto it = Self->ExportsByUid.find(uid); it != Self->ExportsByUid.end()) { if (IsSameDomain(it->second, request.GetDatabaseName())) { @@ -95,7 +97,7 @@ struct TSchemeShard::TExport::TTxCreate: public TSchemeShard::TXxport::TTxBase { case NKikimrExport::TCreateExportRequest::kExportToYtSettings: { const auto& settings = request.GetRequest().GetExportToYtSettings(); - exportInfo = new TExportInfo(id, uid, TExportInfo::EKind::YT, settings, domainPath.Base()->PathId); + exportInfo = new TExportInfo(id, uid, TExportInfo::EKind::YT, settings, domainPath.Base()->PathId, request.GetPeerName()); TString explain; if (!FillItems(exportInfo, settings, explain)) { @@ -115,7 +117,7 @@ struct TSchemeShard::TExport::TTxCreate: public TSchemeShard::TXxport::TTxBase { settings.set_scheme(Ydb::Export::ExportToS3Settings::HTTPS); } - exportInfo = new TExportInfo(id, uid, TExportInfo::EKind::S3, settings, domainPath.Base()->PathId); + exportInfo = new TExportInfo(id, uid, 
TExportInfo::EKind::S3, settings, domainPath.Base()->PathId, request.GetPeerName()); TString explain; if (!FillItems(exportInfo, settings, explain)) { @@ -166,15 +168,6 @@ struct TSchemeShard::TExport::TTxCreate: public TSchemeShard::TXxport::TTxBase { } private: - static TString GetUid(const google::protobuf::Map& labels) { - auto it = labels.find("uid"); - if (it == labels.end()) { - return TString(); - } - - return it->second; - } - bool Reply( THolder response, const Ydb::StatusIds::StatusCode status = Ydb::StatusIds::SUCCESS, @@ -191,6 +184,8 @@ struct TSchemeShard::TExport::TTxCreate: public TSchemeShard::TXxport::TTxBase { AddIssue(exprt, errorMessage); } + AuditLogExportStart(Request->Get()->Record, response->Record, Self); + Send(Request->Sender, std::move(response), 0, Request->Cookie); return true; @@ -895,7 +890,7 @@ struct TSchemeShard::TExport::TTxProgress: public TSchemeShard::TXxport::TTxBase OnNotifyResult(txId, id, itemIdx, txc); Self->TxIdToExport.erase(txId); } - + if (Self->TxIdToDependentExport.contains(txId)) { for (const auto id : Self->TxIdToDependentExport.at(txId)) { OnNotifyResult(txId, id, Max(), txc); @@ -997,6 +992,10 @@ struct TSchemeShard::TExport::TTxProgress: public TSchemeShard::TXxport::TTxBase Self->PersistExportState(db, exportInfo); SendNotificationsIfFinished(exportInfo); + + if (exportInfo->IsFinished()) { + AuditLogExportEnd(*exportInfo.Get(), Self); + } } }; // TTxProgress diff --git a/ydb/core/tx/schemeshard/schemeshard_impl.cpp b/ydb/core/tx/schemeshard/schemeshard_impl.cpp index 05098b99d693..4f19e3dcd91a 100644 --- a/ydb/core/tx/schemeshard/schemeshard_impl.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_impl.cpp @@ -598,8 +598,9 @@ void TSchemeShard::ClearDescribePathCaches(const TPathElement::TPtr node, bool f } else if (node->PathType == NKikimrSchemeOp::EPathType::EPathTypeTable) { Y_ABORT_UNLESS(Tables.contains(node->PathId)); TTableInfo::TPtr tabletInfo = Tables.at(node->PathId); - 
tabletInfo->PreSerializedPathDescription.clear(); - tabletInfo->PreSerializedPathDescriptionWithoutRangeKey.clear(); + tabletInfo->PreserializedTablePartitions.clear(); + tabletInfo->PreserializedTablePartitionsNoKeys.clear(); + tabletInfo->PreserializedTableSplitBoundaries.clear(); } } @@ -2954,7 +2955,9 @@ void TSchemeShard::PersistView(NIceDb::TNiceDb &db, TPathId pathId) { db.Table().Key(pathId.LocalPathId).Update( NIceDb::TUpdate{viewInfo->AlterVersion}, - NIceDb::TUpdate{viewInfo->QueryText}); + NIceDb::TUpdate{viewInfo->QueryText}, + NIceDb::TUpdate{viewInfo->CapturedContext.SerializeAsString()} + ); } void TSchemeShard::PersistRemoveView(NIceDb::TNiceDb& db, TPathId pathId) { @@ -4266,6 +4269,10 @@ ui64 TSchemeShard::GetAliveChildren(TPathElement::TPtr pathEl, const std::option Y_ABORT_UNLESS(PathsById.contains(pathId)); auto childPath = PathsById.at(pathId); + if (childPath->Dropped()) { + continue; + } + count += ui64(childPath->PathType == *type); } @@ -4462,6 +4469,8 @@ void TSchemeShard::OnActivateExecutor(const TActorContext &ctx) { appData->Icb->RegisterSharedControl(DisablePublicationsOfDropping, "SchemeShard_DisablePublicationsOfDropping"); appData->Icb->RegisterSharedControl(FillAllocatePQ, "SchemeShard_FillAllocatePQ"); + appData->Icb->RegisterSharedControl(MaxCommitRedoMB, "TabletControls.MaxCommitRedoMB"); + AllowDataColumnForIndexTable = appData->FeatureFlags.GetEnableDataColumnForIndexTable(); appData->Icb->RegisterSharedControl(AllowDataColumnForIndexTable, "SchemeShard_AllowDataColumnForIndexTable"); @@ -6538,6 +6547,12 @@ TString TSchemeShard::FillAlterTableTxBody(TPathId pathId, TShardIdx shardIdx, T *patch); } + if (alterData->TableDescriptionFull.Defined() && alterData->TableDescriptionFull->HasReplicationConfig()) { + proto->MutableReplicationConfig()->CopyFrom(alterData->TableDescriptionFull->GetReplicationConfig()); + } else if (tableInfo->HasReplicationConfig()) { + 
proto->MutableReplicationConfig()->CopyFrom(tableInfo->ReplicationConfig()); + } + TString txBody; Y_PROTOBUF_SUPPRESS_NODISCARD tx.SerializeToString(&txBody); return txBody; @@ -6675,7 +6690,9 @@ void TSchemeShard::FillTableDescriptionForShardIdx( case NKikimrSchemeOp::EPathTypeTableIndex: { Y_ABORT_UNLESS(Indexes.contains(childPathId)); auto info = Indexes.at(childPathId); - DescribeTableIndex(childPathId, childName, newTable ? info->AlterData : info, *tableDescr->MutableTableIndexes()->Add()); + DescribeTableIndex(childPathId, childName, newTable ? info->AlterData : info, false, false, + *tableDescr->MutableTableIndexes()->Add() + ); break; } @@ -7001,6 +7018,8 @@ void TSchemeShard::ApplyConsoleConfigs(const NKikimrConfig::TFeatureFlags& featu EnableTempTables = featureFlags.GetEnableTempTables(); EnableReplaceIfExistsForExternalEntities = featureFlags.GetEnableReplaceIfExistsForExternalEntities(); EnableTableDatetime64 = featureFlags.GetEnableTableDatetime64(); + EnableResourcePoolsOnServerless = featureFlags.GetEnableResourcePoolsOnServerless(); + EnableExternalDataSourcesOnServerless = featureFlags.GetEnableExternalDataSourcesOnServerless(); } void TSchemeShard::ConfigureStatsBatching(const NKikimrConfig::TSchemeShardConfig& config, const TActorContext& ctx) { diff --git a/ydb/core/tx/schemeshard/schemeshard_impl.h b/ydb/core/tx/schemeshard/schemeshard_impl.h index da3c182c8a62..d0d5315d283b 100644 --- a/ydb/core/tx/schemeshard/schemeshard_impl.h +++ b/ydb/core/tx/schemeshard/schemeshard_impl.h @@ -187,6 +187,9 @@ class TSchemeShard TControlWrapper DisablePublicationsOfDropping; TControlWrapper FillAllocatePQ; + // Shared with NTabletFlatExecutor::TExecutor + TControlWrapper MaxCommitRedoMB; + TSplitSettings SplitSettings; struct TTenantInitState { @@ -326,6 +329,8 @@ class TSchemeShard bool EnableReplaceIfExistsForExternalEntities = false; bool EnableTempTables = false; bool EnableTableDatetime64 = false; + bool EnableResourcePoolsOnServerless = false; + 
bool EnableExternalDataSourcesOnServerless = false; TShardDeleter ShardDeleter; @@ -370,6 +375,8 @@ class TSchemeShard NExternalSource::IExternalSourceFactory::TPtr ExternalSourceFactory{NExternalSource::CreateExternalSourceFactory({})}; THolder IgniteOperation(TProposeRequest& request, TOperationContext& context); + void AbortOperationPropose(const TTxId txId, TOperationContext& context); + THolder MakeDataShardProposal(const TPathId& pathId, const TOperationId& opId, const TString& body, const TActorContext& ctx) const; @@ -419,7 +426,7 @@ class TSchemeShard return MakeLocalId(NextLocalPathId); } - TPathId AllocatePathId () { + TPathId AllocatePathId() { TPathId next = PeekNextPathId(); ++NextLocalPathId; return next; @@ -1015,9 +1022,13 @@ class TSchemeShard void FillAsyncIndexInfo(const TPathId& tableId, NKikimrTxDataShard::TFlatSchemeTransaction& tx); void DescribeTable(const TTableInfo::TPtr tableInfo, const NScheme::TTypeRegistry* typeRegistry, - bool fillConfig, bool fillBoundaries, NKikimrSchemeOp::TTableDescription* entry) const; - void DescribeTableIndex(const TPathId& pathId, const TString& name, NKikimrSchemeOp::TIndexDescription& entry); - void DescribeTableIndex(const TPathId& pathId, const TString& name, TTableIndexInfo::TPtr indexInfo, NKikimrSchemeOp::TIndexDescription& entry); + bool fillConfig, NKikimrSchemeOp::TTableDescription* entry) const; + void DescribeTableIndex(const TPathId& pathId, const TString& name, + bool fillConfig, bool fillBoundaries, NKikimrSchemeOp::TIndexDescription& entry + ) const; + void DescribeTableIndex(const TPathId& pathId, const TString& name, TTableIndexInfo::TPtr indexInfo, + bool fillConfig, bool fillBoundaries, NKikimrSchemeOp::TIndexDescription& entry + ) const; void DescribeCdcStream(const TPathId& pathId, const TString& name, NKikimrSchemeOp::TCdcStreamDescription& desc); void DescribeCdcStream(const TPathId& pathId, const TString& name, TCdcStreamInfo::TPtr info, NKikimrSchemeOp::TCdcStreamDescription& desc); 
void DescribeSequence(const TPathId& pathId, const TString& name, @@ -1027,7 +1038,6 @@ class TSchemeShard void DescribeReplication(const TPathId& pathId, const TString& name, NKikimrSchemeOp::TReplicationDescription& desc); void DescribeReplication(const TPathId& pathId, const TString& name, TReplicationInfo::TPtr info, NKikimrSchemeOp::TReplicationDescription& desc); void DescribeBlobDepot(const TPathId& pathId, const TString& name, NKikimrSchemeOp::TBlobDepotDescription& desc); - static void FillTableBoundaries(const TTableInfo::TPtr tableInfo, google::protobuf::RepeatedPtrField& boundaries); void Handle(NKikimr::NOlap::NBackground::TEvExecuteGeneralLocalTransaction::TPtr& ev, const TActorContext& ctx); void Handle(NKikimr::NOlap::NBackground::TEvRemoveSession::TPtr& ev, const TActorContext& ctx); diff --git a/ydb/core/tx/schemeshard/schemeshard_import.cpp b/ydb/core/tx/schemeshard/schemeshard_import.cpp index c43e8198cddf..222f785228ba 100644 --- a/ydb/core/tx/schemeshard/schemeshard_import.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_import.cpp @@ -55,6 +55,10 @@ void TSchemeShard::FromXxportInfo(NKikimrImport::TImport& import, const TImportI *import.MutableEndTime() = SecondsToProtoTimeStamp(importInfo->EndTime.Seconds()); } + if (importInfo->UserSID) { + import.SetUserSID(*importInfo->UserSID); + } + switch (importInfo->State) { case TImportInfo::EState::Waiting: switch (GetMinState(importInfo)) { diff --git a/ydb/core/tx/schemeshard/schemeshard_import__cancel.cpp b/ydb/core/tx/schemeshard/schemeshard_import__cancel.cpp index 0b7bcf62ffb0..d494c514c3df 100644 --- a/ydb/core/tx/schemeshard/schemeshard_import__cancel.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_import__cancel.cpp @@ -1,6 +1,7 @@ #include "schemeshard_xxport__tx_base.h" #include "schemeshard_import_flow_proposals.h" #include "schemeshard_import.h" +#include "schemeshard_audit_log.h" #include "schemeshard_impl.h" #include @@ -91,6 +92,11 @@ struct TSchemeShard::TImport::TTxCancel: public 
TSchemeShard::TXxport::TTxBase { Self->PersistImportState(db, importInfo); SendNotificationsIfFinished(importInfo); + + if (importInfo->IsFinished()) { + AuditLogImportEnd(*importInfo.Get(), Self); + } + return respond(Ydb::StatusIds::SUCCESS); default: @@ -191,6 +197,11 @@ struct TSchemeShard::TImport::TTxCancelAck: public TSchemeShard::TXxport::TTxBas Self->PersistImportState(db, importInfo); SendNotificationsIfFinished(importInfo); + + if (importInfo->IsFinished()) { + AuditLogImportEnd(*importInfo.Get(), Self); + } + return true; } diff --git a/ydb/core/tx/schemeshard/schemeshard_import__create.cpp b/ydb/core/tx/schemeshard/schemeshard_import__create.cpp index 25a66878d895..2ddf4c3051b6 100644 --- a/ydb/core/tx/schemeshard/schemeshard_import__create.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_import__create.cpp @@ -1,8 +1,10 @@ #include "schemeshard_xxport__tx_base.h" +#include "schemeshard_xxport__helpers.h" #include "schemeshard_import_flow_proposals.h" #include "schemeshard_import_scheme_getter.h" #include "schemeshard_import_helpers.h" #include "schemeshard_import.h" +#include "schemeshard_audit_log.h" #include "schemeshard_impl.h" #include @@ -52,7 +54,7 @@ struct TSchemeShard::TImport::TTxCreate: public TSchemeShard::TXxport::TTxBase { ); } - const TString& uid = GetUid(request.GetRequest().GetOperationParams().labels()); + const TString& uid = GetUid(request.GetRequest().GetOperationParams()); if (uid) { if (auto it = Self->ImportsByUid.find(uid); it != Self->ImportsByUid.end()) { if (IsSameDomain(it->second, request.GetDatabaseName())) { @@ -101,7 +103,7 @@ struct TSchemeShard::TImport::TTxCreate: public TSchemeShard::TXxport::TTxBase { settings.set_scheme(Ydb::Import::ImportFromS3Settings::HTTPS); } - importInfo = new TImportInfo(id, uid, TImportInfo::EKind::S3, settings, domainPath.Base()->PathId); + importInfo = new TImportInfo(id, uid, TImportInfo::EKind::S3, settings, domainPath.Base()->PathId, request.GetPeerName()); if (request.HasUserSID()) { 
importInfo->UserSID = request.GetUserSID(); @@ -148,15 +150,6 @@ struct TSchemeShard::TImport::TTxCreate: public TSchemeShard::TXxport::TTxBase { } private: - static TString GetUid(const google::protobuf::Map& labels) { - auto it = labels.find("uid"); - if (it == labels.end()) { - return TString(); - } - - return it->second; - } - bool Reply( THolder response, const Ydb::StatusIds::StatusCode status = Ydb::StatusIds::SUCCESS, @@ -173,6 +166,8 @@ struct TSchemeShard::TImport::TTxCreate: public TSchemeShard::TXxport::TTxBase { AddIssue(entry, errorMessage); } + AuditLogImportStart(Request->Get()->Record, response->Record, Self); + Send(Request->Sender, std::move(response), 0, Request->Cookie); return true; @@ -1017,6 +1012,10 @@ struct TSchemeShard::TImport::TTxProgress: public TSchemeShard::TXxport::TTxBase Self->PersistImportState(db, importInfo); SendNotificationsIfFinished(importInfo); + + if (importInfo->IsFinished()) { + AuditLogImportEnd(*importInfo.Get(), Self); + } } }; // TTxProgress diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.cpp b/ydb/core/tx/schemeshard/schemeshard_info_types.cpp index 54bc391e5c6a..8c8256d3f218 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.cpp @@ -356,10 +356,10 @@ TTableInfo::TAlterDataPtr TTableInfo::CreateAlterData( const TTableInfo::TColumn& sourceColumn = source->Columns[colId]; if (col.HasDefaultFromSequence()) { - if (sourceColumn.PType.GetTypeId() != NScheme::NTypeIds::Int64 + if (sourceColumn.PType.GetTypeId() != NScheme::NTypeIds::Int64 && NPg::PgTypeIdFromTypeDesc(sourceColumn.PType.GetTypeDesc()) != INT8OID) { - TString sequenceType = sourceColumn.PType.GetTypeId() == NScheme::NTypeIds::Pg - ? NPg::PgTypeNameFromTypeDesc(NPg::TypeDescFromPgTypeId(INT8OID)) + TString sequenceType = sourceColumn.PType.GetTypeId() == NScheme::NTypeIds::Pg + ? 
NPg::PgTypeNameFromTypeDesc(NPg::TypeDescFromPgTypeId(INT8OID)) : NScheme::TypeName(NScheme::NTypeIds::Int64); errStr = Sprintf( "Sequence value type '%s' must be equal to the column type '%s'", sequenceType.c_str(), @@ -411,7 +411,7 @@ TTableInfo::TAlterDataPtr TTableInfo::CreateAlterData( return nullptr; default: break; - } + } } } else { auto* typeDesc = NPg::TypeDescFromPgTypeName(typeName); @@ -1614,8 +1614,9 @@ void TTableInfo::SetPartitioning(TVector&& newPartitioning) { Stats.PartitionStats.swap(newPartitionStats); Stats.Aggregated = newAggregatedStats; Partitions.swap(newPartitioning); - PreSerializedPathDescription.clear(); - PreSerializedPathDescriptionWithoutRangeKey.clear(); + PreserializedTablePartitions.clear(); + PreserializedTablePartitionsNoKeys.clear(); + PreserializedTableSplitBoundaries.clear(); CondEraseSchedule.clear(); InFlightCondErase.clear(); @@ -1630,7 +1631,7 @@ void TTableInfo::UpdateShardStats(TShardIdx datashardIdx, const TPartitionStats& Stats.UpdateShardStats(datashardIdx, newStats); } -void TAggregatedStats::UpdateShardStats(TShardIdx datashardIdx, const TPartitionStats& newStats) { +void TTableAggregatedStats::UpdateShardStats(TShardIdx datashardIdx, const TPartitionStats& newStats) { // Ignore stats from unknown datashard (it could have been split) if (!PartitionStats.contains(datashardIdx)) return; @@ -1719,33 +1720,10 @@ void TAggregatedStats::UpdateShardStats(TShardIdx datashardIdx, const TPartition } } -void TAggregatedStats::UpdateTableStats(const TPathId& pathId, const TPartitionStats& newStats) { - if (!TableStats.contains(pathId)) { - TableStats[pathId] = newStats; - return; - } - - TPartitionStats& oldStats = TableStats[pathId]; - - if (newStats.SeqNo <= oldStats.SeqNo) { - // Ignore outdated message - return; - } - - if (newStats.SeqNo.Generation > oldStats.SeqNo.Generation) { - // Reset incremental counter baselines if tablet has restarted - oldStats.ImmediateTxCompleted = 0; - oldStats.PlannedTxCompleted = 0; - 
oldStats.TxRejectedByOverload = 0; - oldStats.TxRejectedBySpace = 0; - oldStats.RowUpdates = 0; - oldStats.RowDeletes = 0; - oldStats.RowReads = 0; - oldStats.RangeReads = 0; - oldStats.RangeReadRows = 0; - } - TableStats[pathId].RowCount += (newStats.RowCount - oldStats.RowCount); - TableStats[pathId].DataSize += (newStats.DataSize - oldStats.DataSize); +void TAggregatedStats::UpdateTableStats(TShardIdx shardIdx, const TPathId& pathId, const TPartitionStats& newStats) { + auto& tableStats = TableStats[pathId]; + tableStats.PartitionStats[shardIdx]; // insert if none + tableStats.UpdateShardStats(shardIdx, newStats); } void TTableInfo::RegisterSplitMergeOp(TOperationId opId, const TTxState& txState) { @@ -2017,6 +1995,7 @@ TString TExportInfo::ToString() const { << " DomainPathId: " << DomainPathId << " ExportPathId: " << ExportPathId << " UserSID: '" << UserSID << "'" + << " PeerName: '" << PeerName << "'" << " State: " << State << " WaitTxId: " << WaitTxId << " Issue: '" << Issue << "'" diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.h b/ydb/core/tx/schemeshard/schemeshard_info_types.h index b5297bf20037..8bbdca2ac540 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.h +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.h @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -320,14 +321,18 @@ struct TPartitionStats { ui64 CPU = 0; }; -struct TAggregatedStats { +struct TTableAggregatedStats { TPartitionStats Aggregated; THashMap PartitionStats; - THashMap TableStats; size_t PartitionStatsUpdated = 0; void UpdateShardStats(TShardIdx datashardIdx, const TPartitionStats& newStats); - void UpdateTableStats(const TPathId& pathId, const TPartitionStats& newStats); +}; + +struct TAggregatedStats : public TTableAggregatedStats { + THashMap TableStats; + + void UpdateTableStats(TShardIdx datashardIdx, const TPathId& pathId, const TPartitionStats& newStats); }; struct TSubDomainInfo; @@ -436,8 +441,11 @@ struct 
TTableInfo : public TSimpleRefCount { TMap BackupHistory; TMap RestoreHistory; - TString PreSerializedPathDescription; - TString PreSerializedPathDescriptionWithoutRangeKey; + // Preserialized TDescribeSchemeResult with PathDescription.TablePartitions field filled + TString PreserializedTablePartitions; + TString PreserializedTablePartitionsNoKeys; + // Preserialized TDescribeSchemeResult with PathDescription.Table.SplitBoundary field filled + TString PreserializedTableSplitBoundaries; THashMap PerShardPartitionConfig; @@ -937,6 +945,8 @@ struct TTopicTabletInfo : TSimpleRefCount { THashSet ParentPartitionIds; THashSet ChildPartitionIds; + TShardIdx ShardIdx; + void SetStatus(const TActorContext& ctx, ui32 value) { if (value >= NKikimrPQ::ETopicPartitionStatus::Active && value <= NKikimrPQ::ETopicPartitionStatus::Deleted) { @@ -1122,6 +1132,7 @@ struct TTopicInfo : TSimpleRefCount { TTabletId BalancerTabletID = InvalidTabletId; TShardIdx BalancerShardIdx = InvalidShardIdx; THashMap Partitions; + size_t ActivePartitionCount = 0; TString PreSerializedPathDescription; // Cached path description TString PreSerializedPartitionsDescription; // Cached partition description @@ -1129,6 +1140,8 @@ struct TTopicInfo : TSimpleRefCount { TTopicStats Stats; void AddPartition(TShardIdx shardIdx, TTopicTabletInfo::TTopicPartitionInfo* partition) { + partition->ShardIdx = shardIdx; + TTopicTabletInfo::TPtr& pqShard = Shards[shardIdx]; if (!pqShard) { pqShard.Reset(new TTopicTabletInfo()); @@ -1218,6 +1231,7 @@ struct TTopicInfo : TSimpleRefCount { alterData->AlterVersion = AlterVersion + 1; Y_ABORT_UNLESS(alterData->TotalGroupCount); Y_ABORT_UNLESS(alterData->TotalPartitionCount); + Y_ABORT_UNLESS(0 < alterData->ActivePartitionCount && alterData->ActivePartitionCount <= alterData->TotalPartitionCount); Y_ABORT_UNLESS(alterData->NextPartitionId); Y_ABORT_UNLESS(alterData->MaxPartsPerTablet); alterData->KeySchema = KeySchema; @@ -1231,6 +1245,7 @@ struct TTopicInfo : TSimpleRefCount 
{ TotalGroupCount = AlterData->TotalGroupCount; NextPartitionId = AlterData->NextPartitionId; TotalPartitionCount = AlterData->TotalPartitionCount; + ActivePartitionCount = AlterData->ActivePartitionCount; MaxPartsPerTablet = AlterData->MaxPartsPerTablet; if (!AlterData->TabletConfig.empty()) TabletConfig = std::move(AlterData->TabletConfig); @@ -2431,6 +2446,20 @@ struct TCdcStreamInfo : public TSimpleRefCount { return result; } + void FinishAlter() { + Y_ABORT_UNLESS(AlterData); + + AlterVersion = AlterData->AlterVersion; + Mode = AlterData->Mode; + Format = AlterData->Format; + VirtualTimestamps = AlterData->VirtualTimestamps; + ResolvedTimestamps = AlterData->ResolvedTimestamps; + AwsRegion = AlterData->AwsRegion; + State = AlterData->State; + + AlterData.Reset(); + } + ui64 AlterVersion = 1; EMode Mode; EFormat Format; @@ -2602,12 +2631,13 @@ struct TExportInfo: public TSimpleRefCount { static bool IsDropped(const TItem& item); }; - ui64 Id; + ui64 Id; // TxId from the original TEvCreateExportRequest TString Uid; EKind Kind; TString Settings; TPathId DomainPathId; TMaybe UserSID; + TString PeerName; // required for making audit log records TVector Items; TPathId ExportPathId = InvalidPathId; @@ -2632,12 +2662,14 @@ struct TExportInfo: public TSimpleRefCount { const TString& uid, const EKind kind, const TString& settings, - const TPathId domainPathId) + const TPathId domainPathId, + const TString& peerName) : Id(id) , Uid(uid) , Kind(kind) , Settings(settings) , DomainPathId(domainPathId) + , PeerName(peerName) { } @@ -2647,8 +2679,9 @@ struct TExportInfo: public TSimpleRefCount { const TString& uid, const EKind kind, const TSettingsPB& settingsPb, - const TPathId domainPathId) - : TExportInfo(id, uid, kind, SerializeSettings(settingsPb), domainPathId) + const TPathId domainPathId, + const TString& peerName) + : TExportInfo(id, uid, kind, SerializeSettings(settingsPb), domainPathId, peerName) { } @@ -2759,12 +2792,13 @@ struct TImportInfo: public 
TSimpleRefCount { static bool IsDone(const TItem& item); }; - ui64 Id; + ui64 Id; // TxId from the original TEvCreateImportRequest TString Uid; EKind Kind; Ydb::Import::ImportFromS3Settings Settings; TPathId DomainPathId; TMaybe UserSID; + TString PeerName; // required for making audit log records EState State = EState::Invalid; TString Issue; @@ -2780,12 +2814,14 @@ struct TImportInfo: public TSimpleRefCount { const TString& uid, const EKind kind, const Ydb::Import::ImportFromS3Settings& settings, - const TPathId domainPathId) + const TPathId domainPathId, + const TString& peerName) : Id(id) , Uid(uid) , Kind(kind) , Settings(settings) , DomainPathId(domainPathId) + , PeerName(peerName) { } @@ -3245,6 +3281,7 @@ struct TViewInfo : TSimpleRefCount { ui64 AlterVersion = 0; TString QueryText; + NYql::NProto::TTranslationSettings CapturedContext; }; struct TResourcePoolInfo : TSimpleRefCount { diff --git a/ydb/core/tx/schemeshard/schemeshard_path.cpp b/ydb/core/tx/schemeshard/schemeshard_path.cpp index f45e6190eb5f..0704fbcb5cee 100644 --- a/ydb/core/tx/schemeshard/schemeshard_path.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_path.cpp @@ -1596,20 +1596,22 @@ bool TPath::IsInsideCdcStreamPath() const { return false; } - ++item; - for (; item != Elements.rend(); ++item) { - if (!(*item)->IsDirectory() && !(*item)->IsSubDomainRoot()) { - return false; - } - } - return true; } -bool TPath::IsTableIndex() const { +bool TPath::IsTableIndex(const TMaybe& type) const { Y_ABORT_UNLESS(IsResolved()); - return Base()->IsTableIndex(); + if (!Base()->IsTableIndex()) { + return false; + } + + if (!type.Defined()) { + return true; + } + + Y_ABORT_UNLESS(SS->Indexes.contains(Base()->PathId)); + return SS->Indexes.at(Base()->PathId)->Type == *type; } bool TPath::IsBackupTable() const { diff --git a/ydb/core/tx/schemeshard/schemeshard_path.h b/ydb/core/tx/schemeshard/schemeshard_path.h index a1474fef7176..4e6aec639db3 100644 --- a/ydb/core/tx/schemeshard/schemeshard_path.h +++ 
b/ydb/core/tx/schemeshard/schemeshard_path.h @@ -5,6 +5,8 @@ #include +#include + namespace NKikimr::NSchemeShard { class TSchemeShard; @@ -159,7 +161,7 @@ class TPath { bool AtLocalSchemeShardPath() const; bool IsInsideTableIndexPath() const; bool IsInsideCdcStreamPath() const; - bool IsTableIndex() const; + bool IsTableIndex(const TMaybe& type = {}) const; bool IsBackupTable() const; bool IsAsyncReplicaTable() const; bool IsCdcStream() const; diff --git a/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp b/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp index bf10858b1ff0..12e6a238570e 100644 --- a/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_path_describer.cpp @@ -8,6 +8,16 @@ #include +namespace { + +void FillPartitionConfig(const NKikimrSchemeOp::TPartitionConfig& in, NKikimrSchemeOp::TPartitionConfig& out) { + out.CopyFrom(in); + NKikimr::NSchemeShard::TPartitionConfigMerger::DeduplicateColumnFamiliesById(out); + out.MutableStorageRooms()->Clear(); +} + +} + namespace NKikimr { namespace NSchemeShard { @@ -62,6 +72,78 @@ static void FillTableStats(NKikimrSchemeOp::TPathDescription& pathDescription, c FillTableMetrics(pathDescription.MutableTabletMetrics(), stats); } +static void FillColumns( + const TTableInfo& tableInfo, + google::protobuf::RepeatedPtrField& out +) { + bool familyNamesBuilt = false; + THashMap familyNames; + + out.Reserve(tableInfo.Columns.size()); + for (const auto& col : tableInfo.Columns) { + const auto& cinfo = col.second; + if (cinfo.IsDropped()) + continue; + + auto* colDescr = out.Add(); + colDescr->SetName(cinfo.Name); + colDescr->SetType(NScheme::TypeName(cinfo.PType, cinfo.PTypeMod)); + auto columnType = NScheme::ProtoColumnTypeFromTypeInfoMod(cinfo.PType, cinfo.PTypeMod); + colDescr->SetTypeId(columnType.TypeId); + if (columnType.TypeInfo) { + *colDescr->MutableTypeInfo() = *columnType.TypeInfo; + } + colDescr->SetId(cinfo.Id); + colDescr->SetNotNull(cinfo.NotNull); + 
+ if (cinfo.Family != 0) { + colDescr->SetFamily(cinfo.Family); + + if (!familyNamesBuilt) { + for (const auto& family : tableInfo.PartitionConfig().GetColumnFamilies()) { + if (family.HasName() && family.HasId()) { + familyNames[family.GetId()] = family.GetName(); + } + } + familyNamesBuilt = true; + } + + auto it = familyNames.find(cinfo.Family); + if (it != familyNames.end() && !it->second.empty()) { + colDescr->SetFamilyName(it->second); + } + } + + colDescr->SetIsBuildInProgress(cinfo.IsBuildInProgress); + + switch (cinfo.DefaultKind) { + case ETableColumnDefaultKind::None: + break; + case ETableColumnDefaultKind::FromSequence: + colDescr->SetDefaultFromSequence(cinfo.DefaultValue); + break; + case ETableColumnDefaultKind::FromLiteral: + Y_ABORT_UNLESS(colDescr->MutableDefaultFromLiteral()->ParseFromString( + cinfo.DefaultValue)); + break; + } + } +} + +static void FillKeyColumns( + const TTableInfo& tableInfo, + google::protobuf::RepeatedPtrField& names, + google::protobuf::RepeatedField& ids +) { + Y_ABORT_UNLESS(!tableInfo.KeyColumnIds.empty()); + names.Reserve(tableInfo.KeyColumnIds.size()); + ids.Reserve(tableInfo.KeyColumnIds.size()); + for (ui32 keyColId : tableInfo.KeyColumnIds) { + *names.Add() = tableInfo.Columns.at(keyColId).Name; + *ids.Add() = keyColId; + } +} + void TPathDescriber::FillPathDescr(NKikimrSchemeOp::TDirEntry* descr, TPathElement::TPtr pathEl, TPathElement::EPathSubType subType) { FillChildDescr(descr, pathEl); @@ -207,6 +289,68 @@ void TPathDescriber::DescribeDir(const TPath& path) { DescribeChildren(path); } +void FillTableBoundaries( + google::protobuf::RepeatedPtrField* result, + const TTableInfo::TPtr tableInfo +) { + TString errStr; + // Number of split boundaries equals to number of partitions - 1 + result->Reserve(tableInfo->GetPartitions().size() - 1); + for (ui32 pi = 0; pi < tableInfo->GetPartitions().size() - 1; ++pi) { + const auto& p = tableInfo->GetPartitions()[pi]; + TSerializedCellVec endKey(p.EndOfRange); + auto 
boundary = result->Add()->MutableKeyPrefix(); + for (ui32 ki = 0; ki < endKey.GetCells().size(); ++ki){ + const auto& c = endKey.GetCells()[ki]; + auto type = tableInfo->Columns[tableInfo->KeyColumnIds[ki]].PType; + bool ok = NMiniKQL::CellToValue(type, c, *boundary->AddTuple(), errStr); + Y_ABORT_UNLESS(ok, "Failed to build key tuple at position %" PRIu32 " error: %s", ki, errStr.data()); + } + } +} + +void FillTablePartitions( + google::protobuf::RepeatedPtrField* result, + const TTableInfo::TPtr tableInfo, + const THashMap& shardInfos, + bool includeKeys +) { + result->Reserve(tableInfo->GetPartitions().size()); + for (auto& p : tableInfo->GetPartitions()) { + const auto& tabletId = ui64(shardInfos.at(p.ShardIdx).TabletID); + const auto& key = p.EndOfRange; + + auto part = result->Add(); + part->SetDatashardId(tabletId); + if (includeKeys) { + // Currently we only support uniform partitioning where each range is [start, end) + // +inf as the end of the last range is represented by empty TCell vector + part->SetIsPoint(false); + part->SetIsInclusive(false); + part->SetEndOfRangeKeyPrefix(key); + } + } +} + +const TString& GetSerializedTablePartitions( + const TTableInfo::TPtr tableInfo, + const THashMap& shardInfos, + bool returnRangeKey +) { + TString& cache = (returnRangeKey + ? 
tableInfo->PreserializedTablePartitions + : tableInfo->PreserializedTablePartitionsNoKeys + ); + + if (cache.empty()) { + NKikimrScheme::TEvDescribeSchemeResult result; + FillTablePartitions(result.MutablePathDescription()->MutableTablePartitions(), tableInfo, shardInfos, returnRangeKey); + Y_PROTOBUF_SUPPRESS_NODISCARD result.SerializeToString(&cache); + } + + return cache; +} + void TPathDescriber::DescribeTable(const TActorContext& ctx, TPathId pathId, TPathElement::TPtr pathEl) { const NScheme::TTypeRegistry* typeRegistry = AppData(ctx)->TypeRegistry; const TTableInfo::TPtr tableInfo = *Self->Tables.FindPtr(pathId); @@ -220,6 +364,7 @@ void TPathDescriber::DescribeTable(const TActorContext& ctx, TPathId pathId, TPa bool returnBoundaries = false; bool returnRangeKey = true; bool returnSetVal = Params.GetOptions().GetReturnSetVal(); + bool returnIndexTableBoundaries = Params.GetOptions().GetReturnIndexTableBoundaries(); if (Params.HasOptions()) { returnConfig = Params.GetOptions().GetReturnPartitionConfig(); returnPartitioning = Params.GetOptions().GetReturnPartitioningInfo(); @@ -228,50 +373,30 @@ void TPathDescriber::DescribeTable(const TActorContext& ctx, TPathId pathId, TPa returnRangeKey = Params.GetOptions().GetReturnRangeKey(); } - Self->DescribeTable(tableInfo, typeRegistry, returnConfig, returnBoundaries, entry); + Self->DescribeTable(tableInfo, typeRegistry, returnConfig, entry); entry->SetName(pathEl->Name); - if (returnPartitioning) { - // partitions - if (tableInfo->PreSerializedPathDescription.empty()) { + if (returnBoundaries) { + // split boundaries (split keys without shard's tablet-ids) + if (tableInfo->PreserializedTableSplitBoundaries.empty()) { NKikimrScheme::TEvDescribeSchemeResult preSerializedResult; - NKikimrScheme::TEvDescribeSchemeResult preSerializedResultWithoutRangeKey; - - NKikimrSchemeOp::TPathDescription& pathDescription = *preSerializedResult.MutablePathDescription(); - NKikimrSchemeOp::TPathDescription& 
pathDescriptionWithoutRangeKey = *preSerializedResultWithoutRangeKey.MutablePathDescription(); - - pathDescription.MutableTablePartitions()->Reserve(tableInfo->GetPartitions().size()); - pathDescriptionWithoutRangeKey.MutableTablePartitions()->Reserve(tableInfo->GetPartitions().size()); - for (auto& p : tableInfo->GetPartitions()) { - auto part = pathDescription.AddTablePartitions(); - auto partWithoutRangeKey = pathDescriptionWithoutRangeKey.AddTablePartitions(); - auto datashardIdx = p.ShardIdx; - auto datashardTabletId = Self->ShardInfos[datashardIdx].TabletID; - // Currently we only support uniform partitioning where each range is [start, end) - // +inf as the end of the last range is represented by empty TCell vector - part->SetDatashardId(ui64(datashardTabletId)); - partWithoutRangeKey->SetDatashardId(ui64(datashardTabletId)); - - part->SetIsPoint(false); - partWithoutRangeKey->SetIsPoint(false); - - part->SetIsInclusive(false); - partWithoutRangeKey->SetIsInclusive(false); - - part->SetEndOfRangeKeyPrefix(p.EndOfRange); - } - Y_PROTOBUF_SUPPRESS_NODISCARD preSerializedResult.SerializeToString(&tableInfo->PreSerializedPathDescription); - Y_PROTOBUF_SUPPRESS_NODISCARD preSerializedResultWithoutRangeKey.SerializeToString(&tableInfo->PreSerializedPathDescriptionWithoutRangeKey); - } - if (returnRangeKey) { - Result->PreSerializedData += tableInfo->PreSerializedPathDescription; - } else { - Result->PreSerializedData += tableInfo->PreSerializedPathDescriptionWithoutRangeKey; - } - if (!pathEl->IsCreateFinished()) { - tableInfo->PreSerializedPathDescription.clear(); // KIKIMR-4337 - tableInfo->PreSerializedPathDescriptionWithoutRangeKey.clear(); + auto& tableDesc = *preSerializedResult.MutablePathDescription()->MutableTable(); + FillTableBoundaries(tableDesc.MutableSplitBoundary(), tableInfo); + Y_PROTOBUF_SUPPRESS_NODISCARD preSerializedResult.SerializeToString(&tableInfo->PreserializedTableSplitBoundaries); } + Result->PreSerializedData += 
tableInfo->PreserializedTableSplitBoundaries; + } + + if (returnPartitioning) { + // partitions (shard tablet-ids with range keys) + Result->PreSerializedData += GetSerializedTablePartitions(tableInfo, Self->ShardInfos, returnRangeKey); + } + + // KIKIMR-4337: table info is in flux until table is finally created + if (!pathEl->IsCreateFinished()) { + tableInfo->PreserializedTablePartitions.clear(); + tableInfo->PreserializedTablePartitionsNoKeys.clear(); + tableInfo->PreserializedTableSplitBoundaries.clear(); } FillAggregatedStats(*Result->Record.MutablePathDescription(), tableInfo->GetStats()); @@ -364,7 +489,9 @@ void TPathDescriber::DescribeTable(const TActorContext& ctx, TPathId pathId, TPa switch (childPath->PathType) { case NKikimrSchemeOp::EPathTypeTableIndex: - Self->DescribeTableIndex(childPathId, childName, *entry->AddTableIndexes()); + Self->DescribeTableIndex( + childPathId, childName, returnConfig, returnIndexTableBoundaries, *entry->AddTableIndexes() + ); break; case NKikimrSchemeOp::EPathTypeCdcStream: Self->DescribeCdcStream(childPathId, childName, *entry->AddCdcStreams()); @@ -427,7 +554,9 @@ void TPathDescriber::DescribeColumnTable(TPathId pathId, TPathElement::TPtr path description->MutableSchema()->SetVersion(description->GetSchema().GetVersion() + description->GetSchemaPresetVersionAdj()); } if (tableInfo->GetStats().TableStats.contains(pathId)) { - FillTableStats(*pathDescription, tableInfo->GetStats().TableStats.at(pathId)); + FillTableStats(*pathDescription, tableInfo->GetStats().TableStats.at(pathId).Aggregated); + } else { + FillTableStats(*pathDescription, TPartitionStats()); } } } @@ -586,8 +715,12 @@ void TPathDescriber::DescribeRtmrVolume(TPathId pathId, TPathElement::TPtr pathE } void TPathDescriber::DescribeTableIndex(const TPath& path) { - Self->DescribeTableIndex(path.Base()->PathId, path.Base()->Name, - *Result->Record.MutablePathDescription()->MutableTableIndex()); + bool returnConfig = Params.GetReturnPartitionConfig(); + bool 
returnBoundaries = Params.HasOptions() && Params.GetOptions().GetReturnBoundaries(); + + Self->DescribeTableIndex(path.Base()->PathId, path.Base()->Name, returnConfig, returnBoundaries, + *Result->Record.MutablePathDescription()->MutableTableIndex() + ); DescribeChildren(path); } @@ -903,6 +1036,7 @@ void TPathDescriber::DescribeView(const TActorContext&, TPathId pathId, TPathEle PathIdFromPathId(pathId, entry->MutablePathId()); entry->SetVersion(viewInfo->AlterVersion); entry->SetQueryText(viewInfo->QueryText); + *entry->MutableCapturedContext() = viewInfo->CapturedContext; } void TPathDescriber::DescribeResourcePool(TPathId pathId, TPathElement::TPtr pathEl) { @@ -1108,80 +1242,21 @@ THolder DescribePath( return DescribePath(self, ctx, pathId, options); } -void TSchemeShard::DescribeTable(const TTableInfo::TPtr tableInfo, const NScheme::TTypeRegistry* typeRegistry, - bool fillConfig, bool fillBoundaries, NKikimrSchemeOp::TTableDescription* entry) const +void TSchemeShard::DescribeTable( + const TTableInfo::TPtr tableInfo, + const NScheme::TTypeRegistry* typeRegistry, + bool fillConfig, + NKikimrSchemeOp::TTableDescription* entry + ) const { Y_UNUSED(typeRegistry); - THashMap familyNames; - bool familyNamesBuilt = false; entry->SetTableSchemaVersion(tableInfo->AlterVersion); - entry->MutableColumns()->Reserve(tableInfo->Columns.size()); - for (auto col : tableInfo->Columns) { - const auto& cinfo = col.second; - if (cinfo.IsDropped()) - continue; - - auto colDescr = entry->AddColumns(); - colDescr->SetName(cinfo.Name); - colDescr->SetType(NScheme::TypeName(cinfo.PType, cinfo.PTypeMod)); - auto columnType = NScheme::ProtoColumnTypeFromTypeInfoMod(cinfo.PType, cinfo.PTypeMod); - colDescr->SetTypeId(columnType.TypeId); - if (columnType.TypeInfo) { - *colDescr->MutableTypeInfo() = *columnType.TypeInfo; - } - colDescr->SetId(cinfo.Id); - colDescr->SetNotNull(cinfo.NotNull); - - if (cinfo.Family != 0) { - colDescr->SetFamily(cinfo.Family); - - if (!familyNamesBuilt) { - 
for (const auto& family : tableInfo->PartitionConfig().GetColumnFamilies()) { - if (family.HasName() && family.HasId()) { - familyNames[family.GetId()] = family.GetName(); - } - } - familyNamesBuilt = true; - } - - auto it = familyNames.find(cinfo.Family); - if (it != familyNames.end() && !it->second.empty()) { - colDescr->SetFamilyName(it->second); - } - } - - colDescr->SetIsBuildInProgress(cinfo.IsBuildInProgress); - - switch (cinfo.DefaultKind) { - case ETableColumnDefaultKind::None: - break; - case ETableColumnDefaultKind::FromSequence: - colDescr->SetDefaultFromSequence(cinfo.DefaultValue); - break; - case ETableColumnDefaultKind::FromLiteral: - Y_ABORT_UNLESS(colDescr->MutableDefaultFromLiteral()->ParseFromString( - cinfo.DefaultValue)); - break; - } - } - Y_ABORT_UNLESS(!tableInfo->KeyColumnIds.empty()); - - entry->MutableKeyColumnNames()->Reserve(tableInfo->KeyColumnIds.size()); - entry->MutableKeyColumnIds()->Reserve(tableInfo->KeyColumnIds.size()); - for (ui32 keyColId : tableInfo->KeyColumnIds) { - entry->AddKeyColumnNames(tableInfo->Columns[keyColId].Name); - entry->AddKeyColumnIds(keyColId); - } + FillColumns(*tableInfo, *entry->MutableColumns()); + FillKeyColumns(*tableInfo, *entry->MutableKeyColumnNames(), *entry->MutableKeyColumnIds()); if (fillConfig) { - entry->MutablePartitionConfig()->CopyFrom(tableInfo->PartitionConfig()); - TPartitionConfigMerger::DeduplicateColumnFamiliesById(*entry->MutablePartitionConfig()); - entry->MutablePartitionConfig()->MutableStorageRooms()->Clear(); - } - - if (fillBoundaries) { - FillTableBoundaries(tableInfo, *entry->MutableSplitBoundary()); + FillPartitionConfig(tableInfo->PartitionConfig(), *entry->MutablePartitionConfig()); } if (tableInfo->HasTTLSettings()) { @@ -1196,17 +1271,17 @@ void TSchemeShard::DescribeTable(const TTableInfo::TPtr tableInfo, const NScheme } void TSchemeShard::DescribeTableIndex(const TPathId& pathId, const TString& name, - NKikimrSchemeOp::TIndexDescription& entry) + bool fillConfig, 
bool fillBoundaries, NKikimrSchemeOp::TIndexDescription& entry) const { auto it = Indexes.FindPtr(pathId); Y_ABORT_UNLESS(it, "TableIndex is not found"); TTableIndexInfo::TPtr indexInfo = *it; - DescribeTableIndex(pathId, name, indexInfo, entry); + DescribeTableIndex(pathId, name, indexInfo, fillConfig, fillBoundaries, entry); } void TSchemeShard::DescribeTableIndex(const TPathId& pathId, const TString& name, TTableIndexInfo::TPtr indexInfo, - NKikimrSchemeOp::TIndexDescription& entry) + bool fillConfig, bool fillBoundaries, NKikimrSchemeOp::TIndexDescription& entry) const { Y_ABORT_UNLESS(indexInfo, "Empty index info"); @@ -1226,23 +1301,26 @@ void TSchemeShard::DescribeTableIndex(const TPathId& pathId, const TString& name *entry.MutableDataColumnNames()->Add() = dataColumns; } - Y_ABORT_UNLESS(PathsById.contains(pathId)); - auto indexPath = PathsById.at(pathId); - + auto indexPath = *PathsById.FindPtr(pathId); + Y_ABORT_UNLESS(indexPath); Y_ABORT_UNLESS(indexPath->GetChildren().size() == 1); - const auto& indexImplPathId = indexPath->GetChildren().begin()->second; + const auto& indexImplTablePathId = indexPath->GetChildren().begin()->second; - Y_ABORT_UNLESS(Tables.contains(indexImplPathId)); - auto indexImplTable = Tables.at(indexImplPathId); + auto tableInfo = *Tables.FindPtr(indexImplTablePathId); + Y_ABORT_UNLESS(tableInfo); - const auto& tableStats = indexImplTable->GetStats().Aggregated; + const auto& tableStats = tableInfo->GetStats().Aggregated; entry.SetDataSize(tableStats.DataSize + tableStats.IndexSize); - *entry.MutablePartitioningPolicy() = indexImplTable->PartitionConfig().GetPartitioningPolicy(); - if (const auto& explicitPartitions = indexImplTable->TableDescription.GetSplitBoundary(); - !explicitPartitions.empty() - ) { - *entry.MutableExplicitPartitions()->MutableSplitBoundary() = explicitPartitions; + auto* tableDescription = entry.AddIndexImplTableDescriptions(); + if (fillConfig) { + FillPartitionConfig(tableInfo->PartitionConfig(), 
*tableDescription->MutablePartitionConfig()); + } + if (fillBoundaries) { + // column info is necessary for split boundary type conversion + FillColumns(*tableInfo, *tableDescription->MutableColumns()); + FillKeyColumns(*tableInfo, *tableDescription->MutableKeyColumnNames(), *tableDescription->MutableKeyColumnIds()); + FillTableBoundaries(tableDescription->MutableSplitBoundary(), tableInfo); } } @@ -1389,22 +1467,5 @@ void TSchemeShard::DescribeBlobDepot(const TPathId& pathId, const TString& name, desc.SetTabletId(static_cast(it->second->BlobDepotTabletId)); } -void TSchemeShard::FillTableBoundaries(const TTableInfo::TPtr tableInfo, google::protobuf::RepeatedPtrField& boundaries) { - TString errStr; - // Number of split boundaries equals to number of partitions - 1 - boundaries.Reserve(tableInfo->GetPartitions().size() - 1); - for (ui32 pi = 0; pi < tableInfo->GetPartitions().size() - 1; ++pi) { - const auto& p = tableInfo->GetPartitions()[pi]; - TSerializedCellVec endKey(p.EndOfRange); - auto boundary = boundaries.Add()->MutableKeyPrefix(); - for (ui32 ki = 0; ki < endKey.GetCells().size(); ++ki){ - const auto& c = endKey.GetCells()[ki]; - auto type = tableInfo->Columns[tableInfo->KeyColumnIds[ki]].PType; - bool ok = NMiniKQL::CellToValue(type, c, *boundary->AddTuple(), errStr); - Y_ABORT_UNLESS(ok, "Failed to build key tuple at position %" PRIu32 " error: %s", ki, errStr.data()); - } - } -} - } // NSchemeShard } // NKikimr diff --git a/ydb/core/tx/schemeshard/schemeshard_path_element.cpp b/ydb/core/tx/schemeshard/schemeshard_path_element.cpp index 05a0a9068ebc..c8901de18568 100644 --- a/ydb/core/tx/schemeshard/schemeshard_path_element.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_path_element.cpp @@ -411,8 +411,8 @@ bool TPathElement::CheckFileStoreSpaceChange(TFileStoreSpace newSpace, TFileStor CheckSpaceChanged(FileStoreSpaceHDD, newSpace.HDD, oldSpace.HDD, errStr, "filestore", " (hdd)")); } -void TPathElement::SetAsyncReplica() { - IsAsyncReplica = true; +void 
TPathElement::SetAsyncReplica(bool value) { + IsAsyncReplica = value; } bool TPathElement::HasRuntimeAttrs() const { diff --git a/ydb/core/tx/schemeshard/schemeshard_path_element.h b/ydb/core/tx/schemeshard/schemeshard_path_element.h index e9caae4888cd..1cdce90e1a4d 100644 --- a/ydb/core/tx/schemeshard/schemeshard_path_element.h +++ b/ydb/core/tx/schemeshard/schemeshard_path_element.h @@ -160,7 +160,7 @@ struct TPathElement : TSimpleRefCount { void ChangeFileStoreSpaceBegin(TFileStoreSpace newSpace, TFileStoreSpace oldSpace); void ChangeFileStoreSpaceCommit(TFileStoreSpace newSpace, TFileStoreSpace oldSpace); bool CheckFileStoreSpaceChange(TFileStoreSpace newSpace, TFileStoreSpace oldSpace, TString& errStr); - void SetAsyncReplica(); + void SetAsyncReplica(bool value); bool HasRuntimeAttrs() const; void SerializeRuntimeAttrs(google::protobuf::RepeatedPtrField* userAttrs) const; }; diff --git a/ydb/core/tx/schemeshard/schemeshard_schema.h b/ydb/core/tx/schemeshard/schemeshard_schema.h index ac7cce9805c8..a44c1adbef61 100644 --- a/ydb/core/tx/schemeshard/schemeshard_schema.h +++ b/ydb/core/tx/schemeshard/schemeshard_schema.h @@ -761,6 +761,7 @@ struct Schema : NIceDb::Schema { struct ImportsLimit : Column<29, NScheme::NTypeIds::Uint64> {}; struct AuditSettings : Column<30, NScheme::NTypeIds::String> {}; struct ServerlessComputeResourcesMode : Column<31, NScheme::NTypeIds::Uint32> { using Type = EServerlessComputeResourcesMode; }; + struct ColumnTableColumnsLimit : Column<32, NScheme::NTypeIds::Uint64> {}; using TKey = TableKey; using TColumns = TableColumns< @@ -794,7 +795,8 @@ struct Schema : NIceDb::Schema { ExportsLimit, ImportsLimit, AuditSettings, - ServerlessComputeResourcesMode + ServerlessComputeResourcesMode, + ColumnTableColumnsLimit >; }; @@ -1160,6 +1162,7 @@ struct Schema : NIceDb::Schema { struct StartTime : Column<14, NScheme::NTypeIds::Uint64> {}; struct EndTime : Column<15, NScheme::NTypeIds::Uint64> {}; + struct PeerName : Column<16, 
NScheme::NTypeIds::Utf8> {}; using TKey = TableKey; using TColumns = TableColumns< @@ -1177,7 +1180,8 @@ struct Schema : NIceDb::Schema { Kind, UserSID, StartTime, - EndTime + EndTime, + PeerName >; }; @@ -1482,6 +1486,7 @@ struct Schema : NIceDb::Schema { struct StartTime : Column<11, NScheme::NTypeIds::Uint64> {}; struct EndTime : Column<12, NScheme::NTypeIds::Uint64> {}; + struct PeerName : Column<13, NScheme::NTypeIds::Utf8> {}; using TKey = TableKey; using TColumns = TableColumns< @@ -1496,7 +1501,8 @@ struct Schema : NIceDb::Schema { Issue, UserSID, StartTime, - EndTime + EndTime, + PeerName >; }; @@ -1779,9 +1785,11 @@ struct Schema : NIceDb::Schema { struct PathId: Column<1, NScheme::NTypeIds::Uint64> { using Type = TLocalPathId; }; struct AlterVersion: Column<2, NScheme::NTypeIds::Uint64> {}; struct QueryText: Column<3, NScheme::NTypeIds::String> {}; + // CapturedContext is a serialized NYql::NProto::TTranslationSettings. + struct CapturedContext: Column<4, NScheme::NTypeIds::String> {}; using TKey = TableKey; - using TColumns = TableColumns; + using TColumns = TableColumns; }; struct BackgroundSessions: Table<109> { diff --git a/ydb/core/tx/schemeshard/schemeshard_types.cpp b/ydb/core/tx/schemeshard/schemeshard_types.cpp index 443cafd3e7ff..eaad291862a8 100644 --- a/ydb/core/tx/schemeshard/schemeshard_types.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_types.cpp @@ -20,6 +20,9 @@ TSchemeLimits TSchemeLimits::FromProto(const NKikimrScheme::TSchemeLimits& proto if (proto.HasMaxTableColumns()) { result.MaxTableColumns = proto.GetMaxTableColumns(); } + if (proto.HasMaxColumnTableColumns()) { + result.MaxColumnTableColumns = proto.GetMaxColumnTableColumns(); + } if (proto.HasMaxTableColumnNameLength()) { result.MaxTableColumnNameLength = proto.GetMaxTableColumnNameLength(); } @@ -69,6 +72,7 @@ NKikimrScheme::TSchemeLimits TSchemeLimits::AsProto() const { result.SetMaxAclBytesSize(MaxAclBytesSize); result.SetMaxTableColumns(MaxTableColumns); + 
result.SetMaxColumnTableColumns(MaxColumnTableColumns); result.SetMaxTableColumnNameLength(MaxTableColumnNameLength); result.SetMaxTableKeyColumns(MaxTableKeyColumns); result.SetMaxTableIndices(MaxTableIndices); diff --git a/ydb/core/tx/schemeshard/schemeshard_types.h b/ydb/core/tx/schemeshard/schemeshard_types.h index 5e843c58a293..6afcdeedb1e2 100644 --- a/ydb/core/tx/schemeshard/schemeshard_types.h +++ b/ydb/core/tx/schemeshard/schemeshard_types.h @@ -39,6 +39,7 @@ struct TSchemeLimits { // table ui64 MaxTableColumns = 200; + ui64 MaxColumnTableColumns = 10000; ui64 MaxTableColumnNameLength = 255; ui64 MaxTableKeyColumns = 20; ui64 MaxTableIndices = 20; diff --git a/ydb/core/tx/schemeshard/schemeshard_utils.cpp b/ydb/core/tx/schemeshard/schemeshard_utils.cpp index 13642fc1054e..74ce3f4b30bc 100644 --- a/ydb/core/tx/schemeshard/schemeshard_utils.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_utils.cpp @@ -317,6 +317,10 @@ NKikimrSchemeOp::TTableDescription CalcImplTableDesc( result.AddKeyColumnNames(keyName); } + if (indexTableDesc.HasReplicationConfig()) { + result.MutableReplicationConfig()->CopyFrom(indexTableDesc.GetReplicationConfig()); + } + return result; } @@ -384,6 +388,10 @@ NKikimrSchemeOp::TTableDescription CalcImplTableDesc( result.AddKeyColumnNames(keyName); } + if (indexTableDesc.HasReplicationConfig()) { + result.MutableReplicationConfig()->CopyFrom(indexTableDesc.GetReplicationConfig()); + } + return result; } diff --git a/ydb/core/tx/schemeshard/schemeshard_utils.h b/ydb/core/tx/schemeshard/schemeshard_utils.h index c4a4d1dcf244..b74161e931ef 100644 --- a/ydb/core/tx/schemeshard/schemeshard_utils.h +++ b/ydb/core/tx/schemeshard/schemeshard_utils.h @@ -54,7 +54,7 @@ inline NKikimrSchemeOp::TModifyScheme TransactionTemplate(const TString& working return tx; } -TSerializedCellVec ChooseSplitKeyByHistogram(const NKikimrTableStats::THistogram& histogram, +TSerializedCellVec ChooseSplitKeyByHistogram(const NKikimrTableStats::THistogram& histogram, ui64 
total, const TConstArrayRef& keyColumnTypes); class TShardDeleter { diff --git a/ydb/core/tx/schemeshard/schemeshard_xxport__helpers.cpp b/ydb/core/tx/schemeshard/schemeshard_xxport__helpers.cpp new file mode 100644 index 000000000000..8990964b78d3 --- /dev/null +++ b/ydb/core/tx/schemeshard/schemeshard_xxport__helpers.cpp @@ -0,0 +1,14 @@ +#include + +namespace NKikimr::NSchemeShard { + +TString GetUid(const Ydb::Operations::OperationParams& operationParams) { + const auto& labels = operationParams.labels(); + auto it = labels.find("uid"); + if (it != labels.end()) { + return it->second; + } + return {}; +} + +} // NKikimr::NSchemeShard diff --git a/ydb/core/tx/schemeshard/schemeshard_xxport__helpers.h b/ydb/core/tx/schemeshard/schemeshard_xxport__helpers.h new file mode 100644 index 000000000000..e6fafda10fb0 --- /dev/null +++ b/ydb/core/tx/schemeshard/schemeshard_xxport__helpers.h @@ -0,0 +1,13 @@ +#pragma once + +#include + +namespace Ydb::Operations { + class OperationParams; +} + +namespace NKikimr::NSchemeShard { + +TString GetUid(const Ydb::Operations::OperationParams& operationParams); + +} // NKikimr::NSchemeShard diff --git a/ydb/core/tx/schemeshard/ut_backup/ut_backup.cpp b/ydb/core/tx/schemeshard/ut_backup/ut_backup.cpp index 1338f2d8f42d..771b929051c0 100644 --- a/ydb/core/tx/schemeshard/ut_backup/ut_backup.cpp +++ b/ydb/core/tx/schemeshard/ut_backup/ut_backup.cpp @@ -7,9 +7,27 @@ #include #include +#include + +#include + using namespace NSchemeShardUT_Private; using namespace NKikimr::NWrappers::NTestHelpers; +namespace { + +Aws::SDKOptions Options; + +Y_TEST_HOOK_BEFORE_RUN(InitAwsAPI) { + Aws::InitAPI(Options); +} + +Y_TEST_HOOK_AFTER_RUN(ShutdownAwsAPI) { + Aws::ShutdownAPI(Options); +} + +} + Y_UNIT_TEST_SUITE(TBackupTests) { using TFillFn = std::function; diff --git a/ydb/core/tx/schemeshard/ut_backup/ya.make b/ydb/core/tx/schemeshard/ut_backup/ya.make index d9ee6dd81405..aac9bc5f9334 100644 --- a/ydb/core/tx/schemeshard/ut_backup/ya.make +++ 
b/ydb/core/tx/schemeshard/ut_backup/ya.make @@ -20,6 +20,7 @@ IF (NOT OS_WINDOWS) library/cpp/getopt library/cpp/regex/pcre library/cpp/svnversion + contrib/libs/aws-sdk-cpp/aws-cpp-sdk-core ydb/core/testlib/default ydb/core/tx ydb/core/tx/schemeshard/ut_helpers diff --git a/ydb/core/tx/schemeshard/ut_base/ut_base.cpp b/ydb/core/tx/schemeshard/ut_base/ut_base.cpp index 3f1ab7be33fc..5e8b2269c8c9 100644 --- a/ydb/core/tx/schemeshard/ut_base/ut_base.cpp +++ b/ydb/core/tx/schemeshard/ut_base/ut_base.cpp @@ -3603,6 +3603,35 @@ Y_UNIT_TEST_SUITE(TSchemeShardTest) { NLs::IsBackupTable(true), }); + // cannot alter backup table + TestAlterTable(runtime, ++txId, "/MyRoot", R"( + Name: "CopyTable" + DropColumns { Name: "value" } + )", {NKikimrScheme::StatusSchemeError}); + + // cannot add cdc stream to backup table + TestCreateCdcStream(runtime, ++txId, "/MyRoot", R"( + TableName: "CopyTable" + StreamDescription { + Name: "Stream" + Mode: ECdcStreamModeKeysOnly + Format: ECdcStreamFormatProto + } + )", {NKikimrScheme::StatusSchemeError}); + + // cannot add sequence to backup table + TestCreateSequence(runtime, ++txId, "/MyRoot/CopyTable", R"( + Name: "Sequence" + )", {NKikimrScheme::StatusSchemeError}); + + // cannot add index to backup table + TestBuildIndex(runtime, ++txId, TTestTxConfig::SchemeShard, "/MyRoot", "/MyRoot/CopyTable", "Index", {"value"}); + env.TestWaitNotification(runtime, txId); + { + auto desc = TestGetBuildIndex(runtime, TTestTxConfig::SchemeShard, "/MyRoot", txId); + UNIT_ASSERT_EQUAL(desc.GetIndexBuild().GetState(), Ydb::Table::IndexBuildState::STATE_REJECTED); + } + // consistent copy table TestConsistentCopyTables(runtime, ++txId, "/", R"( CopyTableDescriptions { @@ -3741,16 +3770,18 @@ Y_UNIT_TEST_SUITE(TSchemeShardTest) { )", {NKikimrScheme::StatusInvalidParameter}); // cannot remove 'IsBackup' property from existent table - TestAlterTable(runtime, ++txId, "/MyRoot", R"( + AsyncSend(runtime, TTestTxConfig::SchemeShard, 
InternalTransaction(AlterTableRequest(++txId, "/MyRoot", R"( Name: "CopyTable" IsBackup: false - )", {NKikimrScheme::StatusInvalidParameter}); + )"))); + TestModificationResults(runtime, txId, {NKikimrScheme::StatusInvalidParameter}); - TestAlterTable(runtime, ++txId, "/MyRoot", R"( + AsyncSend(runtime, TTestTxConfig::SchemeShard, InternalTransaction(AlterTableRequest(++txId, "/MyRoot", R"( Name: "CopyTable" IsBackup: false DropColumns { Name: "value" } - )", {NKikimrScheme::StatusInvalidParameter}); + )"))); + TestModificationResults(runtime, txId, {NKikimrScheme::StatusInvalidParameter}); // sanity check @@ -6343,6 +6374,7 @@ Y_UNIT_TEST_SUITE(TSchemeShardTest) { "PartitionPerTablet: 10 " "PQTabletConfig: {PartitionConfig { LifetimeSeconds : 10}}" ); + env.TestWaitNotification(runtime, txId); TestDescribeResult(DescribePath(runtime, "/MyRoot/DirA/PQGroup_1", true), {NLs::CheckPartCount("PQGroup_1", 100, 10, 10, 100), @@ -6865,7 +6897,7 @@ Y_UNIT_TEST_SUITE(TSchemeShardTest) { AsyncForceDropUnsafe(runtime, ++txId, pVer.PathId.LocalPathId); TestModificationResult(runtime, txId-2, NKikimrScheme::StatusAccepted); - TestModificationResult(runtime, txId-1, NKikimrScheme::StatusAccepted); + TestModificationResults(runtime, txId-1, {NKikimrScheme::StatusAccepted, NKikimrScheme::StatusMultipleModifications}); TestModificationResult(runtime, txId, NKikimrScheme::StatusAccepted); TActorId sender = runtime.AllocateEdgeActor(); @@ -9912,9 +9944,16 @@ Y_UNIT_TEST_SUITE(TSchemeShardTest) { : KeyColumnTypes(keyColumnTypes.begin(), keyColumnTypes.end()) {} - TString FindSplitKey(const TVector>& histogramKeys) const { - NKikimrTableStats::THistogram histogram = FillHistogram(histogramKeys); - TSerializedCellVec splitKey = ChooseSplitKeyByHistogram(histogram, KeyColumnTypes); + TString FindSplitKey(const TVector>& histogramKeys, TVector histogramValues = {}, ui64 total = 0) const { + if (histogramValues.empty() && !histogramKeys.empty()) { + for (size_t i = 0; i < 
histogramKeys.size(); i++) { + histogramValues.push_back(i + 1); + } + total = histogramKeys.size() + 1; + } + + NKikimrTableStats::THistogram histogram = FillHistogram(histogramKeys, histogramValues); + TSerializedCellVec splitKey = ChooseSplitKeyByHistogram(histogram, total, KeyColumnTypes); return PrintKey(splitKey); } @@ -9964,11 +10003,13 @@ Y_UNIT_TEST_SUITE(TSchemeShardTest) { return NKikimr::TSerializedCellVec(cells); } - NKikimrTableStats::THistogram FillHistogram(const TVector>& keys) const { + NKikimrTableStats::THistogram FillHistogram(const TVector>& keys, const TVector& values) const { NKikimrTableStats::THistogram histogram; - for (const auto& k : keys) { - TSerializedCellVec sk(MakeCells(k)); - histogram.AddBuckets()->SetKey(sk.GetBuffer()); + for (auto i : xrange(keys.size())) { + TSerializedCellVec sk(MakeCells(keys[i])); + auto bucket = histogram.AddBuckets(); + bucket->SetKey(sk.GetBuffer()); + bucket->SetValue(values[i]); } return histogram; } @@ -10085,7 +10126,7 @@ Y_UNIT_TEST_SUITE(TSchemeShardTest) { { "2", "f", "42" }, { "3", "cccccccccccccccccccccccc", "42" } }); - UNIT_ASSERT_VALUES_EQUAL(splitKey, "(Uint64 : 2, Utf8 : d, Uint32 : NULL)"); + UNIT_ASSERT_VALUES_EQUAL(splitKey, "(Uint64 : 2, Utf8 : c, Uint32 : NULL)"); } { @@ -10102,6 +10143,140 @@ Y_UNIT_TEST_SUITE(TSchemeShardTest) { }); UNIT_ASSERT_VALUES_EQUAL(splitKey, "(Uint64 : 2, Utf8 : bbb, Uint32 : NULL)"); } + + { + TString splitKey = + schemaHelper.FindSplitKey({}); + UNIT_ASSERT_VALUES_EQUAL(splitKey, "()"); + } + + { + TString splitKey = + schemaHelper.FindSplitKey({ + { "0", "a", "1" }, + }, { + 53, + }, 100); + UNIT_ASSERT_VALUES_EQUAL(splitKey, "(Uint64 : 0, Utf8 : a, Uint32 : 1)"); + } + + { + TString splitKey = + schemaHelper.FindSplitKey({ + { "0", "a", "1" }, + }, { + 25, + }, 100); + UNIT_ASSERT_VALUES_EQUAL(splitKey, "(Uint64 : 0, Utf8 : a, Uint32 : 1)"); + } + + { + TString splitKey = + schemaHelper.FindSplitKey({ + { "0", "a", "1" }, + }, { + 75, + }, 100); + 
UNIT_ASSERT_VALUES_EQUAL(splitKey, "(Uint64 : 0, Utf8 : a, Uint32 : 1)"); + } + + { + TString splitKey = + schemaHelper.FindSplitKey({ + { "0", "a", "1" }, + }, { + 24, + }, 100); + UNIT_ASSERT_VALUES_EQUAL(splitKey, "()"); + } + + { + TString splitKey = + schemaHelper.FindSplitKey({ + { "0", "a", "1" }, + }, { + 76, + }, 100); + UNIT_ASSERT_VALUES_EQUAL(splitKey, "()"); + } + + { + TString splitKey = + schemaHelper.FindSplitKey({ + { "0", "a", "1" }, + { "1", "a", "1" }, + { "2", "a", "2" }, + { "3", "a", "3" }, + { "4", "a", "4" }, + { "5", "a", "5" }, + { "6", "a", "1" }, + { "7", "a", "2" }, + { "8", "a", "42" }, + }, { + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9 + }, 10); + UNIT_ASSERT_VALUES_EQUAL(splitKey, "(Uint64 : 4, Utf8 : NULL, Uint32 : NULL)"); + } + + { + TString splitKey = + schemaHelper.FindSplitKey({ + { "0", "a", "1" }, + { "1", "a", "1" }, + { "2", "a", "2" }, + { "3", "a", "3" }, + { "4", "a", "4" }, + { "5", "a", "5" }, + { "6", "a", "1" }, + { "7", "a", "2" }, + { "8", "a", "42" }, + }, { + 1, + 2, + 3, + 4, + 5, + 6, + 30, + 40, + 70 + }, 100); + UNIT_ASSERT_VALUES_EQUAL(splitKey, "(Uint64 : 7, Utf8 : NULL, Uint32 : NULL)"); + } + + { + TString splitKey = + schemaHelper.FindSplitKey({ + { "0", "a", "1" }, + { "1", "a", "1" }, + { "2", "a", "2" }, + { "3", "a", "3" }, + { "4", "a", "4" }, + { "5", "a", "5" }, + { "6", "a", "1" }, + { "7", "a", "2" }, + { "8", "a", "42" }, + }, { + 30, + 40, + 70, + 90, + 91, + 92, + 93, + 94, + 95 + }, 100); + UNIT_ASSERT_VALUES_EQUAL(splitKey, "(Uint64 : 1, Utf8 : NULL, Uint32 : NULL)"); + } } Y_UNIT_TEST(ListNotCreatedDirCase) { @@ -10941,6 +11116,74 @@ Y_UNIT_TEST_SUITE(TSchemeShardTest) { AssertReserve("/MyRoot/Topic2", 3 * 17); } + Y_UNIT_TEST(TopicWithAutopartitioningReserveSize) { + TTestEnvOptions opts; + opts.EnableTopicSplitMerge(true); + opts.EnablePQConfigTransactionsAtSchemeShard(true); + + TTestBasicRuntime runtime; + + TTestEnv env(runtime, opts); + ui64 txId = 100; + + const auto AssertReserve 
= [&] (TString path, ui64 expectedReservedStorage) { + TestDescribeResult(DescribePath(runtime, path), + {NLs::Finished, + NLs::TopicReservedStorage(expectedReservedStorage)}); + }; + + // create with WriteSpeedInBytesPerSecond + TestCreatePQGroup(runtime, ++txId, "/MyRoot", R"( + Name: "Topic1" + TotalGroupCount: 1 + PartitionPerTablet: 1 + PQTabletConfig { + PartitionConfig { + LifetimeSeconds: 13 + WriteSpeedInBytesPerSecond : 19 + } + MeteringMode: METERING_MODE_RESERVED_CAPACITY + PartitionStrategy { + MinPartitionCount: 1 + MaxPartitionCount: 7 + PartitionStrategyType: CAN_SPLIT_AND_MERGE + } + } + )"); + env.TestWaitNotification(runtime, txId); + AssertReserve("/MyRoot/Topic1", 1 * 13 * 19); + + TestAlterPQGroup(runtime, ++txId, "/MyRoot", R"( + Name: "Topic1" + Split { + Partition: 0 + SplitBoundary: 'A' + } + )"); + env.TestWaitNotification(runtime, txId); + AssertReserve("/MyRoot/Topic1", 2 * 13 * 19); // There are only 2 active partitions + + TestAlterPQGroup(runtime, ++txId, "/MyRoot", R"( + Name: "Topic1" + Split { + Partition: 1 + SplitBoundary: '0' + } + )"); + env.TestWaitNotification(runtime, txId); + AssertReserve("/MyRoot/Topic1", 3 * 13 * 19); // There are only 3 active partitions + + TestAlterPQGroup(runtime, ++txId, "/MyRoot", R"( + Name: "Topic1" + Merge { + Partition: 2 + AdjacentPartition: 4 + } + )"); + env.TestWaitNotification(runtime, txId); + AssertReserve("/MyRoot/Topic1", 2 * 13 * 19); // There are only 2 active partitions + } + Y_UNIT_TEST(FindSubDomainPathId) { TTestBasicRuntime runtime; TTestEnv env(runtime); diff --git a/ydb/core/tx/schemeshard/ut_base/ut_commit_redo_limit.cpp b/ydb/core/tx/schemeshard/ut_base/ut_commit_redo_limit.cpp new file mode 100644 index 000000000000..260b4765d771 --- /dev/null +++ b/ydb/core/tx/schemeshard/ut_base/ut_commit_redo_limit.cpp @@ -0,0 +1,113 @@ +#include + +using namespace NKikimr; +using namespace NSchemeShard; +using namespace NSchemeShardUT_Private; + 
+Y_UNIT_TEST_SUITE(TSchemeShardCheckProposeSize) { + + //TODO: can't check all operations as many of them do not implement + // TSubOperation::AbortPropose() properly and will abort. + + Y_UNIT_TEST(CopyTable) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + + // Take control over MaxCommitRedoMB ICB setting. + // Drop down its min-value limit to be able to set it as low as test needs. + TControlWrapper MaxCommitRedoMB; + { + runtime.GetAppData().Icb->RegisterSharedControl(MaxCommitRedoMB, "TabletControls.MaxCommitRedoMB"); + MaxCommitRedoMB.Reset(200, 1, 4096); + } + + ui64 txId = 100; + + TestCreateTable(runtime, ++txId, "/MyRoot", R"( + Name: "table" + Columns { Name: "key" Type: "Uint64"} + Columns { Name: "value" Type: "Utf8"} + KeyColumnNames: ["key"] + )"); + env.TestWaitNotification(runtime, txId); + + // 1. Set MaxCommitRedoMB to 1 and try to create table. + // + // (Check at the operation's Propose tests commit redo size against (MaxCommitRedoMB - 1) + // to give 1MB leeway to executer/tablet inner stuff to may be do "something extra". + // So MaxCommitRedoMB = 1 means effective 0 for the size of operation's commit.) + { + MaxCommitRedoMB = 1; + AsyncCopyTable(runtime, ++txId, "/MyRoot", "table-copy", "/MyRoot/table"); + TestModificationResults(runtime, txId, + {{NKikimrScheme::StatusSchemeError, "local tx commit redo size generated by IgniteOperation() is more than allowed limit"}} + ); + env.TestWaitNotification(runtime, txId); + } + + // 2. Set MaxCommitRedoMB back to high value and try again. + { + MaxCommitRedoMB = 200; + AsyncCopyTable(runtime, ++txId, "/MyRoot", "table-copy", "/MyRoot/table"); + env.TestWaitNotification(runtime, txId); + } + } + + Y_UNIT_TEST(CopyTables) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + + // Take control over MaxCommitRedoMB ICB setting. + // Drop down its min-value limit to be able to set it as low as test needs. 
+ TControlWrapper MaxCommitRedoMB; + { + runtime.GetAppData().Icb->RegisterSharedControl(MaxCommitRedoMB, "TabletControls.MaxCommitRedoMB"); + MaxCommitRedoMB.Reset(200, 1, 4096); + } + + const ui64 tables = 100; + const ui64 shardsPerTable = 1; + + ui64 txId = 100; + + for (ui64 i : xrange(tables)) { + TestCreateTable(runtime, ++txId, "/MyRoot", Sprintf( + R"( + Name: "table-%lu" + Columns { Name: "key" Type: "Uint64"} + Columns { Name: "value" Type: "Utf8"} + KeyColumnNames: ["key"] + UniformPartitionsCount: %lu + )", + i, + shardsPerTable + )); + env.TestWaitNotification(runtime, txId); + } + + auto testCopyTables = [](auto& runtime, ui64 txId, ui64 tables) { + TVector schemeTxs; + for (ui64 i : xrange(tables)) { + schemeTxs.push_back(CopyTableRequest(txId, "/MyRoot", Sprintf("table-%lu-copy", i), Sprintf("/MyRoot/table-%lu", i))); + } + AsyncSend(runtime, TTestTxConfig::SchemeShard, CombineSchemeTransactions(schemeTxs)); + }; + + // 1. Set MaxCommitRedoMB to 1 and try to copy tables. + { + MaxCommitRedoMB = 1; + testCopyTables(runtime, ++txId, tables); + TestModificationResults(runtime, txId, + {{NKikimrScheme::StatusSchemeError, "local tx commit redo size generated by IgniteOperation() is more than allowed limit"}} + ); + } + + // 2. Set MaxCommitRedoMB back to high value and try again. 
+ { + MaxCommitRedoMB = 200; + testCopyTables(runtime, ++txId, tables); + TestModificationResults(runtime, txId, {{NKikimrScheme::StatusAccepted}}); + } + } + +} diff --git a/ydb/core/tx/schemeshard/ut_base/ya.make b/ydb/core/tx/schemeshard/ut_base/ya.make index 4699c198de39..dd0047f98d6b 100644 --- a/ydb/core/tx/schemeshard/ut_base/ya.make +++ b/ydb/core/tx/schemeshard/ut_base/ya.make @@ -27,6 +27,7 @@ SRCS( ut_base.cpp ut_info_types.cpp ut_table_pg_types.cpp + ut_commit_redo_limit.cpp ) END() diff --git a/ydb/core/tx/schemeshard/ut_cdc_stream/ut_cdc_stream.cpp b/ydb/core/tx/schemeshard/ut_cdc_stream/ut_cdc_stream.cpp index a4e9f6a1b2c2..f81110939025 100644 --- a/ydb/core/tx/schemeshard/ut_cdc_stream/ut_cdc_stream.cpp +++ b/ydb/core/tx/schemeshard/ut_cdc_stream/ut_cdc_stream.cpp @@ -1,7 +1,9 @@ #include +#include #include #include #include +#include #include #include @@ -591,15 +593,6 @@ Y_UNIT_TEST_SUITE(TCdcStreamTests) { } )", {NKikimrScheme::StatusNameConflict}); - TestCreateCdcStream(runtime, ++txId, "/MyRoot/Table/Index", R"( - TableName: "indexImplTable" - StreamDescription { - Name: "Stream" - Mode: ECdcStreamModeKeysOnly - Format: ECdcStreamFormatProto - } - )", {NKikimrScheme::StatusNameConflict}); - TestCreateCdcStream(runtime, ++txId, "/MyRoot", R"( TableName: "Table" StreamDescription { @@ -617,29 +610,6 @@ Y_UNIT_TEST_SUITE(TCdcStreamTests) { )"); env.TestWaitNotification(runtime, txId); - TestCreateCdcStream(runtime, ++txId, "/MyRoot", R"( - TableName: "Table" - StreamDescription { - Name: "StreamWithIndex" - Mode: ECdcStreamModeKeysOnly - Format: ECdcStreamFormatProto - } - IndexName: "NotExistedIndex" - )", {NKikimrScheme::StatusSchemeError}); - - TestCreateCdcStream(runtime, ++txId, "/MyRoot", R"( - TableName: "Table" - StreamDescription { - Name: "StreamWithIndex" - Mode: ECdcStreamModeKeysOnly - Format: ECdcStreamFormatProto - } - IndexName: "Index" - )"); - env.TestWaitNotification(runtime, txId); - - 
TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/Index/indexImplTable/StreamWithIndex/streamImpl"), {NLs::PathExist}); - TestDropTable(runtime, ++txId, "/MyRoot", "Table"); env.TestWaitNotification(runtime, txId); @@ -1232,6 +1202,266 @@ Y_UNIT_TEST_SUITE(TCdcStreamTests) { } } + Y_UNIT_TEST(StreamOnIndexTableNegative) { + TTestBasicRuntime runtime; + TTestEnv env(runtime, TTestEnvOptions().EnableChangefeedsOnIndexTables(false)); + ui64 txId = 100; + + TestCreateIndexedTable(runtime, ++txId, "/MyRoot", R"( + TableDescription { + Name: "Table" + Columns { Name: "key" Type: "Uint64" } + Columns { Name: "indexed" Type: "Uint64" } + KeyColumnNames: ["key"] + } + IndexDescription { + Name: "Index" + KeyColumnNames: ["indexed"] + } + )"); + env.TestWaitNotification(runtime, txId); + + TestCreateCdcStream(runtime, ++txId, "/MyRoot/Table/Index", R"( + TableName: "indexImplTable" + StreamDescription { + Name: "Stream" + Mode: ECdcStreamModeKeysOnly + Format: ECdcStreamFormatProto + } + )", {NKikimrScheme::StatusPreconditionFailed}); + } + + Y_UNIT_TEST(StreamOnIndexTable) { + TTestBasicRuntime runtime; + TTestEnv env(runtime, TTestEnvOptions().EnableChangefeedsOnIndexTables(true)); + ui64 txId = 100; + + TestCreateIndexedTable(runtime, ++txId, "/MyRoot", R"( + TableDescription { + Name: "Table" + Columns { Name: "key" Type: "Uint64" } + Columns { Name: "indexed" Type: "Uint64" } + KeyColumnNames: ["key"] + } + IndexDescription { + Name: "SyncIndex" + KeyColumnNames: ["indexed"] + } + IndexDescription { + Name: "AsyncIndex" + KeyColumnNames: ["indexed"] + Type: EIndexTypeGlobalAsync + } + )"); + env.TestWaitNotification(runtime, txId); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/SyncIndex"), {NLs::PathVersionEqual(2)}); + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/SyncIndex/indexImplTable"), {NLs::PathVersionEqual(3)}); + + TestCreateCdcStream(runtime, ++txId, "/MyRoot/Table/UnknownIndex", R"( + TableName: 
"indexImplTable" + StreamDescription { + Name: "Stream" + Mode: ECdcStreamModeKeysOnly + Format: ECdcStreamFormatProto + } + )", {NKikimrScheme::StatusPathDoesNotExist}); + + TestCreateCdcStream(runtime, ++txId, "/MyRoot/Table/AsyncIndex", R"( + TableName: "indexImplTable" + StreamDescription { + Name: "Stream" + Mode: ECdcStreamModeKeysOnly + Format: ECdcStreamFormatProto + } + )", {NKikimrScheme::StatusPreconditionFailed}); + + TestCreateCdcStream(runtime, ++txId, "/MyRoot/Table/SyncIndex", R"( + TableName: "indexImplTable" + StreamDescription { + Name: "Stream" + Mode: ECdcStreamModeKeysOnly + Format: ECdcStreamFormatProto + } + )"); + env.TestWaitNotification(runtime, txId); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/SyncIndex"), {NLs::PathVersionEqual(3)}); + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/SyncIndex/indexImplTable"), {NLs::PathVersionEqual(4)}); + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/SyncIndex/indexImplTable/Stream"), {NLs::PathExist}); + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/SyncIndex/indexImplTable/Stream/streamImpl"), {NLs::PathExist}); + + TestAlterCdcStream(runtime, ++txId, "/MyRoot/Table/UnknownIndex", R"( + TableName: "indexImplTable" + StreamName: "Stream" + Disable {} + )", {NKikimrScheme::StatusPathDoesNotExist}); + + TestAlterCdcStream(runtime, ++txId, "/MyRoot/Table/SyncIndex", R"( + TableName: "indexImplTable" + StreamName: "Stream" + Disable {} + )"); + env.TestWaitNotification(runtime, txId); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/SyncIndex"), {NLs::PathVersionEqual(4)}); + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/SyncIndex/indexImplTable"), {NLs::PathVersionEqual(5)}); + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/SyncIndex/indexImplTable/Stream"), { + NLs::StreamState(NKikimrSchemeOp::ECdcStreamStateDisabled), + }); + + TestDropCdcStream(runtime, ++txId, 
"/MyRoot/Table/UnknownIndex", R"( + TableName: "indexImplTable" + StreamName: "Stream" + )", {NKikimrScheme::StatusPathDoesNotExist}); + + TestDropCdcStream(runtime, ++txId, "/MyRoot/Table/SyncIndex", R"( + TableName: "indexImplTable" + StreamName: "Stream" + )"); + env.TestWaitNotification(runtime, txId); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/SyncIndex"), {NLs::PathVersionEqual(5)}); + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/SyncIndex/indexImplTable"), {NLs::PathVersionEqual(6)}); + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/SyncIndex/indexImplTable/Stream"), {NLs::PathNotExist}); + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/SyncIndex/indexImplTable/Stream/streamImpl"), {NLs::PathNotExist}); + } + + Y_UNIT_TEST(StreamOnBuildingIndexTable) { + TTestBasicRuntime runtime; + TTestEnv env(runtime, TTestEnvOptions().EnableChangefeedsOnIndexTables(true)); + ui64 txId = 100; + + TestCreateTable(runtime, ++txId, "/MyRoot", R"( + Name: "Table" + Columns { Name: "key" Type: "Uint64" } + Columns { Name: "indexed" Type: "Uint64" } + KeyColumnNames: ["key"] + )"); + env.TestWaitNotification(runtime, txId); + + THolder blockedBuildIndexRequest; + auto blockBuildIndexRequest = runtime.AddObserver([&](auto& ev) { + blockedBuildIndexRequest.Reset(ev.Release()); + }); + + AsyncBuildIndex(runtime, ++txId, TTestTxConfig::SchemeShard, "/MyRoot", "/MyRoot/Table", "Index", {"indexed"}); + const auto buildIndexId = txId; + { + TDispatchOptions opts; + opts.FinalEvents.emplace_back([&blockedBuildIndexRequest](IEventHandle&) { + return bool(blockedBuildIndexRequest); + }); + runtime.DispatchEvents(opts); + } + blockBuildIndexRequest.Remove(); + + TestCreateCdcStream(runtime, ++txId, "/MyRoot/Table/Index", R"( + TableName: "indexImplTable" + StreamDescription { + Name: "Stream" + Mode: ECdcStreamModeKeysOnly + Format: ECdcStreamFormatProto + } + )", {NKikimrScheme::StatusMultipleModifications}); + + 
runtime.Send(blockedBuildIndexRequest.Release(), 0, true); + env.TestWaitNotification(runtime, buildIndexId); + + TestCreateCdcStream(runtime, ++txId, "/MyRoot/Table/Index", R"( + TableName: "indexImplTable" + StreamDescription { + Name: "Stream" + Mode: ECdcStreamModeKeysOnly + Format: ECdcStreamFormatProto + } + )"); + env.TestWaitNotification(runtime, txId); + } + + Y_UNIT_TEST(DropIndexWithStream) { + TTestBasicRuntime runtime; + TTestEnv env(runtime, TTestEnvOptions().EnableChangefeedsOnIndexTables(true)); + ui64 txId = 100; + + TestCreateIndexedTable(runtime, ++txId, "/MyRoot", R"( + TableDescription { + Name: "Table" + Columns { Name: "key" Type: "Uint64" } + Columns { Name: "indexed" Type: "Uint64" } + KeyColumnNames: ["key"] + } + IndexDescription { + Name: "Index" + KeyColumnNames: ["indexed"] + } + )"); + env.TestWaitNotification(runtime, txId); + + TestCreateCdcStream(runtime, ++txId, "/MyRoot/Table/Index", R"( + TableName: "indexImplTable" + StreamDescription { + Name: "Stream" + Mode: ECdcStreamModeKeysOnly + Format: ECdcStreamFormatProto + } + )"); + env.TestWaitNotification(runtime, txId); + + TestDropTableIndex(runtime, ++txId, "/MyRoot", R"( + TableName: "Table" + IndexName: "Index" + )"); + env.TestWaitNotification(runtime, txId); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/Index/indexImplTable/Stream"), { + NLs::PathNotExist, + }); + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/Index/indexImplTable/Stream/streamImpl"), { + NLs::PathNotExist, + }); + } + + Y_UNIT_TEST(DropTableWithIndexWithStream) { + TTestBasicRuntime runtime; + TTestEnv env(runtime, TTestEnvOptions().EnableChangefeedsOnIndexTables(true)); + ui64 txId = 100; + + TestCreateIndexedTable(runtime, ++txId, "/MyRoot", R"( + TableDescription { + Name: "Table" + Columns { Name: "key" Type: "Uint64" } + Columns { Name: "indexed" Type: "Uint64" } + KeyColumnNames: ["key"] + } + IndexDescription { + Name: "Index" + KeyColumnNames: ["indexed"] + } + 
)"); + env.TestWaitNotification(runtime, txId); + + TestCreateCdcStream(runtime, ++txId, "/MyRoot/Table/Index", R"( + TableName: "indexImplTable" + StreamDescription { + Name: "Stream" + Mode: ECdcStreamModeKeysOnly + Format: ECdcStreamFormatProto + } + )"); + env.TestWaitNotification(runtime, txId); + + TestDropTable(runtime, ++txId, "/MyRoot", "Table"); + env.TestWaitNotification(runtime, txId); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/Index/indexImplTable/Stream"), { + NLs::PathNotExist, + }); + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/Index/indexImplTable/Stream/streamImpl"), { + NLs::PathNotExist, + }); + } + } // TCdcStreamTests Y_UNIT_TEST_SUITE(TCdcStreamWithInitialScanTests) { @@ -1526,6 +1756,80 @@ Y_UNIT_TEST_SUITE(TCdcStreamWithInitialScanTests) { env.TestWaitNotification(runtime, txId); } + Y_UNIT_TEST(RacyAlterStreamAndRestart) { + TTestBasicRuntime runtime; + TTestEnv env(runtime, TTestEnvOptions() + .EnableChangefeedInitialScan(true)); + ui64 txId = 100; + + TActorId schemeShardActorId; + auto findActorId = runtime.AddObserver([&](auto& ev) { + if (!schemeShardActorId) { + schemeShardActorId = ev->Sender; + } + }); + + TestCreateTable(runtime, ++txId, "/MyRoot", R"( + Name: "Table" + Columns { Name: "key" Type: "Uint64" } + Columns { Name: "value" Type: "Uint64" } + KeyColumnNames: ["key"] + )"); + env.TestWaitNotification(runtime, txId); + + TBlockEvents blockedAlterStream(runtime, [&](auto& ev) { + const auto& record = ev->Get()->Record; + if (record.GetTransaction(0).GetOperationType() == NKikimrSchemeOp::ESchemeOpAlterCdcStream) { + txId = record.GetTxId(); + return true; + } + return false; + }); + + TestCreateCdcStream(runtime, ++txId, "/MyRoot", R"( + TableName: "Table" + StreamDescription { + Name: "Stream" + Mode: ECdcStreamModeKeysOnly + Format: ECdcStreamFormatProto + State: ECdcStreamStateScan + } + )"); + env.TestWaitNotification(runtime, txId); + + runtime.WaitFor("AlterCdcStream", [&]{ 
return blockedAlterStream.size(); }); + blockedAlterStream.Stop(); + + UNIT_ASSERT(schemeShardActorId); + + TBlockEvents blockedProgress(runtime, [&](auto& ev) { + return schemeShardActorId == ev->Sender; + }); + + blockedAlterStream.Unblock(); + runtime.WaitFor("Progress", [&]{ return blockedProgress.size(); }); + blockedProgress.Stop(); + + RebootTablet(runtime, TTestTxConfig::SchemeShard, runtime.AllocateEdgeActor()); + env.TestWaitNotification(runtime, txId); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/Stream"), { + NLs::PathExist, + NLs::StreamState(NKikimrSchemeOp::ECdcStreamStateReady), + }); + + TestDropCdcStream(runtime, ++txId, "/MyRoot", R"( + TableName: "Table" + StreamName: "Stream" + )"); + env.TestWaitNotification(runtime, txId); + + RebootTablet(runtime, TTestTxConfig::SchemeShard, runtime.AllocateEdgeActor()); + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/Stream"), { + NLs::PathNotExist, + }); + } + void Metering(bool serverless) { TTestBasicRuntime runtime; TTestEnv env(runtime, TTestEnvOptions() diff --git a/ydb/core/tx/schemeshard/ut_cdc_stream_reboots/ut_cdc_stream_reboots.cpp b/ydb/core/tx/schemeshard/ut_cdc_stream_reboots/ut_cdc_stream_reboots.cpp index 42ecc9f6397d..47209679f222 100644 --- a/ydb/core/tx/schemeshard/ut_cdc_stream_reboots/ut_cdc_stream_reboots.cpp +++ b/ydb/core/tx/schemeshard/ut_cdc_stream_reboots/ut_cdc_stream_reboots.cpp @@ -6,40 +6,38 @@ using namespace NSchemeShardUT_Private; -static const TString createTableProto = R"( - Name: "Table" - Columns { Name: "key" Type: "Uint64" } - Columns { Name: "value" Type: "Uint64" } - KeyColumnNames: ["key"] -)"; - -static const TString createTableWithIndexProto = R"( - TableDescription { - Name: "Table" - Columns { Name: "key" Type: "Uint64" } - Columns { Name: "value" Type: "Uint64" } - KeyColumnNames: ["key"] - } - IndexDescription { - Name: "SyncIndex" - KeyColumnNames: ["value"] - } -)"; - Y_UNIT_TEST_SUITE(TCdcStreamWithRebootsTests) { 
template - void CreateStream(const TMaybe& state = Nothing(), bool vt = false, bool tableWithIndex = false) { + void CreateStream(const TMaybe& state = Nothing(), bool vt = false, bool onIndex = false) { T t; - t.GetTestEnvOptions().EnableChangefeedInitialScan(true); + t.GetTestEnvOptions() + .EnableChangefeedInitialScan(true) + .EnableChangefeedsOnIndexTables(true); t.Run([&](TTestActorRuntime& runtime, bool& activeZone) { { TInactiveZone inactive(activeZone); runtime.GetAppData().DisableCdcAutoSwitchingToReadyStateForTests = true; - if (tableWithIndex) { - TestCreateIndexedTable(runtime, ++t.TxId, "/MyRoot", createTableWithIndexProto); + if (!onIndex) { + TestCreateTable(runtime, ++t.TxId, "/MyRoot", R"( + Name: "Table" + Columns { Name: "key" Type: "Uint64" } + Columns { Name: "value" Type: "Uint64" } + KeyColumnNames: ["key"] + )"); } else { - TestCreateTable(runtime, ++t.TxId, "/MyRoot", createTableProto); + TestCreateIndexedTable(runtime, ++t.TxId, "/MyRoot", R"( + TableDescription { + Name: "Table" + Columns { Name: "key" Type: "Uint64" } + Columns { Name: "indexed" Type: "Uint64" } + KeyColumnNames: ["key"] + } + IndexDescription { + Name: "Index" + KeyColumnNames: ["indexed"] + } + )"); } t.TestEnv->TestWaitNotification(runtime, t.TxId); } @@ -58,24 +56,19 @@ Y_UNIT_TEST_SUITE(TCdcStreamWithRebootsTests) { const bool ok = google::protobuf::TextFormat::PrintToString(streamDesc, &strDesc); UNIT_ASSERT_C(ok, "protobuf serialization failed"); - TestCreateCdcStream(runtime, ++t.TxId, "/MyRoot", Sprintf(R"( - TableName: "Table" + const TString path = !onIndex ? "/MyRoot" : "/MyRoot/Table/Index"; + const TString tableName = !onIndex ? 
"Table": "indexImplTable"; + + TestCreateCdcStream(runtime, ++t.TxId, path, Sprintf(R"( + TableName: "%s" StreamDescription { %s } - AllIndexes {} - )", strDesc.c_str())); + )", tableName.c_str(), strDesc.c_str())); t.TestEnv->TestWaitNotification(runtime, t.TxId); - TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/Stream"), { + TestDescribeResult(DescribePrivatePath(runtime, path + "/" + tableName + "/Stream"), { NLs::PathExist, NLs::StreamVirtualTimestamps(vt), }); - - if (tableWithIndex) { - TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/SyncIndex/indexImplTable/Stream"), { - NLs::PathExist, - NLs::StreamVirtualTimestamps(vt), - }); - } }); } @@ -83,15 +76,15 @@ Y_UNIT_TEST_SUITE(TCdcStreamWithRebootsTests) { CreateStream(); } - Y_UNIT_TEST_WITH_REBOOTS(CreateStreamTableWithIndex) { - CreateStream(Nothing(), false, true); + Y_UNIT_TEST_WITH_REBOOTS(CreateStreamOnIndexTable) { + CreateStream({}, false, true); } Y_UNIT_TEST_WITH_REBOOTS(CreateStreamExplicitReady) { CreateStream(NKikimrSchemeOp::ECdcStreamStateReady); } - Y_UNIT_TEST_WITH_REBOOTS(CreateStreamExplicitReadyTableWithIndex) { + Y_UNIT_TEST_WITH_REBOOTS(CreateStreamOnIndexTableExplicitReady) { CreateStream(NKikimrSchemeOp::ECdcStreamStateReady, false, true); } @@ -99,7 +92,7 @@ Y_UNIT_TEST_SUITE(TCdcStreamWithRebootsTests) { CreateStream(NKikimrSchemeOp::ECdcStreamStateScan); } - Y_UNIT_TEST_WITH_REBOOTS(CreateStreamWithInitialScanTableWithIndex) { + Y_UNIT_TEST_WITH_REBOOTS(CreateStreamOnIndexTableWithInitialScan) { CreateStream(NKikimrSchemeOp::ECdcStreamStateScan, false, true); } @@ -107,6 +100,10 @@ Y_UNIT_TEST_SUITE(TCdcStreamWithRebootsTests) { CreateStream({}, true); } + Y_UNIT_TEST_WITH_REBOOTS(CreateStreamOnIndexTableWithVirtualTimestamps) { + CreateStream({}, true, true); + } + Y_UNIT_TEST_WITH_REBOOTS(CreateStreamWithAwsRegion) { T t; t.GetTestEnvOptions().EnableChangefeedDynamoDBStreamsFormat(true); @@ -293,21 +290,41 @@ 
Y_UNIT_TEST_SUITE(TCdcStreamWithRebootsTests) { } template - void DropStream(const TMaybe& state = Nothing()) { + void DropStream(const TMaybe& state = Nothing(), bool onIndex = false) { T t; - t.GetTestEnvOptions().EnableChangefeedInitialScan(true); + t.GetTestEnvOptions() + .EnableChangefeedInitialScan(true) + .EnableChangefeedsOnIndexTables(true); t.Run([&](TTestActorRuntime& runtime, bool& activeZone) { + const TString path = !onIndex ? "/MyRoot" : "/MyRoot/Table/Index"; + const TString tableName = !onIndex ? "Table": "indexImplTable"; + { TInactiveZone inactive(activeZone); runtime.GetAppData().DisableCdcAutoSwitchingToReadyStateForTests = true; - TestCreateTable(runtime, ++t.TxId, "/MyRoot", R"( - Name: "Table" - Columns { Name: "key" Type: "Uint64" } - Columns { Name: "value" Type: "Uint64" } - KeyColumnNames: ["key"] - )"); + if (!onIndex) { + TestCreateTable(runtime, ++t.TxId, "/MyRoot", R"( + Name: "Table" + Columns { Name: "key" Type: "Uint64" } + Columns { Name: "value" Type: "Uint64" } + KeyColumnNames: ["key"] + )"); + } else { + TestCreateIndexedTable(runtime, ++t.TxId, "/MyRoot", R"( + TableDescription { + Name: "Table" + Columns { Name: "key" Type: "Uint64" } + Columns { Name: "indexed" Type: "Uint64" } + KeyColumnNames: ["key"] + } + IndexDescription { + Name: "Index" + KeyColumnNames: ["indexed"] + } + )"); + } t.TestEnv->TestWaitNotification(runtime, t.TxId); NKikimrSchemeOp::TCdcStreamDescription streamDesc; @@ -323,20 +340,20 @@ Y_UNIT_TEST_SUITE(TCdcStreamWithRebootsTests) { const bool ok = google::protobuf::TextFormat::PrintToString(streamDesc, &strDesc); UNIT_ASSERT_C(ok, "protobuf serialization failed"); - TestCreateCdcStream(runtime, ++t.TxId, "/MyRoot", Sprintf(R"( - TableName: "Table" + TestCreateCdcStream(runtime, ++t.TxId, path, Sprintf(R"( + TableName: "%s" StreamDescription { %s } - )", strDesc.c_str())); + )", tableName.c_str(), strDesc.c_str())); t.TestEnv->TestWaitNotification(runtime, t.TxId); } - TestDropCdcStream(runtime, 
++t.TxId, "/MyRoot", R"( - TableName: "Table" + TestDropCdcStream(runtime, ++t.TxId, path, Sprintf(R"( + TableName: "%s" StreamName: "Stream" - )"); + )", tableName.c_str())); t.TestEnv->TestWaitNotification(runtime, t.TxId); - TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/Stream"), {NLs::PathNotExist}); + TestDescribeResult(DescribePrivatePath(runtime, path + "/" + tableName + "/Stream"), {NLs::PathNotExist}); }); } @@ -344,14 +361,26 @@ Y_UNIT_TEST_SUITE(TCdcStreamWithRebootsTests) { DropStream(); } + Y_UNIT_TEST_WITH_REBOOTS(DropStreamOnIndexTable) { + DropStream({}, true); + } + Y_UNIT_TEST_WITH_REBOOTS(DropStreamExplicitReady) { DropStream(NKikimrSchemeOp::ECdcStreamStateReady); } + Y_UNIT_TEST_WITH_REBOOTS(DropStreamOnIndexTableExplicitReady) { + DropStream(NKikimrSchemeOp::ECdcStreamStateReady, true); + } + Y_UNIT_TEST_WITH_REBOOTS(DropStreamCreatedWithInitialScan) { DropStream(NKikimrSchemeOp::ECdcStreamStateScan); } + Y_UNIT_TEST_WITH_REBOOTS(DropStreamOnIndexTableCreatedWithInitialScan) { + DropStream(NKikimrSchemeOp::ECdcStreamStateScan, true); + } + Y_UNIT_TEST_WITH_REBOOTS(CreateDropRecreate) { T t; t.Run([&](TTestActorRuntime& runtime, bool& activeZone) { @@ -556,68 +585,77 @@ Y_UNIT_TEST_SUITE(TCdcStreamWithRebootsTests) { }); } + bool CheckRegistrations(TTestActorRuntime& runtime, NKikimrPQ::TMessageGroupInfo::EState expectedState, + const google::protobuf::RepeatedPtrField& tablePartitions, + const google::protobuf::RepeatedPtrField& topicPartitions) + { + for (const auto& topicPartition : topicPartitions) { + auto request = MakeHolder(); + { + auto& record = *request->Record.MutablePartitionRequest(); + record.SetPartition(topicPartition.GetPartitionId()); + auto& cmd = *record.MutableCmdGetMaxSeqNo(); + for (const auto& tablePartition : tablePartitions) { + cmd.AddSourceId(NPQ::NSourceIdEncoding::EncodeSimple(ToString(tablePartition.GetDatashardId()))); + } + } + + const auto& sender = runtime.AllocateEdgeActor(); + 
ForwardToTablet(runtime, topicPartition.GetTabletId(), sender, request.Release()); + + auto response = runtime.GrabEdgeEvent(sender); + { + const auto& record = response->Get()->Record.GetPartitionResponse(); + const auto& result = record.GetCmdGetMaxSeqNoResult().GetSourceIdInfo(); + + UNIT_ASSERT_VALUES_EQUAL(result.size(), tablePartitions.size()); + for (const auto& item: result) { + if (item.GetState() != expectedState) { + return false; + } + } + } + } + + return true; + } + struct TItem { TString Path; - ui32 nPartitions; + ui32 ExpectedPartitionCount; }; - void CheckRegistrations(TTestActorRuntime& runtime, const TItem& table, const TItem& topic) { + void CheckRegistrations(TTestActorRuntime& runtime, const TItem& table, const TItem& topic, + const google::protobuf::RepeatedPtrField* initialTablePartitions = nullptr) + { auto tableDesc = DescribePath(runtime, table.Path, true, true); const auto& tablePartitions = tableDesc.GetPathDescription().GetTablePartitions(); - UNIT_ASSERT_VALUES_EQUAL(tablePartitions.size(), table.nPartitions); + UNIT_ASSERT_VALUES_EQUAL(tablePartitions.size(), table.ExpectedPartitionCount); auto topicDesc = DescribePrivatePath(runtime, topic.Path); const auto& topicPartitions = topicDesc.GetPathDescription().GetPersQueueGroup().GetPartitions(); - UNIT_ASSERT_VALUES_EQUAL(topicPartitions.size(), topic.nPartitions); + UNIT_ASSERT_VALUES_EQUAL(topicPartitions.size(), topic.ExpectedPartitionCount); while (true) { runtime.SimulateSleep(TDuration::Seconds(1)); - bool done = true; - - for (ui32 i = 0; i < topic.nPartitions; ++i) { - auto request = MakeHolder(); - { - auto& record = *request->Record.MutablePartitionRequest(); - record.SetPartition(topicPartitions[i].GetPartitionId()); - auto& cmd = *record.MutableCmdGetMaxSeqNo(); - for (const auto& tablePartition : tablePartitions) { - cmd.AddSourceId(NPQ::NSourceIdEncoding::EncodeSimple(ToString(tablePartition.GetDatashardId()))); - } - } - - const auto& sender = 
runtime.AllocateEdgeActor(); - ForwardToTablet(runtime, topicPartitions[i].GetTabletId(), sender, request.Release()); - - auto response = runtime.GrabEdgeEvent(sender); - { - const auto& record = response->Get()->Record.GetPartitionResponse(); - const auto& result = record.GetCmdGetMaxSeqNoResult().GetSourceIdInfo(); - - UNIT_ASSERT_VALUES_EQUAL(result.size(), table.nPartitions); - for (const auto& item: result) { - done &= item.GetState() == NKikimrPQ::TMessageGroupInfo::STATE_REGISTERED; - if (!done) { - break; - } - } - } - - if (!done) { - break; - } - } - - if (done) { + if (CheckRegistrations(runtime, NKikimrPQ::TMessageGroupInfo::STATE_REGISTERED, tablePartitions, topicPartitions)) { break; } } + + if (initialTablePartitions) { + UNIT_ASSERT(CheckRegistrations(runtime, NKikimrPQ::TMessageGroupInfo::STATE_UNKNOWN, *initialTablePartitions, topicPartitions)); + } } - Y_UNIT_TEST_WITH_REBOOTS(SplitTable) { + template + void SplitTable(const TString& cdcStreamDesc) { T t; t.Run([&](TTestActorRuntime& runtime, bool& activeZone) { + NKikimrScheme::TEvDescribeSchemeResult initialTableDesc; { TInactiveZone inactive(activeZone); + TestCreateTable(runtime, ++t.TxId, "/MyRoot", R"( Name: "Table" Columns { Name: "key" Type: "Uint32" } @@ -625,15 +663,9 @@ Y_UNIT_TEST_SUITE(TCdcStreamWithRebootsTests) { KeyColumnNames: ["key"] )"); t.TestEnv->TestWaitNotification(runtime, t.TxId); + initialTableDesc = DescribePath(runtime, "/MyRoot/Table", true, true); - TestCreateCdcStream(runtime, ++t.TxId, "/MyRoot", R"( - TableName: "Table" - StreamDescription { - Name: "Stream" - Mode: ECdcStreamModeKeysOnly - Format: ECdcStreamFormatProto - } - )"); + TestCreateCdcStream(runtime, ++t.TxId, "/MyRoot", cdcStreamDesc); t.TestEnv->TestWaitNotification(runtime, t.TxId); } @@ -651,16 +683,43 @@ Y_UNIT_TEST_SUITE(TCdcStreamWithRebootsTests) { TInactiveZone inactive(activeZone); UploadRow(runtime, "/MyRoot/Table", 0, {1}, {2}, {TCell::Make(1u)}, {TCell::Make(1u)}); UploadRow(runtime, 
"/MyRoot/Table", 1, {1}, {2}, {TCell::Make(Max())}, {TCell::Make(Max())}); - CheckRegistrations(runtime, {"/MyRoot/Table", 2}, {"/MyRoot/Table/Stream/streamImpl", 1}); + CheckRegistrations(runtime, {"/MyRoot/Table", 2}, {"/MyRoot/Table/Stream/streamImpl", 1}, + &initialTableDesc.GetPathDescription().GetTablePartitions()); } }); } - Y_UNIT_TEST_WITH_REBOOTS(MergeTable) { + Y_UNIT_TEST_WITH_REBOOTS(SplitTable) { + SplitTable(R"( + TableName: "Table" + StreamDescription { + Name: "Stream" + Mode: ECdcStreamModeKeysOnly + Format: ECdcStreamFormatProto + } + )"); + } + + Y_UNIT_TEST_WITH_REBOOTS(SplitTableResolvedTimestamps) { + SplitTable(R"( + TableName: "Table" + StreamDescription { + Name: "Stream" + Mode: ECdcStreamModeKeysOnly + Format: ECdcStreamFormatProto + ResolvedTimestampsIntervalMs: 1000 + } + )"); + } + + template + void MergeTable(const TString& cdcStreamDesc) { T t; t.Run([&](TTestActorRuntime& runtime, bool& activeZone) { + NKikimrScheme::TEvDescribeSchemeResult initialTableDesc; { TInactiveZone inactive(activeZone); + TestCreateTable(runtime, ++t.TxId, "/MyRoot", R"( Name: "Table" Columns { Name: "key" Type: "Uint32" } @@ -674,15 +733,9 @@ Y_UNIT_TEST_SUITE(TCdcStreamWithRebootsTests) { } )"); t.TestEnv->TestWaitNotification(runtime, t.TxId); + initialTableDesc = DescribePath(runtime, "/MyRoot/Table", true, true); - TestCreateCdcStream(runtime, ++t.TxId, "/MyRoot", R"( - TableName: "Table" - StreamDescription { - Name: "Stream" - Mode: ECdcStreamModeKeysOnly - Format: ECdcStreamFormatProto - } - )"); + TestCreateCdcStream(runtime, ++t.TxId, "/MyRoot", cdcStreamDesc); t.TestEnv->TestWaitNotification(runtime, t.TxId); } @@ -696,11 +749,35 @@ Y_UNIT_TEST_SUITE(TCdcStreamWithRebootsTests) { TInactiveZone inactive(activeZone); UploadRow(runtime, "/MyRoot/Table", 0, {1}, {2}, {TCell::Make(1u)}, {TCell::Make(1u)}); UploadRow(runtime, "/MyRoot/Table", 0, {1}, {2}, {TCell::Make(Max())}, {TCell::Make(Max())}); - CheckRegistrations(runtime, {"/MyRoot/Table", 1}, 
{"/MyRoot/Table/Stream/streamImpl", 2}); + CheckRegistrations(runtime, {"/MyRoot/Table", 1}, {"/MyRoot/Table/Stream/streamImpl", 2}, + &initialTableDesc.GetPathDescription().GetTablePartitions()); } }); } + Y_UNIT_TEST_WITH_REBOOTS(MergeTable) { + MergeTable(R"( + TableName: "Table" + StreamDescription { + Name: "Stream" + Mode: ECdcStreamModeKeysOnly + Format: ECdcStreamFormatProto + } + )"); + } + + Y_UNIT_TEST_WITH_REBOOTS(MergeTableResolvedTimestamps) { + MergeTable(R"( + TableName: "Table" + StreamDescription { + Name: "Stream" + Mode: ECdcStreamModeKeysOnly + Format: ECdcStreamFormatProto + ResolvedTimestampsIntervalMs: 1000 + } + )"); + } + Y_UNIT_TEST_WITH_REBOOTS(RacySplitTableAndCreateStream) { T t; t.Run([&](TTestActorRuntime& runtime, bool& activeZone) { diff --git a/ydb/core/tx/schemeshard/ut_compaction/ut_compaction.cpp b/ydb/core/tx/schemeshard/ut_compaction/ut_compaction.cpp index 4671e1d1d39c..f5884ec8433b 100644 --- a/ydb/core/tx/schemeshard/ut_compaction/ut_compaction.cpp +++ b/ydb/core/tx/schemeshard/ut_compaction/ut_compaction.cpp @@ -845,7 +845,7 @@ Y_UNIT_TEST_SUITE(TSchemeshardBorrowedCompactionTest) { } })"); env.TestWaitNotification(runtime, txId); - env.SimulateSleep(runtime, TDuration::Seconds(30)); + env.SimulateSleep(runtime, TDuration::Seconds(60)); simpleInfo = GetPathInfo(runtime, "/MyRoot/Simple"); UNIT_ASSERT_VALUES_EQUAL(simpleInfo.Shards.size(), 5UL); diff --git a/ydb/core/tx/schemeshard/ut_export/ut_export.cpp b/ydb/core/tx/schemeshard/ut_export/ut_export.cpp index aebe7014e51d..8d41d7bab562 100644 --- a/ydb/core/tx/schemeshard/ut_export/ut_export.cpp +++ b/ydb/core/tx/schemeshard/ut_export/ut_export.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -12,14 +13,28 @@ #include #include +#include + +#include + using namespace NSchemeShardUT_Private; using namespace NKikimr::NWrappers::NTestHelpers; namespace { + Aws::SDKOptions Options; + + Y_TEST_HOOK_BEFORE_RUN(InitAwsAPI) { + Aws::InitAPI(Options); + 
} + + Y_TEST_HOOK_AFTER_RUN(ShutdownAwsAPI) { + Aws::ShutdownAPI(Options); + } + void Run(TTestBasicRuntime& runtime, TTestEnv& env, const TVector& tables, const TString& request, Ydb::StatusIds::StatusCode expectedStatus = Ydb::StatusIds::SUCCESS, - const TString& dbName = "/MyRoot", bool serverless = false, const TString& userSID = "") { + const TString& dbName = "/MyRoot", bool serverless = false, const TString& userSID = "", const TString& peerName = "") { ui64 txId = 100; @@ -104,7 +119,7 @@ namespace { const auto initialStatus = expectedStatus == Ydb::StatusIds::PRECONDITION_FAILED ? expectedStatus : Ydb::StatusIds::SUCCESS; - TestExport(runtime, schemeshardId, ++txId, dbName, request, userSID, initialStatus); + TestExport(runtime, schemeshardId, ++txId, dbName, request, userSID, peerName, initialStatus); env.TestWaitNotification(runtime, txId, schemeshardId); if (initialStatus != Ydb::StatusIds::SUCCESS) { @@ -124,6 +139,9 @@ namespace { void Cancel(const TVector& tables, const TString& request, TDelayFunc delayFunc) { TTestBasicRuntime runtime; + std::vector auditLines; + runtime.AuditLogBackends = std::move(CreateTestAuditLogBackends(auditLines)); + TTestEnv env(runtime); ui64 txId = 100; @@ -147,6 +165,22 @@ namespace { TestExport(runtime, ++txId, "/MyRoot", request); const ui64 exportId = txId; + // Check audit record for export start + { + auto line = FindAuditLine(auditLines, "operation=EXPORT START"); + UNIT_ASSERT_STRING_CONTAINS(line, "component=schemeshard"); + UNIT_ASSERT_STRING_CONTAINS(line, "operation=EXPORT START"); + UNIT_ASSERT_STRING_CONTAINS(line, Sprintf("id=%lu", exportId)); + UNIT_ASSERT_STRING_CONTAINS(line, "remote_address="); // can't check the value + UNIT_ASSERT_STRING_CONTAINS(line, "subject={none}"); + UNIT_ASSERT_STRING_CONTAINS(line, "database=/MyRoot"); + UNIT_ASSERT_STRING_CONTAINS(line, "status=SUCCESS"); + UNIT_ASSERT_STRING_CONTAINS(line, "detailed_status=SUCCESS"); + UNIT_ASSERT(!line.contains("reason")); + 
UNIT_ASSERT(!line.contains("start_time")); + UNIT_ASSERT(!line.contains("end_time")); + } + if (!delayed) { TDispatchOptions opts; opts.FinalEvents.emplace_back([&delayed](IEventHandle&) -> bool { @@ -161,6 +195,23 @@ namespace { runtime.Send(delayed.Release(), 0, true); env.TestWaitNotification(runtime, exportId); + // Check audit record for export end + // + { + auto line = FindAuditLine(auditLines, "operation=EXPORT END"); + UNIT_ASSERT_STRING_CONTAINS(line, "component=schemeshard"); + UNIT_ASSERT_STRING_CONTAINS(line, "operation=EXPORT END"); + UNIT_ASSERT_STRING_CONTAINS(line, Sprintf("id=%lu", exportId)); + UNIT_ASSERT_STRING_CONTAINS(line, "remote_address="); // can't check the value + UNIT_ASSERT_STRING_CONTAINS(line, "subject={none}"); + UNIT_ASSERT_STRING_CONTAINS(line, "database=/MyRoot"); + UNIT_ASSERT_STRING_CONTAINS(line, "status=ERROR"); + UNIT_ASSERT_STRING_CONTAINS(line, "detailed_status=CANCELLED"); + UNIT_ASSERT_STRING_CONTAINS(line, "reason=Cancelled"); + UNIT_ASSERT_STRING_CONTAINS(line, "start_time="); + UNIT_ASSERT_STRING_CONTAINS(line, "end_time="); + } + TestGetExport(runtime, exportId, "/MyRoot", Ydb::StatusIds::CANCELLED); TestForgetExport(runtime, ++txId, "/MyRoot", exportId); @@ -1650,7 +1701,7 @@ partitioning_settings { return ev->Get()->Record .GetTransaction(0).GetOperationType() == NKikimrSchemeOp::ESchemeOpBackup; }; - + THolder delayed; auto prevObserver = runtime.SetObserverFunc([&](TAutoPtr& ev) { if (delayFunc(ev)) { @@ -1708,6 +1759,235 @@ partitioning_settings { UNIT_ASSERT_LT(entry.GetStartTime().seconds(), entry.GetEndTime().seconds()); } + // Based on CompletedExportEndTime + Y_UNIT_TEST(AuditCompletedExport) { + TTestBasicRuntime runtime; + std::vector auditLines; + runtime.AuditLogBackends = std::move(CreateTestAuditLogBackends(auditLines)); + + TTestEnv env(runtime); + + runtime.UpdateCurrentTime(TInstant::Now()); + ui64 txId = 100; + + // Prepare table to export + // + TestCreateTable(runtime, ++txId, "/MyRoot", R"( + 
Name: "Table" + Columns { Name: "key" Type: "Utf8" } + Columns { Name: "value" Type: "Utf8" } + KeyColumnNames: ["key"] + )"); + env.TestWaitNotification(runtime, txId); + + // Start export + // + TPortManager portManager; + const ui16 port = portManager.GetPort(); + + TS3Mock s3Mock({}, TS3Mock::TSettings(port)); + UNIT_ASSERT(s3Mock.Start()); + + const auto request = Sprintf(R"( + OperationParams { + labels { + key: "uid" + value: "foo" + } + } + ExportToS3Settings { + endpoint: "localhost:%d" + scheme: HTTP + items { + source_path: "/MyRoot/Table" + destination_prefix: "" + } + } + )", port); + TestExport(runtime, ++txId, "/MyRoot", request, /*userSID*/ "user@builtin", /*peerName*/ "127.0.0.1:9876"); + + // Check audit record for export start + { + auto line = FindAuditLine(auditLines, "operation=EXPORT START"); + UNIT_ASSERT_STRING_CONTAINS(line, "component=schemeshard"); + UNIT_ASSERT_STRING_CONTAINS(line, "operation=EXPORT START"); + UNIT_ASSERT_STRING_CONTAINS(line, Sprintf("id=%lu", txId)); + UNIT_ASSERT_STRING_CONTAINS(line, "uid=foo"); + UNIT_ASSERT_STRING_CONTAINS(line, "remote_address=127.0.0.1"); + UNIT_ASSERT_STRING_CONTAINS(line, "subject=user@builtin"); + UNIT_ASSERT_STRING_CONTAINS(line, "database=/MyRoot"); + UNIT_ASSERT_STRING_CONTAINS(line, "status=SUCCESS"); + UNIT_ASSERT_STRING_CONTAINS(line, "detailed_status=SUCCESS"); + UNIT_ASSERT(!line.contains("reason")); + UNIT_ASSERT(!line.contains("start_time")); + UNIT_ASSERT(!line.contains("end_time")); + } + + // Do export + // + runtime.AdvanceCurrentTime(TDuration::Seconds(30)); + + env.TestWaitNotification(runtime, txId); + + const auto desc = TestGetExport(runtime, txId, "/MyRoot"); + const auto& entry = desc.GetResponse().GetEntry(); + UNIT_ASSERT_VALUES_EQUAL(entry.GetProgress(), Ydb::Export::ExportProgress::PROGRESS_DONE); + UNIT_ASSERT(entry.HasStartTime()); + UNIT_ASSERT(entry.HasEndTime()); + UNIT_ASSERT_LT(entry.GetStartTime().seconds(), entry.GetEndTime().seconds()); + + // Check audit 
record for export end + // + { + auto line = FindAuditLine(auditLines, "operation=EXPORT END"); + UNIT_ASSERT_STRING_CONTAINS(line, "component=schemeshard"); + UNIT_ASSERT_STRING_CONTAINS(line, "operation=EXPORT END"); + UNIT_ASSERT_STRING_CONTAINS(line, Sprintf("id=%lu", txId)); + UNIT_ASSERT_STRING_CONTAINS(line, "remote_address=127.0.0.1"); + UNIT_ASSERT_STRING_CONTAINS(line, "subject=user@builtin"); + UNIT_ASSERT_STRING_CONTAINS(line, "database=/MyRoot"); + UNIT_ASSERT_STRING_CONTAINS(line, "status=SUCCESS"); + UNIT_ASSERT_STRING_CONTAINS(line, "detailed_status=SUCCESS"); + UNIT_ASSERT(!line.contains("reason")); + UNIT_ASSERT_STRING_CONTAINS(line, "start_time="); + UNIT_ASSERT_STRING_CONTAINS(line, "end_time="); + } + } + + Y_UNIT_TEST(AuditCancelledExport) { + TTestBasicRuntime runtime; + std::vector auditLines; + runtime.AuditLogBackends = std::move(CreateTestAuditLogBackends(auditLines)); + + TTestEnv env(runtime); + + runtime.UpdateCurrentTime(TInstant::Now()); + ui64 txId = 100; + + // Prepare table to export + // + TestCreateTable(runtime, ++txId, "/MyRoot", R"( + Name: "Table" + Columns { Name: "key" Type: "Utf8" } + Columns { Name: "value" Type: "Utf8" } + KeyColumnNames: ["key"] + )"); + env.TestWaitNotification(runtime, txId); + + auto delayFunc = [](TAutoPtr& ev) { + if (ev->GetTypeRewrite() != TEvSchemeShard::EvModifySchemeTransaction) { + return false; + } + + return ev->Get()->Record + .GetTransaction(0).GetOperationType() == NKikimrSchemeOp::ESchemeOpBackup; + }; + + THolder delayed; + auto prevObserver = runtime.SetObserverFunc([&](TAutoPtr& ev) { + if (delayFunc(ev)) { + delayed.Reset(ev.Release()); + return TTestActorRuntime::EEventAction::DROP; + } + return TTestActorRuntime::EEventAction::PROCESS; + }); + + // Start export + // + TPortManager portManager; + const ui16 port = portManager.GetPort(); + + TS3Mock s3Mock({}, TS3Mock::TSettings(port)); + UNIT_ASSERT(s3Mock.Start()); + + const auto request = Sprintf(R"( + OperationParams { + labels 
{ + key: "uid" + value: "foo" + } + } + ExportToS3Settings { + endpoint: "localhost:%d" + scheme: HTTP + items { + source_path: "/MyRoot/Table" + destination_prefix: "" + } + } + )", port); + TestExport(runtime, ++txId, "/MyRoot", request, /*userSID*/ "user@builtin", /*peerName*/ "127.0.0.1:9876"); + const ui64 exportId = txId; + + // Check audit record for export start + { + auto line = FindAuditLine(auditLines, "operation=EXPORT START"); + UNIT_ASSERT_STRING_CONTAINS(line, "component=schemeshard"); + UNIT_ASSERT_STRING_CONTAINS(line, "operation=EXPORT START"); + UNIT_ASSERT_STRING_CONTAINS(line, Sprintf("id=%lu", exportId)); + UNIT_ASSERT_STRING_CONTAINS(line, "uid=foo"); + UNIT_ASSERT_STRING_CONTAINS(line, "remote_address=127.0.0.1"); + UNIT_ASSERT_STRING_CONTAINS(line, "subject=user@builtin"); + UNIT_ASSERT_STRING_CONTAINS(line, "database=/MyRoot"); + UNIT_ASSERT_STRING_CONTAINS(line, "status=SUCCESS"); + UNIT_ASSERT_STRING_CONTAINS(line, "detailed_status=SUCCESS"); + UNIT_ASSERT(!line.contains("reason")); + UNIT_ASSERT(!line.contains("start_time")); + UNIT_ASSERT(!line.contains("end_time")); + } + + // Do export (unsuccessfully) + // + runtime.AdvanceCurrentTime(TDuration::Seconds(30)); + + if (!delayed) { + TDispatchOptions opts; + opts.FinalEvents.emplace_back([&delayed](IEventHandle&) -> bool { + return bool(delayed); + }); + runtime.DispatchEvents(opts); + } + runtime.SetObserverFunc(prevObserver); + + // Cancel export mid-air + // + TestCancelExport(runtime, ++txId, "/MyRoot", exportId); + + auto desc = TestGetExport(runtime, exportId, "/MyRoot"); + auto entry = desc.GetResponse().GetEntry(); + UNIT_ASSERT_VALUES_EQUAL(entry.GetProgress(), Ydb::Export::ExportProgress::PROGRESS_CANCELLATION); + UNIT_ASSERT(entry.HasStartTime()); + UNIT_ASSERT(!entry.HasEndTime()); + + runtime.Send(delayed.Release(), 0, true); + env.TestWaitNotification(runtime, exportId); + + desc = TestGetExport(runtime, exportId, "/MyRoot", Ydb::StatusIds::CANCELLED); + entry = 
desc.GetResponse().GetEntry(); + UNIT_ASSERT_VALUES_EQUAL(entry.GetProgress(), Ydb::Export::ExportProgress::PROGRESS_CANCELLED); + UNIT_ASSERT(entry.HasStartTime()); + UNIT_ASSERT(entry.HasEndTime()); + UNIT_ASSERT_LT(entry.GetStartTime().seconds(), entry.GetEndTime().seconds()); + + // Check audit record for export end + // + { + auto line = FindAuditLine(auditLines, "operation=EXPORT END"); + UNIT_ASSERT_STRING_CONTAINS(line, "component=schemeshard"); + UNIT_ASSERT_STRING_CONTAINS(line, "operation=EXPORT END"); + UNIT_ASSERT_STRING_CONTAINS(line, Sprintf("id=%lu", exportId)); + UNIT_ASSERT_STRING_CONTAINS(line, "uid=foo"); + UNIT_ASSERT_STRING_CONTAINS(line, "remote_address=127.0.0.1"); // can't check the value + UNIT_ASSERT_STRING_CONTAINS(line, "subject=user@builtin"); + UNIT_ASSERT_STRING_CONTAINS(line, "database=/MyRoot"); + UNIT_ASSERT_STRING_CONTAINS(line, "status=ERROR"); + UNIT_ASSERT_STRING_CONTAINS(line, "detailed_status=CANCELLED"); + UNIT_ASSERT_STRING_CONTAINS(line, "reason=Cancelled"); + UNIT_ASSERT_STRING_CONTAINS(line, "start_time="); + UNIT_ASSERT_STRING_CONTAINS(line, "end_time="); + } + } + Y_UNIT_TEST(ExportPartitioningSettings) { TPortManager portManager; const ui16 port = portManager.GetPort(); @@ -1818,4 +2098,41 @@ partitioning_settings { )")); } + Y_UNIT_TEST(UserSID) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 txId = 100; + + TestCreateTable(runtime, ++txId, "/MyRoot", R"( + Name: "Table" + Columns { Name: "key" Type: "Utf8" } + Columns { Name: "value" Type: "Utf8" } + KeyColumnNames: ["key"] + )"); + env.TestWaitNotification(runtime, txId); + + TPortManager portManager; + const ui16 port = portManager.GetPort(); + + TS3Mock s3Mock({}, TS3Mock::TSettings(port)); + UNIT_ASSERT(s3Mock.Start()); + + const TString request = Sprintf(R"( + ExportToS3Settings { + endpoint: "localhost:%d" + scheme: HTTP + items { + source_path: "/MyRoot/Table" + destination_prefix: "" + } + } + )", port); + const TString userSID = 
"user@builtin"; + TestExport(runtime, ++txId, "/MyRoot", request, userSID); + + const auto desc = TestGetExport(runtime, txId, "/MyRoot"); + const auto& entry = desc.GetResponse().GetEntry(); + UNIT_ASSERT_VALUES_EQUAL(entry.GetProgress(), Ydb::Export::ExportProgress::PROGRESS_PREPARING); + UNIT_ASSERT_VALUES_EQUAL(entry.GetUserSID(), userSID); + } } diff --git a/ydb/core/tx/schemeshard/ut_export/ya.make b/ydb/core/tx/schemeshard/ut_export/ya.make index 4d5bf91e2698..c62dc9ea8ebc 100644 --- a/ydb/core/tx/schemeshard/ut_export/ya.make +++ b/ydb/core/tx/schemeshard/ut_export/ya.make @@ -20,6 +20,7 @@ IF (NOT OS_WINDOWS) library/cpp/getopt library/cpp/regex/pcre library/cpp/svnversion + contrib/libs/aws-sdk-cpp/aws-cpp-sdk-core ydb/core/testlib/default ydb/core/tx ydb/core/tx/schemeshard/ut_helpers diff --git a/ydb/core/tx/schemeshard/ut_export_reboots_s3/ut_export_reboots_s3.cpp b/ydb/core/tx/schemeshard/ut_export_reboots_s3/ut_export_reboots_s3.cpp index 5e1e42d17ce9..97e34a6d3fa4 100644 --- a/ydb/core/tx/schemeshard/ut_export_reboots_s3/ut_export_reboots_s3.cpp +++ b/ydb/core/tx/schemeshard/ut_export_reboots_s3/ut_export_reboots_s3.cpp @@ -4,10 +4,28 @@ #include +#include + +#include + using namespace NSchemeShardUT_Private; using namespace NSchemeShardUT_Private::NExportReboots; using namespace NKikimr::NWrappers::NTestHelpers; +namespace { + +Aws::SDKOptions Options; + +Y_TEST_HOOK_BEFORE_RUN(InitAwsAPI) { + Aws::InitAPI(Options); +} + +Y_TEST_HOOK_AFTER_RUN(ShutdownAwsAPI) { + Aws::ShutdownAPI(Options); +} + +} + Y_UNIT_TEST_SUITE(TExportToS3WithRebootsTests) { using TUnderlying = std::function&, const TString&, TTestWithReboots&)>; diff --git a/ydb/core/tx/schemeshard/ut_export_reboots_s3/ya.make b/ydb/core/tx/schemeshard/ut_export_reboots_s3/ya.make index bc7ca966e0dc..caf4fb7de362 100644 --- a/ydb/core/tx/schemeshard/ut_export_reboots_s3/ya.make +++ b/ydb/core/tx/schemeshard/ut_export_reboots_s3/ya.make @@ -19,6 +19,7 @@ PEERDIR( library/cpp/getopt 
library/cpp/regex/pcre library/cpp/svnversion + contrib/libs/aws-sdk-cpp/aws-cpp-sdk-core ydb/core/testlib/default ydb/core/tx ydb/core/tx/schemeshard/ut_helpers diff --git a/ydb/core/tx/schemeshard/ut_extsubdomain/ut_extsubdomain.cpp b/ydb/core/tx/schemeshard/ut_extsubdomain/ut_extsubdomain.cpp index 4737ccdddc6f..8bbd846af415 100644 --- a/ydb/core/tx/schemeshard/ut_extsubdomain/ut_extsubdomain.cpp +++ b/ydb/core/tx/schemeshard/ut_extsubdomain/ut_extsubdomain.cpp @@ -119,7 +119,7 @@ Y_UNIT_TEST_SUITE(TSchemeShardExtSubDomainTest) { {NLs::InExternalSubdomain}); } - Y_UNIT_TEST_FLAG(CreateAndAlterWithoutEnablingTx, AlterDatabaseCreateHiveFirst) { + Y_UNIT_TEST_FLAGS(CreateAndAlterWithoutEnablingTx, AlterDatabaseCreateHiveFirst, ExternalHive) { TTestBasicRuntime runtime; TTestEnv env(runtime, TTestEnvOptions().EnableAlterDatabaseCreateHiveFirst(AlterDatabaseCreateHiveFirst)); ui64 txId = 100; @@ -137,15 +137,19 @@ Y_UNIT_TEST_SUITE(TSchemeShardExtSubDomainTest) { env.TestWaitNotification(runtime, {txId, txId - 1}); TestAlterExtSubDomain(runtime, ++txId, "/MyRoot/dir", - R"( - Name: "USER_0" - ExternalSchemeShard: true - )", + Sprintf(R"( + Name: "USER_0" + ExternalSchemeShard: true + + ExternalHive: %s + )", + ToString(ExternalHive).c_str() + ), {{NKikimrScheme::StatusInvalidParameter, "ExtSubDomain without coordinators/mediators"}} ); } - Y_UNIT_TEST_FLAG(CreateAndAlter, AlterDatabaseCreateHiveFirst) { + Y_UNIT_TEST_FLAGS(CreateAndAlter, AlterDatabaseCreateHiveFirst, ExternalHive) { TTestBasicRuntime runtime; TTestEnv env(runtime, TTestEnvOptions().EnableAlterDatabaseCreateHiveFirst(AlterDatabaseCreateHiveFirst)); ui64 txId = 100; @@ -156,41 +160,49 @@ Y_UNIT_TEST_SUITE(TSchemeShardExtSubDomainTest) { ); TestAlterExtSubDomain(runtime, ++txId, "/MyRoot", - R"( - Name: "USER_0" - PlanResolution: 50 - Coordinators: 3 - Mediators: 3 - TimeCastBucketsPerMediator: 2 - )", + Sprintf(R"( + Name: "USER_0" + PlanResolution: 50 + Coordinators: 3 + Mediators: 3 + 
TimeCastBucketsPerMediator: 2 + + ExternalHive: %s + )", + ToString(ExternalHive).c_str() + ), {{NKikimrScheme::StatusInvalidParameter, "ExtSubDomain without ExternalSchemeShard"}} ); TestAlterExtSubDomain(runtime, ++txId, "/MyRoot", - R"( - StoragePools { - Name: "pool-1" - Kind: "pool-kind-1" - } - StoragePools { - Name: "pool-2" - Kind: "pool-kind-2" - } - StoragePools { - Name: "/dc-1/users/tenant-1:hdd" - Kind: "hdd" - } - StoragePools { - Name: "/dc-1/users/tenant-1:hdd-1" - Kind: "hdd-1" - } - PlanResolution: 50 - Coordinators: 1 - Mediators: 1 - TimeCastBucketsPerMediator: 2 - ExternalSchemeShard: true - Name: "USER_0" - )" + Sprintf(R"( + StoragePools { + Name: "pool-1" + Kind: "pool-kind-1" + } + StoragePools { + Name: "pool-2" + Kind: "pool-kind-2" + } + StoragePools { + Name: "/dc-1/users/tenant-1:hdd" + Kind: "hdd" + } + StoragePools { + Name: "/dc-1/users/tenant-1:hdd-1" + Kind: "hdd-1" + } + PlanResolution: 50 + Coordinators: 1 + Mediators: 1 + TimeCastBucketsPerMediator: 2 + ExternalSchemeShard: true + Name: "USER_0" + + ExternalHive: %s + )", + ToString(ExternalHive).c_str() + ) ); env.TestWaitNotification(runtime, {txId, txId - 1, txId - 2}); @@ -247,7 +259,64 @@ Y_UNIT_TEST_SUITE(TSchemeShardExtSubDomainTest) { NLs::Finished}); } - Y_UNIT_TEST_FLAG(CreateAndAlterTwice, AlterDatabaseCreateHiveFirst) { + Y_UNIT_TEST_FLAGS(CreateAndSameAlterTwice, AlterDatabaseCreateHiveFirst, ExternalHive) { + TTestBasicRuntime runtime; + TTestEnv env(runtime, TTestEnvOptions().EnableAlterDatabaseCreateHiveFirst(AlterDatabaseCreateHiveFirst)); + ui64 txId = 100; + + + TestCreateExtSubDomain(runtime, ++txId, "/MyRoot", + R"(Name: "USER_0")" + ); + + const TString alterText = Sprintf(R"( + Name: "USER_0" + ExternalSchemeShard: true + PlanResolution: 50 + Coordinators: 1 + Mediators: 1 + TimeCastBucketsPerMediator: 2 + StoragePools { + Name: "pool-1" + Kind: "hdd" + } + + ExternalHive: %s + )", + ToString(ExternalHive).c_str() + ); + + TestAlterExtSubDomain(runtime, 
++txId, "/MyRoot", alterText); + env.TestWaitNotification(runtime, {txId, txId - 1}); + + TestAlterExtSubDomain(runtime, ++txId, "/MyRoot", alterText); + env.TestWaitNotification(runtime, txId); + + ui64 tenantSchemeShard = 0; + TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0"), { + NLs::PathExist, + NLs::IsExternalSubDomain("USER_0"), + NLs::ExtractTenantSchemeshard(&tenantSchemeShard), + }); + + UNIT_ASSERT(tenantSchemeShard != 0 + && tenantSchemeShard != (ui64)-1 + && tenantSchemeShard != TTestTxConfig::SchemeShard + ); + + TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0"), { + NLs::PathExist, + NLs::IsExternalSubDomain("USER_0"), + NLs::StoragePoolsEqual({"pool-1"}), + }); + + TestDescribeResult(DescribePath(runtime, tenantSchemeShard, "/MyRoot/USER_0"), { + NLs::PathExist, + NLs::StoragePoolsEqual({"pool-1"}) + }); + } + + Y_UNIT_TEST_FLAGS(CreateAndAlterAlterAddStoragePool, AlterDatabaseCreateHiveFirst, ExternalHive) { TTestBasicRuntime runtime; TTestEnv env(runtime, TTestEnvOptions().EnableAlterDatabaseCreateHiveFirst(AlterDatabaseCreateHiveFirst)); ui64 txId = 100; @@ -258,34 +327,42 @@ Y_UNIT_TEST_SUITE(TSchemeShardExtSubDomainTest) { ); TestAlterExtSubDomain(runtime, ++txId, "/MyRoot", - R"( - Name: "USER_0" - ExternalSchemeShard: true - PlanResolution: 50 - Coordinators: 1 - Mediators: 1 - TimeCastBucketsPerMediator: 2 - StoragePools { - Name: "pool-1" - Kind: "hdd" - } - )" + Sprintf(R"( + Name: "USER_0" + ExternalSchemeShard: true + PlanResolution: 50 + Coordinators: 1 + Mediators: 1 + TimeCastBucketsPerMediator: 2 + StoragePools { + Name: "pool-1" + Kind: "hdd" + } + + ExternalHive: %s + )", + ToString(ExternalHive).c_str() + ) ); env.TestWaitNotification(runtime, {txId, txId - 1}); TestAlterExtSubDomain(runtime, ++txId, "/MyRoot", - R"( - Name: "USER_0" - StoragePools { - Name: "pool-1" - Kind: "hdd" - } - StoragePools { - Name: "pool-2" - Kind: "hdd-1" - } - )" + Sprintf(R"( + Name: "USER_0" + StoragePools { + Name: "pool-1" + 
Kind: "hdd" + } + StoragePools { + Name: "pool-2" + Kind: "hdd-1" + } + + ExternalHive: %s + )", + ToString(ExternalHive).c_str() + ) ); env.TestWaitNotification(runtime, txId); @@ -322,6 +399,214 @@ Y_UNIT_TEST_SUITE(TSchemeShardExtSubDomainTest) { NLs::UserAttrsEqual({{"user__attr_1", "value"}})}); } + Y_UNIT_TEST_FLAGS(CreateAndAlterAlterSameStoragePools, AlterDatabaseCreateHiveFirst, ExternalHive) { + TTestBasicRuntime runtime; + TTestEnv env(runtime, TTestEnvOptions().EnableAlterDatabaseCreateHiveFirst(AlterDatabaseCreateHiveFirst)); + ui64 txId = 100; + + + TestCreateExtSubDomain(runtime, ++txId, "/MyRoot", + R"(Name: "USER_0")" + ); + + TestAlterExtSubDomain(runtime, ++txId, "/MyRoot", + Sprintf(R"( + Name: "USER_0" + ExternalSchemeShard: true + PlanResolution: 50 + Coordinators: 1 + Mediators: 1 + TimeCastBucketsPerMediator: 2 + StoragePools { + Name: "pool-1" + Kind: "hdd" + } + + ExternalHive: %s + )", + ToString(ExternalHive).c_str() + ) + ); + + env.TestWaitNotification(runtime, {txId, txId - 1}); + + TestAlterExtSubDomain(runtime, ++txId, "/MyRoot", + Sprintf(R"( + Name: "USER_0" + ExternalSchemeShard: true + PlanResolution: 50 + Coordinators: 1 + Mediators: 1 + TimeCastBucketsPerMediator: 2 + StoragePools { + Name: "pool-1" + Kind: "hdd" + } + DatabaseQuotas { + data_size_hard_quota: 1288490188800 + data_size_soft_quota: 1224065679360 + } + + ExternalHive: %s + )", + ToString(ExternalHive).c_str() + ) + ); + env.TestWaitNotification(runtime, txId); + + + ui64 tenantSchemeShard = 0; + TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0"), { + NLs::PathExist, + NLs::IsExternalSubDomain("USER_0"), + NLs::ExtractTenantSchemeshard(&tenantSchemeShard) + }); + + UNIT_ASSERT(tenantSchemeShard != 0 + && tenantSchemeShard != (ui64)-1 + && tenantSchemeShard != TTestTxConfig::SchemeShard + ); + + TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0"), { + NLs::PathExist, + NLs::IsExternalSubDomain("USER_0"), + NLs::StoragePoolsEqual({"pool-1"}) + }); + + 
TestDescribeResult(DescribePath(runtime, tenantSchemeShard, "/MyRoot/USER_0"), { + NLs::PathExist, + NLs::StoragePoolsEqual({"pool-1"}) + }); + } + + Y_UNIT_TEST_FLAGS(AlterWithPlainAlterSubdomain, AlterDatabaseCreateHiveFirst, ExternalHive) { + TTestBasicRuntime runtime; + TTestEnv env(runtime, TTestEnvOptions().EnableAlterDatabaseCreateHiveFirst(AlterDatabaseCreateHiveFirst)); + ui64 txId = 100; + + // Create extsubdomain + + TestCreateExtSubDomain(runtime, ++txId, "/MyRoot", + R"(Name: "USER_0")" + ); + TestAlterExtSubDomain(runtime, ++txId, "/MyRoot", + Sprintf(R"( + Name: "USER_0" + ExternalSchemeShard: true + PlanResolution: 50 + Coordinators: 1 + Mediators: 1 + TimeCastBucketsPerMediator: 2 + StoragePools { + Name: "pool-1" + Kind: "hdd" + } + + ExternalHive: %s + )", + ToString(ExternalHive).c_str() + ) + ); + env.TestWaitNotification(runtime, {txId, txId - 1}); + + // Altering extsubdomain but with plain altersubdomain should succeed + // (post tenant migration compatibility) + + //NOTE: SubDomain and not ExtSubdomain + TestAlterSubDomain(runtime, ++txId, "/MyRoot", + Sprintf(R"( + Name: "USER_0" + ExternalSchemeShard: true + PlanResolution: 50 + Coordinators: 1 + Mediators: 1 + TimeCastBucketsPerMediator: 2 + StoragePools { + Name: "pool-1" + Kind: "hdd" + } + + ExternalHive: %s + )", + ToString(ExternalHive).c_str() + ) + ); + env.TestWaitNotification(runtime, txId); + } + + Y_UNIT_TEST_FLAGS(AlterTwiceAndWithPlainAlterSubdomain, AlterDatabaseCreateHiveFirst, ExternalHive) { + TTestBasicRuntime runtime; + TTestEnv env(runtime, TTestEnvOptions().EnableAlterDatabaseCreateHiveFirst(AlterDatabaseCreateHiveFirst)); + ui64 txId = 100; + + TestCreateExtSubDomain(runtime, ++txId, "/MyRoot", + R"(Name: "USER_0")" + ); + TestAlterExtSubDomain(runtime, ++txId, "/MyRoot", + Sprintf(R"( + Name: "USER_0" + ExternalSchemeShard: true + PlanResolution: 50 + Coordinators: 1 + Mediators: 1 + TimeCastBucketsPerMediator: 2 + StoragePools { + Name: "pool-1" + Kind: "hdd" + } 
+ + ExternalHive: %s + )", + ToString(ExternalHive).c_str() + ) + ); + env.TestWaitNotification(runtime, {txId, txId - 1}); + + AsyncAlterExtSubDomain(runtime, ++txId, "/MyRoot", + Sprintf(R"( + Name: "USER_0" + ExternalSchemeShard: true + PlanResolution: 50 + Coordinators: 1 + Mediators: 1 + TimeCastBucketsPerMediator: 2 + StoragePools { + Name: "pool-1" + Kind: "hdd" + } + + ExternalHive: %s + )", + ToString(ExternalHive).c_str() + ) + ); + // TestModificationResults(runtime, txId, {NKikimrScheme::StatusAccepted}); + const auto firstAlterTxId = txId; + + //NOTE: SubDomain vs ExtSubDomain + TestAlterSubDomain(runtime, ++txId, "/MyRoot", + Sprintf(R"( + Name: "USER_0" + ExternalSchemeShard: true + PlanResolution: 50 + Coordinators: 1 + Mediators: 1 + TimeCastBucketsPerMediator: 2 + StoragePools { + Name: "pool-1" + Kind: "hdd" + } + + ExternalHive: %s + )", + ToString(ExternalHive).c_str() + ), + {{NKikimrScheme::StatusMultipleModifications}} + ); + + env.TestWaitNotification(runtime, firstAlterTxId); + } + Y_UNIT_TEST(CreateWithOnlyDotsNotAllowed) { TTestBasicRuntime runtime; TTestEnv env(runtime); @@ -1082,7 +1367,7 @@ Y_UNIT_TEST_SUITE(TSchemeShardExtSubDomainTest) { testCreations(); } - Y_UNIT_TEST_FLAG(Drop, AlterDatabaseCreateHiveFirst) { + Y_UNIT_TEST_FLAGS(Drop, AlterDatabaseCreateHiveFirst, ExternalHive) { TTestBasicRuntime runtime; TTestEnv env(runtime, TTestEnvOptions().EnableAlterDatabaseCreateHiveFirst(AlterDatabaseCreateHiveFirst)); ui64 txId = 100; @@ -1093,18 +1378,22 @@ Y_UNIT_TEST_SUITE(TSchemeShardExtSubDomainTest) { ); TestAlterExtSubDomain(runtime, ++txId, "/MyRoot", - R"( - Name: "USER_0" - ExternalSchemeShard: true - PlanResolution: 50 - Coordinators: 1 - Mediators: 1 - TimeCastBucketsPerMediator: 2 - StoragePools { - Name: "pool-1" - Kind: "hdd" - } - )" + Sprintf(R"( + Name: "USER_0" + ExternalSchemeShard: true + PlanResolution: 50 + Coordinators: 1 + Mediators: 1 + TimeCastBucketsPerMediator: 2 + StoragePools { + Name: "pool-1" + Kind: 
"hdd" + } + + ExternalHive: %s + )", + ToString(ExternalHive).c_str() + ) ); env.TestWaitNotification(runtime, {txId, txId - 1}); @@ -1139,16 +1428,18 @@ Y_UNIT_TEST_SUITE(TSchemeShardExtSubDomainTest) { NLs::PathsInsideDomain(1), NLs::ShardsInsideDomain(0)}); + const ui64 AdditionalHiveTablet = (ExternalHive ? 1 : 0); + TestDescribeResult(DescribePath(runtime, "/MyRoot/USER_0/dir/table_1"), {NLs::PathRedirected, NLs::PathsInsideDomain(0), - NLs::ShardsInsideDomain(3)}); + NLs::ShardsInsideDomain(3 + AdditionalHiveTablet)}); TestDescribeResult(DescribePath(runtime, tenantSchemeShard, "/MyRoot/USER_0/dir/table_1"), {NLs::PathExist, NLs::Finished, NLs::PathsInsideDomain(2), - NLs::ShardsInsideDomain(5)}); + NLs::ShardsInsideDomain(5 + AdditionalHiveTablet)}); TestForceDropExtSubDomain(runtime, ++txId, "/MyRoot", "USER_0"); env.TestWaitNotification(runtime, txId); @@ -1164,7 +1455,7 @@ Y_UNIT_TEST_SUITE(TSchemeShardExtSubDomainTest) { NLs::PathsInsideDomain(0), NLs::ShardsInsideDomain(0)}); - env.TestWaitTabletDeletion(runtime, xrange(TTestTxConfig::FakeHiveTablets, TTestTxConfig::FakeHiveTablets + 5)); + // env.TestWaitTabletDeletion(runtime, xrange(TTestTxConfig::FakeHiveTablets, TTestTxConfig::FakeHiveTablets + 5)); UNIT_ASSERT(!CheckLocalRowExists(runtime, TTestTxConfig::SchemeShard, "SubDomains", "PathId", 2)); UNIT_ASSERT(!CheckLocalRowExists(runtime, TTestTxConfig::SchemeShard, "Paths", "Id", 2)); } @@ -1209,7 +1500,7 @@ Y_UNIT_TEST_SUITE(TSchemeShardExtSubDomainTest) { } } - Y_UNIT_TEST_FLAG(CreateAndAlterThenDropChangesParent, AlterDatabaseCreateHiveFirst) { + Y_UNIT_TEST_FLAGS(CreateAndAlterThenDropChangesParent, AlterDatabaseCreateHiveFirst, ExternalHive) { TTestBasicRuntime runtime; TTestEnv env(runtime, TTestEnvOptions().EnableAlterDatabaseCreateHiveFirst(AlterDatabaseCreateHiveFirst)); ui64 txId = 100; @@ -1218,18 +1509,22 @@ Y_UNIT_TEST_SUITE(TSchemeShardExtSubDomainTest) { R"(Name: "USER_0")" ); TestAlterExtSubDomain(runtime, ++txId, "/MyRoot", - R"( - 
Name: "USER_0" - ExternalSchemeShard: true - PlanResolution: 50 - Coordinators: 1 - Mediators: 1 - TimeCastBucketsPerMediator: 2 - StoragePools { - Name: "pool-1" - Kind: "hdd" - } - )" + Sprintf(R"( + Name: "USER_0" + ExternalSchemeShard: true + PlanResolution: 50 + Coordinators: 1 + Mediators: 1 + TimeCastBucketsPerMediator: 2 + StoragePools { + Name: "pool-1" + Kind: "hdd" + } + + ExternalHive: %s + )", + ToString(ExternalHive).c_str() + ) ); env.TestWaitNotification(runtime, {txId, txId - 1}); diff --git a/ydb/core/tx/schemeshard/ut_helpers/auditlog_helpers.cpp b/ydb/core/tx/schemeshard/ut_helpers/auditlog_helpers.cpp new file mode 100644 index 000000000000..9826eafc523a --- /dev/null +++ b/ydb/core/tx/schemeshard/ut_helpers/auditlog_helpers.cpp @@ -0,0 +1,53 @@ +#include +#include +#include + +#include +#include +#include + +#include + +#include "auditlog_helpers.h" + +namespace NSchemeShardUT_Private { + +namespace { + +class TMemoryLogBackend: public TLogBackend { +public: + std::vector& Buffer; + + TMemoryLogBackend(std::vector& buffer) + : Buffer(buffer) + {} + + virtual void WriteData(const TLogRecord& rec) override { + Buffer.emplace_back(rec.Data, rec.Len); + } + + virtual void ReopenLog() override { + } +}; + +} // anonymous namespace + +NAudit::TAuditLogBackends CreateTestAuditLogBackends(std::vector& lineBuffer) { + NAudit::TAuditLogBackends logBackends; + logBackends[NKikimrConfig::TAuditConfig::TXT].emplace_back(new TMemoryLogBackend(lineBuffer)); + return logBackends; +} + +std::string FindAuditLine(const std::vector& auditLines, const std::string& substr) { + Cerr << "AUDIT LOG buffer(" << auditLines.size() << "):" << Endl; + for (auto i : auditLines) { + Cerr << " " << i << Endl; + } + auto found = std::find_if(auditLines.begin(), auditLines.end(), [&](auto i) { return i.contains(substr); }); + UNIT_ASSERT_C(found != auditLines.end(), "No audit record with substring: '" + substr + "'"); + auto line = *found; + Cerr << "AUDIT LOG checked line:" 
<< Endl << " " << line << Endl; + return line; +} + +} // namespace NSchemeShardUT_Private diff --git a/ydb/core/tx/schemeshard/ut_helpers/auditlog_helpers.h b/ydb/core/tx/schemeshard/ut_helpers/auditlog_helpers.h new file mode 100644 index 000000000000..b7d57ba3cb70 --- /dev/null +++ b/ydb/core/tx/schemeshard/ut_helpers/auditlog_helpers.h @@ -0,0 +1,16 @@ +#pragma once + +#include +#include + +#include + +namespace NSchemeShardUT_Private { + +using namespace NKikimr; + +NAudit::TAuditLogBackends CreateTestAuditLogBackends(std::vector& lineBuffer); + +std::string FindAuditLine(const std::vector& auditLines, const std::string& substr); + +} // namespace NSchemeShardUT_Private diff --git a/ydb/core/tx/schemeshard/ut_helpers/helpers.cpp b/ydb/core/tx/schemeshard/ut_helpers/helpers.cpp index 0fe031eb3cf8..dc3cc8833366 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/helpers.cpp +++ b/ydb/core/tx/schemeshard/ut_helpers/helpers.cpp @@ -1026,7 +1026,7 @@ namespace NSchemeShardUT_Private { return result; } - void AsyncExport(TTestActorRuntime& runtime, ui64 schemeshardId, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID) { + void AsyncExport(TTestActorRuntime& runtime, ui64 schemeshardId, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID, const TString& peerName) { NKikimrExport::TCreateExportRequest request; UNIT_ASSERT(google::protobuf::TextFormat::ParseFromString(requestStr, &request)); @@ -1044,26 +1044,29 @@ namespace NSchemeShardUT_Private { if (userSID) { ev->Record.SetUserSID(userSID); } + if (peerName) { + ev->Record.SetPeerName(peerName); + } AsyncSend(runtime, schemeshardId, ev.Release()); } - void AsyncExport(TTestActorRuntime& runtime, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID) { - AsyncExport(runtime, TTestTxConfig::SchemeShard, id, dbName, requestStr, userSID); + void AsyncExport(TTestActorRuntime& runtime, ui64 id, const TString& dbName, const TString& 
requestStr, const TString& userSID, const TString& peerName) { + AsyncExport(runtime, TTestTxConfig::SchemeShard, id, dbName, requestStr, userSID, peerName); } - void TestExport(TTestActorRuntime& runtime, ui64 schemeshardId, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID, + void TestExport(TTestActorRuntime& runtime, ui64 schemeshardId, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID, const TString& peerName, Ydb::StatusIds::StatusCode expectedStatus) { - AsyncExport(runtime, schemeshardId, id, dbName, requestStr, userSID); + AsyncExport(runtime, schemeshardId, id, dbName, requestStr, userSID, peerName); TAutoPtr handle; auto ev = runtime.GrabEdgeEvent(handle); UNIT_ASSERT_EQUAL(ev->Record.GetResponse().GetEntry().GetStatus(), expectedStatus); } - void TestExport(TTestActorRuntime& runtime, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID, + void TestExport(TTestActorRuntime& runtime, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID, const TString& peerName, Ydb::StatusIds::StatusCode expectedStatus) { - TestExport(runtime, TTestTxConfig::SchemeShard, id, dbName, requestStr, userSID, expectedStatus); + TestExport(runtime, TTestTxConfig::SchemeShard, id, dbName, requestStr, userSID, peerName, expectedStatus); } NKikimrExport::TEvGetExportResponse TestGetExport(TTestActorRuntime& runtime, ui64 schemeshardId, ui64 id, const TString& dbName, @@ -1152,7 +1155,7 @@ namespace NSchemeShardUT_Private { return TestForgetExport(runtime, TTestTxConfig::SchemeShard, txId, dbName, exportId, expectedStatus); } - void AsyncImport(TTestActorRuntime& runtime, ui64 schemeshardId, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID) { + void AsyncImport(TTestActorRuntime& runtime, ui64 schemeshardId, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID, const TString& peerName) { 
NKikimrImport::TCreateImportRequest request; UNIT_ASSERT(google::protobuf::TextFormat::ParseFromString(requestStr, &request)); @@ -1160,26 +1163,29 @@ namespace NSchemeShardUT_Private { if (userSID) { ev->Record.SetUserSID(userSID); } + if (peerName) { + ev->Record.SetPeerName(peerName); + } AsyncSend(runtime, schemeshardId, ev.Release()); } - void AsyncImport(TTestActorRuntime& runtime, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID) { - AsyncImport(runtime, TTestTxConfig::SchemeShard, id, dbName, requestStr, userSID); + void AsyncImport(TTestActorRuntime& runtime, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID, const TString& peerName) { + AsyncImport(runtime, TTestTxConfig::SchemeShard, id, dbName, requestStr, userSID, peerName); } - void TestImport(TTestActorRuntime& runtime, ui64 schemeshardId, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID, + void TestImport(TTestActorRuntime& runtime, ui64 schemeshardId, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID, const TString& peerName, Ydb::StatusIds::StatusCode expectedStatus) { - AsyncImport(runtime, schemeshardId, id, dbName, requestStr, userSID); + AsyncImport(runtime, schemeshardId, id, dbName, requestStr, userSID, peerName); TAutoPtr handle; auto ev = runtime.GrabEdgeEvent(handle); UNIT_ASSERT_EQUAL(ev->Record.GetResponse().GetEntry().GetStatus(), expectedStatus); } - void TestImport(TTestActorRuntime& runtime, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID, + void TestImport(TTestActorRuntime& runtime, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID, const TString& peerName, Ydb::StatusIds::StatusCode expectedStatus) { - TestImport(runtime, TTestTxConfig::SchemeShard, id, dbName, requestStr, userSID, expectedStatus); + TestImport(runtime, TTestTxConfig::SchemeShard, id, dbName, requestStr, userSID, 
peerName, expectedStatus); } NKikimrImport::TEvGetImportResponse TestGetImport(TTestActorRuntime& runtime, ui64 schemeshardId, ui64 id, const TString& dbName, @@ -1500,6 +1506,7 @@ namespace NSchemeShardUT_Private { (let child '('ChildrenLimit (Uint64 '%lu))) (let acl '('AclByteSizeLimit (Uint64 '%lu))) (let columns '('TableColumnsLimit (Uint64 '%lu))) + (let columnColumns '('ColumnTableColumnsLimit (Uint64 '%lu))) (let colName '('TableColumnNameLengthLimit (Uint64 '%lu))) (let keyCols '('TableKeyColumnsLimit (Uint64 '%lu))) (let indices '('TableIndicesLimit (Uint64 '%lu))) @@ -1512,11 +1519,11 @@ namespace NSchemeShardUT_Private { (let pqPartitions '('PQPartitionsLimit (Uint64 '%lu))) (let exports '('ExportsLimit (Uint64 '%lu))) (let imports '('ImportsLimit (Uint64 '%lu))) - (let ret (AsList (UpdateRow 'SubDomains key '(depth paths child acl columns colName keyCols indices streams shards pathShards consCopy maxPathLength extraSymbols pqPartitions exports imports)))) + (let ret (AsList (UpdateRow 'SubDomains key '(depth paths child acl columns columnColumns colName keyCols indices streams shards pathShards consCopy maxPathLength extraSymbols pqPartitions exports imports)))) (return ret) ) )", domainId, limits.MaxDepth, limits.MaxPaths, limits.MaxChildrenInDir, limits.MaxAclBytesSize, - limits.MaxTableColumns, limits.MaxTableColumnNameLength, limits.MaxTableKeyColumns, + limits.MaxTableColumns, limits.MaxColumnTableColumns, limits.MaxTableColumnNameLength, limits.MaxTableKeyColumns, limits.MaxTableIndices, limits.MaxTableCdcStreams, limits.MaxShards, limits.MaxShardsInPath, limits.MaxConsistentCopyTargets, limits.MaxPathElementLength, escapedStr.c_str(), limits.MaxPQPartitions, @@ -1672,12 +1679,18 @@ namespace NSchemeShardUT_Private { *index.mutable_data_columns() = {cfg.DataColumns.begin(), cfg.DataColumns.end()}; switch (cfg.IndexType) { - case NKikimrSchemeOp::EIndexTypeGlobal: - *index.mutable_global_index() = Ydb::Table::GlobalIndex(); - break; - case 
NKikimrSchemeOp::EIndexTypeGlobalAsync: - *index.mutable_global_async_index() = Ydb::Table::GlobalAsyncIndex(); - break; + case NKikimrSchemeOp::EIndexTypeGlobal: { + auto& settings = *index.mutable_global_index()->mutable_settings(); + if (cfg.GlobalIndexSettings) { + cfg.GlobalIndexSettings[0].SerializeTo(settings); + } + } break; + case NKikimrSchemeOp::EIndexTypeGlobalAsync: { + auto& settings = *index.mutable_global_async_index()->mutable_settings(); + if (cfg.GlobalIndexSettings) { + cfg.GlobalIndexSettings[0].SerializeTo(settings); + } + } break; default: UNIT_ASSERT_C(false, "Unknown index type: " << static_cast(cfg.IndexType)); } @@ -1987,7 +2000,7 @@ namespace NSchemeShardUT_Private { Runtime.SendToPipe(shardData.ShardId, sender, proposal); TAutoPtr handle; auto event = Runtime.GrabEdgeEventIf(handle, - [=](const TEvDataShard::TEvProposeTransactionResult& event) { + [this, shardData](const TEvDataShard::TEvProposeTransactionResult& event) { return event.GetTxId() == TxId && event.GetOrigin() == shardData.ShardId; }); activeZone = true; diff --git a/ydb/core/tx/schemeshard/ut_helpers/helpers.h b/ydb/core/tx/schemeshard/ut_helpers/helpers.h index e59f1d3fe07d..dc97f8b5cd8b 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/helpers.h +++ b/ydb/core/tx/schemeshard/ut_helpers/helpers.h @@ -20,6 +20,7 @@ #include #include +#include #include @@ -48,6 +49,19 @@ template \ void N(NUnitTest::TTestContext&) +#define Y_UNIT_TEST_FLAGS(N, OPT1, OPT2) \ + template void N(NUnitTest::TTestContext&); \ + struct TTestRegistration##N { \ + TTestRegistration##N() { \ + TCurrentTest::AddTest(#N, static_cast(&N), false); \ + TCurrentTest::AddTest(#N "-" #OPT2, static_cast(&N), false); \ + TCurrentTest::AddTest(#N "-" #OPT1, static_cast(&N), false); \ + TCurrentTest::AddTest(#N "-" #OPT1 "-" #OPT2, static_cast(&N), false); \ + } \ + }; \ + static TTestRegistration##N testRegistration##N; \ + template \ + void N(NUnitTest::TTestContext&) namespace NSchemeShardUT_Private { using 
namespace NKikimr; @@ -348,6 +362,7 @@ namespace NSchemeShardUT_Private { NKikimrSchemeOp::EIndexType IndexType = NKikimrSchemeOp::EIndexTypeGlobal; TVector IndexColumns; TVector DataColumns; + TVector GlobalIndexSettings = {}; }; std::unique_ptr CreateBuildColumnRequest(ui64 id, const TString& dbName, const TString& src, const TString& columnName, const Ydb::TypedValue& literal); @@ -370,11 +385,11 @@ namespace NSchemeShardUT_Private { ////////// export TVector GetExportTargetPaths(const TString& requestStr); - void AsyncExport(TTestActorRuntime& runtime, ui64 schemeshardId, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID = ""); - void AsyncExport(TTestActorRuntime& runtime, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID = ""); - void TestExport(TTestActorRuntime& runtime, ui64 schemeshardId, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID = "", + void AsyncExport(TTestActorRuntime& runtime, ui64 schemeshardId, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID = "", const TString& peerName = ""); + void AsyncExport(TTestActorRuntime& runtime, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID = "", const TString& peerName = ""); + void TestExport(TTestActorRuntime& runtime, ui64 schemeshardId, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID = "", const TString& peerName = "", Ydb::StatusIds::StatusCode expectedStatus = Ydb::StatusIds::SUCCESS); - void TestExport(TTestActorRuntime& runtime, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID = "", + void TestExport(TTestActorRuntime& runtime, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID = "", const TString& peerName = "", Ydb::StatusIds::StatusCode expectedStatus = Ydb::StatusIds::SUCCESS); NKikimrExport::TEvGetExportResponse 
TestGetExport(TTestActorRuntime& runtime, ui64 schemeshardId, ui64 id, const TString& dbName, const TVector& expectedStatuses); @@ -398,11 +413,11 @@ namespace NSchemeShardUT_Private { Ydb::StatusIds::StatusCode expectedStatus = Ydb::StatusIds::SUCCESS); ////////// import - void AsyncImport(TTestActorRuntime& runtime, ui64 schemeshardId, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID = ""); - void AsyncImport(TTestActorRuntime& runtime, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID = ""); - void TestImport(TTestActorRuntime& runtime, ui64 schemeshardId, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID = "", + void AsyncImport(TTestActorRuntime& runtime, ui64 schemeshardId, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID = "", const TString& peerName = ""); + void AsyncImport(TTestActorRuntime& runtime, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID = "", const TString& peerName = ""); + void TestImport(TTestActorRuntime& runtime, ui64 schemeshardId, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID = "", const TString& peerName = "", Ydb::StatusIds::StatusCode expectedStatus = Ydb::StatusIds::SUCCESS); - void TestImport(TTestActorRuntime& runtime, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID = "", + void TestImport(TTestActorRuntime& runtime, ui64 id, const TString& dbName, const TString& requestStr, const TString& userSID = "", const TString& peerName = "", Ydb::StatusIds::StatusCode expectedStatus = Ydb::StatusIds::SUCCESS); NKikimrImport::TEvGetImportResponse TestGetImport(TTestActorRuntime& runtime, ui64 schemeshardId, ui64 id, const TString& dbName, const TVector& expectedStatuses); diff --git a/ydb/core/tx/schemeshard/ut_helpers/test_env.cpp b/ydb/core/tx/schemeshard/ut_helpers/test_env.cpp index 
afef5e0ffab5..9e5b7c8543f6 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/test_env.cpp +++ b/ydb/core/tx/schemeshard/ut_helpers/test_env.cpp @@ -541,6 +541,8 @@ NSchemeShardUT_Private::TTestEnv::TTestEnv(TTestActorRuntime& runtime, const TTe app.SetEnableServerlessExclusiveDynamicNodes(opts.EnableServerlessExclusiveDynamicNodes_); app.SetEnableAddColumsWithDefaults(opts.EnableAddColumsWithDefaults_); app.SetEnableReplaceIfExistsForExternalEntities(opts.EnableReplaceIfExistsForExternalEntities_); + app.SetEnableChangefeedsOnIndexTables(opts.EnableChangefeedsOnIndexTables_); + app.SetEnableTieringInColumnShard(opts.EnableTieringInColumnShard_); app.ColumnShardConfig.SetDisabledOnSchemeShard(false); diff --git a/ydb/core/tx/schemeshard/ut_helpers/test_env.h b/ydb/core/tx/schemeshard/ut_helpers/test_env.h index 36a9d36888cb..c433855c9c76 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/test_env.h +++ b/ydb/core/tx/schemeshard/ut_helpers/test_env.h @@ -65,6 +65,8 @@ namespace NSchemeShardUT_Private { OPTION(std::optional, EnableAddColumsWithDefaults, std::nullopt); OPTION(std::optional, EnableReplaceIfExistsForExternalEntities, std::nullopt); OPTION(std::optional, GraphBackendType, std::nullopt); + OPTION(std::optional, EnableChangefeedsOnIndexTables, std::nullopt); + OPTION(std::optional, EnableTieringInColumnShard, std::nullopt); #undef OPTION }; diff --git a/ydb/core/tx/schemeshard/ut_helpers/ya.make b/ydb/core/tx/schemeshard/ut_helpers/ya.make index 89f867d6c154..5eb4bf14ca88 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/ya.make +++ b/ydb/core/tx/schemeshard/ut_helpers/ya.make @@ -22,9 +22,12 @@ PEERDIR( ydb/public/lib/scheme_types ydb/library/yql/public/issue ydb/public/sdk/cpp/client/ydb_driver + ydb/public/sdk/cpp/client/ydb_table ) SRCS( + auditlog_helpers.cpp + auditlog_helpers.h export_reboots_common.cpp failing_mtpq.cpp helpers.cpp diff --git a/ydb/core/tx/schemeshard/ut_index/ut_async_index.cpp b/ydb/core/tx/schemeshard/ut_index/ut_async_index.cpp index 
9103a56a148d..15427cb64dd8 100644 --- a/ydb/core/tx/schemeshard/ut_index/ut_async_index.cpp +++ b/ydb/core/tx/schemeshard/ut_index/ut_async_index.cpp @@ -5,6 +5,7 @@ #include #include #include +#include using namespace NKikimr; using namespace NSchemeShard; diff --git a/ydb/core/tx/schemeshard/ut_index_build/ut_index_build.cpp b/ydb/core/tx/schemeshard/ut_index_build/ut_index_build.cpp index 4ab86bcf9a7c..f24bc002567c 100644 --- a/ydb/core/tx/schemeshard/ut_index_build/ut_index_build.cpp +++ b/ydb/core/tx/schemeshard/ut_index_build/ut_index_build.cpp @@ -1,10 +1,13 @@ #include #include +#include #include #include #include +#include + using namespace NKikimr; using namespace NSchemeShard; using namespace NSchemeShardUT_Private; @@ -667,6 +670,36 @@ Y_UNIT_TEST_SUITE(IndexBuildTest) { env.TestWaitNotification(runtime, {txId, txId - 1}); } + Y_UNIT_TEST(CheckLimitWithDroppedIndex) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 txId = 100; + + TSchemeLimits lowLimits; + lowLimits.MaxTableIndices = 1; + SetSchemeshardSchemaLimits(runtime, lowLimits); + + TestCreateTable(runtime, ++txId, "/MyRoot", R"( + Name: "Table" + Columns { Name: "key" Type: "Uint64" } + Columns { Name: "value" Type: "Utf8" } + KeyColumnNames: ["key"] + )"); + env.TestWaitNotification(runtime, txId); + + TestBuildIndex(runtime, ++txId, TTestTxConfig::SchemeShard, "/MyRoot", "/MyRoot/Table", "Index1", {"value"}, Ydb::StatusIds::SUCCESS); + env.TestWaitNotification(runtime, txId); + + TestDropTableIndex(runtime, ++txId, "/MyRoot", R"( + TableName: "Table" + IndexName: "Index1" + )"); + env.TestWaitNotification(runtime, txId); + + TestBuildIndex(runtime, ++txId, TTestTxConfig::SchemeShard, "/MyRoot", "/MyRoot/Table", "Index2", {"value"}, Ydb::StatusIds::SUCCESS); + env.TestWaitNotification(runtime, txId); + } + Y_UNIT_TEST(Lock) { TTestBasicRuntime runtime; TTestEnv env(runtime); @@ -729,6 +762,154 @@ Y_UNIT_TEST_SUITE(IndexBuildTest) { } + 
Y_UNIT_TEST(MergeIndexTableShardsOnlyWhenReady) { + TTestBasicRuntime runtime; + + TTestEnvOptions opts; + opts.EnableBackgroundCompaction(false); + opts.DisableStatsBatching(true); + TTestEnv env(runtime, opts); + + NDataShard::gDbStatsReportInterval = TDuration::Seconds(0); + + ui64 txId = 100; + + TestCreateTable(runtime, ++txId, "/MyRoot", R"( + Name: "Table" + Columns { Name: "key" Type: "Uint64" } + Columns { Name: "value" Type: "Uint64" } + KeyColumnNames: ["key"] + )"); + env.TestWaitNotification(runtime, txId); + + Ydb::Table::GlobalIndexSettings settings; + UNIT_ASSERT(google::protobuf::TextFormat::ParseFromString(R"( + partition_at_keys { + split_points { + type { tuple_type { elements { optional_type { item { type_id: UINT64 } } } } } + value { items { uint64_value: 10 } } + } + split_points { + type { tuple_type { elements { optional_type { item { type_id: UINT64 } } } } } + value { items { uint64_value: 20 } } + } + split_points { + type { tuple_type { elements { optional_type { item { type_id: UINT64 } } } } } + value { items { uint64_value: 30 } } + } + } + )", &settings)); + + TBlockEvents indexApplicationBlocker(runtime, [](const auto& ev) { + const auto& modifyScheme = ev->Get()->Record.GetTransaction(0); + return modifyScheme.GetOperationType() == NKikimrSchemeOp::ESchemeOpApplyIndexBuild; + }); + + ui64 indexInitializationTx = 0; + using TEvent = TEvSchemeShard::TEvModifySchemeTransaction; + auto indexInitializationObserver = runtime.AddObserver([&indexInitializationTx](const TEvent::TPtr& ev) { + const auto& record = ev->Get()->Record; + if (record.GetTransaction(0).GetOperationType() == NKikimrSchemeOp::ESchemeOpCreateIndexBuild) { + indexInitializationTx = record.GetTxId(); + } + } + ); + + const ui64 buildIndexTx = ++txId; + TestBuildIndex(runtime, buildIndexTx, TTestTxConfig::SchemeShard, "/MyRoot", "/MyRoot/Table", TBuildIndexConfig{ + "ByValue", NKikimrSchemeOp::EIndexTypeGlobal, { "value" }, {}, + { 
NYdb::NTable::TGlobalIndexSettings::FromProto(settings) } + }); + + runtime.WaitFor("index initialization", [&indexInitializationTx]{ + return indexInitializationTx != 0; + }); + env.TestWaitNotification(runtime, indexInitializationTx); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/ByValue"), { + NLs::PathExist, + NLs::IndexState(NKikimrSchemeOp::EIndexStateWriteOnly) + }); + + TVector indexShards; + auto shardCollector = [&indexShards](const NKikimrScheme::TEvDescribeSchemeResult& record) { + UNIT_ASSERT_VALUES_EQUAL(record.GetStatus(), NKikimrScheme::StatusSuccess); + const auto& partitions = record.GetPathDescription().GetTablePartitions(); + indexShards.clear(); + indexShards.reserve(partitions.size()); + for (const auto& partition : partitions) { + indexShards.emplace_back(partition.GetDatashardId()); + } + }; + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/ByValue/indexImplTable", true), { + NLs::PathExist, + NLs::PartitionCount(4), + shardCollector + }); + UNIT_ASSERT_VALUES_EQUAL(indexShards.size(), 4); + + { + // make sure no shards are merged + TBlockEvents mergeBlocker(runtime, [](const auto& ev) { + const auto& modifyScheme = ev->Get()->Record.GetTransaction(0); + return modifyScheme.GetOperationType() == NKikimrSchemeOp::ESchemeOpSplitMergeTablePartitions; + }); + + { + // wait for all index shards to send statistics + THashSet shardsWithStats; + using TEvType = TEvDataShard::TEvPeriodicTableStats; + auto statsObserver = runtime.AddObserver([&shardsWithStats](const TEvType::TPtr& ev) { + shardsWithStats.emplace(ev->Get()->Record.GetDatashardId()); + }); + + runtime.WaitFor("all index shards to send statistics", [&]{ + return AllOf(indexShards, [&shardsWithStats](ui64 indexShard) { + return shardsWithStats.contains(indexShard); + }); + }); + } + + // we expect to not have observed any attempts to merge + UNIT_ASSERT(mergeBlocker.empty()); + + // wait for 1 minute to ensure that no merges have been started by 
SchemeShard + env.SimulateSleep(runtime, TDuration::Minutes(1)); + UNIT_ASSERT(mergeBlocker.empty()); + } + + // splits are allowed even if the index is not ready + TestSplitTable(runtime, ++txId, "/MyRoot/Table/ByValue/indexImplTable", Sprintf(R"( + SourceTabletId: %lu + SplitBoundary { KeyPrefix { Tuple { Optional { Uint64: 5 } } } } + )", + indexShards.front() + ) + ); + env.TestWaitNotification(runtime, txId); + + indexApplicationBlocker.Stop().Unblock(); + env.TestWaitNotification(runtime, buildIndexTx); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/ByValue"), { + NLs::IndexState(NKikimrSchemeOp::EIndexStateReady) + }); + + // wait until all index impl table shards are merged into one + while (true) { + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/ByValue/indexImplTable", true), { + shardCollector + }); + if (indexShards.size() > 1) { + // If a merge happens, old shards are deleted and replaced with a new one. + // That is why we need to wait for * all * the shards to be deleted. + env.TestWaitTabletDeletion(runtime, indexShards); + } else { + break; + } + } + } + Y_UNIT_TEST(DropIndex) { TTestBasicRuntime runtime; TTestEnv env(runtime); diff --git a/ydb/core/tx/schemeshard/ut_login/ut_login.cpp b/ydb/core/tx/schemeshard/ut_login/ut_login.cpp index 1a46fafeb1d8..e9a8cae8bbcd 100644 --- a/ydb/core/tx/schemeshard/ut_login/ut_login.cpp +++ b/ydb/core/tx/schemeshard/ut_login/ut_login.cpp @@ -1,19 +1,38 @@ -#include +#include + #include +#include +#include +#include +#include +#include +#include using namespace NKikimr; using namespace NSchemeShard; using namespace NSchemeShardUT_Private; +namespace NSchemeShardUT_Private { + +// convert into generic test helper? 
+void TestCreateAlterLoginCreateUser(TTestActorRuntime& runtime, ui64 txId, const TString& database, const TString& user, const TString& password, const TVector& expectedResults) { + std::unique_ptr modifyTx(CreateAlterLoginCreateUser(txId, user, password)); + //TODO: move setting of TModifyScheme.WorkingDir into CreateAlterLoginCreateUser() + //NOTE: TModifyScheme.Name isn't set, intentionally + modifyTx->Record.MutableTransaction(0)->SetWorkingDir(database); + AsyncSend(runtime, TTestTxConfig::SchemeShard, modifyTx.release()); + // AlterLoginCreateUser is synchronous in nature, result is returned immediately + TestModificationResults(runtime, txId, expectedResults); +} + +} // namespace NSchemeShardUT_Private + Y_UNIT_TEST_SUITE(TSchemeShardLoginTest) { Y_UNIT_TEST(BasicLogin) { TTestBasicRuntime runtime; TTestEnv env(runtime); ui64 txId = 100; - TActorId sender = runtime.AllocateEdgeActor(); - std::unique_ptr transaction(CreateAlterLoginCreateUser(++txId, "user1", "password1")); - transaction->Record.MutableTransaction(0)->SetWorkingDir("/MyRoot"); - ForwardToTablet(runtime, TTestTxConfig::SchemeShard, sender, transaction.release()); + TestCreateAlterLoginCreateUser(runtime, ++txId, "/MyRoot", "user1", "password1", {{NKikimrScheme::StatusSuccess}}); auto resultLogin = Login(runtime, "user1", "password1"); UNIT_ASSERT_VALUES_EQUAL(resultLogin.error(), ""); auto describe = DescribePath(runtime, TTestTxConfig::SchemeShard, "/MyRoot"); @@ -28,4 +47,234 @@ Y_UNIT_TEST_SUITE(TSchemeShardLoginTest) { auto resultValidate = login.ValidateToken({.Token = resultLogin.token()}); UNIT_ASSERT_VALUES_EQUAL(resultValidate.User, "user1"); } + + Y_UNIT_TEST(DisableBuiltinAuthMechanism) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + runtime.GetAppData().AuthConfig.SetEnableLoginAuthentication(false); + ui64 txId = 100; + TActorId sender = runtime.AllocateEdgeActor(); + std::unique_ptr transaction(CreateAlterLoginCreateUser(++txId, "user1", "password1")); + 
transaction->Record.MutableTransaction(0)->SetWorkingDir("/MyRoot"); + ForwardToTablet(runtime, TTestTxConfig::SchemeShard, sender, transaction.release()); + auto resultLogin = Login(runtime, "user1", "password1"); + UNIT_ASSERT_VALUES_EQUAL(resultLogin.error(), "Login authentication is disabled"); + UNIT_ASSERT_VALUES_EQUAL(resultLogin.token(), ""); + auto describe = DescribePath(runtime, TTestTxConfig::SchemeShard, "/MyRoot"); + UNIT_ASSERT(describe.HasPathDescription()); + UNIT_ASSERT(describe.GetPathDescription().HasDomainDescription()); + UNIT_ASSERT(describe.GetPathDescription().GetDomainDescription().HasSecurityState()); + UNIT_ASSERT(describe.GetPathDescription().GetDomainDescription().GetSecurityState().PublicKeysSize() > 0); + } + + Y_UNIT_TEST(AuditLogLoginSuccess) { + TTestBasicRuntime runtime; + std::vector lines; + runtime.AuditLogBackends = std::move(CreateTestAuditLogBackends(lines)); + TTestEnv env(runtime); + + UNIT_ASSERT_VALUES_EQUAL(lines.size(), 1); // alter root subdomain + + ui64 txId = 100; + + TestCreateAlterLoginCreateUser(runtime, ++txId, "/MyRoot", "user1", "password1", {{NKikimrScheme::StatusSuccess}}); + UNIT_ASSERT_VALUES_EQUAL(lines.size(), 2); // +user creation + + // test body + { + auto resultLogin = Login(runtime, "user1", "password1"); + UNIT_ASSERT_C(resultLogin.error().empty(), resultLogin); + } + UNIT_ASSERT_VALUES_EQUAL(lines.size(), 3); // +user login + + auto last = FindAuditLine(lines, "operation=LOGIN"); + UNIT_ASSERT_STRING_CONTAINS(last, "component=schemeshard"); + UNIT_ASSERT_STRING_CONTAINS(last, "remote_address="); // can't check the value + UNIT_ASSERT_STRING_CONTAINS(last, "database=/MyRoot"); + UNIT_ASSERT_STRING_CONTAINS(last, "operation=LOGIN"); + UNIT_ASSERT_STRING_CONTAINS(last, "status=SUCCESS"); + UNIT_ASSERT(!last.contains("reason")); + UNIT_ASSERT_STRING_CONTAINS(last, "login_user=user1"); + UNIT_ASSERT_STRING_CONTAINS(last, "login_auth_domain={none}"); + } + + Y_UNIT_TEST(AuditLogLoginFailure) { + 
TTestBasicRuntime runtime; + std::vector lines; + runtime.AuditLogBackends = std::move(CreateTestAuditLogBackends(lines)); + TTestEnv env(runtime); + + UNIT_ASSERT_VALUES_EQUAL(lines.size(), 1); // alter root subdomain + + ui64 txId = 100; + + TestCreateAlterLoginCreateUser(runtime, ++txId, "/MyRoot", "user1", "password1", {{NKikimrScheme::StatusSuccess}}); + UNIT_ASSERT_VALUES_EQUAL(lines.size(), 2); // +user creation + + // test body + { + auto resultLogin = Login(runtime, "user1", "bad_password"); + UNIT_ASSERT_C(!resultLogin.error().empty(), resultLogin); + } + UNIT_ASSERT_VALUES_EQUAL(lines.size(), 3); // +user login + + auto last = FindAuditLine(lines, "operation=LOGIN"); + UNIT_ASSERT_STRING_CONTAINS(last, "component=schemeshard"); + UNIT_ASSERT_STRING_CONTAINS(last, "remote_address="); // can't check the value + UNIT_ASSERT_STRING_CONTAINS(last, "database=/MyRoot"); + UNIT_ASSERT_STRING_CONTAINS(last, "operation=LOGIN"); + UNIT_ASSERT_STRING_CONTAINS(last, "status=ERROR"); + UNIT_ASSERT_STRING_CONTAINS(last, "reason=Invalid password"); + UNIT_ASSERT_STRING_CONTAINS(last, "login_user=user1"); + UNIT_ASSERT_STRING_CONTAINS(last, "login_auth_domain={none}"); + } +} + +namespace NSchemeShardUT_Private { + +void EatWholeString(NHttp::THttpIncomingRequestPtr request, const TString& data) { + request->EnsureEnoughSpaceAvailable(data.size()); + auto size = std::min(request->Avail(), data.size()); + memcpy(request->Pos(), data.data(), size); + request->Advance(size); +} + +NHttp::THttpIncomingRequestPtr MakeLoginRequest(const TString& user, const TString& password) { + TString payload = [](const auto& user, const auto& password) { + NJson::TJsonValue value; + value["user"] = user; + value["password"] = password; + return NJson::WriteJson(value, false); + }(user, password); + TStringBuilder text; + text << "POST /login HTTP/1.1\r\n" + << "Host: test.ydb\r\n" + << "Content-Type: application/json\r\n" + << "Content-Length: " << payload.size() << "\r\n" + << "\r\n" + << 
payload; + NHttp::THttpIncomingRequestPtr request = new NHttp::THttpIncomingRequest(); + EatWholeString(request, text); + // WebLoginService will crash without address + request->Address = std::make_shared("127.0.0.1", 0); + // Cerr << "TEST: http login request: " << text << Endl; + return request; +} + +NHttp::THttpIncomingRequestPtr MakeLogoutRequest(const TString& cookieName, const TString& cookieValue) { + TStringBuilder text; + text << "POST /logout HTTP/1.1\r\n" + << "Host: test.ydb\r\n" + << "Content-Type: text/plain\r\n" + << "Cookie: " << cookieName << "=" << cookieValue << "\r\n" + << "\r\n"; + NHttp::THttpIncomingRequestPtr request = new NHttp::THttpIncomingRequest(); + EatWholeString(request, text); + // WebLoginService will crash without address + request->Address = std::make_shared("127.0.0.1", 0); + // Cerr << "TEST: http logout request: " << text << Endl; + return request; +} + +} + +Y_UNIT_TEST_SUITE(TWebLoginService) { + + Y_UNIT_TEST(Logout) { + TTestBasicRuntime runtime; + std::vector lines; + runtime.AuditLogBackends = std::move(CreateTestAuditLogBackends(lines)); + TTestEnv env(runtime); + + // Add ticket parser to the mix + { + NKikimrProto::TAuthConfig authConfig; + authConfig.SetUseBlackBox(false); + authConfig.SetUseLoginProvider(true); + + IActor* ticketParser = NKikimr::CreateTicketParser({.AuthConfig = authConfig}); + TActorId ticketParserId = runtime.Register(ticketParser); + runtime.RegisterService(NKikimr::MakeTicketParserID(), ticketParserId); + } + + UNIT_ASSERT_VALUES_EQUAL(lines.size(), 1); // alter root subdomain + + ui64 txId = 100; + + TestCreateAlterLoginCreateUser(runtime, ++txId, "/MyRoot", "user1", "password1", {{NKikimrScheme::StatusSuccess}}); + UNIT_ASSERT_VALUES_EQUAL(lines.size(), 2); // +user creation + + // test body + const auto target = runtime.Register(CreateWebLoginService()); + const auto edge = runtime.AllocateEdgeActor(); + + TString ydbSessionId; + { + runtime.Send(new IEventHandle(target, edge, new 
NHttp::TEvHttpProxy::TEvHttpIncomingRequest( + MakeLoginRequest("user1", "password1") + ))); + + TAutoPtr handle; + auto responseEv = runtime.GrabEdgeEvent(handle); + UNIT_ASSERT_STRINGS_EQUAL(responseEv->Response->Status, "200"); + NHttp::THeaders headers(responseEv->Response->Headers); + NHttp::TCookies cookies(headers["Set-Cookie"]); + ydbSessionId = cookies["ydb_session_id"]; + } + UNIT_ASSERT_VALUES_EQUAL(lines.size(), 3); // +user login + + // New security keys are created in the subdomain as a consequence of a login. + // In real system they are transferred to the ticket parser by the grpc-proxy + // on receiving subdomain update notification. + // Here there are no grpc-proxy, so we should transfer keys to the ticket parser manually. + { + const auto describe = DescribePath(runtime, "/MyRoot"); + const auto& securityState = describe.GetPathDescription().GetDomainDescription().GetSecurityState(); + TActorId edge = runtime.AllocateEdgeActor(); + runtime.Send(new IEventHandle(MakeTicketParserID(), edge, new TEvTicketParser::TEvUpdateLoginSecurityState(securityState)), 0); + } + + // Then we are ready to test some authentication on /logout + { // no cookie + runtime.Send(new IEventHandle(target, edge, new NHttp::TEvHttpProxy::TEvHttpIncomingRequest( + MakeLogoutRequest("not-an-ydb_session_id", ydbSessionId) + ))); + + TAutoPtr handle; + auto responseEv = runtime.GrabEdgeEvent(handle); + UNIT_ASSERT_STRINGS_EQUAL(responseEv->Response->Status, "401"); + + // no audit record for actions without auth + UNIT_ASSERT_VALUES_EQUAL(lines.size(), 3); + } + { // bad cookie + runtime.Send(new IEventHandle(target, edge, new NHttp::TEvHttpProxy::TEvHttpIncomingRequest( + MakeLogoutRequest("ydb_session_id", "jklhagsfjhg") + ))); + + TAutoPtr handle; + auto responseEv = runtime.GrabEdgeEvent(handle); + UNIT_ASSERT_STRINGS_EQUAL(responseEv->Response->Status, "403"); + + // no audit record for actions without auth + UNIT_ASSERT_VALUES_EQUAL(lines.size(), 3); + } + { // good 
cookie + runtime.Send(new IEventHandle(target, edge, new NHttp::TEvHttpProxy::TEvHttpIncomingRequest( + MakeLogoutRequest("ydb_session_id", ydbSessionId) + ))); + + TAutoPtr handle; + auto responseEv = runtime.GrabEdgeEvent(handle); + UNIT_ASSERT_STRINGS_EQUAL(responseEv->Response->Status, "200"); + + UNIT_ASSERT_VALUES_EQUAL(lines.size(), 4); // +user web logout + + auto last = FindAuditLine(lines, "operation=LOGOUT"); + UNIT_ASSERT_STRING_CONTAINS(last, "component=web-login"); + UNIT_ASSERT_STRING_CONTAINS(last, "remote_address="); // can't check the value + UNIT_ASSERT_STRING_CONTAINS(last, "operation=LOGOUT"); + UNIT_ASSERT_STRING_CONTAINS(last, "status=SUCCESS"); + } + } } diff --git a/ydb/core/tx/schemeshard/ut_move/ut_move.cpp b/ydb/core/tx/schemeshard/ut_move/ut_move.cpp index 7690dd906a7e..1c9e7391798f 100644 --- a/ydb/core/tx/schemeshard/ut_move/ut_move.cpp +++ b/ydb/core/tx/schemeshard/ut_move/ut_move.cpp @@ -1191,4 +1191,25 @@ Y_UNIT_TEST_SUITE(TSchemeShardMoveTest) { TestMoveTable(runtime, ++txId, "/MyRoot/Table", "/MyRoot/TableMove"); env.TestWaitNotification(runtime, txId); } + + Y_UNIT_TEST(MoveTableWithSequence) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 txId = 100; + + TestCreateIndexedTable(runtime, ++txId, "/MyRoot", R"( + TableDescription { + Name: "Table" + Columns { Name: "key" Type: "Uint64" DefaultFromSequence: "myseq" } + Columns { Name: "value" Type: "Uint64" } + KeyColumnNames: ["key"] + } + SequenceDescription { + Name: "myseq" + } + )"); + env.TestWaitNotification(runtime, txId); + + TestMoveTable(runtime, ++txId, "/MyRoot/Table", "/MyRoot/TableMove", {NKikimrScheme::StatusPreconditionFailed}); + } } diff --git a/ydb/core/tx/schemeshard/ut_olap/ut_olap.cpp b/ydb/core/tx/schemeshard/ut_olap/ut_olap.cpp index c53fdeb58425..1a6a85e46e27 100644 --- a/ydb/core/tx/schemeshard/ut_olap/ut_olap.cpp +++ b/ydb/core/tx/schemeshard/ut_olap/ut_olap.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ 
-39,7 +40,7 @@ static const TString defaultTableSchema = R"( )"; static const TVector defaultYdbSchema = { - NArrow::NTest::TTestColumn("timestamp", TTypeInfo(NTypeIds::Timestamp) ), + NArrow::NTest::TTestColumn("timestamp", TTypeInfo(NTypeIds::Timestamp)).SetNullable(false), NArrow::NTest::TTestColumn("data", TTypeInfo(NTypeIds::Utf8) ) }; @@ -556,11 +557,24 @@ Y_UNIT_TEST_SUITE(TOlap) { } } )", {NKikimrScheme::StatusAccepted}); + + env.TestWaitNotification(runtime, txId); + TestAlterOlapStore(runtime, ++txId, "/MyRoot", R"( + Name: "OlapStore" + AlterSchemaPresets { + Name: "default" + AlterSchema { + AlterColumns { Name: "comment" DefaultValue: "10" } + } + } + )", {NKikimrScheme::StatusSchemeError}); } Y_UNIT_TEST(AlterTtl) { TTestBasicRuntime runtime; - TTestEnv env(runtime); + TTestEnvOptions options; + options.EnableTieringInColumnShard(true); + TTestEnv env(runtime, options); ui64 txId = 100; TString olapSchema = R"( @@ -634,17 +648,17 @@ Y_UNIT_TEST_SUITE(TOlap) { env.TestWaitNotification(runtime, txId); } - // TODO: AlterTiers - // negatives for store: disallow alters - // negatives for table: wrong tiers count, wrong tiers, wrong eviction column, wrong eviction values, - // different TTL columns in tiers -#if 0 Y_UNIT_TEST(StoreStats) { TTestBasicRuntime runtime; TTestEnv env(runtime); runtime.SetLogPriority(NKikimrServices::TX_COLUMNSHARD, NActors::NLog::PRI_DEBUG); runtime.UpdateCurrentTime(TInstant::Now() - TDuration::Seconds(600)); + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + csController->SetOverridePeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + csController->SetOverrideLagForCompactionBeforeTierings(TDuration::Seconds(1)); + csController->SetOverrideReduceMemoryIntervalLimit(1LLU << 30); + // disable stats batching auto& appData = runtime.GetAppData(); appData.SchemeShardConfig.SetStatsBatchTimeoutMs(0); @@ -690,6 +704,16 @@ Y_UNIT_TEST_SUITE(TOlap) { UNIT_ASSERT(shardId); UNIT_ASSERT(pathId); 
UNIT_ASSERT(planStep); + { + auto description = DescribePrivatePath(runtime, TTestTxConfig::SchemeShard, "/MyRoot/OlapStore/ColumnTable", true, true); + Cerr << description.DebugString() << Endl; + auto& tabletStats = description.GetPathDescription().GetTableStats(); + + UNIT_ASSERT(description.GetPathDescription().HasTableStats()); + UNIT_ASSERT_EQUAL(tabletStats.GetRowCount(), 0); + UNIT_ASSERT_EQUAL(tabletStats.GetDataSize(), 0); + } + ui32 rowsInBatch = 100000; @@ -702,7 +726,7 @@ Y_UNIT_TEST_SUITE(TOlap) { TSet txIds; for (ui32 i = 0; i < 10; ++i) { std::vector writeIds; - NTxUT::WriteData(runtime, sender, shardId, ++writeId, pathId, data, defaultYdbSchema, &writeIds); + NTxUT::WriteData(runtime, sender, shardId, ++writeId, pathId, data, defaultYdbSchema, &writeIds, NEvWrite::EModificationType::Upsert); NTxUT::ProposeCommit(runtime, sender, shardId, ++txId, writeIds); txIds.insert(txId); } @@ -714,16 +738,38 @@ Y_UNIT_TEST_SUITE(TOlap) { // trigger periodic stats at shard (after timeout) std::vector writeIds; - NTxUT::WriteData(runtime, sender, shardId, ++writeId, pathId, data, defaultYdbSchema, &writeIds); + NTxUT::WriteData(runtime, sender, shardId, ++writeId, pathId, data, defaultYdbSchema, &writeIds, NEvWrite::EModificationType::Upsert); NTxUT::ProposeCommit(runtime, sender, shardId, ++txId, writeIds); NTxUT::PlanCommit(runtime, sender, shardId, ++planStep, {txId}); } + csController->WaitIndexation(TDuration::Seconds(5)); + { + auto description = DescribePrivatePath(runtime, TTestTxConfig::SchemeShard, "/MyRoot/OlapStore", true, true); + Cerr << description.DebugString() << Endl; + auto& tabletStats = description.GetPathDescription().GetTableStats(); + + UNIT_ASSERT_GT(tabletStats.GetRowCount(), 0); + UNIT_ASSERT_GT(tabletStats.GetDataSize(), 0); + UNIT_ASSERT_GT(tabletStats.GetPartCount(), 0); + UNIT_ASSERT_GT(tabletStats.GetRowUpdates(), 0); + UNIT_ASSERT_GT(tabletStats.GetImmediateTxCompleted(), 0); + UNIT_ASSERT_GT(tabletStats.GetPlannedTxCompleted(), 
0); + UNIT_ASSERT_GT(tabletStats.GetLastAccessTime(), 0); + UNIT_ASSERT_GT(tabletStats.GetLastUpdateTime(), 0); + } + + { + auto description = DescribePrivatePath(runtime, TTestTxConfig::SchemeShard, "/MyRoot/OlapStore/ColumnTable", true, true); + Cerr << description.DebugString() << Endl; + auto& tabletStats = description.GetPathDescription().GetTableStats(); - auto description = DescribePrivatePath(runtime, TTestTxConfig::SchemeShard, "/MyRoot/OlapStore", true, true); - auto& tabletStats = description.GetPathDescription().GetTableStats(); + UNIT_ASSERT_GT(tabletStats.GetRowCount(), 0); + UNIT_ASSERT_GT(tabletStats.GetDataSize(), 0); + UNIT_ASSERT_GT(tabletStats.GetPartCount(), 0); + UNIT_ASSERT_GT(tabletStats.GetLastAccessTime(), 0); + UNIT_ASSERT_GT(tabletStats.GetLastUpdateTime(), 0); + } - UNIT_ASSERT_GT(tabletStats.GetRowCount(), 0); - UNIT_ASSERT_GT(tabletStats.GetDataSize(), 0); #if 0 TestDropColumnTable(runtime, ++txId, "/MyRoot/OlapStore", "ColumnTable"); env.TestWaitNotification(runtime, txId); @@ -738,5 +784,4 @@ Y_UNIT_TEST_SUITE(TOlap) { TestLsPathId(runtime, 2, NLs::PathStringEqual("")); #endif } -#endif } diff --git a/ydb/core/tx/schemeshard/ut_replication/ut_replication.cpp b/ydb/core/tx/schemeshard/ut_replication/ut_replication.cpp index 8817995f8881..4b596e677ca1 100644 --- a/ydb/core/tx/schemeshard/ut_replication/ut_replication.cpp +++ b/ydb/core/tx/schemeshard/ut_replication/ut_replication.cpp @@ -344,6 +344,53 @@ Y_UNIT_TEST_SUITE(TReplicationTests) { TestDescribeResult(DescribePath(runtime, "/MyRoot/Table"), { NLs::ReplicationMode(NKikimrSchemeOp::TTableReplicationConfig::REPLICATION_MODE_NONE), + NLs::UserAttrsEqual({}), + }); + } + + Y_UNIT_TEST(AlterReplicatedIndexTable) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 txId = 100; + + AsyncSend(runtime, TTestTxConfig::SchemeShard, InternalTransaction(CreateIndexedTableRequest(++txId, "/MyRoot", R"( + TableDescription { + Name: "Table" + Columns { Name: "key" Type: "Uint64" } 
+ Columns { Name: "indexed" Type: "Uint64" } + KeyColumnNames: ["key"] + ReplicationConfig { + Mode: REPLICATION_MODE_READ_ONLY + } + } + IndexDescription { + Name: "Index" + KeyColumnNames: ["indexed"] + IndexImplTableDescription { + ReplicationConfig { + Mode: REPLICATION_MODE_READ_ONLY + } + } + } + )"))); + TestModificationResults(runtime, txId, {NKikimrScheme::StatusAccepted}); + env.TestWaitNotification(runtime, txId); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/Index/indexImplTable"), { + NLs::ReplicationMode(NKikimrSchemeOp::TTableReplicationConfig::REPLICATION_MODE_READ_ONLY), + }); + + AsyncSend(runtime, TTestTxConfig::SchemeShard, InternalTransaction(AlterTableRequest(++txId, "/MyRoot/Table/Index", R"( + Name: "indexImplTable" + ReplicationConfig { + Mode: REPLICATION_MODE_NONE + } + )"))); + TestModificationResults(runtime, txId, {NKikimrScheme::StatusAccepted}); + env.TestWaitNotification(runtime, txId); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/Index/indexImplTable"), { + NLs::ReplicationMode(NKikimrSchemeOp::TTableReplicationConfig::REPLICATION_MODE_NONE), }); } diff --git a/ydb/core/tx/schemeshard/ut_restore/ut_restore.cpp b/ydb/core/tx/schemeshard/ut_restore/ut_restore.cpp index 13b365dc102e..6007cf9619d5 100644 --- a/ydb/core/tx/schemeshard/ut_restore/ut_restore.cpp +++ b/ydb/core/tx/schemeshard/ut_restore/ut_restore.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -20,8 +21,10 @@ #include +#include #include #include +#include #include #include @@ -37,6 +40,16 @@ using namespace NKikimr::NWrappers::NTestHelpers; namespace { + Aws::SDKOptions Options; + + Y_TEST_HOOK_BEFORE_RUN(InitAwsAPI) { + Aws::InitAPI(Options); + } + + Y_TEST_HOOK_AFTER_RUN(ShutdownAwsAPI) { + Aws::ShutdownAPI(Options); + } + const TString EmptyYsonStr = R"([[[[];%false]]])"; TString GenerateScheme(const NKikimrSchemeOp::TPathDescription& pathDesc) { @@ -48,7 +61,7 @@ namespace { 
scheme.mutable_primary_key()->CopyFrom(tableDesc.GetKeyColumnNames()); FillColumnDescription(scheme, mkqlKeyType, tableDesc); - FillIndexDescription(scheme, tableDesc, mkqlKeyType); + FillIndexDescription(scheme, tableDesc); FillStorageSettings(scheme, tableDesc); FillColumnFamilies(scheme, tableDesc); FillAttributes(scheme, pathDesc); @@ -316,7 +329,6 @@ namespace { runtime.SetObserverFunc(prevObserver); } - } // anonymous Y_UNIT_TEST_SUITE(TRestoreTests) { @@ -1206,7 +1218,7 @@ value { const TVector keyTags = {1}; TVector valueTags(values.size()); - std::iota(valueTags.begin(), valueTags.end(), 2); + std::iota(valueTags.begin(), valueTags.end(), 2); UploadRow(runtime, "/MyRoot/Table", partitionIdx, keyTags, valueTags, keys, values); @@ -1298,7 +1310,7 @@ value { "jsondoc_value", "uuid_value", }; - + auto contentOriginalTable = ReadTable(runtime, TTestTxConfig::FakeHiveTablets, "Table", readKeyDesc, readColumns); NKqp::CompareYson(expectedJson, contentOriginalTable); @@ -2166,7 +2178,7 @@ Y_UNIT_TEST_SUITE(TImportTests) { void Run(TTestBasicRuntime& runtime, TTestEnv& env, THashMap&& data, const TString& request, Ydb::StatusIds::StatusCode expectedStatus = Ydb::StatusIds::SUCCESS, - const TString& dbName = "/MyRoot", bool serverless = false, const TString& userSID = "") + const TString& dbName = "/MyRoot", bool serverless = false, const TString& userSID = "", const TString& peerName = "") { ui64 id = 100; @@ -2256,7 +2268,7 @@ Y_UNIT_TEST_SUITE(TImportTests) { break; } - TestImport(runtime, schemeshardId, ++id, dbName, Sprintf(request.data(), port), userSID, initialStatus); + TestImport(runtime, schemeshardId, ++id, dbName, Sprintf(request.data(), port), userSID, peerName, initialStatus); env.TestWaitNotification(runtime, id, schemeshardId); if (initialStatus != Ydb::StatusIds::SUCCESS) { @@ -3090,6 +3102,9 @@ Y_UNIT_TEST_SUITE(TImportTests) { void CancelShouldSucceed(TDelayFunc delayFunc) { TTestBasicRuntime runtime; + std::vector auditLines; + 
runtime.AuditLogBackends = std::move(CreateTestAuditLogBackends(auditLines)); + TTestEnv env(runtime, TTestEnvOptions()); ui64 txId = 100; @@ -3134,12 +3149,45 @@ Y_UNIT_TEST_SUITE(TImportTests) { )", port)); const ui64 importId = txId; + // Check audit record for import start + { + auto line = FindAuditLine(auditLines, "operation=IMPORT START"); + UNIT_ASSERT_STRING_CONTAINS(line, "component=schemeshard"); + UNIT_ASSERT_STRING_CONTAINS(line, "operation=IMPORT START"); + UNIT_ASSERT_STRING_CONTAINS(line, Sprintf("id=%lu", importId)); + UNIT_ASSERT_STRING_CONTAINS(line, "remote_address={none}"); + UNIT_ASSERT_STRING_CONTAINS(line, "subject={none}"); + UNIT_ASSERT_STRING_CONTAINS(line, "database=/MyRoot"); + UNIT_ASSERT_STRING_CONTAINS(line, "status=SUCCESS"); + UNIT_ASSERT_STRING_CONTAINS(line, "detailed_status=SUCCESS"); + UNIT_ASSERT(!line.contains("reason")); + UNIT_ASSERT(!line.contains("start_time")); + UNIT_ASSERT(!line.contains("end_time")); + } + WaitForDelayed(runtime, delayed, prevObserver); TestCancelImport(runtime, ++txId, "/MyRoot", importId); runtime.Send(delayed.Release(), 0, true); env.TestWaitNotification(runtime, importId); + // Check audit record for import end + // + { + auto line = FindAuditLine(auditLines, "operation=IMPORT END"); + UNIT_ASSERT_STRING_CONTAINS(line, "component=schemeshard"); + UNIT_ASSERT_STRING_CONTAINS(line, "operation=IMPORT END"); + UNIT_ASSERT_STRING_CONTAINS(line, Sprintf("id=%lu", importId)); + UNIT_ASSERT_STRING_CONTAINS(line, "remote_address={none}"); + UNIT_ASSERT_STRING_CONTAINS(line, "subject={none}"); + UNIT_ASSERT_STRING_CONTAINS(line, "database=/MyRoot"); + UNIT_ASSERT_STRING_CONTAINS(line, "status=ERROR"); + UNIT_ASSERT_STRING_CONTAINS(line, "detailed_status=CANCELLED"); + UNIT_ASSERT_STRING_CONTAINS(line, "reason=Cancelled"); + UNIT_ASSERT_STRING_CONTAINS(line, "start_time="); + UNIT_ASSERT_STRING_CONTAINS(line, "end_time="); + } + TestGetImport(runtime, importId, "/MyRoot", Ydb::StatusIds::CANCELLED); } @@ 
-3429,6 +3477,260 @@ Y_UNIT_TEST_SUITE(TImportTests) { UNIT_ASSERT(entry.HasEndTime()); UNIT_ASSERT_LT(entry.GetStartTime().seconds(), entry.GetEndTime().seconds()); } + + // Based on CompletedImportEndTime + Y_UNIT_TEST(AuditCompletedImport) { + TTestBasicRuntime runtime; + std::vector auditLines; + runtime.AuditLogBackends = std::move(CreateTestAuditLogBackends(auditLines)); + + TTestEnv env(runtime); + + runtime.UpdateCurrentTime(TInstant::Now()); + ui64 txId = 100; + + const auto data = GenerateTestData(R"( + columns { + name: "key" + type { optional_type { item { type_id: UTF8 } } } + } + columns { + name: "value" + type { optional_type { item { type_id: UTF8 } } } + } + primary_key: "key" + )", {{"a", 1}}); + + TPortManager portManager; + const ui16 port = portManager.GetPort(); + + TS3Mock s3Mock(ConvertTestData(data), TS3Mock::TSettings(port)); + UNIT_ASSERT(s3Mock.Start()); + + const auto request = Sprintf(R"( + OperationParams { + labels { + key: "uid" + value: "foo" + } + } + ImportFromS3Settings { + endpoint: "localhost:%d" + scheme: HTTP + items { + source_prefix: "" + destination_path: "/MyRoot/Table" + } + } + )", port); + TestImport(runtime, ++txId, "/MyRoot", request, /*userSID*/ "user@builtin", /*peerName*/ "127.0.0.1:9876"); + + // Check audit record for import start + { + auto line = FindAuditLine(auditLines, "operation=IMPORT START"); + UNIT_ASSERT_STRING_CONTAINS(line, "component=schemeshard"); + UNIT_ASSERT_STRING_CONTAINS(line, "operation=IMPORT START"); + UNIT_ASSERT_STRING_CONTAINS(line, Sprintf("id=%lu", txId)); + UNIT_ASSERT_STRING_CONTAINS(line, "uid=foo"); + UNIT_ASSERT_STRING_CONTAINS(line, "remote_address=127.0.0.1"); + UNIT_ASSERT_STRING_CONTAINS(line, "subject=user@builtin"); + UNIT_ASSERT_STRING_CONTAINS(line, "database=/MyRoot"); + UNIT_ASSERT_STRING_CONTAINS(line, "status=SUCCESS"); + UNIT_ASSERT_STRING_CONTAINS(line, "detailed_status=SUCCESS"); + UNIT_ASSERT(!line.contains("reason")); + 
UNIT_ASSERT(!line.contains("start_time")); + UNIT_ASSERT(!line.contains("end_time")); + } + + runtime.AdvanceCurrentTime(TDuration::Seconds(30)); // doing import + + env.TestWaitNotification(runtime, txId); + + // Check audit record for import end + // + { + auto line = FindAuditLine(auditLines, "operation=IMPORT END"); + UNIT_ASSERT_STRING_CONTAINS(line, "component=schemeshard"); + UNIT_ASSERT_STRING_CONTAINS(line, "operation=IMPORT END"); + UNIT_ASSERT_STRING_CONTAINS(line, Sprintf("id=%lu", txId)); + UNIT_ASSERT_STRING_CONTAINS(line, "uid=foo"); + UNIT_ASSERT_STRING_CONTAINS(line, "remote_address=127.0.0.1"); + UNIT_ASSERT_STRING_CONTAINS(line, "subject=user@builtin"); + UNIT_ASSERT_STRING_CONTAINS(line, "database=/MyRoot"); + UNIT_ASSERT_STRING_CONTAINS(line, "status=SUCCESS"); + UNIT_ASSERT_STRING_CONTAINS(line, "detailed_status=SUCCESS"); + UNIT_ASSERT(!line.contains("reason")); + UNIT_ASSERT_STRING_CONTAINS(line, "start_time="); + UNIT_ASSERT_STRING_CONTAINS(line, "end_time="); + } + + const auto desc = TestGetImport(runtime, txId, "/MyRoot"); + const auto& entry = desc.GetResponse().GetEntry(); + UNIT_ASSERT_VALUES_EQUAL(entry.GetProgress(), Ydb::Import::ImportProgress::PROGRESS_DONE); + UNIT_ASSERT(entry.HasStartTime()); + UNIT_ASSERT(entry.HasEndTime()); + UNIT_ASSERT_LT(entry.GetStartTime().seconds(), entry.GetEndTime().seconds()); + } + + // Based on CancelledImportEndTime + Y_UNIT_TEST(AuditCancelledImport) { + TTestBasicRuntime runtime; + std::vector auditLines; + runtime.AuditLogBackends = std::move(CreateTestAuditLogBackends(auditLines)); + + TTestEnv env(runtime); + + runtime.UpdateCurrentTime(TInstant::Now()); + ui64 txId = 100; + + const auto data = GenerateTestData(R"( + columns { + name: "key" + type { optional_type { item { type_id: UTF8 } } } + } + columns { + name: "value" + type { optional_type { item { type_id: UTF8 } } } + } + primary_key: "key" + )", {{"a", 1}}); + + TPortManager portManager; + const ui16 port = portManager.GetPort(); + 
+ TS3Mock s3Mock(ConvertTestData(data), TS3Mock::TSettings(port)); + UNIT_ASSERT(s3Mock.Start()); + + auto delayFunc = [](TAutoPtr& ev) { + if (ev->GetTypeRewrite() != TEvSchemeShard::EvModifySchemeTransaction) { + return false; + } + + return ev->Get()->Record + .GetTransaction(0).GetOperationType() == NKikimrSchemeOp::ESchemeOpRestore; + }; + + THolder delayed; + auto prevObserver = SetDelayObserver(runtime, delayed, delayFunc); + + const auto request = Sprintf(R"( + OperationParams { + labels { + key: "uid" + value: "foo" + } + } + ImportFromS3Settings { + endpoint: "localhost:%d" + scheme: HTTP + items { + source_prefix: "" + destination_path: "/MyRoot/Table" + } + } + )", port); + TestImport(runtime, ++txId, "/MyRoot", request, /*userSID*/ "user@builtin", /*peerName*/ "127.0.0.1:9876"); + const ui64 importId = txId; + + // Check audit record for import start + { + auto line = FindAuditLine(auditLines, "operation=IMPORT START"); + UNIT_ASSERT_STRING_CONTAINS(line, "component=schemeshard"); + UNIT_ASSERT_STRING_CONTAINS(line, "operation=IMPORT START"); + UNIT_ASSERT_STRING_CONTAINS(line, Sprintf("id=%lu", importId)); + UNIT_ASSERT_STRING_CONTAINS(line, "uid=foo"); + UNIT_ASSERT_STRING_CONTAINS(line, "remote_address=127.0.0.1"); + UNIT_ASSERT_STRING_CONTAINS(line, "subject=user@builtin"); + UNIT_ASSERT_STRING_CONTAINS(line, "database=/MyRoot"); + UNIT_ASSERT_STRING_CONTAINS(line, "status=SUCCESS"); + UNIT_ASSERT_STRING_CONTAINS(line, "detailed_status=SUCCESS"); + UNIT_ASSERT(!line.contains("reason")); + UNIT_ASSERT(!line.contains("start_time")); + UNIT_ASSERT(!line.contains("end_time")); + } + + runtime.AdvanceCurrentTime(TDuration::Seconds(30)); // doing import + + WaitForDelayed(runtime, delayed, prevObserver); + + TestCancelImport(runtime, ++txId, "/MyRoot", importId); + + auto desc = TestGetImport(runtime, importId, "/MyRoot"); + auto entry = desc.GetResponse().GetEntry(); + UNIT_ASSERT_VALUES_EQUAL(entry.GetProgress(), 
Ydb::Import::ImportProgress::PROGRESS_CANCELLATION); + UNIT_ASSERT(entry.HasStartTime()); + UNIT_ASSERT(!entry.HasEndTime()); + + runtime.Send(delayed.Release(), 0, true); + env.TestWaitNotification(runtime, importId); + + desc = TestGetImport(runtime, importId, "/MyRoot", Ydb::StatusIds::CANCELLED); + entry = desc.GetResponse().GetEntry(); + UNIT_ASSERT_VALUES_EQUAL(entry.GetProgress(), Ydb::Import::ImportProgress::PROGRESS_CANCELLED); + UNIT_ASSERT(entry.HasStartTime()); + UNIT_ASSERT(entry.HasEndTime()); + UNIT_ASSERT_LT(entry.GetStartTime().seconds(), entry.GetEndTime().seconds()); + + // Check audit record for import end + // + { + auto line = FindAuditLine(auditLines, "operation=IMPORT END"); + UNIT_ASSERT_STRING_CONTAINS(line, "component=schemeshard"); + UNIT_ASSERT_STRING_CONTAINS(line, "operation=IMPORT END"); + UNIT_ASSERT_STRING_CONTAINS(line, Sprintf("id=%lu", importId)); + UNIT_ASSERT_STRING_CONTAINS(line, "uid=foo"); + UNIT_ASSERT_STRING_CONTAINS(line, "remote_address=127.0.0.1"); + UNIT_ASSERT_STRING_CONTAINS(line, "subject=user@builtin"); + UNIT_ASSERT_STRING_CONTAINS(line, "database=/MyRoot"); + UNIT_ASSERT_STRING_CONTAINS(line, "status=ERROR"); + UNIT_ASSERT_STRING_CONTAINS(line, "detailed_status=CANCELLED"); + UNIT_ASSERT_STRING_CONTAINS(line, "reason=Cancelled"); + UNIT_ASSERT_STRING_CONTAINS(line, "start_time="); + UNIT_ASSERT_STRING_CONTAINS(line, "end_time="); + } + } + + Y_UNIT_TEST(UserSID) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 txId = 100; + + const auto data = GenerateTestData(R"( + columns { + name: "key" + type { optional_type { item { type_id: UTF8 } } } + } + columns { + name: "value" + type { optional_type { item { type_id: UTF8 } } } + } + primary_key: "key" + )", {{"a", 1}}); + + TPortManager portManager; + const ui16 port = portManager.GetPort(); + + TS3Mock s3Mock(ConvertTestData(data), TS3Mock::TSettings(port)); + UNIT_ASSERT(s3Mock.Start()); + + const TString request = Sprintf(R"( + ImportFromS3Settings 
{ + endpoint: "localhost:%d" + scheme: HTTP + items { + source_prefix: "" + destination_path: "/MyRoot/Table" + } + } + )", port); + const TString userSID = "user@builtin"; + TestImport(runtime, ++txId, "/MyRoot", request, userSID); + + const auto desc = TestGetImport(runtime, txId, "/MyRoot"); + const auto& entry = desc.GetResponse().GetEntry(); + UNIT_ASSERT_VALUES_EQUAL(entry.GetProgress(), Ydb::Import::ImportProgress::PROGRESS_PREPARING); + UNIT_ASSERT_VALUES_EQUAL(entry.GetUserSID(), userSID); + } } Y_UNIT_TEST_SUITE(TImportWithRebootsTests) { diff --git a/ydb/core/tx/schemeshard/ut_restore/ya.make b/ydb/core/tx/schemeshard/ut_restore/ya.make index 7044d4283b5e..d514b36b49ee 100644 --- a/ydb/core/tx/schemeshard/ut_restore/ya.make +++ b/ydb/core/tx/schemeshard/ut_restore/ya.make @@ -14,6 +14,7 @@ ELSE() ENDIF() PEERDIR( + contrib/libs/aws-sdk-cpp/aws-cpp-sdk-core contrib/libs/double-conversion library/cpp/string_utils/quote ydb/core/kqp/ut/common diff --git a/ydb/core/tx/schemeshard/ut_split_merge/ut_split_merge.cpp b/ydb/core/tx/schemeshard/ut_split_merge/ut_split_merge.cpp index 2ea65bb9caea..6c78df21a2ce 100644 --- a/ydb/core/tx/schemeshard/ut_split_merge/ut_split_merge.cpp +++ b/ydb/core/tx/schemeshard/ut_split_merge/ut_split_merge.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -277,6 +278,69 @@ Y_UNIT_TEST_SUITE(TSchemeShardSplitBySizeTest) { // test requires more txids than cached at start } + Y_UNIT_TEST(MergeIndexTableShards) { + TTestBasicRuntime runtime; + + TTestEnvOptions opts; + opts.EnableBackgroundCompaction(false); + TTestEnv env(runtime, opts); + + ui64 txId = 100; + + TBlockEvents statsBlocker(runtime); + + TestCreateIndexedTable(runtime, ++txId, "/MyRoot", R"( + TableDescription { + Name: "Table" + Columns { Name: "key" Type: "Uint64" } + Columns { Name: "value" Type: "Utf8" } + KeyColumnNames: ["key"] + } + IndexDescription { + Name: "ByValue" + KeyColumnNames: ["value"] + IndexImplTableDescription { + SplitBoundary { KeyPrefix 
{ Tuple { Optional { Text: "A" } } } } + SplitBoundary { KeyPrefix { Tuple { Optional { Text: "B" } } } } + SplitBoundary { KeyPrefix { Tuple { Optional { Text: "C" } } } } + } + } + )" + ); + env.TestWaitNotification(runtime, txId); + + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/ByValue/indexImplTable", true), + { NLs::PartitionCount(4) } + ); + + statsBlocker.Stop().Unblock(); + + TVector indexShards; + auto shardCollector = [&indexShards](const NKikimrScheme::TEvDescribeSchemeResult& record) { + UNIT_ASSERT_VALUES_EQUAL(record.GetStatus(), NKikimrScheme::StatusSuccess); + const auto& partitions = record.GetPathDescription().GetTablePartitions(); + indexShards.clear(); + indexShards.reserve(partitions.size()); + for (const auto& partition : partitions) { + indexShards.emplace_back(partition.GetDatashardId()); + } + }; + + // wait until all index impl table shards are merged into one + while (true) { + TestDescribeResult(DescribePrivatePath(runtime, "/MyRoot/Table/ByValue/indexImplTable", true), { + shardCollector + }); + if (indexShards.size() > 1) { + // If a merge happens, old shards are deleted and replaced with a new one. + // That is why we need to wait for * all * the shards to be deleted. 
+ env.TestWaitTabletDeletion(runtime, indexShards); + } else { + break; + } + } + } + Y_UNIT_TEST(AutoMergeInOne) { TTestWithReboots t; t.Run([&](TTestActorRuntime& runtime, bool& activeZone) { diff --git a/ydb/core/tx/schemeshard/ut_subdomain/ut_subdomain.cpp b/ydb/core/tx/schemeshard/ut_subdomain/ut_subdomain.cpp index 78db80f9f32c..8fc34d9edbe0 100644 --- a/ydb/core/tx/schemeshard/ut_subdomain/ut_subdomain.cpp +++ b/ydb/core/tx/schemeshard/ut_subdomain/ut_subdomain.cpp @@ -2568,6 +2568,7 @@ Y_UNIT_TEST_SUITE(TSchemeShardSubDomainTest) { } + //clear subdomain { TestDescribeResult(DescribePath(runtime, "/MyRoot"), @@ -2585,6 +2586,155 @@ Y_UNIT_TEST_SUITE(TSchemeShardSubDomainTest) { } } + Y_UNIT_TEST(ColumnSchemeLimitsRejects) { + TTestBasicRuntime runtime; + TTestEnv env(runtime); + ui64 txId = 100; + + TSchemeLimits lowLimits; + lowLimits.MaxDepth = 4; + lowLimits.MaxPaths = 3; + lowLimits.MaxChildrenInDir = 3; + lowLimits.MaxAclBytesSize = 25; + lowLimits.MaxTableColumns = 3; + lowLimits.MaxColumnTableColumns = 3; + lowLimits.MaxTableColumnNameLength = 10; + lowLimits.MaxTableKeyColumns = 1; + lowLimits.MaxShards = 6; + lowLimits.MaxShardsInPath = 4; + lowLimits.MaxPQPartitions = 20; + + + //lowLimits.ExtraPathSymbolsAllowed = "!\"#$%&'()*+,-.:;<=>?@[\\]^_`{|}~"; + SetSchemeshardSchemaLimits(runtime, lowLimits); + TestDescribeResult(DescribePath(runtime, "/MyRoot"), + {NLs::PathExist, + NLs::DomainLimitsIs(lowLimits.MaxPaths, lowLimits.MaxShards, lowLimits.MaxPQPartitions)}); + + { + TestCreateSubDomain(runtime, txId++, "/MyRoot", + "PlanResolution: 50 " + "Coordinators: 1 " + "Mediators: 1 " + "TimeCastBucketsPerMediator: 2 " + "Name: \"USER_0\"" + " DatabaseQuotas {" + " data_stream_shards_quota: 2" + " data_stream_reserved_storage_quota: 200000" + "}"); + } + + //create column tables, column limits + { + TestMkDir(runtime, txId++, "/MyRoot/USER_0", "C"); + env.TestWaitNotification(runtime, txId - 1); + + // MaxColumnTableColumns + 
TestCreateColumnTable(runtime, txId++, "/MyRoot/USER_0/C", R"( + Name: "C2" + ColumnShardCount: 1 + Schema { + Columns { Name: "RowId" Type: "Uint64", NotNull: true } + Columns { Name: "Value0" Type: "Utf8" } + Columns { Name: "Value1" Type: "Utf8" } + KeyColumnNames: "RowId" + Engine: COLUMN_ENGINE_REPLACING_TIMESERIES + } + )", {NKikimrScheme::StatusAccepted}); + env.TestWaitNotification(runtime, txId - 1); + + TestAlterColumnTable(runtime, txId++, "/MyRoot/USER_0/C", R"( + Name: "C2" + AlterSchema { + DropColumns {Name: "Value0"} + } + )", {NKikimrScheme::StatusAccepted}); + env.TestWaitNotification(runtime, txId - 1); + + TestAlterColumnTable(runtime, txId++, "/MyRoot/USER_0/C", R"( + Name: "C2" + AlterSchema { + DropColumns {Name: "Value1"} + AddColumns { Name: "Value2" Type: "Utf8" } + AddColumns { Name: "Value3" Type: "Utf8" } + AddColumns { Name: "Value4" Type: "Utf8" } + } + )", {NKikimrScheme::StatusSchemeError}); + env.TestWaitNotification(runtime, txId - 1); + + TestCreateColumnTable(runtime, txId++, "/MyRoot/USER_0/C", R"( + Name: "C1" + ColumnShardCount: 1 + Schema { + Columns { Name: "RowId" Type: "Uint64", NotNull: true } + Columns { Name: "Value0" Type: "Utf8" } + Columns { Name: "Value1" Type: "Utf8" } + Columns { Name: "Value2" Type: "Utf8" } + KeyColumnNames: "RowId" + Engine: COLUMN_ENGINE_REPLACING_TIMESERIES + } + )", {NKikimrScheme::StatusSchemeError}); + + TString olapSchema = R"( + Name: "OlapStore1" + ColumnShardCount: 1 + SchemaPresets { + Name: "default" + Schema { + Columns { Name: "timestamp" Type: "Timestamp" NotNull: true } + Columns { Name: "data" Type: "Utf8" } + KeyColumnNames: "timestamp" + Engine: COLUMN_ENGINE_REPLACING_TIMESERIES + } + } + )"; + + TestCreateOlapStore(runtime, txId++, "/MyRoot", olapSchema, {NKikimrScheme::StatusAccepted}); + env.TestWaitNotification(runtime, txId - 1); + + TString olapSchemaBig = R"( + Name: "OlapStoreBig" + ColumnShardCount: 1 + SchemaPresets { + Name: "default" + Schema { + Columns { Name: 
"timestamp" Type: "Timestamp" NotNull: true } + Columns { Name: "data" Type: "Utf8" } + Columns { Name: "data2" Type: "Utf8" } + Columns { Name: "data3" Type: "Utf8" } + KeyColumnNames: "timestamp" + Engine: COLUMN_ENGINE_REPLACING_TIMESERIES + } + } + )"; + + TestCreateOlapStore(runtime, txId++, "/MyRoot", olapSchemaBig, {NKikimrScheme::StatusSchemeError}); + env.TestWaitNotification(runtime, txId - 1); + + TestAlterOlapStore(runtime, txId++, "/MyRoot", R"( + Name: "OlapStore1" + AlterSchemaPresets { + Name: "default" + AlterSchema { + AddColumns { Name: "comment" Type: "Utf8" } + } + } + )", {NKikimrScheme::StatusAccepted}); + env.TestWaitNotification(runtime, txId - 1); + + TestAlterOlapStore(runtime, txId++, "/MyRoot", R"( + Name: "OlapStore1" + AlterSchemaPresets { + Name: "default" + AlterSchema { + AddColumns { Name: "comment2" Type: "Utf8" } + } + } + )", {NKikimrScheme::StatusSchemeError}); + env.TestWaitNotification(runtime, txId - 1); + } + } + Y_UNIT_TEST(SchemeLimitsRejectsWithIndexedTables) { TTestBasicRuntime runtime; TTestEnv env(runtime); diff --git a/ydb/core/tx/schemeshard/ut_topic_splitmerge/ut_topic_splitmerge.cpp b/ydb/core/tx/schemeshard/ut_topic_splitmerge/ut_topic_splitmerge.cpp index 7b09ebefe501..f93498ee65b2 100644 --- a/ydb/core/tx/schemeshard/ut_topic_splitmerge/ut_topic_splitmerge.cpp +++ b/ydb/core/tx/schemeshard/ut_topic_splitmerge/ut_topic_splitmerge.cpp @@ -668,29 +668,6 @@ Y_UNIT_TEST_SUITE(TSchemeShardTopicSplitMergeTest) { partitionStrategy->SetPartitionStrategyType(::NKikimrPQ::TPQTabletConfig_TPartitionStrategyType::TPQTabletConfig_TPartitionStrategyType_DISABLED); } }, {{TEvSchemeShard::EStatus::StatusInvalidParameter}}); - - ModifyTopic(runtime, env, txId, [&](auto& scheme) { - { - auto* partitionStrategy = scheme.MutablePQTabletConfig()->MutablePartitionStrategy(); - partitionStrategy->SetPartitionStrategyType(::NKikimrPQ::TPQTabletConfig_TPartitionStrategyType::TPQTabletConfig_TPartitionStrategyType_DISABLED); - 
partitionStrategy->SetMaxPartitionCount(0); - } - }); - - topic = DescribeTopic(runtime); - - UNIT_ASSERT_VALUES_EQUAL(static_cast(::NKikimrPQ::TPQTabletConfig_TPartitionStrategyType::TPQTabletConfig_TPartitionStrategyType_DISABLED), - static_cast(topic.GetPQTabletConfig().GetPartitionStrategy().GetPartitionStrategyType())); - - UNIT_ASSERT_VALUES_EQUAL(3, topic.GetPartitions().size()); - for (const auto& p : topic.GetPartitions()) { - Cerr << ">>>>> Verify partition " << p.GetPartitionId() << Endl << Flush; - UNIT_ASSERT_VALUES_EQUAL(static_cast(::NKikimrPQ::ETopicPartitionStatus::Active), static_cast(p.GetStatus())); - UNIT_ASSERT(p.GetChildPartitionIds().empty()); - UNIT_ASSERT(p.GetParentPartitionIds().empty()); - UNIT_ASSERT(!p.HasKeyRange()); - } - } // Y_UNIT_TEST(DisableSplitMerge) Y_UNIT_TEST(EnableSplitMerge) { diff --git a/ydb/core/tx/schemeshard/ut_ttl/ut_ttl.cpp b/ydb/core/tx/schemeshard/ut_ttl/ut_ttl.cpp index ce57f14992b3..1accb55c269b 100644 --- a/ydb/core/tx/schemeshard/ut_ttl/ut_ttl.cpp +++ b/ydb/core/tx/schemeshard/ut_ttl/ut_ttl.cpp @@ -1150,6 +1150,7 @@ Y_UNIT_TEST_SUITE(TSchemeShardColumnTableTTL) { Columns { Name: "key" Type: "Uint64" NotNull: true } Columns { Name: "modified_at" Type: "Timestamp" } Columns { Name: "saved_at" Type: "Datetime" } + Columns { Name: "data" Type: "Utf8" } KeyColumnNames: ["key"] } )"); @@ -1206,6 +1207,13 @@ Y_UNIT_TEST_SUITE(TSchemeShardColumnTableTTL) { } } ); + TestAlterColumnTable(runtime, ++txId, "/MyRoot", R"( + Name: "TTLEnabledTable" + AlterSchema { + AlterColumns {Name: "data" DefaultValue: "10"} + } + )", {NKikimrScheme::StatusSchemeError}); + env.TestWaitNotification(runtime, txId); } Y_UNIT_TEST(AlterColumnTable_Negative) { diff --git a/ydb/core/tx/schemeshard/ya.make b/ydb/core/tx/schemeshard/ya.make index 975514b27a12..dad02ce621bc 100644 --- a/ydb/core/tx/schemeshard/ya.make +++ b/ydb/core/tx/schemeshard/ya.make @@ -207,6 +207,7 @@ SRCS( schemeshard_utils.cpp schemeshard_utils.h 
schemeshard_bg_tasks__list.cpp + schemeshard_xxport__helpers.cpp schemeshard_export__cancel.cpp schemeshard_export__create.cpp schemeshard_export__forget.cpp diff --git a/ydb/core/tx/sharding/unboxed_reader.h b/ydb/core/tx/sharding/unboxed_reader.h index fed3e972e2b9..11a31d7e3597 100644 --- a/ydb/core/tx/sharding/unboxed_reader.h +++ b/ydb/core/tx/sharding/unboxed_reader.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include namespace NKikimr::NMiniKQL { diff --git a/ydb/core/tx/sharding/ut/ut_sharding.cpp b/ydb/core/tx/sharding/ut/ut_sharding.cpp index 14dd61a48560..dce5bc6e7500 100644 --- a/ydb/core/tx/sharding/ut/ut_sharding.cpp +++ b/ydb/core/tx/sharding/ut/ut_sharding.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include diff --git a/ydb/core/tx/tiering/manager.cpp b/ydb/core/tx/tiering/manager.cpp index 85dd6d60c10b..57462d745d3a 100644 --- a/ydb/core/tx/tiering/manager.cpp +++ b/ydb/core/tx/tiering/manager.cpp @@ -199,19 +199,18 @@ THashMap TTiersManager::GetTiering() const { Y_ABORT_UNLESS(snapshotPtr); auto& tierConfigs = snapshotPtr->GetTierConfigs(); for (auto&& i : PathIdTiering) { - auto* tiering = snapshotPtr->GetTieringById(i.second); - if (tiering) { + auto* tieringRule = snapshotPtr->GetTieringById(i.second); + if (tieringRule) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("path_id", i.first)("tiering_name", i.second)("event", "activation"); - result.emplace(i.first, tiering->BuildOlapTiers()); - for (auto& [pathId, pathTiering] : result) { - for (auto& [name, tier] : pathTiering.GetTierByName()) { - AFL_VERIFY(name != NOlap::NTiering::NCommon::DeleteTierName); - auto it = tierConfigs.find(name); - if (it != tierConfigs.end()) { - tier->SetSerializer(NTiers::ConvertCompression(it->second.GetCompression())); - } + NOlap::TTiering tiering = tieringRule->BuildOlapTiers(); + for (auto& [name, tier] : tiering.GetTierByName()) { + AFL_VERIFY(name != NOlap::NTiering::NCommon::DeleteTierName); + auto it = 
tierConfigs.find(name); + if (it != tierConfigs.end()) { + tier->SetSerializer(NTiers::ConvertCompression(it->second.GetCompression())); } } + result.emplace(i.first, std::move(tiering)); } else { AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("path_id", i.first)("tiering_name", i.second)("event", "not_found"); } diff --git a/ydb/core/tx/tiering/rule/manager.cpp b/ydb/core/tx/tiering/rule/manager.cpp index c6ea9e9f6130..9dfdb4459d0b 100644 --- a/ydb/core/tx/tiering/rule/manager.cpp +++ b/ydb/core/tx/tiering/rule/manager.cpp @@ -13,6 +13,10 @@ void TTieringRulesManager::DoPrepareObjectsBeforeModification(std::vectorEmpty()) { + return TConclusionStatus::Fail("defaultColumn cannot be empty"); + } result.SetColumn(TTieringRule::TDecoder::DefaultColumn, NMetadata::NInternal::TYDBValue::Utf8(*fValue)); } } diff --git a/ydb/core/tx/tiering/rule/object.cpp b/ydb/core/tx/tiering/rule/object.cpp index 59d42bdb4c8e..a596b56890ca 100644 --- a/ydb/core/tx/tiering/rule/object.cpp +++ b/ydb/core/tx/tiering/rule/object.cpp @@ -30,6 +30,10 @@ bool TTieringRule::DeserializeDescriptionFromJson(const NJson::TJsonValue& jsonI if (!jsonInfo["rules"].GetArrayPointer(&rules)) { return false; } + if (rules->empty()) { + AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("event", "tiering_rule_deserialization_failed")("reason", "empty_rules"); + return false; + } for (auto&& i : *rules) { TTieringInterval interval; if (!interval.DeserializeFromJson(i)) { @@ -61,6 +65,9 @@ bool TTieringRule::DeserializeFromRecord(const TDecoder& decoder, const Ydb::Val if (!decoder.Read(decoder.GetDefaultColumnIdx(), DefaultColumn, r)) { return false; } + if (DefaultColumn.Empty()) { + return false; + } NJson::TJsonValue jsonDescription; if (!decoder.ReadJson(decoder.GetDescriptionIdx(), jsonDescription, r)) { return false; @@ -72,6 +79,7 @@ bool TTieringRule::DeserializeFromRecord(const TDecoder& decoder, const Ydb::Val } NKikimr::NOlap::TTiering TTieringRule::BuildOlapTiers() const { + AFL_VERIFY(!Intervals.empty()); 
NOlap::TTiering result; for (auto&& r : Intervals) { AFL_VERIFY(result.Add(std::make_shared(r.GetTierName(), r.GetDurationForEvict(), GetDefaultColumn()))); diff --git a/ydb/core/tx/tiering/rule/ss_fetcher.cpp b/ydb/core/tx/tiering/rule/ss_fetcher.cpp index 681e96780eb4..e822ace4c5a8 100644 --- a/ydb/core/tx/tiering/rule/ss_fetcher.cpp +++ b/ydb/core/tx/tiering/rule/ss_fetcher.cpp @@ -17,7 +17,7 @@ void TFetcherCheckUserTieringPermissions::DoProcess(NSchemeShard::TSchemeShard& } else { bool denied = false; for (auto&& i : TieringRuleIds) { - const std::set& pathIds = schemeShard.ColumnTables.GetTablesWithTiering(i); + const auto& pathIds = schemeShard.ColumnTables.GetTablesWithTiering(i); for (auto&& pathId : pathIds) { auto path = NSchemeShard::TPath::Init(pathId, &schemeShard); if (!path.IsResolved() || path.IsUnderDeleting() || path.IsDeleted()) { diff --git a/ydb/core/tx/tiering/tier/manager.cpp b/ydb/core/tx/tiering/tier/manager.cpp index a64d2a7603ab..27005ca30a09 100644 --- a/ydb/core/tx/tiering/tier/manager.cpp +++ b/ydb/core/tx/tiering/tier/manager.cpp @@ -8,6 +8,10 @@ NMetadata::NModifications::TOperationParsingResult TTiersManager::DoBuildPatchFr const NYql::TObjectSettingsImpl& settings, TInternalModificationContext& context) const { + if (HasAppData() && !AppDataVerified().FeatureFlags.GetEnableTieringInColumnShard()) { + return TConclusionStatus::Fail("Tiering functionality is disabled for OLAP tables."); + } + NMetadata::NInternal::TTableRecord result; result.SetColumn(TTierConfig::TDecoder::TierName, NMetadata::NInternal::TYDBValue::Utf8(settings.GetObjectId())); if (settings.GetObjectId().StartsWith("$") || settings.GetObjectId().StartsWith("_")) { diff --git a/ydb/core/tx/tiering/ut/ut_tiers.cpp b/ydb/core/tx/tiering/ut/ut_tiers.cpp index 9eeb50b23257..21fa01b29a6e 100644 --- a/ydb/core/tx/tiering/ut/ut_tiers.cpp +++ b/ydb/core/tx/tiering/ut/ut_tiers.cpp @@ -32,21 +32,15 @@ class TFastTTLCompactionController: public 
NKikimr::NYDBTest::ICSController { virtual bool NeedForceCompactionBacketsConstruction() const override { return true; } - virtual TDuration GetRemovedPortionLivetime(const TDuration /*def*/) const override { - return TDuration::Zero(); - } - virtual ui64 GetSmallPortionSizeDetector(const ui64 /*def*/) const override { + virtual ui64 DoGetSmallPortionSizeDetector(const ui64 /*def*/) const override { return 0; } - virtual TDuration GetOptimizerFreshnessCheckDuration(const TDuration /*defaultValue*/) const override { + virtual TDuration DoGetOptimizerFreshnessCheckDuration(const TDuration /*defaultValue*/) const override { return TDuration::Zero(); } - virtual TDuration GetLagForCompactionBeforeTierings(const TDuration /*def*/) const override { + virtual TDuration DoGetLagForCompactionBeforeTierings(const TDuration /*def*/) const override { return TDuration::Zero(); } - virtual TDuration GetTTLDefaultWaitingDuration(const TDuration /*defaultValue*/) const override { - return TDuration::Seconds(1); - } }; @@ -336,7 +330,8 @@ Y_UNIT_TEST_SUITE(ColumnShardTiers) { serverSettings.GrpcPort = grpcPort; serverSettings.SetDomainName("Root") .SetUseRealThreads(false) - .SetEnableMetadataProvider(true); + .SetEnableMetadataProvider(true) + .SetEnableTieringInColumnShard(true) ; Tests::TServer::TPtr server = new Tests::TServer(serverSettings); @@ -418,6 +413,7 @@ Y_UNIT_TEST_SUITE(ColumnShardTiers) { NKikimrConfig::TAppConfig appConfig; appConfig.MutableTableServiceConfig()->SetEnablePreparedDdl(true); + appConfig.MutableColumnShardConfig()->SetDisabledOnSchemeShard(false); Tests::TServerSettings serverSettings(msgbPort); serverSettings.Port = msgbPort; @@ -425,6 +421,7 @@ Y_UNIT_TEST_SUITE(ColumnShardTiers) { serverSettings.SetDomainName("Root") .SetUseRealThreads(false) .SetEnableMetadataProvider(true) + .SetEnableTieringInColumnShard(true) .SetAppConfig(appConfig); Tests::TServer::TPtr server = new Tests::TServer(serverSettings); @@ -555,6 +552,7 @@ 
Y_UNIT_TEST_SUITE(ColumnShardTiers) { serverSettings.SetDomainName("Root") .SetUseRealThreads(false) .SetEnableMetadataProvider(true) + .SetEnableTieringInColumnShard(true) ; Tests::TServer::TPtr server = new Tests::TServer(serverSettings); diff --git a/ydb/core/tx/tx_processing.h b/ydb/core/tx/tx_processing.h index d992dd71474d..1f9b86527225 100644 --- a/ydb/core/tx/tx_processing.h +++ b/ydb/core/tx/tx_processing.h @@ -105,7 +105,7 @@ struct TEvTxProcessing { } }; - struct TEvReadSet : public TEventPB { + struct TEvReadSet: public TEventPB { TEvReadSet() {} diff --git a/ydb/core/tx/tx_proxy/global.cpp b/ydb/core/tx/tx_proxy/global.cpp new file mode 100644 index 000000000000..66d88d8d824e --- /dev/null +++ b/ydb/core/tx/tx_proxy/global.cpp @@ -0,0 +1,5 @@ +#include "global.h" + +namespace NKikimr::NTxProxy { + +} diff --git a/ydb/core/tx/tx_proxy/global.h b/ydb/core/tx/tx_proxy/global.h new file mode 100644 index 000000000000..00002b17e0ac --- /dev/null +++ b/ydb/core/tx/tx_proxy/global.h @@ -0,0 +1,9 @@ +#pragma once +#include + +namespace NKikimr::NTxProxy { +class TLimits { +public: + static constexpr ui64 MemoryInFlightWriting = (ui64)1 << 30; +}; +} \ No newline at end of file diff --git a/ydb/core/tx/tx_proxy/rpc_long_tx.cpp b/ydb/core/tx/tx_proxy/rpc_long_tx.cpp index f5b7c6b07cd5..557cf13c14cb 100644 --- a/ydb/core/tx/tx_proxy/rpc_long_tx.cpp +++ b/ydb/core/tx/tx_proxy/rpc_long_tx.cpp @@ -1,10 +1,15 @@ -#include -#include +#include "global.h" + +#include #include +#include #include -#include +#include +#include #include +#include + #include namespace NKikimr { @@ -16,30 +21,29 @@ using namespace NLongTxService; // Common logic of LongTx Write that takes care of splitting the data according to the sharding scheme, // sending it to shards and collecting their responses template -class TLongTxWriteBase : public TActorBootstrapped { +class TLongTxWriteBase: public TActorBootstrapped { using TBase = TActorBootstrapped; + static inline TAtomicCounter 
MemoryInFlight = 0; + protected: using TThis = typename TBase::TThis; public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::GRPC_REQ; - } - - TLongTxWriteBase(const TString& databaseName, const TString& path, const TString& token, - const TLongTxId& longTxId, const TString& dedupId) - : TBase() - , DatabaseName(databaseName) + TLongTxWriteBase(const TString& databaseName, const TString& path, const TString& token, const TLongTxId& longTxId, const TString& dedupId) + : DatabaseName(databaseName) , Path(path) , DedupId(dedupId) , LongTxId(longTxId) - , ActorSpan(0, NWilson::TTraceId::NewTraceId(0, Max()), "TLongTxWriteBase") - { + , ActorSpan(0, NWilson::TTraceId::NewTraceId(0, Max()), "TLongTxWriteBase") { if (token) { UserToken.emplace(token); } } + virtual ~TLongTxWriteBase() { + AFL_VERIFY(MemoryInFlight.Sub(InFlightSize) >= 0); + } + protected: void ProceedWithSchema(const NSchemeCache::TSchemeCacheNavigate& resp) { NWilson::TProfileSpan pSpan = ActorSpan.BuildChildrenSpan("ProceedWithSchema"); @@ -53,17 +57,24 @@ class TLongTxWriteBase : public TActorBootstrapped { if (UserToken && entry.SecurityObject) { const ui32 access = NACLib::UpdateRow; if (!entry.SecurityObject->CheckAccess(access, *UserToken)) { - RaiseIssue(MakeIssue(NKikimrIssues::TIssuesIds::ACCESS_DENIED, TStringBuilder() - << "User has no permission to perform writes to this table" - << " user: " << UserToken->GetUserSID() - << " path: " << Path)); + RaiseIssue(MakeIssue( + NKikimrIssues::TIssuesIds::ACCESS_DENIED, TStringBuilder() << "User has no permission to perform writes to this table" + << " user: " << UserToken->GetUserSID() << " path: " << Path)); return ReplyError(Ydb::StatusIds::UNAUTHORIZED); } } + auto accessor = ExtractDataAccessor(); + AFL_VERIFY(!InFlightSize); + InFlightSize = accessor->GetSize(); + const i64 sizeInFlight = MemoryInFlight.Add(InFlightSize); + if (TLimits::MemoryInFlightWriting < (ui64)sizeInFlight && 
sizeInFlight != InFlightSize) { + return ReplyError(Ydb::StatusIds::OVERLOADED, "a lot of memory in flight"); + } if (NCSIndex::TServiceOperator::IsEnabled()) { - TBase::Send(NCSIndex::MakeServiceId(TBase::SelfId().NodeId()), - new NCSIndex::TEvAddData(GetDataAccessor().GetDeserializedBatch(), Path, std::make_shared(TBase::SelfId()))); + TBase::Send( + NCSIndex::MakeServiceId(TBase::SelfId().NodeId()), new NCSIndex::TEvAddData(accessor->GetDeserializedBatch(), Path, + std::make_shared(TBase::SelfId()))); } else { IndexReady = true; } @@ -73,10 +84,11 @@ class TLongTxWriteBase : public TActorBootstrapped { return ReplyError(Ydb::StatusIds::BAD_REQUEST, "Shard splitter not implemented for table kind"); } - auto initStatus = shardsSplitter->SplitData(entry, GetDataAccessor()); + auto initStatus = shardsSplitter->SplitData(entry, *accessor); if (!initStatus.Ok()) { return ReplyError(initStatus.GetStatus(), initStatus.GetErrorMessage()); } + accessor.reset(); const auto& splittedData = shardsSplitter->GetSplitData(); InternalController = std::make_shared(splittedData.GetShardRequestsCount(), this->SelfId(), LongTxId); @@ -85,24 +97,26 @@ class TLongTxWriteBase : public TActorBootstrapped { ui32 writeIdx = 0; for (auto& [shard, infos] : splittedData.GetShardsInfo()) { for (auto&& shardInfo : infos) { + InternalController->GetCounters()->OnRequest(shardInfo->GetRowsCount(), shardInfo->GetBytes()); sumBytes += shardInfo->GetBytes(); rowsCount += shardInfo->GetRowsCount(); - this->Register(new NEvWrite::TShardWriter(shard, shardsSplitter->GetTableId(), DedupId, shardInfo, ActorSpan, InternalController, ++writeIdx, NEvWrite::EModificationType::Replace)); + this->Register(new NEvWrite::TShardWriter(shard, shardsSplitter->GetTableId(), DedupId, shardInfo, ActorSpan, InternalController, + ++writeIdx, NEvWrite::EModificationType::Replace)); } } pSpan.Attribute("affected_shards_count", (long)splittedData.GetShardsInfo().size()); pSpan.Attribute("bytes", (long)sumBytes); 
pSpan.Attribute("rows", (long)rowsCount); pSpan.Attribute("shards_count", (long)splittedData.GetShardsCount()); - AFL_DEBUG(NKikimrServices::LONG_TX_SERVICE)("affected_shards_count", splittedData.GetShardsInfo().size())("shards_count", splittedData.GetShardsCount()) - ("path", Path)("shards_info", splittedData.ShortLogString(32)); + AFL_DEBUG(NKikimrServices::LONG_TX_SERVICE)("affected_shards_count", splittedData.GetShardsInfo().size())( + "shards_count", splittedData.GetShardsCount())("path", Path)("shards_info", splittedData.ShortLogString(32)); this->Become(&TThis::StateMain); } private: STFUNC(StateMain) { switch (ev->GetTypeRewrite()) { - hFunc(NEvWrite::TWritersController::TEvPrivate::TEvShardsWriteResult, Handle) + hFunc(NEvWrite::TWritersController::TEvPrivate::TEvShardsWriteResult, Handle); hFunc(TEvLongTxService::TEvAttachColumnShardWritesResult, Handle); hFunc(NCSIndex::TEvAddDataResult, Handle); } @@ -150,11 +164,10 @@ class TLongTxWriteBase : public TActorBootstrapped { IndexReady = true; } } - } protected: - virtual NEvWrite::IShardsSplitter::IEvWriteDataAccessor& GetDataAccessor() const = 0; + virtual std::unique_ptr ExtractDataAccessor() = 0; virtual void RaiseIssue(const NYql::TIssue& issue) = 0; virtual void ReplyError(Ydb::StatusIds::StatusCode status, const TString& message = TString()) = 0; virtual void ReplySuccess() = 0; @@ -164,7 +177,9 @@ class TLongTxWriteBase : public TActorBootstrapped { const TString Path; const TString DedupId; TLongTxId LongTxId; + private: + i64 InFlightSize = 0; std::optional UserToken; NWilson::TProfileSpan ActorSpan; NEvWrite::TWritersController::TPtr InternalController; @@ -174,15 +189,19 @@ class TLongTxWriteBase : public TActorBootstrapped { // LongTx Write implementation called from the inside of YDB (e.g. 
as a part of BulkUpsert call) // NOTE: permission checks must have been done by the caller -class TLongTxWriteInternal : public TLongTxWriteBase { +class TLongTxWriteInternal: public TLongTxWriteBase { using TBase = TLongTxWriteBase; - class TParsedBatchData : public NEvWrite::IShardsSplitter::IEvWriteDataAccessor { + class TParsedBatchData: public NEvWrite::IShardsSplitter::IEvWriteDataAccessor { + private: + using TBase = NEvWrite::IShardsSplitter::IEvWriteDataAccessor; std::shared_ptr Batch; + public: TParsedBatchData(std::shared_ptr batch) - : Batch(batch) - {} + : TBase(NArrow::GetBatchMemorySize(batch)) + , Batch(batch) { + } std::shared_ptr GetDeserializedBatch() const override { return Batch; @@ -193,25 +212,19 @@ class TLongTxWriteInternal : public TLongTxWriteBase { } }; - NEvWrite::IShardsSplitter::IEvWriteDataAccessor::TPtr DataAccessor; -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::GRPC_REQ; - } + std::unique_ptr DataAccessor; - explicit TLongTxWriteInternal(const TActorId& replyTo, const TLongTxId& longTxId, const TString& dedupId, - const TString& databaseName, const TString& path, - std::shared_ptr navigateResult, - std::shared_ptr batch, - std::shared_ptr issues) +public: + explicit TLongTxWriteInternal(const TActorId& replyTo, const TLongTxId& longTxId, const TString& dedupId, const TString& databaseName, + const TString& path, std::shared_ptr navigateResult, std::shared_ptr batch, + std::shared_ptr issues) : TBase(databaseName, path, TString(), longTxId, dedupId) , ReplyTo(replyTo) , NavigateResult(navigateResult) , Batch(batch) - , Issues(issues) - { + , Issues(issues) { Y_ABORT_UNLESS(Issues); - DataAccessor = std::make_shared(Batch); + DataAccessor = std::make_unique(Batch); } void Bootstrap() { @@ -220,8 +233,9 @@ class TLongTxWriteInternal : public TLongTxWriteBase { } protected: - NEvWrite::IShardsSplitter::IEvWriteDataAccessor& GetDataAccessor() const override { - 
return *DataAccessor; + std::unique_ptr ExtractDataAccessor() override { + AFL_VERIFY(DataAccessor); + return std::move(DataAccessor); } void RaiseIssue(const NYql::TIssue& issue) override { @@ -248,19 +262,14 @@ class TLongTxWriteInternal : public TLongTxWriteBase { std::shared_ptr Issues; }; - -TActorId DoLongTxWriteSameMailbox(const TActorContext& ctx, const TActorId& replyTo, - const NLongTxService::TLongTxId& longTxId, const TString& dedupId, - const TString& databaseName, const TString& path, - std::shared_ptr navigateResult, - std::shared_ptr batch, std::shared_ptr issues) -{ - return ctx.RegisterWithSameMailbox( - new TLongTxWriteInternal(replyTo, longTxId, dedupId, databaseName, path, navigateResult, batch, issues)); +TActorId DoLongTxWriteSameMailbox(const TActorContext& ctx, const TActorId& replyTo, const NLongTxService::TLongTxId& longTxId, + const TString& dedupId, const TString& databaseName, const TString& path, + std::shared_ptr navigateResult, std::shared_ptr batch, + std::shared_ptr issues) { + return ctx.RegisterWithSameMailbox(new TLongTxWriteInternal(replyTo, longTxId, dedupId, databaseName, path, navigateResult, batch, issues)); } // - -} -} +} // namespace NTxProxy +} // namespace NKikimr diff --git a/ydb/core/tx/tx_proxy/upload_rows_common_impl.cpp b/ydb/core/tx/tx_proxy/upload_rows_common_impl.cpp index 6d487a26016b..281d8bf05bc7 100644 --- a/ydb/core/tx/tx_proxy/upload_rows_common_impl.cpp +++ b/ydb/core/tx/tx_proxy/upload_rows_common_impl.cpp @@ -7,10 +7,16 @@ namespace NKikimr { : TBase("BulkUpsert") { RequestsCount = TBase::GetDeriviative("Requests/Count"); - ReplyDuration = TBase::GetHistogram("Replies/Duration", NMonitoring::ExponentialHistogram(15, 2, 1)); + ReplyDuration = TBase::GetHistogram("Replies/Duration", NMonitoring::ExponentialHistogram(15, 2, 10)); RowsCount = TBase::GetDeriviative("Rows/Count"); - PackageSize = TBase::GetHistogram("Rows/PackageSize", NMonitoring::ExponentialHistogram(15, 2, 10)); + 
PackageSizeRecordsByRecords = TBase::GetHistogram("ByRecords/PackageSize/Records", NMonitoring::ExponentialHistogram(15, 2, 10)); + PackageSizeCountByRecords = TBase::GetHistogram("ByRecords/PackageSize/Count", NMonitoring::ExponentialHistogram(15, 2, 10)); + + PreparingDuration = TBase::GetHistogram("Preparing/DurationMs", NMonitoring::ExponentialHistogram(15, 2, 10)); + WritingDuration = TBase::GetHistogram("Writing/DurationMs", NMonitoring::ExponentialHistogram(15, 2, 10)); + CommitDuration = TBase::GetHistogram("Commit/DurationMs", NMonitoring::ExponentialHistogram(15, 2, 10)); + PrepareReplyDuration = TBase::GetHistogram("ToReply/DurationMs", NMonitoring::ExponentialHistogram(15, 2, 10)); const google::protobuf::EnumDescriptor* descriptor = ::Ydb::StatusIds::StatusCode_descriptor(); for (ui32 i = 0; i < (ui32)descriptor->value_count(); ++i) { @@ -19,12 +25,4 @@ namespace NKikimr { } } - void TUploadCounters::OnReply(const TDuration d, const ::Ydb::StatusIds::StatusCode code) const { - const TString name = ::Ydb::StatusIds::StatusCode_Name(code); - auto it = CodesCount.find(name); - Y_ABORT_UNLESS(it != CodesCount.end()); - it->second->Add(1); - ReplyDuration->Collect(d.MilliSeconds()); - } - } diff --git a/ydb/core/tx/tx_proxy/upload_rows_common_impl.h b/ydb/core/tx/tx_proxy/upload_rows_common_impl.h index 599f9984b8ab..cbfaffaf23f4 100644 --- a/ydb/core/tx/tx_proxy/upload_rows_common_impl.h +++ b/ydb/core/tx/tx_proxy/upload_rows_common_impl.h @@ -43,19 +43,77 @@ class TUploadCounters: public NColumnShard::TCommonCountersOwner { NMonitoring::THistogramPtr ReplyDuration; NMonitoring::TDynamicCounters::TCounterPtr RowsCount; - NMonitoring::THistogramPtr PackageSize; + NMonitoring::THistogramPtr PackageSizeRecordsByRecords; + NMonitoring::THistogramPtr PackageSizeCountByRecords; + + NMonitoring::THistogramPtr PreparingDuration; + NMonitoring::THistogramPtr WritingDuration; + NMonitoring::THistogramPtr CommitDuration; + NMonitoring::THistogramPtr 
PrepareReplyDuration; THashMap CodesCount; public: TUploadCounters(); + class TGuard: TMoveOnly { + private: + TMonotonic Start = TMonotonic::Now(); + std::optional WritingStarted; + std::optional CommitStarted; + std::optional CommitFinished; + std::optional ReplyFinished; + TUploadCounters& Owner; + public: + TGuard(const TMonotonic start, TUploadCounters& owner) + : Start(start) + , Owner(owner) + { + + } + + void OnWritingStarted() { + WritingStarted = TMonotonic::Now(); + Owner.PreparingDuration->Collect((*WritingStarted - Start).MilliSeconds()); + } + + void OnCommitStarted() { + CommitStarted = TMonotonic::Now(); + AFL_VERIFY(WritingStarted); + Owner.WritingDuration->Collect((*CommitStarted - *WritingStarted).MilliSeconds()); + } + + void OnCommitFinished() { + CommitFinished = TMonotonic::Now(); + AFL_VERIFY(CommitStarted); + Owner.CommitDuration->Collect((*CommitFinished - *CommitStarted).MilliSeconds()); + } + + void OnReply(const ::Ydb::StatusIds::StatusCode code) { + ReplyFinished = TMonotonic::Now(); + if (CommitFinished) { + Owner.PrepareReplyDuration->Collect((*ReplyFinished - *CommitFinished).MilliSeconds()); + } + Owner.ReplyDuration->Collect((*ReplyFinished - Start).MilliSeconds()); + + const TString name = ::Ydb::StatusIds::StatusCode_Name(code); + auto it = Owner.CodesCount.find(name); + Y_ABORT_UNLESS(it != Owner.CodesCount.end()); + it->second->Add(1); + } + }; + + TGuard BuildGuard(const TMonotonic start) { + return TGuard(start, *this); + } + void OnRequest(const ui64 rowsCount) const { RequestsCount->Add(1); RowsCount->Add(rowsCount); - PackageSize->Collect(rowsCount); + PackageSizeRecordsByRecords->Collect((i64)rowsCount, rowsCount); + PackageSizeCountByRecords->Collect(rowsCount); } - void OnReply(const TDuration d, const ::Ydb::StatusIds::StatusCode code) const; + void OnReply(const TDuration dFull, const TDuration dDelta, const ::Ydb::StatusIds::StatusCode code) const; }; @@ -148,6 +206,7 @@ class TUploadRowsBase : public 
TActorBootstrapped StartCommitTime; TActorId TimeoutTimerActorId; TAutoPtr ResolvePartitionsResult; @@ -164,7 +223,7 @@ class TUploadRowsBase : public TActorBootstrapped Issues = std::make_shared(); NLongTxService::TLongTxId LongTxId; TUploadCounters UploadCounters; - + TUploadCounters::TGuard UploadCountersGuard; protected: enum class EUploadSource { ProtoValues = 0, @@ -216,6 +275,7 @@ class TUploadRowsBase : public TActorBootstrappedGet(); if (msg->Record.GetStatus() == Ydb::StatusIds::SUCCESS) { @@ -1263,7 +1326,7 @@ class TUploadRowsBase : public TActorBootstrappedNow() - StartTime, status); + UploadCountersGuard.OnReply(status); SendResult(ctx, status); LOG_DEBUG_S(ctx, NKikimrServices::RPC_REQUEST, LogPrefix() << "completed with status " << status); diff --git a/ydb/core/tx/tx_proxy/ya.make b/ydb/core/tx/tx_proxy/ya.make index fa746bd1e249..d592810a65b2 100644 --- a/ydb/core/tx/tx_proxy/ya.make +++ b/ydb/core/tx/tx_proxy/ya.make @@ -14,6 +14,7 @@ SRCS( commitreq.cpp upload_rows_common_impl.cpp upload_rows.cpp + global.cpp ) GENERATE_ENUM_SERIALIZATION(read_table_impl.h) diff --git a/ydb/core/util/concurrent_rw_hash.h b/ydb/core/util/concurrent_rw_hash.h index 2e787d1022e4..5174886498d5 100644 --- a/ydb/core/util/concurrent_rw_hash.h +++ b/ydb/core/util/concurrent_rw_hash.h @@ -49,7 +49,7 @@ class TConcurrentRWHashMap { bucket.Map[key] = value; } - V& InsertIfAbsent(const K& key, const V& value) { + V InsertIfAbsent(const K& key, const V& value) { TBucket& bucket = GetBucketForKey(key); TWriteGuard guard(bucket.RWLock); @@ -57,7 +57,7 @@ class TConcurrentRWHashMap { } template - V& InsertIfAbsentWithInit(const K& key, Callable initFunc) { + V InsertIfAbsentWithInit(const K& key, Callable initFunc) { TBucket& bucket = GetBucketForKey(key); TWriteGuard guard(bucket.RWLock); diff --git a/ydb/core/viewer/browse.h b/ydb/core/viewer/browse.h index aa29d6c0f3a4..69569a35b61c 100644 --- a/ydb/core/viewer/browse.h +++ b/ydb/core/viewer/browse.h @@ -1,21 +1,16 @@ 
#pragma once -#include -#include -#include +#include "browse_events.h" +#include "viewer.h" +#include "wb_aggregate.h" #include #include #include -#include +#include #include #include -#include -#include -#include "browse_events.h" -#include "viewer.h" -#include "wb_aggregate.h" +#include -namespace NKikimr { -namespace NViewer { +namespace NKikimr::NViewer { using namespace NActors; @@ -646,4 +641,3 @@ class TBrowseTabletsCommon : public TActorBootstrapped { }; } -} diff --git a/ydb/core/viewer/browse_db.h b/ydb/core/viewer/browse_db.h index 86c4aa264127..f794cb3bbab4 100644 --- a/ydb/core/viewer/browse_db.h +++ b/ydb/core/viewer/browse_db.h @@ -1,20 +1,12 @@ #pragma once -#include -#include +#include "browse.h" +#include "viewer.h" +#include "wb_aggregate.h" #include -#include -#include #include #include -#include -#include -#include -#include "viewer.h" -#include "browse.h" -#include "wb_aggregate.h" -namespace NKikimr { -namespace NViewerDB { +namespace NKikimr::NViewerDB { using namespace NViewer; using namespace NActors; @@ -200,4 +192,3 @@ class TBrowseTable : public TBrowseTabletsCommon { }; } -} diff --git a/ydb/core/viewer/browse_events.h b/ydb/core/viewer/browse_events.h index ee0bbbb37572..92cc2b206bec 100644 --- a/ydb/core/viewer/browse_events.h +++ b/ydb/core/viewer/browse_events.h @@ -1,13 +1,12 @@ #pragma once -#include -#include -#include - #include +#include +#include +#include +#include -namespace NKikimr { -namespace NViewer { +namespace NKikimr::NViewer { namespace NViewerEvents { enum EEv { @@ -76,5 +75,4 @@ namespace NViewerEvents { }; } // namespace NViewerEvents -} // namespace NViewer -} // namespace NKikimr +} diff --git a/ydb/core/viewer/browse_pq.h b/ydb/core/viewer/browse_pq.h index 1f2b5d950b16..190b72ad953b 100644 --- a/ydb/core/viewer/browse_pq.h +++ b/ydb/core/viewer/browse_pq.h @@ -1,20 +1,12 @@ #pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "viewer.h" 
#include "browse.h" +#include "viewer.h" #include "wb_aggregate.h" +#include +#include +#include -namespace NKikimr { -namespace NViewerPQ { +namespace NKikimr::NViewerPQ { using namespace NViewer; using namespace NActors; @@ -419,4 +411,3 @@ class TBrowseTopic : public TBrowseCommon { }; } -} diff --git a/ydb/core/viewer/check_access.h b/ydb/core/viewer/check_access.h deleted file mode 100644 index 42526b2bffe1..000000000000 --- a/ydb/core/viewer/check_access.h +++ /dev/null @@ -1,229 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "json_pipe_req.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; -using NSchemeShard::TEvSchemeShard; -using TNavigate = NSchemeCache::TSchemeCacheNavigate; - -class TCheckAccess : public TViewerPipeClient { - using TBase = TViewerPipeClient; - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - TAutoPtr CacheResult; - TVector Permissions; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TCheckAccess(IViewer* viewer, NMon::TEvHttpInfo::TPtr &ev) - : Viewer(viewer) - , Event(ev) - {} - - void Bootstrap() { - const auto& params(Event->Get()->Request.GetParams()); - ui32 timeout = FromStringWithDefault(params.Get("timeout"), 10000); - TString database; - if (params.Has("database")) { - database = params.Get("database"); - } else { - return ReplyAndPassAway(Viewer->GetHTTPBADREQUEST(Event->Get(), "text/plain", "field 'database' is required")); - } - if (database && database != AppData()->TenantName) { - BLOG_TRACE("Requesting StateStorageEndpointsLookup for " << database); - RequestStateStorageEndpointsLookup(database); // to find some dynamic node and redirect query there - } else { - if (params.Has("permissions")) { - Split(params.Get("permissions"), ",", Permissions); - } else { - return ReplyAndPassAway(Viewer->GetHTTPBADREQUEST(Event->Get(), 
"text/plain", "field 'permissions' is required")); - } - if (params.Has("path")) { - RequestSchemeCacheNavigate(params.Get("path")); - } else { - return ReplyAndPassAway(Viewer->GetHTTPBADREQUEST(Event->Get(), "text/plain", "field 'path' is required")); - } - } - Become(&TThis::StateRequestedNavigate, TDuration::MilliSeconds(timeout), new TEvents::TEvWakeup()); - } - - void Handle(TEvStateStorage::TEvBoardInfo::TPtr& ev) { - BLOG_TRACE("Received TEvBoardInfo"); - ReplyAndPassAway(Viewer->MakeForward(Event->Get(), GetNodesFromBoardReply(ev))); - } - - STATEFN(StateRequestedNavigate) { - switch (ev->GetTypeRewrite()) { - hFunc(TEvStateStorage::TEvBoardInfo, Handle); - hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, Handle); - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { - CacheResult = ev->Release(); - RequestDone(); - } - - ui32 GetAccessType(const TString& permission) { - TACLAttrs attrs(0); - try { - attrs = ConvertYdbPermissionNameToACLAttrs(permission); - } - catch (const std::exception&) { - } - return attrs.AccessMask; - } - - bool CheckAccessPermission(const NACLib::TSecurityObject* object, const NACLib::TUserToken* token, const TString& permission) { - const auto& kikimrRunConfig = Viewer->GetKikimrRunConfig(); - const auto& securityConfig = kikimrRunConfig.AppConfig.GetDomainsConfig().GetSecurityConfig(); - if (!securityConfig.GetEnforceUserTokenRequirement()) { - if (!securityConfig.GetEnforceUserTokenCheckRequirement() || token == nullptr) { - return true; - } - } - if (token == nullptr) { - return false; - } - if (object == nullptr) { - return false; - } - ui32 access = GetAccessType(permission); - if (access == 0) { - return false; - } - return object->CheckAccess(access, *token); - } - - void ReplyAndPassAway() { - std::unique_ptr token; - if (Event->Get()->UserToken) { - token = std::make_unique(Event->Get()->UserToken); - } - if (CacheResult == nullptr) { - 
return ReplyAndPassAway(Viewer->GetHTTPINTERNALERROR(Event->Get(), "text/plain", "no SchemeCache response")); - } - if (CacheResult->Request == nullptr) { - return ReplyAndPassAway(Viewer->GetHTTPINTERNALERROR(Event->Get(), "text/plain", "wrong SchemeCache response")); - } - if (CacheResult->Request.Get()->ResultSet.empty()) { - return ReplyAndPassAway(Viewer->GetHTTPINTERNALERROR(Event->Get(), "text/plain", "SchemeCache response is empty")); - } - if (CacheResult->Request.Get()->ErrorCount != 0) { - return ReplyAndPassAway(Viewer->GetHTTPBADREQUEST(Event->Get(), "text/plain", TStringBuilder() << "SchemeCache response error " << static_cast(CacheResult->Request.Get()->ResultSet.front().Status))); - } - - - auto object = CacheResult->Request.Get()->ResultSet.front().SecurityObject; - - NJson::TJsonValue json(NJson::JSON_MAP); - - for (const TString& permission : Permissions) { - json[permission] = CheckAccessPermission(object.Get(), token.get(), permission); - } - - ReplyAndPassAway(Viewer->GetHTTPOKJSON(Event->Get(), NJson::WriteJson(json, false))); - } - - void HandleTimeout() { - ReplyAndPassAway(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get(), "text/plain", "Timeout receiving SchemeCache response")); - } - - void ReplyAndPassAway(TString data) { - Send(Event->Sender, new NMon::TEvHttpInfoRes(data, 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } -}; - -template <> -YAML::Node TJsonRequestSwagger::GetSwagger() { - YAML::Node node = YAML::Load(R"___( - get: - tags: - - viewer - summary: Check access - description: Check access to the specified path - parameters: - - name: database - in: query - description: database name - type: string - required: true - - name: path - in: query - description: path to check access - type: string - required: true - - name: permissions - in: query - description: permissions to check - required: true - type: array - items: - type: string - enum: - - ydb.database.connect - - ydb.tables.modify - - ydb.tables.read - - 
ydb.generic.list - - ydb.generic.read - - ydb.generic.write - - ydb.generic.use_legacy - - ydb.generic.use - - ydb.generic.manage - - ydb.generic.full_legacy - - ydb.generic.full - - ydb.database.create - - ydb.database.drop - - ydb.access.grant - - ydb.granular.select_row - - ydb.granular.update_row - - ydb.granular.erase_row - - ydb.granular.read_attributes - - ydb.granular.write_attributes - - ydb.granular.create_directory - - ydb.granular.create_table - - ydb.granular.create_queue - - ydb.granular.remove_schema - - ydb.granular.describe_schema - - ydb.granular.alter_schema - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - responses: - 200: - description: OK - content: - application/json: - schema: {} - 400: - description: Bad Request - 403: - description: Forbidden - 504: - description: Gateway Timeout - )___"); - - return node; -} - -} -} - diff --git a/ydb/core/viewer/counters_hosts.h b/ydb/core/viewer/counters_hosts.h index 2232f16b00ba..b93a7cb0b485 100644 --- a/ydb/core/viewer/counters_hosts.h +++ b/ydb/core/viewer/counters_hosts.h @@ -1,16 +1,12 @@ #pragma once -#include -#include -#include -#include +#include "viewer.h" #include #include -#include #include -#include "viewer.h" +#include +#include -namespace NKikimr { -namespace NViewer { +namespace NKikimr::NViewer { using namespace NActors; using namespace NNodeWhiteboard; @@ -173,4 +169,3 @@ class TCountersHostsList : public TActorBootstrapped { }; } -} diff --git a/ydb/core/viewer/healthcheck_record.h b/ydb/core/viewer/healthcheck_record.h index 39c94e288773..5b55d2bd1322 100644 --- a/ydb/core/viewer/healthcheck_record.h +++ b/ydb/core/viewer/healthcheck_record.h @@ -1,9 +1,8 @@ #pragma once +#include +#include namespace NKikimr::NViewer { - -using namespace NActors; -using namespace NMonitoring; struct TMetricRecord { TString Database; diff --git a/ydb/core/viewer/json_acl.h b/ydb/core/viewer/json_acl.h deleted file mode 100644 index 
d484ac4f2860..000000000000 --- a/ydb/core/viewer/json_acl.h +++ /dev/null @@ -1,303 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include "viewer.h" -#include "json_pipe_req.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; -using NSchemeShard::TEvSchemeShard; - -class TJsonACL : public TViewerPipeClient { - using TBase = TViewerPipeClient; - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - TAutoPtr CacheResult; - TJsonSettings JsonSettings; - bool MergeRules = false; - ui32 Timeout = 0; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonACL(IViewer* viewer, NMon::TEvHttpInfo::TPtr &ev) - : Viewer(viewer) - , Event(ev) - {} - - void Bootstrap() { - const auto& params(Event->Get()->Request.GetParams()); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - TString database; - if (params.Has("database")) { - database = params.Get("database"); - } - if (database && database != AppData()->TenantName) { - BLOG_TRACE("Requesting StateStorageEndpointsLookup for " << database); - RequestStateStorageEndpointsLookup(database); // to find some dynamic node and redirect query there - } else { - if (params.Has("path")) { - RequestSchemeCacheNavigate(params.Get("path")); - } else { - return ReplyAndPassAway(Viewer->GetHTTPBADREQUEST(Event->Get(), "text/plain", "field 'path' is required")); - } - MergeRules = FromStringWithDefault(params.Get("merge_rules"), MergeRules); - } - - Become(&TThis::StateRequestedDescribe, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - void Handle(TEvStateStorage::TEvBoardInfo::TPtr& ev) { - BLOG_TRACE("Received TEvBoardInfo"); - ReplyAndPassAway(Viewer->MakeForward(Event->Get(), GetNodesFromBoardReply(ev))); - } - - STATEFN(StateRequestedDescribe) { - switch (ev->GetTypeRewrite()) { - hFunc(TEvStateStorage::TEvBoardInfo, Handle); - 
hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, Handle); - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { - CacheResult = ev->Release(); - RequestDone(); - } - - static bool Has(ui32 accessRights, ui32 mask) { - return (accessRights & mask) == mask; - } - - void FillACE(const NACLibProto::TACE& ace, NKikimrViewer::TMetaCommonInfo::TACE& pbAce) { - if (static_cast(ace.GetAccessType()) == NACLib::EAccessType::Deny) { - pbAce.SetAccessType("Deny"); - } - if (static_cast(ace.GetAccessType()) == NACLib::EAccessType::Allow) { - pbAce.SetAccessType("Allow"); - } - - auto ar = ace.GetAccessRight(); - - static std::pair accessRules[] = { - {NACLib::EAccessRights::GenericFull, "Full"}, - {NACLib::EAccessRights::GenericFullLegacy, "FullLegacy"}, - {NACLib::EAccessRights::GenericManage, "Manage"}, - {NACLib::EAccessRights::GenericUse, "Use"}, - {NACLib::EAccessRights::GenericUseLegacy, "UseLegacy"}, - {NACLib::EAccessRights::GenericWrite, "Write"}, - {NACLib::EAccessRights::GenericRead, "Read"}, - {NACLib::EAccessRights::GenericList, "List"}, - }; - if (MergeRules) { - for (const auto& [rule, name] : accessRules) { - if (Has(ar, rule)) { - pbAce.AddAccessRules(name); - ar &= ~rule; - } - } - } - - static std::pair accessRights[] = { - {NACLib::EAccessRights::SelectRow, "SelectRow"}, - {NACLib::EAccessRights::UpdateRow, "UpdateRow"}, - {NACLib::EAccessRights::EraseRow, "EraseRow"}, - {NACLib::EAccessRights::ReadAttributes, "ReadAttributes"}, - {NACLib::EAccessRights::WriteAttributes, "WriteAttributes"}, - {NACLib::EAccessRights::CreateDirectory, "CreateDirectory"}, - {NACLib::EAccessRights::CreateTable, "CreateTable"}, - {NACLib::EAccessRights::CreateQueue, "CreateQueue"}, - {NACLib::EAccessRights::RemoveSchema, "RemoveSchema"}, - {NACLib::EAccessRights::DescribeSchema, "DescribeSchema"}, - {NACLib::EAccessRights::AlterSchema, "AlterSchema"}, - {NACLib::EAccessRights::CreateDatabase, 
"CreateDatabase"}, - {NACLib::EAccessRights::DropDatabase, "DropDatabase"}, - {NACLib::EAccessRights::GrantAccessRights, "GrantAccessRights"}, - {NACLib::EAccessRights::WriteUserAttributes, "WriteUserAttributes"}, - {NACLib::EAccessRights::ConnectDatabase, "ConnectDatabase"}, - {NACLib::EAccessRights::ReadStream, "ReadStream"}, - {NACLib::EAccessRights::WriteStream, "WriteStream"}, - {NACLib::EAccessRights::ReadTopic, "ReadTopic"}, - {NACLib::EAccessRights::WriteTopic, "WriteTopic"} - }; - for (const auto& [right, name] : accessRights) { - if (Has(ar, right)) { - pbAce.AddAccessRights(name); - ar &= ~right; - } - } - - if (ar != 0) { - pbAce.AddAccessRights(NACLib::AccessRightsToString(ar)); - } - - pbAce.SetSubject(ace.GetSID()); - - auto inht = ace.GetInheritanceType(); - if ((inht & NACLib::EInheritanceType::InheritObject) != 0) { - pbAce.AddInheritanceType("Object"); - } - if ((inht & NACLib::EInheritanceType::InheritContainer) != 0) { - pbAce.AddInheritanceType("Container"); - } - if ((inht & NACLib::EInheritanceType::InheritOnly) != 0) { - pbAce.AddInheritanceType("Only"); - } - } - - void ReplyAndPassAway(TString data) { - Send(Event->Sender, new NMon::TEvHttpInfoRes(data, 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } - - void ReplyAndPassAway() { - if (CacheResult == nullptr) { - return ReplyAndPassAway(Viewer->GetHTTPINTERNALERROR(Event->Get(), "text/plain", "no SchemeCache response")); - } - if (CacheResult->Request == nullptr) { - return ReplyAndPassAway(Viewer->GetHTTPINTERNALERROR(Event->Get(), "text/plain", "wrong SchemeCache response")); - } - if (CacheResult->Request.Get()->ResultSet.empty()) { - return ReplyAndPassAway(Viewer->GetHTTPINTERNALERROR(Event->Get(), "text/plain", "SchemeCache response is empty")); - } - if (CacheResult->Request.Get()->ErrorCount != 0) { - return ReplyAndPassAway(Viewer->GetHTTPBADREQUEST(Event->Get(), "text/plain", TStringBuilder() << "SchemeCache response error " << 
static_cast(CacheResult->Request.Get()->ResultSet.front().Status))); - } - const auto& entry = CacheResult->Request.Get()->ResultSet.front(); - NKikimrViewer::TMetaInfo metaInfo; - NKikimrViewer::TMetaCommonInfo& pbCommon = *metaInfo.MutableCommon(); - pbCommon.SetPath(CanonizePath(entry.Path)); - pbCommon.SetOwner(entry.Self->Info.GetOwner()); - if (entry.Self->Info.HasACL()) { - NACLib::TACL acl(entry.Self->Info.GetACL()); - for (const NACLibProto::TACE& ace : acl.GetACE()) { - auto& pbAce = *pbCommon.AddACL(); - FillACE(ace, pbAce); - } - } - if (entry.Self->Info.HasEffectiveACL()) { - NACLib::TACL acl(entry.Self->Info.GetEffectiveACL()); - for (const NACLibProto::TACE& ace : acl.GetACE()) { - auto& pbAce = *pbCommon.AddEffectiveACL(); - FillACE(ace, pbAce); - } - } - - TStringStream json; - TProtoToJson::ProtoToJson(json, metaInfo, JsonSettings); - - ReplyAndPassAway(Viewer->GetHTTPOKJSON(Event->Get(), json.Str())); - } - - void HandleTimeout() { - ReplyAndPassAway(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get())); - } -}; - -template <> -YAML::Node TJsonRequestSwagger::GetSwagger() { - YAML::Node node = YAML::Load(R"___( - get: - tags: - - viewer - summary: ACL information - description: Returns information about ACL of an object - parameters: - - name: database - in: query - description: database name - type: string - required: false - - name: path - in: query - description: schema path - required: true - type: string - - name: merge_rules - in: query - description: merge access rights into access rules - type: boolean - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - responses: - 200: - description: OK - content: - application/json: - schema: - type: object - properties: - Common: - type: object - properties: - Path: - type: string - Owner: - type: string - ACL: - type: array - items: - type: object - properties: - AccessType: - type: string - Subject: - type: string - AccessRules: - type: array - items: - type: string 
- AccessRights: - type: array - items: - type: string - InheritanceType: - type: array - items: - type: string - EffectiveACL: - type: array - items: - type: object - properties: - AccessType: - type: string - Subject: - type: string - AccessRules: - type: array - items: - type: string - AccessRights: - type: array - items: - type: string - InheritanceType: - type: array - items: - type: string - 400: - description: Bad Request - 403: - description: Forbidden - 504: - description: Gateway Timeout - - )___"); - - return node; -} - -} -} diff --git a/ydb/core/viewer/json_autocomplete.h b/ydb/core/viewer/json_autocomplete.h deleted file mode 100644 index 155a960af2ea..000000000000 --- a/ydb/core/viewer/json_autocomplete.h +++ /dev/null @@ -1,519 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include - -#include "query_autocomplete_helper.h" -#include "viewer_request.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; -using TNavigate = NSchemeCache::TSchemeCacheNavigate; - -class TJsonAutocomplete : public TViewerPipeClient { - using TBase = TViewerPipeClient; - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - TEvViewer::TEvViewerRequest::TPtr ViewerRequest; - TJsonSettings JsonSettings; - ui32 Timeout = 0; - - TAutoPtr ProxyResult; - TAutoPtr ConsoleResult; - TAutoPtr CacheResult; - - struct TSchemaWordData { - TString Name; - NKikimrViewer::EAutocompleteType Type; - TString Table; - TSchemaWordData() {} - TSchemaWordData(const TString& name, const NKikimrViewer::EAutocompleteType type, const TString& table = "") - : Name(name) - , Type(type) - , Table(table) - {} - }; - THashMap Dictionary; - TString Database; - TVector Tables; - TVector Paths; - TString Prefix; - TString SearchWord; - ui32 Limit = 10; - NKikimrViewer::TQueryAutocomplete Result; - - std::optional SubscribedNodeId; - std::vector TenantDynamicNodes; - bool Direct = false; -public: - static constexpr NKikimrServices::TActivity::EType 
ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonAutocomplete(IViewer* viewer, NMon::TEvHttpInfo::TPtr &ev) - : Viewer(viewer) - , Event(ev) - { - const auto& params(Event->Get()->Request.GetParams()); - InitConfig(params); - ParseCgiParameters(params); - if (IsPostContent()) { - TStringBuf content = Event->Get()->Request.GetPostContent(); - ParsePostContent(content); - } - PrepareParameters(); - } - - // proxied request - TJsonAutocomplete(TEvViewer::TEvViewerRequest::TPtr& ev) - : ViewerRequest(ev) - { - auto& request = ViewerRequest->Get()->Record.GetAutocompleteRequest(); - - Database = request.GetDatabase(); - for (auto& table: request.GetTables()) { - Tables.emplace_back(table); - } - Prefix = request.GetPrefix(); - Limit = request.GetLimit(); - - Timeout = ViewerRequest->Get()->Record.GetTimeout(); - Direct = true; - PrepareParameters(); - } - - void PrepareParameters() { - if (Database) { - TString prefixUpToLastSlash = ""; - auto splitPos = Prefix.find_last_of('/'); - if (splitPos != std::string::npos) { - prefixUpToLastSlash += Prefix.substr(0, splitPos); - SearchWord = Prefix.substr(splitPos + 1); - } else { - SearchWord = Prefix; - } - - if (Tables.size() == 0) { - Paths.emplace_back(Database); - } else { - for (TString& table: Tables) { - TString path = table; - if (!table.StartsWith(Database)) { - path = Database + "/" + path; - } - path += "/" + prefixUpToLastSlash; - Paths.emplace_back(path); - } - } - } else { - SearchWord = Prefix; - } - if (Limit == 0) { - Limit = std::numeric_limits::max(); - } - } - - void ParseCgiParameters(const TCgiParameters& params) { - JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), true); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); - Database = params.Get("database"); - StringSplitter(params.Get("table")).Split(',').SkipEmpty().Collect(&Tables); - Prefix = params.Get("prefix"); - Limit = 
FromStringWithDefault(params.Get("limit"), Limit); - Direct = FromStringWithDefault(params.Get("direct"), Direct); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - } - - void ParsePostContent(const TStringBuf& content) { - static NJson::TJsonReaderConfig JsonConfig; - NJson::TJsonValue requestData; - bool success = NJson::ReadJsonTree(content, &JsonConfig, &requestData); - if (success) { - Database = Database.empty() ? requestData["database"].GetStringSafe({}) : Database; - if (requestData["table"].IsArray()) { - for (auto& table: requestData["table"].GetArraySafe()) { - Tables.emplace_back(table.GetStringSafe()); - } - } - Prefix = Prefix.empty() ? requestData["prefix"].GetStringSafe({}) : Prefix; - if (requestData["limit"].IsDefined()) { - Limit = requestData["limit"].GetInteger(); - } - } - } - - bool IsPostContent() const { - return NViewer::IsPostContent(Event); - } - - TAutoPtr MakeSchemeCacheRequest() { - TAutoPtr request(new NSchemeCache::TSchemeCacheNavigate()); - - for (TString& path: Paths) { - NSchemeCache::TSchemeCacheNavigate::TEntry entry; - entry.Operation = NSchemeCache::TSchemeCacheNavigate::OpList; - entry.SyncVersion = false; - entry.Path = SplitPath(path); - request->ResultSet.emplace_back(entry); - } - - return request; - } - - void Bootstrap() { - if (ViewerRequest) { - // handle proxied request - SendSchemeCacheRequest(); - } else if (!Database) { - // autocomplete database list via console request - RequestConsoleListTenants(); - } else { - if (!Direct) { - // proxy request to a dynamic node of the specified database - RequestStateStorageEndpointsLookup(Database); - } - if (Requests == 0) { - // perform autocomplete without proxying - SendSchemeCacheRequest(); - } - } - - Become(&TThis::StateRequestedDescribe, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - void Connected(TEvInterconnect::TEvNodeConnected::TPtr &) {} - - void Undelivered(TEvents::TEvUndelivered::TPtr &ev) { - if (!Direct && 
ev->Get()->SourceType == NViewer::TEvViewer::EvViewerRequest) { - Direct = true; - SendSchemeCacheRequest(); // fallback - RequestDone(); - } - } - - void Disconnected(TEvInterconnect::TEvNodeDisconnected::TPtr &) { - if (!Direct) { - Direct = true; - SendSchemeCacheRequest(); // fallback - RequestDone(); - } - } - - void Handle(TEvStateStorage::TEvBoardInfo::TPtr& ev) { - BLOG_TRACE("Received TEvBoardInfo"); - if (ev->Get()->Status == TEvStateStorage::TEvBoardInfo::EStatus::Ok) { - for (const auto& [actorId, infoEntry] : ev->Get()->InfoEntries) { - TenantDynamicNodes.emplace_back(actorId.NodeId()); - } - } - if (TenantDynamicNodes.empty()) { - SendSchemeCacheRequest(); - } else { - SendDynamicNodeAutocompleteRequest(); - } - RequestDone(); - } - - void SendSchemeCacheRequest() { - SendRequest(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(MakeSchemeCacheRequest())); - } - - void SendDynamicNodeAutocompleteRequest() { - ui64 hash = std::hash()(Event->Get()->Request.GetRemoteAddr()); - - auto itPos = std::next(TenantDynamicNodes.begin(), hash % TenantDynamicNodes.size()); - std::nth_element(TenantDynamicNodes.begin(), itPos, TenantDynamicNodes.end()); - - TNodeId nodeId = *itPos; - SubscribedNodeId = nodeId; - TActorId viewerServiceId = MakeViewerID(nodeId); - - THolder request = MakeHolder(); - request->Record.SetTimeout(Timeout); - auto autocompleteRequest = request->Record.MutableAutocompleteRequest(); - autocompleteRequest->SetDatabase(Database); - for (TString& path: Paths) { - autocompleteRequest->AddTables(path); - } - autocompleteRequest->SetPrefix(Prefix); - autocompleteRequest->SetLimit(Limit); - - ViewerWhiteboardCookie cookie(NKikimrViewer::TEvViewerRequest::kAutocompleteRequest, nodeId); - SendRequest(viewerServiceId, request.Release(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, cookie.ToUi64()); - } - - void PassAway() override { - if (SubscribedNodeId.has_value()) { - 
Send(TActivationContext::InterconnectProxy(SubscribedNodeId.value()), new TEvents::TEvUnsubscribe()); - } - TBase::PassAway(); - BLOG_TRACE("PassAway()"); - } - - STATEFN(StateRequestedDescribe) { - switch (ev->GetTypeRewrite()) { - hFunc(TEvStateStorage::TEvBoardInfo, Handle); - hFunc(NConsole::TEvConsole::TEvListTenantsResponse, Handle); - hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, Handle); - hFunc(TEvents::TEvUndelivered, Undelivered); - hFunc(TEvInterconnect::TEvNodeConnected, Connected); - hFunc(TEvInterconnect::TEvNodeDisconnected, Disconnected); - hFunc(TEvViewer::TEvViewerResponse, Handle); - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void ParseProxyResult() { - if (ProxyResult == nullptr) { - Result.add_error("Failed to collect information from ProxyResult"); - return; - } - if (ProxyResult->Record.HasAutocompleteResponse()) { - Result = ProxyResult->Record.GetAutocompleteResponse(); - } else { - Result.add_error("Proxying return empty response"); - } - - } - - void ParseConsoleResult() { - if (ConsoleResult == nullptr) { - Result.add_error("Failed to collect information from ConsoleResult"); - return; - } - - Ydb::Cms::ListDatabasesResult listTenantsResult; - ConsoleResult->Record.GetResponse().operation().result().UnpackTo(&listTenantsResult); - for (const TString& path : listTenantsResult.paths()) { - Dictionary[path] = TSchemaWordData(path, NKikimrViewer::ext_sub_domain); - } - } - - NKikimrViewer::EAutocompleteType ConvertType(TNavigate::EKind navigate) { - switch (navigate) { - case TNavigate::KindSubdomain: - return NKikimrViewer::sub_domain; - case TNavigate::KindPath: - return NKikimrViewer::dir; - case TNavigate::KindExtSubdomain: - return NKikimrViewer::ext_sub_domain; - case TNavigate::KindTable: - return NKikimrViewer::table; - case TNavigate::KindOlapStore: - return NKikimrViewer::column_store; - case TNavigate::KindColumnTable: - return NKikimrViewer::column_table; - case TNavigate::KindRtmr: - return 
NKikimrViewer::rtmr_volume; - case TNavigate::KindKesus: - return NKikimrViewer::kesus; - case TNavigate::KindSolomon: - return NKikimrViewer::solomon_volume; - case TNavigate::KindTopic: - return NKikimrViewer::pers_queue_group; - case TNavigate::KindCdcStream: - return NKikimrViewer::cdc_stream; - case TNavigate::KindSequence: - return NKikimrViewer::sequence; - case TNavigate::KindReplication: - return NKikimrViewer::replication; - case TNavigate::KindBlobDepot: - return NKikimrViewer::blob_depot; - case TNavigate::KindExternalTable: - return NKikimrViewer::external_table; - case TNavigate::KindExternalDataSource: - return NKikimrViewer::external_data_source; - case TNavigate::KindBlockStoreVolume: - return NKikimrViewer::block_store_volume; - case TNavigate::KindFileStore: - return NKikimrViewer::file_store; - case TNavigate::KindView: - return NKikimrViewer::view; - default: - return NKikimrViewer::dir; - } - } - - void ParseCacheResult() { - if (CacheResult == nullptr) { - Result.add_error("Failed to collect information from CacheResult"); - return; - } - NSchemeCache::TSchemeCacheNavigate *navigate = CacheResult->Request.Get(); - if (navigate->ErrorCount > 0) { - for (auto& entry: CacheResult->Request.Get()->ResultSet) { - if (entry.Status != TSchemeCacheNavigate::EStatus::Ok) { - Result.add_error(TStringBuilder() << "Error receiving Navigate response: `" << CanonizePath(entry.Path) << "` has <" << ToString(entry.Status) << "> status"); - } - } - return; - } - for (auto& entry: CacheResult->Request.Get()->ResultSet) { - TString path = CanonizePath(entry.Path); - if (entry.ListNodeEntry) { - for (const auto& child : entry.ListNodeEntry->Children) { - Dictionary[child.Name] = TSchemaWordData(child.Name, ConvertType(child.Kind), path); - } - }; - for (const auto& [id, column] : entry.Columns) { - Dictionary[column.Name] = TSchemaWordData(column.Name, NKikimrViewer::column, path); - } - for (const auto& index : entry.Indexes) { - Dictionary[index.GetName()] = 
TSchemaWordData(index.GetName(), NKikimrViewer::index, path); - } - for (const auto& cdcStream : entry.CdcStreams) { - Dictionary[cdcStream.GetName()] = TSchemaWordData(cdcStream.GetName(), NKikimrViewer::cdc_stream, path); - } - } - } - - void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr &ev) { - CacheResult = ev->Release(); - RequestDone(); - } - - void Handle(NConsole::TEvConsole::TEvListTenantsResponse::TPtr& ev) { - ConsoleResult = ev->Release(); - RequestDone(); - } - - void SendAutocompleteResponse() { - if (ViewerRequest) { - TEvViewer::TEvViewerResponse* viewerResponse = new TEvViewer::TEvViewerResponse(); - viewerResponse->Record.MutableAutocompleteResponse()->CopyFrom(Result); - Send(ViewerRequest->Sender, viewerResponse); - } else { - TStringStream json; - TProtoToJson::ProtoToJson(json, Result, JsonSettings); - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), json.Str()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - } - } - - void ReplyAndPassAway() { - if (ProxyResult) { - ParseProxyResult(); - } else if (Database) { - ParseCacheResult(); - } else { - ParseConsoleResult(); - } - - if (!ProxyResult) { - Result.set_success(Result.error_size() == 0); - if (Result.error_size() == 0) { - auto fuzzy = FuzzySearcher(Dictionary); - auto autocomplete = fuzzy.Search(SearchWord, Limit); - Result.MutableResult()->SetTotal(autocomplete.size()); - for (TSchemaWordData& wordData: autocomplete) { - auto entity = Result.MutableResult()->AddEntities(); - entity->SetName(wordData.Name); - entity->SetType(wordData.Type); - if (wordData.Table) { - entity->SetParent(wordData.Table); - } - } - } - } - - SendAutocompleteResponse(); - PassAway(); - } - - void Handle(TEvViewer::TEvViewerResponse::TPtr& ev) { - if (ev.Get()->Get()->Record.HasAutocompleteResponse()) { - ProxyResult = ev.Release()->Release(); - } else { - Direct = true; - SendSchemeCacheRequest(); // fallback - } - RequestDone(); - } - - void HandleTimeout() { 
- if (ViewerRequest) { - Result.add_error("Request timed out"); - ReplyAndPassAway(); - } else { - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } - - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: database - in: query - description: database name - required: false - type: string - - name: table - in: query - description: table list - required: false - type: string - - name: prefix - in: query - description: known part of the word - required: false - type: string - - name: limit - in: query - description: limit of entities - required: false - type: integer - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - - name: direct - in: query - description: force execution on current node - required: false - type: boolean - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Autocomplete information"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns autocomplete information about objects in the database"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_blobindexstat.h b/ydb/core/viewer/json_blobindexstat.h deleted file mode 100644 index 2cdf56b4bc4f..000000000000 --- a/ydb/core/viewer/json_blobindexstat.h +++ /dev/null @@ -1,30 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include "json_vdisk_req.h" - -namespace NKikimr { -namespace NViewer { - -using TJsonBlobIndexStat 
= TJsonVDiskRequest; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "\"Get logoblob index stat from VDisk\""; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "\"Get logoblob index stat from VDisk\""; - } -}; - -} -} diff --git a/ydb/core/viewer/json_browse.h b/ydb/core/viewer/json_browse.h deleted file mode 100644 index 223e91571b6a..000000000000 --- a/ydb/core/viewer/json_browse.h +++ /dev/null @@ -1,254 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include "browse.h" -#include "browse_db.h" -#include "browse_pq.h" -#include -#include "viewer.h" -#include "wb_aggregate.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - -class TJsonBrowse : public TActorBootstrapped { - using TBase = TActorBootstrapped; - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - TJsonSettings JsonSettings; - ui32 Timeout = 0; - bool Recursive = false; - - struct TPathStateInfo { - TString Name; - TString Path; - TActorId BrowseActorId; - NKikimrViewer::TBrowseInfo BrowseInfo; - - TPathStateInfo(const TString& name, const TString& path, const TActorId& browseActorId) - : Name(name) - , Path(path) - , BrowseActorId(browseActorId) - {} - - operator const TString&() const { - return Path; - } - - bool operator== (const TString& otherPath) const { - return Path == otherPath; - } - }; - - TVector Paths; - - using TBrowseRequestKey = std::tuple; - std::unordered_multiset BrowseRequestsInFlight; - ui32 Responses = 0; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonBrowse(IViewer *viewer, NMon::TEvHttpInfo::TPtr &ev) - : Viewer(viewer) - , Event(ev) - {} - - void ParsePath(const TString& path, const TActorContext& ctx) { - size_t prevpos = 0; - size_t pos = 0; - size_t len = path.size(); - while (pos < len) { - if 
(path[pos] == '/') { - TString n = path.substr(prevpos, pos - prevpos); - TString p = path.substr(0, pos); - if (n.empty() && p.empty()) { - n = p = "/"; - } - Paths.emplace_back(n, p, ctx.RegisterWithSameMailbox(new TBrowse(Viewer, ctx.SelfID, p, Event->Get()->UserToken))); - ++pos; - prevpos = pos; - } else { - ++pos; - } - } - if (pos != prevpos) { - TString n = path.substr(prevpos, pos - prevpos); - TString p = path.substr(0, pos); - Paths.emplace_back(n, p, ctx.RegisterWithSameMailbox(new TBrowse(Viewer, ctx.SelfID, p, Event->Get()->UserToken))); - } - } - - void Bootstrap(const TActorContext& ctx) { - const auto& params(Event->Get()->Request.GetParams()); - JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), true); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - Recursive = FromStringWithDefault(params.Get("recursive"), false); - TString path = params.Get("path"); - if (Recursive) { - ParsePath(path, ctx); - } else { - Paths.emplace_back(path, path, ctx.RegisterWithSameMailbox(new TBrowse(Viewer, ctx.SelfID, path, Event->Get()->UserToken))); - } - Become(&TThis::StateWait, ctx, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - STFUNC(StateWait) { - switch (ev->GetTypeRewrite()) { - HFunc(NViewerEvents::TEvBrowseResponse, Handle); - HFunc(NViewerEvents::TEvBrowseRequestSent, Handle); - HFunc(NViewerEvents::TEvBrowseRequestCompleted, Handle); - HFunc(NMon::TEvHttpInfoRes, Handle); - CFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void Handle(NViewerEvents::TEvBrowseResponse::TPtr &ev, const TActorContext &ctx) { - NViewerEvents::TEvBrowseResponse& event(*ev->Get()); - if (!event.Error.empty()) { - ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(event.Error, 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - return Die(ctx); - } - auto it = std::find(Paths.begin(), Paths.end(), event.BrowseInfo.GetPath()); - if 
(it != Paths.end()) { - it->BrowseInfo.MergeFrom(event.BrowseInfo); - it->BrowseActorId = TActorId(); - } - // TODO: error handling? - ++Responses; - if (Responses == Paths.size()) { - ReplyAndDie(ctx); - } - } - - void Handle(NViewerEvents::TEvBrowseRequestSent::TPtr& ev, const TActorContext&) { - NViewerEvents::TEvBrowseRequestSent& event(*ev->Get()); - BrowseRequestsInFlight.emplace(event.Actor, event.Tablet, event.Event); - } - - void Handle(NViewerEvents::TEvBrowseRequestCompleted::TPtr& ev, const TActorContext&) { - NViewerEvents::TEvBrowseRequestCompleted& event(*ev->Get()); - auto it = BrowseRequestsInFlight.find({event.Actor, event.Tablet, event.Event}); - if (it != BrowseRequestsInFlight.end()) { - // we could not delete by key, it could be many items with the same key - BrowseRequestsInFlight.erase(it); - } - BrowseRequestsInFlight.emplace(event.Actor, event.Tablet, event.Event); - } - - void Handle(NMon::TEvHttpInfoRes::TPtr &ev, const TActorContext &ctx) { - ctx.ExecutorThread.Send(ev->Forward(Event->Sender)); - Die(ctx); - } - - void ReplyAndDie(const TActorContext &ctx) { - TStringStream json; - if (!Paths.empty()) { - NKikimrViewer::TBrowseInfo browseInfo; - auto pi = Paths.begin(); - browseInfo.MergeFrom(pi->BrowseInfo); - if (Recursive) { - browseInfo.SetPath(Paths.back().BrowseInfo.GetPath()); - browseInfo.SetName("/"); - } - NKikimrViewer::TBrowseInfo* pBrowseInfo = &browseInfo; - ++pi; - while (pi != Paths.end()) { - TString name = pi->Name; - for (NKikimrViewer::TBrowseInfo& child : *pBrowseInfo->MutableChildren()) { - if (child.GetName() == name) { - pBrowseInfo = &child; - pBrowseInfo->MergeFrom(pi->BrowseInfo); - pBrowseInfo->ClearPath(); - break; - } - } - ++pi; - } - TProtoToJson::ProtoToJson(json, browseInfo, JsonSettings); - } - ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), json.Str()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); - } - - void HandleTimeout(const TActorContext &ctx) 
{ - for (auto& pathInfo : Paths) { - if (pathInfo.BrowseActorId) { - ctx.Send(pathInfo.BrowseActorId, new TEvents::TEvPoisonPill()); - } - } - TStringStream result; - RenderPendingRequests(result); - ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get(), result.Str()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); - } - - void RenderPendingRequests(IOutputStream& html) { - for (const auto& request : BrowseRequestsInFlight) { - html << request << Endl; - } - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: path - in: query - description: schema path - required: true - type: string - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Schema information"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns brief information about schema object"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_bscontrollerinfo.h b/ydb/core/viewer/json_bscontrollerinfo.h deleted file mode 100644 index 1e0263115a5f..000000000000 --- a/ydb/core/viewer/json_bscontrollerinfo.h +++ /dev/null @@ -1,124 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include "viewer.h" -#include "json_pipe_req.h" -#include - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - -class TJsonBSControllerInfo : public TViewerPipeClient { - using TBase = TViewerPipeClient; - IViewer* Viewer; - 
NMon::TEvHttpInfo::TPtr Event; - TAutoPtr ControllerInfo; - TJsonSettings JsonSettings; - ui32 Timeout = 0; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonBSControllerInfo(IViewer* viewer, NMon::TEvHttpInfo::TPtr &ev) - : Viewer(viewer) - , Event(ev) - {} - - void Bootstrap(const TActorContext& ctx) { - const auto& params(Event->Get()->Request.GetParams()); - JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), false); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - InitConfig(params); - RequestBSControllerInfo(); - Become(&TThis::StateRequestedInfo, ctx, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - STATEFN(StateRequestedInfo) { - switch (ev->GetTypeRewrite()) { - hFunc(TEvBlobStorage::TEvResponseControllerInfo, Handle); - hFunc(TEvTabletPipe::TEvClientConnected, TBase::Handle); - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void Handle(TEvBlobStorage::TEvResponseControllerInfo::TPtr& ev) { - ControllerInfo = ev->Release(); - RequestDone(); - } - - void ReplyAndPassAway() { - TStringStream json; - if (ControllerInfo != nullptr) { - TProtoToJson::ProtoToJson(json, ControllerInfo->Record); - } else { - json << "null"; - } - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), json.Str()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } - - void HandleTimeout() { - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Node(R"___( - 
- name: controller_id - in: query - description: storage controller identifier (tablet id) - required: true - type: string - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Storage controller information"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns information about storage controller"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_bsgroupinfo.h b/ydb/core/viewer/json_bsgroupinfo.h deleted file mode 100644 index 24c05ffe5d7a..000000000000 --- a/ydb/core/viewer/json_bsgroupinfo.h +++ /dev/null @@ -1,73 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include "wb_merge.h" -#include "json_wb_req.h" - -namespace NKikimr { -namespace NViewer { - -template <> -struct TWhiteboardInfo { - using TResponseType = NKikimrWhiteboard::TEvBSGroupStateResponse; - using TResponseEventType = TEvWhiteboard::TEvBSGroupStateResponse; - using TElementType = NKikimrWhiteboard::TBSGroupStateInfo; - using TElementKeyType = ui32; - - static constexpr bool StaticNodesOnly = true; - - static ::google::protobuf::RepeatedPtrField& GetElementsField(TResponseType& response) { - return *response.MutableBSGroupStateInfo(); - } - - static ui32 GetElementKey(const TElementType& type) { - return type.GetGroupID(); - } - - static TString GetDefaultMergeField() { - return "GroupID"; - } - - static void InitMerger() { - const auto* field = NKikimrWhiteboard::TBSGroupStateInfo::descriptor()->FindFieldByName("Latency"); - TWhiteboardMergerBase::FieldMerger[field] = &TWhiteboardMergerBase::ProtoMaximizeEnumField; - } - - static void MergeResponses(TResponseType& result, 
TMap& responses, const TString& fields = GetDefaultMergeField()) { - if (fields == GetDefaultMergeField()) { - TWhiteboardMerger::MergeResponsesElementKey(result, responses); - } else { - TWhiteboardMerger::MergeResponses(result, responses, fields); - } - } -}; - -template <> -struct TWhiteboardMergerComparator { - bool operator ()(const NKikimrWhiteboard::TBSGroupStateInfo& a, const NKikimrWhiteboard::TBSGroupStateInfo& b) const { - return std::make_tuple(a.GetGroupGeneration(), a.VDiskIdsSize(), a.GetChangeTime()) - < std::make_tuple(b.GetGroupGeneration(), b.VDiskIdsSize(), b.GetChangeTime()); - } -}; - -using TJsonBSGroupInfo = TJsonWhiteboardRequest; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Storage groups information"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns information about storage groups"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_cluster.h b/ydb/core/viewer/json_cluster.h deleted file mode 100644 index a4b6c0e86b81..000000000000 --- a/ydb/core/viewer/json_cluster.h +++ /dev/null @@ -1,542 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include "json_pipe_req.h" -#include "viewer.h" -#include "viewer_probes.h" - -LWTRACE_USING(VIEWER_PROVIDER); - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; -using namespace NNodeWhiteboard; -using ::google::protobuf::FieldDescriptor; - -class TJsonCluster : public TViewerPipeClient { - using TThis = TJsonCluster; - using TBase = TViewerPipeClient; - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - THolder NodesInfo; - TMap SystemInfo; - TMap VDiskInfo; - TMap PDiskInfo; - TMap BSGroupInfo; - TMap TabletInfo; - THolder DescribeResult; - TSet NodesAlive; - TJsonSettings JsonSettings; - ui32 Timeout; - ui32 TenantsNumber = 0; - bool Tablets = false; - - struct TEventLog { - bool IsTimeout = false; - TInstant StartTime; - TInstant 
StartHandleListTenantsResponseTime; - TInstant StartHandleNodesInfoTime; - TInstant StartMergeBSGroupsTime; - TInstant StartMergeVDisksTime; - TInstant StartMergePDisksTime; - TInstant StartMergeTabletsTime; - TInstant StartResponseBuildingTime; - }; - TEventLog EventLog; -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonCluster(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) - : Viewer(viewer) - , Event(ev) - { - const auto& params(Event->Get()->Request.GetParams()); - JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), true); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); - InitConfig(params); - Tablets = FromStringWithDefault(params.Get("tablets"), false); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - } - - void Bootstrap(const TActorContext& ) { - EventLog.StartTime = TActivationContext::Now(); - SendRequest(GetNameserviceActorId(), new TEvInterconnect::TEvListNodes()); - RequestConsoleListTenants(); - Become(&TThis::StateRequested, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - void PassAway() override { - if (NodesInfo != nullptr) { - TIntrusivePtr dynamicNameserviceConfig = AppData()->DynamicNameserviceConfig; - for (const auto& ni : NodesInfo->Nodes) { - if (ni.NodeId <= dynamicNameserviceConfig->MaxStaticNodeId) { - Send(TActivationContext::InterconnectProxy(ni.NodeId), new TEvents::TEvUnsubscribe); - } - } - } - TBase::PassAway(); - } - - void SendWhiteboardTabletStateRequest() { - THashSet filterTablets; - TIntrusivePtr domains = AppData()->DomainsInfo; - if (const auto& domain = domains->Domain) { - for (TTabletId id : domain->Coordinators) { - filterTablets.emplace(id); - } - for (TTabletId id : domain->Mediators) { - filterTablets.emplace(id); - } - for (TTabletId id : domain->TxAllocators) { - filterTablets.emplace(id); - } - 
filterTablets.emplace(domain->SchemeRoot); - filterTablets.emplace(domains->GetHive()); - } - filterTablets.emplace(MakeBSControllerID()); - filterTablets.emplace(MakeDefaultHiveID()); - filterTablets.emplace(MakeCmsID()); - filterTablets.emplace(MakeNodeBrokerID()); - filterTablets.emplace(MakeTenantSlotBrokerID()); - filterTablets.emplace(MakeConsoleID()); - const NKikimrSchemeOp::TPathDescription& pathDescription(DescribeResult->GetRecord().GetPathDescription()); - if (pathDescription.HasDomainDescription()) { - const NKikimrSubDomains::TDomainDescription& domainDescription(pathDescription.GetDomainDescription()); - for (TTabletId tabletId : domainDescription.GetProcessingParams().GetCoordinators()) { - filterTablets.emplace(tabletId); - } - for (TTabletId tabletId : domainDescription.GetProcessingParams().GetMediators()) { - filterTablets.emplace(tabletId); - } - if (domainDescription.HasDomainKey()) { - if (domainDescription.GetDomainKey().HasSchemeShard()) { - filterTablets.emplace(domainDescription.GetDomainKey().GetSchemeShard()); - } - } - } - - TIntrusivePtr dynamicNameserviceConfig = AppData()->DynamicNameserviceConfig; - for (const auto& ni : NodesInfo->Nodes) { - if (ni.NodeId <= dynamicNameserviceConfig->MaxStaticNodeId) { - TActorId whiteboardServiceId = MakeNodeWhiteboardServiceId(ni.NodeId); - auto request = new TEvWhiteboard::TEvTabletStateRequest(); - for (TTabletId id: filterTablets) { - request->Record.AddFilterTabletId(id); - } - SendRequest(whiteboardServiceId, request, IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, ni.NodeId); - } - } - } - - void SendWhiteboardRequests() { - TIntrusivePtr dynamicNameserviceConfig = AppData()->DynamicNameserviceConfig; - for (const auto& ni : NodesInfo->Nodes) { - TActorId whiteboardServiceId = MakeNodeWhiteboardServiceId(ni.NodeId); - SendRequest(whiteboardServiceId, new TEvWhiteboard::TEvSystemStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, 
ni.NodeId); - - if (ni.NodeId <= dynamicNameserviceConfig->MaxStaticNodeId) { - SendRequest(whiteboardServiceId, new TEvWhiteboard::TEvVDiskStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, ni.NodeId); - SendRequest(whiteboardServiceId,new TEvWhiteboard::TEvPDiskStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, ni.NodeId); - SendRequest(whiteboardServiceId, new TEvWhiteboard::TEvBSGroupStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, ni.NodeId); - } - } - if (Tablets) { - SendWhiteboardTabletStateRequest(); - } - } - - void Handle(TEvInterconnect::TEvNodesInfo::TPtr& ev) { - EventLog.StartHandleNodesInfoTime = TActivationContext::Now(); - NodesInfo = ev->Release(); - // before making requests to Whiteboard with the Tablets parameter, we need to review the TEvDescribeSchemeResult information - if (Tablets) { - THolder request = MakeHolder(); - if (!Event->Get()->UserToken.empty()) { - request->Record.SetUserToken(Event->Get()->UserToken); - } - NKikimrSchemeOp::TDescribePath* record = request->Record.MutableDescribePath(); - TIntrusivePtr domains = AppData()->DomainsInfo; - if (const auto& domain = domains->Domain) { - TString domainPath = "/" + domain->Name; - record->SetPath(domainPath); - } - record->MutableOptions()->SetReturnPartitioningInfo(false); - record->MutableOptions()->SetReturnPartitionConfig(false); - record->MutableOptions()->SetReturnChildren(false); - SendRequest(MakeTxProxyID(), request.Release()); - } else { - SendWhiteboardRequests(); - } - - RequestDone(); - } - - void Undelivered(TEvents::TEvUndelivered::TPtr &ev) { - ui32 nodeId = ev.Get()->Cookie; - switch (ev->Get()->SourceType) { - case TEvWhiteboard::EvSystemStateRequest: - if (SystemInfo.emplace(nodeId, NKikimrWhiteboard::TEvSystemStateResponse{}).second) { - RequestDone(); - } - break; - case TEvWhiteboard::EvVDiskStateRequest: - if (VDiskInfo.emplace(nodeId, 
NKikimrWhiteboard::TEvVDiskStateResponse{}).second) { - RequestDone(); - } - break; - case TEvWhiteboard::EvPDiskStateRequest: - if (PDiskInfo.emplace(nodeId, NKikimrWhiteboard::TEvPDiskStateResponse{}).second) { - RequestDone(); - } - break; - case TEvWhiteboard::EvBSGroupStateRequest: - if (BSGroupInfo.emplace(nodeId, NKikimrWhiteboard::TEvBSGroupStateResponse{}).second) { - RequestDone(); - } - break; - case TEvWhiteboard::EvTabletStateRequest: - if (TabletInfo.emplace(nodeId, NKikimrWhiteboard::TEvTabletStateResponse{}).second) { - RequestDone(); - } - break; - } - } - - void Disconnected(TEvInterconnect::TEvNodeDisconnected::TPtr &ev) { - ui32 nodeId = ev->Get()->NodeId; - if (SystemInfo.emplace(nodeId, NKikimrWhiteboard::TEvSystemStateResponse{}).second) { - RequestDone(); - } - TIntrusivePtr dynamicNameserviceConfig = AppData()->DynamicNameserviceConfig; - if (nodeId <= dynamicNameserviceConfig->MaxStaticNodeId) { - if (VDiskInfo.emplace(nodeId, NKikimrWhiteboard::TEvVDiskStateResponse{}).second) { - RequestDone(); - } - if (PDiskInfo.emplace(nodeId, NKikimrWhiteboard::TEvPDiskStateResponse{}).second) { - RequestDone(); - } - if (BSGroupInfo.emplace(nodeId, NKikimrWhiteboard::TEvBSGroupStateResponse{}).second) { - RequestDone(); - } - if (Tablets) { - if (TabletInfo.emplace(nodeId, NKikimrWhiteboard::TEvTabletStateResponse{}).second) { - RequestDone(); - } - } - } - } - - void Handle(TEvWhiteboard::TEvSystemStateResponse::TPtr& ev) { - ui64 nodeId = ev.Get()->Cookie; - SystemInfo[nodeId] = std::move(ev->Get()->Record); - NodesAlive.insert(nodeId); - RequestDone(); - } - - void Handle(TEvWhiteboard::TEvVDiskStateResponse::TPtr& ev) { - ui64 nodeId = ev.Get()->Cookie; - VDiskInfo[nodeId] = std::move(ev->Get()->Record); - NodesAlive.insert(nodeId); - RequestDone(); - } - - void Handle(TEvWhiteboard::TEvPDiskStateResponse::TPtr& ev) { - ui64 nodeId = ev.Get()->Cookie; - PDiskInfo[nodeId] = std::move(ev->Get()->Record); - NodesAlive.insert(nodeId); - 
RequestDone(); - } - - void Handle(TEvWhiteboard::TEvBSGroupStateResponse::TPtr& ev) { - ui64 nodeId = ev.Get()->Cookie; - BSGroupInfo[nodeId] = std::move(ev->Get()->Record); - NodesAlive.insert(nodeId); - RequestDone(); - } - - void Handle(TEvWhiteboard::TEvTabletStateResponse::TPtr& ev) { - ui64 nodeId = ev.Get()->Cookie; - TabletInfo[nodeId] = std::move(ev->Get()->Record); - NodesAlive.insert(nodeId); - RequestDone(); - } - - void Handle(NConsole::TEvConsole::TEvListTenantsResponse::TPtr& ev) { - EventLog.StartHandleListTenantsResponseTime = TActivationContext::Now(); - Ydb::Cms::ListDatabasesResult listTenantsResult; - ev->Get()->Record.GetResponse().operation().result().UnpackTo(&listTenantsResult); - TenantsNumber = listTenantsResult.paths().size(); - RequestDone(); - } - - void Handle(NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult::TPtr& ev) { - if (ev->Get()->GetRecord().GetStatus() == NKikimrScheme::StatusSuccess) { - DescribeResult = ev->Release(); - SendWhiteboardRequests(); - } - RequestDone(); - } - - void Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev) { - if (ev->Get()->Status != NKikimrProto::OK) { - RequestDone(); - } - } - - STATEFN(StateRequested) { - switch (ev->GetTypeRewrite()) { - hFunc(TEvInterconnect::TEvNodesInfo, Handle); - hFunc(TEvWhiteboard::TEvSystemStateResponse, Handle); - hFunc(TEvWhiteboard::TEvVDiskStateResponse, Handle); - hFunc(TEvWhiteboard::TEvPDiskStateResponse, Handle); - hFunc(TEvWhiteboard::TEvBSGroupStateResponse, Handle); - hFunc(TEvWhiteboard::TEvTabletStateResponse, Handle); - hFunc(NConsole::TEvConsole::TEvListTenantsResponse, Handle); - hFunc(NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult, Handle); - hFunc(TEvents::TEvUndelivered, Undelivered); - hFunc(TEvInterconnect::TEvNodeDisconnected, Disconnected); - hFunc(TEvTabletPipe::TEvClientConnected, Handle); - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - NKikimrWhiteboard::TEvBSGroupStateResponse MergedBSGroupInfo; - 
NKikimrWhiteboard::TEvVDiskStateResponse MergedVDiskInfo; - NKikimrWhiteboard::TEvPDiskStateResponse MergedPDiskInfo; - NKikimrWhiteboard::TEvTabletStateResponse MergedTabletInfo; - TMap VDisksIndex; - TMap, const NKikimrWhiteboard::TPDiskStateInfo&> PDisksIndex; - - void ReplyAndPassAway() { - EventLog.StartMergeBSGroupsTime = TActivationContext::Now(); - MergeWhiteboardResponses(MergedBSGroupInfo, BSGroupInfo); - EventLog.StartMergeVDisksTime = TActivationContext::Now(); - MergeWhiteboardResponses(MergedVDiskInfo, VDiskInfo); - EventLog.StartMergePDisksTime = TActivationContext::Now(); - MergeWhiteboardResponses(MergedPDiskInfo, PDiskInfo); - - EventLog.StartMergeTabletsTime = TActivationContext::Now(); - THashSet tablets; - if (Tablets) { - MergeWhiteboardResponses(MergedTabletInfo, TabletInfo); - } - - EventLog.StartResponseBuildingTime = TActivationContext::Now(); - if (Tablets) { - TIntrusivePtr domains = AppData()->DomainsInfo; - if (const auto& domain = domains->Domain) { - tablets.emplace(MakeBSControllerID()); - tablets.emplace(MakeDefaultHiveID()); - tablets.emplace(MakeCmsID()); - tablets.emplace(MakeNodeBrokerID()); - tablets.emplace(MakeTenantSlotBrokerID()); - tablets.emplace(MakeConsoleID()); - tablets.emplace(domain->SchemeRoot); - tablets.emplace(domains->GetHive()); - for (TTabletId id : domain->Coordinators) { - tablets.emplace(id); - } - for (TTabletId id : domain->Mediators) { - tablets.emplace(id); - } - for (TTabletId id : domain->TxAllocators) { - tablets.emplace(id); - } - } - - if (DescribeResult) { - const NKikimrSchemeOp::TPathDescription& pathDescription(DescribeResult->GetRecord().GetPathDescription()); - if (pathDescription.HasDomainDescription()) { - const NKikimrSubDomains::TDomainDescription& domainDescription(pathDescription.GetDomainDescription()); - for (TTabletId tabletId : domainDescription.GetProcessingParams().GetCoordinators()) { - tablets.emplace(tabletId); - } - for (TTabletId tabletId : 
domainDescription.GetProcessingParams().GetMediators()) { - tablets.emplace(tabletId); - } - if (domainDescription.HasDomainKey()) { - if (domainDescription.GetDomainKey().HasSchemeShard()) { - tablets.emplace(domainDescription.GetDomainKey().GetSchemeShard()); - } - } - } - } - } - - ui64 totalStorageSize = 0; - ui64 availableStorageSize = 0; - - for (auto& element : TWhiteboardInfo::GetElementsField(MergedPDiskInfo)) { - if (element.HasTotalSize() && element.HasAvailableSize()) { - totalStorageSize += element.GetTotalSize(); - availableStorageSize += element.GetAvailableSize(); - } - element.SetStateFlag(GetWhiteboardFlag(GetPDiskStateFlag(element))); - element.SetOverall(GetWhiteboardFlag(GetPDiskOverallFlag(element))); - PDisksIndex.emplace(TWhiteboardInfo::GetElementKey(element), element); - } - for (auto& element : TWhiteboardInfo::GetElementsField(MergedVDiskInfo)) { - element.SetOverall(GetWhiteboardFlag(GetVDiskOverallFlag(element))); - VDisksIndex.emplace(TWhiteboardInfo::GetElementKey(element), element); - } - NKikimrViewer::EFlag flag = NKikimrViewer::Grey; - for (const auto& element : TWhiteboardInfo::GetElementsField(MergedBSGroupInfo)) { - flag = Max(flag, GetBSGroupOverallFlag(element, VDisksIndex, PDisksIndex)); - } - ui32 numberOfCpus = 0; - double loadAverage = 0; - THashSet dataCenters; - THashSet versions; - THashSet hosts; - THashMap names; - for (const auto& [nodeId, sysInfo] : SystemInfo) { - if (sysInfo.SystemStateInfoSize() > 0) { - const NKikimrWhiteboard::TSystemStateInfo& systemState = sysInfo.GetSystemStateInfo(0); - if (systemState.HasNumberOfCpus() && (!systemState.HasHost() || hosts.emplace(systemState.GetHost()).second)) { - numberOfCpus += systemState.GetNumberOfCpus(); - if (systemState.LoadAverageSize() > 0) { - loadAverage += systemState.GetLoadAverage(0); - } - } - if (systemState.HasDataCenter()) { - dataCenters.insert(systemState.GetDataCenter()); - } - if (systemState.HasVersion()) { - 
versions.insert(systemState.GetVersion()); - } - if (systemState.HasClusterName()) { - names[systemState.GetClusterName()]++; - } - } - } - - NKikimrViewer::TClusterInfo pbCluster; - - if (Tablets) { - for (const NKikimrWhiteboard::TTabletStateInfo& tabletInfo : MergedTabletInfo.GetTabletStateInfo()) { - if (tablets.contains(tabletInfo.GetTabletId())) { - NKikimrWhiteboard::TTabletStateInfo* tablet = pbCluster.AddSystemTablets(); - tablet->CopyFrom(tabletInfo); - auto tabletFlag = GetWhiteboardFlag(GetFlagFromTabletState(tablet->GetState())); - tablet->SetOverall(tabletFlag); - flag = Max(flag, GetViewerFlag(tabletFlag)); - } - } - pbCluster.SetTablets(MergedTabletInfo.TabletStateInfoSize()); - } - pbCluster.SetTenants(TenantsNumber); - - pbCluster.SetOverall(flag); - if (NodesInfo != nullptr) { - pbCluster.SetNodesTotal(NodesInfo->Nodes.size()); - pbCluster.SetNodesAlive(NodesAlive.size()); - } - pbCluster.SetNumberOfCpus(numberOfCpus); - pbCluster.SetLoadAverage(loadAverage); - pbCluster.SetStorageTotal(totalStorageSize); - pbCluster.SetStorageUsed(totalStorageSize - availableStorageSize); - pbCluster.SetHosts(hosts.size()); - TIntrusivePtr domains = AppData()->DomainsInfo; - if (const auto& domain = domains->Domain) { - TString domainName = "/" + domain->Name; - pbCluster.SetDomain(domainName); - } - for (const TString& dc : dataCenters) { - pbCluster.AddDataCenters(dc); - } - for (const TString& version : versions) { - pbCluster.AddVersions(version); - } - auto itMax = std::max_element(names.begin(), names.end(), [](const auto& a, const auto& b) { - return a.second < b.second; - }); - if (itMax != names.end()) { - pbCluster.SetName(itMax->first); - } - - TStringStream json; - TProtoToJson::ProtoToJson(json, pbCluster, JsonSettings); - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), std::move(json.Str())), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - - const TInstant now = TActivationContext::Now(); - 
LWPROBE(ViewerClusterHandler, TBase::SelfId().NodeId(), Tablets, EventLog.IsTimeout, - EventLog.StartTime.MilliSeconds(), - (now - EventLog.StartTime).MilliSeconds(), - (EventLog.StartHandleListTenantsResponseTime - EventLog.StartTime).MilliSeconds(), - (EventLog.StartHandleNodesInfoTime - EventLog.StartTime).MilliSeconds(), - (EventLog.StartMergeBSGroupsTime - EventLog.StartTime).MilliSeconds(), - (EventLog.StartMergeVDisksTime - EventLog.StartMergeBSGroupsTime).MilliSeconds(), - (EventLog.StartMergePDisksTime - EventLog.StartMergeVDisksTime).MilliSeconds(), - (EventLog.StartMergeTabletsTime - EventLog.StartMergePDisksTime).MilliSeconds(), - (EventLog.StartResponseBuildingTime - EventLog.StartMergeTabletsTime).MilliSeconds(), - (now - EventLog.StartResponseBuildingTime).MilliSeconds() - ); - - PassAway(); - } - - void HandleTimeout() { - EventLog.IsTimeout = true; - ReplyAndPassAway(); - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - - name: tablets - in: query - description: return system tablets state - required: false - type: boolean - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Cluster information"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns information about cluster"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_compute.h b/ydb/core/viewer/json_compute.h deleted file mode 100644 index 0d045d700909..000000000000 --- 
a/ydb/core/viewer/json_compute.h +++ /dev/null @@ -1,710 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "viewer.h" -#include "viewer_helper.h" -#include "json_pipe_req.h" -#include "wb_aggregate.h" -#include "wb_merge.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - -class TJsonCompute : public TViewerPipeClient { - using TBase = TViewerPipeClient; - IViewer* Viewer; - THashMap TenantByPath; - THashMap TenantBySubDomainKey; - THashMap HiveBySubDomainKey; - THashMap SubDomainKeyByPath; - THashMap> NavigateResult; - THashMap> HiveDomainStats; - THashMap> HiveNodeStats; - THashMap> TabletInfoIndex; - THashMap HiveNodeStatsIndex; - THashMap TenantPathByNodeId; - NMon::TEvHttpInfo::TPtr Event; - TVector NodeIds; - THashSet PassedNodeIds; - THashSet FoundNodeIds; - THashMap NodeSysInfo; - TMap NodeTabletInfo; - THolder NodesInfo; - TJsonSettings JsonSettings; - ui32 Timeout = 0; - TString User; - TString Path; - TString DomainPath; - TPathId FilterSubDomain; - bool Tablets = true; - TTabletId RootHiveId = 0; - bool RootHiveRequested = false; - NKikimrViewer::TComputeInfo Result; - ui32 UptimeSecondsFilter = 0; - bool ProblemNodesFilter = false; - TString TextFilter; - - enum class EVersion { - v1, - v2 // only this works with sorting - }; - enum class ESort { - NodeId, - Host, - DC, - Rack, - Version, - Uptime, - Memory, - CPU, - LoadAverage, - }; - EVersion Version = EVersion::v1; - std::optional Offset; - std::optional Limit; - ESort Sort = ESort::NodeId; - bool ReverseSort = false; - bool IsNodesListSorted = false; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonCompute(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) - : Viewer(viewer) - , Event(ev) - {} - - TString GetDomainId(TPathId pathId) { - return TStringBuilder() << 
pathId.OwnerId << '-' << pathId.LocalPathId; - } - - bool IsFitsToPath(const TString& path) const { - if (Path.empty()) { - return true; - } - if (Path == path) { - return true; - } - if (Path == DomainPath) { - return false; - } - if (Path.StartsWith(path)) { - return true; - } - return false; - } - - void Bootstrap(const TActorContext& ) { - const auto& params(Event->Get()->Request.GetParams()); - JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), true); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); - InitConfig(params); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - Tablets = FromStringWithDefault(params.Get("tablets"), Tablets); - Path = params.Get("path"); - UptimeSecondsFilter = FromStringWithDefault(params.Get("uptime"), 0); - ProblemNodesFilter = FromStringWithDefault(params.Get("problems_only"), ProblemNodesFilter); - TextFilter = params.Get("filter"); - if (params.Has("offset")) { - Offset = FromStringWithDefault(params.Get("offset"), 0); - } - if (params.Has("limit")) { - Limit = FromStringWithDefault(params.Get("limit"), std::numeric_limits::max()); - } - TString version = params.Get("version"); - if (version == "v1") { - Version = EVersion::v1; - } else if (version == "v2") { - Version = EVersion::v2; - } - TStringBuf sort = params.Get("sort"); - if (sort) { - if (sort.StartsWith("-") || sort.StartsWith("+")) { - ReverseSort = (sort[0] == '-'); - sort.Skip(1); - } - if (sort == "NodeId") { - Sort = ESort::NodeId; - } else if (sort == "Host") { - Sort = ESort::Host; - } else if (sort == "DC") { - Sort = ESort::DC; - } else if (sort == "Rack") { - Sort = ESort::Rack; - } else if (sort == "Version") { - Sort = ESort::Version; - } else if (sort == "Uptime") { - Sort = ESort::Uptime; - } else if (sort == "Memory") { - Sort = ESort::Memory; - } else if (sort == "CPU") { - Sort = ESort::CPU; - } else if (sort == "LoadAverage") { - Sort = ESort::LoadAverage; - } - } - - 
SendRequest(GetNameserviceActorId(), new TEvInterconnect::TEvListNodes()); - - TIntrusivePtr domains = AppData()->DomainsInfo; - auto *domain = domains->GetDomain(); - - RequestConsoleListTenants(); - - DomainPath = "/" + domain->Name; - if (Path.empty() || DomainPath == Path) { - NKikimrViewer::TTenant& tenant = TenantByPath[DomainPath]; - tenant.SetName(DomainPath); - tenant.SetState(Ydb::Cms::GetDatabaseStatusResult::RUNNING); - tenant.SetType(NKikimrViewer::Domain); - RequestSchemeCacheNavigate(DomainPath); - } - RootHiveId = domains->GetHive(); - if (Requests == 0) { - ReplyAndPassAway(); - } - - Become(&TThis::StateRequested, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - void PassAway() override { - for (const TNodeId nodeId : NodeIds) { - Send(TActivationContext::InterconnectProxy(nodeId), new TEvents::TEvUnsubscribe); - } - TBase::PassAway(); - } - - STATEFN(StateRequested) { - switch (ev->GetTypeRewrite()) { - hFunc(TEvInterconnect::TEvNodesInfo, Handle); - hFunc(NConsole::TEvConsole::TEvListTenantsResponse, Handle); - hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, Handle); - hFunc(TEvHive::TEvResponseHiveDomainStats, Handle); - hFunc(TEvHive::TEvResponseHiveNodeStats, Handle); - hFunc(TEvWhiteboard::TEvSystemStateResponse, Handle); - hFunc(TEvWhiteboard::TEvTabletStateResponse, Handle); - hFunc(TEvents::TEvUndelivered, Undelivered); - hFunc(TEvInterconnect::TEvNodeDisconnected, Disconnected); - hFunc(TEvTabletPipe::TEvClientConnected, TBase::Handle); - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void Handle(TEvInterconnect::TEvNodesInfo::TPtr &ev) { - NodesInfo = ev->Release(); - RequestDone(); - } - - void Handle(NConsole::TEvConsole::TEvListTenantsResponse::TPtr& ev) { - Ydb::Cms::ListDatabasesResult listTenantsResult; - ev->Get()->Record.GetResponse().operation().result().UnpackTo(&listTenantsResult); - for (const TString& path : listTenantsResult.paths()) { - if (IsFitsToPath(path)) { - TString 
p(Path.empty() ? path : Path); - TenantByPath[p]; - RequestSchemeCacheNavigate(p); - } - } - RequestDone(); - } - - void Handle(TEvHive::TEvResponseHiveDomainStats::TPtr& ev) { - for (const NKikimrHive::THiveDomainStats& hiveStat : ev->Get()->Record.GetDomainStats()) { - TPathId subDomainKey({hiveStat.GetShardId(), hiveStat.GetPathId()}); - if (FilterSubDomain && FilterSubDomain != subDomainKey) { - continue; - } - NKikimrViewer::TTenant& tenant = TenantBySubDomainKey[subDomainKey]; - if (ev->Cookie != HiveBySubDomainKey[subDomainKey]) { - continue; // we avoid overwrite of tenant stats by root stats - } - tenant.SetId(GetDomainId({hiveStat.GetShardId(), hiveStat.GetPathId()})); - tenant.MutableStateStats()->CopyFrom(hiveStat.GetStateStats()); - tenant.MutableMetrics()->CopyFrom(hiveStat.GetMetrics()); - tenant.MutableNodeIds()->CopyFrom(hiveStat.GetNodeIds()); - tenant.SetAliveNodes(hiveStat.GetAliveNodes()); - } - HiveDomainStats[ev->Cookie] = std::move(ev->Release()); - RequestDone(); - } - - bool IsPageNode(TNodeId nodeId) { - if (PassedNodeIds.insert(nodeId).second) { - if (Offset.has_value()) { - if (PassedNodeIds.size() <= Offset.value()) { - return false; - } - } - if (Limit.has_value()) { - if (NodeIds.size() >= Limit.value()) { - return false; - } - } - return true; - } - return false; - } - - bool IsRequiredNode(TNodeId nodeId) { - TIntrusivePtr dynamicNameserviceConfig = AppData()->DynamicNameserviceConfig; - return nodeId > dynamicNameserviceConfig->MaxStaticNodeId && (!IsNodesListSorted || IsPageNode(nodeId)); - } - - bool NeedNodesSorting() { - return Version == EVersion::v2; - } - - bool IsNodeFilter() { - return ProblemNodesFilter || UptimeSecondsFilter > 0 && TextFilter; - } - - void Handle(TEvHive::TEvResponseHiveNodeStats::TPtr& ev) { - BLOG_TRACE("ProcessNodeIds()"); - - auto nodeStats = ev->Get()->Record.GetNodeStats(); - if (NeedNodesSorting() && Sort == ESort::NodeId && !IsNodeFilter()) { - SortCollection(nodeStats, [](const 
NKikimrHive::THiveNodeStats& node) { return node.GetNodeId();}, ReverseSort); - IsNodesListSorted = true; - } - for (const NKikimrHive::THiveNodeStats& nodeStat : nodeStats) { - auto nodeId = nodeStat.GetNodeId(); - if (IsRequiredNode(nodeId)) { - const auto& nodeDomain = nodeStat.GetNodeDomain(); - const TPathId subDomain(nodeDomain.GetSchemeShard(), nodeDomain.GetPathId()); - if (FilterSubDomain && FilterSubDomain != subDomain) { - continue; - } - NodeIds.emplace_back(nodeId); // order is important - TActorId whiteboardServiceId = MakeNodeWhiteboardServiceId(nodeId); - THolder request = MakeHolder(); - SendRequest(whiteboardServiceId, request.Release(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId); - if (Tablets && !ev->Get()->Record.GetExtendedTabletInfo()) { - THolder request = MakeHolder(); - SendRequest(whiteboardServiceId, request.Release(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId); - } - } - } - HiveNodeStats[ev->Cookie] = std::move(ev->Release()); - RequestDone(); - } - - void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { - if (ev->Get()->Request->ResultSet.size() == 1 && ev->Get()->Request->ResultSet.begin()->Status == NSchemeCache::TSchemeCacheNavigate::EStatus::Ok) { - const NSchemeCache::TSchemeCacheNavigate::TEntry& result(ev->Get()->Request->ResultSet.front()); - TPathId pathId; - if (!Path.empty() && result.Self) { - switch (result.Self->Info.GetPathType()) { - case NKikimrSchemeOp::EPathTypeSubDomain: - case NKikimrSchemeOp::EPathTypeExtSubDomain: - pathId = TPathId(); - break; - default: - pathId = TPathId(result.Self->Info.GetSchemeshardId(), result.Self->Info.GetPathId()); - break; - } - } - auto domainInfo = result.DomainInfo; - ui64 hiveId = domainInfo->Params.GetHive(); - if (hiveId == 0) { - if (!RootHiveRequested) { - hiveId = RootHiveId; - RootHiveRequested = true; - } - } - if (hiveId) { - RequestHiveDomainStats(hiveId); - 
RequestHiveNodeStats(hiveId, pathId); - HiveBySubDomainKey[domainInfo->DomainKey] = hiveId; - } - if (domainInfo->ResourcesDomainKey != domainInfo->DomainKey) { - TenantBySubDomainKey[domainInfo->ResourcesDomainKey].SetType(NKikimrViewer::Shared); - TenantBySubDomainKey[domainInfo->DomainKey].SetType(NKikimrViewer::Serverless); - TenantBySubDomainKey[domainInfo->DomainKey].SetResourceId(GetDomainId(domainInfo->ResourcesDomainKey)); - } - - TString path = CanonizePath(result.Path); - SubDomainKeyByPath[path] = domainInfo->DomainKey; - NavigateResult[path] = std::move(ev->Get()->Request); - if (IsFitsToPath(path)) { - FilterSubDomain = domainInfo->DomainKey; - } - } - RequestDone(); - } - - void Handle(NNodeWhiteboard::TEvWhiteboard::TEvSystemStateResponse::TPtr& ev) { - ui32 nodeId = ev.Get()->Cookie; - NodeSysInfo[nodeId] = std::move(ev->Get()->Record); - RequestDone(); - } - - void Handle(NNodeWhiteboard::TEvWhiteboard::TEvTabletStateResponse::TPtr& ev) { - ui32 nodeId = ev.Get()->Cookie; - NodeTabletInfo[nodeId] = std::move(ev->Get()->Record); - RequestDone(); - } - - void Undelivered(TEvents::TEvUndelivered::TPtr& ev) { - ui32 nodeId = ev.Get()->Cookie; - if (ev->Get()->SourceType == NNodeWhiteboard::TEvWhiteboard::EvSystemStateRequest) { - if (NodeSysInfo.emplace(nodeId, NKikimrWhiteboard::TEvSystemStateResponse{}).second) { - RequestDone(); - } - } - if (ev->Get()->SourceType == NNodeWhiteboard::TEvWhiteboard::EvTabletStateRequest) { - if (NodeTabletInfo.emplace(nodeId, NKikimrWhiteboard::TEvTabletStateResponse{}).second) { - RequestDone(); - } - } - } - - void Disconnected(TEvInterconnect::TEvNodeDisconnected::TPtr&) { - } - - bool CheckNodeFilters(TNodeId nodeId) { - auto itSysInfo = NodeSysInfo.find(nodeId); - if (itSysInfo != NodeSysInfo.end()) { - if (itSysInfo->second.SystemStateInfoSize() == 1) { - const NKikimrWhiteboard::TSystemStateInfo& sysInfo = itSysInfo->second.GetSystemStateInfo(0); - if (UptimeSecondsFilter > 0 && sysInfo.HasStartTime() && 
sysInfo.HasChangeTime() - && sysInfo.GetChangeTime() - sysInfo.GetStartTime() > UptimeSecondsFilter * 1000) { - return false; - } - if (ProblemNodesFilter && sysInfo.HasSystemState() - && GetViewerFlag(sysInfo.GetSystemState()) == NKikimrViewer::EFlag::Green) { - return false; - } - if (TextFilter) { - if (sysInfo.HasHost() && sysInfo.GetHost().Contains(TextFilter)) { - return true; - } - if (std::to_string(nodeId).contains(TextFilter)) { - return true; - } - return false; - } - } - } - return true; - } - - static double GetLoadAverage(const NKikimrViewer::TComputeNodeInfo& nodeInfo) { - if (nodeInfo.LoadAverageSize() > 0 && nodeInfo.GetNumberOfCpus() > 0) { - return nodeInfo.GetLoadAverage(0) * 100 / nodeInfo.GetNumberOfCpus(); - } - return 0; - } - - static double GetCPU(const NKikimrViewer::TComputeNodeInfo& nodeInfo) { - double cpu = 0; - if (nodeInfo.PoolStatsSize() > 0) { - for (const auto& ps : nodeInfo.GetPoolStats()) { - cpu = std::max(cpu, ps.GetUsage()); - } - } - return cpu; - } - - void PaginateNodes(::google::protobuf::RepeatedPtrField& nodes) { - switch (Sort) { - case ESort::NodeId: - // already sorted - break; - case ESort::Host: - SortCollection(nodes, [](const NKikimrViewer::TComputeNodeInfo& node) { return node.GetHost();}, ReverseSort); - break; - case ESort::DC: - SortCollection(nodes, [](const NKikimrViewer::TComputeNodeInfo& node) { return node.GetDataCenter();}, ReverseSort); - break; - case ESort::Rack: - SortCollection(nodes, [](const NKikimrViewer::TComputeNodeInfo& node) { return node.GetRack();}, ReverseSort); - break; - case ESort::Version: - SortCollection(nodes, [](const NKikimrViewer::TComputeNodeInfo& node) { return node.GetVersion();}, ReverseSort); - break; - case ESort::Uptime: - SortCollection(nodes, [](const NKikimrViewer::TComputeNodeInfo& node) { return node.GetStartTime();}, ReverseSort); - break; - case ESort::Memory: - SortCollection(nodes, [](const NKikimrViewer::TComputeNodeInfo& node) { return node.GetMemoryUsed();}, 
ReverseSort); - break; - case ESort::CPU: - SortCollection(nodes, [](const NKikimrViewer::TComputeNodeInfo& node) { return GetCPU(node);}, ReverseSort); - break; - case ESort::LoadAverage: - SortCollection(nodes, [](const NKikimrViewer::TComputeNodeInfo& node) { return GetLoadAverage(node);}, ReverseSort); - break; - } - - if (Offset.has_value()) { - if (size_t(nodes.size()) > Offset.value()) { - nodes.erase(nodes.begin(), std::next(nodes.begin(), Offset.value())); - } else { - nodes.Clear(); - } - } - if (Limit.has_value()) { - if (size_t(nodes.size()) > Limit.value()) { - nodes.erase(std::next(nodes.begin(), Limit.value()), nodes.end()); - } - } - } - - void FillResponseNode(const TNodeId nodeId, const TString& path) { - if (!CheckNodeFilters(nodeId)) - return; - FoundNodeIds.insert(nodeId); - NKikimrViewer::TComputeNodeInfo& computeNodeInfo = Version == EVersion::v1 - ? *Result.MutableTenants(Result.TenantsSize() - 1)->AddNodes() - : *Result.AddNodes(); - if (Version == EVersion::v2) { - computeNodeInfo.SetTenant(path); - } - computeNodeInfo.SetNodeId(nodeId); - auto itSysInfo = NodeSysInfo.find(nodeId); - if (itSysInfo != NodeSysInfo.end()) { - if (itSysInfo->second.SystemStateInfoSize() == 1) { - const NKikimrWhiteboard::TSystemStateInfo& sysInfo = itSysInfo->second.GetSystemStateInfo(0); - if (sysInfo.HasStartTime()) { - computeNodeInfo.SetStartTime(sysInfo.GetStartTime()); - } - if (sysInfo.HasChangeTime()) { - computeNodeInfo.SetChangeTime(sysInfo.GetChangeTime()); - } - computeNodeInfo.MutableSystemLocation()->MergeFrom(sysInfo.GetSystemLocation()); - computeNodeInfo.MutableLoadAverage()->MergeFrom(sysInfo.GetLoadAverage()); - if (sysInfo.HasNumberOfCpus()) { - computeNodeInfo.SetNumberOfCpus(sysInfo.GetNumberOfCpus()); - } - // TODO(xenoxeno) - if (sysInfo.HasSystemState()) { - computeNodeInfo.SetOverall(GetViewerFlag(sysInfo.GetSystemState())); - } - if (sysInfo.HasNodeId()) { - computeNodeInfo.SetNodeId(sysInfo.GetNodeId()); - } - if 
(sysInfo.HasDataCenter()) { - computeNodeInfo.SetDataCenter(sysInfo.GetDataCenter()); - } - if (sysInfo.HasRack()) { - computeNodeInfo.SetRack(sysInfo.GetRack()); - } - if (sysInfo.HasHost()) { - computeNodeInfo.SetHost(sysInfo.GetHost()); - } - if (sysInfo.HasVersion()) { - computeNodeInfo.SetVersion(sysInfo.GetVersion()); - } - if (sysInfo.HasMemoryUsed()) { - computeNodeInfo.SetMemoryUsed(sysInfo.GetMemoryUsed()); - } - if (sysInfo.HasMemoryLimit()) { - computeNodeInfo.SetMemoryLimit(sysInfo.GetMemoryLimit()); - } - computeNodeInfo.MutablePoolStats()->MergeFrom(sysInfo.GetPoolStats()); - computeNodeInfo.MutableEndpoints()->MergeFrom(sysInfo.GetEndpoints()); - computeNodeInfo.MutableRoles()->MergeFrom(sysInfo.GetRoles()); - - } - } - auto itTabletInfo = TabletInfoIndex.find(nodeId); - if (itTabletInfo != TabletInfoIndex.end()) { - THashMap, NKikimrViewer::TTabletStateInfo> tablets; - for (const auto* pTabletInfo : itTabletInfo->second) { - const auto& tabletInfo = *pTabletInfo; - if (tabletInfo.GetState() != NKikimrWhiteboard::TTabletStateInfo::Deleted) { - NKikimrViewer::EFlag state = GetFlagFromTabletState(tabletInfo.GetState()); - auto& tablet = tablets[std::make_pair(tabletInfo.GetType(), state)]; - tablet.SetCount(tablet.GetCount() + 1); - } - } - for (const auto& [prTypeState, tabletInfo] : tablets) { - NKikimrViewer::TTabletStateInfo& tablet = *computeNodeInfo.AddTablets(); - tablet.MergeFrom(tabletInfo); - tablet.SetType(NKikimrTabletBase::TTabletTypes::EType_Name(prTypeState.first)); - tablet.SetState(prTypeState.second); - } - } - auto itHiveNodeStats = HiveNodeStatsIndex.find(nodeId); - if (itHiveNodeStats != HiveNodeStatsIndex.end()) { - computeNodeInfo.MutableMetrics()->CopyFrom(itHiveNodeStats->second->GetMetrics()); - for (const auto& state : itHiveNodeStats->second->GetStateStats()) { - if (state.HasTabletType()) { - NKikimrViewer::TTabletStateInfo& tablet = *computeNodeInfo.AddTablets(); - 
tablet.SetType(NKikimrTabletBase::TTabletTypes::EType_Name(state.GetTabletType())); - tablet.SetCount(state.GetCount()); - NKikimrViewer::EFlag flag = GetFlagFromTabletState(state.GetVolatileState()); - tablet.SetState(flag); - } - } - } - } - - void ReplyAndPassAway() { - NKikimrWhiteboard::TEvTabletStateResponse tabletInfo; - MergeWhiteboardResponses(tabletInfo, NodeTabletInfo); - for (const auto& info : tabletInfo.GetTabletStateInfo()) { - TabletInfoIndex[info.GetNodeId()].emplace_back(&info); - } - auto itRootHiveNodeStats = HiveNodeStats.find(RootHiveId); - if (itRootHiveNodeStats != HiveNodeStats.end()) { - for (const auto& stats : itRootHiveNodeStats->second->Record.GetNodeStats()) { - HiveNodeStatsIndex[stats.GetNodeId()] = &stats; - } - } - for (const auto& prStats : HiveNodeStats) { - if (prStats.first != RootHiveId) { - for (const auto& stats : prStats.second->Record.GetNodeStats()) { - HiveNodeStatsIndex[stats.GetNodeId()] = &stats; - } - } - } - - for (const std::pair& prTenant : TenantByPath) { - const TString& path = prTenant.first; - if (Version == EVersion::v1) { - NKikimrViewer::TComputeTenantInfo& computeTenantInfo = *Result.AddTenants(); - computeTenantInfo.SetName(path); - // TODO(xenoxeno) - computeTenantInfo.SetOverall(NKikimrViewer::EFlag::Green); - } - auto itSubDomainKey = SubDomainKeyByPath.find(path); - if (itSubDomainKey != SubDomainKeyByPath.end()) { - TPathId subDomainKey(itSubDomainKey->second); - const NKikimrViewer::TTenant& tenantBySubDomainKey(TenantBySubDomainKey[subDomainKey]); - for (TNodeId nodeId : tenantBySubDomainKey.GetNodeIds()) { - if (IsNodesListSorted) { - TenantPathByNodeId[nodeId] = path; - } else { - FillResponseNode(nodeId, path); - } - } - } - } - - if (IsNodesListSorted) { - for (TNodeId nodeId : NodeIds) { - FillResponseNode(nodeId, TenantPathByNodeId[nodeId]); - } - } else if (NeedNodesSorting()) { - PaginateNodes(*Result.MutableNodes()); - } - - Result.SetTotalNodes(NodeIds.size()); - 
Result.SetFoundNodes(FoundNodeIds.size()); - // TODO(xenoxeno) - Result.SetOverall(NKikimrViewer::EFlag::Green); - - TStringStream json; - TProtoToJson::ProtoToJson(json, Result, JsonSettings); - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), std::move(json.Str())), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } - - void HandleTimeout() { - Result.AddErrors("Timeout occurred"); - ReplyAndPassAway(); - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: version - in: query - description: query version (v1, v2) - required: false - type: string - - name: path - in: query - description: schema path - required: false - type: string - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - - name: uptime - in: query - description: return only nodes with less uptime in sec. 
- required: false - type: integer - - name: problems_only - in: query - description: return only problem nodes - required: false - type: boolean - - name: filter - in: query - description: filter nodes by id or host - required: false - type: string - - name: sort - in: query - description: sort by (NodeId,Host,DC,Rack,Version,Uptime,Memory,CPU,LoadAverage) - required: false - type: string - - name: offset - in: query - description: skip N nodes - required: false - type: integer - - name: limit - in: query - description: limit to N nodes - required: false - type: integer - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Database compute information"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns information about compute layer of database"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_config.h b/ydb/core/viewer/json_config.h deleted file mode 100644 index e239c82fc4f8..000000000000 --- a/ydb/core/viewer/json_config.h +++ /dev/null @@ -1,65 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include "viewer.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - -class TJsonConfig : public TActorBootstrapped { - using TBase = TActorBootstrapped; - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonConfig(IViewer *viewer, NMon::TEvHttpInfo::TPtr &ev) - : Viewer(viewer) - , Event(ev) - {} - - void Bootstrap(const TActorContext& ctx) { - const TKikimrRunConfig& kikimrRunConfig = Viewer->GetKikimrRunConfig(); - TStringStream json; - auto config = kikimrRunConfig.AppConfig; - config.MutableNameserviceConfig()->ClearClusterUUID(); - config.MutableNameserviceConfig()->ClearAcceptUUID(); - config.ClearAuthConfig(); - TProtoToJson::ProtoToJson(json, 
config); - ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), json.Str()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Configuration"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns configuration"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_content.h b/ydb/core/viewer/json_content.h deleted file mode 100644 index 0d9399d36022..000000000000 --- a/ydb/core/viewer/json_content.h +++ /dev/null @@ -1,199 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include "viewer.h" -#include "browse.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; -using ::google::protobuf::FieldDescriptor; - -class TJsonContent : public TActorBootstrapped { - using TThis = TJsonContent; - using TBase = TActorBootstrapped; - - IViewer* Viewer; - TActorId Initiator; - NMon::TEvHttpInfo::TPtr Event; - - IViewer::TContentRequestContext ContentRequestContext; - TInstant BrowseStarted; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonContent(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) - : Viewer(viewer) - , Initiator(ev->Sender) - , Event(ev) - {} - - STFUNC(StateWaitingBrowse) { - switch (ev->GetTypeRewrite()) { - HFunc(NViewerEvents::TEvBrowseResponse, HandleBrowseResponse); - CFunc(TEvents::TSystem::Wakeup, HandleBrowseTimeout); - } - } - -public: - void Bootstrap(const TActorContext& ctx) { - BuildRequestContext(&Event->Get()->Request, ContentRequestContext); - if (!Event->Get()->UserToken.empty()) { - ContentRequestContext.UserToken = Event->Get()->UserToken; - } - BrowseStarted = ctx.Now(); - 
ctx.RegisterWithSameMailbox(new TBrowse(Viewer, ctx.SelfID, ContentRequestContext.Path, Event->Get()->UserToken)); - - TBase::Become( - &TThis::StateWaitingBrowse, - ctx, - ContentRequestContext.Timeout, - new TEvents::TEvWakeup()); - } - -private: - static void BuildRequestContext( - const NMonitoring::IMonHttpRequest* httpRequest, - IViewer::TContentRequestContext& reqCtx) { - if (!httpRequest) { - return; - } - - const auto& params = httpRequest->GetParams(); - auto post = httpRequest->GetPostContent(); - - reqCtx.JsonSettings.EnumAsNumbers = !FromStringWithDefault( - params.Get("enums"), - !reqCtx.JsonSettings.EnumAsNumbers); // defaults to false - reqCtx.JsonSettings.UI64AsString = !FromStringWithDefault( - params.Get("ui64"), - !reqCtx.JsonSettings.UI64AsString); // defaults to false - - ui32 timeoutMillis = FromStringWithDefault( - params.Get("timeout"), - (ui32)reqCtx.Timeout.MilliSeconds()); - reqCtx.Timeout = TDuration::MilliSeconds(timeoutMillis); - - reqCtx.Limit = FromStringWithDefault(params.Get("limit"), reqCtx.Limit); - reqCtx.Offset = FromStringWithDefault(params.Get("offset"), reqCtx.Offset); - reqCtx.Key = post; - - if (params.Has("key")) { - reqCtx.Key = params.Get("key"); - } - - reqCtx.Path = params.Get("path"); - } - - void HandleBrowseResponse(NViewerEvents::TEvBrowseResponse::TPtr &ev, const TActorContext &ctx) { - NViewerEvents::TEvBrowseResponse& event = *ev->Get(); - - if (!event.Error.empty()) { - return SendErrorReplyAndDie(event.Error, ctx); - } - - auto type = event.BrowseInfo.GetType(); - auto contentHandler = Viewer->GetContentHandler(type); - if (!contentHandler) { - return SendErrorReplyAndDie(TStringBuilder() - << "HTTP/1.1 500 Internal Server Error\r\n" - "Connection: Close\r\n" - "\r\n" - "No content can be retrieved from " - << (NKikimrViewer::EObjectType_IsValid((int)type) ? 
NKikimrViewer::EObjectType_Name(type) : TString("unknown")) - << " object\r\n", - ctx); - } - - ContentRequestContext.Type = event.BrowseInfo.GetType(); - ContentRequestContext.ObjectName = event.BrowseInfo.GetName(); - ContentRequestContext.Timeout -= (ctx.Now() - BrowseStarted); - - // spawn content retrieval actor - ctx.RegisterWithSameMailbox(contentHandler(Initiator, ContentRequestContext)); - Die(ctx); - } - - void HandleBrowseTimeout(const TActorContext& ctx) { - return SendErrorReplyAndDie(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get()), ctx); - } - - void SendErrorReplyAndDie(const TString& error, const TActorContext& ctx) { - ctx.Send( - Initiator, - new NMon::TEvHttpInfoRes( - error, - 0, - NMon::IEvHttpInfoRes::EContentType::Custom)); - - Die(ctx); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: path - in: query - description: schema path - required: true - type: string - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - - name: key - in: query - description: key for positioning - required: false - type: string - - name: limit - in: query - description: rows limit - required: false - type: integer - - name: offset - in: query - description: offset in rows - required: false - type: integer - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Schema content preview"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Return schema preview"; - } -}; - - -} -} diff --git a/ydb/core/viewer/json_counters.h b/ydb/core/viewer/json_counters.h deleted file mode 100644 index 8ad296bcbe43..000000000000 --- a/ydb/core/viewer/json_counters.h +++ 
/dev/null @@ -1,439 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include "viewer.h" -#include "json_tabletinfo.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; -using ::google::protobuf::FieldDescriptor; - -class TJsonCounters : public TActorBootstrapped { - using TThis = TJsonCounters; - using TBase = TActorBootstrapped; - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - ui32 Requested; - ui32 Received; - THolder NodesInfo; - TMap VDiskInfo; - TMap PDiskInfo; - TMap TabletInfo; - TMap BSGroupInfo; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonCounters(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) - : Viewer(viewer) - , Event(ev) - , Requested(0) - , Received(0) - {} - - void Bootstrap(const TActorContext& ctx) { - const TActorId nameserviceId = GetNameserviceActorId(); - ctx.Send(nameserviceId, new TEvInterconnect::TEvListNodes()); - TBase::Become(&TThis::StateRequestedBrowse); - ctx.Schedule(TDuration::Seconds(60), new TEvents::TEvWakeup()); - } - - void Die(const TActorContext& ctx) override { - if (NodesInfo != nullptr) { - for (const auto& ni : NodesInfo->Nodes) { - ctx.Send(TActivationContext::InterconnectProxy(ni.NodeId), new TEvents::TEvUnsubscribe()); - } - } - TBase::Die(ctx); - } - - void SendRequest(ui32 nodeId, const TActorContext& ctx) { - TActorId whiteboardServiceId = MakeNodeWhiteboardServiceId(nodeId); - ctx.Send(whiteboardServiceId, new TEvWhiteboard::TEvVDiskStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId); - ++Requested; - ctx.Send(whiteboardServiceId, new TEvWhiteboard::TEvPDiskStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId); - ++Requested; - ctx.Send(whiteboardServiceId, new TEvWhiteboard::TEvTabletStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, 
nodeId); - ++Requested; - ctx.Send(whiteboardServiceId, new TEvWhiteboard::TEvBSGroupStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId); - ++Requested; - } - - void HandleBrowse(TEvInterconnect::TEvNodesInfo::TPtr& ev, const TActorContext& ctx) { - NodesInfo = ev->Release(); - for (const auto& ni : NodesInfo->Nodes) { - SendRequest(ni.NodeId, ctx); - } - if (Requested > 0) { - TBase::Become(&TThis::StateRequestedNodeInfo); - } else { - ReplyAndDie(ctx); - } - } - - void Undelivered(TEvents::TEvUndelivered::TPtr &ev, const TActorContext &ctx) { - ui32 nodeId = ev.Get()->Cookie; - switch (ev->Get()->SourceType) { - case TEvWhiteboard::EvVDiskStateRequest: - if (VDiskInfo.emplace(nodeId, NKikimrWhiteboard::TEvVDiskStateResponse{}).second) { - NodeStateInfoReceived(ctx); - } - break; - case TEvWhiteboard::EvPDiskStateRequest: - if (PDiskInfo.emplace(nodeId, NKikimrWhiteboard::TEvPDiskStateResponse{}).second) { - NodeStateInfoReceived(ctx); - } - break; - case TEvWhiteboard::EvTabletStateRequest: - if (TabletInfo.emplace(nodeId, NKikimrWhiteboard::TEvTabletStateResponse{}).second) { - NodeStateInfoReceived(ctx); - } - break; - case TEvWhiteboard::EvBSGroupStateRequest: - if (BSGroupInfo.emplace(nodeId, NKikimrWhiteboard::TEvBSGroupStateResponse{}).second) { - NodeStateInfoReceived(ctx); - } - break; - } - } - - void Disconnected(TEvInterconnect::TEvNodeDisconnected::TPtr &ev, const TActorContext &ctx) { - ui32 nodeId = ev->Get()->NodeId; - if (VDiskInfo.emplace(nodeId, NKikimrWhiteboard::TEvVDiskStateResponse{}).second) { - NodeStateInfoReceived(ctx); - } - if (PDiskInfo.emplace(nodeId, NKikimrWhiteboard::TEvPDiskStateResponse{}).second) { - NodeStateInfoReceived(ctx); - } - if (TabletInfo.emplace(nodeId, NKikimrWhiteboard::TEvTabletStateResponse{}).second) { - NodeStateInfoReceived(ctx); - } - if (BSGroupInfo.emplace(nodeId, NKikimrWhiteboard::TEvBSGroupStateResponse{}).second) { - NodeStateInfoReceived(ctx); - } - } - - 
void Handle(TEvWhiteboard::TEvVDiskStateResponse::TPtr& ev, const TActorContext& ctx) { - ui64 nodeId = ev.Get()->Cookie; - VDiskInfo[nodeId] = std::move(ev->Get()->Record); - NodeStateInfoReceived(ctx); - } - - void Handle(TEvWhiteboard::TEvPDiskStateResponse::TPtr& ev, const TActorContext& ctx) { - ui64 nodeId = ev.Get()->Cookie; - PDiskInfo[nodeId] = std::move(ev->Get()->Record); - NodeStateInfoReceived(ctx); - } - - void Handle(TEvWhiteboard::TEvTabletStateResponse::TPtr& ev, const TActorContext& ctx) { - ui64 nodeId = ev.Get()->Cookie; - TabletInfo[nodeId] = std::move(ev->Get()->Record); - NodeStateInfoReceived(ctx); - } - - void Handle(TEvWhiteboard::TEvBSGroupStateResponse::TPtr& ev, const TActorContext& ctx) { - ui64 nodeId = ev.Get()->Cookie; - BSGroupInfo[nodeId] = std::move(ev->Get()->Record); - NodeStateInfoReceived(ctx); - } - - void NodeStateInfoReceived(const TActorContext& ctx) { - ++Received; - if (Received == Requested) { - ReplyAndDie(ctx); - } - } - - STFUNC(StateRequestedBrowse) { - switch (ev->GetTypeRewrite()) { - HFunc(TEvInterconnect::TEvNodesInfo, HandleBrowse); - CFunc(TEvents::TSystem::Wakeup, Timeout); - } - } - - STFUNC(StateRequestedNodeInfo) { - switch (ev->GetTypeRewrite()) { - HFunc(TEvWhiteboard::TEvVDiskStateResponse, Handle); - HFunc(TEvWhiteboard::TEvPDiskStateResponse, Handle); - HFunc(TEvWhiteboard::TEvTabletStateResponse, Handle); - HFunc(TEvWhiteboard::TEvBSGroupStateResponse, Handle); - HFunc(TEvents::TEvUndelivered, Undelivered); - HFunc(TEvInterconnect::TEvNodeDisconnected, Disconnected); - CFunc(TEvents::TSystem::Wakeup, Timeout); - } - } - - template - void RenderStats(TStringStream& json, - ResponseType& response, - const TEvInterconnect::TNodeInfo& nodeInfo, - const TString& subsystem, - const TVector& groupFields) { - - TWhiteboardGrouper::GroupResponse(response, groupFields, true); - auto& stateInfo = TWhiteboardInfo::GetElementsField(response); - TStringBuf host(nodeInfo.Host); - size_t pos = host.find('.'); - if 
(pos != TString::npos) { - host = host.substr(0, pos); - } - for (typename TWhiteboardInfo::TElementType& info : stateInfo) { - const Reflection& reflectionFrom = *info.GetReflection(); - json << ",{\"labels\":{"; - if (nodeInfo.NodeId != 0) { - json << "\"node\":" << nodeInfo.NodeId << ","; - } - json << "\"host\":\"" << host << "\","; - if (nodeInfo.Port != 0) { - json << "\"port\":" << nodeInfo.Port << ","; - } - json << "\"subsystem\":\"" << subsystem << "\","; - json << "\"" << groupFields.front()->name() << "\":\""; - json << reflectionFrom.GetEnum(info, groupFields.front())->name(); - json << "\""; - json << "},\"value\":"; - json << info.GetCount(); - json << '}'; - } - } - - void RenderStats(TStringStream& json, - NKikimrWhiteboard::TEvVDiskStateResponse& response, - const TEvInterconnect::TNodeInfo& nodeInfo) { - if (response.VDiskStateInfoSize() == 0) - return; - static TVector groupFields - = TWhiteboardGrouper::GetProtoFields("VDiskState"); - RenderStats(json, response, nodeInfo, "VDisk", groupFields); - } - - void RenderStats(TStringStream& json, - NKikimrWhiteboard::TEvPDiskStateResponse& response, - const TEvInterconnect::TNodeInfo& nodeInfo) { - if (response.PDiskStateInfoSize() == 0) - return; - static TVector groupFields - = TWhiteboardGrouper::GetProtoFields("State"); - RenderStats(json, response, nodeInfo, "PDisk", groupFields); - } - - void RenderStats(TStringStream& json, - NKikimrWhiteboard::TEvTabletStateResponse& response, - const TEvInterconnect::TNodeInfo& nodeInfo) { - if (response.TabletStateInfoSize() == 0) - return; - static TVector groupFields - = TWhiteboardGrouper::GetProtoFields("State"); - RenderStats(json, response, nodeInfo, "Tablet", groupFields); - } - - void ReplyAndDie(const TActorContext& ctx) { - TStringStream json; - - json << '{'; - json << "\"sensors\":["; - - Sort(NodesInfo->Nodes, []( - const TEvInterconnect::TNodeInfo& a, - const TEvInterconnect::TNodeInfo& b) -> bool { - return a.NodeId < b.NodeId; - }); - - ui32 
nodesResponded = 0; - for (const std::pair& value : VDiskInfo) { - if (value.second.HasResponseTime()) { - ++nodesResponded; - } - } - - json << "{\"labels\":{"; - json << "\"subsystem\":\"Viewer\","; - json << "\"host\":\"cluster\","; - json << "\"sensor\":\"NodesResponded\""; - json << "},\"value\":" << nodesResponded; - json << '}'; - - NKikimrWhiteboard::TEvTabletStateResponse mergedTabletInfo; - MergeWhiteboardResponses(mergedTabletInfo, TabletInfo); - TabletInfo.clear(); - for (const auto& tabletInfo : mergedTabletInfo.GetTabletStateInfo()) { - if (!tabletInfo.HasNodeId()) { - continue; - } - auto it = TabletInfo.find(tabletInfo.GetNodeId()); - if (it == TabletInfo.end()) { - it = TabletInfo.emplace(tabletInfo.GetNodeId(), NKikimrWhiteboard::TEvTabletStateResponse{}).first; - } - it->second.AddTabletStateInfo()->CopyFrom(tabletInfo); - } - - std::array pDiskUserSpaceHistogram = {}; - - auto itVDiskInfo = VDiskInfo.begin(); - auto itPDiskInfo = PDiskInfo.begin(); - auto itTabletInfo = TabletInfo.begin(); - - for (const auto& nodeInfo : NodesInfo->Nodes) { - while (itVDiskInfo != VDiskInfo.end() && itVDiskInfo->first < nodeInfo.NodeId) - ++itVDiskInfo; - if (itVDiskInfo != VDiskInfo.end() && itVDiskInfo->first == nodeInfo.NodeId) { - RenderStats(json, itVDiskInfo->second, nodeInfo); - } - while (itPDiskInfo != PDiskInfo.end() && itPDiskInfo->first < nodeInfo.NodeId) - ++itPDiskInfo; - if (itPDiskInfo != PDiskInfo.end() && itPDiskInfo->first == nodeInfo.NodeId) { - RenderStats(json, itPDiskInfo->second, nodeInfo); - auto& stateInfo = TWhiteboardInfo::GetElementsField(itPDiskInfo->second); - for (const typename TWhiteboardInfo::TElementType& info : stateInfo) { - if (info.GetTotalSize() > 0 && info.GetAvailableSize() > 0) { - ++pDiskUserSpaceHistogram[std::min((info.GetTotalSize() - info.GetAvailableSize()) * pDiskUserSpaceHistogram.size() / info.GetTotalSize(), pDiskUserSpaceHistogram.size() - 1)]; - } - } - } - while (itTabletInfo != TabletInfo.end() && 
itTabletInfo->first < nodeInfo.NodeId) - ++itTabletInfo; - if (itTabletInfo != TabletInfo.end() && itTabletInfo->first == nodeInfo.NodeId) { - RenderStats(json, itTabletInfo->second, nodeInfo); - } - } - - static TEvInterconnect::TNodeInfo totals(0, "", "cluster", "", 0, TNodeLocation()); - - for (size_t p = 0; p < pDiskUserSpaceHistogram.size(); ++p) { - json << ",{\"labels\":{"; - json << "\"bin\":\"" << ((p + 1) * 100 / pDiskUserSpaceHistogram.size()) << "%\","; - json << "\"subsystem\":\"PDisk\","; - json << "\"host\":\"cluster\","; - json << "\"sensor\":\"UsedSpace\""; - json << "},\"value\":"; - json << pDiskUserSpaceHistogram[p]; - json << '}'; - } - - NKikimrWhiteboard::TEvVDiskStateResponse mergedVDiskInfo; - MergeWhiteboardResponses(mergedVDiskInfo, VDiskInfo); - RenderStats(json, mergedVDiskInfo, totals); - NKikimrWhiteboard::TEvPDiskStateResponse mergedPDiskInfo; - MergeWhiteboardResponses(mergedPDiskInfo, PDiskInfo); - RenderStats(json, mergedPDiskInfo, totals); - RenderStats(json, mergedTabletInfo, totals); - NKikimrWhiteboard::TEvBSGroupStateResponse mergedBSGroupInfo; - MergeWhiteboardResponses(mergedBSGroupInfo, BSGroupInfo); - - std::array bsGroupUnavaiableHistogram = {}; - std::array bsGroupGreenHistogram = {}; - std::array bsGroupNotGreenHistogram = {}; - std::unordered_map bsGroupVDisks; - std::unordered_map bsGroupGreenVDisks; - std::unordered_map bsGroupNotGreenVDisks; - { - auto& stateInfo = TWhiteboardInfo::GetElementsField(mergedBSGroupInfo); - for (const typename TWhiteboardInfo::TElementType& info : stateInfo) { - bsGroupVDisks[info.GetGroupID()] = info.VDiskIdsSize(); - } - } - { - auto& stateInfo = TWhiteboardInfo::GetElementsField(mergedVDiskInfo); - for (const typename TWhiteboardInfo::TElementType& info : stateInfo) { - auto groupId = info.GetVDiskId().GetGroupID(); - bsGroupVDisks[groupId]--; - auto flag = GetVDiskOverallFlag(info); - if (flag == NKikimrViewer::EFlag::Green && info.GetReplicated()) { - 
bsGroupGreenVDisks[groupId]++; - } else { - bsGroupNotGreenVDisks[groupId]++; - } - } - } - { - for (auto it = bsGroupVDisks.begin(); it != bsGroupVDisks.end(); ++it) { - int idx = it->second; - if (idx < 0) { - idx = 0; - } - if (idx >= (int)bsGroupUnavaiableHistogram.size()) { - idx = bsGroupUnavaiableHistogram.size() - 1; - } - bsGroupUnavaiableHistogram[idx]++; - } - } - { - for (auto it = bsGroupGreenVDisks.begin(); it != bsGroupGreenVDisks.end(); ++it) { - int idx = it->second; - if (idx < 0) { - idx = 0; - } - if (idx >= (int)bsGroupGreenHistogram.size()) { - idx = bsGroupGreenHistogram.size() - 1; - } - bsGroupGreenHistogram[idx]++; - } - } - { - for (auto it = bsGroupNotGreenVDisks.begin(); it != bsGroupNotGreenVDisks.end(); ++it) { - int idx = it->second; - if (idx < 0) { - idx = 0; - } - if (idx >= (int)bsGroupNotGreenHistogram.size()) { - idx = bsGroupNotGreenHistogram.size() - 1; - } - bsGroupNotGreenHistogram[idx]++; - } - } - - for (size_t p = 0; p < bsGroupUnavaiableHistogram.size(); ++p) { - json << ",{\"labels\":{"; - json << "\"bin\":\"" << p << "\","; - json << "\"subsystem\":\"BSGroups\","; - json << "\"host\":\"cluster\","; - json << "\"sensor\":\"UnavailableVDisks\""; - json << "},\"value\":"; - json << bsGroupUnavaiableHistogram[p]; - json << '}'; - } - - for (size_t p = 0; p < bsGroupGreenHistogram.size(); ++p) { - json << ",{\"labels\":{"; - json << "\"bin\":\"" << p << "\","; - json << "\"subsystem\":\"BSGroups\","; - json << "\"host\":\"cluster\","; - json << "\"sensor\":\"GreenVDisks\""; - json << "},\"value\":"; - json << bsGroupGreenHistogram[p]; - json << '}'; - } - - for (size_t p = 0; p < bsGroupNotGreenHistogram.size(); ++p) { - json << ",{\"labels\":{"; - json << "\"bin\":\"" << p << "\","; - json << "\"subsystem\":\"BSGroups\","; - json << "\"host\":\"cluster\","; - json << "\"sensor\":\"NotGreenVDisks\""; - json << "},\"value\":"; - json << bsGroupNotGreenHistogram[p]; - json << '}'; - } - - json << ']'; - json << '}'; - - 
ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), json.Str()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); - } - - void Timeout(const TActorContext& ctx) { - ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); - } -}; - -} -} diff --git a/ydb/core/viewer/json_describe.h b/ydb/core/viewer/json_describe.h deleted file mode 100644 index 276bd41e31e6..000000000000 --- a/ydb/core/viewer/json_describe.h +++ /dev/null @@ -1,423 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include "viewer.h" -#include "json_pipe_req.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; -using NSchemeShard::TEvSchemeShard; -using TNavigate = NSchemeCache::TSchemeCacheNavigate; - -class TJsonDescribe : public TViewerPipeClient { - using TBase = TViewerPipeClient; - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - TAutoPtr SchemeShardResult; - TAutoPtr CacheResult; - TAutoPtr DescribeResult; - TJsonSettings JsonSettings; - ui32 Timeout = 0; - bool ExpandSubElements = true; - int Requests = 0; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonDescribe(IViewer* viewer, NMon::TEvHttpInfo::TPtr &ev) - : Viewer(viewer) - , Event(ev) - {} - - void FillParams(NKikimrSchemeOp::TDescribePath* record, const TCgiParameters& params) { - if (params.Has("path")) { - record->SetPath(params.Get("path")); - } - if (params.Has("path_id")) { - record->SetPathId(FromStringWithDefault(params.Get("path_id"))); - } - if (params.Has("schemeshard_id")) { - record->SetSchemeshardId(FromStringWithDefault(params.Get("schemeshard_id"))); - } - record->MutableOptions()->SetBackupInfo(FromStringWithDefault(params.Get("backup"), true)); - 
record->MutableOptions()->SetShowPrivateTable(FromStringWithDefault(params.Get("private"), true)); - record->MutableOptions()->SetReturnChildren(FromStringWithDefault(params.Get("children"), true)); - record->MutableOptions()->SetReturnBoundaries(FromStringWithDefault(params.Get("boundaries"), false)); - record->MutableOptions()->SetReturnPartitionConfig(FromStringWithDefault(params.Get("partition_config"), true)); - record->MutableOptions()->SetReturnPartitionStats(FromStringWithDefault(params.Get("partition_stats"), false)); - record->MutableOptions()->SetReturnPartitioningInfo(FromStringWithDefault(params.Get("partitioning_info"), true)); - } - - void Bootstrap() { - const auto& params(Event->Get()->Request.GetParams()); - JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), false); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - ExpandSubElements = FromStringWithDefault(params.Get("subs"), ExpandSubElements); - InitConfig(params); - - if (params.Has("schemeshard_id")) { - THolder request = MakeHolder(); - FillParams(&request->Record, params); - ui64 schemeShardId = FromStringWithDefault(params.Get("schemeshard_id")); - SendRequestToPipe(ConnectTabletPipe(schemeShardId), request.Release()); - } else { - THolder request = MakeHolder(); - FillParams(request->Record.MutableDescribePath(), params); - request->Record.SetUserToken(Event->Get()->UserToken); - SendRequest(MakeTxProxyID(), request.Release()); - } - ++Requests; - - if (params.Has("path")) { - TAutoPtr request(new NSchemeCache::TSchemeCacheNavigate()); - NSchemeCache::TSchemeCacheNavigate::TEntry entry; - entry.Operation = NSchemeCache::TSchemeCacheNavigate::OpList; - entry.SyncVersion = false; - entry.Path = SplitPath(params.Get("path")); - request->ResultSet.emplace_back(entry); - SendRequest(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(request)); - ++Requests; - } 
- - Become(&TThis::StateRequestedDescribe, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - STATEFN(StateRequestedDescribe) { - switch (ev->GetTypeRewrite()) { - hFunc(TEvSchemeShard::TEvDescribeSchemeResult, Handle); - hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, Handle); - hFunc(TEvTabletPipe::TEvClientConnected, TBase::Handle); - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void Handle(TEvSchemeShard::TEvDescribeSchemeResult::TPtr& ev) { - SchemeShardResult = ev->Release(); - if (SchemeShardResult->GetRecord().GetStatus() == NKikimrScheme::EStatus::StatusSuccess) { - ReplyAndPassAway(); - } else { - RequestDone("TEvDescribeSchemeResult"); - } - } - - void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr &ev) { - CacheResult = ev->Release(); - RequestDone("TEvNavigateKeySetResult"); - } - - void RequestDone(const char* name) { - --Requests; - if (Requests == 0) { - ReplyAndPassAway(); - } - if (Requests < 0) { - BLOG_CRIT("Requests < 0 in RequestDone(" << name << ")"); - } - } - - void FillDescription(NKikimrSchemeOp::TDirEntry* descr, ui64 schemeShardId) { - descr->SetSchemeshardId(schemeShardId); - descr->SetPathId(InvalidLocalPathId); - descr->SetParentPathId(InvalidLocalPathId); - descr->SetCreateFinished(true); - descr->SetCreateTxId(0); - descr->SetCreateStep(0); - } - - NKikimrSchemeOp::EPathType ConvertType(TNavigate::EKind navigate) { - switch (navigate) { - case TNavigate::KindSubdomain: - return NKikimrSchemeOp::EPathTypeSubDomain; - case TNavigate::KindPath: - return NKikimrSchemeOp::EPathTypeDir; - case TNavigate::KindExtSubdomain: - return NKikimrSchemeOp::EPathTypeExtSubDomain; - case TNavigate::KindTable: - return NKikimrSchemeOp::EPathTypeTable; - case TNavigate::KindOlapStore: - return NKikimrSchemeOp::EPathTypeColumnStore; - case TNavigate::KindColumnTable: - return NKikimrSchemeOp::EPathTypeColumnTable; - case TNavigate::KindRtmr: - return NKikimrSchemeOp::EPathTypeRtmrVolume; - case 
TNavigate::KindKesus: - return NKikimrSchemeOp::EPathTypeKesus; - case TNavigate::KindSolomon: - return NKikimrSchemeOp::EPathTypeSolomonVolume; - case TNavigate::KindTopic: - return NKikimrSchemeOp::EPathTypePersQueueGroup; - case TNavigate::KindCdcStream: - return NKikimrSchemeOp::EPathTypeCdcStream; - case TNavigate::KindSequence: - return NKikimrSchemeOp::EPathTypeSequence; - case TNavigate::KindReplication: - return NKikimrSchemeOp::EPathTypeReplication; - case TNavigate::KindBlobDepot: - return NKikimrSchemeOp::EPathTypeBlobDepot; - case TNavigate::KindExternalTable: - return NKikimrSchemeOp::EPathTypeExternalTable; - case TNavigate::KindExternalDataSource: - return NKikimrSchemeOp::EPathTypeExternalDataSource; - case TNavigate::KindBlockStoreVolume: - return NKikimrSchemeOp::EPathTypeBlockStoreVolume; - case TNavigate::KindFileStore: - return NKikimrSchemeOp::EPathTypeFileStore; - case TNavigate::KindView: - return NKikimrSchemeOp::EPathTypeView; - default: - return NKikimrSchemeOp::EPathTypeDir; - } - } - - TAutoPtr GetSchemeShardDescribeSchemeInfo() { - TAutoPtr result(new NKikimrViewer::TEvDescribeSchemeInfo()); - auto& record = SchemeShardResult->GetRecord(); - const auto *descriptor = NKikimrScheme::EStatus_descriptor(); - result->SetStatus(descriptor->FindValueByNumber(record.GetStatus())->name()); - result->SetReason(record.GetReason()); - result->SetPath(record.GetPath()); - result->MutablePathDescription()->CopyFrom(record.GetPathDescription()); - result->SetPathId(record.GetPathId()); - result->SetLastExistedPrefixPath(record.GetLastExistedPrefixPath()); - result->SetLastExistedPrefixPathId(record.GetLastExistedPrefixPathId()); - result->MutableLastExistedPrefixDescription()->CopyFrom(record.GetLastExistedPrefixDescription()); - result->SetPathOwnerId(record.GetPathOwnerId()); - result->SetSource(NKikimrViewer::TEvDescribeSchemeInfo::SchemeShard); - - return result; - } - - TAutoPtr GetCacheDescribeSchemeInfo() { - const auto& entry = 
CacheResult->Request.Get()->ResultSet.front(); - const auto& path = Event->Get()->Request.GetParams().Get("path"); - const auto& schemeShardId = entry.DomainInfo->DomainKey.OwnerId; - - TAutoPtr result(new NKikimrViewer::TEvDescribeSchemeInfo()); - result->SetPath(path); - result->SetPathId(entry.Self->Info.GetPathId()); - result->SetPathOwnerId(entry.Self->Info.GetSchemeshardId()); - - auto* pathDescription = result->MutablePathDescription(); - auto* self = pathDescription->MutableSelf(); - - self->CopyFrom(entry.Self->Info); - FillDescription(self, schemeShardId); - - if (entry.ListNodeEntry) { - for (const auto& child : entry.ListNodeEntry->Children) { - auto descr = pathDescription->AddChildren(); - descr->SetName(child.Name); - descr->SetPathType(ConvertType(child.Kind)); - FillDescription(descr, schemeShardId); - } - }; - const auto *descriptor = NKikimrScheme::EStatus_descriptor(); - auto status = descriptor->FindValueByNumber(NKikimrScheme::StatusSuccess)->name(); - result->SetStatus(status); - result->SetSource(NKikimrViewer::TEvDescribeSchemeInfo::Cache); - return result; - } - - void ReplyAndPassAway() { - TStringStream json; - if (SchemeShardResult != nullptr && SchemeShardResult->GetRecord().GetStatus() == NKikimrScheme::EStatus::StatusSuccess) { - DescribeResult = GetSchemeShardDescribeSchemeInfo(); - } else if (CacheResult != nullptr) { - NSchemeCache::TSchemeCacheNavigate *navigate = CacheResult->Request.Get(); - Y_ABORT_UNLESS(navigate->ResultSet.size() == 1); - if (navigate->ErrorCount == 0) { - DescribeResult = GetCacheDescribeSchemeInfo(); - } - } - if (DescribeResult != nullptr) { - if (ExpandSubElements) { - if (DescribeResult->HasPathDescription()) { - auto& pathDescription = *DescribeResult->MutablePathDescription(); - if (pathDescription.HasTable()) { - auto& table = *pathDescription.MutableTable(); - for (auto& tableIndex : table.GetTableIndexes()) { - NKikimrSchemeOp::TDirEntry& child = *pathDescription.AddChildren(); - 
child.SetName(tableIndex.GetName()); - child.SetPathType(NKikimrSchemeOp::EPathType::EPathTypeTableIndex); - } - for (auto& tableCdc : table.GetCdcStreams()) { - NKikimrSchemeOp::TDirEntry& child = *pathDescription.AddChildren(); - child.SetName(tableCdc.GetName()); - child.SetPathType(NKikimrSchemeOp::EPathType::EPathTypeCdcStream); - } - } - } - } - const auto *descriptor = NKikimrScheme::EStatus_descriptor(); - auto accessDeniedStatus = descriptor->FindValueByNumber(NKikimrScheme::StatusAccessDenied)->name(); - if (DescribeResult->GetStatus() == accessDeniedStatus) { - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPFORBIDDEN(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - return; - } - TProtoToJson::ProtoToJson(json, *DescribeResult, JsonSettings); - DecodeExternalTableContent(json); - } else { - json << "null"; - } - - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), json.Str()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } - - void DecodeExternalTableContent(TStringStream& json) const { - if (!DescribeResult) { - return; - } - - if (!DescribeResult->GetPathDescription().HasExternalTableDescription()) { - return; - } - - const auto& content = DescribeResult->GetPathDescription().GetExternalTableDescription().GetContent(); - if (!content) { - return; - } - - NExternalSource::IExternalSourceFactory::TPtr externalSourceFactory{NExternalSource::CreateExternalSourceFactory({})}; - NJson::TJsonValue root; - const auto& sourceType = DescribeResult->GetPathDescription().GetExternalTableDescription().GetSourceType(); - try { - NJson::ReadJsonTree(json.Str(), &root); - root["PathDescription"]["ExternalTableDescription"].EraseValue("Content"); - auto source = externalSourceFactory->GetOrCreate(sourceType); - auto parameters = source->GetParameters(content); - for (const auto& [key, items]: parameters) { - NJson::TJsonValue array{NJson::EJsonValueType::JSON_ARRAY}; - for 
(const auto& item: items) { - array.AppendValue(item); - } - root["PathDescription"]["ExternalTableDescription"]["Content"][key] = array; - } - } catch (...) { - BLOG_CRIT("Сan't unpack content for external table: " << sourceType << ", error: " << CurrentExceptionMessage()); - } - json.Clear(); - json << root; - } - - void HandleTimeout() { - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: path - in: query - description: schema path - required: false - type: string - - name: schemeshard_id - in: query - description: schemeshard identifier (tablet id) - required: false - type: integer - - name: path_id - in: query - description: path id - required: false - type: integer - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - - name: backup - in: query - description: return backup information - required: false - type: boolean - default: true - - name: private - in: query - description: return private tables - required: false - type: boolean - default: true - - name: children - in: query - description: return children - required: false - type: boolean - default: true - - name: boundaries - in: query - description: return boundaries - required: false - type: boolean - default: false - - name: partition_config - in: query - description: return partition configuration - required: false - type: boolean - default: true - - name: partition_stats - in: query - description: return partitions statistics - required: false - type: boolean - default: false - - name: 
partitioning_info - in: query - description: return partitioning information - required: false - type: boolean - default: true - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Schema detailed information"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns detailed information about schema object"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_describe_consumer.h b/ydb/core/viewer/json_describe_consumer.h deleted file mode 100644 index 693f5cc5e065..000000000000 --- a/ydb/core/viewer/json_describe_consumer.h +++ /dev/null @@ -1,107 +0,0 @@ -#pragma once -#include -#include -#include "json_local_rpc.h" - -namespace NKikimr { -namespace NViewer { - -using TDescribeConsumerRpc = TJsonLocalRpc; - -class TJsonDescribeConsumer : public TDescribeConsumerRpc { -public: - using TBase = TDescribeConsumerRpc; - - TJsonDescribeConsumer(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) - : TBase(viewer, ev) - {} - - void Bootstrap() override { - if (Event->Get()->Request.GetMethod() != HTTP_METHOD_GET) { - return ReplyAndPassAway(Viewer->GetHTTPBADREQUEST(Event->Get(), "text/plain", "Only GET method is allowed")); - } - const auto& params(Event->Get()->Request.GetParams()); - if (params.Has("database")) { - Database = params.Get("database"); - } else if (params.Has("database_path")) { - Database = params.Get("database_path"); - } else { - return ReplyAndPassAway(Viewer->GetHTTPBADREQUEST(Event->Get(), "text/plain", "field 'database' is required")); - } - - if (params.Has("consumer")) { - Request.set_consumer(params.Get("consumer")); - } else { - return ReplyAndPassAway(Viewer->GetHTTPBADREQUEST(Event->Get(), "text/plain", "field 'consumer' is required")); - } - - if (params.Has("include_stats")) { - 
Request.set_include_stats(FromStringWithDefault(params.Get("include_stats"), false)); - } - - TBase::Bootstrap(); - } -}; - -template<> -YAML::Node TJsonRequestSwagger::GetSwagger() { - YAML::Node node = YAML::Load(R"___( - get: - tags: - - viewer - summary: Topic schema detailed information - description: Returns detailed information about topic - parameters: - - name: database - in: query - description: database name - required: true - type: string - - name: consumer - in: query - description: consumer name - required: true - type: string - - name: include_stats - in: query - description: include stat flag - required: false - type: bool - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - responses: - 200: - description: OK - content: - application/json: - schema: {} - 400: - description: Bad Request - 403: - description: Forbidden - 504: - description: Gateway Timeout - )___"); - node["get"]["responses"]["200"]["content"]["application/json"]["schema"] = TProtoToYaml::ProtoToYamlSchema(); - return node; -} - -} -} diff --git a/ydb/core/viewer/json_describe_topic.h b/ydb/core/viewer/json_describe_topic.h deleted file mode 100644 index 2a7a8fb418dc..000000000000 --- a/ydb/core/viewer/json_describe_topic.h +++ /dev/null @@ -1,107 +0,0 @@ -#pragma once -#include -#include -#include "json_local_rpc.h" - -namespace NKikimr { -namespace NViewer { - -using TDescribeTopicRpc = TJsonLocalRpc; - -class TJsonDescribeTopic : public TDescribeTopicRpc { -public: - using TBase = TDescribeTopicRpc; - - TJsonDescribeTopic(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) - : TBase(viewer, ev) - {} - - void Bootstrap() override { - if (Event->Get()->Request.GetMethod() != HTTP_METHOD_GET) { - return 
ReplyAndPassAway(Viewer->GetHTTPBADREQUEST(Event->Get(), "text/plain", "Only GET method is allowed")); - } - const auto& params(Event->Get()->Request.GetParams()); - if (params.Has("database")) { - Database = params.Get("database"); - } else if (params.Has("database_path")) { - Database = params.Get("database_path"); - } else { - return ReplyAndPassAway(Viewer->GetHTTPBADREQUEST(Event->Get(), "text/plain", "field 'database' is required")); - } - - if (params.Has("path")) { - Request.set_path(params.Get("path")); - } else { - return ReplyAndPassAway(Viewer->GetHTTPBADREQUEST(Event->Get(), "text/plain", "field 'path' is required")); - } - - if (params.Has("include_stats")) { - Request.set_include_stats(FromStringWithDefault(params.Get("include_stats"), false)); - } - - TBase::Bootstrap(); - } -}; - -template<> -YAML::Node TJsonRequestSwagger::GetSwagger() { - YAML::Node node = YAML::Load(R"___( - get: - tags: - - viewer - summary: Topic schema detailed information - description: Returns detailed information about topic - parameters: - - name: database - in: query - description: database name - required: true - type: string - - name: path - in: query - description: schema path - required: true - type: string - - name: include_stats - in: query - description: include stat flag - required: false - type: bool - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - responses: - 200: - description: OK - content: - application/json: - schema: {} - 400: - description: Bad Request - 403: - description: Forbidden - 504: - description: Gateway Timeout - )___"); - node["get"]["responses"]["200"]["content"]["application/json"]["schema"] = TProtoToYaml::ProtoToYamlSchema(); - return node; -} - -} -} diff --git a/ydb/core/viewer/json_getblob.h 
b/ydb/core/viewer/json_getblob.h deleted file mode 100644 index d09c691dcb90..000000000000 --- a/ydb/core/viewer/json_getblob.h +++ /dev/null @@ -1,98 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include "json_vdisk_req.h" - -namespace NKikimr { -namespace NViewer { - -using TJsonGetBlob = TJsonVDiskRequest; - -template <> -struct TJsonVDiskRequestHelper { - static std::unique_ptr MakeRequest(NMon::TEvHttpInfo::TPtr &ev, TString *error) { - const TCgiParameters& cgi = ev->Get()->Request.GetParams(); - - bool internals = cgi.Has("internals"); - TString from = cgi.Get("from"); - TString to = cgi.Get("to"); - - auto assign_blob_id = [] (NKikimrVDisk::LogoBlobId *id, const TLogoBlobID &blobId) { - const ui64 *raw = blobId.GetRaw(); - id->set_raw_x1(raw[0]); - id->set_raw_x2(raw[1]); - id->set_raw_x3(raw[2]); - }; - - TString errorExplanation; - auto try_to_parse = [&] (const TString &field, const TString ¶m, NKikimrVDisk::LogoBlobId *id) { - TLogoBlobID blobId; - bool good = TLogoBlobID::Parse(blobId, param, errorExplanation); - if (!good) { - *error = "Failed to parse '" + field + "' field: " + errorExplanation; - return true; - } - assign_blob_id(id, blobId); - return false; - }; - - - auto req = std::make_unique(); - req->Record.set_show_internals(internals); - - NKikimrVDisk::LogoBlobIdRange *range = req->Record.mutable_range(); - if (from) { - try_to_parse("from", from, range->mutable_from()); - } else { - assign_blob_id(range->mutable_from(), Min()); - } - if (to) { - try_to_parse("to", to, range->mutable_to()); - } else { - assign_blob_id(range->mutable_to(), Max()); - } - - return req; - } - - static TString GetAdditionalParameters() { - return R"___( - - name: from - in: query - description: blob identifier, inclusive lower bound for getting range, default is minimal blob id - type: string - - name: to - in: query - description: blob identifier, inclusive upper bound for getting range, default is maximal blob id - 
required: false - type: string - - name: internals - in: query - description: return ingress of each blob - required: false - type: boolean - )___"; - } -}; - - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Get blob from VDisk"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Get blob from VDisk"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_graph.h b/ydb/core/viewer/json_graph.h deleted file mode 100644 index 20fa68980227..000000000000 --- a/ydb/core/viewer/json_graph.h +++ /dev/null @@ -1,194 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include "viewer.h" -#include "log.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - -class TJsonGraph : public TActorBootstrapped { - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - std::vector Metrics; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonGraph(IViewer* viewer, NMon::TEvHttpInfo::TPtr &ev) - : Viewer(viewer) - , Event(ev) - {} - - void Bootstrap() { - BLOG_TRACE("Graph received request for " << Event->Get()->Request.GetUri()); - const auto& params(Event->Get()->Request.GetParams()); - NKikimrGraph::TEvGetMetrics getRequest; - if (params.Has("target")) { - StringSplitter(params.Get("target")).Split(',').SkipEmpty().Collect(&Metrics); - for (const auto& metric : Metrics) { - getRequest.AddMetrics(metric); - } - } else { - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPBADREQUEST(Event->Get(), {}, "Bad Request"), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - return PassAway(); - } - if (params.Has("from")) { - getRequest.SetTimeFrom(FromStringWithDefault(params.Get("from"))); - } - if (params.Has("until")) { - getRequest.SetTimeTo(FromStringWithDefault(params.Get("until"))); - } - if (params.Has("maxDataPoints")) { - 
getRequest.SetMaxPoints(FromStringWithDefault(params.Get("maxDataPoints"), 1000)); - } - Send(NGraph::MakeGraphServiceId(), new NGraph::TEvGraph::TEvGetMetrics(std::move(getRequest))); - Schedule(TDuration::Seconds(30), new TEvents::TEvWakeup()); - Become(&TThis::StateWork); - } - - STATEFN(StateWork) { - switch (ev->GetTypeRewrite()) { - hFunc(NGraph::TEvGraph::TEvMetricsResult, Handle); - cFunc(TEvents::TSystem::Wakeup, Timeout); - } - } - - void Handle(NGraph::TEvGraph::TEvMetricsResult::TPtr& ev) { - const auto& params(Event->Get()->Request.GetParams()); - const auto& response(ev->Get()->Record); - NJson::TJsonValue json; - - if (response.GetError()) { - json["status"] = "error"; - json["error"] = response.GetError(); - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), NJson::WriteJson(json, false)), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - return PassAway(); - } - if (response.DataSize() != Metrics.size()) { - json["status"] = "error"; - json["error"] = "Invalid data size received"; - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), NJson::WriteJson(json, false)), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - return PassAway(); - } - for (size_t nMetric = 0; nMetric < response.DataSize(); ++nMetric) { - const auto& protoMetric(response.GetData(nMetric)); - if (response.TimeSize() != protoMetric.ValuesSize()) { - json["status"] = "error"; - json["error"] = "Invalid value size received"; - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), NJson::WriteJson(json, false)), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - return PassAway(); - } - } - if (!params.Has("format") || params.Get("format") == "graphite") { // graphite - json.SetType(NJson::JSON_ARRAY); - for (size_t nMetric = 0; nMetric < response.DataSize(); ++nMetric) { - const auto& protoMetric(response.GetData(nMetric)); - NJson::TJsonValue& jsonMetric(json.AppendValue({})); - 
jsonMetric["target"] = Metrics[nMetric]; - jsonMetric["title"] = Metrics[nMetric]; - jsonMetric["tags"]["name"] = Metrics[nMetric]; - NJson::TJsonValue& jsonDataPoints(jsonMetric["datapoints"]); - jsonDataPoints.SetType(NJson::JSON_ARRAY); - for (size_t nTime = 0; nTime < response.TimeSize(); ++nTime) { - NJson::TJsonValue& jsonDataPoint(jsonDataPoints.AppendValue({})); - jsonDataPoint.AppendValue(response.GetTime(nTime)); - double value = protoMetric.GetValues(nTime); - if (isnan(value)) { - jsonDataPoint.AppendValue(NJson::TJsonValue(NJson::JSON_NULL)); - } else { - jsonDataPoint.AppendValue(value); - } - } - } - } else { // prometheus - json["status"] = "success"; - NJson::TJsonValue& jsonData(json["data"]); - jsonData["resultType"] = "matrix"; - NJson::TJsonValue& jsonResults(jsonData["result"]); - jsonResults.SetType(NJson::JSON_ARRAY); - for (size_t nMetric = 0; nMetric < response.DataSize(); ++nMetric) { - const auto& protoMetric(response.GetData(nMetric)); - NJson::TJsonValue& jsonResult(jsonResults.AppendValue({})); - jsonResult["metric"]["__name__"] = Metrics[nMetric]; - NJson::TJsonValue& jsonValues(jsonResult["values"]); - jsonValues.SetType(NJson::JSON_ARRAY); - for (size_t nTime = 0; nTime < response.TimeSize(); ++nTime) { - NJson::TJsonValue& jsonDataPoint(jsonValues.AppendValue({})); - jsonDataPoint.AppendValue(response.GetTime(nTime)); - double value = protoMetric.GetValues(nTime); - if (isnan(value)) { - jsonDataPoint.AppendValue(NJson::TJsonValue(NJson::JSON_NULL)); - } else { - jsonDataPoint.AppendValue(value); - } - } - } - } - - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), NJson::WriteJson(json, false)), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } - - void Timeout() { - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } -}; - -template <> -struct TJsonRequestParameters { - static 
YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: target - in: query - description: metrics comma delimited - required: true - type: string - - name: from - in: query - description: time in seconds - required: false - type: integer - - name: until - in: query - description: time in seconds - required: false - type: integer - - name: maxDataPoints - in: query - description: maximum number of data points - required: false - type: integer - - name: format - in: query - description: response format, could be prometheus or graphite - required: false - type: string - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Graph data"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns graph data"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_handlers.cpp b/ydb/core/viewer/json_handlers.cpp new file mode 100644 index 000000000000..7f7b070dd37b --- /dev/null +++ b/ydb/core/viewer/json_handlers.cpp @@ -0,0 +1,47 @@ +#include "json_handlers.h" +#include + +namespace NKikimr::NViewer { + +TSimpleYamlBuilder::TSimpleYamlBuilder(TInitializer initializer) { + Method = Root[TString(initializer.Method)]; + if (initializer.Url) { + Method["tags"].push_back(TString(initializer.Url.After('/').Before('/'))); + } + if (initializer.Tag) { + Method["tags"].push_back(TString(initializer.Tag)); + } + if (initializer.Summary) { + Method["summary"] = TString(initializer.Summary); + } + if (initializer.Description) { + Method["description"] = TString(initializer.Description); + } +} + +void TSimpleYamlBuilder::SetParameters(YAML::Node parameters) { + Method["parameters"] = parameters; +} + +void TSimpleYamlBuilder::AddParameter(TParameter parameter) { + YAML::Node param; + param["in"] = "query"; + param["name"] = TString(parameter.Name); + if (parameter.Description) { + param["description"] = TString(parameter.Description); + } + if (parameter.Type) { + param["type"] 
= TString(parameter.Type); + } + if (parameter.Default) { + param["default"] = TString(parameter.Default); + } + param["required"] = parameter.Required; + Method["parameters"].push_back(param); +} + +void TSimpleYamlBuilder::SetResponseSchema(YAML::Node schema) { + Method["responses"]["200"]["content"]["application/json"]["schema"] = schema; +} + +} diff --git a/ydb/core/viewer/json_handlers.h b/ydb/core/viewer/json_handlers.h index bfe329ad9385..0e0aeb52a890 100644 --- a/ydb/core/viewer/json_handlers.h +++ b/ydb/core/viewer/json_handlers.h @@ -1,7 +1,7 @@ #pragma once - #include "viewer.h" -#include +#include +#include namespace NKikimr::NViewer { @@ -9,53 +9,36 @@ class TJsonHandlerBase { public: virtual ~TJsonHandlerBase() = default; virtual IActor* CreateRequestActor(IViewer* viewer, NMon::TEvHttpInfo::TPtr& event) = 0; - virtual YAML::Node GetResponseJsonSchema() = 0; - virtual TString GetRequestSummary() = 0; - virtual TString GetRequestDescription() = 0; - virtual YAML::Node GetRequestParameters() = 0; virtual YAML::Node GetRequestSwagger() = 0; }; template class TJsonHandler : public TJsonHandlerBase { public: - IActor* CreateRequestActor(IViewer* viewer, NMon::TEvHttpInfo::TPtr& event) override { - return new ActorRequestType(viewer, event); - } + YAML::Node Swagger; - YAML::Node GetResponseJsonSchema() override { - static YAML::Node jsonSchema = TJsonRequestSchema::GetSchema(); - return jsonSchema; - } + TJsonHandler(YAML::Node swagger) + : Swagger(swagger) + {} - TString GetRequestSummary() override { - static TString summary = TJsonRequestSummary::GetSummary(); - return summary; - } - - TString GetRequestDescription() override { - static TString description = TJsonRequestDescription::GetDescription(); - return description; - } - - YAML::Node GetRequestParameters() override { - static YAML::Node parameters = TJsonRequestParameters::GetParameters(); - return parameters; + IActor* CreateRequestActor(IViewer* viewer, NMon::TEvHttpInfo::TPtr& event) override 
{ + return new ActorRequestType(viewer, event); } YAML::Node GetRequestSwagger() override { - static YAML::Node swagger = TJsonRequestSwagger::GetSwagger(); - return swagger; + return Swagger; } }; struct TJsonHandlers { std::vector JsonHandlersList; THashMap> JsonHandlersIndex; + std::map Capabilities; - void AddHandler(const TString& name, TJsonHandlerBase* handler) { + void AddHandler(const TString& name, TJsonHandlerBase* handler, int version = 1) { JsonHandlersList.push_back(name); JsonHandlersIndex[name] = std::shared_ptr(handler); + Capabilities[name] = version; } TJsonHandlerBase* FindHandler(const TString& name) const { @@ -65,6 +48,45 @@ struct TJsonHandlers { } return it->second.get(); } + + int GetCapabilityVersion(const TString& name) const { + auto it = Capabilities.find(name); + if (it == Capabilities.end()) { + return 0; + } + return it->second; + } +}; + +class TSimpleYamlBuilder { +public: + struct TInitializer { + TStringBuf Method; + TStringBuf Tag; + TStringBuf Url; + TStringBuf Summary; + TStringBuf Description; + }; + + struct TParameter { + TStringBuf Name; + TStringBuf Description; + TStringBuf Type; + TStringBuf Default; + bool Required = false; + }; + + YAML::Node Root; + YAML::Node Method; + + TSimpleYamlBuilder(TInitializer initializer); + void SetParameters(YAML::Node parameters); + void AddParameter(TParameter parameter); + void SetResponseSchema(YAML::Node schema); + + operator YAML::Node() { + return Root; + } }; } // namespace NKikimr::NViewer diff --git a/ydb/core/viewer/json_handlers_browse.cpp b/ydb/core/viewer/json_handlers_browse.cpp new file mode 100644 index 000000000000..c9ec41656d8f --- /dev/null +++ b/ydb/core/viewer/json_handlers_browse.cpp @@ -0,0 +1,37 @@ +#include "json_handlers.h" +#include "browse.h" +#include "browse_db.h" +#include "browse_pq.h" +#include "viewer_browse.h" +#include "viewer_content.h" +#include "viewer_metainfo.h" + +namespace NKikimr::NViewer { + +void SetupDBVirtualHandlers(IViewer* viewer) { + 
viewer->RegisterVirtualHandler( + NKikimrViewer::EObjectType::Table, + [] (const TActorId& owner, const IViewer::TBrowseContext& browseContext) -> IActor* { + return new NViewerDB::TBrowseTable(owner, browseContext); + }); +} + +void InitViewerMetaInfoJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/viewer/metainfo", new TJsonHandler(TJsonMetaInfo::GetSwagger())); +} + +void InitViewerBrowseJsonHandler(TJsonHandlers& jsonHandlers) { + jsonHandlers.AddHandler("/viewer/browse", new TJsonHandler(TJsonBrowse::GetSwagger())); +} + +void InitViewerContentJsonHandler(TJsonHandlers &jsonHandlers) { + jsonHandlers.AddHandler("/viewer/content", new TJsonHandler(TJsonContent::GetSwagger())); +} + +void InitViewerBrowseJsonHandlers(TJsonHandlers& jsonHandlers) { + InitViewerMetaInfoJsonHandler(jsonHandlers); + InitViewerBrowseJsonHandler(jsonHandlers); + InitViewerContentJsonHandler(jsonHandlers); +} + +} diff --git a/ydb/core/viewer/json_handlers_operation.cpp b/ydb/core/viewer/json_handlers_operation.cpp index 4e9c90da6d79..7375d177983b 100644 --- a/ydb/core/viewer/json_handlers_operation.cpp +++ b/ydb/core/viewer/json_handlers_operation.cpp @@ -1,17 +1,32 @@ #include "json_handlers.h" - -#include "operation_get.h" -#include "operation_list.h" #include "operation_cancel.h" #include "operation_forget.h" +#include "operation_get.h" +#include "operation_list.h" namespace NKikimr::NViewer { -void InitOperationJsonHandlers(TJsonHandlers& jsonHandlers) { - jsonHandlers.AddHandler("/operation/get", new TJsonHandler); - jsonHandlers.AddHandler("/operation/list", new TJsonHandler); - jsonHandlers.AddHandler("/operation/cancel", new TJsonHandler); - jsonHandlers.AddHandler("/operation/forget", new TJsonHandler); +void InitOperationGetJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/operation/get", new TJsonHandler(TOperationGet::GetSwagger())); +} + +void InitOperationListJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/operation/list", new 
TJsonHandler(TOperationList::GetSwagger())); +} + +void InitOperationCancelJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/operation/cancel", new TJsonHandler(TOperationCancel::GetSwagger())); } +void InitOperationForgetJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/operation/forget", new TJsonHandler(TOperationForget::GetSwagger())); } + +void InitOperationJsonHandlers(TJsonHandlers& jsonHandlers) { + InitOperationGetJsonHandler(jsonHandlers); + InitOperationListJsonHandler(jsonHandlers); + InitOperationCancelJsonHandler(jsonHandlers); + InitOperationForgetJsonHandler(jsonHandlers); +} + +} // namespace NKikimr::NViewer diff --git a/ydb/core/viewer/json_handlers_pdisk.cpp b/ydb/core/viewer/json_handlers_pdisk.cpp index ef99307e25be..55845fc83cad 100644 --- a/ydb/core/viewer/json_handlers_pdisk.cpp +++ b/ydb/core/viewer/json_handlers_pdisk.cpp @@ -1,19 +1,26 @@ -#include -#include - #include "json_handlers.h" - -#include "json_pdisk_restart.h" #include "pdisk_info.h" +#include "pdisk_restart.h" #include "pdisk_status.h" - namespace NKikimr::NViewer { +void InitPDiskInfoJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/pdisk/info", new TJsonHandler(TPDiskInfo::GetSwagger())); +} + +void InitPDiskRestartJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/pdisk/restart", new TJsonHandler(TJsonPDiskRestart::GetSwagger())); +} + +void InitPDiskStatusJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/pdisk/status", new TJsonHandler(TPDiskStatus::GetSwagger())); +} + void InitPDiskJsonHandlers(TJsonHandlers& jsonHandlers) { - jsonHandlers.AddHandler("/pdisk/info", new TJsonHandler); - jsonHandlers.AddHandler("/pdisk/restart", new TJsonHandler); - jsonHandlers.AddHandler("/pdisk/status", new TJsonHandler); + InitPDiskInfoJsonHandler(jsonHandlers); + InitPDiskRestartJsonHandler(jsonHandlers); + InitPDiskStatusJsonHandler(jsonHandlers); } } diff --git a/ydb/core/viewer/json_handlers_pq.cpp 
b/ydb/core/viewer/json_handlers_pq.cpp new file mode 100644 index 000000000000..d3503b95b7f9 --- /dev/null +++ b/ydb/core/viewer/json_handlers_pq.cpp @@ -0,0 +1,29 @@ +#include "viewer.h" +#include "browse_pq.h" + +namespace NKikimr::NViewer { + +void SetupPQVirtualHandlers(IViewer* viewer) { + viewer->RegisterVirtualHandler( + NKikimrViewer::EObjectType::Root, + [] (const TActorId& owner, const IViewer::TBrowseContext& browseContext) -> IActor* { + return new NViewerPQ::TBrowseRoot(owner, browseContext); + }); + viewer->RegisterVirtualHandler( + NKikimrViewer::EObjectType::Consumers, + [] (const TActorId& owner, const IViewer::TBrowseContext& browseContext) -> IActor* { + return new NViewerPQ::TBrowseConsumers(owner, browseContext); + }); + viewer->RegisterVirtualHandler( + NKikimrViewer::EObjectType::Consumer, + [] (const TActorId& owner, const IViewer::TBrowseContext& browseContext) -> IActor* { + return new NViewerPQ::TBrowseConsumer(owner, browseContext); + }); + viewer->RegisterVirtualHandler( + NKikimrViewer::EObjectType::Topic, + [] (const TActorId& owner, const IViewer::TBrowseContext& browseContext) -> IActor* { + return new NViewerPQ::TBrowseTopic(owner, browseContext); + }); +} + +} diff --git a/ydb/core/viewer/json_handlers_query.cpp b/ydb/core/viewer/json_handlers_query.cpp new file mode 100644 index 000000000000..a79c05b54c1f --- /dev/null +++ b/ydb/core/viewer/json_handlers_query.cpp @@ -0,0 +1,20 @@ +#include "json_handlers.h" +#include "query_execute_script.h" +#include "query_fetch_script.h" + +namespace NKikimr::NViewer { + +void InitQueryExecuteScriptJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/query/script/execute", new TJsonHandler(TQueryExecuteScript::GetSwagger())); +} + +void InitQueryFetchScriptJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/query/script/fetch", new TJsonHandler(TQueryFetchScript::GetSwagger())); +} + +void InitQueryJsonHandlers(TJsonHandlers& jsonHandlers) { + 
InitQueryExecuteScriptJsonHandler(jsonHandlers); + InitQueryFetchScriptJsonHandler(jsonHandlers); +} + +} // namespace NKikimr::NViewer diff --git a/ydb/core/viewer/json_handlers_scheme.cpp b/ydb/core/viewer/json_handlers_scheme.cpp index c5dc07f28b28..0445940e641c 100644 --- a/ydb/core/viewer/json_handlers_scheme.cpp +++ b/ydb/core/viewer/json_handlers_scheme.cpp @@ -1,11 +1,14 @@ #include "json_handlers.h" - #include "scheme_directory.h" namespace NKikimr::NViewer { +void InitSchemeDirectoryHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/scheme/directory", new TJsonSchemeDirectoryHandler(), 2); +} + void InitSchemeJsonHandlers(TJsonHandlers& jsonHandlers) { - jsonHandlers.AddHandler("/scheme/directory", new TJsonSchemeDirectoryHandler()); + InitSchemeDirectoryHandler(jsonHandlers); } } diff --git a/ydb/core/viewer/json_handlers_storage.cpp b/ydb/core/viewer/json_handlers_storage.cpp new file mode 100644 index 000000000000..750da437452e --- /dev/null +++ b/ydb/core/viewer/json_handlers_storage.cpp @@ -0,0 +1,14 @@ +#include "json_handlers.h" +#include "storage_groups.h" + +namespace NKikimr::NViewer { + +void InitStorageGroupsJsonHandler(TJsonHandlers& jsonHandlers) { + jsonHandlers.AddHandler("/storage/groups", new TJsonHandler(TStorageGroups::GetSwagger()), 6); +} + +void InitStorageJsonHandlers(TJsonHandlers& jsonHandlers) { + InitStorageGroupsJsonHandler(jsonHandlers); +} + +} diff --git a/ydb/core/viewer/json_handlers_vdisk.cpp b/ydb/core/viewer/json_handlers_vdisk.cpp index 49c869692f08..633bf9c80566 100644 --- a/ydb/core/viewer/json_handlers_vdisk.cpp +++ b/ydb/core/viewer/json_handlers_vdisk.cpp @@ -1,20 +1,56 @@ -#include -#include - #include "json_handlers.h" - -#include "json_vdiskstat.h" -#include "json_getblob.h" -#include "json_blobindexstat.h" -#include "json_vdisk_evict.h" +#include "vdisk_vdiskstat.h" +#include "vdisk_blobindexstat.h" +#include "vdisk_getblob.h" +#include "vdisk_evict.h" namespace NKikimr::NViewer { +void 
InitVDiskStatJsonHandler(TJsonHandlers& handlers) { + TSimpleYamlBuilder yaml({ + .Method = "get", + .Tag = "vdisk", + .Summary = "VDisk statistic", + .Description = "VDisk statistic", + }); + yaml.SetParameters(TJsonVDiskStat::GetParameters()); + yaml.SetResponseSchema(TJsonVDiskStat::GetSchema()); + handlers.AddHandler("/vdisk/vdiskstat", new TJsonHandler(yaml)); +} + +void InitVDiskGetBlobJsonHandler(TJsonHandlers& handlers) { + TSimpleYamlBuilder yaml({ + .Method = "get", + .Tag = "vdisk", + .Summary = "Get blob from VDisk", + .Description = "Get blob from VDisk", + }); + yaml.SetParameters(TJsonGetBlob::GetParameters()); + yaml.SetResponseSchema(TJsonGetBlob::GetSchema()); + handlers.AddHandler("/vdisk/getblob", new TJsonHandler(yaml)); +} + +void InitVDiskBlobIndexStatJsonHandler(TJsonHandlers& jsonHandlers) { + TSimpleYamlBuilder yaml({ + .Method = "get", + .Tag = "vdisk", + .Summary = "Get logoblob index stat from VDisk", + .Description = "Get logoblob index stat from VDisk", + }); + yaml.SetParameters(TJsonBlobIndexStat::GetParameters()); + yaml.SetResponseSchema(TJsonBlobIndexStat::GetSchema()); + jsonHandlers.AddHandler("/vdisk/blobindexstat", new TJsonHandler(yaml)); +} + +void InitVDiskEvictJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/vdisk/evict", new TJsonHandler(TJsonVDiskEvict::GetSwagger())); +} + void InitVDiskJsonHandlers(TJsonHandlers& jsonHandlers) { - jsonHandlers.AddHandler("/vdisk/vdiskstat", new TJsonHandler); - jsonHandlers.AddHandler("/vdisk/getblob", new TJsonHandler); - jsonHandlers.AddHandler("/vdisk/blobindexstat", new TJsonHandler); - jsonHandlers.AddHandler("/vdisk/evict", new TJsonHandler); + InitVDiskStatJsonHandler(jsonHandlers); + InitVDiskGetBlobJsonHandler(jsonHandlers); + InitVDiskBlobIndexStatJsonHandler(jsonHandlers); + InitVDiskEvictJsonHandler(jsonHandlers); } } diff --git a/ydb/core/viewer/json_handlers_viewer.cpp b/ydb/core/viewer/json_handlers_viewer.cpp index ff55ada322b4..b7f70577c398 100644 --- 
a/ydb/core/viewer/json_handlers_viewer.cpp +++ b/ydb/core/viewer/json_handlers_viewer.cpp @@ -1,89 +1,314 @@ -#include - #include "json_handlers.h" - -#include "json_nodelist.h" -#include "json_nodeinfo.h" -#include "json_vdiskinfo.h" -#include "json_pdiskinfo.h" -#include "json_describe.h" -#include "json_describe_topic.h" -#include "json_describe_consumer.h" -#include "json_hotkeys.h" -#include "json_sysinfo.h" -#include "json_tabletinfo.h" -#include "json_hiveinfo.h" -#include "json_bsgroupinfo.h" -#include "json_bscontrollerinfo.h" -#include "json_config.h" -#include "json_counters.h" -#include "json_topicinfo.h" -#include "json_pqconsumerinfo.h" -#include "json_tabletcounters.h" -#include "json_storage.h" -#include "json_storage_usage.h" -#include "json_metainfo.h" -#include "json_browse.h" -#include "json_cluster.h" -#include "json_content.h" -#include "json_labeledcounters.h" -#include "json_tenants.h" -#include "json_hivestats.h" -#include "json_tenantinfo.h" -#include "json_whoami.h" -#include "json_query.h" -#include "json_netinfo.h" -#include "json_compute.h" -#include "json_healthcheck.h" -#include "json_nodes.h" -#include "json_acl.h" -#include "json_graph.h" -#include "json_render.h" -#include "json_autocomplete.h" -#include "check_access.h" +#include "viewer_acl.h" +#include "viewer_autocomplete.h" +#include "viewer_bscontrollerinfo.h" +#include "viewer_capabilities.h" +#include "viewer_check_access.h" +#include "viewer_cluster.h" +#include "viewer_compute.h" +#include "viewer_config.h" +#include "viewer_counters.h" +#include "viewer_describe_consumer.h" +#include "viewer_describe.h" +#include "viewer_describe_topic.h" +#include "viewer_feature_flags.h" +#include "viewer_graph.h" +#include "viewer_healthcheck.h" +#include "viewer_hiveinfo.h" +#include "viewer_hivestats.h" +#include "viewer_hotkeys.h" +#include "viewer_labeled_counters.h" +#include "viewer_netinfo.h" +#include "viewer_nodelist.h" +#include "viewer_nodes.h" +#include 
"viewer_pqconsumerinfo.h" +#include "viewer_query.h" +#include "viewer_render.h" +#include "viewer_storage.h" +#include "viewer_storage_usage.h" +#include "viewer_tabletcounters.h" +#include "viewer_tenantinfo.h" +#include "viewer_tenants.h" +#include "viewer_topicinfo.h" +#include "viewer_whoami.h" namespace NKikimr::NViewer { +TBSGroupState GetBSGroupOverallStateWithoutLatency( + const NKikimrWhiteboard::TBSGroupStateInfo& info, + const TMap& vDisksIndex, + const TMap, const NKikimrWhiteboard::TPDiskStateInfo&>& pDisksIndex) { + + TBSGroupState groupState; + groupState.Overall = NKikimrViewer::EFlag::Grey; + + const auto& vDiskIds = info.GetVDiskIds(); + std::unordered_map failedRings; + std::unordered_map failedDomains; + TVector vDiskFlags; + vDiskFlags.reserve(vDiskIds.size()); + for (auto iv = vDiskIds.begin(); iv != vDiskIds.end(); ++iv) { + const NKikimrBlobStorage::TVDiskID& vDiskId = *iv; + NKikimrViewer::EFlag flag = NKikimrViewer::EFlag::Grey; + auto ie = vDisksIndex.find(vDiskId); + if (ie != vDisksIndex.end()) { + auto pDiskId = std::make_pair(ie->second.GetNodeId(), ie->second.GetPDiskId()); + auto ip = pDisksIndex.find(pDiskId); + if (ip != pDisksIndex.end()) { + const NKikimrWhiteboard::TPDiskStateInfo& pDiskInfo(ip->second); + flag = Max(flag, GetPDiskOverallFlag(pDiskInfo)); + } else { + flag = NKikimrViewer::EFlag::Red; + } + const NKikimrWhiteboard::TVDiskStateInfo& vDiskInfo(ie->second); + flag = Max(flag, GetVDiskOverallFlag(vDiskInfo)); + if (vDiskInfo.GetDiskSpace() > NKikimrWhiteboard::EFlag::Green) { + groupState.SpaceProblems++; + } + } else { + flag = NKikimrViewer::EFlag::Red; + } + vDiskFlags.push_back(flag); + if (flag == NKikimrViewer::EFlag::Red || flag == NKikimrViewer::EFlag::Blue) { + groupState.MissingDisks++; + ++failedRings[vDiskId.GetRing()]; + ++failedDomains[vDiskId.GetDomain()]; + } + groupState.Overall = Max(groupState.Overall, flag); + } + + groupState.Overall = Min(groupState.Overall, NKikimrViewer::EFlag::Yellow); // 
without failed rings we only allow to raise group status up to Blue/Yellow + TString erasure = info.GetErasureSpecies(); + if (erasure == TErasureType::ErasureSpeciesName(TErasureType::ErasureNone)) { + if (!failedDomains.empty()) { + groupState.Overall = NKikimrViewer::EFlag::Red; + } + } else if (erasure == TErasureType::ErasureSpeciesName(TErasureType::ErasureMirror3dc)) { + if (failedRings.size() > 2) { + groupState.Overall = NKikimrViewer::EFlag::Red; + } else if (failedRings.size() == 2) { // TODO: check for 1 ring - 1 domain rule + groupState.Overall = NKikimrViewer::EFlag::Orange; + } else if (failedRings.size() > 0) { + groupState.Overall = Min(groupState.Overall, NKikimrViewer::EFlag::Yellow); + } + } else if (erasure == TErasureType::ErasureSpeciesName(TErasureType::Erasure4Plus2Block)) { + if (failedDomains.size() > 2) { + groupState.Overall = NKikimrViewer::EFlag::Red; + } else if (failedDomains.size() > 1) { + groupState.Overall = NKikimrViewer::EFlag::Orange; + } else if (failedDomains.size() > 0) { + groupState.Overall = Min(groupState.Overall, NKikimrViewer::EFlag::Yellow); + } + } + return groupState; +} + +NKikimrViewer::EFlag GetBSGroupOverallFlagWithoutLatency( + const NKikimrWhiteboard::TBSGroupStateInfo& info, + const TMap& vDisksIndex, + const TMap, const NKikimrWhiteboard::TPDiskStateInfo&>& pDisksIndex) { + return GetBSGroupOverallStateWithoutLatency(info, vDisksIndex, pDisksIndex).Overall; +} + +TBSGroupState GetBSGroupOverallState( + const NKikimrWhiteboard::TBSGroupStateInfo& info, + const TMap& vDisksIndex, + const TMap, const NKikimrWhiteboard::TPDiskStateInfo&>& pDisksIndex) { + TBSGroupState state = GetBSGroupOverallStateWithoutLatency(info, vDisksIndex, pDisksIndex); + if (info.HasLatency()) { + state.Overall = Max(state.Overall, Min(NKikimrViewer::EFlag::Yellow, GetViewerFlag(info.GetLatency()))); + } + return state; +} + +NKikimrViewer::EFlag GetBSGroupOverallFlag( + const NKikimrWhiteboard::TBSGroupStateInfo& info, + const TMap& 
vDisksIndex, + const TMap, const NKikimrWhiteboard::TPDiskStateInfo&>& pDisksIndex) { + return GetBSGroupOverallState(info, vDisksIndex, pDisksIndex).Overall; +} + +void InitViewerCapabilitiesJsonHandler(TJsonHandlers& jsonHandlers) { + TSimpleYamlBuilder yaml({ + .Method = "get", + .Tag = "viewer", + .Summary = "Viewer capabilities", + .Description = "Viewer capabilities", + }); + jsonHandlers.AddHandler("/viewer/capabilities", new TJsonHandler(yaml)); +} + +void InitViewerNodelistJsonHandler(TJsonHandlers& jsonHandlers) { + jsonHandlers.AddHandler("/viewer/nodelist", new TJsonHandler(TJsonNodeList::GetSwagger())); +} + +void InitViewerNodeInfoJsonHandler(TJsonHandlers& jsonHandlers); +void InitViewerSysInfoJsonHandler(TJsonHandlers& jsonHandlers); +void InitViewerVDiskInfoJsonHandler(TJsonHandlers& jsonHandlers); +void InitViewerPDiskInfoJsonHandler(TJsonHandlers& jsonHandlers); +void InitViewerTabletInfoJsonHandler(TJsonHandlers& jsonHandlers); + +void InitViewerDescribeJsonHandler(TJsonHandlers& jsonHandlers) { + jsonHandlers.AddHandler("/viewer/describe", new TJsonHandler(TJsonDescribe::GetSwagger())); +} + +void InitViewerDescribeTopicJsonHandler(TJsonHandlers& jsonHandlers) { + jsonHandlers.AddHandler("/viewer/describe_topic", new TJsonHandler(TJsonDescribeTopic::GetSwagger())); +} + +void InitViewerDescribeConsumerJsonHandler(TJsonHandlers& jsonHandlers) { + jsonHandlers.AddHandler("/viewer/describe_consumer", new TJsonHandler(TJsonDescribeConsumer::GetSwagger())); +} + +void InitViewerHotkeysJsonHandler(TJsonHandlers& jsonHandlers) { + jsonHandlers.AddHandler("/viewer/hotkeys", new TJsonHandler(TJsonHotkeys::GetSwagger())); +} + +void InitViewerHiveInfoJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/viewer/hiveinfo", new TJsonHandler(TJsonHiveInfo::GetSwagger())); +} + +void InitViewerBSGroupInfoJsonHandler(TJsonHandlers& jsonHandlers); + +void InitViewerBSControllerInfoJsonHandler(TJsonHandlers& jsonHandlers) { + 
jsonHandlers.AddHandler("/viewer/bscontrollerinfo", new TJsonHandler(TJsonBSControllerInfo::GetSwagger())); +} + +void InitViewerConfigJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/viewer/config", new TJsonHandler(TJsonConfig::GetSwagger())); +} + +void InitViewerCountersJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/viewer/counters", new TJsonHandler(TJsonCounters::GetSwagger())); +} + +void InitViewerTopicInfoJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/viewer/topicinfo", new TJsonHandler(TJsonTopicInfo::GetSwagger())); +} + +void InitViewerPQConsumerInfoJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/viewer/pqconsumerinfo", new TJsonHandler(TJsonPQConsumerInfo::GetSwagger())); +} + +void InitViewerTabletCountersJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/viewer/tabletcounters", new TJsonHandler(TJsonTabletCounters::GetSwagger())); +} + +void InitViewerStorageJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/viewer/storage", new TJsonHandler(TJsonStorage::GetSwagger())); +} + +void InitViewerStorageUsageJsonHandler(TJsonHandlers &handlers) { + handlers.AddHandler("/viewer/storage_usage", new TJsonHandler(TJsonStorageUsage::GetSwagger())); +} + +void InitViewerClusterJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/viewer/cluster", new TJsonHandler(TJsonCluster::GetSwagger()), 5); +} + +void InitViewerLabeledCountersJsonHandler(TJsonHandlers &handlers) { + handlers.AddHandler("/viewer/labeledcounters", new TJsonHandler(TJsonLabeledCounters::GetSwagger())); +} + +void InitViewerTenantsJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/viewer/tenants", new TJsonHandler(TJsonTenants::GetSwagger())); +} + +void InitViewerHiveStatsJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/viewer/hivestats", new TJsonHandler(TJsonHiveStats::GetSwagger())); +} + +void InitViewerTenantInfoJsonHandler(TJsonHandlers &handlers) { + 
handlers.AddHandler("/viewer/tenantinfo", new TJsonHandler(TJsonTenantInfo::GetSwagger()), 3); +} + +void InitViewerWhoAmIJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/viewer/whoami", new TJsonHandler(TJsonWhoAmI::GetSwagger())); +} + +void InitViewerQueryJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/viewer/query", new TJsonHandler(TJsonQuery::GetSwagger()), 4); +} + +void InitViewerNetInfoJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/viewer/netinfo", new TJsonHandler(TJsonNetInfo::GetSwagger())); +} + +void InitViewerComputeJsonHandler(TJsonHandlers &handlers) { + handlers.AddHandler("/viewer/compute", new TJsonHandler(TJsonCompute::GetSwagger())); +} + +void InitViewerHealthCheckJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/viewer/healthcheck", new TJsonHandler(TJsonHealthCheck::GetSwagger())); +} + +void InitViewerNodesJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/viewer/nodes", new TJsonHandler(TJsonNodes::GetSwagger()), 11); +} + +void InitViewerACLJsonHandler(TJsonHandlers &jsonHandlers) { + jsonHandlers.AddHandler("/viewer/acl", new TJsonHandler(TJsonACL::GetSwagger())); +} + +void InitViewerGraphJsonHandler(TJsonHandlers &handlers) { + handlers.AddHandler("/viewer/graph", new TJsonHandler(TJsonGraph::GetSwagger())); +} + +void InitViewerRenderJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/viewer/render", new TJsonHandler(TJsonRender::GetSwagger())); +} + +void InitViewerAutocompleteJsonHandler(TJsonHandlers& jsonHandlers) { + jsonHandlers.AddHandler("/viewer/autocomplete", new TJsonHandler(TJsonAutocomplete::GetSwagger()), 2); +} + +void InitViewerCheckAccessJsonHandler(TJsonHandlers& jsonHandlers) { + jsonHandlers.AddHandler("/viewer/check_access", new TJsonHandler(TCheckAccess::GetSwagger())); +} + +void InitViewerFeatureFlagsJsonHandler(TJsonHandlers& handlers) { + handlers.AddHandler("/viewer/feature_flags", new 
TJsonHandler(TJsonFeatureFlags::GetSwagger()), 2); +} + void InitViewerJsonHandlers(TJsonHandlers& jsonHandlers) { - jsonHandlers.AddHandler("/viewer/nodelist", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/nodeinfo", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/sysinfo", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/vdiskinfo", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/pdiskinfo", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/tabletinfo", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/describe", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/describe_topic", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/describe_consumer", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/hotkeys", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/hiveinfo", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/bsgroupinfo", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/bscontrollerinfo", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/config", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/counters", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/topicinfo", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/pqconsumerinfo", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/tabletcounters", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/storage", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/storage_usage", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/metainfo", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/browse", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/cluster", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/content", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/labeledcounters", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/tenants", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/hivestats", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/tenantinfo", new TJsonHandler); - 
jsonHandlers.AddHandler("/viewer/whoami", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/query", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/netinfo", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/compute", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/healthcheck", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/nodes", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/acl", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/graph", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/render", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/autocomplete", new TJsonHandler); - jsonHandlers.AddHandler("/viewer/check_access", new TJsonHandler); + InitViewerCapabilitiesJsonHandler(jsonHandlers); + InitViewerNodelistJsonHandler(jsonHandlers); + InitViewerNodeInfoJsonHandler(jsonHandlers); + InitViewerSysInfoJsonHandler(jsonHandlers); + InitViewerVDiskInfoJsonHandler(jsonHandlers); + InitViewerPDiskInfoJsonHandler(jsonHandlers); + InitViewerTabletInfoJsonHandler(jsonHandlers); + InitViewerDescribeJsonHandler(jsonHandlers); + InitViewerDescribeTopicJsonHandler(jsonHandlers); + InitViewerDescribeConsumerJsonHandler(jsonHandlers); + InitViewerHotkeysJsonHandler(jsonHandlers); + InitViewerHiveInfoJsonHandler(jsonHandlers); + InitViewerBSGroupInfoJsonHandler(jsonHandlers); + InitViewerBSControllerInfoJsonHandler(jsonHandlers); + InitViewerConfigJsonHandler(jsonHandlers); + InitViewerCountersJsonHandler(jsonHandlers); + InitViewerTopicInfoJsonHandler(jsonHandlers); + InitViewerPQConsumerInfoJsonHandler(jsonHandlers); + InitViewerTabletCountersJsonHandler(jsonHandlers); + InitViewerStorageJsonHandler(jsonHandlers); + InitViewerStorageUsageJsonHandler(jsonHandlers); + InitViewerClusterJsonHandler(jsonHandlers); + InitViewerLabeledCountersJsonHandler(jsonHandlers); + InitViewerTenantsJsonHandler(jsonHandlers); + InitViewerHiveStatsJsonHandler(jsonHandlers); + InitViewerTenantInfoJsonHandler(jsonHandlers); + 
InitViewerWhoAmIJsonHandler(jsonHandlers); + InitViewerQueryJsonHandler(jsonHandlers); + InitViewerNetInfoJsonHandler(jsonHandlers); + InitViewerComputeJsonHandler(jsonHandlers); + InitViewerHealthCheckJsonHandler(jsonHandlers); + InitViewerNodesJsonHandler(jsonHandlers); + InitViewerACLJsonHandler(jsonHandlers); + InitViewerGraphJsonHandler(jsonHandlers); + InitViewerRenderJsonHandler(jsonHandlers); + InitViewerAutocompleteJsonHandler(jsonHandlers); + InitViewerCheckAccessJsonHandler(jsonHandlers); + InitViewerFeatureFlagsJsonHandler(jsonHandlers); } } diff --git a/ydb/core/viewer/json_healthcheck.h b/ydb/core/viewer/json_healthcheck.h deleted file mode 100644 index 376320d64d40..000000000000 --- a/ydb/core/viewer/json_healthcheck.h +++ /dev/null @@ -1,340 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include "viewer.h" -#include -#include -#include -#include -#include -#include "json_pipe_req.h" -#include "healthcheck_record.h" -#include - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - -enum HealthCheckResponseFormat { - JSON, - PROMETHEUS -}; - -class TJsonHealthCheck : public TViewerPipeClient { - using TBase = TViewerPipeClient; - IViewer* Viewer; - static const bool WithRetry = false; - NMon::TEvHttpInfo::TPtr Event; - TJsonSettings JsonSettings; - ui32 Timeout = 0; - HealthCheckResponseFormat Format; - TString Database; - bool Cache = true; - bool MergeRecords = false; - std::optional Result; - std::optional SubscribedNodeId; - Ydb::Monitoring::StatusFlag::Status MinStatus = Ydb::Monitoring::StatusFlag::UNSPECIFIED; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonHealthCheck(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) - : Viewer(viewer) - , Event(ev) - {} - - THolder MakeSelfCheckRequest() { - const auto& params(Event->Get()->Request.GetParams()); - THolder request = MakeHolder(); - 
request->Database = Database; - if (params.Has("verbose")) { - request->Request.set_return_verbose_status(FromStringWithDefault(params.Get("verbose"), false)); - } - if (params.Has("max_level")) { - request->Request.set_maximum_level(FromStringWithDefault(params.Get("max_level"), 0)); - } - if (MinStatus != Ydb::Monitoring::StatusFlag::UNSPECIFIED) { - request->Request.set_minimum_status(MinStatus); - } - if (params.Has("merge_records")) { - request->Request.set_merge_records(MergeRecords); - } - SetDuration(TDuration::MilliSeconds(Timeout), *request->Request.mutable_operation_params()->mutable_operation_timeout()); - return request; - } - - void SendHealthCheckRequest() { - auto request = MakeSelfCheckRequest(); - Send(NHealthCheck::MakeHealthCheckID(), request.Release()); - } - - void Bootstrap() { - const auto& params(Event->Get()->Request.GetParams()); - InitConfig(params); - - Format = HealthCheckResponseFormat::JSON; - if (params.Has("format")) { - auto& format = params.Get("format"); - if (format == "json") { - Format = HealthCheckResponseFormat::JSON; - } else if (format == "prometheus") { - Format = HealthCheckResponseFormat::PROMETHEUS; - } - } else if (const auto *header = Event->Get()->Request.GetHeaders().FindHeader("Accept")) { - THashSet accept; - StringSplitter(header->Value()).SplitBySet(", ").SkipEmpty().Collect(&accept); - if (accept.contains("*/*") || accept.contains("application/json")) { - Format = HealthCheckResponseFormat::JSON; - } else if (accept.contains("text/plain")) { - Format = HealthCheckResponseFormat::PROMETHEUS; - } else { - Format = HealthCheckResponseFormat::JSON; - } - } - if (Format == HealthCheckResponseFormat::JSON) { - JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), true); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); - } - Database = params.Get("tenant"); - Cache = FromStringWithDefault(params.Get("cache"), Cache); - MergeRecords = 
FromStringWithDefault(params.Get("merge_records"), MergeRecords); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - - if (params.Get("min_status") && !Ydb::Monitoring::StatusFlag_Status_Parse(params.Get("min_status"), &MinStatus)) { - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPBADREQUEST(Event->Get(), "text/plain", "The field 'min_status' cannot be parsed"), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - return PassAway(); - } - if (AppData()->FeatureFlags.GetEnableDbMetadataCache() && Cache && Database && MergeRecords) { - RequestStateStorageMetadataCacheEndpointsLookup(Database); - } else { - SendHealthCheckRequest(); - } - Timeout += Timeout * 20 / 100; // we prefer to wait for more (+20%) verbose timeout status from HC - Become(&TThis::StateRequestedInfo, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - void PassAway() override { - if (SubscribedNodeId.has_value()) { - Send(TActivationContext::InterconnectProxy(SubscribedNodeId.value()), new TEvents::TEvUnsubscribe()); - } - TBase::PassAway(); - } - - STFUNC(StateRequestedInfo) { - switch (ev->GetTypeRewrite()) { - hFunc(NHealthCheck::TEvSelfCheckResult, Handle); - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - hFunc(NHealthCheck::TEvSelfCheckResultProto, Handle); - cFunc(TEvents::TSystem::Undelivered, SendHealthCheckRequest); - hFunc(TEvStateStorage::TEvBoardInfo, Handle); - } - } - - int GetIssueCount(const Ydb::Monitoring::IssueLog& issueLog) { - return issueLog.count() == 0 ? 
1 : issueLog.count(); - } - - THolder> GetRecordCounters() { - const auto *descriptor = Ydb::Monitoring::StatusFlag_Status_descriptor(); - THashMap recordCounters; - for (auto& log : Result->issue_log()) { - TMetricRecord record { - .Database = log.location().database().name(), - .Message = log.message(), - .Status = descriptor->FindValueByNumber(log.status())->name(), - .Type = log.type() - }; - - auto it = recordCounters.find(record); - if (it != recordCounters.end()) { - it->second += GetIssueCount(log); - } else { - recordCounters[record] = GetIssueCount(log); - } - } - - return MakeHolder>(recordCounters); - } - - void HandleJSON() { - TStringStream json; - TProtoToJson::ProtoToJson(json, *Result, JsonSettings); - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), json.Str()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - } - - void HandlePrometheus() { - auto recordCounters = GetRecordCounters(); - - TStringStream ss; - IMetricEncoderPtr encoder = EncoderPrometheus(&ss); - IMetricEncoder* e = encoder.Get(); - - TIntrusivePtr domains = AppData()->DomainsInfo; - auto *domain = domains->GetDomain(); - auto filterDatabase = Database ? Database : "/" + domain->Name; - e->OnStreamBegin(); - if (recordCounters->size() > 0) { - for (auto& recordCounter : *recordCounters) { - e->OnMetricBegin(EMetricType::IGAUGE); - { - e->OnLabelsBegin(); - e->OnLabel("sensor", "ydb_healthcheck"); - e->OnLabel("DOMAIN", domain->Name); - e->OnLabel("DATABASE", recordCounter.first.Database ? 
recordCounter.first.Database : filterDatabase); - e->OnLabel("MESSAGE", recordCounter.first.Message); - e->OnLabel("STATUS", recordCounter.first.Status); - e->OnLabel("TYPE", recordCounter.first.Type); - e->OnLabelsEnd(); - } - e->OnInt64(TInstant::Zero(), recordCounter.second); - e->OnMetricEnd(); - } - } - const auto *descriptor = Ydb::Monitoring::SelfCheck_Result_descriptor(); - auto result = descriptor->FindValueByNumber(Result->self_check_result())->name(); - e->OnMetricBegin(EMetricType::IGAUGE); - { - e->OnLabelsBegin(); - e->OnLabel("sensor", "ydb_healthcheck"); - e->OnLabel("DOMAIN", domain->Name); - e->OnLabel("DATABASE", filterDatabase); - e->OnLabel("MESSAGE", result); - e->OnLabel("STATUS", result); - e->OnLabel("TYPE", "ALL"); - e->OnLabelsEnd(); - } - e->OnInt64(TInstant::Zero(), 1); - e->OnMetricEnd(); - e->OnStreamEnd(); - - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKTEXT(Event->Get()) + ss.Str(), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - } - - void ReplyAndPassAway() { - if (Result) { - if (Format == HealthCheckResponseFormat::JSON) { - HandleJSON(); - } else { - HandlePrometheus(); - } - } - PassAway(); - } - - void Handle(NHealthCheck::TEvSelfCheckResult::TPtr& ev) { - Result = std::move(ev->Get()->Result); - ReplyAndPassAway(); - } - - void Handle(NHealthCheck::TEvSelfCheckResultProto::TPtr& ev) { - Result = std::move(ev->Get()->Record); - NHealthCheck::RemoveUnrequestedEntries(*Result, MakeSelfCheckRequest().Release()->Request); - ReplyAndPassAway(); - } - - void Handle(TEvStateStorage::TEvBoardInfo::TPtr& ev) { - auto activeNode = TDatabaseMetadataCache::PickActiveNode(ev->Get()->InfoEntries); - if (activeNode != 0) { - SubscribedNodeId = activeNode; - std::optional cache = MakeDatabaseMetadataCacheId(activeNode); - auto request = MakeHolder(); - Send(*cache, request.Release(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, activeNode); - } else { - SendHealthCheckRequest(); - } - } - - 
void HandleTimeout() { - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - - name: tenant - in: query - description: path to database - required: false - type: string - - name: cache - in: query - description: use cache - required: false - type: boolean - - name: verbose - in: query - description: return verbose status - required: false - type: boolean - - name: merge_records - in: query - description: merge records - required: false - type: boolean - - name: max_level - in: query - description: max depth of issues to return - required: false - type: integer - - name: min_status - in: query - description: min status of issues to return - required: false - type: string - - name: format - in: query - description: format of reply - required: false - type: string - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Self-check result"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Performs self-check and returns result"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_hiveinfo.h b/ydb/core/viewer/json_hiveinfo.h deleted file mode 100644 index d5f97f8eabe0..000000000000 --- a/ydb/core/viewer/json_hiveinfo.h +++ /dev/null @@ -1,174 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include 
"viewer.h" -#include "json_pipe_req.h" -#include - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - -class TJsonHiveInfo : public TViewerPipeClient { - using TBase = TViewerPipeClient; - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - TAutoPtr HiveInfo; - TJsonSettings JsonSettings; - ui32 Timeout = 0; - TNodeId NodeId = 0; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonHiveInfo(IViewer* viewer, NMon::TEvHttpInfo::TPtr &ev) - : Viewer(viewer) - , Event(ev) - {} - - void Bootstrap() { - const auto& params(Event->Get()->Request.GetParams()); - ui64 hiveId = FromStringWithDefault(params.Get("hive_id"), 0); - JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), false); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - NodeId = FromStringWithDefault(params.Get("node"), 0); - InitConfig(params); - if (hiveId != 0 ) { - TAutoPtr request = new TEvHive::TEvRequestHiveInfo(); - if (params.Has("tablet_id")) { - request->Record.SetTabletID(FromStringWithDefault(params.Get("tablet_id"), 0)); - } - if (params.Has("tablet_type")) { - request->Record.SetTabletType(static_cast(FromStringWithDefault(params.Get("tablet_type"), 0))); - } - if (FromStringWithDefault(params.Get("followers"), false)) { - request->Record.SetReturnFollowers(true); - } - if (FromStringWithDefault(params.Get("metrics"), false)) { - request->Record.SetReturnMetrics(true); - } - SendRequestToPipe(ConnectTabletPipe(hiveId), request.Release()); - Become(&TThis::StateRequestedInfo, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } else { - ReplyAndPassAway(); - } - } - - STATEFN(StateRequestedInfo) { - switch (ev->GetTypeRewrite()) { - hFunc(TEvHive::TEvResponseHiveInfo, Handle); - hFunc(TEvTabletPipe::TEvClientConnected, TBase::Handle); - 
cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void Handle(TEvHive::TEvResponseHiveInfo::TPtr& ev) { - HiveInfo = ev->Release(); - RequestDone(); - } - - void ReplyAndPassAway() { - TStringStream json; - if (HiveInfo != nullptr) { - if (NodeId != 0) { - for (auto itRecord = HiveInfo->Record.MutableTablets()->begin(); itRecord != HiveInfo->Record.MutableTablets()->end();) { - if (itRecord->GetNodeID() != NodeId) { - itRecord = HiveInfo->Record.MutableTablets()->erase(itRecord); - } else { - ++itRecord; - } - } - } - TProtoToJson::ProtoToJson(json, HiveInfo->Record, JsonSettings); - } else { - json << "null"; - } - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), json.Str()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } - - void HandleTimeout() { - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: hive_id - in: query - description: hive identifier (tablet id) - required: true - type: string - - name: tablet_id - in: query - description: tablet id filter - required: false - type: string - - name: tablet_type - in: query - description: tablet type filter - required: false - type: string - - name: followers - in: query - description: return followers - required: false - type: boolean - - name: metrics - in: query - description: return tablet metrics - required: false - type: boolean - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - - name: timeout - in: query - description: timeout in ms - 
required: false - type: integer - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Hive information"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns information about tablets from Hive"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_hivestats.h b/ydb/core/viewer/json_hivestats.h deleted file mode 100644 index 483c0e5c120a..000000000000 --- a/ydb/core/viewer/json_hivestats.h +++ /dev/null @@ -1,142 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include "viewer.h" -#include "json_pipe_req.h" -#include - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - -class TJsonHiveStats : public TViewerPipeClient { - using TBase = TViewerPipeClient; - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - TAutoPtr HiveStats; - TJsonSettings JsonSettings; - ui32 Timeout = 0; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonHiveStats(IViewer* viewer, NMon::TEvHttpInfo::TPtr &ev) - : Viewer(viewer) - , Event(ev) - {} - - void Bootstrap() { - const auto& params(Event->Get()->Request.GetParams()); - ui64 hiveId = FromStringWithDefault(params.Get("hive_id"), 0); - JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), true); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - InitConfig(params); - if (hiveId != 0 ) { - THolder request = MakeHolder(); - request->Record.SetReturnFollowers(FromStringWithDefault(params.Get("followers"), false)); - request->Record.SetReturnMetrics(FromStringWithDefault(params.Get("metrics"), true)); - SendRequestToPipe(ConnectTabletPipe(hiveId), request.Release()); - Become(&TThis::StateRequestedInfo, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } else 
{ - ReplyAndPassAway(); - } - } - - STATEFN(StateRequestedInfo) { - switch (ev->GetTypeRewrite()) { - hFunc(TEvHive::TEvResponseHiveDomainStats, Handle); - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void Handle(TEvHive::TEvResponseHiveDomainStats::TPtr& ev) { - HiveStats = ev->Release(); - RequestDone(); - } - - void ReplyAndPassAway() { - TStringStream json; - if (HiveStats != nullptr) { - TProtoToJson::ProtoToJson(json, HiveStats->Record, JsonSettings); - } else { - json << "null"; - } - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), json.Str()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } - - void HandleTimeout() { - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: hive_id - in: query - description: hive identifier (tablet id) - required: true - type: string - - name: followers - in: query - description: return followers - required: false - type: boolean - - name: metrics - in: query - description: return tablet metrics - required: false - type: boolean - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Hive statistics"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns information about Hive statistics"; - } -}; - -} -} diff --git 
a/ydb/core/viewer/json_hotkeys.h b/ydb/core/viewer/json_hotkeys.h deleted file mode 100644 index b4922a4af965..000000000000 --- a/ydb/core/viewer/json_hotkeys.h +++ /dev/null @@ -1,169 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "viewer.h" -#include "json_pipe_req.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; -using NSchemeShard::TEvSchemeShard; - -class TJsonHotkeys : public TViewerPipeClient { - static const bool WithRetry = false; - using TBase = TViewerPipeClient; - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - TAutoPtr DescribeResult; - ui32 Timeout = 0; - ui32 Limit = 0; - float PollingFactor = 0.0; - bool EnableSampling = false; - - struct KeysComparator { - bool operator ()(const std::pair>& a, const std::pair>& b) const { - return a.first > b.first; - }; - }; - - TMultiSet>, KeysComparator> Keys; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonHotkeys(IViewer* viewer, NMon::TEvHttpInfo::TPtr &ev) - : Viewer(viewer) - , Event(ev) - {} - - void FillParams(NKikimrSchemeOp::TDescribePath* record, const TCgiParameters& params) { - if (params.Has("path")) { - record->SetPath(params.Get("path")); - } - record->MutableOptions()->SetReturnPartitionStats(true); - } - - void Bootstrap() { - const auto& params(Event->Get()->Request.GetParams()); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - Limit = FromStringWithDefault(params.Get("limit"), 10); - PollingFactor = std::max(0.0f, std::min(FromStringWithDefault(params.Get("polling_factor"), 0.2), 1.0f)); - EnableSampling = FromStringWithDefault(params.Get("enable_sampling"), false); - InitConfig(params); - - THolder request = MakeHolder(); - FillParams(request->Record.MutableDescribePath(), params); - request->Record.SetUserToken(Event->Get()->UserToken); - SendRequest(MakeTxProxyID(), 
request.Release()); - - Become(&TThis::StateRequestedDescribe, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - STATEFN(StateRequestedDescribe) { - switch (ev->GetTypeRewrite()) { - hFunc(TEvSchemeShard::TEvDescribeSchemeResult, Handle); - hFunc(TEvDataShard::TEvGetDataHistogramResponse, Handle); - hFunc(TEvTabletPipe::TEvClientConnected, TBase::Handle); - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void Handle(TEvSchemeShard::TEvDescribeSchemeResult::TPtr& ev) { - DescribeResult = ev->Release(); - const auto& pbRecord(DescribeResult->GetRecord()); - if (pbRecord.HasPathDescription()) { - const auto& pathDescription = pbRecord.GetPathDescription(); - const auto& partitions = pathDescription.GetTablePartitions(); - const auto& metrics = pathDescription.GetTablePartitionMetrics(); - TVector> tabletsOrder; - - for (int i = 0; i < metrics.size(); ++i) { - tabletsOrder.emplace_back(metrics.Get(i).GetCPU(), i); - } - - Sort(tabletsOrder, std::greater>()); - ui32 tablets = (ui32) std::max(1, (int) std::ceil(PollingFactor * tabletsOrder.size())); - - for (ui32 i = 0; i < tablets; ++i) { - THolder request = MakeHolder(); - if (EnableSampling) { - request->Record.SetCollectKeySampleMs(30000); // 30 sec - } - request->Record.SetActualData(true); - ui64 datashardId = partitions.Get(tabletsOrder[i].second).GetDatashardId(); - SendRequestToPipe(ConnectTabletPipe(datashardId), request.Release()); - } - } - - RequestDone(); - } - - void Handle(TEvDataShard::TEvGetDataHistogramResponse::TPtr& ev) { - const auto& rec = ev->Get()->Record; - for (const auto& i: rec.GetTableHistograms()) { - for (const auto& item: i.GetKeyAccessSample().GetItems()) { - TVector keys(item.GetKeyValues().begin(), item.GetKeyValues().end()); - Keys.emplace(item.GetValue(), std::move(keys)); - if (Keys.size() > Limit) { - Keys.erase(--Keys.end()); - } - } - } - - RequestDone(); - } - - NJson::TJsonValue BuildResponse() { - NJson::TJsonValue root; - if (DescribeResult != 
nullptr) { - NJson::TJsonValue& hotkeys = root["hotkeys"]; - for (const auto &i: Keys) { - NJson::TJsonValue entry; - NJson::TJsonValue keyValues; - for (const auto &j: i.second) { - keyValues.AppendValue(j); - } - entry["accessSample"] = i.first; - entry["keyValues"] = std::move(keyValues); - hotkeys.AppendValue(std::move(entry)); - } - } - return root; - } - - void ReplyAndPassAway() { - if (DescribeResult != nullptr) { - switch (DescribeResult->GetRecord().GetStatus()) { - case NKikimrScheme::StatusAccessDenied: - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPFORBIDDEN(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - return; - default: - break; - } - } - NJson::TJsonValue root = BuildResponse(); - TString json = NJson::WriteJson(root, false); - - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), json), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } - - void HandleTimeout() { - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } -}; - -} -} diff --git a/ydb/core/viewer/json_labeledcounters.h b/ydb/core/viewer/json_labeledcounters.h deleted file mode 100644 index c8ea014b6205..000000000000 --- a/ydb/core/viewer/json_labeledcounters.h +++ /dev/null @@ -1,237 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include "viewer.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - -class TJsonLabeledCounters : public TActorBootstrapped { - using TBase = TActorBootstrapped; - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - NKikimrLabeledCounters::TEvTabletLabeledCountersResponse LabeledCountersResult; - TJsonSettings JsonSettings; - TString Groups; - TString GroupNames; - TString Topic; - TString Consumer; - TString DC; - TVector Counters; - ui32 Version = 1; - ui32 Timeout = 0; - -public: - static 
constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonLabeledCounters(IViewer* viewer, NMon::TEvHttpInfo::TPtr &ev) - : Viewer(viewer) - , Event(ev) - {} - - void Bootstrap(const TActorContext& ctx) { - const auto& params(Event->Get()->Request.GetParams()); - JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), false); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - Groups = params.Get("group"); - Topic = NPersQueue::ConvertNewTopicName(params.Get("topic")); - if (Topic.empty()) - Topic = "*"; - Consumer = NPersQueue::ConvertNewConsumerName(params.Get("consumer"), ctx); - DC = params.Get("dc"); - if (DC.empty()) - DC = "*"; - GroupNames = params.Get("group_names"); - Split(params.Get("counters"), ",", Counters); - Version = FromStringWithDefault(params.Get("version"), Version); - Sort(Counters); - if (Version >= 3) { - TString topic = "rt3." + DC + "--" + Topic; - if (!Consumer.empty()) { - Groups = Consumer + "/*/" + topic; - if (Topic != "*") { - Groups += "," + topic; - } - } else { - Groups = topic; - } - } - CreateClusterLabeledCountersAggregator(ctx.SelfID, TTabletTypes::PersQueue, ctx, Version, Version >= 2 ? 
Groups : TString()); - Become(&TThis::StateRequestedTopicInfo, ctx, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - void Die(const TActorContext& ctx) override { - TBase::Die(ctx); - } - - STFUNC(StateRequestedTopicInfo) { - switch (ev->GetTypeRewrite()) { - HFunc(TEvTabletCounters::TEvTabletLabeledCountersResponse, Handle); - CFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void Handle(TEvTabletCounters::TEvTabletLabeledCountersResponse::TPtr &ev, const TActorContext &ctx) { - if (Version == 1) { - for (ui32 i = 0; i < ev->Get()->Record.LabeledCountersByGroupSize(); ++i) { - auto& uc = *ev->Get()->Record.MutableLabeledCountersByGroup(i); - if (!Groups.empty() && !IsMatchesWildcards(uc.GetGroup(), Groups)) { - continue; - } - if (!GroupNames.empty() && !IsMatchesWildcard(uc.GetGroupNames(), GroupNames)) { - continue; - } - if (Counters.empty()) { - LabeledCountersResult.AddLabeledCountersByGroup()->Swap(&uc); - } else { - auto& lc = *LabeledCountersResult.AddLabeledCountersByGroup(); - lc.SetGroup(uc.GetGroup()); - lc.SetGroupNames(uc.GetGroupNames()); - for (auto& c : *uc.MutableLabeledCounter()) { - if (BinarySearch(Counters.begin(), Counters.end(), c.GetName())) { - lc.AddLabeledCounter()->Swap(&c); - } - } - } - } - } else if (Version >= 2) { - const NKikimrLabeledCounters::TEvTabletLabeledCountersResponse& source(ev->Get()->Record); - TVector> counterNamesMapping; - counterNamesMapping.reserve(source.CounterNamesSize()); - for (const TString& counterName : source.GetCounterNames()) { - if (Counters.empty() || BinarySearch(Counters.begin(), Counters.end(), counterName)) { - counterNamesMapping.push_back(LabeledCountersResult.CounterNamesSize()); - LabeledCountersResult.AddCounterNames(counterName); - } else { - counterNamesMapping.push_back(Nothing()); - } - } - for (ui32 i = 0; i < ev->Get()->Record.LabeledCountersByGroupSize(); ++i) { - auto& uc = *ev->Get()->Record.MutableLabeledCountersByGroup(i); - auto& lc = 
*LabeledCountersResult.AddLabeledCountersByGroup(); - lc.SetGroup(uc.GetGroup()); - for (auto& c : *uc.MutableLabeledCounter()) { - ui32 nameId = c.GetNameId(); - if (counterNamesMapping[c.GetNameId()].Defined()) { - nameId = counterNamesMapping[c.GetNameId()].GetRef(); - auto* lci = lc.AddLabeledCounter(); - lci->SetValue(c.GetValue()); - lci->SetNameId(nameId); - } - } - } - } - ReplyAndDie(ctx); - } - - void ReplyAndDie(const TActorContext &ctx) { - TStringStream json; - TProtoToJson::ProtoToJson(json, LabeledCountersResult, JsonSettings); - ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), json.Str()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); - } - - void HandleTimeout(const TActorContext &ctx) { - ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: group - in: query - description: group name - required: false - type: string - - name: dc - in: query - description: datacenter name - required: false - type: string - default: "*" - - name: topic - in: query - description: topic name - required: false - type: string - default: "*" - - name: consumer - in: query - description: consumer name - required: false - type: string - default: "" - - name: group_names - in: query - description: group names - required: false - type: string - - name: counters - in: query - description: counters names - required: false - type: string - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - default: false - - name: all - in: query - description: return information about all topics and clients - required: false - type: 
boolean - default: false - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - default: false - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - default: 10000 - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Labeled counters info"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns information about labeled counters"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_local_rpc.h b/ydb/core/viewer/json_local_rpc.h index 00cd683cbb72..a2574bd6aa0f 100644 --- a/ydb/core/viewer/json_local_rpc.h +++ b/ydb/core/viewer/json_local_rpc.h @@ -1,20 +1,9 @@ #pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include "viewer.h" #include "json_pipe_req.h" - #include -#include +#include -namespace NKikimr { -namespace NViewer { +namespace NKikimr::NViewer { struct TEvLocalRpcPrivate { enum EEv { @@ -35,27 +24,16 @@ struct TEvLocalRpcPrivate { }; }; -using namespace NActors; -using NSchemeShard::TEvSchemeShard; - template -class TJsonLocalRpc : public TActorBootstrapped> { +class TJsonLocalRpc : public TViewerPipeClient { using TThis = TJsonLocalRpc; - using TBase = TActorBootstrapped; - - using TBase::Send; - using TBase::PassAway; - using TBase::Become; + using TBase = TViewerPipeClient; protected: - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - TProtoRequest Request; + using TBase::ReplyAndPassAway; + using TRequestProtoType = TProtoRequest; + std::vector AllowedMethods = {}; TAutoPtr> Result; - - TJsonSettings JsonSettings; - ui32 Timeout = 0; - TString Database; NThreading::TFuture RpcFuture; public: @@ -63,13 +41,11 @@ class TJsonLocalRpc : public TActorBootstrappedname()) {} - TProtoRequest Params2Proto(const TCgiParameters& params) { - TProtoRequest request; + void Params2Proto(const TCgiParameters& params, 
TRequestProtoType& request) { using google::protobuf::Descriptor; using google::protobuf::Reflection; using google::protobuf::FieldDescriptor; @@ -119,44 +95,51 @@ class TJsonLocalRpc : public TActorBootstrappedGet()->Request.GetPostContent(); - if (!postData.empty()) { - try { - NProtobufJson::Json2Proto(postData, request, json2ProtoConfig); - } - catch (const yexception& e) { - ReplyAndPassAway(Viewer->GetHTTPBADREQUEST(Event->Get(), "text/plain", e.what())); + virtual bool ValidateRequest(TRequestProtoType& request) { + using google::protobuf::Descriptor; + using google::protobuf::Reflection; + using google::protobuf::FieldDescriptor; + const Descriptor& descriptor = *TRequestProtoType::GetDescriptor(); + const Reflection& reflection = *TRequestProtoType::GetReflection(); + for (int idx = 0; idx < descriptor.field_count(); ++idx) { + const FieldDescriptor* field = descriptor.field(idx); + const auto& options(field->options()); + if (options.HasExtension(Ydb::required)) { + if (options.GetExtension(Ydb::required)) { + if (!reflection.HasField(request, field)) { + ReplyAndPassAway(GetHTTPBADREQUEST("text/plain", TStringBuilder() << "field '" << field->name() << "' is required")); + return false; + } + } } - } else { - const auto& params(Event->Get()->Request.GetParams()); - return Params2Proto(params); } - return request; + return true; } - bool PostToRequest() { + bool Params2Proto(TRequestProtoType& request) { auto postData = Event->Get()->Request.GetPostContent(); if (!postData.empty()) { try { - NProtobufJson::Json2Proto(postData, Request, {}); - return true; + NProtobufJson::Json2Proto(postData, request); } catch (const yexception& e) { - ReplyAndPassAway(Viewer->GetHTTPBADREQUEST(Event->Get(), "text/plain", e.what())); + ReplyAndPassAway(GetHTTPBADREQUEST("text/plain", e.what())); return false; } } + const auto& params(Event->Get()->Request.GetParams()); + Params2Proto(params, request); + if (!ValidateRequest(request)) { + return false; + } return true; } - 
void SendGrpcRequest() { - RpcFuture = NRpcService::DoLocalRpc(std::move(Request), Database, Event->Get()->UserToken, TlsActivationContext->ActorSystem()); + void SendGrpcRequest(TRequestProtoType&& request) { + // TODO(xenoxeno): pass trace id + RpcFuture = NRpcService::DoLocalRpc(std::move(request), Database, Event->Get()->UserToken, TlsActivationContext->ActorSystem()); RpcFuture.Subscribe([actorId = TBase::SelfId(), actorSystem = TlsActivationContext->ActorSystem()] (const NThreading::TFuture& future) { auto& response = future.GetValueSync(); @@ -182,14 +165,21 @@ class TJsonLocalRpc : public TActorBootstrappedGet()->Request.GetParams()); - JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), true); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), true); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - - SendGrpcRequest(); - - Become(&TThis::StateRequested, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); + if (!AllowedMethods.empty() && std::find(AllowedMethods.begin(), AllowedMethods.end(), Event->Get()->Request.GetMethod()) == AllowedMethods.end()) { + return ReplyAndPassAway(GetHTTPBADREQUEST("text/plain", "Method is not allowed")); + } + if (Database.empty()) { + return ReplyAndPassAway(GetHTTPBADREQUEST("text/plain", "field 'database' is required")); + } + if (TBase::NeedToRedirect()) { + return; + } + TRequestProtoType request; + if (!Params2Proto(request)) { + return; + } + SendGrpcRequest(std::move(request)); + Become(&TThis::StateRequested, Timeout, new TEvents::TEvWakeup()); } void Handle(typename TEvLocalRpcPrivate::TEvGrpcRequestResult::TPtr& ev) { @@ -206,38 +196,24 @@ class TJsonLocalRpc : public TActorBootstrappedStatus) { - if (!Result->Status->IsSuccess()) { + if (Result->Status->IsSuccess()) { + return ReplyAndPassAway(GetHTTPOKJSON(Result->Message)); + } else { NJson::TJsonValue json; TString message; - MakeErrorReply(json, message, Result->Status.value()); + 
MakeJsonErrorReply(json, message, Result->Status.value()); TStringStream stream; NJson::WriteJson(&stream, &json); if (Result->Status->GetStatus() == NYdb::EStatus::UNAUTHORIZED) { - return ReplyAndPassAway(Viewer->GetHTTPFORBIDDEN(Event->Get(), "application/json", stream.Str())); + return ReplyAndPassAway(GetHTTPFORBIDDEN("application/json", stream.Str()), message); } else { - return ReplyAndPassAway(Viewer->GetHTTPBADREQUEST(Event->Get(), "application/json", stream.Str())); + return ReplyAndPassAway(GetHTTPBADREQUEST("application/json", stream.Str()), message); } - } else { - TStringStream json; - TProtoToJson::ProtoToJson(json, Result->Message, JsonSettings); - return ReplyAndPassAway(Viewer->GetHTTPOKJSON(Event->Get(), json.Str())); } } else { - return ReplyAndPassAway(Viewer->GetHTTPINTERNALERROR(Event->Get())); + return ReplyAndPassAway(GetHTTPINTERNALERROR("text/plain", "no Result or Status"), "internal error"); } } - - - void HandleTimeout() { - ReplyAndPassAway(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get())); - } - - void ReplyAndPassAway(TString data) { - Send(Event->Sender, new NMon::TEvHttpInfoRes(data, 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } }; - -} -} +} // namespace NKikimr::NViewer diff --git a/ydb/core/viewer/json_metainfo.h b/ydb/core/viewer/json_metainfo.h deleted file mode 100644 index b96a26a4478a..000000000000 --- a/ydb/core/viewer/json_metainfo.h +++ /dev/null @@ -1,189 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "browse.h" -#include -#include "viewer.h" -#include "wb_aggregate.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - -class TJsonMetaInfo : public TActorBootstrapped { - using TBase = TActorBootstrapped; - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - TJsonSettings JsonSettings; - ui32 Timeout = 0; - bool Counters = false; - NKikimrViewer::TMetaInfo 
MetaInfo; - TActorId BrowseActorID; - using TBrowseRequestKey = std::tuple; - std::unordered_multiset BrowseRequestsInFlight; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonMetaInfo(IViewer *viewer, NMon::TEvHttpInfo::TPtr &ev) - : Viewer(viewer) - , Event(ev) - {} - - void Bootstrap(const TActorContext& ctx) { - const auto& params(Event->Get()->Request.GetParams()); - JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), true); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - Counters = FromStringWithDefault(params.Get("counters"), false); - TString path = params.Get("path"); - BrowseActorID = ctx.RegisterWithSameMailbox(new TBrowse(Viewer, ctx.SelfID, path, Event->Get()->UserToken)); - Become(&TThis::StateWait, ctx, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - void Die(const TActorContext& ctx) override { - ctx.Send(BrowseActorID, new TEvents::TEvPoisonPill()); - TBase::Die(ctx); - } - - STFUNC(StateWait) { - switch (ev->GetTypeRewrite()) { - HFunc(NViewerEvents::TEvBrowseResponse, Handle); - HFunc(NViewerEvents::TEvBrowseRequestSent, Handle); - HFunc(NViewerEvents::TEvBrowseRequestCompleted, Handle); - HFunc(NMon::TEvHttpInfoRes, Handle); - CFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void Handle(NMon::TEvHttpInfoRes::TPtr &ev, const TActorContext &ctx) { - ctx.ExecutorThread.Send(ev->Forward(Event->Sender)); - Die(ctx); - } - - void Handle(NViewerEvents::TEvBrowseResponse::TPtr &ev, const TActorContext &ctx) { - NViewerEvents::TEvBrowseResponse& event(*ev->Get()); - if (!event.Error.empty()) { - ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(event.Error, 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - return Die(ctx); - } - MetaInfo.MergeFrom(event.MetaInfo); - if (!Counters) { - // TODO(xenoxeno): 
it could be a little bit more effective - MetaInfo.ClearCounters(); - } - ReplyAndDie(ctx); - } - - void Handle(NViewerEvents::TEvBrowseRequestSent::TPtr& ev, const TActorContext&) { - NViewerEvents::TEvBrowseRequestSent& event(*ev->Get()); - BrowseRequestsInFlight.emplace(event.Actor, event.Tablet, event.Event); - } - - void Handle(NViewerEvents::TEvBrowseRequestCompleted::TPtr& ev, const TActorContext&) { - NViewerEvents::TEvBrowseRequestCompleted& event(*ev->Get()); - auto it = BrowseRequestsInFlight.find({event.Actor, event.Tablet, event.Event}); - if (it != BrowseRequestsInFlight.end()) { - // we could not delete by key, it could be many items with the same key - BrowseRequestsInFlight.erase(it); - } - BrowseRequestsInFlight.emplace(event.Actor, event.Tablet, event.Event); - } - - void ReplyAndDie(const TActorContext &ctx) { - TStringStream json; - TProtoToJson::ProtoToJson(json, MetaInfo, JsonSettings); - ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), json.Str()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); - } - - void HandleTimeout(const TActorContext &ctx) { - TStringStream result; - RenderPendingRequests(result); - ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get(), result.Str()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); - } - - void RenderPendingRequests(IOutputStream& html) { - for (const auto& request : BrowseRequestsInFlight) { - html << request << Endl; - } - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: path - in: query - description: schema path - required: false - type: string - - name: tablet_id - in: query - description: tablet identifier - required: false - type: integer - - name: enums - in: query - description: convert 
enums to strings - required: false - type: boolean - - name: counters - in: query - description: return tablet counters - required: false - type: boolean - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Schema meta information"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns meta information about schema path"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_netinfo.h b/ydb/core/viewer/json_netinfo.h deleted file mode 100644 index e57e18677801..000000000000 --- a/ydb/core/viewer/json_netinfo.h +++ /dev/null @@ -1,357 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "viewer.h" -#include "json_pipe_req.h" -#include "wb_aggregate.h" -#include "wb_merge.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - -class TJsonNetInfo : public TViewerPipeClient { - using TBase = TViewerPipeClient; - IViewer* Viewer; - std::unordered_map TenantByPath; - std::unordered_map TenantBySubDomainKey; - std::unordered_map> NavigateResult; - std::unique_ptr HiveStats; - NMon::TEvHttpInfo::TPtr Event; - std::vector NodeIds; - std::unordered_map> NodeSysInfo; - std::unordered_map> NodeNetInfo; - std::unique_ptr NodesInfo; - TJsonSettings JsonSettings; - ui32 Timeout = 0; - TString User; - TString Path; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonNetInfo(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) - : Viewer(viewer) - , Event(ev) - {} - - void Bootstrap() { - const auto& params(Event->Get()->Request.GetParams()); - JsonSettings.EnumAsNumbers 
= !FromStringWithDefault(params.Get("enums"), true); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); - InitConfig(params); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - Path = params.Get("path"); - - SendRequest(GetNameserviceActorId(), new TEvInterconnect::TEvListNodes()); - - TIntrusivePtr domains = AppData()->DomainsInfo; - auto *domain = domains->GetDomain(); - ui64 consoleId = MakeConsoleID(); - - if (consoleId != 0) { - RequestConsoleListTenants(); - } - - ui64 hiveId = domains->GetHive(); - if (hiveId != TDomainsInfo::BadTabletId) { - RequestHiveDomainStats(hiveId); - } - - TString domainPath = "/" + domain->Name; - if (Path.empty() || domainPath == Path) { - NKikimrViewer::TTenant& tenant = TenantByPath[domainPath]; - tenant.SetName(domainPath); - tenant.SetState(Ydb::Cms::GetDatabaseStatusResult::State::GetDatabaseStatusResult_State_RUNNING); - RequestSchemeCacheNavigate(domainPath); - } - - Become(&TThis::StateWork, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - void PassAway() override { - for (const TNodeId nodeId : NodeIds) { - Send(TActivationContext::InterconnectProxy(nodeId), new TEvents::TEvUnsubscribe()); - } - TBase::PassAway(); - } - - STATEFN(StateWork) { - switch (ev->GetTypeRewrite()) { - hFunc(TEvInterconnect::TEvNodesInfo, Handle); - hFunc(NConsole::TEvConsole::TEvListTenantsResponse, Handle); - hFunc(TEvHive::TEvResponseHiveDomainStats, Handle); - hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, Handle); - hFunc(TEvWhiteboard::TEvSystemStateResponse, Handle); - hFunc(TEvWhiteboard::TEvNodeStateResponse, Handle); - hFunc(TEvents::TEvUndelivered, Undelivered); - hFunc(TEvInterconnect::TEvNodeDisconnected, Disconnected); - hFunc(TEvTabletPipe::TEvClientConnected, TBase::Handle); - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void Handle(TEvInterconnect::TEvNodesInfo::TPtr& ev) { - NodesInfo.reset(ev->Release().Release()); - RequestDone(); - } - - 
void Handle(NConsole::TEvConsole::TEvListTenantsResponse::TPtr& ev) { - Ydb::Cms::ListDatabasesResult listTenantsResult; - ev->Get()->Record.GetResponse().operation().result().UnpackTo(&listTenantsResult); - for (const TString& path : listTenantsResult.paths()) { - if (!Path.empty() && path != Path) { - continue; - } - TenantByPath[path]; - RequestSchemeCacheNavigate(path); - } - RequestDone(); - } - - void Handle(TEvHive::TEvResponseHiveDomainStats::TPtr& ev) { - HiveStats.reset(ev->Release().Release()); - for (const NKikimrHive::THiveDomainStats& hiveStat : HiveStats->Record.GetDomainStats()) { - TPathId subDomainKey(hiveStat.GetShardId(), hiveStat.GetPathId()); - NKikimrViewer::TTenant& tenant = TenantBySubDomainKey[subDomainKey]; - tenant.SetId(TStringBuilder() << hiveStat.GetShardId() << '-' << hiveStat.GetPathId()); - tenant.MutableNodeIds()->MergeFrom(hiveStat.GetNodeIds()); - for (TNodeId nodeId : hiveStat.GetNodeIds()) { - NodeIds.emplace_back(nodeId); - } - } - for (TNodeId nodeId : NodeIds) { - TActorId whiteboardServiceId = MakeNodeWhiteboardServiceId(nodeId); - SendRequest( - whiteboardServiceId, - new NNodeWhiteboard::TEvWhiteboard::TEvSystemStateRequest(), - IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, - nodeId); - SendRequest( - whiteboardServiceId, - new NNodeWhiteboard::TEvWhiteboard::TEvNodeStateRequest(), - IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, - nodeId); - - } - RequestDone(); - } - - void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { - if (ev->Get()->Request->ResultSet.size() == 1 && ev->Get()->Request->ResultSet.begin()->Status == NSchemeCache::TSchemeCacheNavigate::EStatus::Ok) { - TString path = CanonizePath(ev->Get()->Request->ResultSet.begin()->Path); - NavigateResult[path].reset(ev->Release().Release()); - } - RequestDone(); - } - - void Handle(NNodeWhiteboard::TEvWhiteboard::TEvSystemStateResponse::TPtr& ev) { - ui32 nodeId = ev.Get()->Cookie; - 
NodeSysInfo[nodeId].reset(ev->Release().Release()); - RequestDone(); - } - - void Handle(NNodeWhiteboard::TEvWhiteboard::TEvNodeStateResponse::TPtr& ev) { - ui32 nodeId = ev.Get()->Cookie; - NodeNetInfo[nodeId].reset(ev->Release().Release()); - RequestDone(); - } - - void Undelivered(TEvents::TEvUndelivered::TPtr &ev) { - ui32 nodeId = ev.Get()->Cookie; - if (ev->Get()->SourceType == NNodeWhiteboard::TEvWhiteboard::EvSystemStateRequest) { - if (NodeSysInfo.emplace(nodeId, nullptr).second) { - RequestDone(); - } - } - if (ev->Get()->SourceType == NNodeWhiteboard::TEvWhiteboard::EvNodeStateRequest) { - if (NodeNetInfo.emplace(nodeId, nullptr).second) { - RequestDone(); - } - } - } - - void Disconnected(TEvInterconnect::TEvNodeDisconnected::TPtr &ev) { - ui32 nodeId = ev->Get()->NodeId; - if (NodeSysInfo.emplace(nodeId, nullptr).second) { - RequestDone(); - } - if (NodeNetInfo.emplace(nodeId, nullptr).second) { - RequestDone(); - } - } - - void ReplyAndPassAway() { - THashMap nodeInfoIndex; - if (NodesInfo) { - for (const TEvInterconnect::TNodeInfo& nodeInfo : NodesInfo->Nodes) { - nodeInfoIndex[nodeInfo.NodeId] = &nodeInfo; - } - } - TIntrusivePtr dynamicNameserviceConfig = AppData()->DynamicNameserviceConfig; - NKikimrViewer::TNetInfo result; - for (const std::pair& prTenant : TenantByPath) { - const TString& path = prTenant.first; - //const NKikimrViewer::TTenant& tenantByPath(prTenant.second); - NKikimrViewer::TNetTenantInfo& netTenantInfo = *result.AddTenants(); - netTenantInfo.SetName(path); - auto itNavigate = NavigateResult.find(path); - if (itNavigate != NavigateResult.end()) { - auto domainInfo = itNavigate->second->Request->ResultSet.begin()->DomainInfo; - TPathId subDomainKey(domainInfo->DomainKey); - const NKikimrViewer::TTenant& tenantBySubDomainKey(TenantBySubDomainKey[subDomainKey]); - for (TNodeId nodeId : tenantBySubDomainKey.GetNodeIds()) { - NKikimrViewer::TNetNodeInfo& netNodeInfo = *netTenantInfo.AddNodes(); - netNodeInfo.SetNodeId(nodeId); - 
auto itSysInfo = NodeSysInfo.find(nodeId); - if (itSysInfo != NodeSysInfo.end()) { - if (itSysInfo->second != nullptr && itSysInfo->second->Record.SystemStateInfoSize() == 1) { - const NKikimrWhiteboard::TSystemStateInfo& sysInfo = itSysInfo->second->Record.GetSystemStateInfo(0); - if (sysInfo.HasDataCenter()) { - netNodeInfo.SetDataCenter(sysInfo.GetDataCenter()); - } - if (sysInfo.HasRack()) { - netNodeInfo.SetRack(sysInfo.GetRack()); - } - } - } - if (dynamicNameserviceConfig) { - netNodeInfo.SetNodeType(nodeId <= dynamicNameserviceConfig->MaxStaticNodeId ? NKikimrViewer::ENodeType::Static : NKikimrViewer::ENodeType::Dynamic); - } - auto itNodeInfo = nodeInfoIndex.find(nodeId); - if (itNodeInfo != nodeInfoIndex.end()) { - netNodeInfo.SetHost(itNodeInfo->second->Host); - netNodeInfo.SetPort(itNodeInfo->second->Port); - } - auto itNetInfo = NodeNetInfo.find(nodeId); - if (itNetInfo != NodeNetInfo.end()) { - if (itNetInfo->second != nullptr) { - for (const NKikimrWhiteboard::TNodeStateInfo& netInfo : itNetInfo->second->Record.GetNodeStateInfo()) { - TString peerName(netInfo.GetPeerName()); - TNodeId nodeId = FromStringWithDefault(TStringBuf(peerName).Before(':')); - if (nodeInfoIndex.find(nodeId) == nodeInfoIndex.end()) { - continue; - } - NKikimrViewer::TNetNodePeerInfo& netNodePeerInfo = *netNodeInfo.AddPeers(); - netNodePeerInfo.SetNodeId(nodeId); - netNodePeerInfo.SetPeerName(peerName); - netNodePeerInfo.SetConnected(netInfo.GetConnected()); - netNodePeerInfo.SetConnectStatus(GetViewerFlag(netInfo.GetConnectStatus())); - netNodePeerInfo.SetChangeTime(netInfo.GetChangeTime()); - if (dynamicNameserviceConfig) { - netNodePeerInfo.SetNodeType(nodeId <= dynamicNameserviceConfig->MaxStaticNodeId ? 
NKikimrViewer::ENodeType::Static : NKikimrViewer::ENodeType::Dynamic); - } - auto itSysInfo = NodeSysInfo.find(nodeId); - if (itSysInfo != NodeSysInfo.end()) { - if (itSysInfo->second != nullptr && itSysInfo->second->Record.SystemStateInfoSize() == 1) { - const NKikimrWhiteboard::TSystemStateInfo& sysInfo = itSysInfo->second->Record.GetSystemStateInfo(0); - if (sysInfo.HasDataCenter()) { - netNodePeerInfo.SetDataCenter(sysInfo.GetDataCenter()); - } - if (sysInfo.HasRack()) { - netNodePeerInfo.SetRack(sysInfo.GetRack()); - } - } - } - auto itNodeInfo = nodeInfoIndex.find(nodeId); - if (itNodeInfo != nodeInfoIndex.end()) { - netNodePeerInfo.SetHost(itNodeInfo->second->Host); - netNodePeerInfo.SetPort(itNodeInfo->second->Port); - } - } - } - } - - // TODO(xenoxeno) - netNodeInfo.SetOverall(NKikimrViewer::EFlag::Green); - } - } - - // TODO(xenoxeno) - netTenantInfo.SetOverall(NKikimrViewer::EFlag::Green); - } - - // TODO(xenoxeno) - result.SetOverall(NKikimrViewer::EFlag::Green); - TStringStream json; - TProtoToJson::ProtoToJson(json, result, JsonSettings); - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), json.Str()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } - - void HandleTimeout() { - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: path - in: query - description: schema path - required: false - type: string - - name: hive_id - in: query - description: hive identifier (tablet id) - required: false - type: string - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - - name: ui64 - in: query - 
description: return ui64 as number - required: false - type: boolean - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Network information"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns network information"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_nodeinfo.h b/ydb/core/viewer/json_nodeinfo.h deleted file mode 100644 index ac1dbb04878d..000000000000 --- a/ydb/core/viewer/json_nodeinfo.h +++ /dev/null @@ -1,62 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include "wb_merge.h" -#include "json_wb_req.h" - -namespace NKikimr { -namespace NViewer { - -template <> -struct TWhiteboardInfo { - using TResponseType = NKikimrWhiteboard::TEvNodeStateResponse; - using TElementType = NKikimrWhiteboard::TNodeStateInfo; - using TElementKeyType = TString; - - static constexpr bool StaticNodesOnly = false; - - static ::google::protobuf::RepeatedPtrField& GetElementsField(TResponseType& response) { - return *response.MutableNodeStateInfo(); - } - - static const TString& GetElementKey(const TElementType& type) { - return type.GetPeerName(); - } - - static TString GetDefaultMergeField() { - return "PeerName"; - } - - static void MergeResponses(TResponseType& result, TMap& responses, const TString& fields = GetDefaultMergeField()) { - TWhiteboardMerger::MergeResponses(result, responses, fields); - } - - static void InitMerger() { - const auto* field = NKikimrWhiteboard::TNodeStateInfo::descriptor()->FindFieldByName("ConnectStatus"); - TWhiteboardMergerBase::FieldMerger[field] = &TWhiteboardMergerBase::ProtoMaximizeEnumField; - field = NKikimrWhiteboard::TNodeStateInfo::descriptor()->FindFieldByName("Connected"); - TWhiteboardMergerBase::FieldMerger[field] = &TWhiteboardMergerBase::ProtoMaximizeBoolField; - } -}; - -using TJsonNodeInfo = 
TJsonWhiteboardRequest; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Interconnect information"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns information about node connections"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_nodelist.h b/ydb/core/viewer/json_nodelist.h deleted file mode 100644 index d1d3310f616e..000000000000 --- a/ydb/core/viewer/json_nodelist.h +++ /dev/null @@ -1,123 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include "viewer.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - -class TJsonNodeList : public TActorBootstrapped { - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - TAutoPtr NodesInfo; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonNodeList(IViewer* viewer, NMon::TEvHttpInfo::TPtr &ev) - : Viewer(viewer) - , Event(ev) - {} - - void Bootstrap(const TActorContext& ctx) { - const TActorId nameserviceId = GetNameserviceActorId(); - ctx.Send(nameserviceId, new TEvInterconnect::TEvListNodes()); - ctx.Schedule(TDuration::Seconds(10), new TEvents::TEvWakeup()); - Become(&TThis::StateRequestedBrowse); - } - - STFUNC(StateRequestedBrowse) { - switch (ev->GetTypeRewrite()) { - HFunc(TEvInterconnect::TEvNodesInfo, Handle); - CFunc(TEvents::TSystem::Wakeup, Timeout); - } - } - - void Handle(TEvInterconnect::TEvNodesInfo::TPtr &ev, const TActorContext &ctx) { - NodesInfo = ev->Release(); - ReplyAndDie(ctx); - } - - void ReplyAndDie(const TActorContext &ctx) { - NJson::TJsonValue json; - json.SetType(NJson::EJsonValueType::JSON_ARRAY); - if (NodesInfo != nullptr) { - for (auto it = NodesInfo->Nodes.begin(); it != NodesInfo->Nodes.end(); ++it) { - const TEvInterconnect::TNodeInfo& nodeInfo = *it; - NJson::TJsonValue& jsonNodeInfo = json.AppendValue(NJson::TJsonValue()); - 
jsonNodeInfo["Id"] = nodeInfo.NodeId; - if (!nodeInfo.Host.empty()) { - jsonNodeInfo["Host"] = nodeInfo.Host; - } - if (!nodeInfo.ResolveHost.empty()) { - jsonNodeInfo["ResolveHost"] = nodeInfo.ResolveHost; - } - jsonNodeInfo["Address"] = nodeInfo.Address; - jsonNodeInfo["Port"] = nodeInfo.Port; - if (nodeInfo.Location != TNodeLocation()) { - NJson::TJsonValue& jsonPhysicalLocation = jsonNodeInfo["PhysicalLocation"]; - const auto& x = nodeInfo.Location.GetLegacyValue(); - jsonPhysicalLocation["DataCenter"] = x.DataCenter; - jsonPhysicalLocation["Room"] = x.Room; - jsonPhysicalLocation["Rack"] = x.Rack; - jsonPhysicalLocation["Body"] = x.Body; - jsonPhysicalLocation["DataCenterId"] = nodeInfo.Location.GetDataCenterId(); - jsonPhysicalLocation["Location"] = nodeInfo.Location.ToString(); - } - } - } - ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), NJson::WriteJson(json, false)), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); - } - - void Timeout(const TActorContext &ctx) { - ReplyAndDie(ctx); - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return YAML::Load(R"___( - type: array - title: TEvNodeListResponse - items: - type: object - title: TNodeInfo - properties: - Id: - type: integer - Host: - type: string - Address: - type: string - Port: - type: integer - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Nodes list"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns list of nodes"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_nodes.h b/ydb/core/viewer/json_nodes.h deleted file mode 100644 index 987cdd369be2..000000000000 --- a/ydb/core/viewer/json_nodes.h +++ /dev/null @@ -1,1020 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include "viewer.h" -#include "viewer_helper.h" -#include "json_pipe_req.h" 
-#include "json_sysinfo.h" -#include "json_pdiskinfo.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; -using namespace NNodeWhiteboard; -using ::google::protobuf::FieldDescriptor; - -class TJsonNodes : public TViewerPipeClient { - using TThis = TJsonNodes; - using TBase = TViewerPipeClient; - using TNodeId = ui32; - using TPDiskId = std::pair; - IViewer* Viewer; - TActorId Initiator; - NMon::TEvHttpInfo::TPtr Event; - std::unique_ptr NodesInfo; - std::unordered_map PDiskInfo; - std::unordered_map VDiskInfo; - std::unordered_map> TabletInfo; - std::unordered_map SysInfo; - std::unordered_map NavigateResult; - std::unique_ptr BaseConfig; - std::unordered_map BaseConfigGroupIndex; - std::unordered_map DisconnectTime; - std::unordered_map NodeName; - TJsonSettings JsonSettings; - ui32 Timeout = 0; - TString FilterTenant; - TSubDomainKey FilterSubDomainKey; - TString FilterPath; - TString FilterStoragePool; - std::unordered_set FilterNodeIds; - std::unordered_set FilterGroupIds; - std::unordered_set PassedNodeIds; - std::vector NodeIds; - std::optional Offset; - std::optional Limit; - ui32 UptimeSeconds = 0; - bool ProblemNodesOnly = false; - TString Filter; - - enum class EWith { - Everything, - MissingDisks, - SpaceProblems, - }; - EWith With = EWith::Everything; - - enum class EType { - Any, - Static, - Dynamic, - }; - EType Type = EType::Any; - - enum class ESort { - NodeId, - Host, - DC, - Rack, - Version, - Uptime, - Memory, - CPU, - LoadAverage, - Missing, - }; - ESort Sort = ESort::NodeId; - bool ReverseSort = false; - bool SortedNodeList = false; - bool LimitApplied = false; - - bool Storage = false; - bool Tablets = false; - TPathId FilterPathId; - bool ResolveGroupsToNodes = false; - TNodeId MinAllowedNodeId = std::numeric_limits::min(); - TNodeId MaxAllowedNodeId = std::numeric_limits::max(); - ui32 RequestsBeforeNodeList = 0; - ui64 HiveId = 0; - std::optional MaximumDisksPerNode; - -public: - static constexpr 
NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TString GetLogPrefix() { - static TString prefix = "json/nodes "; - return prefix; - } - - TJsonNodes(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) - : Viewer(viewer) - , Initiator(ev->Sender) - , Event(std::move(ev)) - { - const auto& params(Event->Get()->Request.GetParams()); - JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), true); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); - InitConfig(params); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - UptimeSeconds = FromStringWithDefault(params.Get("uptime"), 0); - ProblemNodesOnly = FromStringWithDefault(params.Get("problems_only"), ProblemNodesOnly); - Filter = params.Get("filter"); - FilterPath = params.Get("path"); - FilterTenant = params.Get("tenant"); - FilterStoragePool = params.Get("pool"); - SplitIds(params.Get("node_id"), ',', FilterNodeIds); - auto itZero = FilterNodeIds.find(0); - if (itZero != FilterNodeIds.end()) { - FilterNodeIds.erase(itZero); - FilterNodeIds.insert(TlsActivationContext->ActorSystem()->NodeId); - } - if (params.Get("with") == "missing") { - With = EWith::MissingDisks; - } else if (params.Get("with") == "space") { - With = EWith::SpaceProblems; - } - if (params.Has("offset")) { - Offset = FromStringWithDefault(params.Get("offset"), 0); - } - if (params.Has("limit")) { - Limit = FromStringWithDefault(params.Get("limit"), std::numeric_limits::max()); - } - if (params.Get("type") == "static") { - Type = EType::Static; - } else if (params.Get("type") == "dynamic") { - Type = EType::Dynamic; - } else if (params.Get("type") == "any") { - Type = EType::Any; - } - Storage = FromStringWithDefault(params.Get("storage"), Storage); - Tablets = FromStringWithDefault(params.Get("tablets"), Tablets); - ResolveGroupsToNodes = FromStringWithDefault(params.Get("resolve_groups"), ResolveGroupsToNodes); - 
TStringBuf sort = params.Get("sort"); - if (sort) { - if (sort.StartsWith("-") || sort.StartsWith("+")) { - ReverseSort = (sort[0] == '-'); - sort.Skip(1); - } - if (sort == "NodeId") { - Sort = ESort::NodeId; - } else if (sort == "Host") { - Sort = ESort::Host; - } else if (sort == "DC") { - Sort = ESort::DC; - } else if (sort == "Rack") { - Sort = ESort::Rack; - } else if (sort == "Version") { - Sort = ESort::Version; - } else if (sort == "Uptime") { - Sort = ESort::Uptime; - } else if (sort == "Memory") { - Sort = ESort::Memory; - } else if (sort == "CPU") { - Sort = ESort::CPU; - } else if (sort == "LoadAverage") { - Sort = ESort::LoadAverage; - } else if (sort == "Missing") { - Sort = ESort::Missing; - } - } - } - - void Bootstrap() { - BLOG_TRACE("Bootstrap()"); - if (Type != EType::Any) { - TIntrusivePtr dynamicNameserviceConfig = AppData()->DynamicNameserviceConfig; - if (dynamicNameserviceConfig) { - if (Type == EType::Static) { - MaxAllowedNodeId = dynamicNameserviceConfig->MaxStaticNodeId; - } - if (Type == EType::Dynamic) { - MinAllowedNodeId = dynamicNameserviceConfig->MaxStaticNodeId + 1; - } - } - } - - if (Storage) { - BLOG_TRACE("RequestBSControllerConfig()"); - RequestBSControllerConfig(); - ++RequestsBeforeNodeList; - } - - if (!FilterTenant.empty()) { - RequestForTenant(FilterTenant); - } - - if (!FilterPath.empty()) { - BLOG_TRACE("Requesting navigate for " << FilterPath); - RequestSchemeCacheNavigate(FilterPath); - ++RequestsBeforeNodeList; - } - - BLOG_TRACE("Request TEvListNodes"); - SendRequest(GetNameserviceActorId(), new TEvInterconnect::TEvListNodes()); - ++RequestsBeforeNodeList; - if (Requests == 0) { - ReplyAndPassAway(); - return; - } - TBase::Become(&TThis::StateWork, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - void PassAway() override { - BLOG_TRACE("PassAway()"); - for (const TNodeId nodeId : NodeIds) { - Send(TActivationContext::InterconnectProxy(nodeId), new TEvents::TEvUnsubscribe()); - } - 
TBase::PassAway(); - } - - void RequestForTenant(const TString& filterTenant) { - BLOG_TRACE("RequestForTenant " << filterTenant); - FilterTenant = filterTenant; - if (Type == EType::Static || Type == EType::Any) { - if (ResolveGroupsToNodes) { - if (!Storage) { - BLOG_TRACE("RequestBSControllerConfig()"); - RequestBSControllerConfig(); - ++RequestsBeforeNodeList; - } - } - } - if (Type == EType::Dynamic || Type == EType::Any) { - BLOG_TRACE("RequestStateStorageEndpointsLookup for " << FilterTenant); - RequestStateStorageEndpointsLookup(FilterTenant); // to get dynamic nodes - ++RequestsBeforeNodeList; - } - } - - bool CheckNodeFilters(TNodeId nodeId) { - if (Storage && With == EWith::MissingDisks) { - auto itPDiskState = PDiskInfo.find(nodeId); - if (itPDiskState != PDiskInfo.end()) { - int disksNormal = 0; - for (const auto& protoPDiskInfo : itPDiskState->second.GetPDiskStateInfo()) { - if (protoPDiskInfo.state() == NKikimrBlobStorage::TPDiskState::Normal) { - ++disksNormal; - } - } - if (itPDiskState->second.pdiskstateinfo_size() == disksNormal) { - return false; - } - } - } - auto itSysInfo = SysInfo.find(nodeId); - if (itSysInfo != SysInfo.end() && itSysInfo->second.SystemStateInfoSize() > 0) { - const auto& sysState(itSysInfo->second.GetSystemStateInfo(0)); - if (Storage && With == EWith::SpaceProblems) { - if (!sysState.HasMaxDiskUsage() || sysState.GetMaxDiskUsage() < 0.85) { - return false; - } - } - if (UptimeSeconds > 0 && sysState.HasStartTime() && itSysInfo->second.HasResponseTime() - && itSysInfo->second.GetResponseTime() - sysState.GetStartTime() > UptimeSeconds * 1000) { - return false; - } - if (ProblemNodesOnly && sysState.HasSystemState() - && GetViewerFlag(sysState.GetSystemState()) == NKikimrViewer::EFlag::Green) { - return false; - } - if (Filter) { - if (sysState.HasHost() && sysState.GetHost().Contains(Filter)) { - return true; - } - if (std::to_string(nodeId).contains(Filter)) { - return true; - } - return false; - } - } - - return true; - 
} - - bool HasNodeFilter() { - return With != EWith::Everything || UptimeSeconds != 0 || ProblemNodesOnly || !Filter.empty(); - } - - void SendNodeRequest(TNodeId nodeId) { - if (PassedNodeIds.insert(nodeId).second) { - if (SortedNodeList) { - // optimization for early paging with default sort - LimitApplied = true; - if (Offset.has_value()) { - if (PassedNodeIds.size() <= Offset.value()) { - return; - } - } - if (Limit.has_value()) { - if (NodeIds.size() >= Limit.value()) { - return; - } - } - } - NodeIds.push_back(nodeId); // order is important - TActorId whiteboardServiceId = MakeNodeWhiteboardServiceId(nodeId); - BLOG_TRACE("SendSystemStateRequest to " << nodeId); - SendRequest(whiteboardServiceId, new TEvWhiteboard::TEvSystemStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId); - if (Storage) { - BLOG_TRACE("SendV/PDiskStateRequest to " << nodeId); - SendRequest(whiteboardServiceId, new TEvWhiteboard::TEvPDiskStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId); - SendRequest(whiteboardServiceId, new TEvWhiteboard::TEvVDiskStateRequest(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId); - } - if (Tablets && FilterPathId == TPathId()) { - BLOG_TRACE("SendTabletStateRequest to " << nodeId); - auto request = std::make_unique(); - request->Record.SetGroupBy("Type,State"); - SendRequest(whiteboardServiceId, request.release(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId); - } - } - } - - void ProcessNodeIds() { - BLOG_TRACE("ProcessNodeIds()"); - - if (!HasNodeFilter()) { - switch (Sort) { - case ESort::NodeId: { - SortCollection(NodesInfo->Nodes, [](const TEvInterconnect::TNodeInfo& node) { return node.NodeId;}, ReverseSort); - SortedNodeList = true; - break; - } - case ESort::Host: { - SortCollection(NodesInfo->Nodes, [](const TEvInterconnect::TNodeInfo& node) { return node.Host;}, ReverseSort); - SortedNodeList = 
true; - break; - } - case ESort::DC: { - SortCollection(NodesInfo->Nodes, [](const TEvInterconnect::TNodeInfo& node) { return node.Location.GetDataCenterId();}, ReverseSort); - SortedNodeList = true; - break; - } - default: - break; - } - } - - for (const auto& ni : NodesInfo->Nodes) { - if ((FilterNodeIds.empty() || FilterNodeIds.count(ni.NodeId) > 0) && ni.NodeId >= MinAllowedNodeId && ni.NodeId <= MaxAllowedNodeId) { - SendNodeRequest(ni.NodeId); - } - } - } - - void Handle(TEvBlobStorage::TEvControllerConfigResponse::TPtr& ev) { - BLOG_TRACE("Received TEvControllerConfigResponse"); - const NKikimrBlobStorage::TEvControllerConfigResponse& pbRecord(ev->Get()->Record); - if (pbRecord.HasResponse() && pbRecord.GetResponse().StatusSize() > 0) { - const NKikimrBlobStorage::TConfigResponse::TStatus& pbStatus(pbRecord.GetResponse().GetStatus(0)); - if (pbStatus.HasBaseConfig()) { - BaseConfig.reset(ev->Release().Release()); - const NKikimrBlobStorage::TEvControllerConfigResponse& pbRecord(BaseConfig->Record); - const NKikimrBlobStorage::TConfigResponse::TStatus& pbStatus(pbRecord.GetResponse().GetStatus(0)); - const NKikimrBlobStorage::TBaseConfig& pbConfig(pbStatus.GetBaseConfig()); - for (const NKikimrBlobStorage::TBaseConfig::TGroup& group : pbConfig.GetGroup()) { - BaseConfigGroupIndex[group.GetGroupId()] = &group; - } - std::unordered_map disksPerNode; - disksPerNode.reserve(pbConfig.NodeSize()); - for (const NKikimrBlobStorage::TBaseConfig::TPDisk& pdisk : pbConfig.GetPDisk()) { - disksPerNode[pdisk.GetNodeId()]++; - } - int maximumDisksPerNode = 0; - for (const auto& [nodeId, disks] : disksPerNode) { - if (disks > maximumDisksPerNode) { - maximumDisksPerNode = disks; - } - } - MaximumDisksPerNode = maximumDisksPerNode; - } - } - if (ResolveGroupsToNodes) { - BLOG_TRACE("Requesting navigate for " << FilterTenant); - RequestSchemeCacheNavigate(FilterTenant); // to get storage pools and then groups and then pdisks - ++RequestsBeforeNodeList; - } - if 
(--RequestsBeforeNodeList == 0) { - ProcessNodeIds(); - } - - RequestDone(); - } - - bool IsSubDomainPath(const TSchemeCacheNavigate::TEntry& entry) { - switch (entry.Kind) { - case TSchemeCacheNavigate::EKind::KindSubdomain: - case TSchemeCacheNavigate::EKind::KindExtSubdomain: - return true; - case TSchemeCacheNavigate::EKind::KindPath: - return entry.Self->Info.GetPathId() == NSchemeShard::RootPathId; - default: - return false; - } - } - - void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { - if (ev->Get()->Request->ResultSet.size() == 1 && ev->Get()->Request->ResultSet.begin()->Status == NSchemeCache::TSchemeCacheNavigate::EStatus::Ok) { - TSchemeCacheNavigate::TEntry& entry(ev->Get()->Request->ResultSet.front()); - TString path = CanonizePath(entry.Path); - BLOG_TRACE("Received navigate for " << path); - if (IsSubDomainPath(entry)) { - if (HiveId == 0) { - HiveId = entry.DomainInfo->Params.GetHive(); - } - if (!FilterSubDomainKey) { - const auto ownerId = entry.DomainInfo->DomainKey.OwnerId; - const auto localPathId = entry.DomainInfo->DomainKey.LocalPathId; - FilterSubDomainKey = TSubDomainKey(ownerId, localPathId); - } - - if (FilterTenant.empty()) { - RequestForTenant(path); - } - - if (entry.DomainInfo->ResourcesDomainKey && entry.DomainInfo->DomainKey != entry.DomainInfo->ResourcesDomainKey) { - TPathId resourceDomainKey(entry.DomainInfo->ResourcesDomainKey); - BLOG_TRACE("Requesting navigate for resource domain " << resourceDomainKey); - RequestSchemeCacheNavigate(resourceDomainKey); - ++RequestsBeforeNodeList; - } else if (Storage && entry.DomainDescription) { - for (const auto& storagePool : entry.DomainDescription->Description.GetStoragePools()) { - TString storagePoolName = storagePool.GetName(); - THolder request = MakeHolder(); - request->Record.SetReturnAllMatchingGroups(true); - request->Record.AddGroupParameters()->MutableStoragePoolSpecifier()->SetName(storagePoolName); - BLOG_TRACE("Requesting BSControllerSelectGroups for " 
<< storagePoolName); - RequestBSControllerSelectGroups(std::move(request)); - ++RequestsBeforeNodeList; - } - } - } else { - if (entry.DomainInfo) { - TPathId domainKey(entry.DomainInfo->DomainKey); - BLOG_TRACE("Requesting navigate for parent domain " << domainKey); - RequestSchemeCacheNavigate(domainKey); - ++RequestsBeforeNodeList; - - if (!FilterPath.empty() && Tablets && FilterPathId == TPathId()) { - FilterPathId = TPathId(entry.Self->Info.GetSchemeshardId(), entry.Self->Info.GetPathId()); - } - } - } - NavigateResult.emplace(path, std::move(entry)); - - if (HiveId != 0) { - BLOG_TRACE("Requesting hive " << HiveId << " for path id " << FilterPathId); - RequestHiveNodeStats(HiveId, FilterPathId); - ++RequestsBeforeNodeList; - } - } else { - BLOG_TRACE("Error receiving Navigate response"); - FilterNodeIds = { 0 }; - } - - if (--RequestsBeforeNodeList == 0) { - ProcessNodeIds(); - } - RequestDone(); - } - - void Handle(TEvHive::TEvResponseHiveNodeStats::TPtr& ev) { - BLOG_TRACE("ResponseHiveNodeStats()"); - for (const NKikimrHive::THiveNodeStats& nodeStats : ev->Get()->Record.GetNodeStats()) { - const TSubDomainKey nodeSubDomainKey = TSubDomainKey(nodeStats.GetNodeDomain()); - if (FilterSubDomainKey && FilterSubDomainKey != nodeSubDomainKey) { - continue; - } - ui32 nodeId = nodeStats.GetNodeId(); - auto& tabletInfo(TabletInfo[nodeId]); - for (const NKikimrHive::THiveDomainStatsStateCount& stateStats : nodeStats.GetStateStats()) { - tabletInfo.emplace_back(); - NKikimrViewer::TTabletStateInfo& viewerTablet(tabletInfo.back()); - viewerTablet.SetType(NKikimrTabletBase::TTabletTypes::EType_Name(stateStats.GetTabletType())); - viewerTablet.SetCount(stateStats.GetCount()); - viewerTablet.SetState(GetFlagFromTabletState(stateStats.GetVolatileState())); - } - BLOG_TRACE("HiveNodeStats filter node by " << nodeId); - FilterNodeIds.insert(nodeId); - DisconnectTime[nodeId] = nodeStats.GetLastAliveTimestamp(); - if (nodeStats.HasNodeName()) { - NodeName[nodeId] = 
nodeStats.GetNodeName(); - } - } - if (--RequestsBeforeNodeList == 0) { - ProcessNodeIds(); - } - RequestDone(); - } - - void Handle(TEvBlobStorage::TEvControllerSelectGroupsResult::TPtr& ev) { - BLOG_TRACE("Received TEvControllerSelectGroupsResult"); - for (const auto& matchingGroups : ev->Get()->Record.GetMatchingGroups()) { - for (const auto& group : matchingGroups.GetGroups()) { - TString storagePoolName = group.GetStoragePoolName(); - if (FilterStoragePool.empty() || FilterStoragePool == storagePoolName) { - if (FilterGroupIds.emplace(group.GetGroupID()).second && BaseConfig) { - auto itBaseConfigGroupIndex = BaseConfigGroupIndex.find(group.GetGroupID()); - if (itBaseConfigGroupIndex != BaseConfigGroupIndex.end()) { - for (const NKikimrBlobStorage::TVSlotId& vslot : itBaseConfigGroupIndex->second->GetVSlotId()) { - BLOG_TRACE("SelectGroups filter by node " << vslot.GetNodeId()); - FilterNodeIds.insert(vslot.GetNodeId()); - } - } - } - } - } - } - if (--RequestsBeforeNodeList == 0) { - ProcessNodeIds(); - } - RequestDone(); - } - - void Handle(TEvInterconnect::TEvNodesInfo::TPtr& ev) { - BLOG_TRACE("Received TEvNodesInfo " << ev->Get()->Nodes.size()); - NodesInfo.reset(ev->Release().Release()); - if (--RequestsBeforeNodeList == 0) { - ProcessNodeIds(); - } - RequestDone(); - } - - void Handle(TEvStateStorage::TEvBoardInfo::TPtr& ev) { - if (ev->Get()->Status == TEvStateStorage::TEvBoardInfo::EStatus::Ok) { - BLOG_TRACE("Received TEvBoardInfo"); - for (const auto& [actorId, infoEntry] : ev->Get()->InfoEntries) { - auto nodeId(actorId.NodeId()); - BLOG_TRACE("BoardInfo filter node by " << nodeId); - FilterNodeIds.insert(nodeId); - } - } else { - BLOG_TRACE("Error receiving TEvBoardInfo response"); - FilterNodeIds = { 0 }; - } - - if (--RequestsBeforeNodeList == 0) { - ProcessNodeIds(); - } - RequestDone(); - } - - void Undelivered(TEvents::TEvUndelivered::TPtr& ev) { - ui32 nodeId = ev.Get()->Cookie; - BLOG_TRACE("Undelivered type " << ev->Get()->SourceType << " 
from node " << nodeId); - switch (ev->Get()->SourceType) { - case TEvWhiteboard::EvSystemStateRequest: - if (SysInfo.emplace(nodeId, NKikimrWhiteboard::TEvSystemStateResponse{}).second) { - RequestDone(); - } - break; - case TEvWhiteboard::EvPDiskStateRequest: - if (PDiskInfo.emplace(nodeId, NKikimrWhiteboard::TEvPDiskStateResponse{}).second) { - RequestDone(); - } - break; - case TEvWhiteboard::EvVDiskStateRequest: - if (VDiskInfo.emplace(nodeId, NKikimrWhiteboard::TEvVDiskStateResponse{}).second) { - RequestDone(); - } - break; - case TEvWhiteboard::EvTabletStateRequest: - RequestDone(); - break; - } - } - - void Disconnected(TEvInterconnect::TEvNodeDisconnected::TPtr& ev) { - ui32 nodeId = ev->Get()->NodeId; - BLOG_TRACE("Disconnected from node " << nodeId); - if (SysInfo.emplace(nodeId, NKikimrWhiteboard::TEvSystemStateResponse{}).second) { - RequestDone(); - } - if (Storage) { - if (PDiskInfo.emplace(nodeId, NKikimrWhiteboard::TEvPDiskStateResponse{}).second) { - RequestDone(); - } - if (VDiskInfo.emplace(nodeId, NKikimrWhiteboard::TEvVDiskStateResponse{}).second) { - RequestDone(); - } - } - if (Tablets) { - if (TabletInfo.emplace(nodeId, std::vector()).second) { - RequestDone(); - } - } - } - - void Handle(TEvWhiteboard::TEvSystemStateResponse::TPtr& ev) { - ui64 nodeId = ev.Get()->Cookie; - BLOG_TRACE("SystemStateResponse from node " << nodeId); - SysInfo[nodeId] = std::move(ev->Get()->Record); - RequestDone(); - } - - void Handle(TEvWhiteboard::TEvPDiskStateResponse::TPtr& ev) { - ui64 nodeId = ev.Get()->Cookie; - BLOG_TRACE("PDiskStateResponse from node " << nodeId); - PDiskInfo[nodeId] = std::move(ev->Get()->Record); - RequestDone(); - } - - void Handle(TEvWhiteboard::TEvVDiskStateResponse::TPtr& ev) { - ui64 nodeId = ev.Get()->Cookie; - BLOG_TRACE("VDiskStateResponse from node " << nodeId); - VDiskInfo[nodeId] = std::move(ev->Get()->Record); - RequestDone(); - } - - void Handle(TEvWhiteboard::TEvTabletStateResponse::TPtr& ev) { - ui64 nodeId = 
ev.Get()->Cookie; - BLOG_TRACE("TabletStateResponse from node " << nodeId); - NKikimrWhiteboard::TEvTabletStateResponse response = std::move(ev->Get()->Record); - bool needToGroup = response.TabletStateInfoSize() > 0 && !response.GetTabletStateInfo(0).HasCount(); - if (needToGroup) { // for compatibility with older versions - GroupWhiteboardResponses(response, "Type,Overall", false); - } - auto& vecTablets(TabletInfo[nodeId]); - for (const NKikimrWhiteboard::TTabletStateInfo& tablet : response.GetTabletStateInfo()) { - if (tablet.GetState() == NKikimrWhiteboard::TTabletStateInfo::Dead - || tablet.GetState() == NKikimrWhiteboard::TTabletStateInfo::Deleted) { - continue; - } - vecTablets.emplace_back(); - NKikimrViewer::TTabletStateInfo& viewerTablet(vecTablets.back()); - viewerTablet.SetType(NKikimrTabletBase::TTabletTypes::EType_Name(tablet.GetType())); - viewerTablet.SetCount(tablet.GetCount()); - viewerTablet.SetState(GetFlagFromTabletState(tablet.GetState())); - } - RequestDone(); - } - - STATEFN(StateWork) { - switch (ev->GetTypeRewrite()) { - hFunc(TEvInterconnect::TEvNodesInfo, Handle); - hFunc(TEvWhiteboard::TEvSystemStateResponse, Handle); - hFunc(TEvWhiteboard::TEvPDiskStateResponse, Handle); - hFunc(TEvWhiteboard::TEvVDiskStateResponse, Handle); - hFunc(TEvWhiteboard::TEvTabletStateResponse, Handle); - hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, Handle); - hFunc(TEvHive::TEvResponseHiveNodeStats, Handle); - hFunc(TEvBlobStorage::TEvControllerSelectGroupsResult, Handle); - hFunc(TEvBlobStorage::TEvControllerConfigResponse, Handle); - hFunc(TEvStateStorage::TEvBoardInfo, Handle); - hFunc(TEvents::TEvUndelivered, Undelivered); - hFunc(TEvInterconnect::TEvNodeDisconnected, Disconnected); - hFunc(TEvTabletPipe::TEvClientConnected, TBase::Handle); - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - NKikimrWhiteboard::TPDiskStateInfo& GetPDisk(TPDiskId pDiskId) { - auto itPDiskInfo = PDiskInfo.find(pDiskId.first); - if (itPDiskInfo == 
PDiskInfo.end()) { - itPDiskInfo = PDiskInfo.insert({pDiskId.first, NKikimrWhiteboard::TEvPDiskStateResponse{}}).first; - } - - for (auto& pDiskInfo : *itPDiskInfo->second.mutable_pdiskstateinfo()) { - if (pDiskInfo.pdiskid() == pDiskId.second) { - return pDiskInfo; - } - } - - NKikimrWhiteboard::TPDiskStateInfo& pDiskInfo = *itPDiskInfo->second.add_pdiskstateinfo(); - pDiskInfo.SetPDiskId(pDiskId.second); - return pDiskInfo; - } - - static double GetCPU(const NKikimrWhiteboard::TSystemStateInfo& sysInfo) { - double cpu = 0; - if (sysInfo.PoolStatsSize() > 0) { - for (const auto& ps : sysInfo.GetPoolStats()) { - cpu = std::max(cpu, ps.GetUsage()); - } - } - return cpu; - } - - static double GetLoadAverage(const NKikimrWhiteboard::TSystemStateInfo& sysInfo) { - if (sysInfo.LoadAverageSize() > 0 && sysInfo.GetNumberOfCpus() > 0) { - return sysInfo.GetLoadAverage(0) * 100 / sysInfo.GetNumberOfCpus(); - } - return 0; - } - - static uint32 GetMissing(const NKikimrViewer::TNodeInfo& nodeInfo) { - uint32 missing = 0; - for (const auto& pDisk : nodeInfo.GetPDisks()) { - if (pDisk.state() != NKikimrBlobStorage::TPDiskState::Normal) { - missing++; - } - } - return missing; - } - - void ReplyAndPassAway() { - NKikimrViewer::TNodesInfo result; - - if (Storage && BaseConfig) { - const NKikimrBlobStorage::TEvControllerConfigResponse& pbRecord(BaseConfig->Record); - const NKikimrBlobStorage::TConfigResponse::TStatus& pbStatus(pbRecord.GetResponse().GetStatus(0)); - const NKikimrBlobStorage::TBaseConfig& pbConfig(pbStatus.GetBaseConfig()); - for (const NKikimrBlobStorage::TBaseConfig::TPDisk& pDisk : pbConfig.GetPDisk()) { - if (!FilterNodeIds.empty() && FilterNodeIds.count(pDisk.GetNodeId()) == 0) { - continue; - } - if (pDisk.GetNodeId() < MinAllowedNodeId || pDisk.GetNodeId() > MaxAllowedNodeId) { - continue; - } - TPDiskId pDiskId(pDisk.GetNodeId(), pDisk.GetPDiskId()); - NKikimrWhiteboard::TPDiskStateInfo& pDiskInfo = GetPDisk(pDiskId); - pDiskInfo.SetPath(pDisk.GetPath()); - 
pDiskInfo.SetGuid(pDisk.GetGuid()); - pDiskInfo.SetCategory(static_cast(pDisk.GetType())); - if (pDiskInfo.GetTotalSize() == 0) { - pDiskInfo.SetTotalSize(pDisk.GetPDiskMetrics().GetTotalSize()); - } - if (pDiskInfo.GetAvailableSize() == 0) { - pDiskInfo.SetAvailableSize(pDisk.GetPDiskMetrics().GetAvailableSize()); - } - } - for (const NKikimrBlobStorage::TBaseConfig::TNode& node : pbConfig.GetNode()) { - if (!FilterNodeIds.empty() && FilterNodeIds.count(node.GetNodeId()) == 0) { - continue; - } - if (node.GetNodeId() < MinAllowedNodeId || node.GetNodeId() > MaxAllowedNodeId) { - continue; - } - if (node.GetLastDisconnectTimestamp() > node.GetLastConnectTimestamp()) { - DisconnectTime[node.GetNodeId()] = node.GetLastDisconnectTimestamp() / 1000; // us -> ms - } - } - } - - bool noDC = true; - bool noRack = true; - - for (TNodeId nodeId : NodeIds) { - if (!CheckNodeFilters(nodeId)) { - continue; - } - - NKikimrViewer::TNodeInfo& nodeInfo = *result.add_nodes(); - nodeInfo.set_nodeid(nodeId); - BLOG_TRACE("AddingNode " << nodeId); - auto itSystemState = SysInfo.find(nodeId); - if (itSystemState != SysInfo.end() && itSystemState->second.SystemStateInfoSize() > 0) { - *nodeInfo.MutableSystemState() = itSystemState->second.GetSystemStateInfo(0); - } else if (NodesInfo != nullptr) { - auto* icNodeInfo = NodesInfo->GetNodeInfo(nodeId); - if (icNodeInfo != nullptr) { - nodeInfo.MutableSystemState()->SetHost(icNodeInfo->Host); - } - auto itDisconnectTime = DisconnectTime.find(nodeId); - if (itDisconnectTime != DisconnectTime.end()) { - nodeInfo.MutableSystemState()->SetDisconnectTime(itDisconnectTime->second); - } - auto itNodeName = NodeName.find(nodeId); - if (itNodeName != NodeName.end()) { - nodeInfo.MutableSystemState()->SetNodeName(itNodeName->second); - } - } - if (Storage) { - auto itPDiskState = PDiskInfo.find(nodeId); - if (itPDiskState != PDiskInfo.end()) { - for (auto& protoPDiskInfo : *itPDiskState->second.MutablePDiskStateInfo()) { - 
NKikimrWhiteboard::TPDiskStateInfo& pDiskInfo = *nodeInfo.AddPDisks(); - pDiskInfo = std::move(protoPDiskInfo); - } - } - auto itVDiskState = VDiskInfo.find(nodeId); - if (itVDiskState != VDiskInfo.end()) { - for (auto& protoVDiskInfo : *itVDiskState->second.MutableVDiskStateInfo()) { - NKikimrWhiteboard::TVDiskStateInfo& vDiskInfo = *nodeInfo.AddVDisks(); - vDiskInfo = std::move(protoVDiskInfo); - } - } - } - if (Tablets) { - auto itTabletState = TabletInfo.find(nodeId); - if (itTabletState != TabletInfo.end()) { - for (auto& viewerTabletInfo : itTabletState->second) { - NKikimrViewer::TTabletStateInfo& tabletInfo = *nodeInfo.AddTablets(); - tabletInfo = std::move(viewerTabletInfo); - } - } - } - - if (!nodeInfo.GetSystemState().GetLocation().GetDataCenter().empty()) { - noDC = false; - } - if (nodeInfo.GetSystemState().GetSystemLocation().GetDataCenter() != 0) { - noDC = false; - } - if (!nodeInfo.GetSystemState().GetLocation().GetRack().empty()) { - noRack = false; - } - if (nodeInfo.GetSystemState().GetSystemLocation().GetRack() != 0) { - noRack = false; - } - } - - if (!SortedNodeList) { - switch (Sort) { - case ESort::NodeId: - case ESort::Host: - case ESort::DC: - // already sorted - break; - case ESort::Rack: - SortCollection(*result.MutableNodes(), [](const NKikimrViewer::TNodeInfo& node) { return node.GetSystemState().GetRack();}, ReverseSort); - break; - case ESort::Version: - SortCollection(*result.MutableNodes(), [](const NKikimrViewer::TNodeInfo& node) { return node.GetSystemState().GetVersion();}, ReverseSort); - break; - case ESort::Uptime: - SortCollection(*result.MutableNodes(), [](const NKikimrViewer::TNodeInfo& node) { return node.GetSystemState().GetStartTime();}, ReverseSort); - break; - case ESort::Memory: - SortCollection(*result.MutableNodes(), [](const NKikimrViewer::TNodeInfo& node) { return node.GetSystemState().GetMemoryUsed();}, ReverseSort); - break; - case ESort::CPU: - SortCollection(*result.MutableNodes(), [](const 
NKikimrViewer::TNodeInfo& node) { return GetCPU(node.GetSystemState());}, ReverseSort); - break; - case ESort::LoadAverage: - SortCollection(*result.MutableNodes(), [](const NKikimrViewer::TNodeInfo& node) { return GetLoadAverage(node.GetSystemState());}, ReverseSort); - break; - case ESort::Missing: - SortCollection(*result.MutableNodes(), [](const NKikimrViewer::TNodeInfo& node) { return GetMissing(node);}, ReverseSort); - break; - } - } - - result.SetTotalNodes(PassedNodeIds.size()); - result.SetFoundNodes(LimitApplied ? PassedNodeIds.size() : result.NodesSize()); - - BLOG_TRACE("Total/Found " << result.GetTotalNodes() << "/" << result.GetFoundNodes()); - - if (!LimitApplied) { - auto& nodes = *result.MutableNodes(); - if (Offset.has_value()) { - BLOG_TRACE("ErasingFromBegining " << Offset.value()); - if (size_t(nodes.size()) > Offset.value()) { - nodes.erase(nodes.begin(), std::next(nodes.begin(), Offset.value())); - } else { - nodes.Clear(); - } - } - if (Limit.has_value()) { - BLOG_TRACE("LimitingWith " << Limit.value()); - if (size_t(nodes.size()) > Limit.value()) { - nodes.erase(std::next(nodes.begin(), Limit.value()), nodes.end()); - } - } - } - - for (NKikimrViewer::TNodeInfo& nodeInfo : *result.MutableNodes()) { - if (Storage) { - { - auto& cont(*nodeInfo.MutablePDisks()); - std::sort(cont.begin(), cont.end(), [](const NKikimrWhiteboard::TPDiskStateInfo& a, const NKikimrWhiteboard::TPDiskStateInfo& b) -> bool { - return a.GetPath() < b.GetPath(); - }); - } - { - auto& cont(*nodeInfo.MutableVDisks()); - std::sort(cont.begin(), cont.end(), [](const NKikimrWhiteboard::TVDiskStateInfo& a, const NKikimrWhiteboard::TVDiskStateInfo& b) -> bool { - return VDiskIDFromVDiskID(a.GetVDiskId()) < VDiskIDFromVDiskID(b.GetVDiskId()); - }); - } - } - if (Tablets) { - { - auto& cont(*nodeInfo.MutableTablets()); - std::sort(cont.begin(), cont.end(), [](const NKikimrViewer::TTabletStateInfo& a, const NKikimrViewer::TTabletStateInfo& b) -> bool { - return a.GetType() < 
b.GetType(); - }); - } - } - } - - if (MaximumDisksPerNode.has_value()) { - result.SetMaximumDisksPerNode(MaximumDisksPerNode.value()); - } - if (noDC) { - result.SetNoDC(true); - } - if (noRack) { - result.SetNoRack(true); - } - - TStringStream json; - TProtoToJson::ProtoToJson(json, result, JsonSettings); - Send(Initiator, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), std::move(json.Str())), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } - - void HandleTimeout() { - ReplyAndPassAway(); - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - - name: ui64 - in: query - description: return ui64 as numbers - required: false - type: boolean - - name: path - in: query - description: path to schema object - required: false - type: string - - name: with - in: query - description: filter nodes by missing disks or space - required: false - type: string - - name: type - in: query - description: nodes type to get (static,dynamic,any) - required: false - type: string - - name: storage - in: query - description: return storage info - required: false - type: boolean - - name: tablets - in: query - description: return tablets info - required: false - type: boolean - - name: sort - in: query - description: sort by (NodeId,Host,DC,Rack,Version,Uptime,Memory,CPU,LoadAverage,Missing) - required: false - type: string - - name: offset - in: query - description: skip N nodes - required: false - type: integer - - name: limit - in: query - description: limit to N nodes - required: false - type: integer - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - - name: uptime - in: query - description: return only nodes with 
less uptime in sec. - required: false - type: integer - - name: problems_only - in: query - description: return only problem nodes - required: false - type: boolean - - name: filter - in: query - description: filter nodes by id or host - required: false - type: string - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Nodes info"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Information about nodes"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_pdisk_restart.h b/ydb/core/viewer/json_pdisk_restart.h deleted file mode 100644 index 98d499466eb2..000000000000 --- a/ydb/core/viewer/json_pdisk_restart.h +++ /dev/null @@ -1,225 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include "viewer.h" -#include "json_pipe_req.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - -class TJsonPDiskRestart : public TViewerPipeClient { - enum EEv { - EvRetryNodeRequest = EventSpaceBegin(NActors::TEvents::ES_PRIVATE), - EvEnd - }; - - static_assert(EvEnd < EventSpaceEnd(NActors::TEvents::ES_PRIVATE), "expect EvEnd < EventSpaceEnd(TEvents::ES_PRIVATE)"); - - struct TEvRetryNodeRequest : NActors::TEventLocal { - TEvRetryNodeRequest() - {} - }; - -protected: - using TThis = TJsonPDiskRestart; - using TBase = TViewerPipeClient; - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - ui32 Timeout = 0; - ui32 ActualRetries = 0; - ui32 Retries = 0; - TDuration RetryPeriod = TDuration::MilliSeconds(500); - - std::unique_ptr Response; - - ui32 NodeId = 0; - ui32 PDiskId = 0; - bool Force = false; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonPDiskRestart(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) - : Viewer(viewer) - , Event(ev) - {} - - void Bootstrap() { - const auto& 
params(Event->Get()->Request.GetParams()); - NodeId = FromStringWithDefault(params.Get("node_id"), 0); - PDiskId = FromStringWithDefault(params.Get("pdisk_id"), Max()); - Force = FromStringWithDefault(params.Get("force"), false); - - if (PDiskId == Max()) { - TBase::Send(Event->Sender, new NMon::TEvHttpInfoRes( - Viewer->GetHTTPBADREQUEST(Event->Get(), "text/plain", "field 'pdisk_id' is required"), - 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - return PassAway(); - } - if (Event->Get()->Request.GetMethod() != HTTP_METHOD_POST) { - TBase::Send(Event->Sender, new NMon::TEvHttpInfoRes( - Viewer->GetHTTPBADREQUEST(Event->Get(), "text/plain", "Only POST method is allowed"), - 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - return PassAway(); - } - if (Force && !Viewer->CheckAccessAdministration(Event->Get())) { - TBase::Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPFORBIDDEN(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - return PassAway(); - } - - if (!NodeId) { - NodeId = TlsActivationContext->ActorSystem()->NodeId; - } - TBase::InitConfig(params); - - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - Retries = FromStringWithDefault(params.Get("retries"), 0); - RetryPeriod = TDuration::MilliSeconds(FromStringWithDefault(params.Get("retry_period"), RetryPeriod.MilliSeconds())); - - SendRequest(); - - TBase::Become(&TThis::StateWork, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - STATEFN(StateWork) { - switch (ev->GetTypeRewrite()) { - hFunc(TEvBlobStorage::TEvControllerConfigResponse, Handle); - cFunc(TEvRetryNodeRequest::EventType, HandleRetry); - cFunc(TEvents::TEvUndelivered::EventType, Undelivered); - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void SendRequest() { - RequestBSControllerPDiskRestart(NodeId, PDiskId, Force); - } - - bool RetryRequest() { - if (Retries) { - if (++ActualRetries <= Retries) { - TBase::Schedule(RetryPeriod, new TEvRetryNodeRequest()); - return 
true; - } - } - return false; - } - - void Undelivered() { - if (!RetryRequest()) { - TBase::RequestDone(); - } - } - - void Handle(TEvBlobStorage::TEvControllerConfigResponse::TPtr& ev) { - Response.reset(ev->Release().Release()); - ReplyAndPassAway(); - } - - void HandleRetry() { - SendRequest(); - } - - void HandleTimeout() { - Send(Event->Sender, new NMon::TEvHttpInfoRes( - Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get(), "text/plain", "Timeout receiving response from BSC"), - 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } - - void PassAway() override { - TBase::PassAway(); - } - - void ReplyAndPassAway() { - NJson::TJsonValue json; - if (Response != nullptr) { - if (Response->Record.GetResponse().GetSuccess()) { - json["result"] = true; - } else { - json["result"] = false; - TString error; - bool forceRetryPossible = false; - Viewer->TranslateFromBSC2Human(Response->Record.GetResponse(), error, forceRetryPossible); - json["error"] = error; - if (forceRetryPossible && Viewer->CheckAccessAdministration(Event->Get())) { - json["forceRetryPossible"] = true; - } - } - json["debugMessage"] = Response->Record.ShortDebugString(); - TBase::Send(Event->Sender, - new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), NJson::WriteJson(json)), - 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - } else { - TBase::Send(Event->Sender, - new NMon::TEvHttpInfoRes(Viewer->GetHTTPINTERNALERROR(Event->Get(), "text/plain", "No response was received from BSC"), - 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - } - PassAway(); - } -}; - -template <> -YAML::Node TJsonRequestSwagger::GetSwagger() { - return YAML::Load(R"___( - post: - tags: - - pdisk - summary: Restart PDisk - description: Restart PDisk on the specified node - parameters: - - name: node_id - in: query - description: node identifier - type: integer - - name: pdisk_id - in: query - description: pdisk identifier - required: true - type: integer - - name: timeout - in: query - description: timeout 
in ms - required: false - type: integer - - name: force - in: query - description: attempt forced operation, ignore warnings - required: false - type: boolean - responses: - 200: - description: OK - content: - application/json: - schema: - type: object - properties: - result: - type: boolean - description: was operation successful or not - error: - type: string - description: details about failed operation - forceRetryPossible: - type: boolean - description: if true, operation can be retried with force flag - 400: - description: Bad Request - 403: - description: Forbidden - 504: - description: Gateway Timeout - )___"); -} - -} -} diff --git a/ydb/core/viewer/json_pdiskinfo.h b/ydb/core/viewer/json_pdiskinfo.h deleted file mode 100644 index 5a2ff46edc2f..000000000000 --- a/ydb/core/viewer/json_pdiskinfo.h +++ /dev/null @@ -1,59 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include "json_wb_req.h" - -namespace NKikimr { -namespace NViewer { - -template <> -struct TWhiteboardInfo { - using TResponseType = NKikimrWhiteboard::TEvPDiskStateResponse; - using TResponseEventType = TEvWhiteboard::TEvPDiskStateResponse; - using TElementType = NKikimrWhiteboard::TPDiskStateInfo; - using TElementKeyType = std::pair; - - static constexpr bool StaticNodesOnly = true; - - static ::google::protobuf::RepeatedPtrField& GetElementsField(TResponseType& response) { - return *response.MutablePDiskStateInfo(); - } - - static std::pair GetElementKey(const TElementType& type) { - return std::make_pair(type.GetNodeId(), type.GetPDiskId()); - } - - static TString GetDefaultMergeField() { - return "NodeId,PDiskId"; - } - - static void MergeResponses(TResponseType& result, TMap& responses, const TString& fields = GetDefaultMergeField()) { - if (fields == GetDefaultMergeField()) { - TWhiteboardMerger::MergeResponsesElementKey(result, responses); - } else { - TWhiteboardMerger::MergeResponses(result, responses, fields); - } - } -}; - -using TJsonPDiskInfo = 
TJsonWhiteboardRequest; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "PDisk information"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns PDisk information"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_pipe_req.cpp b/ydb/core/viewer/json_pipe_req.cpp new file mode 100644 index 000000000000..52777ea8492c --- /dev/null +++ b/ydb/core/viewer/json_pipe_req.cpp @@ -0,0 +1,847 @@ +#include "json_pipe_req.h" +#include +#include + +namespace NKikimr::NViewer { + +NTabletPipe::TClientConfig TViewerPipeClient::GetPipeClientConfig() { + NTabletPipe::TClientConfig clientConfig; + if (WithRetry) { + clientConfig.RetryPolicy = {.RetryLimitCount = 3}; + } + return clientConfig; +} + +TViewerPipeClient::~TViewerPipeClient() = default; + +TViewerPipeClient::TViewerPipeClient() = default; + +TViewerPipeClient::TViewerPipeClient(NWilson::TTraceId traceId) { + if (traceId) { + Span = {TComponentTracingLevels::THttp::TopLevel, std::move(traceId), "viewer", NWilson::EFlags::AUTO_END}; + } +} + +TViewerPipeClient::TViewerPipeClient(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev, const TString& handlerName) + : Viewer(viewer) + , Event(ev) +{ + TCgiParameters params = Event->Get()->Request.GetParams(); + if (Event->Get()->Request.GetHeader("Content-Type") == "application/json") { + NJson::TJsonValue jsonData; + if (NJson::ReadJsonTree(Event->Get()->Request.GetPostContent(), &jsonData)) { + if (jsonData.IsMap()) { + for (const auto& [key, value] : jsonData.GetMap()) { + switch (value.GetType()) { + case NJson::EJsonValueType::JSON_STRING: + case NJson::EJsonValueType::JSON_INTEGER: + case NJson::EJsonValueType::JSON_UINTEGER: + case NJson::EJsonValueType::JSON_DOUBLE: + case NJson::EJsonValueType::JSON_BOOLEAN: + params.InsertUnescaped(key, value.GetStringRobust()); + break; + default: + break; + } + } + } + } + } + InitConfig(params); + NWilson::TTraceId traceId; + TStringBuf 
traceparent = Event->Get()->Request.GetHeader("traceparent"); + if (traceparent) { + traceId = NWilson::TTraceId::FromTraceparentHeader(traceparent, TComponentTracingLevels::ProductionVerbose); + } + TStringBuf wantTrace = Event->Get()->Request.GetHeader("X-Want-Trace"); + TStringBuf traceVerbosity = Event->Get()->Request.GetHeader("X-Trace-Verbosity"); + TStringBuf traceTTL = Event->Get()->Request.GetHeader("X-Trace-TTL"); + if (!traceId && (FromStringWithDefault(wantTrace) || !traceVerbosity.empty() || !traceTTL.empty())) { + ui8 verbosity = TComponentTracingLevels::ProductionVerbose; + if (traceVerbosity) { + verbosity = FromStringWithDefault(traceVerbosity, verbosity); + verbosity = std::min(verbosity, NWilson::TTraceId::MAX_VERBOSITY); + } + ui32 ttl = Max(); + if (traceTTL) { + ttl = FromStringWithDefault(traceTTL, ttl); + ttl = std::min(ttl, NWilson::TTraceId::MAX_TIME_TO_LIVE); + } + traceId = NWilson::TTraceId::NewTraceId(verbosity, ttl); + } + if (traceId) { + Span = {TComponentTracingLevels::THttp::TopLevel, std::move(traceId), handlerName ? 
"http " + handlerName : "http viewer", NWilson::EFlags::AUTO_END}; + Span.Attribute("request_type", TString(Event->Get()->Request.GetUri().Before('?'))); + Span.Attribute("request_params", TString(Event->Get()->Request.GetUri().After('?'))); + } +} + +TActorId TViewerPipeClient::ConnectTabletPipe(NNodeWhiteboard::TTabletId tabletId) { + TPipeInfo& pipeInfo = PipeInfo[tabletId]; + if (!pipeInfo.PipeClient) { + auto pipe = NTabletPipe::CreateClient(SelfId(), tabletId, GetPipeClientConfig()); + pipeInfo.PipeClient = RegisterWithSameMailbox(pipe); + } + pipeInfo.Requests++; + return pipeInfo.PipeClient; +} + +void TViewerPipeClient::SendEvent(std::unique_ptr event) { + if (DelayedRequests.empty() && Requests < MaxRequestsInFlight) { + TActivationContext::Send(event.release()); + ++Requests; + } else { + DelayedRequests.push_back({ + .Event = std::move(event), + }); + } +} + +void TViewerPipeClient::SendRequest(TActorId recipient, IEventBase* ev, ui32 flags, ui64 cookie, NWilson::TTraceId traceId) { + SendEvent(std::make_unique(recipient, SelfId(), ev, flags, cookie, nullptr /*forwardOnNondelivery*/, std::move(traceId))); +} + +void TViewerPipeClient::SendRequestToPipe(TActorId pipe, IEventBase* ev, ui64 cookie, NWilson::TTraceId traceId) { + std::unique_ptr event = std::make_unique(pipe, SelfId(), ev, 0 /*flags*/, cookie, nullptr /*forwardOnNondelivery*/, std::move(traceId)); + event->Rewrite(TEvTabletPipe::EvSend, pipe); + SendEvent(std::move(event)); +} + +void TViewerPipeClient::SendDelayedRequests() { + while (!DelayedRequests.empty() && Requests < MaxRequestsInFlight) { + auto& request(DelayedRequests.front()); + TActivationContext::Send(request.Event.release()); + ++Requests; + DelayedRequests.pop_front(); + } +} + +TPathId TViewerPipeClient::GetPathId(const TEvTxProxySchemeCache::TEvNavigateKeySetResult& ev) { + if (ev.Request->ResultSet.size() == 1) { + if (ev.Request->ResultSet.begin()->Self) { + const auto& info = ev.Request->ResultSet.begin()->Self->Info; + 
return TPathId(info.GetSchemeshardId(), info.GetPathId()); + } + if (ev.Request->ResultSet.begin()->TableId) { + return ev.Request->ResultSet.begin()->TableId.PathId; + } + } + return {}; +} + +TString TViewerPipeClient::GetPath(const TEvTxProxySchemeCache::TEvNavigateKeySetResult& ev) { + if (ev.Request->ResultSet.size() == 1) { + return CanonizePath(ev.Request->ResultSet.begin()->Path); + } + return {}; +} + +TPathId TViewerPipeClient::GetPathId(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { + return GetPathId(*ev->Get()); +} + +TString TViewerPipeClient::GetPath(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { + return GetPath(*ev->Get()); +} + +bool TViewerPipeClient::IsSuccess(const std::unique_ptr& ev) { + return (ev->Request->ResultSet.size() > 0) && (std::find_if(ev->Request->ResultSet.begin(), ev->Request->ResultSet.end(), + [](const auto& entry) { + return entry.Status == NSchemeCache::TSchemeCacheNavigate::EStatus::Ok; + }) != ev->Request->ResultSet.end()); +} + +TString TViewerPipeClient::GetError(const std::unique_ptr& ev) { + if (ev->Request->ResultSet.size() == 0) { + return "empty response"; + } + for (const auto& entry : ev->Request->ResultSet) { + if (entry.Status != NSchemeCache::TSchemeCacheNavigate::EStatus::Ok) { + switch (entry.Status) { + case NSchemeCache::TSchemeCacheNavigate::EStatus::Ok: + return "Ok"; + case NSchemeCache::TSchemeCacheNavigate::EStatus::Unknown: + return "Unknown"; + case NSchemeCache::TSchemeCacheNavigate::EStatus::RootUnknown: + return "RootUnknown"; + case NSchemeCache::TSchemeCacheNavigate::EStatus::PathErrorUnknown: + return "PathErrorUnknown"; + case NSchemeCache::TSchemeCacheNavigate::EStatus::PathNotTable: + return "PathNotTable"; + case NSchemeCache::TSchemeCacheNavigate::EStatus::PathNotPath: + return "PathNotPath"; + case NSchemeCache::TSchemeCacheNavigate::EStatus::TableCreationNotComplete: + return "TableCreationNotComplete"; + case 
NSchemeCache::TSchemeCacheNavigate::EStatus::LookupError: + return "LookupError"; + case NSchemeCache::TSchemeCacheNavigate::EStatus::RedirectLookupError: + return "RedirectLookupError"; + case NSchemeCache::TSchemeCacheNavigate::EStatus::AccessDenied: + return "AccessDenied"; + default: + return ::ToString(static_cast(ev->Request->ResultSet.begin()->Status)); + } + } + } + return "no error"; +} + +bool TViewerPipeClient::IsSuccess(const std::unique_ptr& ev) { + return ev->Status == TEvStateStorage::TEvBoardInfo::EStatus::Ok; +} + +TString TViewerPipeClient::GetError(const std::unique_ptr& ev) { + switch (ev->Status) { + case TEvStateStorage::TEvBoardInfo::EStatus::Unknown: + return "Unknown"; + case TEvStateStorage::TEvBoardInfo::EStatus::Ok: + return "Ok"; + case TEvStateStorage::TEvBoardInfo::EStatus::NotAvailable: + return "NotAvailable"; + default: + return ::ToString(static_cast(ev->Status)); + } +} + +void TViewerPipeClient::RequestHiveDomainStats(NNodeWhiteboard::TTabletId hiveId) { + TActorId pipeClient = ConnectTabletPipe(hiveId); + THolder request = MakeHolder(); + request->Record.SetReturnFollowers(Followers); + request->Record.SetReturnMetrics(Metrics); + SendRequestToPipe(pipeClient, request.Release(), hiveId); +} + +void TViewerPipeClient::RequestHiveNodeStats(NNodeWhiteboard::TTabletId hiveId, TPathId pathId) { + TActorId pipeClient = ConnectTabletPipe(hiveId); + THolder request = MakeHolder(); + request->Record.SetReturnMetrics(Metrics); + if (pathId != TPathId()) { + request->Record.SetReturnExtendedTabletInfo(true); + request->Record.SetFilterTabletsBySchemeShardId(pathId.OwnerId); + request->Record.SetFilterTabletsByPathId(pathId.LocalPathId); + } + SendRequestToPipe(pipeClient, request.Release(), hiveId); +} + +void TViewerPipeClient::RequestHiveStorageStats(NNodeWhiteboard::TTabletId hiveId) { + TActorId pipeClient = ConnectTabletPipe(hiveId); + THolder request = MakeHolder(); + SendRequestToPipe(pipeClient, request.Release(), hiveId); +} + 
+TViewerPipeClient::TRequestResponse TViewerPipeClient::MakeViewerRequest(TNodeId nodeId, TEvViewer::TEvViewerRequest* ev, ui32 flags) { + TActorId viewerServiceId = MakeViewerID(nodeId); + TRequestResponse response(Span.CreateChild(TComponentTracingLevels::THttp::Detailed, TypeName(*ev))); + if (response.Span) { + response.Span.Attribute("target_node_id", nodeId); + TStringBuilder askFor; + askFor << ev->Record.GetLocation().NodeIdSize() << " nodes ("; + for (size_t i = 0; i < std::min(ev->Record.GetLocation().NodeIdSize(), 16); ++i) { + if (i) { + askFor << ", "; + } + askFor << ev->Record.GetLocation().GetNodeId(i); + } + if (ev->Record.GetLocation().NodeIdSize() > 16) { + askFor << ", ..."; + } + askFor << ")"; + response.Span.Attribute("ask_for", askFor); + switch (ev->Record.Request_case()) { + case NKikimrViewer::TEvViewerRequest::kTabletRequest: + response.Span.Attribute("request_type", "TabletRequest"); + break; + case NKikimrViewer::TEvViewerRequest::kSystemRequest: + response.Span.Attribute("request_type", "SystemRequest"); + break; + case NKikimrViewer::TEvViewerRequest::kQueryRequest: + response.Span.Attribute("request_type", "QueryRequest"); + break; + case NKikimrViewer::TEvViewerRequest::kRenderRequest: + response.Span.Attribute("request_type", "RenderRequest"); + break; + case NKikimrViewer::TEvViewerRequest::kAutocompleteRequest: + response.Span.Attribute("request_type", "AutocompleteRequest"); + break; + default: + response.Span.Attribute("request_type", ::ToString(static_cast(ev->Record.Request_case()))); + break; + } + } + SendRequest(viewerServiceId, ev, flags, nodeId, response.Span.GetTraceId()); + return response; +} + +TViewerPipeClient::TRequestResponse TViewerPipeClient::MakeRequestHiveDomainStats(NNodeWhiteboard::TTabletId hiveId) { + TActorId pipeClient = ConnectTabletPipe(hiveId); + THolder request = MakeHolder(); + request->Record.SetReturnFollowers(Followers); + request->Record.SetReturnMetrics(Metrics); + auto response = 
MakeRequestToPipe(pipeClient, request.Release(), hiveId); + if (response.Span) { + auto hive_id = "#" + ::ToString(hiveId); + response.Span.Attribute("hive_id", hive_id); + } + return response; +} + +TViewerPipeClient::TRequestResponse TViewerPipeClient::MakeRequestHiveStorageStats(NNodeWhiteboard::TTabletId hiveId) { + TActorId pipeClient = ConnectTabletPipe(hiveId); + THolder request = MakeHolder(); + auto response = MakeRequestToPipe(pipeClient, request.Release(), hiveId); + if (response.Span) { + auto hive_id = "#" + ::ToString(hiveId); + response.Span.Attribute("hive_id", hive_id); + } + return response; +} + +TViewerPipeClient::TRequestResponse TViewerPipeClient::MakeRequestHiveNodeStats(TTabletId hiveId, TEvHive::TEvRequestHiveNodeStats* request) { + TActorId pipeClient = ConnectTabletPipe(hiveId); + auto response = MakeRequestToPipe(pipeClient, request, hiveId); + if (response.Span) { + auto hive_id = "#" + ::ToString(hiveId); + response.Span.Attribute("hive_id", hive_id); + } + return response; +} + +TViewerPipeClient::TRequestResponse TViewerPipeClient::MakeRequestViewer(TNodeId nodeId, TEvViewer::TEvViewerRequest* request, ui32 flags) { + auto requestType = request->Record.GetRequestCase(); + auto response = MakeRequest(MakeViewerID(nodeId), request, flags, nodeId); + if (response.Span) { + TString requestTypeString; + switch (requestType) { + case NKikimrViewer::TEvViewerRequest::kTabletRequest: + requestTypeString = "TabletRequest"; + break; + case NKikimrViewer::TEvViewerRequest::kSystemRequest: + requestTypeString = "SystemRequest"; + break; + case NKikimrViewer::TEvViewerRequest::kQueryRequest: + requestTypeString = "QueryRequest"; + break; + case NKikimrViewer::TEvViewerRequest::kRenderRequest: + requestTypeString = "RenderRequest"; + break; + case NKikimrViewer::TEvViewerRequest::kAutocompleteRequest: + requestTypeString = "AutocompleteRequest"; + break; + default: + requestTypeString = ::ToString(static_cast(requestType)); + break; + } + 
response.Span.Attribute("request_type", requestTypeString); + } + return response; +} + +void TViewerPipeClient::RequestConsoleListTenants() { + TActorId pipeClient = ConnectTabletPipe(GetConsoleId()); + THolder request = MakeHolder(); + SendRequestToPipe(pipeClient, request.Release()); +} + +TViewerPipeClient::TRequestResponse TViewerPipeClient::MakeRequestConsoleListTenants() { + TActorId pipeClient = ConnectTabletPipe(GetConsoleId()); + THolder request = MakeHolder(); + return MakeRequestToPipe(pipeClient, request.Release()); +} + +TViewerPipeClient::TRequestResponse TViewerPipeClient::MakeRequestConsoleNodeConfigByTenant(TString tenant, ui64 cookie) { + TActorId pipeClient = ConnectTabletPipe(GetConsoleId()); + auto request = MakeHolder(); + request->Record.MutableNode()->SetTenant(tenant); + request->Record.AddItemKinds(static_cast(NKikimrConsole::TConfigItem::FeatureFlagsItem)); + return MakeRequestToPipe(pipeClient, request.Release(), cookie); +} + +TViewerPipeClient::TRequestResponse TViewerPipeClient::MakeRequestConsoleGetAllConfigs() { + TActorId pipeClient = ConnectTabletPipe(GetConsoleId()); + return MakeRequestToPipe(pipeClient, new NConsole::TEvConsole::TEvGetAllConfigsRequest()); +} + +void TViewerPipeClient::RequestConsoleGetTenantStatus(const TString& path) { + TActorId pipeClient = ConnectTabletPipe(GetConsoleId()); + THolder request = MakeHolder(); + request->Record.MutableRequest()->set_path(path); + SendRequestToPipe(pipeClient, request.Release()); +} + +TViewerPipeClient::TRequestResponse TViewerPipeClient::MakeRequestConsoleGetTenantStatus(const TString& path) { + TActorId pipeClient = ConnectTabletPipe(GetConsoleId()); + THolder request = MakeHolder(); + request->Record.MutableRequest()->set_path(path); + auto response = MakeRequestToPipe(pipeClient, request.Release()); + if (response.Span) { + response.Span.Attribute("path", path); + } + return response; +} + +void TViewerPipeClient::RequestBSControllerConfig() { + TActorId pipeClient = 
ConnectTabletPipe(GetBSControllerId()); + THolder request = MakeHolder(); + request->Record.MutableRequest()->AddCommand()->MutableQueryBaseConfig(); + SendRequestToPipe(pipeClient, request.Release()); +} + +void TViewerPipeClient::RequestBSControllerConfigWithStoragePools() { + TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); + THolder request = MakeHolder(); + request->Record.MutableRequest()->AddCommand()->MutableQueryBaseConfig(); + request->Record.MutableRequest()->AddCommand()->MutableReadStoragePool()->SetBoxId(Max()); + SendRequestToPipe(pipeClient, request.Release()); +} + +TViewerPipeClient::TRequestResponse TViewerPipeClient::MakeRequestBSControllerConfigWithStoragePools() { + TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); + THolder request = MakeHolder(); + request->Record.MutableRequest()->AddCommand()->MutableQueryBaseConfig(); + request->Record.MutableRequest()->AddCommand()->MutableReadStoragePool()->SetBoxId(Max()); + return MakeRequestToPipe(pipeClient, request.Release()); +} + +void TViewerPipeClient::RequestBSControllerInfo() { + TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); + THolder request = MakeHolder(); + SendRequestToPipe(pipeClient, request.Release()); +} + +void TViewerPipeClient::RequestBSControllerSelectGroups(THolder request) { + TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); + SendRequestToPipe(pipeClient, request.Release()); +} + +TViewerPipeClient::TRequestResponse TViewerPipeClient::MakeRequestBSControllerSelectGroups(THolder request, ui64 cookie) { + TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); + return MakeRequestToPipe(pipeClient, request.Release(), cookie); +} + +void TViewerPipeClient::RequestBSControllerPDiskRestart(ui32 nodeId, ui32 pdiskId, bool force) { + TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); + THolder request = MakeHolder(); + auto* restartPDisk = request->Record.MutableRequest()->AddCommand()->MutableRestartPDisk(); + 
restartPDisk->MutableTargetPDiskId()->SetNodeId(nodeId); + restartPDisk->MutableTargetPDiskId()->SetPDiskId(pdiskId); + if (force) { + request->Record.MutableRequest()->SetIgnoreDegradedGroupsChecks(true); + } + SendRequestToPipe(pipeClient, request.Release()); +} + +void TViewerPipeClient::RequestBSControllerVDiskEvict(ui32 groupId, ui32 groupGeneration, ui32 failRealmIdx, ui32 failDomainIdx, ui32 vdiskIdx, bool force) { + TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); + THolder request = MakeHolder(); + auto* evictVDisk = request->Record.MutableRequest()->AddCommand()->MutableReassignGroupDisk(); + evictVDisk->SetGroupId(groupId); + evictVDisk->SetGroupGeneration(groupGeneration); + evictVDisk->SetFailRealmIdx(failRealmIdx); + evictVDisk->SetFailDomainIdx(failDomainIdx); + evictVDisk->SetVDiskIdx(vdiskIdx); + if (force) { + request->Record.MutableRequest()->SetIgnoreDegradedGroupsChecks(true); + } + SendRequestToPipe(pipeClient, request.Release()); +} + +TViewerPipeClient::TRequestResponse TViewerPipeClient::RequestBSControllerPDiskInfo(ui32 nodeId, ui32 pdiskId) { + TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); + auto request = std::make_unique(); + request->Record.SetInclusiveFrom(true); + request->Record.SetInclusiveTo(true); + request->Record.MutableFrom()->SetNodeId(nodeId); + request->Record.MutableFrom()->SetPDiskId(pdiskId); + request->Record.MutableTo()->SetNodeId(nodeId); + request->Record.MutableTo()->SetPDiskId(pdiskId); + return MakeRequestToPipe(pipeClient, request.release()); +} + +TViewerPipeClient::TRequestResponse TViewerPipeClient::RequestBSControllerVDiskInfo(ui32 nodeId, ui32 pdiskId) { + TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); + auto request = std::make_unique(); + request->Record.SetInclusiveFrom(true); + request->Record.SetInclusiveTo(true); + request->Record.MutableFrom()->SetNodeId(nodeId); + request->Record.MutableFrom()->SetPDiskId(pdiskId); + 
request->Record.MutableFrom()->SetVSlotId(0); + request->Record.MutableTo()->SetNodeId(nodeId); + request->Record.MutableTo()->SetPDiskId(pdiskId); + request->Record.MutableTo()->SetVSlotId(std::numeric_limits::max()); + return MakeRequestToPipe(pipeClient, request.release()); +} + +TViewerPipeClient::TRequestResponse TViewerPipeClient::RequestBSControllerGroups() { + TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); + auto request = std::make_unique(); + return MakeRequestToPipe(pipeClient, request.release()); +} + +TViewerPipeClient::TRequestResponse TViewerPipeClient::RequestBSControllerPools() { + TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); + auto request = std::make_unique(); + return MakeRequestToPipe(pipeClient, request.release()); +} + +TViewerPipeClient::TRequestResponse TViewerPipeClient::RequestBSControllerVSlots() { + TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); + auto request = std::make_unique(); + return MakeRequestToPipe(pipeClient, request.release()); +} + +TViewerPipeClient::TRequestResponse TViewerPipeClient::RequestBSControllerPDisks() { + TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); + auto request = std::make_unique(); + return MakeRequestToPipe(pipeClient, request.release()); +} + +TViewerPipeClient::TRequestResponse TViewerPipeClient::RequestBSControllerStorageStats() { + TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); + return MakeRequestToPipe(pipeClient, new NSysView::TEvSysView::TEvGetStorageStatsRequest()); +} + +void TViewerPipeClient::RequestBSControllerPDiskUpdateStatus(const NKikimrBlobStorage::TUpdateDriveStatus& driveStatus, bool force) { + TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); + THolder request = MakeHolder(); + auto* updateDriveStatus = request->Record.MutableRequest()->AddCommand()->MutableUpdateDriveStatus(); + updateDriveStatus->CopyFrom(driveStatus); + if (force) { + 
request->Record.MutableRequest()->SetIgnoreDegradedGroupsChecks(true); + } + SendRequestToPipe(pipeClient, request.Release()); +} + +void TViewerPipeClient::RequestSchemeCacheNavigate(const TString& path) { + THolder request = MakeHolder(); + NSchemeCache::TSchemeCacheNavigate::TEntry entry; + entry.Path = SplitPath(path); + entry.RedirectRequired = false; + entry.Operation = NSchemeCache::TSchemeCacheNavigate::EOp::OpPath; + request->ResultSet.emplace_back(entry); + SendRequest(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(request.Release())); +} + +void TViewerPipeClient::RequestSchemeCacheNavigate(const TPathId& pathId) { + THolder request = MakeHolder(); + NSchemeCache::TSchemeCacheNavigate::TEntry entry; + entry.TableId.PathId = pathId; + entry.RequestType = NSchemeCache::TSchemeCacheNavigate::TEntry::ERequestType::ByTableId; + entry.RedirectRequired = false; + entry.Operation = NSchemeCache::TSchemeCacheNavigate::EOp::OpPath; + request->ResultSet.emplace_back(entry); + SendRequest(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(request.Release())); +} + +TViewerPipeClient::TRequestResponse TViewerPipeClient::MakeRequestSchemeCacheNavigate(const TString& path, ui64 cookie) { + THolder request = MakeHolder(); + NSchemeCache::TSchemeCacheNavigate::TEntry entry; + entry.Path = SplitPath(path); + entry.RedirectRequired = false; + entry.Operation = NSchemeCache::TSchemeCacheNavigate::EOp::OpPath; + request->ResultSet.emplace_back(entry); + auto response = MakeRequest(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(request.Release()), 0 /*flags*/, cookie); + if (response.Span) { + response.Span.Attribute("path", path); + } + return response; +} + +TViewerPipeClient::TRequestResponse TViewerPipeClient::MakeRequestSchemeCacheNavigate(TPathId pathId, ui64 cookie) { + THolder request = MakeHolder(); + NSchemeCache::TSchemeCacheNavigate::TEntry entry; + entry.TableId.PathId = pathId; + entry.RequestType = 
NSchemeCache::TSchemeCacheNavigate::TEntry::ERequestType::ByTableId; + entry.RedirectRequired = false; + entry.Operation = NSchemeCache::TSchemeCacheNavigate::EOp::OpPath; + request->ResultSet.emplace_back(entry); + auto response = MakeRequest(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(request.Release()), 0 /*flags*/, cookie); + if (response.Span) { + response.Span.Attribute("path_id", pathId.ToString()); + } + return response; +} + +void TViewerPipeClient::RequestTxProxyDescribe(const TString& path) { + THolder request(new TEvTxUserProxy::TEvNavigate()); + request->Record.MutableDescribePath()->SetPath(path); + SendRequest(MakeTxProxyID(), request.Release()); +} + +void TViewerPipeClient::RequestStateStorageEndpointsLookup(const TString& path) { + RegisterWithSameMailbox(CreateBoardLookupActor(MakeEndpointsBoardPath(path), + SelfId(), + EBoardLookupMode::Second)); + ++Requests; +} + +TViewerPipeClient::TRequestResponse TViewerPipeClient::MakeRequestStateStorageEndpointsLookup(const TString& path, ui64 cookie) { + TRequestResponse response(Span.CreateChild(TComponentTracingLevels::THttp::Detailed, "BoardLookupActor")); + RegisterWithSameMailbox(CreateBoardLookupActor(MakeEndpointsBoardPath(path), + SelfId(), + EBoardLookupMode::Second, {}, cookie)); + if (response.Span) { + response.Span.Attribute("path", path); + } + ++Requests; + return response; +} + +void TViewerPipeClient::RequestStateStorageMetadataCacheEndpointsLookup(const TString& path) { + if (!AppData()->DomainsInfo->Domain) { + return; + } + RegisterWithSameMailbox(CreateBoardLookupActor(MakeDatabaseMetadataCacheBoardPath(path), + SelfId(), + EBoardLookupMode::Second)); + ++Requests; +} + +std::vector TViewerPipeClient::GetNodesFromBoardReply(const TEvStateStorage::TEvBoardInfo& ev) { + std::vector databaseNodes; + if (ev.Status == TEvStateStorage::TEvBoardInfo::EStatus::Ok) { + for (const auto& [actorId, infoEntry] : ev.InfoEntries) { + 
databaseNodes.emplace_back(actorId.NodeId()); + } + } + std::sort(databaseNodes.begin(), databaseNodes.end()); + databaseNodes.erase(std::unique(databaseNodes.begin(), databaseNodes.end()), databaseNodes.end()); + return databaseNodes; +} + +std::vector TViewerPipeClient::GetNodesFromBoardReply(TEvStateStorage::TEvBoardInfo::TPtr& ev) { + return GetNodesFromBoardReply(*ev->Get()); +} + +void TViewerPipeClient::InitConfig(const TCgiParameters& params) { + Followers = FromStringWithDefault(params.Get("followers"), Followers); + Metrics = FromStringWithDefault(params.Get("metrics"), Metrics); + WithRetry = FromStringWithDefault(params.Get("with_retry"), WithRetry); + MaxRequestsInFlight = FromStringWithDefault(params.Get("max_requests_in_flight"), MaxRequestsInFlight); + Database = params.Get("database"); + if (!Database) { + Database = params.Get("tenant"); + } + Direct = FromStringWithDefault(params.Get("direct"), Direct); + JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), true); + JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); + if (FromStringWithDefault(params.Get("enums"), true)) { + Proto2JsonConfig.EnumMode = TProto2JsonConfig::EnumValueMode::EnumName; + } + if (!FromStringWithDefault(params.Get("ui64"), false)) { + Proto2JsonConfig.StringifyNumbers = TProto2JsonConfig::EStringifyNumbersMode::StringifyInt64Always; + } + Proto2JsonConfig.MapAsObject = true; + Proto2JsonConfig.ConvertAny = true; + Proto2JsonConfig.WriteNanAsString = true; + Timeout = TDuration::MilliSeconds(FromStringWithDefault(params.Get("timeout"), Timeout.MilliSeconds())); +} + +void TViewerPipeClient::InitConfig(const TRequestSettings& settings) { + Followers = settings.Followers; + Metrics = settings.Metrics; + WithRetry = settings.WithRetry; +} + +void TViewerPipeClient::ClosePipes() { + for (const auto& [tabletId, pipeInfo] : PipeInfo) { + if (pipeInfo.PipeClient) { + NTabletPipe::CloseClient(SelfId(), pipeInfo.PipeClient); + } + } + 
PipeInfo.clear(); +} + +ui32 TViewerPipeClient::FailPipeConnect(NNodeWhiteboard::TTabletId tabletId) { + auto itPipeInfo = PipeInfo.find(tabletId); + if (itPipeInfo != PipeInfo.end()) { + ui32 requests = itPipeInfo->second.Requests; + NTabletPipe::CloseClient(SelfId(), itPipeInfo->second.PipeClient); + PipeInfo.erase(itPipeInfo); + return requests; + } + return 0; +} + +TRequestState TViewerPipeClient::GetRequest() const { + return {Event->Get(), Span.GetTraceId()}; +} + +void TViewerPipeClient::ReplyAndPassAway(TString data, const TString& error) { + TString message = error; + Send(Event->Sender, new NMon::TEvHttpInfoRes(data, 0, NMon::IEvHttpInfoRes::EContentType::Custom)); + if (message.empty()) { + TStringBuf dataParser(data); + if (dataParser.NextTok(' ') == "HTTP/1.1") { + TStringBuf code = dataParser.NextTok(' '); + if (code.size() == 3 && code[0] != '2') { + message = dataParser.NextTok('\n'); + } + } + } + if (Span) { + if (message) { + Span.EndError(message); + } else { + Span.EndOk(); + } + } + PassAway(); +} + +TString TViewerPipeClient::GetHTTPOK(TString contentType, TString response, TInstant lastModified) { + return Viewer->GetHTTPOK(GetRequest(), std::move(contentType), std::move(response), lastModified); +} + +TString TViewerPipeClient::GetHTTPOKJSON(TString response, TInstant lastModified) { + return Viewer->GetHTTPOKJSON(GetRequest(), std::move(response), lastModified); +} + +TString TViewerPipeClient::GetHTTPOKJSON(const NJson::TJsonValue& response, TInstant lastModified) { + return GetHTTPOKJSON(NJson::WriteJson(response, false), lastModified); +} + +TString TViewerPipeClient::GetHTTPOKJSON(const google::protobuf::Message& response, TInstant lastModified) { + TStringStream json; + NProtobufJson::Proto2Json(response, json, Proto2JsonConfig); + return GetHTTPOKJSON(json.Str(), lastModified); +} + +TString TViewerPipeClient::GetHTTPGATEWAYTIMEOUT(TString contentType, TString response) { + return Viewer->GetHTTPGATEWAYTIMEOUT(GetRequest(), 
std::move(contentType), std::move(response)); +} + +TString TViewerPipeClient::GetHTTPBADREQUEST(TString contentType, TString response) { + return Viewer->GetHTTPBADREQUEST(GetRequest(), std::move(contentType), std::move(response)); +} + +TString TViewerPipeClient::GetHTTPINTERNALERROR(TString contentType, TString response) { + return Viewer->GetHTTPINTERNALERROR(GetRequest(), std::move(contentType), std::move(response)); +} + +TString TViewerPipeClient::GetHTTPFORBIDDEN(TString contentType, TString response) { + return Viewer->GetHTTPFORBIDDEN(GetRequest(), std::move(contentType), std::move(response)); +} + +TString TViewerPipeClient::MakeForward(const std::vector& nodes) { + return Viewer->MakeForward(GetRequest(), nodes); +} + +void TViewerPipeClient::RequestDone(ui32 requests) { + if (requests == 0) { + return; + } + Requests -= requests; + if (!DelayedRequests.empty()) { + SendDelayedRequests(); + } + if (Requests == 0) { + ReplyAndPassAway(); + } +} + +void TViewerPipeClient::Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev) { + if (ev->Get()->Status != NKikimrProto::OK) { + ui32 requests = FailPipeConnect(ev->Get()->TabletId); + RequestDone(requests); + } +} + +void TViewerPipeClient::HandleResolveResource(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { + if (ResourceNavigateResponse) { + ResourceNavigateResponse->Set(std::move(ev)); + if (ResourceNavigateResponse->IsOk()) { + TSchemeCacheNavigate::TEntry& entry(ResourceNavigateResponse->Get()->Request->ResultSet.front()); + SharedDatabase = CanonizePath(entry.Path); + if (SharedDatabase == AppData()->TenantName) { + Direct = true; + return Bootstrap(); // retry bootstrap without redirect this time + } + DatabaseBoardInfoResponse = MakeRequestStateStorageEndpointsLookup(SharedDatabase); + } else { + ReplyAndPassAway(GetHTTPBADREQUEST("text/plain", "Failed to resolve database - shared database not found")); + } + } +} + +void 
TViewerPipeClient::HandleResolveDatabase(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { + if (DatabaseNavigateResponse) { + DatabaseNavigateResponse->Set(std::move(ev)); + if (DatabaseNavigateResponse->IsOk()) { + TSchemeCacheNavigate::TEntry& entry(DatabaseNavigateResponse->Get()->Request->ResultSet.front()); + if (entry.DomainInfo && entry.DomainInfo->ResourcesDomainKey && entry.DomainInfo->DomainKey != entry.DomainInfo->ResourcesDomainKey) { + ResourceNavigateResponse = MakeRequestSchemeCacheNavigate(TPathId(entry.DomainInfo->ResourcesDomainKey)); + Become(&TViewerPipeClient::StateResolveResource); + return; + } + DatabaseBoardInfoResponse = MakeRequestStateStorageEndpointsLookup(CanonizePath(entry.Path)); + } else { + ReplyAndPassAway(GetHTTPBADREQUEST("text/plain", "Failed to resolve database - not found")); + } + } +} + +void TViewerPipeClient::HandleResolve(TEvStateStorage::TEvBoardInfo::TPtr& ev) { + if (DatabaseBoardInfoResponse) { + DatabaseBoardInfoResponse->Set(std::move(ev)); + if (DatabaseBoardInfoResponse->IsOk()) { + ReplyAndPassAway(MakeForward(GetNodesFromBoardReply(DatabaseBoardInfoResponse->GetRef()))); + } else { + ReplyAndPassAway(GetHTTPBADREQUEST("text/plain", "Failed to resolve database - no nodes found")); + } + } +} + +void TViewerPipeClient::HandleTimeout() { + ReplyAndPassAway(GetHTTPGATEWAYTIMEOUT()); +} + +STATEFN(TViewerPipeClient::StateResolveDatabase) { + switch (ev->GetTypeRewrite()) { + hFunc(TEvStateStorage::TEvBoardInfo, HandleResolve); + hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, HandleResolveDatabase); + cFunc(TEvents::TEvWakeup::EventType, HandleTimeout); + } +} + +STATEFN(TViewerPipeClient::StateResolveResource) { + switch (ev->GetTypeRewrite()) { + hFunc(TEvStateStorage::TEvBoardInfo, HandleResolve); + hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, HandleResolveResource); + cFunc(TEvents::TEvWakeup::EventType, HandleTimeout); + } +} + +void TViewerPipeClient::RedirectToDatabase(const TString& 
database) { + DatabaseNavigateResponse = MakeRequestSchemeCacheNavigate(database); + Become(&TViewerPipeClient::StateResolveDatabase); +} + +bool TViewerPipeClient::NeedToRedirect() { + if (Event) { + Direct |= !Event->Get()->Request.GetHeader("X-Forwarded-From-Node").empty(); // we're already forwarding + Direct |= (Database == AppData()->TenantName) || Database.empty(); // we're already on the right node or don't use database filter + if (Database && !Direct) { + RedirectToDatabase(Database); // to find some dynamic node and redirect query there + return true; + } + } + return false; +} + +void TViewerPipeClient::PassAway() { + std::sort(SubscriptionNodeIds.begin(), SubscriptionNodeIds.end()); + SubscriptionNodeIds.erase(std::unique(SubscriptionNodeIds.begin(), SubscriptionNodeIds.end()), SubscriptionNodeIds.end()); + for (TNodeId nodeId : SubscriptionNodeIds) { + Send(TActivationContext::InterconnectProxy(nodeId), new TEvents::TEvUnsubscribe()); + } + ClosePipes(); + TBase::PassAway(); +} + +void TViewerPipeClient::AddEvent(const TString& name) { + if (Span) { + Span.Event(name); + } +} + +} diff --git a/ydb/core/viewer/json_pipe_req.h b/ydb/core/viewer/json_pipe_req.h index b37026c1826d..d3d9a7a4e6c9 100644 --- a/ydb/core/viewer/json_pipe_req.h +++ b/ydb/core/viewer/json_pipe_req.h @@ -1,54 +1,69 @@ #pragma once - -#include -#include -#include -#include -#include -#include +#include "viewer.h" #include #include +#include #include +#include #include #include +#include #include #include #include -#include -#include "viewer.h" +#include +#include +#include +#include -namespace NKikimr { -namespace NViewer { +namespace NKikimr::NViewer { using namespace NKikimr; using namespace NSchemeCache; +using namespace NProtobufJson; using NNodeWhiteboard::TNodeId; +using NNodeWhiteboard::TTabletId; + +class TViewerPipeClient : public TActorBootstrapped { + using TBase = TActorBootstrapped; + +public: + static constexpr NKikimrServices::TActivity::EType ActorActivityType() { 
+ return NKikimrServices::TActivity::VIEWER_HANDLER; + } + + virtual void Bootstrap() = 0; + virtual void ReplyAndPassAway() = 0; -template -class TViewerPipeClient : public TActorBootstrapped { protected: - using TBase = TActorBootstrapped; bool Followers = true; bool Metrics = true; - bool WithRetry = true; + bool WithRetry = false; + TString Database; + TString SharedDatabase; + bool Direct = false; ui32 Requests = 0; - static constexpr ui32 MaxRequestsInFlight = 50; + ui32 MaxRequestsInFlight = 200; NWilson::TSpan Span; IViewer* Viewer = nullptr; NMon::TEvHttpInfo::TPtr Event; + TJsonSettings JsonSettings; + TProto2JsonConfig Proto2JsonConfig; + TDuration Timeout = TDuration::Seconds(10); struct TPipeInfo { TActorId PipeClient; ui32 Requests = 0; }; - std::unordered_map PipeInfo; + std::unordered_map PipeInfo; struct TDelayedRequest { std::unique_ptr Event; }; std::deque DelayedRequests; + std::vector SubscriptionNodeIds; template struct TRequestResponse { @@ -66,23 +81,30 @@ class TViewerPipeClient : public TActorBootstrapped { TRequestResponse& operator =(TRequestResponse&&) = default; void Set(std::unique_ptr&& response) { + constexpr bool hasErrorCheck = requires(const std::unique_ptr& r) {TViewerPipeClient::IsSuccess(r);}; + if constexpr (hasErrorCheck) { + if (!TViewerPipeClient::IsSuccess(response)) { + Error(TViewerPipeClient::GetError(response)); + return; + } + } if (!IsDone()) { Span.EndOk(); + Response = std::move(response); } - Response = std::move(response); } void Set(TAutoPtr>&& response) { Set(std::unique_ptr(response->Release().Release())); } - void Error(const TString& error) { + bool Error(const TString& error) { if (!IsDone()) { Span.EndError(error); - } - if (!IsOk()) { Response = error; + return true; } + return false; } bool IsOk() const { @@ -105,386 +127,192 @@ class TViewerPipeClient : public TActorBootstrapped { return std::get>(Response).get(); } - T* operator ->() { + const T* Get() const { return std::get>(Response).get(); } - 
TString GetError() const { - return std::get(Response); + T& GetRef() { + return *Get(); } - }; - NTabletPipe::TClientConfig GetPipeClientConfig() { - NTabletPipe::TClientConfig clientConfig; - if (WithRetry) { - clientConfig.RetryPolicy = {.RetryLimitCount = 3}; + const T& GetRef() const { + return *Get(); } - return clientConfig; - } - TViewerPipeClient() = default; - - TViewerPipeClient(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) - : Viewer(viewer) - , Event(ev) - { - InitConfig(Event->Get()->Request.GetParams()); - NWilson::TTraceId traceId; - TStringBuf traceparent = Event->Get()->Request.GetHeader("traceparent"); - if (traceparent) { - traceId = NWilson::TTraceId::FromTraceparentHeader(traceparent, TComponentTracingLevels::ProductionVerbose); + T* operator ->() { + return Get(); } - TStringBuf wantTrace = Event->Get()->Request.GetHeader("X-Want-Trace"); - if (!traceId && FromStringWithDefault(wantTrace)) { - traceId = NWilson::TTraceId::NewTraceId(TComponentTracingLevels::ProductionVerbose, Max()); + + const T* operator ->() const { + return Get(); } - if (traceId) { - Span = {TComponentTracingLevels::THttp::TopLevel, std::move(traceId), "http", NWilson::EFlags::AUTO_END}; - Span.Attribute("request_type", TString(Event->Get()->Request.GetUri().Before('?'))); + + T& operator *() { + return GetRef(); } - } - TActorId ConnectTabletPipe(NNodeWhiteboard::TTabletId tabletId) { - TPipeInfo& pipeInfo = PipeInfo[tabletId]; - if (!pipeInfo.PipeClient) { - auto pipe = NTabletPipe::CreateClient(TBase::SelfId(), tabletId, GetPipeClientConfig()); - pipeInfo.PipeClient = TBase::RegisterWithSameMailbox(pipe); + const T& operator *() const { + return GetRef(); } - pipeInfo.Requests++; - return pipeInfo.PipeClient; - } - void SendEvent(std::unique_ptr event) { - if (DelayedRequests.empty() && Requests < MaxRequestsInFlight) { - TActivationContext::Send(event.release()); - ++Requests; - } else { - DelayedRequests.push_back({ - .Event = std::move(event), - }); + TString 
GetError() const { + return std::get(Response); } - } - void SendRequest(const TActorId& recipient, IEventBase* ev, ui32 flags = 0, ui64 cookie = 0, NWilson::TTraceId traceId = {}) { - SendEvent(std::make_unique(recipient, TBase::SelfId(), ev, flags, cookie, nullptr/*forwardOnNondelivery*/, std::move(traceId))); - } + void Event(const TString& name) { + if (Span) { + Span.Event(name); + } + } + }; - void SendRequestToPipe(const TActorId& pipe, IEventBase* ev, ui64 cookie = 0, NWilson::TTraceId traceId = {}) { - std::unique_ptr event = std::make_unique(pipe, TBase::SelfId(), ev, 0/*flags*/, cookie, nullptr/*forwardOnNondelivery*/, std::move(traceId)); - event->Rewrite(TEvTabletPipe::EvSend, pipe); - SendEvent(std::move(event)); - } + std::optional> DatabaseNavigateResponse; + std::optional> ResourceNavigateResponse; + std::optional> DatabaseBoardInfoResponse; + + NTabletPipe::TClientConfig GetPipeClientConfig(); + + ~TViewerPipeClient(); + TViewerPipeClient(); + TViewerPipeClient(NWilson::TTraceId traceId); + TViewerPipeClient(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev, const TString& handlerName = {}); + TActorId ConnectTabletPipe(TTabletId tabletId); + void SendEvent(std::unique_ptr event); + void SendRequest(TActorId recipient, IEventBase* ev, ui32 flags = 0, ui64 cookie = 0, NWilson::TTraceId traceId = {}); + void SendRequestToPipe(TActorId pipe, IEventBase* ev, ui64 cookie = 0, NWilson::TTraceId traceId = {}); template - TRequestResponse MakeRequest(const TActorId& recipient, IEventBase* ev, ui32 flags = 0, ui64 cookie = 0) { + TRequestResponse MakeRequest(TActorId recipient, IEventBase* ev, ui32 flags = 0, ui64 cookie = 0) { TRequestResponse response(Span.CreateChild(TComponentTracingLevels::THttp::Detailed, TypeName(*ev))); SendRequest(recipient, ev, flags, cookie, response.Span.GetTraceId()); + if (flags & IEventHandle::FlagSubscribeOnSession) { + SubscriptionNodeIds.push_back(recipient.NodeId()); + } return response; } template - TRequestResponse 
MakeRequestToPipe(const TActorId& pipe, IEventBase* ev, ui64 cookie = 0) { + TRequestResponse MakeRequestToPipe(TActorId pipe, IEventBase* ev, ui64 cookie = 0) { TRequestResponse response(Span.CreateChild(TComponentTracingLevels::THttp::Detailed, TypeName(*ev))); SendRequestToPipe(pipe, ev, cookie, response.Span.GetTraceId()); return response; } - void SendDelayedRequests() { - while (!DelayedRequests.empty() && Requests < MaxRequestsInFlight) { - auto& request(DelayedRequests.front()); - TActivationContext::Send(request.Event.release()); - ++Requests; - DelayedRequests.pop_front(); - } - } - - void RequestHiveDomainStats(NNodeWhiteboard::TTabletId hiveId) { - TActorId pipeClient = ConnectTabletPipe(hiveId); - THolder request = MakeHolder(); - request->Record.SetReturnFollowers(Followers); - request->Record.SetReturnMetrics(Metrics); - SendRequestToPipe(pipeClient, request.Release(), hiveId); - } - - void RequestHiveNodeStats(NNodeWhiteboard::TTabletId hiveId, TPathId pathId) { - TActorId pipeClient = ConnectTabletPipe(hiveId); - THolder request = MakeHolder(); - request->Record.SetReturnMetrics(Metrics); - if (pathId != TPathId()) { - request->Record.SetReturnExtendedTabletInfo(true); - request->Record.SetFilterTabletsBySchemeShardId(pathId.OwnerId); - request->Record.SetFilterTabletsByPathId(pathId.LocalPathId); + template + TRequestResponse::Type> MakeWhiteboardRequest(TNodeId nodeId, TRequest* ev, ui32 flags = IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession) { + TActorId whiteboardServiceId = NNodeWhiteboard::MakeNodeWhiteboardServiceId(nodeId); + TRequestResponse::Type> response(Span.CreateChild(TComponentTracingLevels::THttp::Detailed, TypeName(*ev))); + if (response.Span) { + response.Span.Attribute("target_node_id", nodeId); } - SendRequestToPipe(pipeClient, request.Release(), hiveId); + SendRequest(whiteboardServiceId, ev, flags, nodeId, response.Span.GetTraceId()); + return response; } - void 
RequestHiveStorageStats(NNodeWhiteboard::TTabletId hiveId) { - TActorId pipeClient = ConnectTabletPipe(hiveId); - THolder request = MakeHolder(); - SendRequestToPipe(pipeClient, request.Release(), hiveId); - } + TRequestResponse MakeViewerRequest(TNodeId nodeId, TEvViewer::TEvViewerRequest* ev, ui32 flags = IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession); + void SendDelayedRequests(); + void RequestHiveDomainStats(TTabletId hiveId); + void RequestHiveNodeStats(TTabletId hiveId, TPathId pathId); + void RequestHiveStorageStats(TTabletId hiveId); - NNodeWhiteboard::TTabletId GetConsoleId() { + TTabletId GetConsoleId() { return MakeConsoleID(); } - void RequestConsoleListTenants() { - TActorId pipeClient = ConnectTabletPipe(GetConsoleId()); - THolder request = MakeHolder(); - SendRequestToPipe(pipeClient, request.Release()); - } - - void RequestConsoleGetTenantStatus(const TString& path) { - TActorId pipeClient = ConnectTabletPipe(GetConsoleId()); - THolder request = MakeHolder(); - request->Record.MutableRequest()->set_path(path); - SendRequestToPipe(pipeClient, request.Release()); - } - - NNodeWhiteboard::TTabletId GetBSControllerId() { + TTabletId GetBSControllerId() { return MakeBSControllerID(); } - void RequestBSControllerConfig() { - TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); - THolder request = MakeHolder(); - request->Record.MutableRequest()->AddCommand()->MutableQueryBaseConfig(); - SendRequestToPipe(pipeClient, request.Release()); - } - - void RequestBSControllerConfigWithStoragePools() { - TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); - THolder request = MakeHolder(); - request->Record.MutableRequest()->AddCommand()->MutableQueryBaseConfig(); - request->Record.MutableRequest()->AddCommand()->MutableReadStoragePool()->SetBoxId(Max()); - SendRequestToPipe(pipeClient, request.Release()); - } - - void RequestBSControllerInfo() { - TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); - THolder 
request = MakeHolder(); - SendRequestToPipe(pipeClient, request.Release()); - } - - void RequestBSControllerSelectGroups(THolder request) { - TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); - SendRequestToPipe(pipeClient, request.Release()); - } - - void RequestBSControllerPDiskRestart(ui32 nodeId, ui32 pdiskId, bool force = false) { - TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); - THolder request = MakeHolder(); - auto* restartPDisk = request->Record.MutableRequest()->AddCommand()->MutableRestartPDisk(); - restartPDisk->MutableTargetPDiskId()->SetNodeId(nodeId); - restartPDisk->MutableTargetPDiskId()->SetPDiskId(pdiskId); - if (force) { - request->Record.MutableRequest()->SetIgnoreDegradedGroupsChecks(true); - } - SendRequestToPipe(pipeClient, request.Release()); - } - - void RequestBSControllerVDiskEvict(ui32 groupId, ui32 groupGeneration, ui32 failRealmIdx, ui32 failDomainIdx, ui32 vdiskIdx, bool force = false) { - TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); - THolder request = MakeHolder(); - auto* evictVDisk = request->Record.MutableRequest()->AddCommand()->MutableReassignGroupDisk(); - evictVDisk->SetGroupId(groupId); - evictVDisk->SetGroupGeneration(groupGeneration); - evictVDisk->SetFailRealmIdx(failRealmIdx); - evictVDisk->SetFailDomainIdx(failDomainIdx); - evictVDisk->SetVDiskIdx(vdiskIdx); - if (force) { - request->Record.MutableRequest()->SetIgnoreDegradedGroupsChecks(true); - } - SendRequestToPipe(pipeClient, request.Release()); - } - - TRequestResponse RequestBSControllerPDiskInfo(ui32 nodeId, ui32 pdiskId) { - TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); - auto request = std::make_unique(); - request->Record.SetInclusiveFrom(true); - request->Record.SetInclusiveTo(true); - request->Record.MutableFrom()->SetNodeId(nodeId); - request->Record.MutableFrom()->SetPDiskId(pdiskId); - request->Record.MutableTo()->SetNodeId(nodeId); - request->Record.MutableTo()->SetPDiskId(pdiskId); - return 
MakeRequestToPipe(pipeClient, request.release(), 0/*cookie*/); - } - - TRequestResponse RequestBSControllerVDiskInfo(ui32 nodeId, ui32 pdiskId) { - TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); - auto request = std::make_unique(); - request->Record.SetInclusiveFrom(true); - request->Record.SetInclusiveTo(true); - request->Record.MutableFrom()->SetNodeId(nodeId); - request->Record.MutableFrom()->SetPDiskId(pdiskId); - request->Record.MutableFrom()->SetVSlotId(0); - request->Record.MutableTo()->SetNodeId(nodeId); - request->Record.MutableTo()->SetPDiskId(pdiskId); - request->Record.MutableTo()->SetVSlotId(std::numeric_limits::max()); - return MakeRequestToPipe(pipeClient, request.release(), 0/*cookie*/); - } - - void RequestBSControllerPDiskUpdateStatus(const NKikimrBlobStorage::TUpdateDriveStatus& driveStatus, bool force = false) { - TActorId pipeClient = ConnectTabletPipe(GetBSControllerId()); - THolder request = MakeHolder(); - auto* updateDriveStatus = request->Record.MutableRequest()->AddCommand()->MutableUpdateDriveStatus(); - updateDriveStatus->CopyFrom(driveStatus); - if (force) { - request->Record.MutableRequest()->SetIgnoreDegradedGroupsChecks(true); - } - SendRequestToPipe(pipeClient, request.Release()); - } - - void RequestSchemeCacheNavigate(const TString& path) { - THolder request = MakeHolder(); - NSchemeCache::TSchemeCacheNavigate::TEntry entry; - entry.Path = SplitPath(path); - entry.RedirectRequired = false; - entry.Operation = NSchemeCache::TSchemeCacheNavigate::EOp::OpPath; - request->ResultSet.emplace_back(entry); - SendRequest(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(request.Release())); - } - - void RequestSchemeCacheNavigate(const TPathId& pathId) { - THolder request = MakeHolder(); - NSchemeCache::TSchemeCacheNavigate::TEntry entry; - entry.TableId.PathId = pathId; - entry.RequestType = NSchemeCache::TSchemeCacheNavigate::TEntry::ERequestType::ByTableId; - entry.RedirectRequired = false; - 
entry.Operation = NSchemeCache::TSchemeCacheNavigate::EOp::OpPath; - request->ResultSet.emplace_back(entry); - SendRequest(MakeSchemeCacheID(), new TEvTxProxySchemeCache::TEvNavigateKeySet(request.Release())); - } - - void RequestTxProxyDescribe(const TString& path) { - THolder request(new TEvTxUserProxy::TEvNavigate()); - request->Record.MutableDescribePath()->SetPath(path); - SendRequest(MakeTxProxyID(), request.Release()); - } - - void RequestStateStorageEndpointsLookup(const TString& path) { - TBase::RegisterWithSameMailbox(CreateBoardLookupActor(MakeEndpointsBoardPath(path), - TBase::SelfId(), - EBoardLookupMode::Second)); - ++Requests; - } - - void RequestStateStorageMetadataCacheEndpointsLookup(const TString& path) { - if (!AppData()->DomainsInfo->Domain) { - return; - } - TBase::RegisterWithSameMailbox(CreateBoardLookupActor(MakeDatabaseMetadataCacheBoardPath(path), - TBase::SelfId(), - EBoardLookupMode::Second)); - ++Requests; - } - - std::vector GetNodesFromBoardReply(TEvStateStorage::TEvBoardInfo::TPtr& ev) { - std::vector databaseNodes; - if (ev->Get()->Status == TEvStateStorage::TEvBoardInfo::EStatus::Ok) { - for (const auto& [actorId, infoEntry] : ev->Get()->InfoEntries) { - databaseNodes.emplace_back(actorId.NodeId()); - } - } - std::sort(databaseNodes.begin(), databaseNodes.end()); - databaseNodes.erase(std::unique(databaseNodes.begin(), databaseNodes.end()), databaseNodes.end()); - return databaseNodes; - } - - void InitConfig(const TCgiParameters& params) { - Followers = FromStringWithDefault(params.Get("followers"), Followers); - Metrics = FromStringWithDefault(params.Get("metrics"), Metrics); - WithRetry = FromStringWithDefault(params.Get("with_retry"), WithRetry); - } - - void InitConfig(const TRequestSettings& settings) { - Followers = settings.Followers; - Metrics = settings.Metrics; - WithRetry = settings.WithRetry; - } - - void ClosePipes() { - for (const auto& [tabletId, pipeInfo] : PipeInfo) { - if (pipeInfo.PipeClient) { - 
NTabletPipe::CloseClient(TBase::SelfId(), pipeInfo.PipeClient); - } - } - PipeInfo.clear(); - } - - ui32 FailPipeConnect(NNodeWhiteboard::TTabletId tabletId) { - auto itPipeInfo = PipeInfo.find(tabletId); - if (itPipeInfo != PipeInfo.end()) { - ui32 requests = itPipeInfo->second.Requests; - NTabletPipe::CloseClient(TBase::SelfId(), itPipeInfo->second.PipeClient); - PipeInfo.erase(itPipeInfo); - return requests; - } - return 0; - } - - void RequestDone(ui32 requests = 1) { - Requests -= requests; - if (!DelayedRequests.empty()) { - SendDelayedRequests(); - } - if (Requests == 0) { - static_cast(this)->ReplyAndPassAway(); - } - } - - void Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev) { - if (ev->Get()->Status != NKikimrProto::OK) { - ui32 requests = FailPipeConnect(ev->Get()->TabletId); - RequestDone(requests); - } - } - - void PassAway() override { - ClosePipes(); - TBase::PassAway(); - } - - TRequestState GetRequest() const { - return {Event->Get(), Span.GetTraceId()}; - } - - void ReplyAndPassAway(TString data, const TString& error = {}) { - TBase::Send(Event->Sender, new NMon::TEvHttpInfoRes(data, 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - if (Span) { - if (error) { - Span.EndError(error); - } else { - Span.EndOk(); - } - } - PassAway(); - } - - TString GetHTTPOK(TString contentType = {}, TString response = {}, TInstant lastModified = {}) { - return Viewer->GetHTTPOK(GetRequest(), contentType, response, lastModified); - } - - TString GetHTTPOKJSON(TString response = {}, TInstant lastModified = {}) { - return Viewer->GetHTTPOKJSON(GetRequest(), response, lastModified); - } - - TString GetHTTPGATEWAYTIMEOUT(TString contentType = {}, TString response = {}) { - return Viewer->GetHTTPGATEWAYTIMEOUT(GetRequest(), contentType, response); - } - - TString GetHTTPBADREQUEST(TString contentType = {}, TString response = {}) { - return Viewer->GetHTTPBADREQUEST(GetRequest(), contentType, response); - } - - TString GetHTTPINTERNALERROR(TString contentType = {}, 
TString response = {}) { - return Viewer->GetHTTPINTERNALERROR(GetRequest(), contentType, response); - } - - TString MakeForward(const std::vector& nodes) { - return Viewer->MakeForward(GetRequest(), nodes); - } + static TPathId GetPathId(const TEvTxProxySchemeCache::TEvNavigateKeySetResult& ev); + static TString GetPath(const TEvTxProxySchemeCache::TEvNavigateKeySetResult& ev); + + static TPathId GetPathId(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev); + static TString GetPath(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev); + + static bool IsSuccess(const std::unique_ptr& ev); + static TString GetError(const std::unique_ptr& ev); + + static bool IsSuccess(const std::unique_ptr& ev); + static TString GetError(const std::unique_ptr& ev); + + TRequestResponse MakeRequestHiveDomainStats(TTabletId hiveId); + TRequestResponse MakeRequestHiveStorageStats(TTabletId hiveId); + TRequestResponse MakeRequestHiveNodeStats(TTabletId hiveId, TEvHive::TEvRequestHiveNodeStats* request); + void RequestConsoleListTenants(); + TRequestResponse MakeRequestConsoleListTenants(); + TRequestResponse MakeRequestConsoleNodeConfigByTenant(TString tenant, ui64 cookie = 0); + TRequestResponse MakeRequestConsoleGetAllConfigs(); + void RequestConsoleGetTenantStatus(const TString& path); + TRequestResponse MakeRequestConsoleGetTenantStatus(const TString& path); + void RequestBSControllerConfig(); + void RequestBSControllerConfigWithStoragePools(); + TRequestResponse MakeRequestBSControllerConfigWithStoragePools(); + void RequestBSControllerInfo(); + void RequestBSControllerSelectGroups(THolder request); + TRequestResponse MakeRequestBSControllerSelectGroups(THolder request, ui64 cookie = 0); + void RequestBSControllerPDiskRestart(ui32 nodeId, ui32 pdiskId, bool force = false); + void RequestBSControllerVDiskEvict(ui32 groupId, ui32 groupGeneration, ui32 failRealmIdx, ui32 failDomainIdx, ui32 vdiskIdx, bool force = false); + TRequestResponse RequestBSControllerPDiskInfo(ui32 
nodeId, ui32 pdiskId); + TRequestResponse RequestBSControllerVDiskInfo(ui32 nodeId, ui32 pdiskId); + TRequestResponse RequestBSControllerGroups(); + TRequestResponse RequestBSControllerPools(); + TRequestResponse RequestBSControllerVSlots(); + TRequestResponse RequestBSControllerPDisks(); + TRequestResponse RequestBSControllerStorageStats(); + void RequestBSControllerPDiskUpdateStatus(const NKikimrBlobStorage::TUpdateDriveStatus& driveStatus, bool force = false); + void RequestSchemeCacheNavigate(const TString& path); + void RequestSchemeCacheNavigate(const TPathId& pathId); + TRequestResponse MakeRequestSchemeCacheNavigate(const TString& path, ui64 cookie = 0); + TRequestResponse MakeRequestSchemeCacheNavigate(TPathId pathId, ui64 cookie = 0); + TRequestResponse MakeRequestViewer(TNodeId nodeId, TEvViewer::TEvViewerRequest* request, ui32 flags = 0); + void RequestTxProxyDescribe(const TString& path); + void RequestStateStorageEndpointsLookup(const TString& path); + void RequestStateStorageMetadataCacheEndpointsLookup(const TString& path); + TRequestResponse MakeRequestStateStorageEndpointsLookup(const TString& path, ui64 cookie = 0); + std::vector GetNodesFromBoardReply(TEvStateStorage::TEvBoardInfo::TPtr& ev); + std::vector GetNodesFromBoardReply(const TEvStateStorage::TEvBoardInfo& ev); + void InitConfig(const TCgiParameters& params); + void InitConfig(const TRequestSettings& settings); + void ClosePipes(); + ui32 FailPipeConnect(TTabletId tabletId); + + bool IsLastRequest() const { + return Requests == 1; + } + + bool WaitingForResponse() const { + return Requests != 0; + } + + bool NoMoreRequests(ui32 requestsDone = 0) const { + return Requests == requestsDone; + } + + TRequestState GetRequest() const; + void ReplyAndPassAway(TString data, const TString& error = {}); + + TString GetHTTPOK(TString contentType = {}, TString response = {}, TInstant lastModified = {}); + TString GetHTTPOKJSON(TString response = {}, TInstant lastModified = {}); + TString 
GetHTTPOKJSON(const NJson::TJsonValue& response, TInstant lastModified = {}); + TString GetHTTPOKJSON(const google::protobuf::Message& response, TInstant lastModified = {}); + TString GetHTTPGATEWAYTIMEOUT(TString contentType = {}, TString response = {}); + TString GetHTTPBADREQUEST(TString contentType = {}, TString response = {}); + TString GetHTTPINTERNALERROR(TString contentType = {}, TString response = {}); + TString GetHTTPFORBIDDEN(TString contentType = {}, TString response = {}); + TString MakeForward(const std::vector& nodes); + + void RequestDone(ui32 requests = 1); + void AddEvent(const TString& name); + void Handle(TEvTabletPipe::TEvClientConnected::TPtr& ev); + void HandleResolveDatabase(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev); + void HandleResolveResource(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev); + void HandleResolve(TEvStateStorage::TEvBoardInfo::TPtr& ev); + STATEFN(StateResolveDatabase); + STATEFN(StateResolveResource); + void RedirectToDatabase(const TString& database); + bool NeedToRedirect(); + void HandleTimeout(); + void PassAway() override; }; } -} diff --git a/ydb/core/viewer/json_pqconsumerinfo.h b/ydb/core/viewer/json_pqconsumerinfo.h deleted file mode 100644 index 0071ec99e8bd..000000000000 --- a/ydb/core/viewer/json_pqconsumerinfo.h +++ /dev/null @@ -1,190 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include "viewer.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - - -class TJsonPQConsumerInfo : public TActorBootstrapped { - using TBase = TActorBootstrapped; - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - NKikimrClient::TResponse Result; - TJsonSettings JsonSettings; - TString Topic; - TString Client; - TString DC; - ui32 Version = 0; - ui32 Timeout = 0; - ui32 Requests = 0; - ui32 Responses = 0; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return 
NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonPQConsumerInfo( - IViewer* viewer, - NMon::TEvHttpInfo::TPtr& ev - ) - : Viewer(viewer) - , Event(ev) - {} - - void Bootstrap(const TActorContext& ctx) { - const auto& params(Event->Get()->Request.GetParams()); - JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), false); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - Topic = params.Get("topic"); - Version = FromStringWithDefault(params.Get("version"), 0); - DC = params.Get("dc"); - //TODO: make here list of topics - Client = params.Get("client"); - if (Version >= 3) { - Topic = "rt3." + DC + "--" + NPersQueue::ConvertNewTopicName(Topic); - Client = NPersQueue::ConvertNewConsumerName(Client, ctx); - } else { - size_t pos = Topic.rfind('/'); - if (pos != TString::npos) { - Topic = Topic.substr(pos + 1); - } - } - { - NKikimrClient::TPersQueueRequest request; - request.MutableMetaRequest()->MutableCmdGetPartitionStatus()->SetClientId(Client); - request.MutableMetaRequest()->MutableCmdGetPartitionStatus()->AddTopicRequest()->SetTopic(Topic); - ctx.Register(NMsgBusProxy::CreateActorServerPersQueue( - ctx.SelfID, - request, - NMsgBusProxy::CreatePersQueueMetaCacheV2Id() - )); - ++Requests; - } - { - NKikimrClient::TPersQueueRequest request; - request.MutableMetaRequest()->MutableCmdGetReadSessionsInfo()->SetClientId(Client); - request.MutableMetaRequest()->MutableCmdGetReadSessionsInfo()->AddTopic(Topic); - ctx.Register(NMsgBusProxy::CreateActorServerPersQueue( - ctx.SelfID, - request, - NMsgBusProxy::CreatePersQueueMetaCacheV2Id() - )); - ++Requests; - } - Become(&TThis::StateRequestedTopicInfo, ctx, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - void Die(const TActorContext& ctx) override { - TBase::Die(ctx); - } - - STFUNC(StateRequestedTopicInfo) { - switch (ev->GetTypeRewrite()) { - HFunc(TEvPersQueue::TEvResponse, 
Handle); - CFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void Handle(TEvPersQueue::TEvResponse::TPtr &ev, const TActorContext &ctx) { - Result.MergeFrom(ev->Get()->Record); - if (++Responses == Requests) { - ReplyAndDie(ctx); - } - } - - void ReplyAndDie(const TActorContext &ctx) { - TStringStream json; - TProtoToJson::ProtoToJson(json, Result.GetMetaResponse(), JsonSettings); - ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), json.Str()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); - } - - void HandleTimeout(const TActorContext &ctx) { - ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: topic - in: query - description: topic name - required: true - type: string - - name: dc - in: query - description: dc name (required with version >= 3) - required: false - type: string - default: "" - - name: version - in: query - description: query version - required: false - type: integer - default: 0 - - name: client - in: query - description: client name - required: true - type: string - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - default: false - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - default: false - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - default: 10000 - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Consumer-topic metrics"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return 
"Returns consumer-topic metrics"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_query.h b/ydb/core/viewer/json_query.h deleted file mode 100644 index 0378f12d516e..000000000000 --- a/ydb/core/viewer/json_query.h +++ /dev/null @@ -1,744 +0,0 @@ -#pragma once -#include "viewer.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "json_pipe_req.h" -#include "viewer_request.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; -using namespace NMonitoring; -using ::google::protobuf::FieldDescriptor; -using namespace NNodeWhiteboard; - -class TJsonQuery : public TViewerPipeClient { - using TThis = TJsonQuery; - using TBase = TViewerPipeClient; - TJsonSettings JsonSettings; - ui32 Timeout = 0; - TVector ResultSets; - TString Query; - TString Database; - TString Action; - TString Stats; - TString Syntax; - TString QueryId; - TString TransactionMode; - bool Direct = false; - bool IsBase64Encode = true; - - enum ESchemaType { - Classic, - Modern, - Multi, - Ydb, - }; - ESchemaType Schema = ESchemaType::Classic; - TRequestResponse CreateSessionResponse; - TRequestResponse QueryResponse; - TString SessionId; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - ESchemaType StringToSchemaType(const TString& schemaStr) { - if (schemaStr == "classic") { - return ESchemaType::Classic; - } else if (schemaStr == "modern") { - return ESchemaType::Modern; - } else if (schemaStr == "multi") { - return ESchemaType::Multi; - } else if (schemaStr == "ydb") { - return ESchemaType::Ydb; - } else { - return ESchemaType::Classic; - } - } - - void ParseCgiParameters(const TCgiParameters& params) { - JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), false); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); - Timeout = 
FromStringWithDefault(params.Get("timeout"), 60000); - Query = params.Get("query"); - Database = params.Get("database"); - Stats = params.Get("stats"); - Action = params.Get("action"); - TString schemaStr = params.Get("schema"); - Schema = StringToSchemaType(schemaStr); - Syntax = params.Get("syntax"); - QueryId = params.Get("query_id"); - TransactionMode = params.Get("transaction_mode"); - Direct = FromStringWithDefault(params.Get("direct"), Direct); - IsBase64Encode = FromStringWithDefault(params.Get("base64"), true); - } - - bool ParsePostContent(const TStringBuf& content) { - static NJson::TJsonReaderConfig JsonConfig; - NJson::TJsonValue requestData; - bool success = NJson::ReadJsonTree(content, &JsonConfig, &requestData); - if (success) { - Query = Query.empty() ? requestData["query"].GetStringSafe({}) : Query; - Database = Database.empty() ? requestData["database"].GetStringSafe({}) : Database; - Stats = Stats.empty() ? requestData["stats"].GetStringSafe({}) : Stats; - Action = Action.empty() ? requestData["action"].GetStringSafe({}) : Action; - Syntax = Syntax.empty() ? requestData["syntax"].GetStringSafe({}) : Syntax; - QueryId = QueryId.empty() ? requestData["query_id"].GetStringSafe({}) : QueryId; - TransactionMode = TransactionMode.empty() ? 
requestData["transaction_mode"].GetStringSafe({}) : TransactionMode; - } - return success; - } - - bool IsPostContent() const { - return NViewer::IsPostContent(Event); - } - - TJsonQuery(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) - : TBase(viewer, ev) - { - } - - void Bootstrap() { - const auto& params(Event->Get()->Request.GetParams()); - InitConfig(params); - ParseCgiParameters(params); - if (IsPostContent()) { - TStringBuf content = Event->Get()->Request.GetPostContent(); - if (!ParsePostContent(content)) { - return TBase::ReplyAndPassAway(GetHTTPBADREQUEST("text/plain", "Bad content received"), "BadRequest"); - } - } - if (Query.empty() && Action != "cancel-query") { - return TBase::ReplyAndPassAway(GetHTTPBADREQUEST("text/plain", "Query is empty"), "EmptyQuery"); - } - - Direct |= Event->Get()->Request.GetUri().StartsWith("/node/"); // we're already forwarding - Direct |= (Database == AppData()->TenantName); // we're already on the right node - - if (Database && !Direct) { - BLOG_TRACE("Requesting StateStorageEndpointsLookup for " << Database); - RequestStateStorageEndpointsLookup(Database); // to find some dynamic node and redirect query there - } else { - SendKpqProxyRequest(); - } - Become(&TThis::StateWork, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - void HandleReply(TEvStateStorage::TEvBoardInfo::TPtr& ev) { - BLOG_TRACE("Received TEvBoardInfo"); - TBase::ReplyAndPassAway(MakeForward(GetNodesFromBoardReply(ev))); - } - - void PassAway() override { - if (QueryId) { - Viewer->EndRunningQuery(QueryId, SelfId()); - } - if (SessionId) { - auto event = std::make_unique(); - event->Record.MutableRequest()->SetSessionId(SessionId); - BLOG_TRACE("Closing session " << SessionId); - Send(NKqp::MakeKqpProxyID(SelfId().NodeId()), event.release()); - } - TBase::PassAway(); - BLOG_TRACE("PassAway()"); - } - - STATEFN(StateWork) { - switch (ev->GetTypeRewrite()) { - hFunc(TEvStateStorage::TEvBoardInfo, HandleReply); - 
hFunc(NKqp::TEvKqp::TEvCreateSessionResponse, HandleReply); - hFunc(NKqp::TEvKqp::TEvQueryResponse, HandleReply); - hFunc(NKqp::TEvKqp::TEvAbortExecution, HandleReply); - hFunc(NKqp::TEvKqp::TEvPingSessionResponse, HandleReply); - hFunc(NKqp::TEvKqpExecuter::TEvStreamData, HandleReply); - hFunc(NKqp::TEvKqpExecuter::TEvStreamProfile, HandleReply); - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void SendKpqProxyRequest() { - if (QueryId) { - TActorId actorId = Viewer->FindRunningQuery(QueryId); - if (actorId) { - auto event = std::make_unique(); - Ydb::Issue::IssueMessage* issue = event->Record.AddIssues(); - issue->set_message("Query was cancelled"); - issue->set_severity(NYql::TSeverityIds::S_ERROR); - Send(actorId, event.release()); - - if (Action == "cancel-query") { - return TBase::ReplyAndPassAway(GetHTTPOK("text/plain", "Query was cancelled")); - } - } else { - if (Action == "cancel-query") { - return TBase::ReplyAndPassAway(GetHTTPBADREQUEST("text/plain", "Query not found"), "BadRequest"); - } - } - Viewer->AddRunningQuery(QueryId, SelfId()); - } - - auto event = std::make_unique(); - if (Database) { - event->Record.MutableRequest()->SetDatabase(Database); - if (Span) { - Span.Attribute("database", Database); - } - } - BLOG_TRACE("Creating session"); - CreateSessionResponse = MakeRequest(NKqp::MakeKqpProxyID(SelfId().NodeId()), event.release()); - } - - void SetTransactionMode(NKikimrKqp::TQueryRequest& request) { - if (TransactionMode == "serializable-read-write") { - request.mutable_txcontrol()->mutable_begin_tx()->mutable_serializable_read_write(); - request.mutable_txcontrol()->set_commit_tx(true); - } else if (TransactionMode == "online-read-only") { - request.mutable_txcontrol()->mutable_begin_tx()->mutable_online_read_only(); - request.mutable_txcontrol()->set_commit_tx(true); - } else if (TransactionMode == "stale-read-only") { - request.mutable_txcontrol()->mutable_begin_tx()->mutable_stale_read_only(); - 
request.mutable_txcontrol()->set_commit_tx(true); - } else if (TransactionMode == "snapshot-read-only") { - request.mutable_txcontrol()->mutable_begin_tx()->mutable_snapshot_read_only(); - request.mutable_txcontrol()->set_commit_tx(true); - } - } - - void HandleReply(NKqp::TEvKqp::TEvCreateSessionResponse::TPtr& ev) { - if (ev->Get()->Record.GetYdbStatus() == Ydb::StatusIds::SUCCESS) { - CreateSessionResponse.Set(std::move(ev)); - } else { - CreateSessionResponse.Error("FailedToCreateSession"); - return TBase::ReplyAndPassAway( - GetHTTPINTERNALERROR("text/plain", - TStringBuilder() << "Failed to create session, error " << ev->Get()->Record.GetYdbStatus()), "InternalError"); - } - SessionId = CreateSessionResponse->Record.GetResponse().GetSessionId(); - BLOG_TRACE("Session created " << SessionId); - - { - auto event = std::make_unique(); - event->Record.MutableRequest()->SetSessionId(SessionId); - ActorIdToProto(SelfId(), event->Record.MutableRequest()->MutableExtSessionCtrlActorId()); - Send(NKqp::MakeKqpProxyID(SelfId().NodeId()), event.release()); - } - - auto event = MakeHolder(); - NKikimrKqp::TQueryRequest& request = *event->Record.MutableRequest(); - request.SetQuery(Query); - request.SetSessionId(SessionId); - if (Database) { - request.SetDatabase(Database); - } - if (Event->Get()->UserToken) { - event->Record.SetUserToken(Event->Get()->UserToken); - } - if (Action.empty() || Action == "execute-script" || Action == "execute") { - request.SetAction(NKikimrKqp::QUERY_ACTION_EXECUTE); - request.SetType(NKikimrKqp::QUERY_TYPE_SQL_SCRIPT); - request.SetKeepSession(false); - } else if (Action == "execute-query") { - request.SetAction(NKikimrKqp::QUERY_ACTION_EXECUTE); - request.SetType(NKikimrKqp::QUERY_TYPE_SQL_GENERIC_QUERY); - request.SetKeepSession(false); - SetTransactionMode(request); - } else if (Action == "explain-query") { - request.SetAction(NKikimrKqp::QUERY_ACTION_EXPLAIN); - request.SetType(NKikimrKqp::QUERY_TYPE_SQL_GENERIC_QUERY); - 
request.SetKeepSession(false); - } else if (Action == "execute-scan") { - request.SetAction(NKikimrKqp::QUERY_ACTION_EXECUTE); - request.SetType(NKikimrKqp::QUERY_TYPE_SQL_SCAN); - request.SetKeepSession(false); - } else if (Action == "execute-data") { - request.SetAction(NKikimrKqp::QUERY_ACTION_EXECUTE); - request.SetType(NKikimrKqp::QUERY_TYPE_SQL_DML); - request.SetKeepSession(false); - SetTransactionMode(request); - } else if (Action == "explain" || Action == "explain-ast" || Action == "explain-data") { - request.SetAction(NKikimrKqp::QUERY_ACTION_EXPLAIN); - request.SetType(NKikimrKqp::QUERY_TYPE_SQL_DML); - } else if (Action == "explain-scan") { - request.SetAction(NKikimrKqp::QUERY_ACTION_EXPLAIN); - request.SetType(NKikimrKqp::QUERY_TYPE_SQL_SCAN); - } else if (Action == "explain-script") { - request.SetAction(NKikimrKqp::QUERY_ACTION_EXPLAIN); - request.SetType(NKikimrKqp::QUERY_TYPE_SQL_SCRIPT); - } - if (Stats == "profile") { - request.SetStatsMode(NYql::NDqProto::DQ_STATS_MODE_PROFILE); - request.SetCollectStats(Ydb::Table::QueryStatsCollection::STATS_COLLECTION_PROFILE); - } else if (Stats == "full") { - request.SetStatsMode(NYql::NDqProto::DQ_STATS_MODE_FULL); - request.SetCollectStats(Ydb::Table::QueryStatsCollection::STATS_COLLECTION_FULL); - } - if (Syntax == "yql_v1") { - request.SetSyntax(Ydb::Query::Syntax::SYNTAX_YQL_V1); - } else if (Syntax == "pg") { - request.SetSyntax(Ydb::Query::Syntax::SYNTAX_PG); - } - ActorIdToProto(SelfId(), event->Record.MutableRequestActorId()); - QueryResponse = MakeRequest(NKqp::MakeKqpProxyID(SelfId().NodeId()), event.Release()); - BLOG_TRACE("Query sent"); - } - -private: - NJson::TJsonValue ColumnPrimitiveValueToJsonValue(NYdb::TValueParser& valueParser) { - switch (const auto primitive = valueParser.GetPrimitiveType()) { - case NYdb::EPrimitiveType::Bool: - return valueParser.GetBool(); - case NYdb::EPrimitiveType::Int8: - return valueParser.GetInt8(); - case NYdb::EPrimitiveType::Uint8: - return 
valueParser.GetUint8(); - case NYdb::EPrimitiveType::Int16: - return valueParser.GetInt16(); - case NYdb::EPrimitiveType::Uint16: - return valueParser.GetUint16(); - case NYdb::EPrimitiveType::Int32: - return valueParser.GetInt32(); - case NYdb::EPrimitiveType::Uint32: - return valueParser.GetUint32(); - case NYdb::EPrimitiveType::Int64: - return TStringBuilder() << valueParser.GetInt64(); - case NYdb::EPrimitiveType::Uint64: - return TStringBuilder() << valueParser.GetUint64(); - case NYdb::EPrimitiveType::Float: - return valueParser.GetFloat(); - case NYdb::EPrimitiveType::Double: - return valueParser.GetDouble(); - case NYdb::EPrimitiveType::Utf8: - return valueParser.GetUtf8(); - case NYdb::EPrimitiveType::Date: - return valueParser.GetDate().ToString(); - case NYdb::EPrimitiveType::Datetime: - return valueParser.GetDatetime().ToString(); - case NYdb::EPrimitiveType::Timestamp: - return valueParser.GetTimestamp().ToString(); - case NYdb::EPrimitiveType::Interval: - return TStringBuilder() << valueParser.GetInterval(); - case NYdb::EPrimitiveType::Date32: - return valueParser.GetInt32(); - case NYdb::EPrimitiveType::Datetime64: - return valueParser.GetDatetime64(); - case NYdb::EPrimitiveType::Timestamp64: - return valueParser.GetTimestamp64(); - case NYdb::EPrimitiveType::Interval64: - return valueParser.GetInterval64(); - case NYdb::EPrimitiveType::TzDate: - return valueParser.GetTzDate(); - case NYdb::EPrimitiveType::TzDatetime: - return valueParser.GetTzDatetime(); - case NYdb::EPrimitiveType::TzTimestamp: - return valueParser.GetTzTimestamp(); - case NYdb::EPrimitiveType::String: - return IsBase64Encode ? 
Base64Encode(valueParser.GetString()) : valueParser.GetString(); - case NYdb::EPrimitiveType::Yson: - return valueParser.GetYson(); - case NYdb::EPrimitiveType::Json: - return valueParser.GetJson(); - case NYdb::EPrimitiveType::JsonDocument: - return valueParser.GetJsonDocument(); - case NYdb::EPrimitiveType::DyNumber: - return valueParser.GetDyNumber(); - case NYdb::EPrimitiveType::Uuid: - return valueParser.GetUuid().ToString(); - default: - Y_ENSURE(false, TStringBuilder() << "Unsupported type: " << primitive); } - } - - NJson::TJsonValue ColumnValueToJsonValue(NYdb::TValueParser& valueParser) { - switch (valueParser.GetKind()) { - case NYdb::TTypeParser::ETypeKind::Primitive: - return ColumnPrimitiveValueToJsonValue(valueParser); - - case NYdb::TTypeParser::ETypeKind::Optional: - valueParser.OpenOptional(); - if (valueParser.IsNull()) { - return NJson::JSON_NULL; - } - switch(valueParser.GetKind()) { - case NYdb::TTypeParser::ETypeKind::Primitive: - return ColumnPrimitiveValueToJsonValue(valueParser); - case NYdb::TTypeParser::ETypeKind::Decimal: - return valueParser.GetDecimal().ToString(); - default: - return NJson::JSON_UNDEFINED; - } - - case NYdb::TTypeParser::ETypeKind::Tagged: - valueParser.OpenTagged(); - return ColumnValueToJsonValue(valueParser); - - case NYdb::TTypeParser::ETypeKind::Pg: - return valueParser.GetPg().Content_; - - default: - return NJson::JSON_UNDEFINED; - } - } - - void HandleReply(NKqp::TEvKqp::TEvQueryResponse::TPtr& ev) { - BLOG_TRACE("Query response received"); - NJson::TJsonValue jsonResponse; - if (ev->Get()->Record.GetRef().GetYdbStatus() == Ydb::StatusIds::SUCCESS) { - QueryResponse.Set(std::move(ev)); - MakeOkReply(jsonResponse, QueryResponse->Record.GetRef()); - if (Schema == ESchemaType::Classic && Stats.empty() && (Action.empty() || Action == "execute")) { - jsonResponse = std::move(jsonResponse["result"]); - } - } else { - QueryResponse.Error("QueryError"); - NYql::TIssues issues; - 
NYql::IssuesFromMessage(ev->Get()->Record.GetRef().GetResponse().GetQueryIssues(), issues); - MakeErrorReply(jsonResponse, NYdb::TStatus(NYdb::EStatus(ev->Get()->Record.GetRef().GetYdbStatus()), std::move(issues))); - } - - TStringStream stream; - NJson::WriteJson(&stream, &jsonResponse, { - .ValidateUtf8 = false, - .WriteNanAsString = true, - }); - - TBase::ReplyAndPassAway(GetHTTPOKJSON(stream.Str())); - } - - void HandleReply(NKqp::TEvKqp::TEvAbortExecution::TPtr& ev) { - QueryResponse.Error("Aborted"); - auto& record(ev->Get()->Record); - NJson::TJsonValue jsonResponse; - if (record.IssuesSize() > 0) { - NYql::TIssues issues; - NYql::IssuesFromMessage(record.GetIssues(), issues); - MakeErrorReply(jsonResponse, NYdb::TStatus(NYdb::EStatus(record.GetStatusCode()), std::move(issues))); - } - - TStringStream stream; - NJson::WriteJson(&stream, &jsonResponse, { - .ValidateUtf8 = false, - .WriteNanAsString = true, - }); - - TBase::ReplyAndPassAway(GetHTTPOKJSON(stream.Str())); - } - - void HandleReply(NKqp::TEvKqpExecuter::TEvStreamProfile::TPtr& ev) { - Y_UNUSED(ev); - } - - void HandleReply(NKqp::TEvKqp::TEvPingSessionResponse::TPtr& ev) { - Y_UNUSED(ev); - } - - void HandleReply(NKqp::TEvKqpExecuter::TEvStreamData::TPtr& ev) { - const NKikimrKqp::TEvExecuterStreamData& data(ev->Get()->Record); - - ResultSets.emplace_back(); - ResultSets.back() = std::move(data.GetResultSet()); - - THolder ack = MakeHolder(); - ack->Record.SetSeqNo(ev->Get()->Record.GetSeqNo()); - Send(ev->Sender, ack.Release()); - } - - void HandleTimeout() { - TStringBuilder error; - error << "Timeout executing query"; - if (SessionId) { - auto event = std::make_unique(); - event->Record.MutableRequest()->SetSessionId(SessionId); - BLOG_TRACE("Cancelling query in session " << SessionId); - Send(NKqp::MakeKqpProxyID(SelfId().NodeId()), event.release()); - error << ", query was cancelled"; - } - NJson::TJsonValue json; - json["error"]["severity"] = NYql::TSeverityIds::S_ERROR; - 
json["error"]["message"] = error; - NJson::TJsonValue& issue = json["issues"].AppendValue({}); - issue["severity"] = NYql::TSeverityIds::S_ERROR; - issue["message"] = error; - TBase::ReplyAndPassAway(GetHTTPOKJSON(NJson::WriteJson(json, false))); - } - -private: - void MakeErrorReply(NJson::TJsonValue& jsonResponse, const NYdb::TStatus& status) { - TString message; - - NViewer::MakeErrorReply(jsonResponse, message, status); - - if (Span) { - Span.EndError("Error"); - } - } - - void MakeOkReply(NJson::TJsonValue& jsonResponse, NKikimrKqp::TEvQueryResponse& record) { - const auto& response = record.GetResponse(); - - if (response.ResultsSize() > 0 || response.YdbResultsSize() > 0) { - try { - for (const auto& result : response.GetResults()) { - Ydb::ResultSet resultSet; - NKqp::ConvertKqpQueryResultToDbResult(result, &resultSet); - ResultSets.emplace_back(std::move(resultSet)); - } - - for (const auto& result : response.GetYdbResults()) { - ResultSets.emplace_back(result); - } - } - catch (const std::exception& ex) { - NYql::TIssues issues; - issues.AddIssue(TStringBuilder() << "Convert error: " << ex.what()); - MakeErrorReply(jsonResponse, NYdb::TStatus(NYdb::EStatus::BAD_REQUEST, std::move(issues))); - return; - } - } - - if (ResultSets.size() > 0) { - if (Schema == ESchemaType::Classic) { - NJson::TJsonValue& jsonResults = jsonResponse["result"]; - jsonResults.SetType(NJson::JSON_ARRAY); - for (auto it = ResultSets.begin(); it != ResultSets.end(); ++it) { - NYdb::TResultSet resultSet(*it); - const auto& columnsMeta = resultSet.GetColumnsMeta(); - NYdb::TResultSetParser rsParser(resultSet); - while (rsParser.TryNextRow()) { - NJson::TJsonValue& jsonRow = jsonResults.AppendValue({}); - for (size_t columnNum = 0; columnNum < columnsMeta.size(); ++columnNum) { - const NYdb::TColumn& columnMeta = columnsMeta[columnNum]; - jsonRow[columnMeta.Name] = ColumnValueToJsonValue(rsParser.ColumnParser(columnNum)); - } - } - } - } - - if (Schema == ESchemaType::Modern) { - { - 
NJson::TJsonValue& jsonColumns = jsonResponse["columns"]; - NYdb::TResultSet resultSet(ResultSets.front()); - const auto& columnsMeta = resultSet.GetColumnsMeta(); - jsonColumns.SetType(NJson::JSON_ARRAY); - for (size_t columnNum = 0; columnNum < columnsMeta.size(); ++columnNum) { - NJson::TJsonValue& jsonColumn = jsonColumns.AppendValue({}); - const NYdb::TColumn& columnMeta = columnsMeta[columnNum]; - jsonColumn["name"] = columnMeta.Name; - jsonColumn["type"] = columnMeta.Type.ToString(); - } - } - - NJson::TJsonValue& jsonResults = jsonResponse["result"]; - jsonResults.SetType(NJson::JSON_ARRAY); - for (auto it = ResultSets.begin(); it != ResultSets.end(); ++it) { - NYdb::TResultSet resultSet(*it); - const auto& columnsMeta = resultSet.GetColumnsMeta(); - NYdb::TResultSetParser rsParser(resultSet); - while (rsParser.TryNextRow()) { - NJson::TJsonValue& jsonRow = jsonResults.AppendValue({}); - jsonRow.SetType(NJson::JSON_ARRAY); - for (size_t columnNum = 0; columnNum < columnsMeta.size(); ++columnNum) { - NJson::TJsonValue& jsonColumn = jsonRow.AppendValue({}); - jsonColumn = ColumnValueToJsonValue(rsParser.ColumnParser(columnNum)); - } - } - } - } - - if (Schema == ESchemaType::Multi) { - NJson::TJsonValue& jsonResults = jsonResponse["result"]; - jsonResults.SetType(NJson::JSON_ARRAY); - for (auto it = ResultSets.begin(); it != ResultSets.end(); ++it) { - NYdb::TResultSet resultSet(*it); - const auto& columnsMeta = resultSet.GetColumnsMeta(); - NJson::TJsonValue& jsonResult = jsonResults.AppendValue({}); - - NJson::TJsonValue& jsonColumns = jsonResult["columns"]; - jsonColumns.SetType(NJson::JSON_ARRAY); - for (size_t columnNum = 0; columnNum < columnsMeta.size(); ++columnNum) { - NJson::TJsonValue& jsonColumn = jsonColumns.AppendValue({}); - const NYdb::TColumn& columnMeta = columnsMeta[columnNum]; - jsonColumn["name"] = columnMeta.Name; - jsonColumn["type"] = columnMeta.Type.ToString(); - } - - NJson::TJsonValue& jsonRows = jsonResult["rows"]; - 
NYdb::TResultSetParser rsParser(resultSet); - while (rsParser.TryNextRow()) { - NJson::TJsonValue& jsonRow = jsonRows.AppendValue({}); - jsonRow.SetType(NJson::JSON_ARRAY); - for (size_t columnNum = 0; columnNum < columnsMeta.size(); ++columnNum) { - NJson::TJsonValue& jsonColumn = jsonRow.AppendValue({}); - jsonColumn = ColumnValueToJsonValue(rsParser.ColumnParser(columnNum)); - } - } - } - } - - if (Schema == ESchemaType::Ydb) { - NJson::TJsonValue& jsonResults = jsonResponse["result"]; - jsonResults.SetType(NJson::JSON_ARRAY); - for (auto it = ResultSets.begin(); it != ResultSets.end(); ++it) { - NYdb::TResultSet resultSet(*it); - const auto& columnsMeta = resultSet.GetColumnsMeta(); - NYdb::TResultSetParser rsParser(resultSet); - while (rsParser.TryNextRow()) { - NJson::TJsonValue& jsonRow = jsonResults.AppendValue({}); - TString row = NYdb::FormatResultRowJson(rsParser, columnsMeta, IsBase64Encode ? NYdb::EBinaryStringEncoding::Base64 : NYdb::EBinaryStringEncoding::Unicode); - NJson::ReadJsonTree(row, &jsonRow); - } - } - } - } - if (response.HasQueryAst()) { - jsonResponse["ast"] = response.GetQueryAst(); - } - if (response.HasQueryPlan()) { - NJson::ReadJsonTree(response.GetQueryPlan(), &(jsonResponse["plan"])); - } - if (response.HasQueryStats()) { - NProtobufJson::Proto2Json(response.GetQueryStats(), jsonResponse["stats"]); - } - } -}; - -template <> -YAML::Node TJsonRequestSwagger::GetSwagger() { - YAML::Node node = YAML::Load(R"___( - post: - tags: - - viewer - summary: Executes SQL query - description: Executes SQL query - parameters: - - name: action - in: query - type: string - enum: [execute-scan, execute-script, execute-query, execute-data, explain-ast, explain-scan, explain-script, explain-query, explain-data, cancel-query] - required: true - description: > - execute method: - * `execute-query` - execute query (QueryService) - * `execute-data` - execute data query (DataQuery) - * `execute-scan` - execute scan query (ScanQuery) - * `execute-script` 
- execute script query (ScriptingService) - * `explain-query` - explain query (QueryService) - * `explain-data` - explain data query (DataQuery) - * `explain-scan` - explain scan query (ScanQuery) - * `explain-script` - explain script query (ScriptingService) - * `cancel-query` - cancel query (using query_id) - - name: database - in: query - description: database name - type: string - required: false - - name: query - in: query - description: SQL query text - type: string - required: false - - name: query_id - in: query - description: unique query identifier (uuid) - use the same id to cancel query - required: false - - name: syntax - in: query - description: > - query syntax: - * `yql_v1` - YQL v1 (default) - * `pg` - PostgreSQL compatible - type: string - enum: [yql_v1, pg] - required: false - - name: schema - in: query - description: > - result format schema: - * `classic` - * `modern` - * `multi` - * `ydb` - type: string - enum: [classic, modern, ydb, multi] - required: false - - name: stats - in: query - description: > - return stats: - * `profile` - * `full` - type: string - enum: [profile, full] - required: false - - name: transaction_mode - in: query - description: > - transaction mode: - * `serializable-read-write` - * `online-read-only` - * `stale-read-only` - * `snapshot-read-only` - type: string - enum: [serializable-read-write, online-read-only, stale-read-only, snapshot-read-only] - required: false - - name: direct - in: query - description: force processing query on current node - type: boolean - required: false - - name: base64 - in: query - description: return strings using base64 encoding - type: string - required: false - - name: timeout - in: query - description: timeout in ms - type: integer - required: false - - name: ui64 - in: query - description: return ui64 as number to avoid 56-bit js rounding - type: boolean - required: false - requestBody: - description: Executes SQL query - required: false - content: - application/json: - schema: - 
type: object - description: the same properties as in query parameters - responses: - 200: - description: OK - content: - application/json: - schema: - type: object - description: format depends on schema parameter - 400: - description: Bad Request - 403: - description: Forbidden - 504: - description: Gateway Timeout - )___"); - return node; -} - - -} -} diff --git a/ydb/core/viewer/json_query_old.h b/ydb/core/viewer/json_query_old.h deleted file mode 100644 index 8a532385ce65..000000000000 --- a/ydb/core/viewer/json_query_old.h +++ /dev/null @@ -1,636 +0,0 @@ -#pragma once -#include "viewer.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "json_pipe_req.h" -#include "viewer_request.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; -using namespace NMonitoring; -using ::google::protobuf::FieldDescriptor; - -// we only keep this class for compatibility with viewer requests -// DO NOT EDIT THIS FILE - it should be deleted after 2025-01-01 -class TJsonQueryOld : public TViewerPipeClient { - using TThis = TJsonQueryOld; - using TBase = TViewerPipeClient; - IViewer* Viewer; - TJsonSettings JsonSettings; - NMon::TEvHttpInfo::TPtr Event; - TEvViewer::TEvViewerRequest::TPtr ViewerRequest; - ui32 Timeout = 0; - TVector ResultSets; - TString Query; - TString Database; - TString Action; - TString Stats; - TString Syntax; - TString UserToken; - bool IsBase64Encode; - - enum ESchemaType { - Classic, - Modern, - Multi, - Ydb, - }; - ESchemaType Schema = ESchemaType::Classic; - - std::optional SubscribedNodeId; - std::vector TenantDynamicNodes; - bool Direct = false; - bool MadeKqpProxyRequest = false; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - ESchemaType StringToSchemaType(const TString& schemaStr) { - if (schemaStr == "classic") { - return 
ESchemaType::Classic; - } else if (schemaStr == "modern") { - return ESchemaType::Modern; - } else if (schemaStr == "multi") { - return ESchemaType::Multi; - } else if (schemaStr == "ydb") { - return ESchemaType::Ydb; - } else { - return ESchemaType::Classic; - } - } - - void ParseCgiParameters(const TCgiParameters& params) { - JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), false); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); - Timeout = FromStringWithDefault(params.Get("timeout"), 60000); - Query = params.Get("query"); - Database = params.Get("database"); - Stats = params.Get("stats"); - Action = params.Get("action"); - TString schemaStr = params.Get("schema"); - Schema = StringToSchemaType(schemaStr); - Syntax = params.Get("syntax"); - Direct = FromStringWithDefault(params.Get("direct"), Direct); - IsBase64Encode = FromStringWithDefault(params.Get("base64"), true); - } - - void ParsePostContent(const TStringBuf& content) { - static NJson::TJsonReaderConfig JsonConfig; - NJson::TJsonValue requestData; - bool success = NJson::ReadJsonTree(content, &JsonConfig, &requestData); - if (success) { - Query = Query.empty() ? requestData["query"].GetStringSafe({}) : Query; - Database = Database.empty() ? requestData["database"].GetStringSafe({}) : Database; - Stats = Stats.empty() ? requestData["stats"].GetStringSafe({}) : Stats; - Action = Action.empty() ? requestData["action"].GetStringSafe({}) : Action; - Syntax = Syntax.empty() ? 
requestData["syntax"].GetStringSafe({}) : Syntax; - } - } - - bool IsPostContent() const { - return NViewer::IsPostContent(Event); - } - - TJsonQueryOld(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) - : Viewer(viewer) - , Event(ev) - { - const auto& params(Event->Get()->Request.GetParams()); - InitConfig(params); - ParseCgiParameters(params); - if (IsPostContent()) { - TStringBuf content = Event->Get()->Request.GetPostContent(); - ParsePostContent(content); - } - UserToken = Event->Get()->UserToken; - } - - TJsonQueryOld(TEvViewer::TEvViewerRequest::TPtr& ev) - : ViewerRequest(ev) - { - auto& request = ViewerRequest->Get()->Record.GetQueryRequest(); - - TCgiParameters params(request.GetUri()); - InitConfig(params); - ParseCgiParameters(params); - - TStringBuf content = request.GetContent(); - if (content) { - ParsePostContent(content); - } - - Timeout = ViewerRequest->Get()->Record.GetTimeout(); - UserToken = request.GetUserToken(); - Direct = true; - } - - void PassAway() override { - if (SubscribedNodeId.has_value()) { - Send(TActivationContext::InterconnectProxy(SubscribedNodeId.value()), new TEvents::TEvUnsubscribe()); - } - TBase::PassAway(); - BLOG_TRACE("PassAway()"); - } - - STATEFN(StateWork) { - switch (ev->GetTypeRewrite()) { - hFunc(TEvStateStorage::TEvBoardInfo, HandleReply); - hFunc(TEvents::TEvUndelivered, Undelivered); - hFunc(TEvInterconnect::TEvNodeConnected, Connected); - hFunc(TEvInterconnect::TEvNodeDisconnected, Disconnected); - hFunc(TEvViewer::TEvViewerResponse, HandleReply); - hFunc(NKqp::TEvKqp::TEvQueryResponse, HandleReply); - hFunc(NKqp::TEvKqp::TEvAbortExecution, HandleReply); - hFunc(NKqp::TEvKqpExecuter::TEvStreamData, HandleReply); - hFunc(NKqp::TEvKqpExecuter::TEvStreamProfile, HandleReply); - - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void SendKpqProxyRequest() { - if (MadeKqpProxyRequest) { - return; - } - MadeKqpProxyRequest = true; - auto event = MakeHolder(); - NKikimrKqp::TQueryRequest& request = 
*event->Record.MutableRequest(); - request.SetQuery(Query); - if (Action.empty() || Action == "execute-script" || Action == "execute") { - request.SetAction(NKikimrKqp::QUERY_ACTION_EXECUTE); - request.SetType(NKikimrKqp::QUERY_TYPE_SQL_SCRIPT); - request.SetKeepSession(false); - } else if (Action == "execute-query") { - request.SetAction(NKikimrKqp::QUERY_ACTION_EXECUTE); - request.SetType(NKikimrKqp::QUERY_TYPE_SQL_GENERIC_QUERY); - request.mutable_txcontrol()->mutable_begin_tx()->mutable_serializable_read_write(); - request.mutable_txcontrol()->set_commit_tx(true); - request.SetKeepSession(false); - } else if (Action == "explain-query") { - request.SetAction(NKikimrKqp::QUERY_ACTION_EXPLAIN); - request.SetType(NKikimrKqp::QUERY_TYPE_SQL_GENERIC_QUERY); - request.SetKeepSession(false); - } else if (Action == "execute-scan") { - request.SetAction(NKikimrKqp::QUERY_ACTION_EXECUTE); - request.SetType(NKikimrKqp::QUERY_TYPE_SQL_SCAN); - request.SetKeepSession(false); - } else if (Action == "execute-data") { - request.SetAction(NKikimrKqp::QUERY_ACTION_EXECUTE); - request.SetType(NKikimrKqp::QUERY_TYPE_SQL_DML); - request.mutable_txcontrol()->mutable_begin_tx()->mutable_serializable_read_write(); - request.mutable_txcontrol()->set_commit_tx(true); - request.SetKeepSession(false); - } else if (Action == "explain" || Action == "explain-ast" || Action == "explain-data") { - request.SetAction(NKikimrKqp::QUERY_ACTION_EXPLAIN); - request.SetType(NKikimrKqp::QUERY_TYPE_SQL_DML); - } else if (Action == "explain-scan") { - request.SetAction(NKikimrKqp::QUERY_ACTION_EXPLAIN); - request.SetType(NKikimrKqp::QUERY_TYPE_SQL_SCAN); - } else if (Action == "explain-script") { - request.SetAction(NKikimrKqp::QUERY_ACTION_EXPLAIN); - request.SetType(NKikimrKqp::QUERY_TYPE_SQL_SCRIPT); - } - if (Stats == "profile") { - request.SetStatsMode(NYql::NDqProto::DQ_STATS_MODE_PROFILE); - request.SetCollectStats(Ydb::Table::QueryStatsCollection::STATS_COLLECTION_PROFILE); - } else if (Stats == 
"full") { - request.SetStatsMode(NYql::NDqProto::DQ_STATS_MODE_FULL); - request.SetCollectStats(Ydb::Table::QueryStatsCollection::STATS_COLLECTION_FULL); - } - if (Database) { - request.SetDatabase(Database); - } - if (UserToken) { - event->Record.SetUserToken(UserToken); - } - if (Syntax == "yql_v1") { - request.SetSyntax(Ydb::Query::Syntax::SYNTAX_YQL_V1); - } else if (Syntax == "pg") { - request.SetSyntax(Ydb::Query::Syntax::SYNTAX_PG); - } - ActorIdToProto(SelfId(), event->Record.MutableRequestActorId()); - Send(NKqp::MakeKqpProxyID(SelfId().NodeId()), event.Release()); - } - - void Bootstrap() { - if (Query.empty()) { - if (Event) { - ReplyAndPassAway(Viewer->GetHTTPBADREQUEST(Event->Get(), {}, "Bad Request")); - } else { - auto* response = new TEvViewer::TEvViewerResponse(); - response->Record.MutableQueryResponse()->SetYdbStatus(Ydb::StatusIds::BAD_REQUEST); - ReplyAndPassAway(response); - } - return; - } - - if (Database && !Direct) { - RequestStateStorageEndpointsLookup(Database); // to find some dynamic node and redirect query there - } - - if (Requests == 0) { - SendKpqProxyRequest(); - } - Become(&TThis::StateWork, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - -private: - NJson::TJsonValue ColumnPrimitiveValueToJsonValue(NYdb::TValueParser& valueParser) { - switch (const auto primitive = valueParser.GetPrimitiveType()) { - case NYdb::EPrimitiveType::Bool: - return valueParser.GetBool(); - case NYdb::EPrimitiveType::Int8: - return valueParser.GetInt8(); - case NYdb::EPrimitiveType::Uint8: - return valueParser.GetUint8(); - case NYdb::EPrimitiveType::Int16: - return valueParser.GetInt16(); - case NYdb::EPrimitiveType::Uint16: - return valueParser.GetUint16(); - case NYdb::EPrimitiveType::Int32: - return valueParser.GetInt32(); - case NYdb::EPrimitiveType::Uint32: - return valueParser.GetUint32(); - case NYdb::EPrimitiveType::Int64: - return TStringBuilder() << valueParser.GetInt64(); - case NYdb::EPrimitiveType::Uint64: - return 
TStringBuilder() << valueParser.GetUint64(); - case NYdb::EPrimitiveType::Float: - return valueParser.GetFloat(); - case NYdb::EPrimitiveType::Double: - return valueParser.GetDouble(); - case NYdb::EPrimitiveType::Utf8: - return valueParser.GetUtf8(); - case NYdb::EPrimitiveType::Date: - return valueParser.GetDate().ToString(); - case NYdb::EPrimitiveType::Datetime: - return valueParser.GetDatetime().ToString(); - case NYdb::EPrimitiveType::Timestamp: - return valueParser.GetTimestamp().ToString(); - case NYdb::EPrimitiveType::Interval: - return TStringBuilder() << valueParser.GetInterval(); - case NYdb::EPrimitiveType::Date32: - return TStringBuilder() << valueParser.GetDate32(); - case NYdb::EPrimitiveType::Datetime64: - return TStringBuilder() << valueParser.GetDatetime64(); - case NYdb::EPrimitiveType::Timestamp64: - return TStringBuilder() << valueParser.GetTimestamp64(); - case NYdb::EPrimitiveType::Interval64: - return TStringBuilder() << valueParser.GetInterval64(); - case NYdb::EPrimitiveType::TzDate: - return valueParser.GetTzDate(); - case NYdb::EPrimitiveType::TzDatetime: - return valueParser.GetTzDatetime(); - case NYdb::EPrimitiveType::TzTimestamp: - return valueParser.GetTzTimestamp(); - case NYdb::EPrimitiveType::String: - return IsBase64Encode ? 
Base64Encode(valueParser.GetString()) : valueParser.GetString(); - case NYdb::EPrimitiveType::Yson: - return valueParser.GetYson(); - case NYdb::EPrimitiveType::Json: - return valueParser.GetJson(); - case NYdb::EPrimitiveType::JsonDocument: - return valueParser.GetJsonDocument(); - case NYdb::EPrimitiveType::DyNumber: - return valueParser.GetDyNumber(); - case NYdb::EPrimitiveType::Uuid: - return valueParser.GetUuid().ToString(); - default: - Y_ENSURE(false, TStringBuilder() << "Unsupported type: " << primitive); } - } - - NJson::TJsonValue ColumnValueToJsonValue(NYdb::TValueParser& valueParser) { - switch (valueParser.GetKind()) { - case NYdb::TTypeParser::ETypeKind::Primitive: - return ColumnPrimitiveValueToJsonValue(valueParser); - - case NYdb::TTypeParser::ETypeKind::Optional: - valueParser.OpenOptional(); - if (valueParser.IsNull()) { - return NJson::JSON_NULL; - } - switch(valueParser.GetKind()) { - case NYdb::TTypeParser::ETypeKind::Primitive: - return ColumnPrimitiveValueToJsonValue(valueParser); - case NYdb::TTypeParser::ETypeKind::Decimal: - return valueParser.GetDecimal().ToString(); - default: - return NJson::JSON_UNDEFINED; - } - - case NYdb::TTypeParser::ETypeKind::Tagged: - valueParser.OpenTagged(); - return ColumnValueToJsonValue(valueParser); - - case NYdb::TTypeParser::ETypeKind::Pg: - return valueParser.GetPg().Content_; - - default: - return NJson::JSON_UNDEFINED; - } - } - - void Connected(TEvInterconnect::TEvNodeConnected::TPtr &) {} - - void Undelivered(TEvents::TEvUndelivered::TPtr &ev) { - if (ev->Get()->SourceType == NViewer::TEvViewer::EvViewerRequest) { - SendKpqProxyRequest(); - } - } - - void Disconnected(TEvInterconnect::TEvNodeDisconnected::TPtr &) { - SendKpqProxyRequest(); - } - - void SendDynamicNodeQueryRequest() { - ui64 hash = std::hash()(Event->Get()->Request.GetRemoteAddr()); - - auto itPos = std::next(TenantDynamicNodes.begin(), hash % TenantDynamicNodes.size()); - std::nth_element(TenantDynamicNodes.begin(), itPos, 
TenantDynamicNodes.end()); - - TNodeId nodeId = *itPos; - SubscribedNodeId = nodeId; - TActorId viewerServiceId = MakeViewerID(nodeId); - - THolder request = MakeHolder(); - request->Record.SetTimeout(Timeout); - auto queryRequest = request->Record.MutableQueryRequest(); - queryRequest->SetUri(TString(Event->Get()->Request.GetUri())); - if (IsPostContent()) { - TStringBuf content = Event->Get()->Request.GetPostContent(); - queryRequest->SetContent(TString(content)); - } - if (UserToken) { - queryRequest->SetUserToken(UserToken); - } - - ViewerWhiteboardCookie cookie(NKikimrViewer::TEvViewerRequest::kQueryRequest, nodeId); - SendRequest(viewerServiceId, request.Release(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, cookie.ToUi64()); - } - - void HandleReply(TEvStateStorage::TEvBoardInfo::TPtr& ev) { - BLOG_TRACE("Received TEvBoardInfo"); - if (ev->Get()->Status == TEvStateStorage::TEvBoardInfo::EStatus::Ok) { - for (const auto& [actorId, infoEntry] : ev->Get()->InfoEntries) { - TenantDynamicNodes.emplace_back(actorId.NodeId()); - } - } - if (TenantDynamicNodes.empty()) { - SendKpqProxyRequest(); - } else { - SendDynamicNodeQueryRequest(); - } - } - - void Handle(NKikimrKqp::TEvQueryResponse& record) { - if (Event) { - NJson::TJsonValue jsonResponse; - if (record.GetYdbStatus() == Ydb::StatusIds::SUCCESS) { - MakeOkReply(jsonResponse, record); - } else { - MakeErrorReply(jsonResponse, record); - } - - if (Schema == ESchemaType::Classic && Stats.empty() && (Action.empty() || Action == "execute")) { - jsonResponse = std::move(jsonResponse["result"]); - } - - TStringStream stream; - NJson::TJsonWriterConfig config; - config.ValidateUtf8 = false; - config.WriteNanAsString = true; - NJson::WriteJson(&stream, &jsonResponse, config); - - ReplyAndPassAway(stream.Str()); - } else { - TEvViewer::TEvViewerResponse* response = new TEvViewer::TEvViewerResponse(); - response->Record.MutableQueryResponse()->CopyFrom(record); - 
response->Record.MutableQueryResponse()->MutableResponse()->MutableYdbResults()->Add(ResultSets.begin(), ResultSets.end()); - ReplyAndPassAway(response); - } - } - - void HandleReply(NKqp::TEvKqp::TEvQueryResponse::TPtr& ev) { - Handle(ev->Get()->Record.GetRef()); - } - - void HandleReply(TEvViewer::TEvViewerResponse::TPtr& ev) { - auto& record = ev.Get()->Get()->Record; - if (record.HasQueryResponse()) { - Handle(*(ev.Get()->Get()->Record.MutableQueryResponse())); - } else { - SendKpqProxyRequest(); // fallback - } - } - - void HandleReply(NKqp::TEvKqp::TEvAbortExecution::TPtr& ev) { - Y_UNUSED(ev); - } - - void HandleReply(NKqp::TEvKqpExecuter::TEvStreamProfile::TPtr& ev) { - Y_UNUSED(ev); - } - - void HandleReply(NKqp::TEvKqpExecuter::TEvStreamData::TPtr& ev) { - const NKikimrKqp::TEvExecuterStreamData& data(ev->Get()->Record); - - ResultSets.emplace_back(); - ResultSets.back() = std::move(data.GetResultSet()); - - THolder ack = MakeHolder(); - ack->Record.SetSeqNo(ev->Get()->Record.GetSeqNo()); - Send(ev->Sender, ack.Release()); - } - - void HandleTimeout() { - if (Event) { - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } else { - auto* response = new TEvViewer::TEvViewerResponse(); - response->Record.MutableQueryResponse()->SetYdbStatus(Ydb::StatusIds::TIMEOUT); - ReplyAndPassAway(response); - } - } - - void ReplyAndPassAway(TEvViewer::TEvViewerResponse* response) { - Send(ViewerRequest->Sender, response); - PassAway(); - } - - void ReplyAndPassAway(TString data) { - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), std::move(data)), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } - -private: - void MakeErrorReply(NJson::TJsonValue& jsonResponse, NKikimrKqp::TEvQueryResponse& record) { - NJson::TJsonValue& jsonIssues = jsonResponse["issues"]; - - // find first deepest error - 
google::protobuf::RepeatedPtrField* protoIssues = record.MutableResponse()->MutableQueryIssues(); - std::stable_sort(protoIssues->begin(), protoIssues->end(), [](const Ydb::Issue::IssueMessage& a, const Ydb::Issue::IssueMessage& b) -> bool { - return a.severity() < b.severity(); - }); - while (protoIssues->size() > 0 && (*protoIssues)[0].issuesSize() > 0) { - protoIssues = (*protoIssues)[0].mutable_issues(); - } - if (protoIssues->size() > 0) { - const Ydb::Issue::IssueMessage& issue = (*protoIssues)[0]; - NProtobufJson::Proto2Json(issue, jsonResponse["error"]); - } - for (const auto& queryIssue : record.GetResponse().GetQueryIssues()) { - NJson::TJsonValue& issue = jsonIssues.AppendValue({}); - NProtobufJson::Proto2Json(queryIssue, issue); - } - } - - void MakeOkReply(NJson::TJsonValue& jsonResponse, NKikimrKqp::TEvQueryResponse& record) { - const auto& response = record.GetResponse(); - - if (response.ResultsSize() > 0 || response.YdbResultsSize() > 0) { - try { - for (const auto& result : response.GetResults()) { - Ydb::ResultSet resultSet; - NKqp::ConvertKqpQueryResultToDbResult(result, &resultSet); - ResultSets.emplace_back(std::move(resultSet)); - } - - for (const auto& result : response.GetYdbResults()) { - ResultSets.emplace_back(result); - } - } - catch (const std::exception& ex) { - Ydb::Issue::IssueMessage* issue = record.MutableResponse()->AddQueryIssues(); - issue->set_message(Sprintf("Convert error: %s", ex.what())); - issue->set_severity(NYql::TSeverityIds::S_ERROR); - MakeErrorReply(jsonResponse, record); - return; - } - } - - if (ResultSets.size() > 0) { - if (Schema == ESchemaType::Classic) { - NJson::TJsonValue& jsonResults = jsonResponse["result"]; - jsonResults.SetType(NJson::JSON_ARRAY); - for (auto it = ResultSets.begin(); it != ResultSets.end(); ++it) { - NYdb::TResultSet resultSet(*it); - const auto& columnsMeta = resultSet.GetColumnsMeta(); - NYdb::TResultSetParser rsParser(resultSet); - while (rsParser.TryNextRow()) { - NJson::TJsonValue& 
jsonRow = jsonResults.AppendValue({}); - for (size_t columnNum = 0; columnNum < columnsMeta.size(); ++columnNum) { - const NYdb::TColumn& columnMeta = columnsMeta[columnNum]; - jsonRow[columnMeta.Name] = ColumnValueToJsonValue(rsParser.ColumnParser(columnNum)); - } - } - } - } - - if (Schema == ESchemaType::Modern) { - { - NJson::TJsonValue& jsonColumns = jsonResponse["columns"]; - NYdb::TResultSet resultSet(ResultSets.front()); - const auto& columnsMeta = resultSet.GetColumnsMeta(); - jsonColumns.SetType(NJson::JSON_ARRAY); - for (size_t columnNum = 0; columnNum < columnsMeta.size(); ++columnNum) { - NJson::TJsonValue& jsonColumn = jsonColumns.AppendValue({}); - const NYdb::TColumn& columnMeta = columnsMeta[columnNum]; - jsonColumn["name"] = columnMeta.Name; - jsonColumn["type"] = columnMeta.Type.ToString(); - } - } - - NJson::TJsonValue& jsonResults = jsonResponse["result"]; - jsonResults.SetType(NJson::JSON_ARRAY); - for (auto it = ResultSets.begin(); it != ResultSets.end(); ++it) { - NYdb::TResultSet resultSet(*it); - const auto& columnsMeta = resultSet.GetColumnsMeta(); - NYdb::TResultSetParser rsParser(resultSet); - while (rsParser.TryNextRow()) { - NJson::TJsonValue& jsonRow = jsonResults.AppendValue({}); - jsonRow.SetType(NJson::JSON_ARRAY); - for (size_t columnNum = 0; columnNum < columnsMeta.size(); ++columnNum) { - NJson::TJsonValue& jsonColumn = jsonRow.AppendValue({}); - jsonColumn = ColumnValueToJsonValue(rsParser.ColumnParser(columnNum)); - } - } - } - } - - if (Schema == ESchemaType::Multi) { - NJson::TJsonValue& jsonResults = jsonResponse["result"]; - jsonResults.SetType(NJson::JSON_ARRAY); - for (auto it = ResultSets.begin(); it != ResultSets.end(); ++it) { - NYdb::TResultSet resultSet(*it); - const auto& columnsMeta = resultSet.GetColumnsMeta(); - NJson::TJsonValue& jsonResult = jsonResults.AppendValue({}); - - NJson::TJsonValue& jsonColumns = jsonResult["columns"]; - jsonColumns.SetType(NJson::JSON_ARRAY); - for (size_t columnNum = 0; columnNum 
< columnsMeta.size(); ++columnNum) { - NJson::TJsonValue& jsonColumn = jsonColumns.AppendValue({}); - const NYdb::TColumn& columnMeta = columnsMeta[columnNum]; - jsonColumn["name"] = columnMeta.Name; - jsonColumn["type"] = columnMeta.Type.ToString(); - } - - NJson::TJsonValue& jsonRows = jsonResult["rows"]; - NYdb::TResultSetParser rsParser(resultSet); - while (rsParser.TryNextRow()) { - NJson::TJsonValue& jsonRow = jsonRows.AppendValue({}); - jsonRow.SetType(NJson::JSON_ARRAY); - for (size_t columnNum = 0; columnNum < columnsMeta.size(); ++columnNum) { - NJson::TJsonValue& jsonColumn = jsonRow.AppendValue({}); - jsonColumn = ColumnValueToJsonValue(rsParser.ColumnParser(columnNum)); - } - } - } - } - - if (Schema == ESchemaType::Ydb) { - NJson::TJsonValue& jsonResults = jsonResponse["result"]; - jsonResults.SetType(NJson::JSON_ARRAY); - for (auto it = ResultSets.begin(); it != ResultSets.end(); ++it) { - NYdb::TResultSet resultSet(*it); - const auto& columnsMeta = resultSet.GetColumnsMeta(); - NYdb::TResultSetParser rsParser(resultSet); - while (rsParser.TryNextRow()) { - NJson::TJsonValue& jsonRow = jsonResults.AppendValue({}); - TString row = NYdb::FormatResultRowJson(rsParser, columnsMeta, IsBase64Encode ? 
NYdb::EBinaryStringEncoding::Base64 : NYdb::EBinaryStringEncoding::Unicode); - NJson::ReadJsonTree(row, &jsonRow); - } - } - } - } - if (response.HasQueryAst()) { - jsonResponse["ast"] = response.GetQueryAst(); - } - if (response.HasQueryPlan()) { - NJson::ReadJsonTree(response.GetQueryPlan(), &(jsonResponse["plan"])); - } - if (response.HasQueryStats()) { - NProtobufJson::Proto2Json(response.GetQueryStats(), jsonResponse["stats"]); - } - } - -}; - - -} -} diff --git a/ydb/core/viewer/json_render.h b/ydb/core/viewer/json_render.h deleted file mode 100644 index a0788879a19c..000000000000 --- a/ydb/core/viewer/json_render.h +++ /dev/null @@ -1,344 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include "json_pipe_req.h" -#include "viewer_request.h" -#include "viewer.h" -#include "log.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; -using namespace NMonitoring; - -class TJsonRender : public TViewerPipeClient { - using TThis = TJsonRender; - using TBase = TViewerPipeClient; - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - TEvViewer::TEvViewerRequest::TPtr ViewerRequest; - ui32 Timeout = 0; - std::vector Metrics; - TString Database; - TCgiParameters Params; - - std::optional SubscribedNodeId; - std::vector TenantDynamicNodes; - bool Direct = false; - bool MadeProxyRequest = false; -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonRender(IViewer* viewer, NMon::TEvHttpInfo::TPtr &ev) - : Viewer(viewer) - , Event(ev) - { - const auto& params(Event->Get()->Request.GetParams()); - - InitConfig(params); - Database = params.Get("database"); - Direct = FromStringWithDefault(params.Get("direct"), Direct); - Timeout = FromStringWithDefault(params.Get("timeout"), 30000); - } - - TJsonRender(TEvViewer::TEvViewerRequest::TPtr& ev) - : ViewerRequest(ev) - { - auto& request = ViewerRequest->Get()->Record.GetRenderRequest(); - - 
TCgiParameters params(request.GetUri()); - InitConfig(params); - Direct = true; - Timeout = ViewerRequest->Get()->Record.GetTimeout(); - } - - void Bootstrap() { - auto postData = Event - ? Event->Get()->Request.GetPostContent() - : ViewerRequest->Get()->Record.GetRenderRequest().GetContent(); - BLOG_D("PostData=" << postData); - NKikimrGraph::TEvGetMetrics getRequest; - if (postData) { - Params = TCgiParameters(postData); - if (Params.Has("target")) { - TString metric; - size_t num = 0; - for (;;) { - metric = Params.Get("target", num); - if (metric.empty()) { - break; - } - Metrics.push_back(metric); - ++num; - } - } - //StringSplitter(Params.Get("target")).Split(',').SkipEmpty().Collect(&Metrics); - - if (Database && !Direct) { - RequestStateStorageEndpointsLookup(Database); // to find some dynamic node and redirect there - } - if (Requests == 0) { - SendGraphRequest(); - } - } else { - ReplyAndPassAway(Viewer->GetHTTPBADREQUEST(Event->Get(), {}, "Bad Request")); - return; - } - - Become(&TThis::StateWork, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - void PassAway() override { - if (SubscribedNodeId.has_value()) { - Send(TActivationContext::InterconnectProxy(SubscribedNodeId.value()), new TEvents::TEvUnsubscribe()); - } - TBase::PassAway(); - BLOG_TRACE("PassAway()"); - } - - STATEFN(StateWork) { - switch (ev->GetTypeRewrite()) { - hFunc(TEvStateStorage::TEvBoardInfo, Handle); - hFunc(TEvents::TEvUndelivered, Undelivered); - hFunc(TEvInterconnect::TEvNodeConnected, Connected); - hFunc(TEvInterconnect::TEvNodeDisconnected, Disconnected); - hFunc(TEvViewer::TEvViewerResponse, Handle); - hFunc(NGraph::TEvGraph::TEvMetricsResult, Handle); - - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void Connected(TEvInterconnect::TEvNodeConnected::TPtr &) {} - - void Undelivered(TEvents::TEvUndelivered::TPtr &ev) { - if (ev->Get()->SourceType == NViewer::TEvViewer::EvViewerRequest) { - SendGraphRequest(); - } - } - - void 
Disconnected(TEvInterconnect::TEvNodeDisconnected::TPtr &) { - SendGraphRequest(); - } - - void SendDynamicNodeRenderRequest() { - ui64 hash = std::hash()(Event->Get()->Request.GetRemoteAddr()); - - auto itPos = std::next(TenantDynamicNodes.begin(), hash % TenantDynamicNodes.size()); - std::nth_element(TenantDynamicNodes.begin(), itPos, TenantDynamicNodes.end()); - - TNodeId nodeId = *itPos; - SubscribedNodeId = nodeId; - TActorId viewerServiceId = MakeViewerID(nodeId); - - THolder request = MakeHolder(); - request->Record.SetTimeout(Timeout); - auto renderRequest = request->Record.MutableRenderRequest(); - renderRequest->SetUri(TString(Event->Get()->Request.GetUri())); - - TStringBuf content = Event->Get()->Request.GetPostContent(); - renderRequest->SetContent(TString(content)); - - ViewerWhiteboardCookie cookie(NKikimrViewer::TEvViewerRequest::kRenderRequest, nodeId); - SendRequest(viewerServiceId, request.Release(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, cookie.ToUi64()); - } - - void Handle(TEvStateStorage::TEvBoardInfo::TPtr& ev) { - BLOG_TRACE("Received TEvBoardInfo"); - if (ev->Get()->Status == TEvStateStorage::TEvBoardInfo::EStatus::Ok) { - for (const auto& [actorId, infoEntry] : ev->Get()->InfoEntries) { - TenantDynamicNodes.emplace_back(actorId.NodeId()); - } - } - if (TenantDynamicNodes.empty()) { - SendGraphRequest(); - } else { - SendDynamicNodeRenderRequest(); - } - } - - void SendGraphRequest() { - if (MadeProxyRequest) { - return; - } - MadeProxyRequest = true; - NKikimrGraph::TEvGetMetrics getRequest; - if (Metrics.size() > 0) { - for (const auto& metric : Metrics) { - getRequest.AddMetrics(metric); - } - } else { - static const TString png1x1 = "\x89\x50\x4e\x47\x0d\x0a\x1a\x0a\x00\x00\x00\x0d\x49\x48\x44\x52\x00\x00\x00\x01\x00\x00\x00\x01\x01" - "\x03\x00\x00\x00\x25\xdb\x56\xca\x00\x00\x00\x03\x50\x4c\x54\x45\x00\x00\x00\xa7\x7a\x3d\xda\x00\x00" - 
"\x00\x01\x74\x52\x4e\x53\x00\x40\xe6\xd8\x66\x00\x00\x00\x0a\x49\x44\x41\x54\x08\xd7\x63\x60\x00\x00" - "\x00\x02\x00\x01\xe2\x21\xbc\x33\x00\x00\x00\x00\x49\x45\x4e\x44\xae\x42\x60\x82"; - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOK(Event->Get(), "image/png", png1x1), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - return PassAway(); - } - if (Params.Has("from")) { - getRequest.SetTimeFrom(FromStringWithDefault(Params.Get("from"))); - } - if (Params.Has("until")) { - getRequest.SetTimeTo(FromStringWithDefault(Params.Get("until"))); - } - if (Params.Has("maxDataPoints")) { - getRequest.SetMaxPoints(FromStringWithDefault(Params.Get("maxDataPoints"), 1000)); - } - Send(NGraph::MakeGraphServiceId(), new NGraph::TEvGraph::TEvGetMetrics(std::move(getRequest))); - } - - void HandleRenderResponse(NKikimrGraph::TEvMetricsResult& response) { - if (Event) { - NJson::TJsonValue json; - - if (response.GetError()) { - json["status"] = "error"; - json["error"] = response.GetError(); - ReplyAndPassAway(Viewer->GetHTTPOKJSON(Event->Get(), NJson::WriteJson(json, false))); - return; - } - if (response.DataSize() != Metrics.size()) { - json["status"] = "error"; - json["error"] = "Invalid data size received"; - ReplyAndPassAway(Viewer->GetHTTPOKJSON(Event->Get(), NJson::WriteJson(json, false))); - return; - } - for (size_t nMetric = 0; nMetric < response.DataSize(); ++nMetric) { - const auto& protoMetric(response.GetData(nMetric)); - if (response.TimeSize() != protoMetric.ValuesSize()) { - json["status"] = "error"; - json["error"] = "Invalid value size received"; - ReplyAndPassAway(Viewer->GetHTTPOKJSON(Event->Get(), NJson::WriteJson(json, false))); - return; - } - } - { // graphite - json.SetType(NJson::JSON_ARRAY); - for (size_t nMetric = 0; nMetric < response.DataSize(); ++nMetric) { - const auto& protoMetric(response.GetData(nMetric)); - NJson::TJsonValue& jsonMetric(json.AppendValue({})); - jsonMetric["target"] = Metrics[nMetric]; - jsonMetric["title"] = 
Metrics[nMetric]; - jsonMetric["tags"]["name"] = Metrics[nMetric]; - NJson::TJsonValue& jsonDataPoints(jsonMetric["datapoints"]); - jsonDataPoints.SetType(NJson::JSON_ARRAY); - for (size_t nTime = 0; nTime < response.TimeSize(); ++nTime) { - NJson::TJsonValue& jsonDataPoint(jsonDataPoints.AppendValue({})); - double value = protoMetric.GetValues(nTime); - if (isnan(value)) { - jsonDataPoint.AppendValue(NJson::TJsonValue(NJson::JSON_NULL)); - } else { - jsonDataPoint.AppendValue(value); - } - jsonDataPoint.AppendValue(response.GetTime(nTime)); - } - } - } - - ReplyAndPassAway(Viewer->GetHTTPOKJSON(Event->Get(), NJson::WriteJson(json, false))); - } else { - TEvViewer::TEvViewerResponse* viewerResponse = new TEvViewer::TEvViewerResponse(); - viewerResponse->Record.MutableRenderResponse()->CopyFrom(response); - ReplyAndPassAway(viewerResponse); - } - } - - void Handle(NGraph::TEvGraph::TEvMetricsResult::TPtr& ev) { - HandleRenderResponse(ev->Get()->Record); - } - - void Handle(TEvViewer::TEvViewerResponse::TPtr& ev) { - auto& record = ev.Get()->Get()->Record; - if (record.HasRenderResponse()) { - HandleRenderResponse(*(record.MutableRenderResponse())); - } else { - SendGraphRequest(); // fallback - } - } - - void HandleTimeout() { - if (Event) { - ReplyAndPassAway(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get())); - } else { - auto* response = new TEvViewer::TEvViewerResponse(); - response->Record.MutableRenderResponse()->SetError("Request timed out"); - ReplyAndPassAway(response); - } - } - - void ReplyAndPassAway(TEvViewer::TEvViewerResponse* response) { - Send(ViewerRequest->Sender, response); - PassAway(); - } - - void ReplyAndPassAway(TString data) { - Send(Event->Sender, new NMon::TEvHttpInfoRes(std::move(data), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: target - in: query - description: metrics comma delimited - 
required: true - type: string - - name: from - in: query - description: time in seconds - required: false - type: integer - - name: database - in: query - description: database name - required: false - type: string - - name: direct - in: query - description: force processing query on current node - required: false - type: boolean - - name: until - in: query - description: time in seconds - required: false - type: integer - - name: maxDataPoints - in: query - description: maximum number of data points - required: false - type: integer - - name: format - in: query - description: response format - required: false - type: string - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Graph data"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns graph data in graphite format"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_storage.h b/ydb/core/viewer/json_storage.h deleted file mode 100644 index fba5047a69e0..000000000000 --- a/ydb/core/viewer/json_storage.h +++ /dev/null @@ -1,601 +0,0 @@ -#pragma once -#include "json_storage_base.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; -using namespace NNodeWhiteboard; - -using ::google::protobuf::FieldDescriptor; - -class TJsonStorage : public TJsonStorageBase { - using TBase = TJsonStorageBase; - using TThis = TJsonStorage; - - bool NeedGroups = true; - bool NeedDisks = true; - bool NeedDonors = true; - - enum class EGroupSort { - PoolName, - Kind, - MediaType, - Erasure, - Degraded, - Usage, - GroupId, - Used, - Limit, - Read, - Write - }; - enum class EVersion { - v1, - v2 // only this works with sorting, limiting and filtering with usage buckets - }; - EVersion Version = EVersion::v1; - EGroupSort GroupSort = EGroupSort::PoolName; - bool ReverseSort = false; - std::optional Offset; - std::optional Limit; - - ui32 UsagePace = 5; - TVector UsageBuckets; - - struct TGroupRow { - TString 
PoolName; - TString GroupId; - TString Kind; - TString MediaType; - TString Erasure; - ui32 Degraded; - float Usage; - uint64 Used; - uint64 Limit; - uint64 Read; - uint64 Write; - - TGroupRow() - : Degraded(0) - , Usage(0) - , Used(0) - , Limit(0) - , Read(0) - , Write(0) - {} - }; - THashMap GroupRowsByGroupId; - -public: - TJsonStorage(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) - : TBase(viewer, ev) - { - const auto& params(Event->Get()->Request.GetParams()); - NeedGroups = FromStringWithDefault(params.Get("need_groups"), true); - NeedDisks = FromStringWithDefault(params.Get("need_disks"), NeedGroups); - NeedDonors = FromStringWithDefault(params.Get("need_donors"), NeedDonors); - NeedGroups = Max(NeedGroups, NeedDisks); - UsagePace = FromStringWithDefault(params.Get("usage_pace"), UsagePace); - if (UsagePace == 0) { - Send(Initiator, new NMon::TEvHttpInfoRes(Viewer->GetHTTPBADREQUEST(Event->Get(), {}, "Bad Request"), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } - SplitIds(params.Get("usage_buckets"), ',', UsageBuckets); - Sort(UsageBuckets); - - TString version = params.Get("version"); - if (version == "v1") { - Version = EVersion::v1; - } else if (version == "v2") { - Version = EVersion::v2; - } - Offset = FromStringWithDefault(params.Get("offset"), 0); - Limit = FromStringWithDefault(params.Get("limit"), std::numeric_limits::max()); - TStringBuf sort = params.Get("sort"); - if (sort) { - if (sort.StartsWith("-") || sort.StartsWith("+")) { - ReverseSort = (sort[0] == '-'); - sort.Skip(1); - } - if (sort == "PoolName") { - GroupSort = EGroupSort::PoolName; - } else if (sort == "Kind") { - GroupSort = EGroupSort::Kind; - } else if (sort == "MediaType") { - GroupSort = EGroupSort::MediaType; - } else if (sort == "Erasure") { - GroupSort = EGroupSort::Erasure; - } else if (sort == "Degraded") { - GroupSort = EGroupSort::Degraded; - } else if (sort == "Usage") { - GroupSort = EGroupSort::Usage; - } else if (sort == "GroupId") { - GroupSort = 
EGroupSort::GroupId; - } else if (sort == "Used") { - GroupSort = EGroupSort::Used; - } else if (sort == "Limit") { - GroupSort = EGroupSort::Limit; - } else if (sort == "Read") { - GroupSort = EGroupSort::Read; - } else if (sort == "Write") { - GroupSort = EGroupSort::Write; - } - } - } - - void Bootstrap() override { - TIntrusivePtr domains = AppData()->DomainsInfo; - ui64 hiveId = domains->GetHive(); - if (hiveId != TDomainsInfo::BadTabletId) { - RequestHiveStorageStats(hiveId); - } - TBase::Bootstrap(); - } - - void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { - TBase::Handle(ev, true); - } - - void RemapGroup(IOutputStream& json, - const ::google::protobuf::Message& protoFrom, - const TJsonSettings& jsonSettings) { - const auto& info = static_cast(protoFrom); - TString groupId = info.GetGroupId(); - if (Version == EVersion::v2) { - const auto& groupRow = GroupRowsByGroupId[groupId]; - json << "\"PoolName\":\"" << groupRow.PoolName << "\","; - json << "\"Kind\":\"" << groupRow.Kind << "\","; - json << "\"MediaType\":\"" << groupRow.MediaType << "\","; - json << "\"Erasure\":\"" << groupRow.Erasure << "\","; - json << "\"Degraded\":\"" << groupRow.Degraded << "\","; - json << "\"Usage\":\"" << groupRow.Usage << "\","; - json << "\"Used\":\"" << groupRow.Used << "\","; - json << "\"Limit\":\"" << groupRow.Limit << "\","; - json << "\"Read\":\"" << groupRow.Read << "\","; - json << "\"Write\":\"" << groupRow.Write << "\","; - } - auto ib = BSGroupIndex.find(groupId); - if (ib != BSGroupIndex.end()) { - TProtoToJson::ProtoToJsonInline(json, ib->second, jsonSettings); - if (auto ih = BSGroupHiveIndex.find(groupId); ih != BSGroupHiveIndex.end()) { - json << ','; - TProtoToJson::ProtoToJsonInline(json, ih->second, jsonSettings); - } - if (auto io = BSGroupOverall.find(groupId); io != BSGroupOverall.end()) { - json << ",\"Overall\":\"" << io->second << "\""; - } - } - } - - void RemapVDisks(IOutputStream& json, - const ::google::protobuf::Message& 
protoFrom, - const TJsonSettings& jsonSettings) { - NKikimrWhiteboard::EFlag diskSpace = NKikimrWhiteboard::Grey; - json << "\"VDisks\":["; - const auto& info = static_cast(protoFrom); - const auto& vDiskIds = info.GetVDiskIds(); - for (auto iv = vDiskIds.begin(); iv != vDiskIds.end(); ++iv) { - if (iv != vDiskIds.begin()) { - json << ','; - } - const NKikimrBlobStorage::TVDiskID& vDiskId = *iv; - auto ie = VDisksIndex.find(vDiskId); - if (ie != VDisksIndex.end()) { - json << '{'; - TProtoToJson::ProtoToJsonInline(json, ie->second, jsonSettings); - if (auto io = VDisksOverall.find(vDiskId); io != VDisksOverall.end()) { - json << ",\"Overall\":\"" << io->second << "\""; - } - json << '}'; - diskSpace = std::max(diskSpace, ie->second.GetDiskSpace()); - } else { - json << "{\"VDiskId\":"; - TProtoToJson::ProtoToJson(json, vDiskId, jsonSettings); - json << "}"; - } - } - json << ']'; - if (diskSpace != NKikimrWhiteboard::Grey) { - json << ",\"DiskSpace\":\""; - json << NKikimrWhiteboard::EFlag_Name(diskSpace); - json << "\""; - } - } - - void RemapDonors(IOutputStream& json, - const ::google::protobuf::Message& protoFrom, - const TJsonSettings& jsonSettings) { - const auto& info = static_cast(protoFrom); - const auto& donors = info.GetDonors(); - if (donors.empty()) { - return; - } - json << "\"Donors\":["; - for (auto id = donors.begin(); id != donors.end(); ++id) { - if (id != donors.begin()) { - json << ','; - } - const NKikimrBlobStorage::TVSlotId& vSlotId = *id; - auto ie = VSlotsIndex.find(vSlotId); - if (ie != VSlotsIndex.end()) { - json << '{'; - TProtoToJson::ProtoToJsonInline(json, ie->second, jsonSettings); - json << '}'; - } else { - json << "{"; - TProtoToJson::ProtoToJsonInline(json, vSlotId, jsonSettings); - json << "}"; - } - } - json << ']'; - } - - void RemapPDisk(IOutputStream& json, - const ::google::protobuf::Message& protoFrom, - const TJsonSettings& jsonSettings) { - json << "\"PDisk\":"; - const auto& info = static_cast(protoFrom); - ui32 nodeId 
= info.GetNodeId(); - ui32 pDiskId = info.GetPDiskId(); - auto ie = PDisksIndex.find(std::make_pair(nodeId, pDiskId)); - if (ie != PDisksIndex.end()) { - TProtoToJson::ProtoToJson(json, ie->second, jsonSettings); - if (auto io = PDisksOverall.find(std::make_pair(nodeId, pDiskId)); io != PDisksOverall.end()) { - json << ",\"Overall\":\"" << io->second << "\""; - } - } else { - json << "{\"PDiskId\":" << pDiskId << ",\"NodeId\":" << nodeId << "}"; - } - } - - bool CheckGroupFilters(const TString& groupId, const TString& poolName, const TGroupRow& groupRow) { - if (!EffectiveGroupFilter.contains(groupId)) { - return false; - } - switch (With) { - case EWith::MissingDisks: - if (BSGroupWithMissingDisks.count(groupId) == 0) { - return false; - } - break; - case EWith::SpaceProblems: - if (BSGroupWithSpaceProblems.count(groupId) == 0 && groupRow.Usage < 0.8) { - return false; - } - break; - case EWith::Everything: - break; - } - if (Filter) { - if (poolName.Contains(Filter)) { - return true; - } - if (groupId.Contains(Filter)) { - return true; - } - return false; - } - return true; - } - - void ReplyAndPassAway() override { - if (CheckAdditionalNodesInfoNeeded()) { - return; - } - CollectDiskInfo(true); - ParsePDisksFromBaseConfig(); - ParseVDisksFromBaseConfig(); - - for (const auto& [hiveId, hiveStats] : HiveStorageStats) { - for (auto& pbPool : *hiveStats->Record.MutablePools()) { - for (auto& pbGroup : *pbPool.MutableGroups()) { - TString groupId = ToString(pbGroup.GetGroupID()); - NKikimrHive::THiveStorageGroupStats& stats = BSGroupHiveIndex[groupId]; - stats.SetAcquiredUnits(stats.GetAcquiredUnits() + pbGroup.GetAcquiredUnits()); - stats.SetAcquiredIOPS(stats.GetAcquiredIOPS() + pbGroup.GetAcquiredIOPS()); - stats.SetAcquiredThroughput(stats.GetAcquiredThroughput() + pbGroup.GetAcquiredThroughput()); - stats.SetAcquiredSize(stats.GetAcquiredSize() + pbGroup.GetAcquiredSize()); - stats.SetMaximumIOPS(stats.GetMaximumIOPS() + pbGroup.GetMaximumIOPS()); - 
stats.SetMaximumThroughput(stats.GetMaximumThroughput() + pbGroup.GetMaximumThroughput()); - stats.SetMaximumSize(stats.GetMaximumSize() + pbGroup.GetMaximumSize()); - } - } - } - ui64 foundGroups = 0; - ui64 totalGroups = 0; - TVector GroupRows; - for (const auto& [poolName, poolInfo] : StoragePoolInfo) { - if ((!FilterTenant.empty() || !FilterStoragePools.empty()) && FilterStoragePools.count(poolName) == 0) { - continue; - } - NKikimrViewer::TStoragePoolInfo* pool = StorageInfo.AddStoragePools(); - for (TString groupId : poolInfo.Groups) { - TGroupRow row; - row.PoolName = poolName; - row.GroupId = groupId; - row.Kind = poolInfo.Kind; - row.MediaType = poolInfo.MediaType; - auto ib = BSGroupIndex.find(groupId); - if (ib != BSGroupIndex.end()) { - row.Erasure = ib->second.GetErasureSpecies(); - const auto& vDiskIds = ib->second.GetVDiskIds(); - for (auto iv = vDiskIds.begin(); iv != vDiskIds.end(); ++iv) { - const NKikimrBlobStorage::TVDiskID& vDiskId = *iv; - auto ie = VDisksIndex.find(vDiskId); - bool degraded = false; - if (ie != VDisksIndex.end()) { - ui32 nodeId = ie->second.GetNodeId(); - ui32 pDiskId = ie->second.GetPDiskId(); - degraded |= !ie->second.GetReplicated() || ie->second.GetVDiskState() != NKikimrWhiteboard::EVDiskState::OK; - row.Used += ie->second.GetAllocatedSize(); - row.Limit += ie->second.GetAllocatedSize() + ie->second.GetAvailableSize(); - row.Read += ie->second.GetReadThroughput(); - row.Write += ie->second.GetWriteThroughput(); - - auto ip = PDisksIndex.find(std::make_pair(nodeId, pDiskId)); - if (ip != PDisksIndex.end()) { - degraded |= ip->second.GetState() != NKikimrBlobStorage::TPDiskState::Normal; - if (!ie->second.HasAvailableSize()) { - row.Limit += ip->second.GetAvailableSize(); - } - } - } - if (degraded) { - row.Degraded++; - } - } - } - row.Usage = row.Limit == 0 ? 
100 : (float)row.Used / row.Limit; - - ++totalGroups; - if (!CheckGroupFilters(groupId, poolName, row)) { - continue; - } - ++foundGroups; - if (Version == EVersion::v1) { - pool->AddGroups()->SetGroupId(groupId); - pool->SetMediaType(poolInfo.MediaType); - } else if (Version == EVersion::v2) { - if (!UsageBuckets.empty() && !BinarySearch(UsageBuckets.begin(), UsageBuckets.end(), (ui32)(row.Usage * 100) / UsagePace)) { - continue; - } - GroupRows.emplace_back(row); - GroupRowsByGroupId[groupId] = row; - } - auto itHiveGroup = BSGroupHiveIndex.find(groupId); - if (itHiveGroup != BSGroupHiveIndex.end()) { - pool->SetAcquiredUnits(pool->GetAcquiredUnits() + itHiveGroup->second.GetAcquiredUnits()); - pool->SetAcquiredIOPS(pool->GetAcquiredIOPS() + itHiveGroup->second.GetAcquiredIOPS()); - pool->SetAcquiredThroughput(pool->GetAcquiredThroughput() + itHiveGroup->second.GetAcquiredThroughput()); - pool->SetAcquiredSize(pool->GetAcquiredSize() + itHiveGroup->second.GetAcquiredSize()); - pool->SetMaximumIOPS(pool->GetMaximumIOPS() + itHiveGroup->second.GetMaximumIOPS()); - pool->SetMaximumThroughput(pool->GetMaximumThroughput() + itHiveGroup->second.GetMaximumThroughput()); - pool->SetMaximumSize(pool->GetMaximumSize() + itHiveGroup->second.GetMaximumSize()); - } - } - if (pool->GroupsSize() == 0) { - StorageInfo.MutableStoragePools()->RemoveLast(); - continue; - } - if (!poolName.empty()) { - pool->SetName(poolName); - } - if (!poolInfo.Kind.empty()) { - pool->SetKind(poolInfo.Kind); - } - pool->SetOverall(poolInfo.Overall); - } - - if (Version == EVersion::v2) { - switch (GroupSort) { - case EGroupSort::PoolName: - SortCollection(GroupRows, [](const TGroupRow& node) { return node.PoolName;}, ReverseSort); - break; - case EGroupSort::GroupId: - SortCollection(GroupRows, [](const TGroupRow& node) { return node.GroupId;}, ReverseSort); - break; - case EGroupSort::Kind: - SortCollection(GroupRows, [](const TGroupRow& node) { return node.Kind;}, ReverseSort); - break; - case 
EGroupSort::MediaType: - SortCollection(GroupRows, [](const TGroupRow& node) { return node.MediaType;}, ReverseSort); - break; - case EGroupSort::Erasure: - SortCollection(GroupRows, [](const TGroupRow& node) { return node.Erasure;}, ReverseSort); - break; - case EGroupSort::Degraded: - SortCollection(GroupRows, [](const TGroupRow& node) { return node.Degraded;}, ReverseSort); - break; - case EGroupSort::Usage: - SortCollection(GroupRows, [](const TGroupRow& node) { return node.Usage;}, ReverseSort); - break; - case EGroupSort::Used: - SortCollection(GroupRows, [](const TGroupRow& node) { return node.Used;}, ReverseSort); - break; - case EGroupSort::Limit: - SortCollection(GroupRows, [](const TGroupRow& node) { return node.Limit;}, ReverseSort); - break; - case EGroupSort::Read: - SortCollection(GroupRows, [](const TGroupRow& node) { return node.Read;}, ReverseSort); - break; - case EGroupSort::Write: - SortCollection(GroupRows, [](const TGroupRow& node) { return node.Write;}, ReverseSort); - break; - } - - ui32 start = Offset.has_value() ? 
Offset.value() : 0; - ui32 end = GroupRows.size(); - if (Limit.has_value()) { - end = Min(end, start + Limit.value()); - } - for (ui32 i = start; i < end; ++i) { - NKikimrViewer::TStorageGroupInfo* group = StorageInfo.AddStorageGroups(); - group->SetGroupId(GroupRows[i].GroupId); - } - } - - const FieldDescriptor* field; - if (NeedGroups) { - field = NKikimrViewer::TStorageGroupInfo::descriptor()->FindFieldByName("GroupId"); - JsonSettings.FieldRemapper[field] = [this]( - IOutputStream& json, - const ::google::protobuf::Message& protoFrom, - const TJsonSettings& jsonSettings) -> void { - RemapGroup(json, protoFrom, jsonSettings); - }; - } - if (NeedDisks) { - field = NKikimrWhiteboard::TBSGroupStateInfo::descriptor()->FindFieldByName("VDiskIds"); - JsonSettings.FieldRemapper[field] = [this]( - IOutputStream& json, - const ::google::protobuf::Message& protoFrom, - const TJsonSettings& jsonSettings) -> void { - RemapVDisks(json, protoFrom, jsonSettings); - }; - field = NKikimrWhiteboard::TVDiskStateInfo::descriptor()->FindFieldByName("PDiskId"); - JsonSettings.FieldRemapper[field] = [this]( - IOutputStream& json, - const ::google::protobuf::Message& protoFrom, - const TJsonSettings& jsonSettings) -> void { - RemapPDisk(json, protoFrom, jsonSettings); - }; - if (NeedDonors) { - field = NKikimrWhiteboard::TVDiskStateInfo::descriptor()->FindFieldByName("Donors"); - JsonSettings.FieldRemapper[field] = [this]( - IOutputStream& json, - const ::google::protobuf::Message& protoFrom, - const TJsonSettings& jsonSettings) -> void { - RemapDonors(json, protoFrom, jsonSettings); - }; - } - } - StorageInfo.SetTotalGroups(totalGroups); - StorageInfo.SetFoundGroups(foundGroups); - - TStringStream json; - TProtoToJson::ProtoToJson(json, StorageInfo, JsonSettings); - Send(Initiator, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), std::move(json.Str())), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } -}; - -template <> -struct TJsonRequestSchema { - 
static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - - name: tenant - in: query - description: tenant name - required: false - type: string - - name: pool - in: query - description: storage pool name - required: false - type: string - - name: node_id - in: query - description: node id - required: false - type: integer - - name: pdisk_id - in: query - description: pdisk id - required: false - type: integer - - name: group_id - in: query - description: group id - required: false - type: integer - - name: need_groups - in: query - description: return groups information - required: false - type: boolean - default: true - - name: need_disks - in: query - description: return disks information - required: false - type: boolean - default: true - - name: with - in: query - description: filter groups by missing or space - required: false - type: string - - name: version - in: query - description: query version (v1, v2) - required: false - type: string - - name: usage_pace - in: query - description: bucket size as a percentage - required: false - type: integer - default: 5 - - name: usage_buckets - in: query - description: filter groups by usage buckets - required: false - type: integer - - name: sort - in: query - description: sort by (PoolName,Kind,MediaType,Erasure,Degraded,Usage,GroupId,Used,Limit,Read,Write) - required: false - type: string - - name: offset - in: query - description: skip N nodes - required: false - type: integer - - name: limit - in: query - description: limit to N nodes - required: false - type: integer - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - )___"); - } -}; 
- -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Storage information"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns information about storage"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_storage_base.h b/ydb/core/viewer/json_storage_base.h index f4f728369d08..1f8616ce6bbc 100644 --- a/ydb/core/viewer/json_storage_base.h +++ b/ydb/core/viewer/json_storage_base.h @@ -1,17 +1,11 @@ #pragma once -#include -#include -#include -#include -#include -#include -#include -#include +#include "json_pipe_req.h" #include "viewer.h" +#include "viewer_bsgroupinfo.h" +#include "viewer_vdiskinfo.h" +#include "viewer_pdiskinfo.h" #include "viewer_helper.h" -#include "json_pipe_req.h" -#include "json_vdiskinfo.h" -#include "json_pdiskinfo.h" +#include "wb_merge.h" template<> struct std::hash { @@ -31,17 +25,16 @@ struct std::equal_to { } }; -namespace NKikimr { -namespace NViewer { +namespace NKikimr::NViewer { using namespace NActors; using namespace NNodeWhiteboard; using ::google::protobuf::FieldDescriptor; -class TJsonStorageBase : public TViewerPipeClient { +class TJsonStorageBase : public TViewerPipeClient { protected: - using TBase = TViewerPipeClient; + using TBase = TViewerPipeClient; using TThis = TJsonStorageBase; using TNodeId = ui32; @@ -104,10 +97,10 @@ class TJsonStorageBase : public TViewerPipeClient { TString Erasure; ui32 Degraded; float Usage; - uint64 Used; - uint64 Limit; - uint64 Read; - uint64 Write; + ui64 Used; + ui64 Limit; + ui64 Read; + ui64 Write; TGroupRow() : Used(0) @@ -146,11 +139,7 @@ class TJsonStorageBase : public TViewerPipeClient { } public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - virtual void Bootstrap() { + void Bootstrap() override { TIntrusivePtr domains = AppData()->DomainsInfo; if (FilterTenant.empty()) { @@ -550,7 +539,7 @@ class 
TJsonStorageBase : public TViewerPipeClient { } } - virtual void ReplyAndPassAway() {} + void ReplyAndPassAway() override {} void HandleTimeout(TEvents::TEvWakeup::TPtr& ev) { switch (ev->Get()->Tag) { @@ -565,4 +554,3 @@ class TJsonStorageBase : public TViewerPipeClient { }; } -} diff --git a/ydb/core/viewer/json_storage_usage.h b/ydb/core/viewer/json_storage_usage.h deleted file mode 100644 index e759a72c3ecf..000000000000 --- a/ydb/core/viewer/json_storage_usage.h +++ /dev/null @@ -1,146 +0,0 @@ -#pragma once -#include "json_storage_base.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; -using namespace NNodeWhiteboard; - -using ::google::protobuf::FieldDescriptor; - -class TJsonStorageUsage : public TJsonStorageBase { - using TBase = TJsonStorageBase; - using TThis = TJsonStorageUsage; - ui32 Pace = 5; - -public: - TJsonStorageUsage(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) - : TBase(viewer, ev) - { - const auto& params(Event->Get()->Request.GetParams()); - Pace = FromStringWithDefault(params.Get("pace"), Pace); - if (Pace == 0) { - Send(Initiator, new NMon::TEvHttpInfoRes(Viewer->GetHTTPBADREQUEST(Event->Get(), {}, "Bad Request"), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } - } - - void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { - TBase::Handle(ev, false); - } - - void ReplyAndPassAway() override { - if (CheckAdditionalNodesInfoNeeded()) { - return; - } - CollectDiskInfo(true); - ParseVDisksFromBaseConfig(); - - TMap buckets; - for (const auto& [poolName, poolInfo] : StoragePoolInfo) { - if ((!FilterTenant.empty() || !FilterStoragePools.empty()) && FilterStoragePools.count(poolName) == 0) { - continue; - } - for (TString groupId : poolInfo.Groups) { - auto ib = BSGroupIndex.find(groupId); - if (ib != BSGroupIndex.end()) { - int64 used = 0; - int64 limit = 0; - const auto& vDiskIds = ib->second.GetVDiskIds(); - for (auto iv = vDiskIds.begin(); iv != vDiskIds.end(); ++iv) { - const 
NKikimrBlobStorage::TVDiskID& vDiskId = *iv; - auto ie = VDisksIndex.find(vDiskId); - if (ie != VDisksIndex.end()) { - used += ie->second.GetAllocatedSize(); - limit += ie->second.GetAllocatedSize() + ie->second.GetAvailableSize(); - } - } - int bucketNumber = limit == 0 ? 100 : used * 100 / limit / Pace; - if (!buckets.contains(bucketNumber)) { - buckets[bucketNumber] = 0; - } - buckets[bucketNumber]++; - } - } - } - NKikimrViewer::TStorageUsageStats StorageStats; - StorageStats.SetPace(Pace); - for (ui32 i = 0; i * Pace < 100; i++) { - StorageStats.AddBuckets(buckets[i]); - } - - TStringStream json; - TProtoToJson::ProtoToJson(json, StorageStats, JsonSettings); - Send(Initiator, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), std::move(json.Str())), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: enums - in: query - description: convert enums to strings - type: boolean - required: false - - name: ui64 - in: query - description: return ui64 as number - type: boolean - required: false - - name: tenant - in: query - description: tenant name - type: string - required: false - - name: pool - in: query - description: storage pool name - type: string - required: false - - name: node_id - in: query - description: node id - type: integer - required: false - - name: pace - in: query - description: bucket size as a percentage - type: integer - required: false - default: 5 - - name: timeout - in: query - description: timeout in ms - type: integer - required: false - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Storage groups statistics"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - 
return "Returns the distribution of groups by usage"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_sysinfo.h b/ydb/core/viewer/json_sysinfo.h deleted file mode 100644 index 922f9566b917..000000000000 --- a/ydb/core/viewer/json_sysinfo.h +++ /dev/null @@ -1,73 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include "json_wb_req.h" - -namespace NKikimr { -namespace NViewer { - -template <> -class TWhiteboardMerger { -public: - static THolder MergeResponses(TMap>& responses, const TString&) { - THolder result = MakeHolder(); - ui64 minResponseTime = 0; - auto* field = result->Record.MutableSystemStateInfo(); - field->Reserve(responses.size()); - for (auto it = responses.begin(); it != responses.end(); ++it) { - if (it->second != nullptr && it->second->Record.SystemStateInfoSize() > 0) { - auto* element = field->Add(); - element->Swap(it->second->Record.MutableSystemStateInfo(0)); - element->SetNodeId(it->first); - if (minResponseTime == 0 || it->second->Record.GetResponseTime() < minResponseTime) { - minResponseTime = it->second->Record.GetResponseTime(); - } - } - } - result->Record.SetResponseTime(minResponseTime); - return result; - } -}; - -template <> -struct TWhiteboardInfo { - using TResponseType = NKikimrWhiteboard::TEvSystemStateResponse; - using TResponseEventType = TEvWhiteboard::TEvSystemStateResponse; - using TElementType = NKikimrWhiteboard::TSystemStateInfo; - - static constexpr bool StaticNodesOnly = false; - - static ::google::protobuf::RepeatedPtrField& GetElementsField(TResponseType& response) { - return *response.MutableSystemStateInfo(); - } - - static TString GetDefaultMergeField() { - return "NodeId"; - } - - static void MergeResponses(TResponseType& result, TMap& responses, const TString& fields = GetDefaultMergeField()) { - TWhiteboardMerger::MergeResponses(result, responses, fields); - } -}; - -using TJsonSysInfo = TJsonWhiteboardRequest; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() 
{ - return "System information"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns system information"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_tabletcounters.h b/ydb/core/viewer/json_tabletcounters.h deleted file mode 100644 index 023552fdea3f..000000000000 --- a/ydb/core/viewer/json_tabletcounters.h +++ /dev/null @@ -1,230 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include "viewer.h" -#include "wb_aggregate.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - -class TJsonTabletCounters : public TActorBootstrapped { - static const bool WithRetry = false; - using TBase = TActorBootstrapped; - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - TVector PipeClients; - TVector Tablets; - TMap> Results; - THolder DescribeResult; - TJsonSettings JsonSettings; - ui32 Timeout = 0; - bool Aggregate = false; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonTabletCounters(IViewer* viewer, NMon::TEvHttpInfo::TPtr &ev) - : Viewer(viewer) - , Event(ev) - {} - - static NTabletPipe::TClientConfig InitPipeClientConfig() { - NTabletPipe::TClientConfig clientConfig; - if (WithRetry) { - clientConfig.RetryPolicy = NTabletPipe::TClientRetryPolicy::WithRetries(); - } - return clientConfig; - } - - static const NTabletPipe::TClientConfig& GetPipeClientConfig() { - static NTabletPipe::TClientConfig clientConfig = InitPipeClientConfig(); - return clientConfig; - } - - void Bootstrap(const TActorContext& ctx) { - const auto& params(Event->Get()->Request.GetParams()); - JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), false); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - Aggregate = 
FromStringWithDefault(params.Get("aggregate"), true); - if (params.Has("path")) { - THolder request(new TEvTxUserProxy::TEvNavigate()); - if (!Event->Get()->UserToken.empty()) { - request->Record.SetUserToken(Event->Get()->UserToken); - } - NKikimrSchemeOp::TDescribePath* record = request->Record.MutableDescribePath(); - record->SetPath(params.Get("path")); - - TActorId txproxy = MakeTxProxyID(); - ctx.Send(txproxy, request.Release()); - Become(&TThis::StateRequestedDescribe, ctx, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } else if (params.Has("tablet_id")) { - TTabletId tabletId = FromStringWithDefault(params.Get("tablet_id"), 0); - if (tabletId != 0) { - Tablets.emplace_back(tabletId); - TActorId PipeClient = ctx.RegisterWithSameMailbox(NTabletPipe::CreateClient(ctx.SelfID, tabletId, GetPipeClientConfig())); - NTabletPipe::SendData(ctx, PipeClient, new TEvTablet::TEvGetCounters(), tabletId); - PipeClients.emplace_back(PipeClient); - Become(&TThis::StateRequestedGetCounters, ctx, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - if (PipeClients.empty()) { - ReplyAndDie(ctx); - } - } - } - - void Die(const TActorContext& ctx) override { - for (const TActorId& pipeClient : PipeClients) { - NTabletPipe::CloseClient(ctx, pipeClient); - } - TBase::Die(ctx); - } - - STFUNC(StateRequestedDescribe) { - switch (ev->GetTypeRewrite()) { - HFunc(NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult, Handle); - CFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - STFUNC(StateRequestedGetCounters) { - switch (ev->GetTypeRewrite()) { - HFunc(TEvTablet::TEvGetCountersResponse, Handle); - CFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void Handle(NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult::TPtr &ev, const TActorContext &ctx) { - DescribeResult = ev->Release(); - if (DescribeResult->GetRecord().GetStatus() == NKikimrScheme::EStatus::StatusSuccess) { - 
Tablets.reserve(DescribeResult->GetRecord().GetPathDescription().TablePartitionsSize()); - for (const auto& partition : DescribeResult->GetRecord().GetPathDescription().GetTablePartitions()) { - Tablets.emplace_back(partition.GetDatashardId()); - } - Tablets.reserve(DescribeResult->GetRecord().GetPathDescription().GetPersQueueGroup().PartitionsSize()); - for (const auto& partition : DescribeResult->GetRecord().GetPathDescription().GetPersQueueGroup().GetPartitions()) { - Tablets.emplace_back(partition.GetTabletId()); - } - Sort(Tablets); - Tablets.erase(std::unique(Tablets.begin(), Tablets.end()), Tablets.end()); - } - for (auto tabletId : Tablets) { - TActorId PipeClient = ctx.RegisterWithSameMailbox(NTabletPipe::CreateClient(ctx.SelfID, tabletId, GetPipeClientConfig())); - NTabletPipe::SendData(ctx, PipeClient, new TEvTablet::TEvGetCounters(), tabletId); - PipeClients.emplace_back(PipeClient); - } - if (Tablets.empty()) { - ReplyAndDie(ctx); - } - Become(&TThis::StateRequestedGetCounters); - } - - void Handle(TEvTablet::TEvGetCountersResponse::TPtr &ev, const TActorContext &ctx) { - Results.emplace(ev->Cookie, ev->Release()); - if (Results.size() == Tablets.size()) { - ReplyAndDie(ctx); - } - } - - void ReplyAndDie(const TActorContext &ctx) { - TStringStream json; - if (!Results.empty()) { - if (Aggregate) { - THolder response = AggregateWhiteboardResponses(Results); - TProtoToJson::ProtoToJson(json, response->Record, JsonSettings); - } else { - json << '{'; - for (auto it = Results.begin(); it != Results.end(); ++it) { - if (it != Results.begin()) { - json << ','; - } - json << '"' << it->first << "\":"; - TProtoToJson::ProtoToJson(json, it->second->Record, JsonSettings); - } - json << '}'; - } - } else { - json << "null"; - } - ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), json.Str()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); - } - - void HandleTimeout(const TActorContext &ctx) { - 
ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: path - in: query - description: schema path - required: false - type: string - - name: tablet_id - in: query - description: tablet identifier - required: false - type: integer - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - - name: aggregate - in: query - description: aggregate tablet counters - required: false - type: boolean - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Tablet counters information"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns information about tablet counters"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_tabletinfo.h b/ydb/core/viewer/json_tabletinfo.h deleted file mode 100644 index aac8fabe01a5..000000000000 --- a/ydb/core/viewer/json_tabletinfo.h +++ /dev/null @@ -1,472 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "json_pipe_req.h" -#include "json_wb_req.h" -#include - -namespace NKikimr { -namespace NViewer { - -template<> -struct TWhiteboardInfo { - using TResponseEventType = TEvWhiteboard::TEvTabletStateResponse; - using TResponseType = NKikimrWhiteboard::TEvTabletStateResponse; - using TElementType = NKikimrWhiteboard::TTabletStateInfo; - using TElementTypePacked5 = 
NNodeWhiteboard::TEvWhiteboard::TEvTabletStateResponsePacked5; - using TElementKeyType = std::pair; - - static constexpr bool StaticNodesOnly = false; - - static ::google::protobuf::RepeatedPtrField& GetElementsField(TResponseType& response) { - return *response.MutableTabletStateInfo(); - } - - static std::span GetElementsFieldPacked5(const TResponseType& response) { - const auto& packed5 = response.GetPacked5(); - return std::span{reinterpret_cast(packed5.data()), packed5.size() / sizeof(TElementTypePacked5)}; - } - - static size_t GetElementsCount(const TResponseType& response) { - return response.GetTabletStateInfo().size() + response.GetPacked5().size() / sizeof(TElementTypePacked5); - } - - static TElementKeyType GetElementKey(const TElementType& type) { - return TElementKeyType(type.GetTabletId(), type.GetFollowerId()); - } - - static TElementKeyType GetElementKey(const TElementTypePacked5& type) { - return TElementKeyType(type.TabletId, type.FollowerId); - } - - static TString GetDefaultMergeField() { - return "TabletId,FollowerId"; - } - - static void MergeResponses(TResponseType& result, TMap& responses, const TString& fields = GetDefaultMergeField()) { - if (fields == GetDefaultMergeField()) { - TStaticMergeKey mergeKey; - TWhiteboardMerger::MergeResponsesBaseHybrid(result, responses, mergeKey); - } else { - TWhiteboardMerger::TDynamicMergeKey mergeKey(fields); - TWhiteboardMerger::MergeResponsesBase(result, responses, mergeKey); - } - } -}; - -template <> -struct TWhiteboardMergerComparator { - bool operator ()(const NKikimrWhiteboard::TTabletStateInfo& a, const NKikimrWhiteboard::TTabletStateInfo& b) const { - return std::make_tuple(a.GetGeneration(), a.GetChangeTime()) < std::make_tuple(b.GetGeneration(), b.GetChangeTime()); - } -}; - -template <> -struct TWhiteboardMergerComparator { - bool operator ()(const NNodeWhiteboard::TEvWhiteboard::TEvTabletStateResponsePacked5& a, const NNodeWhiteboard::TEvWhiteboard::TEvTabletStateResponsePacked5& b) const 
{ - return a.Generation < b.Generation; - } -}; - -class TJsonTabletInfo : public TJsonWhiteboardRequest { - static const bool WithRetry = false; - bool ReplyWithDeadTabletsInfo; - using TBase = TJsonWhiteboardRequest; - using TThis = TJsonTabletInfo; - THashMap Tablets; - std::unordered_map EndOfRangeKeyPrefix; - TTabletId HiveId; - bool IsBase64Encode = true; -public: - TJsonTabletInfo(IViewer *viewer, NMon::TEvHttpInfo::TPtr &ev) - : TJsonWhiteboardRequest(viewer, ev) - { - static TString prefix = "json/tabletinfo "; - LogPrefix = prefix; - } - - void Bootstrap() override { - BLOG_TRACE("Bootstrap()"); - const auto& params(Event->Get()->Request.GetParams()); - ReplyWithDeadTabletsInfo = params.Has("path"); - if (params.Has("path")) { - TBase::RequestSettings.Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - IsBase64Encode = FromStringWithDefault(params.Get("base64"), IsBase64Encode); - THolder request(new TEvTxUserProxy::TEvNavigate()); - if (!Event->Get()->UserToken.empty()) { - request->Record.SetUserToken(Event->Get()->UserToken); - } - NKikimrSchemeOp::TDescribePath* record = request->Record.MutableDescribePath(); - record->SetPath(params.Get("path")); - - TActorId txproxy = MakeTxProxyID(); - TBase::Send(txproxy, request.Release()); - UnsafeBecome(&TThis::StateRequestedDescribe, TDuration::MilliSeconds(TBase::RequestSettings.Timeout), new TEvents::TEvWakeup()); - } else { - TBase::Bootstrap(); - if (!TBase::RequestSettings.FilterFields.empty()) { - if (IsMatchesWildcard(TBase::RequestSettings.FilterFields, "(TabletId=*)")) { - TString strTabletId(TBase::RequestSettings.FilterFields.substr(10, TBase::RequestSettings.FilterFields.size() - 11)); - TTabletId uiTabletId(FromStringWithDefault(strTabletId, {})); - if (uiTabletId) { - Tablets[uiTabletId] = NKikimrTabletBase::TTabletTypes::Unknown; - Request->Record.AddFilterTabletId(uiTabletId); - } - } - } - } - } - - TString GetColumnValue(const TCell& cell, const 
NKikimrSchemeOp::TColumnDescription& type) { - if (cell.IsNull()) { - return "NULL"; - } - switch (type.GetTypeId()) { - case NScheme::NTypeIds::Int32: - return ToString(cell.AsValue()); - case NScheme::NTypeIds::Uint32: - return ToString(cell.AsValue()); - case NScheme::NTypeIds::Int64: - return ToString(cell.AsValue()); - case NScheme::NTypeIds::Uint64: - return ToString(cell.AsValue()); - case NScheme::NTypeIds::Int8: - return ToString(cell.AsValue()); - case NScheme::NTypeIds::Uint8: - return ToString(cell.AsValue()); - case NScheme::NTypeIds::Int16: - return ToString(cell.AsValue()); - case NScheme::NTypeIds::Uint16: - return ToString(cell.AsValue()); - case NScheme::NTypeIds::Bool: - return cell.AsValue() ? "true" : "false"; - case NScheme::NTypeIds::Date: return "Date"; - case NScheme::NTypeIds::Datetime: return "Datetime"; - case NScheme::NTypeIds::Timestamp: return "Timestamp"; - case NScheme::NTypeIds::Interval: return "Interval"; - case NScheme::NTypeIds::Date32: return "Date32"; - case NScheme::NTypeIds::Datetime64: return "Datetime64"; - case NScheme::NTypeIds::Timestamp64: return "Timestamp64"; - case NScheme::NTypeIds::Interval64: return "Interval64"; - case NScheme::NTypeIds::PairUi64Ui64: return "PairUi64Ui64"; - case NScheme::NTypeIds::String: - case NScheme::NTypeIds::String4k: - case NScheme::NTypeIds::String2m: - return IsBase64Encode ? 
Base64Encode(cell.AsBuf()) : (TStringBuilder() << '"' << cell.AsBuf() << '"'); - case NScheme::NTypeIds::Utf8: - return TStringBuilder() << '"' << cell.AsBuf() << '"'; - case NScheme::NTypeIds::Decimal: return "Decimal"; - case NScheme::NTypeIds::DyNumber: return "DyNumber"; - case NScheme::NTypeIds::Uuid: return "Uuid"; - default: - return "-"; - } - } - - void Handle(NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult::TPtr &ev) { - THolder describeResult = ev->Release(); - if (describeResult->GetRecord().GetStatus() == NKikimrScheme::EStatus::StatusSuccess) { - const auto& pathDescription = describeResult->GetRecord().GetPathDescription(); - for (auto shard : pathDescription.GetColumnTableDescription().GetSharding().GetColumnShards()) { - Tablets[shard] = NKikimrTabletBase::TTabletTypes::ColumnShard; - } - for (auto shard : pathDescription.GetColumnStoreDescription().GetColumnShards()) { - Tablets[shard] = NKikimrTabletBase::TTabletTypes::ColumnShard; - } - if (pathDescription.HasTable()) { - std::vector keyColumns; - for (uint32 id : pathDescription.GetTable().GetKeyColumnIds()) { - for (const auto& column : pathDescription.GetTable().GetColumns()) { - if (column.GetId() == id) { - keyColumns.push_back(column); - break; - } - } - } - for (const auto& partition : pathDescription.GetTablePartitions()) { - Tablets[partition.GetDatashardId()] = NKikimrTabletBase::TTabletTypes::DataShard; - if (partition.HasEndOfRangeKeyPrefix()) { - TSerializedCellVec cellVec; - if (TSerializedCellVec::TryParse(partition.GetEndOfRangeKeyPrefix(), cellVec)) { - TStringBuilder key; - TConstArrayRef cells(cellVec.GetCells()); - if (cells.size() == keyColumns.size()) { - if (cells.size() > 1) { - key << "("; - } - for (size_t idx = 0; idx < cells.size(); ++idx) { - if (idx > 0) { - key << ","; - } - const NKikimrSchemeOp::TColumnDescription& type(keyColumns[idx]); - const TCell& cell(cells[idx]); - key << GetColumnValue(cell, type); - } - if (cells.size() > 1) { - key << ")"; - } - } 
- if (key) { - EndOfRangeKeyPrefix[partition.GetDatashardId()] = key; - } - } - } - } - } - for (const auto& partition : pathDescription.GetPersQueueGroup().GetPartitions()) { - Tablets[partition.GetTabletId()] = NKikimrTabletBase::TTabletTypes::PersQueue; - } - if (pathDescription.HasRtmrVolumeDescription()) { - for (const auto& partition : pathDescription.GetRtmrVolumeDescription().GetPartitions()) { - Tablets[partition.GetTabletId()] = NKikimrTabletBase::TTabletTypes::RTMRPartition; - } - } - if (pathDescription.HasBlockStoreVolumeDescription()) { - for (const auto& partition : pathDescription.GetBlockStoreVolumeDescription().GetPartitions()) { - Tablets[partition.GetTabletId()] = NKikimrTabletBase::TTabletTypes::BlockStorePartition; - } - if (pathDescription.GetBlockStoreVolumeDescription().HasVolumeTabletId()) { - Tablets[pathDescription.GetBlockStoreVolumeDescription().GetVolumeTabletId()] = NKikimrTabletBase::TTabletTypes::BlockStoreVolume; - } - } - if (pathDescription.GetKesus().HasKesusTabletId()) { - Tablets[pathDescription.GetKesus().GetKesusTabletId()] = NKikimrTabletBase::TTabletTypes::Kesus; - } - if (pathDescription.HasSolomonDescription()) { - for (const auto& partition : pathDescription.GetSolomonDescription().GetPartitions()) { - Tablets[partition.GetTabletId()] = NKikimrTabletBase::TTabletTypes::KeyValue; - } - } - if (pathDescription.GetFileStoreDescription().HasIndexTabletId()) { - Tablets[pathDescription.GetFileStoreDescription().GetIndexTabletId()] = NKikimrTabletBase::TTabletTypes::FileStore; - } - if (pathDescription.GetSequenceDescription().HasSequenceShard()) { - Tablets[pathDescription.GetSequenceDescription().GetSequenceShard()] = NKikimrTabletBase::TTabletTypes::SequenceShard; - } - if (pathDescription.GetReplicationDescription().HasControllerId()) { - Tablets[pathDescription.GetReplicationDescription().GetControllerId()] = NKikimrTabletBase::TTabletTypes::ReplicationController; - } - if 
(pathDescription.GetBlobDepotDescription().HasTabletId()) { - Tablets[pathDescription.GetBlobDepotDescription().GetTabletId()] = NKikimrTabletBase::TTabletTypes::BlobDepot; - } - - if (pathDescription.GetSelf().GetPathType() == NKikimrSchemeOp::EPathType::EPathTypeDir - || pathDescription.GetSelf().GetPathType() == NKikimrSchemeOp::EPathType::EPathTypeSubDomain - || pathDescription.GetSelf().GetPathType() == NKikimrSchemeOp::EPathType::EPathTypeExtSubDomain) { - if (pathDescription.HasDomainDescription()) { - const auto& domainDescription(pathDescription.GetDomainDescription()); - for (TTabletId tabletId : domainDescription.GetProcessingParams().GetCoordinators()) { - Tablets[tabletId] = NKikimrTabletBase::TTabletTypes::Coordinator; - } - for (TTabletId tabletId : domainDescription.GetProcessingParams().GetMediators()) { - Tablets[tabletId] = NKikimrTabletBase::TTabletTypes::Mediator; - } - if (domainDescription.GetProcessingParams().HasSchemeShard()) { - Tablets[domainDescription.GetProcessingParams().GetSchemeShard()] = NKikimrTabletBase::TTabletTypes::SchemeShard; - } - if (domainDescription.GetProcessingParams().HasHive()) { - Tablets[pathDescription.GetDomainDescription().GetProcessingParams().GetHive()] = NKikimrTabletBase::TTabletTypes::Hive; - HiveId = domainDescription.GetProcessingParams().GetHive(); - } - if (domainDescription.GetProcessingParams().HasGraphShard()) { - Tablets[pathDescription.GetDomainDescription().GetProcessingParams().GetGraphShard()] = NKikimrTabletBase::TTabletTypes::GraphShard; - } - if (domainDescription.GetProcessingParams().HasSysViewProcessor()) { - Tablets[pathDescription.GetDomainDescription().GetProcessingParams().GetSysViewProcessor()] = NKikimrTabletBase::TTabletTypes::SysViewProcessor; - } - if (domainDescription.GetProcessingParams().HasStatisticsAggregator()) { - Tablets[pathDescription.GetDomainDescription().GetProcessingParams().GetStatisticsAggregator()] = NKikimrTabletBase::TTabletTypes::StatisticsAggregator; - } - 
if (domainDescription.GetProcessingParams().HasBackupController()) { - Tablets[pathDescription.GetDomainDescription().GetProcessingParams().GetBackupController()] = NKikimrTabletBase::TTabletTypes::BackupController; - } - TIntrusivePtr domains = AppData()->DomainsInfo; - auto* domain = domains->GetDomain(); - if (describeResult->GetRecord().GetPathOwnerId() == domain->SchemeRoot && describeResult->GetRecord().GetPathId() == 1) { - Tablets[domain->SchemeRoot] = NKikimrTabletBase::TTabletTypes::SchemeShard; - Tablets[domains->GetHive()] = NKikimrTabletBase::TTabletTypes::Hive; - HiveId = domains->GetHive(); - Tablets[MakeBSControllerID()] = NKikimrTabletBase::TTabletTypes::BSController; - Tablets[MakeConsoleID()] = NKikimrTabletBase::TTabletTypes::Console; - Tablets[MakeNodeBrokerID()] = NKikimrTabletBase::TTabletTypes::NodeBroker; - Tablets[MakeTenantSlotBrokerID()] = NKikimrTabletBase::TTabletTypes::TenantSlotBroker; - Tablets[MakeCmsID()] = NKikimrTabletBase::TTabletTypes::Cms; - for (TTabletId tabletId : domain->Coordinators) { - Tablets[tabletId] = NKikimrTabletBase::TTabletTypes::Coordinator; - } - for (TTabletId tabletId : domain->Mediators) { - Tablets[tabletId] = NKikimrTabletBase::TTabletTypes::Mediator; - } - for (TTabletId tabletId : domain->TxAllocators) { - Tablets[tabletId] = NKikimrTabletBase::TTabletTypes::TxAllocator; - } - } - } - } - } - if (Tablets.empty()) { - ReplyAndPassAway(); - } else { - TBase::Bootstrap(); - for (auto tablet : Tablets) { - Request->Record.AddFilterTabletId(tablet.first); - } - } - } - - virtual void FilterResponse(NKikimrWhiteboard::TEvTabletStateResponse& response) override { - if (!Tablets.empty()) { - NKikimrWhiteboard::TEvTabletStateResponse result; - for (const NKikimrWhiteboard::TTabletStateInfo& info : response.GetTabletStateInfo()) { - auto tablet = Tablets.find(info.GetTabletId()); - if (tablet != Tablets.end()) { - auto tabletInfo = result.MutableTabletStateInfo()->Add(); - tabletInfo->CopyFrom(info); - auto 
itKey = EndOfRangeKeyPrefix.find(info.GetTabletId()); - if (itKey != EndOfRangeKeyPrefix.end()) { - tabletInfo->SetEndOfRangeKeyPrefix(itKey->second); - } - Tablets.erase(tablet->first); - } - } - if (ReplyWithDeadTabletsInfo) { - for (auto tablet : Tablets) { - auto deadTablet = result.MutableTabletStateInfo()->Add(); - deadTablet->SetTabletId(tablet.first); - deadTablet->SetState(NKikimrWhiteboard::TTabletStateInfo::Dead); - deadTablet->SetType(tablet.second); - deadTablet->SetHiveId(HiveId); - } - } - result.SetResponseTime(response.GetResponseTime()); - response = std::move(result); - } - for (NKikimrWhiteboard::TTabletStateInfo& info : *response.MutableTabletStateInfo()) { - info.SetOverall(GetWhiteboardFlag(GetFlagFromTabletState(info.GetState()))); - } - TBase::FilterResponse(response); - } - - STATEFN(StateRequestedDescribe) { - switch (ev->GetTypeRewrite()) { - hFunc(NSchemeShard::TEvSchemeShard::TEvDescribeSchemeResult, Handle); - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void PassAway() override { - TBase::PassAway(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: node_id - in: query - description: node identifier - required: false - type: integer - - name: path - in: query - description: schema path - required: false - type: string - - name: merge - in: query - description: merge information from nodes - required: false - type: boolean - - name: group - in: query - description: group information by field - required: false - type: string - - name: all - in: query - description: return all possible key combinations (for enums only) - required: false - type: boolean - - name: filter - in: query - description: filter information by field - required: false - type: string - - name: alive - in: query - description: request from alive (connected) nodes only - required: false - type: boolean - - name: enums - in: query - description: convert enums to strings - 
required: false - type: boolean - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - - name: retries - in: query - description: number of retries - required: false - type: integer - - name: retry_period - in: query - description: retry period in ms - required: false - type: integer - default: 500 - - name: static - in: query - description: request from static nodes only - required: false - type: boolean - - name: since - in: query - description: filter by update time - required: false - type: string - )___"); - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Tablet information"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns information about tablets"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_tenantinfo.h b/ydb/core/viewer/json_tenantinfo.h deleted file mode 100644 index 11d785086384..000000000000 --- a/ydb/core/viewer/json_tenantinfo.h +++ /dev/null @@ -1,937 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "viewer.h" -#include "json_pipe_req.h" -#include "wb_aggregate.h" -#include "wb_merge.h" -#include "log.h" -#include "viewer_request.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - -class TJsonTenantInfo : public TViewerPipeClient { - using TBase = TViewerPipeClient; - IViewer* Viewer; - THashMap TenantByPath; - THashMap TenantBySubDomainKey; - THashMap HcOverallByTenantPath; - THashMap> NavigateResult; - THashMap> HiveDomainStats; - THashMap> HiveStorageStats; - NMon::TEvHttpInfo::TPtr Event; - THashSet Subscribers; - THashSet 
WhiteboardNodesRequested; - THashSet OffloadTenantsRequested; - THashSet MetadataCacheRequested; - THashMap NodeIdsToTenant; // for tablet info - TMap WhiteboardSystemStateResponse; - THashMap> WhiteboardTabletStateResponse; - TJsonSettings JsonSettings; - ui32 Timeout = 0; - TString User; - TString Path; - TString DomainPath; - bool Tablets = false; - bool SystemTablets = false; - bool Storage = false; - bool Nodes = false; - bool Users = false; - bool OffloadMerge = false; - THashMap> TenantNodes; - THashMap OffloadMergedTabletStateResponse; - THashMap OffloadMergedSystemStateResponse; - TTabletId RootHiveId = 0; - TString RootId; // id of root domain (tenant) - NKikimrViewer::TTenantInfo Result; - - struct TStorageQuota { - uint64 SoftQuota = 0; - uint64 HardQuota = 0; - }; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonTenantInfo(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) - : Viewer(viewer) - , Event(ev) - {} - - TString GetLogPrefix() { - static TString prefix = "json/tenantinfo "; - return prefix; - } - - TString GetDomainId(TPathId pathId) { - return TStringBuilder() << pathId.OwnerId << '-' << pathId.LocalPathId; - } - - bool IsFilterByPath() { - return !Path.empty() && DomainPath != Path; - } - - bool IsValidTenant(const TString& path) { - return !IsFilterByPath() || Path == path; - } - - bool IsFilterByOwner() { - return !User.empty(); - } - - bool IsValidOwner(const std::unordered_set& users) { - return !IsFilterByOwner() || users.count(User) != 0; - } - - void Bootstrap() { - BLOG_TRACE("Bootstrap()"); - const auto& params(Event->Get()->Request.GetParams()); - JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), true); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); - Followers = false; - Metrics = true; - InitConfig(params); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - 
Tablets = FromStringWithDefault(params.Get("tablets"), Tablets); - SystemTablets = FromStringWithDefault(params.Get("system_tablets"), Tablets); // Tablets here is by design - Storage = FromStringWithDefault(params.Get("storage"), Storage); - Nodes = FromStringWithDefault(params.Get("nodes"), Nodes); - Users = FromStringWithDefault(params.Get("users"), Users); - User = params.Get("user"); - Path = params.Get("path"); - OffloadMerge = FromStringWithDefault(params.Get("offload_merge"), OffloadMerge); - - TIntrusivePtr domains = AppData()->DomainsInfo; - auto *domain = domains->GetDomain(); - - RequestConsoleListTenants(); - - DomainPath = "/" + domain->Name; - if (!IsFilterByPath()) { - TPathId subDomainKey(domain->SchemeRoot, 1); - NKikimrViewer::TTenant& tenant = TenantBySubDomainKey[subDomainKey]; - tenant.SetId(GetDomainId(subDomainKey)); - tenant.SetState(Ydb::Cms::GetDatabaseStatusResult::RUNNING); - tenant.SetType(NKikimrViewer::Domain); - RequestSchemeCacheNavigate(DomainPath); - } - RootId = GetDomainId({domain->SchemeRoot, 1}); - RootHiveId = domains->GetHive(); - RequestHiveDomainStats(RootHiveId); - if (Storage) { - RequestHiveStorageStats(RootHiveId); - } - - if (Requests == 0) { - ReplyAndPassAway(); - } - - Become(&TThis::StateRequested, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - void PassAway() override { - for (const TNodeId nodeId : Subscribers) { - if (nodeId != SelfId().NodeId()) { - Send(TActivationContext::InterconnectProxy(nodeId), new TEvents::TEvUnsubscribe()); - } - } - TBase::PassAway(); - BLOG_TRACE("PassAway()"); - } - - STATEFN(StateRequested) { - switch (ev->GetTypeRewrite()) { - hFunc(NConsole::TEvConsole::TEvListTenantsResponse, Handle); - hFunc(NConsole::TEvConsole::TEvGetTenantStatusResponse, Handle); - hFunc(TEvHive::TEvResponseHiveDomainStats, Handle); - hFunc(TEvHive::TEvResponseHiveStorageStats, Handle); - hFunc(TEvTxProxySchemeCache::TEvNavigateKeySetResult, Handle); - 
hFunc(TEvWhiteboard::TEvSystemStateResponse, Handle); - hFunc(TEvWhiteboard::TEvTabletStateResponse, Handle); - hFunc(TEvViewer::TEvViewerResponse, Handle); - hFunc(TEvents::TEvUndelivered, Undelivered); - hFunc(TEvInterconnect::TEvNodeDisconnected, Disconnected); - hFunc(TEvTabletPipe::TEvClientConnected, TBase::Handle); - hFunc(TEvStateStorage::TEvBoardInfo, Handle); - hFunc(NHealthCheck::TEvSelfCheckResultProto, Handle); - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void Handle(NConsole::TEvConsole::TEvListTenantsResponse::TPtr& ev) { - BLOG_TRACE("Received ListTenantsResponse"); - Ydb::Cms::ListDatabasesResult listTenantsResult; - ev->Get()->Record.GetResponse().operation().result().UnpackTo(&listTenantsResult); - for (const TString& path : listTenantsResult.paths()) { - if (!IsValidTenant(path)) { - continue; - } - RequestConsoleGetTenantStatus(path); - RequestSchemeCacheNavigate(path); - - if (AppData()->FeatureFlags.GetEnableDbMetadataCache()) { - RequestStateStorageMetadataCacheEndpointsLookup(path); - } - } - RequestDone(); - } - - void Handle(NConsole::TEvConsole::TEvGetTenantStatusResponse::TPtr& ev) { - BLOG_TRACE("Received GetTenantStatusResponse"); - Ydb::Cms::GetDatabaseStatusResult getTenantStatusResult; - ev->Get()->Record.GetResponse().operation().result().UnpackTo(&getTenantStatusResult); - TString path = getTenantStatusResult.path(); - NKikimrViewer::TTenant& tenant = TenantByPath[path]; - tenant.SetName(path); - tenant.SetState(getTenantStatusResult.state()); - if (getTenantStatusResult.has_required_shared_resources()) { - tenant.SetType(NKikimrViewer::Shared); - RequestSchemeCacheNavigate(path); - } - for (const Ydb::Cms::StorageUnits& unit : getTenantStatusResult.allocated_resources().storage_units()) { - NKikimrViewer::TTenantResource& resource = *tenant.MutableResources()->AddAllocated(); - resource.SetType("storage"); - resource.SetKind(unit.unit_kind()); - resource.SetCount(unit.count()); - } - for (const 
Ydb::Cms::StorageUnits& unit : getTenantStatusResult.required_resources().storage_units()) { - NKikimrViewer::TTenantResource& resource = *tenant.MutableResources()->AddRequired(); - resource.SetType("storage"); - resource.SetKind(unit.unit_kind()); - resource.SetCount(unit.count()); - } - for (const Ydb::Cms::ComputationalUnits& unit : getTenantStatusResult.allocated_resources().computational_units()) { - NKikimrViewer::TTenantResource& resource = *tenant.MutableResources()->AddAllocated(); - resource.SetType("compute"); - resource.SetZone(unit.availability_zone()); - resource.SetKind(unit.unit_kind()); - resource.SetCount(unit.count()); - } - for (const Ydb::Cms::ComputationalUnits& unit : getTenantStatusResult.required_resources().computational_units()) { - NKikimrViewer::TTenantResource& resource = *tenant.MutableResources()->AddRequired(); - resource.SetType("compute"); - resource.SetZone(unit.availability_zone()); - resource.SetKind(unit.unit_kind()); - resource.SetCount(unit.count()); - } - Ydb::Cms::DatabaseQuotas& quotas = *tenant.MutableDatabaseQuotas(); - quotas.MergeFrom(getTenantStatusResult.database_quotas()); - - RequestDone(); - } - - void SendWhiteboardSystemStateRequest(const TNodeId nodeId) { - Subscribers.insert(nodeId); - TActorId whiteboardServiceId = MakeNodeWhiteboardServiceId(nodeId); - THolder request = MakeHolder(); - BLOG_TRACE("Tenant " << NodeIdsToTenant[nodeId] << " send to " << nodeId << " TEvSystemStateRequest: " << request->Record.ShortDebugString()); - SendRequest(whiteboardServiceId, request.Release(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId); - } - - void SendWhiteboardTabletStateRequest(const TNodeId nodeId) { - Subscribers.insert(nodeId); - TActorId whiteboardServiceId = MakeNodeWhiteboardServiceId(nodeId); - THolder request = MakeHolder(); - request->Record.SetFormat("packed5"); - BLOG_TRACE("Tenant " << NodeIdsToTenant[nodeId] << " send to " << nodeId << " TEvTabletStateRequest: " << 
request->Record.ShortDebugString()); - SendRequest(whiteboardServiceId, request.Release(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, nodeId); - } - - void SendWhiteboardRequests(const TNodeId nodeId) { - if (WhiteboardNodesRequested.insert(nodeId).second) { - SendWhiteboardSystemStateRequest(nodeId); - if (Tablets) { - SendWhiteboardTabletStateRequest(nodeId); - } - } - } - - void SendOffloadRequests(const TString& tenantId) { - std::vector& nodesIds = TenantNodes[tenantId]; - if (!nodesIds.empty() && OffloadTenantsRequested.insert(tenantId).second) { - ui64 hash = std::hash()(Event->Get()->Request.GetRemoteAddr()); - auto itPos = std::next(nodesIds.begin(), hash % nodesIds.size()); - std::nth_element(nodesIds.begin(), itPos, nodesIds.end()); - TNodeId nodeId = *itPos; - - Subscribers.insert(nodeId); - TActorId viewerServiceId = MakeViewerID(nodeId); - - THolder sysRequest = MakeHolder(); - sysRequest->Record.MutableSystemRequest(); - sysRequest->Record.SetTimeout(Timeout / 3); - for (auto nodeId : nodesIds) { - sysRequest->Record.MutableLocation()->AddNodeId(nodeId); - } - BLOG_TRACE("Tenant " << tenantId << " send to " << nodeId << " TEvViewerRequest: " << sysRequest->Record.ShortDebugString()); - ViewerWhiteboardCookie cookie (NKikimrViewer::TEvViewerRequest::kSystemRequest, nodeId); - SendRequest(viewerServiceId, sysRequest.Release(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, cookie.ToUi64()); - - if (Tablets) { - THolder tblRequest = MakeHolder(); - tblRequest->Record.MutableTabletRequest()->SetFormat("packed5"); - tblRequest->Record.SetTimeout(Timeout / 3); - for (auto nodeId : nodesIds) { - tblRequest->Record.MutableLocation()->AddNodeId(nodeId); - } - BLOG_TRACE("Tenant " << tenantId << " send to " << nodeId << " TEvViewerRequest: " << tblRequest->Record.ShortDebugString()); - ViewerWhiteboardCookie cookie(NKikimrViewer::TEvViewerRequest::kTabletRequest, nodeId); - SendRequest(viewerServiceId, 
tblRequest.Release(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, cookie.ToUi64()); - } - } - } - - void Handle(TEvHive::TEvResponseHiveDomainStats::TPtr& ev) { - for (const NKikimrHive::THiveDomainStats& hiveStat : ev->Get()->Record.GetDomainStats()) { - TPathId subDomainKey({hiveStat.GetShardId(), hiveStat.GetPathId()}); - NKikimrViewer::TTenant& tenant = TenantBySubDomainKey[subDomainKey]; - TString tenantId = GetDomainId({hiveStat.GetShardId(), hiveStat.GetPathId()}); - tenant.SetId(tenantId); - if (ev->Cookie != RootHiveId || tenant.GetId() == RootId) { - if (!tenant.HasMetrics()) { - tenant.MutableMetrics()->CopyFrom(hiveStat.GetMetrics()); - } - if (tenant.StateStatsSize() == 0) { - tenant.MutableStateStats()->CopyFrom(hiveStat.GetStateStats()); - } - if (tenant.NodeIdsSize() == 0) { - tenant.MutableNodeIds()->CopyFrom(hiveStat.GetNodeIds()); - } - if (tenant.GetAliveNodes() == 0) { - tenant.SetAliveNodes(hiveStat.GetAliveNodes()); - } - } - - BLOG_TRACE("Received HiveDomainStats for " << tenant.GetId() << " from " << ev->Cookie); - std::vector nodesIds; - nodesIds.reserve(hiveStat.NodeIdsSize()); - for (auto nodeId : hiveStat.GetNodeIds()) { - nodesIds.push_back(nodeId); - NodeIdsToTenant.insert({nodeId, tenantId}); - } - TenantNodes[tenantId] = nodesIds; - - if (OffloadMerge) { - SendOffloadRequests(tenantId); - } else { - for (TNodeId nodeId : hiveStat.GetNodeIds()) { - SendWhiteboardRequests(nodeId); - } - } - } - HiveDomainStats[ev->Cookie] = std::move(ev->Release()); - RequestDone(); - } - - void Handle(TEvHive::TEvResponseHiveStorageStats::TPtr& ev) { - BLOG_TRACE("Received HiveStorageStats from " << ev->Cookie); - HiveStorageStats[ev->Cookie] = std::move(ev->Release()); - RequestDone(); - } - - void Handle(TEvTxProxySchemeCache::TEvNavigateKeySetResult::TPtr& ev) { - if (ev->Get()->Request->ResultSet.size() == 1 && ev->Get()->Request->ResultSet.begin()->Status == NSchemeCache::TSchemeCacheNavigate::EStatus::Ok) { - auto 
domainInfo = ev->Get()->Request->ResultSet.begin()->DomainInfo; - TTabletId hiveId = domainInfo->Params.GetHive(); - if (hiveId) { - RequestHiveDomainStats(hiveId); - if (Storage) { - RequestHiveStorageStats(hiveId); - } - } - NKikimrViewer::TTenant& tenant = TenantBySubDomainKey[domainInfo->DomainKey]; - if (domainInfo->ResourcesDomainKey != domainInfo->DomainKey) { - NKikimrViewer::TTenant& sharedTenant = TenantBySubDomainKey[domainInfo->ResourcesDomainKey]; - if (sharedTenant.GetType() != NKikimrViewer::Shared) { - sharedTenant.SetType(NKikimrViewer::Shared); - RequestSchemeCacheNavigate(domainInfo->ResourcesDomainKey); - } - tenant.SetType(NKikimrViewer::Serverless); - tenant.SetResourceId(GetDomainId(domainInfo->ResourcesDomainKey)); - } - TString id = GetDomainId(domainInfo->DomainKey); - TString path = CanonizePath(ev->Get()->Request->ResultSet.begin()->Path); - BLOG_TRACE("Received Navigate for " << id << " " << path); - tenant.SetId(id); - tenant.SetName(path); - if (tenant.GetType() == NKikimrViewer::UnknownTenantType) { - tenant.SetType(NKikimrViewer::Dedicated); - } - NavigateResult[id] = std::move(ev->Get()->Request); - } - RequestDone(); - } - - void Handle(NNodeWhiteboard::TEvWhiteboard::TEvSystemStateResponse::TPtr& ev) { - ui32 nodeId = ev.Get()->Cookie; - BLOG_TRACE("Received TEvSystemStateResponse from " << nodeId); - WhiteboardSystemStateResponse[nodeId] = std::move(ev->Get()->Record); - RequestDone(); - } - - void Handle(NNodeWhiteboard::TEvWhiteboard::TEvTabletStateResponse::TPtr& ev) { - ui32 nodeId = ev.Get()->Cookie; - BLOG_TRACE("Received TEvTabletStateResponse from " << nodeId << " with " - << TWhiteboardInfo::GetElementsCount(ev->Get()->Record) << " tablets"); - auto tenantId = NodeIdsToTenant[nodeId]; - WhiteboardTabletStateResponse[tenantId][nodeId] = std::move(ev->Get()->Record); - RequestDone(); - } - - void Handle(NHealthCheck::TEvSelfCheckResultProto::TPtr& ev) { - auto result = std::move(ev->Get()->Record); - if 
(result.database_status_size() == 1) { - HcOverallByTenantPath.emplace(result.database_status(0).name(), GetViewerFlag(result.database_status(0).overall())); - } - - RequestDone(); - } - - void Handle(TEvStateStorage::TEvBoardInfo::TPtr& ev) { - auto activeNode = TDatabaseMetadataCache::PickActiveNode(ev->Get()->InfoEntries); - if (activeNode != 0) { - Subscribers.insert(activeNode); - std::optional cache = MakeDatabaseMetadataCacheId(activeNode); - auto request = MakeHolder(); - if (MetadataCacheRequested.insert(ev->Get()->Path).second) { - SendRequest(*cache, request.Release(), IEventHandle::FlagTrackDelivery | IEventHandle::FlagSubscribeOnSession, activeNode); - } - } - RequestDone(); - } - - void Handle(TEvViewer::TEvViewerResponse::TPtr& ev) { - ui32 nodeId = ev.Get()->Cookie; - auto tenantId = NodeIdsToTenant[nodeId]; - switch (ev->Get()->Record.GetResponseCase()) { - case NKikimrViewer::TEvViewerResponse::kTabletResponse: - BLOG_TRACE("Received TEvViewerResponse from " << nodeId << " with " - << TWhiteboardInfo::GetElementsCount(ev->Get()->Record.GetTabletResponse()) - << " tablets"); - OffloadMergedTabletStateResponse[tenantId] = std::move(ev->Get()->Record); - RequestDone(); - break; - case NKikimrViewer::TEvViewerResponse::kSystemResponse: - BLOG_TRACE("Received TEvViewerResponse from " << nodeId); - OffloadMergedSystemStateResponse[tenantId] = std::move(ev->Get()->Record); - RequestDone(); - break; - default: - break; - } - } - - void Undelivered(TEvents::TEvUndelivered::TPtr &ev) { - if (ev->Get()->SourceType == NHealthCheck::EvSelfCheckRequestProto) { - ui32 nodeId = ev.Get()->Cookie; - BLOG_TRACE("Undelivered for node " << nodeId << " event " << ev->Get()->SourceType); - auto tenantId = NodeIdsToTenant[nodeId]; - if (HcOverallByTenantPath.emplace(tenantId, NKikimrViewer::EFlag::Grey).second) { - RequestDone(); - } - } - if (ev->Get()->SourceType == NNodeWhiteboard::TEvWhiteboard::EvSystemStateRequest) { - ui32 nodeId = ev.Get()->Cookie; - 
BLOG_TRACE("Undelivered for node " << nodeId << " event " << ev->Get()->SourceType); - if (WhiteboardSystemStateResponse.emplace(nodeId, NKikimrWhiteboard::TEvSystemStateResponse{}).second) { - RequestDone(); - } - } - if (ev->Get()->SourceType == NNodeWhiteboard::TEvWhiteboard::EvTabletStateRequest) { - ui32 nodeId = ev.Get()->Cookie; - BLOG_TRACE("Undelivered for node " << nodeId << " event " << ev->Get()->SourceType); - auto tenantId = NodeIdsToTenant[nodeId]; - if (WhiteboardTabletStateResponse[tenantId].emplace(nodeId, NKikimrWhiteboard::TEvTabletStateResponse{}).second) { - RequestDone(); - } - } - if (ev->Get()->SourceType == NViewer::TEvViewer::EvViewerRequest) { - ViewerWhiteboardCookie cookie(ev.Get()->Cookie); - auto nodeId = cookie.GetNodeId(); - auto tenantId = NodeIdsToTenant[nodeId]; - BLOG_TRACE("Undelivered for node " << cookie.GetNodeId() << " event " << ev->Get()->SourceType); - switch (cookie.GetRequestCase()) { - case NKikimrViewer::TEvViewerRequest::kTabletRequest: - if (OffloadMergedTabletStateResponse.emplace(tenantId, NKikimrViewer::TEvViewerResponse{}).second) { - // fallback - for (TNodeId nodeId : TenantNodes[tenantId]) { - SendWhiteboardTabletStateRequest(nodeId); - } - RequestDone(); - }; - - break; - case NKikimrViewer::TEvViewerRequest::kSystemRequest: - if (OffloadMergedSystemStateResponse.emplace(tenantId, NKikimrViewer::TEvViewerResponse{}).second) { - // fallback - for (TNodeId nodeId : TenantNodes[tenantId]) { - SendWhiteboardSystemStateRequest(nodeId); - } - RequestDone(); - } - break; - default: - break; - } - } - } - - void Disconnected(TEvInterconnect::TEvNodeDisconnected::TPtr &ev) { - TNodeId nodeId = ev->Get()->NodeId; - auto tenantId = NodeIdsToTenant[nodeId]; - BLOG_TRACE("NodeDisconnected for nodeId " << nodeId); - - if (OffloadTenantsRequested.count(tenantId) > 0) { - // fallback - if (OffloadMergedSystemStateResponse.emplace(tenantId, NKikimrViewer::TEvViewerResponse{}).second) { - for (TNodeId nodeId : 
TenantNodes[tenantId]) { - SendWhiteboardSystemStateRequest(nodeId); - } - RequestDone(); - } - if (Tablets && OffloadMergedSystemStateResponse.emplace(tenantId, NKikimrViewer::TEvViewerResponse{}).second) { - for (TNodeId nodeId : TenantNodes[tenantId]) { - SendWhiteboardSystemStateRequest(nodeId); - } - RequestDone(); - } - } - if (WhiteboardNodesRequested.count(nodeId) > 0) { - if (WhiteboardSystemStateResponse.emplace(nodeId, NKikimrWhiteboard::TEvSystemStateResponse{}).second) { - RequestDone(); - } - if (Tablets && WhiteboardTabletStateResponse[tenantId].emplace(nodeId, NKikimrWhiteboard::TEvTabletStateResponse{}).second) { - RequestDone(); - } - } - if (MetadataCacheRequested.count(tenantId) > 0) { - if (HcOverallByTenantPath.emplace(tenantId, NKikimrViewer::EFlag::Grey).second) { - RequestDone(); - } - } - } - - NKikimrViewer::TStorageUsage::EType GetStorageType(const TString& poolKind) { - auto kind = to_lower(poolKind); - if (kind.StartsWith("ssd") || kind.StartsWith("nvme")) { - return NKikimrViewer::TStorageUsage::SSD; - } - if (kind.StartsWith("hdd") || kind.StartsWith("rot")) { - return NKikimrViewer::TStorageUsage::HDD; - } - return NKikimrViewer::TStorageUsage::None; - } - - void ReplyAndPassAway() { - BLOG_TRACE("ReplyAndPassAway() started"); - TIntrusivePtr domains = AppData()->DomainsInfo; - auto *domain = domains->GetDomain(); - THashMap OverallByDomainId; - TMap NodeSystemStateInfo; - - for (auto& [tenantId, record] : OffloadMergedSystemStateResponse) { - for (auto& systemState : *(record.MutableSystemResponse()->MutableSystemStateInfo())) { - auto ni = systemState.GetNodeId(); - NodeSystemStateInfo[ni] = std::move(systemState); - } - } - for (auto& [nodeId, record] : WhiteboardSystemStateResponse) { - if (record.SystemStateInfoSize() == 1) { - NodeSystemStateInfo[nodeId] = std::move(record.GetSystemStateInfo(0)); - } - } - - for (const auto& [subDomainKey, tenantBySubDomainKey] : TenantBySubDomainKey) { - TString id(GetDomainId(subDomainKey)); 
- NKikimrWhiteboard::TEvTabletStateResponse tabletInfo; - THashMap tabletInfoIndex; - if (Tablets) { - if (WhiteboardTabletStateResponse[id].size() > 0) { - TWhiteboardInfo::MergeResponses(tabletInfo, WhiteboardTabletStateResponse[id]); - } else if (OffloadMerge) { - tabletInfo = std::move(*(OffloadMergedTabletStateResponse[id].MutableTabletResponse())); - } - if (SystemTablets) { - for (const auto& info : TWhiteboardInfo::GetElementsField(tabletInfo)) { - tabletInfoIndex[info.GetTabletId()] = &info; - } - } - } - - NKikimrViewer::EFlag overall = NKikimrViewer::EFlag::Grey; - auto itNavigate = NavigateResult.find(id); - if (itNavigate != NavigateResult.end()) { - NSchemeCache::TSchemeCacheNavigate::TEntry entry = itNavigate->second->ResultSet.front(); - TString path = CanonizePath(entry.Path); - if (!IsValidTenant(path)) { - continue; - } - std::unordered_set users; - if(!User.empty() || Users) { - if (entry.SecurityObject) { - users.emplace(entry.SecurityObject->GetOwnerSID()); - for (const NACLibProto::TACE& ace : entry.SecurityObject->GetACL().GetACE()) { - if (ace.GetAccessType() == (ui32)NACLib::EAccessType::Allow) { - users.emplace(ace.GetSID()); - } - } - } - if (!IsValidOwner(users)) { - continue; - } - } - NKikimrViewer::TTenant& tenant = *Result.AddTenantInfo(); - auto itTenantByPath = TenantByPath.find(path); - if (itTenantByPath != TenantByPath.end()) { - tenant = std::move(itTenantByPath->second); - TenantByPath.erase(itTenantByPath); - } - if (tenant.GetType() == NKikimrViewer::UnknownTenantType) { - tenant.MergeFrom(tenantBySubDomainKey); - } else { - auto oldType = tenant.GetType(); - tenant.MergeFrom(tenantBySubDomainKey); - tenant.SetType(oldType); - } - if (!tenant.GetId()) { - tenant.SetId(GetDomainId(subDomainKey)); - } - if (tenant.GetType() == NKikimrViewer::UnknownTenantType) { - tenant.SetType(NKikimrViewer::Dedicated); - } - tenant.SetCreateTime(TInstant::MicroSeconds(entry.CreateStep).MilliSeconds()); - if (entry.SecurityObject) { - 
tenant.SetOwner(entry.SecurityObject->GetOwnerSID()); - } - for (const TString& user : users) { - tenant.AddUsers(user); - } - for (const auto& userAttribute : entry.Attributes) { - tenant.MutableUserAttributes()->insert({userAttribute.first, userAttribute.second}); - } - - TStackVec tablets; - for (TTabletId tabletId : entry.DomainInfo->Params.GetCoordinators()) { - tablets.emplace_back(tabletId); - } - for (TTabletId tabletId : entry.DomainInfo->Params.GetMediators()) { - tablets.emplace_back(tabletId); - } - if (entry.DomainInfo->Params.HasSchemeShard()) { - tablets.emplace_back(entry.DomainInfo->Params.GetSchemeShard()); - } else { - tablets.emplace_back(domain->SchemeRoot); - tablets.emplace_back(MakeBSControllerID()); - tablets.emplace_back(MakeConsoleID()); - } - TTabletId hiveId = domains->GetHive(); - if (entry.DomainInfo->Params.HasHive()) { - hiveId = entry.DomainInfo->Params.GetHive(); - } else { - if (tenant.GetType() == NKikimrViewer::Serverless) { - auto itResourceNavigate = NavigateResult.find(tenant.GetResourceId()); - if (itResourceNavigate != NavigateResult.end()) { - NSchemeCache::TSchemeCacheNavigate::TEntry entry = itResourceNavigate->second->ResultSet.front(); - if (entry.DomainInfo->Params.HasHive()) { - hiveId = entry.DomainInfo->Params.GetHive(); - } - } - } - } - tablets.emplace_back(hiveId); - - if (SystemTablets) { - for (TTabletId tabletId : tablets) { - auto it = tabletInfoIndex.find(tabletId); - if (it != tabletInfoIndex.end()) { - NKikimrWhiteboard::TTabletStateInfo* tabletInfo = tenant.AddSystemTablets(); - tabletInfo->CopyFrom(*it->second); - NKikimrViewer::EFlag flag = GetFlagFromTabletState(tabletInfo->GetState()); - tabletInfo->SetOverall(GetWhiteboardFlag(flag)); - overall = Max(overall, flag); - } - } - } - - if (Storage) { - auto itHiveStorageStats = HiveStorageStats.find(hiveId); - if (itHiveStorageStats != HiveStorageStats.end()) { - const NKikimrHive::TEvResponseHiveStorageStats& record = 
itHiveStorageStats->second.Get()->Record; - uint64 storageAllocatedSize = 0; - uint64 storageAvailableSize = 0; - uint64 storageMinAvailableSize = std::numeric_limits::max(); - uint64 storageGroups = 0; - for (const NKikimrHive::THiveStoragePoolStats& poolStat : record.GetPools()) { - if (poolStat.GetName().StartsWith(tenantBySubDomainKey.GetName())) { - for (const NKikimrHive::THiveStorageGroupStats& groupStat : poolStat.GetGroups()) { - storageAllocatedSize += groupStat.GetAllocatedSize(); - storageAvailableSize += groupStat.GetAvailableSize(); - storageMinAvailableSize = std::min(storageMinAvailableSize, groupStat.GetAvailableSize()); - ++storageGroups; - } - } - } - uint64 storageAllocatedLimit = storageAllocatedSize + storageAvailableSize; - tenant.SetStorageAllocatedSize(storageAllocatedSize); - tenant.SetStorageAllocatedLimit(storageAllocatedLimit); - tenant.SetStorageMinAvailableSize(storageMinAvailableSize); - tenant.SetStorageGroups(storageGroups); - } - - THashMap storageUsageByType; - THashMap storageQuotasByType; - if (entry.DomainDescription) { - for (const auto& poolUsage : entry.DomainDescription->Description.GetDiskSpaceUsage().GetStoragePoolsUsage()) { - auto type = GetStorageType(poolUsage.GetPoolKind()); - storageUsageByType[type] += poolUsage.GetTotalSize(); - } - } - - for (const auto& quota : tenant.GetDatabaseQuotas().storage_quotas()) { - auto type = GetStorageType(quota.unit_kind()); - auto& usage = storageQuotasByType[type]; - usage.SoftQuota += quota.data_size_soft_quota(); - usage.HardQuota += quota.data_size_hard_quota(); - } - - for (const auto& [type, size] : storageUsageByType) { - auto& storageUsage = *tenant.AddStorageUsage(); - storageUsage.SetType(type); - storageUsage.SetSize(size); - auto it = storageQuotasByType.find(type); - if (it != storageQuotasByType.end()) { - storageUsage.SetLimit(it->second.HardQuota); - storageUsage.SetSoftQuota(it->second.SoftQuota); - storageUsage.SetHardQuota(it->second.HardQuota); - } - } - } - - 
THashSet tenantNodes; - - for (TNodeId nodeId : tenant.GetNodeIds()) { - auto itNodeInfo = NodeSystemStateInfo.find(nodeId); - if (itNodeInfo != NodeSystemStateInfo.end()) { - if (Nodes) { - tenant.AddNodes()->CopyFrom(itNodeInfo->second); - } - for (const auto& poolStat : itNodeInfo->second.GetPoolStats()) { - TString poolName = poolStat.GetName(); - NKikimrWhiteboard::TSystemStateInfo_TPoolStats* targetPoolStat = nullptr; - for (NKikimrWhiteboard::TSystemStateInfo_TPoolStats& ps : *tenant.MutablePoolStats()) { - if (ps.GetName() == poolName) { - targetPoolStat = &ps; - break; - } - } - if (targetPoolStat == nullptr) { - targetPoolStat = tenant.AddPoolStats(); - targetPoolStat->SetName(poolName); - } - double poolUsage = targetPoolStat->GetUsage() * targetPoolStat->GetThreads(); - poolUsage += poolStat.GetUsage() * poolStat.GetThreads(); - ui32 poolThreads = targetPoolStat->GetThreads() + poolStat.GetThreads(); - if (poolThreads != 0) { - double threadUsage = poolUsage / poolThreads; - targetPoolStat->SetUsage(threadUsage); - targetPoolStat->SetThreads(poolThreads); - } - tenant.SetCoresUsed(tenant.GetCoresUsed() + poolStat.GetUsage() * poolStat.GetThreads()); - } - if (itNodeInfo->second.HasMemoryUsed()) { - tenant.SetMemoryUsed(tenant.GetMemoryUsed() + itNodeInfo->second.GetMemoryUsed()); - } - if (itNodeInfo->second.HasMemoryLimit()) { - tenant.SetMemoryLimit(tenant.GetMemoryLimit() + itNodeInfo->second.GetMemoryLimit()); - } - overall = Max(overall, GetViewerFlag(itNodeInfo->second.GetSystemState())); - } - tenantNodes.emplace(nodeId); - } - - if (tenant.GetType() == NKikimrViewer::Serverless) { - tenant.SetStorageAllocatedSize(tenant.GetMetrics().GetStorage()); - const bool noExclusiveNodes = tenantNodes.empty(); - if (noExclusiveNodes) { - tenant.SetMemoryUsed(tenant.GetMetrics().GetMemory()); - tenant.ClearMemoryLimit(); - tenant.SetCoresUsed(static_cast(tenant.GetMetrics().GetCPU()) / 1000000); - } - } - - if (Tablets) { - THashMap, ui32> tablets; - for 
(const auto& pbTablet : tabletInfo.GetTabletStateInfo()) { - if (tenantNodes.count(pbTablet.GetNodeId()) > 0) { - NKikimrViewer::EFlag state = GetFlagFromTabletState(pbTablet.GetState()); - tablets[std::make_pair(pbTablet.GetType(), state)]++; - } - } - - for (const auto& [prTypeState, prTabletCount] : tablets) { - NKikimrViewer::TTabletStateInfo& tablet = *tenant.AddTablets(); - tablet.SetType(NKikimrTabletBase::TTabletTypes::EType_Name(prTypeState.first)); - tablet.SetState(prTypeState.second); - tablet.SetCount(prTabletCount); - } - } - if (HcOverallByTenantPath.count(path) > 0 && HcOverallByTenantPath[path] != NKikimrViewer::EFlag::Grey) { - tenant.SetOverall(HcOverallByTenantPath[path]); - OverallByDomainId[tenant.GetId()] = HcOverallByTenantPath[path]; - } else { - tenant.SetOverall(overall); - OverallByDomainId[tenant.GetId()] = overall; - } - } - } - for (const std::pair& prTenant : TenantByPath) { - const TString& path(prTenant.first); - if (!IsValidTenant(path)) { - continue; - } - if (IsFilterByOwner()) { - continue; - } - const NKikimrViewer::TTenant& tenantByPath(prTenant.second); - NKikimrViewer::EFlag overall = NKikimrViewer::EFlag::Red; - NKikimrViewer::TTenant& tenant = *Result.AddTenantInfo(); - tenant.MergeFrom(tenantByPath); - tenant.SetName(path); - tenant.SetOverall(overall); - if (tenant.GetId()) { - OverallByDomainId[tenant.GetId()] = overall; - } - } - for (NKikimrViewer::TTenant& tenant: *Result.MutableTenantInfo()) { - if (tenant.GetType() != NKikimrViewer::Serverless) { - continue; - } - auto it = OverallByDomainId.find(tenant.GetResourceId()); - if (it != OverallByDomainId.end()) { - tenant.SetOverall(it->second); - } - } - std::sort(Result.MutableTenantInfo()->begin(), Result.MutableTenantInfo()->end(), - [](const NKikimrViewer::TTenant& a, const NKikimrViewer::TTenant& b) { - return a.name() < b.name(); - }); - TStringStream json; - TProtoToJson::ProtoToJson(json, Result, JsonSettings); - Send(Event->Sender, new 
NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), json.Str()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } - - void HandleTimeout() { - BLOG_TRACE("Timeout occurred"); - Result.AddErrors("Timeout occurred"); - ReplyAndPassAway(); - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: path - in: query - description: schema path - required: false - type: string - - name: user - in: query - description: tenant owner - required: false - type: string - - name: followers - in: query - description: return followers - required: false - type: boolean - - name: metrics - in: query - description: return tablet metrics - required: false - type: boolean - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - - name: tablets - in: query - description: return tablets - required: false - type: boolean - - name: system_tablets - in: query - description: return system tablets - required: false - type: boolean - - name: offload_merge - in: query - description: use offload merge - required: false - type: boolean - - name: storage - in: query - description: return storage info - required: false - type: boolean - - name: nodes - in: query - description: return nodes info - required: false - type: boolean - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "\"Tenant info (detailed)\""; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "\"Returns information about tenants\""; - } -}; - -} -} diff --git 
a/ydb/core/viewer/json_tenants.h b/ydb/core/viewer/json_tenants.h deleted file mode 100644 index e15ebdfcc152..000000000000 --- a/ydb/core/viewer/json_tenants.h +++ /dev/null @@ -1,154 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include "viewer.h" -#include "json_pipe_req.h" -#include "wb_aggregate.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - -class TJsonTenants : public TViewerPipeClient { - using TBase = TViewerPipeClient; - IViewer* Viewer; - NKikimrViewer::TTenants Result; - NMon::TEvHttpInfo::TPtr Event; - TJsonSettings JsonSettings; - ui32 Timeout = 0; - bool State = true; - THashMap TenantIndex; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonTenants(IViewer* viewer, NMon::TEvHttpInfo::TPtr &ev) - : Viewer(viewer) - , Event(ev) - {} - - void Bootstrap() { - const auto& params(Event->Get()->Request.GetParams()); - JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), true); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); - InitConfig(params); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - State = FromStringWithDefault(params.Get("state"), true); - TIntrusivePtr domains = AppData()->DomainsInfo; - auto *domain = domains->GetDomain(); - NKikimrViewer::TTenant& tenant = *Result.AddTenants(); - tenant.SetName("/" + domain->Name); - if (State) { - tenant.SetState(Ydb::Cms::GetDatabaseStatusResult::State::GetDatabaseStatusResult_State_RUNNING); - } - RequestConsoleListTenants(); - Become(&TThis::StateWork, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - STATEFN(StateWork) { - switch (ev->GetTypeRewrite()) { - hFunc(NConsole::TEvConsole::TEvListTenantsResponse, Handle); - hFunc(NConsole::TEvConsole::TEvGetTenantStatusResponse, Handle); - hFunc(TEvTabletPipe::TEvClientConnected, 
TBase::Handle); - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void Handle(NConsole::TEvConsole::TEvListTenantsResponse::TPtr& ev) { - Ydb::Cms::ListDatabasesResult listTenantsResult; - ev->Get()->Record.GetResponse().operation().result().UnpackTo(&listTenantsResult); - for (const TString& path : listTenantsResult.paths()) { - NKikimrViewer::TTenant& tenant = *Result.AddTenants(); - tenant.SetName(path); - TenantIndex[path] = &tenant; - if (State) { - RequestConsoleGetTenantStatus(path); - } - } - RequestDone(); - } - - void Handle(NConsole::TEvConsole::TEvGetTenantStatusResponse::TPtr& ev) { - Ydb::Cms::GetDatabaseStatusResult getTenantStatusResult; - ev->Get()->Record.GetResponse().operation().result().UnpackTo(&getTenantStatusResult); - auto itTenant = TenantIndex.find(getTenantStatusResult.path()); - if (itTenant != TenantIndex.end()) { - NKikimrViewer::TTenant& tenant = *itTenant->second; - tenant.SetState(getTenantStatusResult.state()); - } - RequestDone(); - } - - void ReplyAndPassAway() { - TStringStream json; - TProtoToJson::ProtoToJson(json, Result, JsonSettings); - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), json.Str()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } - - void HandleTimeout() { - Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - - name: state - in: query - description: return tenant state - required: false - type: boolean 
- default: true - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Tenant info (brief)"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns list of tenants"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_topicinfo.h b/ydb/core/viewer/json_topicinfo.h deleted file mode 100644 index d5880859ff16..000000000000 --- a/ydb/core/viewer/json_topicinfo.h +++ /dev/null @@ -1,158 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include "viewer.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - -class TJsonTopicInfo : public TActorBootstrapped { - using TBase = TActorBootstrapped; - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - NKikimrLabeledCounters::TEvTabletLabeledCountersResponse TopicInfoResult; - TJsonSettings JsonSettings; - TString Topic; - TString Client; - TString GroupNames; - bool ShowAll = false; - ui32 Timeout = 0; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonTopicInfo(IViewer* viewer, NMon::TEvHttpInfo::TPtr &ev) - : Viewer(viewer) - , Event(ev) - {} - - void Bootstrap(const TActorContext& ctx) { - const auto& params(Event->Get()->Request.GetParams()); - JsonSettings.EnumAsNumbers = !FromStringWithDefault(params.Get("enums"), false); - JsonSettings.UI64AsString = !FromStringWithDefault(params.Get("ui64"), false); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - Topic = params.Get("path"); - Client = params.Has("client") ? 
params.Get("client") : "total"; - GroupNames = params.Get("group_names"); - ShowAll = FromStringWithDefault(params.Get("all"), false); - size_t pos = Topic.rfind('/'); - if (pos != TString::npos) - Topic = Topic.substr(pos + 1); - //proxy is not used - CreateClusterLabeledCountersAggregator(ctx.SelfID, TTabletTypes::PersQueue, ctx); - - Become(&TThis::StateRequestedTopicInfo, ctx, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - void Die(const TActorContext& ctx) override { - TBase::Die(ctx); - } - - STFUNC(StateRequestedTopicInfo) { - switch (ev->GetTypeRewrite()) { - HFunc(TEvTabletCounters::TEvTabletLabeledCountersResponse, Handle); - CFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void Handle(TEvTabletCounters::TEvTabletLabeledCountersResponse::TPtr &ev, const TActorContext &ctx) { - TString groupPrefix = Client + "/"; - TString groupSuffix = "/" + Topic; - for (ui32 i = 0; i < ev->Get()->Record.LabeledCountersByGroupSize(); ++i) { - const auto& uc = ev->Get()->Record.GetLabeledCountersByGroup(i); - const TString& group(uc.GetGroup()); - if (ShowAll - || (group.StartsWith(groupPrefix) && group.EndsWith(groupSuffix)) - || uc.GetGroup() == Topic - || uc.GetGroupNames() == GroupNames) { - TopicInfoResult.AddLabeledCountersByGroup()->CopyFrom(uc); - } - } - ReplyAndDie(ctx); - } - - void ReplyAndDie(const TActorContext &ctx) { - TStringStream json; - TProtoToJson::ProtoToJson(json, TopicInfoResult, JsonSettings); - ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), json.Str()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); - } - - void HandleTimeout(const TActorContext &ctx) { - ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -template <> 
-struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: path - in: query - description: schema path - required: true - type: string - - name: client - in: query - description: client name - required: false - type: string - default: total - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - - name: all - in: query - description: return all topics and all clients - required: false - type: boolean - default: false - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - default: 10000 - )___"); - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Topic information"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Information about topic"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_vdisk_evict.h b/ydb/core/viewer/json_vdisk_evict.h deleted file mode 100644 index 3af5a956c0b3..000000000000 --- a/ydb/core/viewer/json_vdisk_evict.h +++ /dev/null @@ -1,269 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include -#include "viewer.h" -#include "json_pipe_req.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - -class TJsonVDiskEvict : public TViewerPipeClient { - enum EEv { - EvRetryNodeRequest = EventSpaceBegin(NActors::TEvents::ES_PRIVATE), - EvEnd - }; - - static_assert(EvEnd < EventSpaceEnd(NActors::TEvents::ES_PRIVATE), "expect EvEnd < EventSpaceEnd(TEvents::ES_PRIVATE)"); - - struct TEvRetryNodeRequest : NActors::TEventLocal { - TEvRetryNodeRequest() - {} - }; - -protected: - using TThis = TJsonVDiskEvict; - using TBase = TViewerPipeClient; - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - ui32 Timeout = 0; - ui32 ActualRetries = 0; - ui32 Retries = 0; - TDuration 
RetryPeriod = TDuration::MilliSeconds(500); - - std::unique_ptr Response; - - ui32 GroupId = 0; - ui32 GroupGeneration = 0; - ui32 FailRealmIdx = 0; - ui32 FailDomainIdx = 0; - ui32 VdiskIdx = 0; - bool Force = false; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonVDiskEvict(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) - : Viewer(viewer) - , Event(ev) - {} - - inline ui32 GetRequiredParam(const TCgiParameters& params, const std::string& name, ui32& obj) { - if (!TryFromString(params.Get(name), obj)) { - TBase::Send(Event->Sender, new NMon::TEvHttpInfoRes( - Viewer->GetHTTPBADREQUEST(Event->Get(), "text/plain", TStringBuilder() << "field '" << name << "' or 'vdisk_id' are required"), - 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - return false; - } - return true; - } - - void Bootstrap() { - const auto& params(Event->Get()->Request.GetParams()); - TString vdisk_id = params.Get("vdisk_id"); - if (vdisk_id) { - TVector parts = StringSplitter(vdisk_id).Split('-').SkipEmpty(); - if (parts.size() == 5) { - GroupId = FromStringWithDefault(parts[0], Max()); - GroupGeneration = FromStringWithDefault(parts[1], Max()); - FailRealmIdx = FromStringWithDefault(parts[2], Max()); - FailDomainIdx = FromStringWithDefault(parts[3], Max()); - VdiskIdx = FromStringWithDefault(parts[4], Max()); - } - if (parts.size() != 5 || GroupId == Max() - || GroupGeneration == Max() || FailRealmIdx == Max() - || FailDomainIdx == Max() || VdiskIdx == Max()) { - TBase::Send(Event->Sender, new NMon::TEvHttpInfoRes( - Viewer->GetHTTPBADREQUEST(Event->Get(), "text/plain", TStringBuilder() << "Unable to parse the 'vdisk_id' parameter"), - 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - return PassAway(); - } - } else if (!GetRequiredParam(params, "group_id", GroupId) - || !GetRequiredParam(params, "group_generation_id", GroupGeneration) - || !GetRequiredParam(params, "fail_realm_idx", 
FailRealmIdx) - || !GetRequiredParam(params, "fail_domain_idx", FailDomainIdx) - || !GetRequiredParam(params, "vdisk_idx", VdiskIdx)) { - return PassAway(); - } - - if (Event->Get()->Request.GetMethod() != HTTP_METHOD_POST) { - TBase::Send(Event->Sender, new NMon::TEvHttpInfoRes( - Viewer->GetHTTPBADREQUEST(Event->Get(), "text/plain", "Only POST method is allowed"), - 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - return PassAway(); - } - TBase::InitConfig(params); - - Force = FromStringWithDefault(params.Get("force"), false); - Timeout = FromStringWithDefault(params.Get("timeout"), 10000); - Retries = FromStringWithDefault(params.Get("retries"), 0); - RetryPeriod = TDuration::MilliSeconds(FromStringWithDefault(params.Get("retry_period"), RetryPeriod.MilliSeconds())); - - if (Force && !Viewer->CheckAccessAdministration(Event->Get())) { - TBase::Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPFORBIDDEN(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - return PassAway(); - } - - SendRequest(); - - TBase::Become(&TThis::StateWork, TDuration::MilliSeconds(Timeout), new TEvents::TEvWakeup()); - } - - STATEFN(StateWork) { - switch (ev->GetTypeRewrite()) { - hFunc(TEvBlobStorage::TEvControllerConfigResponse, Handle); - cFunc(TEvRetryNodeRequest::EventType, HandleRetry); - cFunc(TEvents::TEvUndelivered::EventType, Undelivered); - cFunc(TEvents::TSystem::Wakeup, HandleTimeout); - } - } - - void SendRequest() { - RequestBSControllerVDiskEvict(GroupId, GroupGeneration, FailRealmIdx, FailDomainIdx, VdiskIdx, Force); - } - - bool RetryRequest() { - if (Retries) { - if (++ActualRetries <= Retries) { - TBase::Schedule(RetryPeriod, new TEvRetryNodeRequest()); - return true; - } - } - return false; - } - - void Undelivered() { - if (!RetryRequest()) { - TBase::RequestDone(); - } - } - - void Handle(TEvBlobStorage::TEvControllerConfigResponse::TPtr& ev) { - Response.reset(ev->Release().Release()); - ReplyAndPassAway(); - } - - void HandleRetry() { - 
SendRequest(); - } - - void HandleTimeout() { - Send(Event->Sender, new NMon::TEvHttpInfoRes( - Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get(), "text/plain", "Timeout receiving response from BSC"), - 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - } - - void PassAway() override { - TBase::PassAway(); - } - - void ReplyAndPassAway() { - NJson::TJsonValue json; - if (Response != nullptr) { - if (Response->Record.GetResponse().GetSuccess()) { - json["result"] = true; - } else { - json["result"] = false; - TString error; - bool forceRetryPossible = false; - Viewer->TranslateFromBSC2Human(Response->Record.GetResponse(), error, forceRetryPossible); - json["error"] = error; - if (forceRetryPossible && Viewer->CheckAccessAdministration(Event->Get())) { - json["forceRetryPossible"] = true; - } - } - json["debugMessage"] = Response->Record.ShortDebugString(); - } else { - json["result"] = false; - json["error"] = "No response was received from BSC"; - } - TBase::Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), NJson::WriteJson(json)), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - PassAway(); - } -}; - -template <> -YAML::Node TJsonRequestSwagger::GetSwagger() { - return YAML::Load(R"___( - post: - tags: - - vdisk - summary: VDisk evict - description: VDisk evict - parameters: - - name: vdisk_id - in: query - description: vdisk identifier - required: false - type: string - - name: group_id - in: query - description: group identifier - required: false - type: integer - - name: group_generation_id - in: query - description: group generation identifier - required: false - type: integer - - name: fail_realm_idx - in: query - description: fail realm identifier - required: false - type: integer - - name: fail_domain_ids - in: query - description: fail domain identifier - required: false - type: integer - - name: vdisk_idx - in: query - description: vdisk idx identifier - required: false - type: integer - - name: timeout - in: query - description: 
timeout in ms - required: false - type: integer - - name: force - in: query - description: attempt forced operation, ignore warnings - required: false - type: boolean - responses: - 200: - description: OK - content: - application/json: - schema: - type: object - properties: - result: - type: boolean - description: was operation successful or not - error: - type: string - description: details about failed operation - forceRetryPossible: - type: boolean - description: if true, operation can be retried with force flag - 400: - description: Bad Request - 403: - description: Forbidden - 504: - description: Gateway Timeout - )___"); -} - -} -} diff --git a/ydb/core/viewer/json_vdisk_req.h b/ydb/core/viewer/json_vdisk_req.h index 43b455a2a11a..71ccc636ca7f 100644 --- a/ydb/core/viewer/json_vdisk_req.h +++ b/ydb/core/viewer/json_vdisk_req.h @@ -1,18 +1,12 @@ #pragma once -#include -#include -#include -#include -#include -#include "viewer.h" #include "json_pipe_req.h" +#include "viewer.h" +#include -namespace NKikimr { -namespace NViewer { +namespace NKikimr::NViewer { using namespace NActors; - template struct TJsonVDiskRequestHelper { static std::unique_ptr MakeRequest(NMon::TEvHttpInfo::TPtr &, TString *) { @@ -24,9 +18,8 @@ struct TJsonVDiskRequestHelper { } }; - template -class TJsonVDiskRequest : public TViewerPipeClient> { +class TJsonVDiskRequest : public TViewerPipeClient { enum EEv { EvRetryNodeRequest = EventSpaceBegin(NActors::TEvents::ES_PRIVATE), EvEnd @@ -42,7 +35,7 @@ class TJsonVDiskRequest : public TViewerPipeClient; - using TBase = TViewerPipeClient; + using TBase = TViewerPipeClient; using THelper = TJsonVDiskRequestHelper; IViewer* Viewer; TActorId Initiator; @@ -63,17 +56,13 @@ class TJsonVDiskRequest : public TViewerPipeClient TcpProxyId; public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - TJsonVDiskRequest(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) : 
Viewer(viewer) , Initiator(ev->Sender) , Event(ev) {} - virtual void Bootstrap() { + void Bootstrap() override { const auto& params(Event->Get()->Request.GetParams()); NodeId = FromStringWithDefault(params.Get("node_id"), 0); PDiskId = FromStringWithDefault(params.Get("pdisk_id"), Max()); @@ -185,7 +174,7 @@ class TJsonVDiskRequest : public TViewerPipeClient -struct TJsonRequestParameters> { + void ReplyAndPassAway() override { + ReplyAndPassAway({}); + } + + static YAML::Node GetSchema() { + return TProtoToYaml::ProtoToYamlSchema(); + } + static YAML::Node GetParameters() { return YAML::Load(R"___( - name: node_id @@ -251,12 +245,4 @@ struct TJsonRequestParameters> { } }; -template -struct TJsonRequestSchema> { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -} } diff --git a/ydb/core/viewer/json_vdiskinfo.h b/ydb/core/viewer/json_vdiskinfo.h deleted file mode 100644 index 0675c4cd407b..000000000000 --- a/ydb/core/viewer/json_vdiskinfo.h +++ /dev/null @@ -1,96 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include "json_wb_req.h" - -namespace std { - -template <> -struct equal_to { - static decltype(auto) make_tuple(const NKikimrBlobStorage::TVDiskID& id) { - return std::make_tuple( - id.GetGroupID(), - id.GetGroupGeneration(), - id.GetRing(), - id.GetDomain(), - id.GetVDisk() - ); - } - - bool operator ()(const NKikimrBlobStorage::TVDiskID& a, const NKikimrBlobStorage::TVDiskID& b) const { - return make_tuple(a) == make_tuple(b); - } -}; - -template <> -struct less { - bool operator ()(const NKikimrBlobStorage::TVDiskID& a, const NKikimrBlobStorage::TVDiskID& b) const { - return equal_to::make_tuple(a) < equal_to::make_tuple(b); - } -}; - -template <> -struct hash { - size_t operator ()(const NKikimrBlobStorage::TVDiskID& a) const { - auto tp = equal_to::make_tuple(a); - return hash()(tp); - } -}; - -} - -namespace NKikimr { -namespace NViewer { - -template <> -struct TWhiteboardInfo { - 
using TResponseEventType = TEvWhiteboard::TEvVDiskStateResponse; - using TResponseType = NKikimrWhiteboard::TEvVDiskStateResponse; - using TElementType = NKikimrWhiteboard::TVDiskStateInfo; - using TElementKeyType = NKikimrBlobStorage::TVDiskID; - - static constexpr bool StaticNodesOnly = true; - - static ::google::protobuf::RepeatedPtrField& GetElementsField(TResponseType& response) { - return *response.MutableVDiskStateInfo(); - } - - static const NKikimrBlobStorage::TVDiskID& GetElementKey(const TElementType& type) { - return type.GetVDiskId(); - } - - static TString GetDefaultMergeField() { - return "VDiskId"; - } - - static void MergeResponses(TResponseType& result, TMap& responses, const TString& fields = GetDefaultMergeField()) { - if (fields == GetDefaultMergeField()) { - TWhiteboardMerger::MergeResponsesElementKey(result, responses); - } else { - TWhiteboardMerger::MergeResponses(result, responses, fields); - } - } -}; - -using TJsonVDiskInfo = TJsonWhiteboardRequest; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "VDisk information"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "VDisk information"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_vdiskstat.h b/ydb/core/viewer/json_vdiskstat.h deleted file mode 100644 index 8aaa81ed1a9d..000000000000 --- a/ydb/core/viewer/json_vdiskstat.h +++ /dev/null @@ -1,30 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include "json_vdisk_req.h" - -namespace NKikimr { -namespace NViewer { - -using TJsonVDiskStat = TJsonVDiskRequest; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "VDisk statistic"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "VDisk statistic"; - } -}; - -} -} diff --git a/ydb/core/viewer/json_wb_req.cpp b/ydb/core/viewer/json_wb_req.cpp new file mode 100644 index 
000000000000..facb6287f283 --- /dev/null +++ b/ydb/core/viewer/json_wb_req.cpp @@ -0,0 +1,231 @@ +#include "viewer_bsgroupinfo.h" +#include "viewer_nodeinfo.h" +#include "viewer_pdiskinfo.h" +#include "viewer_sysinfo.h" +#include "viewer_tabletinfo.h" +#include "viewer_vdiskinfo.h" +#include "json_handlers.h" + +namespace NKikimr::NViewer { + +YAML::Node GetWhiteboardRequestParameters() { + return YAML::Load(R"___( + - name: node_id + in: query + description: node identifier + required: false + type: integer + - name: merge + in: query + description: merge information from nodes + required: false + type: boolean + - name: group + in: query + description: group information by field + required: false + type: string + - name: all + in: query + description: return all possible key combinations (for enums only) + required: false + type: boolean + - name: filter + in: query + description: filter information by field + required: false + type: string + - name: alive + in: query + description: request from alive (connected) nodes only + required: false + type: boolean + - name: enums + in: query + description: convert enums to strings + required: false + type: boolean + - name: ui64 + in: query + description: return ui64 as number + required: false + type: boolean + - name: timeout + in: query + description: timeout in ms + required: false + type: integer + - name: retries + in: query + description: number of retries + required: false + type: integer + - name: retry_period + in: query + description: retry period in ms + required: false + type: integer + default: 500 + - name: static + in: query + description: request from static nodes only + required: false + type: boolean + - name: since + in: query + description: filter by update time + required: false + type: string + )___"); +} + +void InitViewerBSGroupInfoJsonHandler(TJsonHandlers& jsonHandlers) { + TSimpleYamlBuilder yaml({ + .Method = "get", + .Tag = "viewer", + .Summary = "Storage groups information", + .Description 
= "Returns information about storage groups" + }); + yaml.SetParameters(GetWhiteboardRequestParameters()); + yaml.SetResponseSchema(TProtoToYaml::ProtoToYamlSchema()); + jsonHandlers.AddHandler("/viewer/bsgroupinfo", new TJsonHandler(yaml)); + TWhiteboardInfo::InitMerger(); +} + +void InitViewerNodeInfoJsonHandler(TJsonHandlers& jsonHandlers) { + TSimpleYamlBuilder yaml({ + .Method = "get", + .Tag = "viewer", + .Summary = "Interconnect information", + .Description = "Returns information about node connections" + }); + yaml.SetParameters(GetWhiteboardRequestParameters()); + yaml.SetResponseSchema(TProtoToYaml::ProtoToYamlSchema()); + jsonHandlers.AddHandler("/viewer/nodeinfo", new TJsonHandler(yaml)); + TWhiteboardInfo::InitMerger(); +} + +void InitViewerPDiskInfoJsonHandler(TJsonHandlers& jsonHandlers) { + TSimpleYamlBuilder yaml({ + .Method = "get", + .Tag = "viewer", + .Summary = "PDisk information", + .Description = "Returns information about PDisks" + }); + yaml.SetParameters(GetWhiteboardRequestParameters()); + yaml.SetResponseSchema(TProtoToYaml::ProtoToYamlSchema()); + jsonHandlers.AddHandler("/viewer/pdiskinfo", new TJsonHandler(yaml)); +} + +void InitViewerSysInfoJsonHandler(TJsonHandlers& jsonHandlers) { + TSimpleYamlBuilder yaml({ + .Method = "get", + .Tag = "viewer", + .Summary = "System information", + .Description = "Returns system information" + }); + yaml.SetParameters(GetWhiteboardRequestParameters()); + yaml.SetResponseSchema(TProtoToYaml::ProtoToYamlSchema()); + jsonHandlers.AddHandler("/viewer/sysinfo", new TJsonHandler(yaml)); +} + +void InitViewerTabletInfoJsonHandler(TJsonHandlers& jsonHandlers) { + TSimpleYamlBuilder yaml({ + .Method = "get", + .Tag = "viewer", + .Summary = "Tablet information", + .Description = "Returns information about tablets" + }); + yaml.AddParameter({ + .Name = "database", + .Description = "database name", + .Type = "string", + }); + yaml.AddParameter({ + .Name = "node_id", + .Description = "node identifier", + .Type 
= "integer", + }); + yaml.AddParameter({ + .Name = "path", + .Description = "schema path", + .Type = "string", + }); + yaml.AddParameter({ + .Name = "merge", + .Description = "merge information from nodes", + .Type = "boolean", + }); + yaml.AddParameter({ + .Name = "group", + .Description = "group information by field", + .Type = "string", + }); + yaml.AddParameter({ + .Name = "all", + .Description = "return all possible key combinations (for enums only)", + .Type = "boolean", + }); + yaml.AddParameter({ + .Name = "filter", + .Description = "filter information by field", + .Type = "string", + }); + yaml.AddParameter({ + .Name = "alive", + .Description = "request from alive (connected) nodes only", + .Type = "boolean", + }); + yaml.AddParameter({ + .Name = "enums", + .Description = "convert enums to strings", + .Type = "boolean", + }); + yaml.AddParameter({ + .Name = "ui64", + .Description = "return ui64 as number", + .Type = "boolean", + }); + yaml.AddParameter({ + .Name = "timeout", + .Description = "timeout in ms", + .Type = "integer", + }); + yaml.AddParameter({ + .Name = "retries", + .Description = "number of retries", + .Type = "integer", + }); + yaml.AddParameter({ + .Name = "retry_period", + .Description = "retry period in ms", + .Type = "integer", + .Default = "500", + }); + yaml.AddParameter({ + .Name = "static", + .Description = "request from static nodes only", + .Type = "boolean", + }); + yaml.AddParameter({ + .Name = "since", + .Description = "filter by update time", + .Type = "string", + }); + yaml.SetResponseSchema(TProtoToYaml::ProtoToYamlSchema()); + jsonHandlers.AddHandler("/viewer/tabletinfo", new TJsonHandler(yaml)); +} + +void InitViewerVDiskInfoJsonHandler(TJsonHandlers& jsonHandlers) { + TSimpleYamlBuilder yaml({ + .Method = "get", + .Tag = "viewer", + .Summary = "VDisk information", + .Description = "Returns information about VDisks" + }); + yaml.SetParameters(GetWhiteboardRequestParameters()); + 
yaml.SetResponseSchema(TProtoToYaml::ProtoToYamlSchema()); + jsonHandlers.AddHandler("/viewer/vdiskinfo", new TJsonHandler(yaml)); +} + +} diff --git a/ydb/core/viewer/json_wb_req.h b/ydb/core/viewer/json_wb_req.h index ae0905642b00..f5e0cd103336 100644 --- a/ydb/core/viewer/json_wb_req.h +++ b/ydb/core/viewer/json_wb_req.h @@ -1,48 +1,60 @@ #pragma once -#include -#include -#include -#include -#include -#include -#include -#include "viewer.h" #include "json_pipe_req.h" -#include "wb_merge.h" -#include "wb_group.h" +#include "log.h" +#include "viewer.h" #include "wb_filter.h" +#include "wb_group.h" +#include "wb_merge.h" #include "wb_req.h" -#include "log.h" +#include +#include +#include +#include -namespace NKikimr { -namespace NViewer { +namespace NKikimr::NViewer { using namespace NActors; using namespace NNodeWhiteboard; +YAML::Node GetWhiteboardRequestParameters(); + template -class TJsonWhiteboardRequest : public TWhiteboardRequest, TRequestEventType, TResponseEventType> { -protected: +class TJsonWhiteboardRequest : public TWhiteboardRequest { +public: using TThis = TJsonWhiteboardRequest; - using TBase = TWhiteboardRequest; + using TBase = TWhiteboardRequest; using TResponseType = typename TResponseEventType::ProtoRecordType; - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; + using TBase::Event; + using TBase::ReplyAndPassAway; TJsonSettings JsonSettings; -public: static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::VIEWER_HANDLER; } TJsonWhiteboardRequest(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) - : Viewer(viewer) - , Event(ev) + : TBase(viewer, ev) {} void Bootstrap() override { const auto& params(Event->Get()->Request.GetParams()); - SplitIds(params.Get("node_id"), ',', TBase::RequestSettings.FilterNodeIds); + std::vector nodeIds; + SplitIds(params.Get("node_id"), ',', nodeIds); + if (!nodeIds.empty()) { + if (TBase::RequestSettings.FilterNodeIds.empty()) { + 
TBase::RequestSettings.FilterNodeIds = nodeIds; + } else { + std::sort(nodeIds.begin(), nodeIds.end()); + std::sort(TBase::RequestSettings.FilterNodeIds.begin(), TBase::RequestSettings.FilterNodeIds.end()); + std::vector intersection; + std::set_intersection(nodeIds.begin(), nodeIds.end(), TBase::RequestSettings.FilterNodeIds.begin(), TBase::RequestSettings.FilterNodeIds.end(), std::back_inserter(intersection)); + if (intersection.empty()) { + TBase::RequestSettings.FilterNodeIds = {0}; + } else { + TBase::RequestSettings.FilterNodeIds = intersection; + } + } + } { TString merge = params.Get("merge"); if (merge.empty() || merge == "1" || merge == "true") { @@ -67,8 +79,14 @@ class TJsonWhiteboardRequest : public TWhiteboardRequest(params.Get("static"), false); } + if (params.Has("fields_required")) { + if (params.Get("fields_required") == "all") { + TBase::RequestSettings.FieldsRequired = {-1}; + } else { + SplitIds(params.Get("fields_required"), ',', TBase::RequestSettings.FieldsRequired); + } + } TBase::RequestSettings.Format = params.Get("format"); - TBase::Bootstrap(); } @@ -85,20 +103,23 @@ class TJsonWhiteboardRequest : public TWhiteboardRequestsecond; + auto it = TBase::NodeResponses.find(nodeId); + if (it != TBase::NodeResponses.end()) { + if (it->second.IsError()) { + if (error.empty()) { + error = it->second.GetError(); + } + errors++; } - errors++; } } } @@ -106,14 +127,14 @@ class TJsonWhiteboardRequest : public TWhiteboardRequestfirst << "\":"; @@ -123,94 +144,11 @@ class TJsonWhiteboardRequest : public TWhiteboardRequestSender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), std::move(json.Str())), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); + ReplyAndPassAway(TBase::GetHTTPOKJSON(json.Str())); } catch (const std::exception& e) { - TBase::Send(Event->Sender, new NMon::TEvHttpInfoRes(TString("HTTP/1.1 400 Bad Request\r\n\r\n") + e.what(), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); + 
ReplyAndPassAway(TBase::GetHTTPBADREQUEST("text/plain", e.what())); } - TBase::PassAway(); - } -}; - -template -struct TJsonRequestParameters> { - static YAML::Node GetParameters() { - return YAML::Load(R"___( - - name: node_id - in: query - description: node identifier - required: false - type: integer - - name: merge - in: query - description: merge information from nodes - required: false - type: boolean - - name: group - in: query - description: group information by field - required: false - type: string - - name: all - in: query - description: return all possible key combinations (for enums only) - required: false - type: boolean - - name: filter - in: query - description: filter information by field - required: false - type: string - - name: alive - in: query - description: request from alive (connected) nodes only - required: false - type: boolean - - name: enums - in: query - description: convert enums to strings - required: false - type: boolean - - name: ui64 - in: query - description: return ui64 as number - required: false - type: boolean - - name: timeout - in: query - description: timeout in ms - required: false - type: integer - - name: retries - in: query - description: number of retries - required: false - type: integer - - name: retry_period - in: query - description: retry period in ms - required: false - type: integer - default: 500 - - name: static - in: query - description: request from static nodes only - required: false - type: boolean - - name: since - in: query - description: filter by update time - required: false - type: string - )___"); } }; -template -struct TJsonRequestSchema> { - static YAML::Node GetSchema() { - return TProtoToYaml::ProtoToYamlSchema(); - } -}; - -} } diff --git a/ydb/core/viewer/json_whoami.h b/ydb/core/viewer/json_whoami.h deleted file mode 100644 index 36e3715268fd..000000000000 --- a/ydb/core/viewer/json_whoami.h +++ /dev/null @@ -1,141 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include 
-#include -#include -#include -#include -#include -#include "viewer.h" - -namespace NKikimr { -namespace NViewer { - -using namespace NActors; - -class TJsonWhoAmI : public TActorBootstrapped { - IViewer* Viewer; - NMon::TEvHttpInfo::TPtr Event; - -public: - static constexpr NKikimrServices::TActivity::EType ActorActivityType() { - return NKikimrServices::TActivity::VIEWER_HANDLER; - } - - TJsonWhoAmI(IViewer* viewer, NMon::TEvHttpInfo::TPtr& ev) - : Viewer(viewer) - , Event(ev) - {} - - void Bootstrap(const TActorContext& ctx) { - ReplyAndDie(ctx); - } - - bool CheckGroupMembership(std::unique_ptr& token, const NProtoBuf::RepeatedPtrField& sids) { - if (sids.empty()) { - return true; - } - for (const auto& sid : sids) { - if (token->IsExist(sid)) { - return true; - } - } - return false; - } - - void ReplyAndDie(const TActorContext &ctx) { - NACLibProto::TUserToken userToken; - Y_PROTOBUF_SUPPRESS_NODISCARD userToken.ParseFromString(Event->Get()->UserToken); - NJson::TJsonValue json(NJson::JSON_MAP); - if (userToken.HasUserSID()) { - json["UserSID"] = userToken.GetUserSID(); - } - if (userToken.HasGroupSIDs() && userToken.GetGroupSIDs().BucketsSize() > 0) { - NJson::TJsonValue& groupSIDs(json["GroupSIDs"]); - groupSIDs.SetType(NJson::JSON_ARRAY); - for (const auto& buckets : userToken.GetGroupSIDs().GetBuckets()) { - for (const auto& group : buckets.GetValues()) { - groupSIDs.AppendValue(group); - } - } - } - if (userToken.HasOriginalUserToken()) { - json["OriginalUserToken"] = userToken.GetOriginalUserToken(); - } - if (userToken.HasAuthType()) { - json["AuthType"] = userToken.GetAuthType(); - } - auto token = std::make_unique(userToken); - json["IsViewerAllowed"] = CheckGroupMembership(token, AppData()->DomainsConfig.GetSecurityConfig().GetViewerAllowedSIDs()); - json["IsMonitoringAllowed"] = CheckGroupMembership(token, AppData()->DomainsConfig.GetSecurityConfig().GetMonitoringAllowedSIDs()); - json["IsAdministrationAllowed"] = CheckGroupMembership(token, 
AppData()->DomainsConfig.GetSecurityConfig().GetAdministrationAllowedSIDs()); - ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPOKJSON(Event->Get(), NJson::WriteJson(json, false)), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); - } - - void HandleTimeout(const TActorContext &ctx) { - ctx.Send(Event->Sender, new NMon::TEvHttpInfoRes(Viewer->GetHTTPGATEWAYTIMEOUT(Event->Get()), 0, NMon::IEvHttpInfoRes::EContentType::Custom)); - Die(ctx); - } -}; - -template <> -struct TJsonRequestSchema { - static YAML::Node GetSchema() { - return YAML::Load(R"___( - type: object - title: WhoAmI - properties: - UserSID: - type: string - description: User ID / name - GroupSID: - type: array - items: - type: string - description: User groups - OriginalUserToken: - type: string - description: User's token used to authenticate - AuthType: - type: string - description: Authentication type - IsViewerAllowed: - type: boolean - description: Is user allowed to view data - IsMonitoringAllowed: - type: boolean - description: Is user allowed to view deeper and make simple changes - IsAdministrationAllowed: - type: boolean - description: Is user allowed to do unrestricted changes in the system - )___"); - } -}; - -template <> -struct TJsonRequestParameters { - static YAML::Node GetParameters() { - return {}; - } -}; - -template <> -struct TJsonRequestSummary { - static TString GetSummary() { - return "Information about current user"; - } -}; - -template <> -struct TJsonRequestDescription { - static TString GetDescription() { - return "Returns information about user token"; - } -}; - -} -} diff --git a/ydb/core/viewer/log.h b/ydb/core/viewer/log.h index 8a0fe6dcf054..da5229ff0c68 100644 --- a/ydb/core/viewer/log.h +++ b/ydb/core/viewer/log.h @@ -1,16 +1,13 @@ #pragma once - -#include #include +#include -namespace NKikimr { -namespace NViewer { +namespace NKikimr::NViewer { inline TString GetLogPrefix() { return {}; } -} } #define BLOG_D(stream) 
LOG_DEBUG_S(*TlsActivationContext, NKikimrServices::VIEWER, GetLogPrefix() << stream) diff --git a/ydb/core/viewer/monitoring/asset-manifest.json b/ydb/core/viewer/monitoring/asset-manifest.json index bac25a42dd95..aff0a3b603e9 100644 --- a/ydb/core/viewer/monitoring/asset-manifest.json +++ b/ydb/core/viewer/monitoring/asset-manifest.json @@ -1,253 +1,284 @@ { "files": { - "main.css": "./static/css/main.c8ce3bba.css", - "main.js": "./static/js/main.62a60ecb.js", - "static/js/3457.b193afe6.chunk.js": "./static/js/3457.b193afe6.chunk.js", - "static/js/6876.867b698c.chunk.js": "./static/js/6876.867b698c.chunk.js", - "static/js/2435.092e8d7f.chunk.js": "./static/js/2435.092e8d7f.chunk.js", - "static/js/7409.4408962b.chunk.js": "./static/js/7409.4408962b.chunk.js", - "static/js/8622.49f3054c.chunk.js": "./static/js/8622.49f3054c.chunk.js", - "static/js/598.243fd68d.chunk.js": "./static/js/598.243fd68d.chunk.js", - "static/js/6392.134ee5e4.chunk.js": "./static/js/6392.134ee5e4.chunk.js", - "static/js/4618.131d9563.chunk.js": "./static/js/4618.131d9563.chunk.js", - "static/js/704.45771d88.chunk.js": "./static/js/704.45771d88.chunk.js", - "static/js/515.cd9a8a90.chunk.js": "./static/js/515.cd9a8a90.chunk.js", - "static/js/8858.cd9d49a5.chunk.js": "./static/js/8858.cd9d49a5.chunk.js", - "static/js/6887.0855fd66.chunk.js": "./static/js/6887.0855fd66.chunk.js", - "static/js/4848.64f47dc3.chunk.js": "./static/js/4848.64f47dc3.chunk.js", - "static/js/4198.d0671061.chunk.js": "./static/js/4198.d0671061.chunk.js", - "static/js/620.7aea5425.chunk.js": "./static/js/620.7aea5425.chunk.js", - "static/js/9204.77418f94.chunk.js": "./static/js/9204.77418f94.chunk.js", - "static/js/1736.9f4a6b02.chunk.js": "./static/js/1736.9f4a6b02.chunk.js", - "static/js/8747.baf63d86.chunk.js": "./static/js/8747.baf63d86.chunk.js", - "static/js/1528.2a39d066.chunk.js": "./static/js/1528.2a39d066.chunk.js", - "static/js/6877.d2d51d98.chunk.js": "./static/js/6877.d2d51d98.chunk.js", - 
"static/js/4814.11309069.chunk.js": "./static/js/4814.11309069.chunk.js", - "static/js/202.52f13cd5.chunk.js": "./static/js/202.52f13cd5.chunk.js", - "static/js/9280.40cff028.chunk.js": "./static/js/9280.40cff028.chunk.js", - "static/js/5863.e2cd2452.chunk.js": "./static/js/5863.e2cd2452.chunk.js", - "static/js/6058.7f474f92.chunk.js": "./static/js/6058.7f474f92.chunk.js", - "static/js/632.b6c03857.chunk.js": "./static/js/632.b6c03857.chunk.js", - "static/js/7202.fefd43ee.chunk.js": "./static/js/7202.fefd43ee.chunk.js", - "static/js/7999.bdf4fe79.chunk.js": "./static/js/7999.bdf4fe79.chunk.js", - "static/js/2367.052e678b.chunk.js": "./static/js/2367.052e678b.chunk.js", - "static/js/5373.90c95a6e.chunk.js": "./static/js/5373.90c95a6e.chunk.js", - "static/js/6393.b0de2d9e.chunk.js": "./static/js/6393.b0de2d9e.chunk.js", - "static/js/5448.cef3c129.chunk.js": "./static/js/5448.cef3c129.chunk.js", - "static/js/6679.6e0a87d5.chunk.js": "./static/js/6679.6e0a87d5.chunk.js", - "static/js/4132.04be158e.chunk.js": "./static/js/4132.04be158e.chunk.js", - "static/js/9219.24a20881.chunk.js": "./static/js/9219.24a20881.chunk.js", - "static/js/556.55f00ac6.chunk.js": "./static/js/556.55f00ac6.chunk.js", - "static/js/8850.97635389.chunk.js": "./static/js/8850.97635389.chunk.js", - "static/js/9297.eadc4dba.chunk.js": "./static/js/9297.eadc4dba.chunk.js", - "static/js/3630.8eda2d3f.chunk.js": "./static/js/3630.8eda2d3f.chunk.js", - "static/js/3231.65396654.chunk.js": "./static/js/3231.65396654.chunk.js", - "static/js/6815.672badd5.chunk.js": "./static/js/6815.672badd5.chunk.js", - "static/js/2620.8e5c52fb.chunk.js": "./static/js/2620.8e5c52fb.chunk.js", - "static/js/6961.f4888ae1.chunk.js": "./static/js/6961.f4888ae1.chunk.js", - "static/js/7257.8ce0d045.chunk.js": "./static/js/7257.8ce0d045.chunk.js", - "static/js/8702.69a3e0d5.chunk.js": "./static/js/8702.69a3e0d5.chunk.js", - "static/js/3304.f5897a96.chunk.js": "./static/js/3304.f5897a96.chunk.js", - 
"static/js/1508.f0158935.chunk.js": "./static/js/1508.f0158935.chunk.js", - "static/js/3271.7b005742.chunk.js": "./static/js/3271.7b005742.chunk.js", - "static/js/30.b097cbb4.chunk.js": "./static/js/30.b097cbb4.chunk.js", - "static/js/5117.896f7ffb.chunk.js": "./static/js/5117.896f7ffb.chunk.js", - "static/js/5387.8af1d694.chunk.js": "./static/js/5387.8af1d694.chunk.js", - "static/js/5670.5c30cef1.chunk.js": "./static/js/5670.5c30cef1.chunk.js", - "static/js/7388.9f447514.chunk.js": "./static/js/7388.9f447514.chunk.js", - "static/js/3333.ceb196e6.chunk.js": "./static/js/3333.ceb196e6.chunk.js", - "static/js/1278.c0717a20.chunk.js": "./static/js/1278.c0717a20.chunk.js", - "static/js/178.e0df04cc.chunk.js": "./static/js/178.e0df04cc.chunk.js", - "static/js/6892.2c3c2bcb.chunk.js": "./static/js/6892.2c3c2bcb.chunk.js", - "static/js/2229.6687fc46.chunk.js": "./static/js/2229.6687fc46.chunk.js", - "static/js/4326.d5c34c54.chunk.js": "./static/js/4326.d5c34c54.chunk.js", - "static/js/7276.47f377a4.chunk.js": "./static/js/7276.47f377a4.chunk.js", - "static/js/7803.a56cfca6.chunk.js": "./static/js/7803.a56cfca6.chunk.js", - "static/js/5720.39a954f1.chunk.js": "./static/js/5720.39a954f1.chunk.js", - "static/js/6954.e18be130.chunk.js": "./static/js/6954.e18be130.chunk.js", - "static/js/9413.b2921c36.chunk.js": "./static/js/9413.b2921c36.chunk.js", - "static/js/3945.054c871d.chunk.js": "./static/js/3945.054c871d.chunk.js", - "static/js/2981.6d027811.chunk.js": "./static/js/2981.6d027811.chunk.js", - "static/js/1150.2b47004d.chunk.js": "./static/js/1150.2b47004d.chunk.js", - "static/js/3926.8f2c9741.chunk.js": "./static/js/3926.8f2c9741.chunk.js", - "static/js/5643.00957838.chunk.js": "./static/js/5643.00957838.chunk.js", - "static/js/5161.45b4f520.chunk.js": "./static/js/5161.45b4f520.chunk.js", - "static/js/2238.3cf88b79.chunk.js": "./static/js/2238.3cf88b79.chunk.js", - "static/js/8133.2afc4db4.chunk.js": "./static/js/8133.2afc4db4.chunk.js", - 
"static/js/4949.6bf46e71.chunk.js": "./static/js/4949.6bf46e71.chunk.js", - "static/js/383.4faec08b.chunk.js": "./static/js/383.4faec08b.chunk.js", - "static/js/2701.86912840.chunk.js": "./static/js/2701.86912840.chunk.js", - "static/js/3645.bdd20200.chunk.js": "./static/js/3645.bdd20200.chunk.js", - "static/js/2677.3d7ea3fc.chunk.js": "./static/js/2677.3d7ea3fc.chunk.js", - "static/js/2477.e6121bfd.chunk.js": "./static/js/2477.e6121bfd.chunk.js", - "static/js/5399.f9398084.chunk.js": "./static/js/5399.f9398084.chunk.js", - "static/js/4985.991de003.chunk.js": "./static/js/4985.991de003.chunk.js", - "static/js/674.e6536250.chunk.js": "./static/js/674.e6536250.chunk.js", - "static/js/9207.5881b206.chunk.js": "./static/js/9207.5881b206.chunk.js", - "static/js/7779.9d9b07ae.chunk.js": "./static/js/7779.9d9b07ae.chunk.js", - "static/js/1148.3c629236.chunk.js": "./static/js/1148.3c629236.chunk.js", - "static/js/8011.4fed4307.chunk.js": "./static/js/8011.4fed4307.chunk.js", - "static/js/96.6e1bf3f4.chunk.js": "./static/js/96.6e1bf3f4.chunk.js", - "static/js/8167.b9a90da5.chunk.js": "./static/js/8167.b9a90da5.chunk.js", - "static/js/4347.adf03999.chunk.js": "./static/js/4347.adf03999.chunk.js", - "static/js/2223.63ae5a05.chunk.js": "./static/js/2223.63ae5a05.chunk.js", - "static/js/2033.5c6dfca9.chunk.js": "./static/js/2033.5c6dfca9.chunk.js", - "static/js/8695.f17f8853.chunk.js": "./static/js/8695.f17f8853.chunk.js", - "static/js/8140.8d8e9309.chunk.js": "./static/js/8140.8d8e9309.chunk.js", - "static/js/6227.fc562bbf.chunk.js": "./static/js/6227.fc562bbf.chunk.js", - "static/js/148.b60f0e5e.chunk.js": "./static/js/148.b60f0e5e.chunk.js", - "static/js/9572.9f83f004.chunk.js": "./static/js/9572.9f83f004.chunk.js", - "static/js/1179.15d7ac65.chunk.js": "./static/js/1179.15d7ac65.chunk.js", - "static/js/1746.a8ba5c62.chunk.js": "./static/js/1746.a8ba5c62.chunk.js", - "static/js/3466.98f036ac.chunk.js": "./static/js/3466.98f036ac.chunk.js", - 
"static/js/4684.27f737c4.chunk.js": "./static/js/4684.27f737c4.chunk.js", - "static/js/5226.675d55fb.chunk.js": "./static/js/5226.675d55fb.chunk.js", - "static/js/115.2c4de87e.chunk.js": "./static/js/115.2c4de87e.chunk.js", - "static/js/4964.c7c75eb0.chunk.js": "./static/js/4964.c7c75eb0.chunk.js", - "static/js/1869.d6661a03.chunk.js": "./static/js/1869.d6661a03.chunk.js", - "static/js/9917.67d792e3.chunk.js": "./static/js/9917.67d792e3.chunk.js", - "static/js/163.eea01641.chunk.js": "./static/js/163.eea01641.chunk.js", - "static/js/3025.7e536c57.chunk.js": "./static/js/3025.7e536c57.chunk.js", - "static/js/6156.0c562627.chunk.js": "./static/js/6156.0c562627.chunk.js", - "static/js/6361.a9f11e7a.chunk.js": "./static/js/6361.a9f11e7a.chunk.js", - "static/js/4663.b893c670.chunk.js": "./static/js/4663.b893c670.chunk.js", - "static/js/7992.20690745.chunk.js": "./static/js/7992.20690745.chunk.js", - "static/js/3756.67bd6b00.chunk.js": "./static/js/3756.67bd6b00.chunk.js", - "static/js/678.b73063ff.chunk.js": "./static/js/678.b73063ff.chunk.js", - "static/js/436.564ff0f8.chunk.js": "./static/js/436.564ff0f8.chunk.js", - "static/js/5112.6189bbe0.chunk.js": "./static/js/5112.6189bbe0.chunk.js", - "static/js/9555.c9b5ee61.chunk.js": "./static/js/9555.c9b5ee61.chunk.js", - "static/js/5809.d78ebebb.chunk.js": "./static/js/5809.d78ebebb.chunk.js", - "static/js/5450.f0dcfc15.chunk.js": "./static/js/5450.f0dcfc15.chunk.js", - "static/js/5491.a460479e.chunk.js": "./static/js/5491.a460479e.chunk.js", - "static/js/8591.93172fe9.chunk.js": "./static/js/8591.93172fe9.chunk.js", - "static/js/7016.4a34a027.chunk.js": "./static/js/7016.4a34a027.chunk.js", - "static/js/9308.c72b8585.chunk.js": "./static/js/9308.c72b8585.chunk.js", - "static/js/9411.96fb3e2f.chunk.js": "./static/js/9411.96fb3e2f.chunk.js", - "static/js/6521.371403ec.chunk.js": "./static/js/6521.371403ec.chunk.js", - "static/js/4159.5e0cfd91.chunk.js": "./static/js/4159.5e0cfd91.chunk.js", - 
"static/js/9528.9991c023.chunk.js": "./static/js/9528.9991c023.chunk.js", - "static/js/4826.d2723706.chunk.js": "./static/js/4826.d2723706.chunk.js", - "static/js/5352.3d3187b7.chunk.js": "./static/js/5352.3d3187b7.chunk.js", - "static/js/9292.91ed23f7.chunk.js": "./static/js/9292.91ed23f7.chunk.js", - "static/js/7684.a3920b72.chunk.js": "./static/js/7684.a3920b72.chunk.js", - "static/js/9212.870f16f0.chunk.js": "./static/js/9212.870f16f0.chunk.js", - "static/js/6065.b08e9640.chunk.js": "./static/js/6065.b08e9640.chunk.js", - "static/js/5341.2c19c723.chunk.js": "./static/js/5341.2c19c723.chunk.js", - "static/js/4583.1682cf86.chunk.js": "./static/js/4583.1682cf86.chunk.js", - "static/js/3920.11b8c9d7.chunk.js": "./static/js/3920.11b8c9d7.chunk.js", - "static/js/7119.e94f8dac.chunk.js": "./static/js/7119.e94f8dac.chunk.js", - "static/js/6144.e1568f26.chunk.js": "./static/js/6144.e1568f26.chunk.js", - "static/js/1350.21b6a9ef.chunk.js": "./static/js/1350.21b6a9ef.chunk.js", - "static/js/2590.75b6626e.chunk.js": "./static/js/2590.75b6626e.chunk.js", - "static/js/6291.e7cdf7f2.chunk.js": "./static/js/6291.e7cdf7f2.chunk.js", - "static/js/3397.9c0005a3.chunk.js": "./static/js/3397.9c0005a3.chunk.js", - "static/js/1168.91d9e2c2.chunk.js": "./static/js/1168.91d9e2c2.chunk.js", - "static/js/8853.c8f9e9d6.chunk.js": "./static/js/8853.c8f9e9d6.chunk.js", - "static/js/4535.5d1c8322.chunk.js": "./static/js/4535.5d1c8322.chunk.js", - "static/js/9101.ce051539.chunk.js": "./static/js/9101.ce051539.chunk.js", - "static/js/2986.2100fcad.chunk.js": "./static/js/2986.2100fcad.chunk.js", - "static/js/4080.07be3744.chunk.js": "./static/js/4080.07be3744.chunk.js", - "static/js/3898.1fec42e6.chunk.js": "./static/js/3898.1fec42e6.chunk.js", - "static/js/1616.8a217b93.chunk.js": "./static/js/1616.8a217b93.chunk.js", - "static/js/7522.1a0f9c02.chunk.js": "./static/js/7522.1a0f9c02.chunk.js", - "static/js/6531.7eac62d1.chunk.js": "./static/js/6531.7eac62d1.chunk.js", - 
"static/css/4983.5c3e5de4.chunk.css": "./static/css/4983.5c3e5de4.chunk.css", - "static/js/3757.7c534899.chunk.js": "./static/js/3757.7c534899.chunk.js", - "static/js/4842.57182d38.chunk.js": "./static/js/4842.57182d38.chunk.js", - "static/css/328.c0ade9c1.chunk.css": "./static/css/328.c0ade9c1.chunk.css", - "static/js/328.f24db8bf.chunk.js": "./static/js/328.f24db8bf.chunk.js", - "static/js/599.c58caf58.chunk.js": "./static/js/599.c58caf58.chunk.js", - "static/js/1155.4fce1854.chunk.js": "./static/js/1155.4fce1854.chunk.js", - "static/js/6230.8e64216a.chunk.js": "./static/js/6230.8e64216a.chunk.js", - "static/js/337.b6fc715e.chunk.js": "./static/js/337.b6fc715e.chunk.js", - "static/js/451.3b449e79.chunk.js": "./static/js/451.3b449e79.chunk.js", - "static/js/2322.29255c22.chunk.js": "./static/js/2322.29255c22.chunk.js", - "static/js/4123.64882a16.chunk.js": "./static/js/4123.64882a16.chunk.js", - "static/js/6289.51f8741e.chunk.js": "./static/js/6289.51f8741e.chunk.js", - "static/js/4635.ffa9b6b7.chunk.js": "./static/js/4635.ffa9b6b7.chunk.js", - "static/js/4345.9238776d.chunk.js": "./static/js/4345.9238776d.chunk.js", - "static/js/9319.40f9e46a.chunk.js": "./static/js/9319.40f9e46a.chunk.js", - "static/js/924.382f18b1.chunk.js": "./static/js/924.382f18b1.chunk.js", - "static/js/6795.5ec0c96a.chunk.js": "./static/js/6795.5ec0c96a.chunk.js", - "static/js/2302.7e7a2fb4.chunk.js": "./static/js/2302.7e7a2fb4.chunk.js", - "static/js/4388.edb51304.chunk.js": "./static/js/4388.edb51304.chunk.js", - "static/js/4046.5dac72a9.chunk.js": "./static/js/4046.5dac72a9.chunk.js", - "static/js/2190.27f354f5.chunk.js": "./static/js/2190.27f354f5.chunk.js", - "static/js/3358.c777fe1f.chunk.js": "./static/js/3358.c777fe1f.chunk.js", - "static/js/6142.b2452554.chunk.js": "./static/js/6142.b2452554.chunk.js", - "static/js/2962.66e01691.chunk.js": "./static/js/2962.66e01691.chunk.js", - "static/js/214.99a17949.chunk.js": "./static/js/214.99a17949.chunk.js", - 
"static/js/8791.b209de42.chunk.js": "./static/js/8791.b209de42.chunk.js", - "static/js/6898.5580b941.chunk.js": "./static/js/6898.5580b941.chunk.js", - "static/js/9173.71d773f2.chunk.js": "./static/js/9173.71d773f2.chunk.js", - "static/js/2532.30bb087d.chunk.js": "./static/js/2532.30bb087d.chunk.js", - "static/js/6329.d78c1432.chunk.js": "./static/js/6329.d78c1432.chunk.js", - "static/js/2840.b69eb597.chunk.js": "./static/js/2840.b69eb597.chunk.js", - "static/js/5311.a500a1ea.chunk.js": "./static/js/5311.a500a1ea.chunk.js", - "static/js/2403.82cd0025.chunk.js": "./static/js/2403.82cd0025.chunk.js", - "static/js/1747.b4331799.chunk.js": "./static/js/1747.b4331799.chunk.js", - "static/js/3498.c7d39060.chunk.js": "./static/js/3498.c7d39060.chunk.js", - "static/js/185.7d51fcfa.chunk.js": "./static/js/185.7d51fcfa.chunk.js", - "static/js/8450.baf3a89d.chunk.js": "./static/js/8450.baf3a89d.chunk.js", - "static/js/3771.764124c3.chunk.js": "./static/js/3771.764124c3.chunk.js", - "static/js/7529.ddf87a9a.chunk.js": "./static/js/7529.ddf87a9a.chunk.js", - "static/js/785.d2eae69c.chunk.js": "./static/js/785.d2eae69c.chunk.js", - "static/js/5107.8cac6a03.chunk.js": "./static/js/5107.8cac6a03.chunk.js", - "static/js/6919.84ed9ccc.chunk.js": "./static/js/6919.84ed9ccc.chunk.js", - "static/js/2104.4f22ecac.chunk.js": "./static/js/2104.4f22ecac.chunk.js", - "static/js/9433.7ce648d0.chunk.js": "./static/js/9433.7ce648d0.chunk.js", - "static/js/1956.0205a5bb.chunk.js": "./static/js/1956.0205a5bb.chunk.js", - "static/js/6619.9e1de7a6.chunk.js": "./static/js/6619.9e1de7a6.chunk.js", - "static/js/2492.64b7d727.chunk.js": "./static/js/2492.64b7d727.chunk.js", - "static/js/2194.38bafdfc.chunk.js": "./static/js/2194.38bafdfc.chunk.js", - "static/js/9526.10bb1684.chunk.js": "./static/js/9526.10bb1684.chunk.js", - "static/js/5790.e3d88e2c.chunk.js": "./static/js/5790.e3d88e2c.chunk.js", - "static/js/8905.b8a9fd91.chunk.js": "./static/js/8905.b8a9fd91.chunk.js", - 
"static/js/5168.6fb23f08.chunk.js": "./static/js/5168.6fb23f08.chunk.js", - "static/js/619.f27ddcbd.chunk.js": "./static/js/619.f27ddcbd.chunk.js", - "static/js/4550.2e04d705.chunk.js": "./static/js/4550.2e04d705.chunk.js", - "static/js/3644.aeda46ca.chunk.js": "./static/js/3644.aeda46ca.chunk.js", - "static/js/8797.f8f0ce13.chunk.js": "./static/js/8797.f8f0ce13.chunk.js", - "static/js/2521.21bdfab9.chunk.js": "./static/js/2521.21bdfab9.chunk.js", - "static/js/1478.5044be66.chunk.js": "./static/js/1478.5044be66.chunk.js", - "static/js/6300.dca75d45.chunk.js": "./static/js/6300.dca75d45.chunk.js", - "static/js/3074.bbb8aaef.chunk.js": "./static/js/3074.bbb8aaef.chunk.js", - "static/js/9371.b42befbc.chunk.js": "./static/js/9371.b42befbc.chunk.js", - "static/js/9923.270f0a19.chunk.js": "./static/js/9923.270f0a19.chunk.js", - "static/js/358.d6300019.chunk.js": "./static/js/358.d6300019.chunk.js", - "static/js/86.ad271bdc.chunk.js": "./static/js/86.ad271bdc.chunk.js", - "static/js/5661.c83a4eb0.chunk.js": "./static/js/5661.c83a4eb0.chunk.js", - "static/js/3621.9b6c61ab.chunk.js": "./static/js/3621.9b6c61ab.chunk.js", - "static/js/2994.e6c77407.chunk.js": "./static/js/2994.e6c77407.chunk.js", - "static/js/4812.73af8448.chunk.js": "./static/js/4812.73af8448.chunk.js", - "static/js/9621.48073631.chunk.js": "./static/js/9621.48073631.chunk.js", - "static/js/7554.28f3da22.chunk.js": "./static/js/7554.28f3da22.chunk.js", - "static/js/425.c6dd581a.chunk.js": "./static/js/425.c6dd581a.chunk.js", - "static/js/6044.2de9962d.chunk.js": "./static/js/6044.2de9962d.chunk.js", - "static/js/2141.26c930aa.chunk.js": "./static/js/2141.26c930aa.chunk.js", - "static/js/919.53e04507.chunk.js": "./static/js/919.53e04507.chunk.js", - "static/js/6692.9322b59d.chunk.js": "./static/js/6692.9322b59d.chunk.js", - "static/js/6321.aa3e44de.chunk.js": "./static/js/6321.aa3e44de.chunk.js", - "static/js/2931.3ade3bc3.chunk.js": "./static/js/2931.3ade3bc3.chunk.js", - "static/js/2876.afe7e47f.chunk.js": 
"./static/js/2876.afe7e47f.chunk.js", - "static/js/5868.be04313a.chunk.js": "./static/js/5868.be04313a.chunk.js", - "static/js/2553.5faabf5a.chunk.js": "./static/js/2553.5faabf5a.chunk.js", - "static/js/9876.b336d1f5.chunk.js": "./static/js/9876.b336d1f5.chunk.js", - "static/js/5378.86805fba.chunk.js": "./static/js/5378.86805fba.chunk.js", - "static/js/2183.e2318c37.chunk.js": "./static/js/2183.e2318c37.chunk.js", - "static/js/7543.3fcfd3ba.chunk.js": "./static/js/7543.3fcfd3ba.chunk.js", - "static/js/6390.497d0ec8.chunk.js": "./static/js/6390.497d0ec8.chunk.js", - "static/js/2118.bc169874.chunk.js": "./static/js/2118.bc169874.chunk.js", - "static/js/8065.666ef449.chunk.js": "./static/js/8065.666ef449.chunk.js", - "static/js/7520.d245d6ac.chunk.js": "./static/js/7520.d245d6ac.chunk.js", - "static/js/4789.d52069de.chunk.js": "./static/js/4789.d52069de.chunk.js", - "static/js/8607.1e377882.chunk.js": "./static/js/8607.1e377882.chunk.js", - "static/css/1551.d5e5efc2.chunk.css": "./static/css/1551.d5e5efc2.chunk.css", - "static/js/1551.2e8e3e50.chunk.js": "./static/js/1551.2e8e3e50.chunk.js", - "static/css/8424.308a04db.chunk.css": "./static/css/8424.308a04db.chunk.css", - "static/js/8424.5b5c42b5.chunk.js": "./static/js/8424.5b5c42b5.chunk.js", - "static/js/7645.6565454c.chunk.js": "./static/js/7645.6565454c.chunk.js", + "main.css": "./static/css/main.0bf24cd8.css", + "main.js": "./static/js/main.7cff0321.js", + "static/js/4226.9f88ef38.chunk.js": "./static/js/4226.9f88ef38.chunk.js", + "static/js/6595.53cb237b.chunk.js": "./static/js/6595.53cb237b.chunk.js", + "static/js/2503.96b0c070.chunk.js": "./static/js/2503.96b0c070.chunk.js", + "static/js/3768.988c811a.chunk.js": "./static/js/3768.988c811a.chunk.js", + "static/js/3283.59bb81e0.chunk.js": "./static/js/3283.59bb81e0.chunk.js", + "static/js/1855.17a4a673.chunk.js": "./static/js/1855.17a4a673.chunk.js", + "static/js/3092.117dd3ef.chunk.js": "./static/js/3092.117dd3ef.chunk.js", + 
"static/js/3643.bec8ad3c.chunk.js": "./static/js/3643.bec8ad3c.chunk.js", + "static/js/9974.57e4b5e1.chunk.js": "./static/js/9974.57e4b5e1.chunk.js", + "static/js/4806.cff0f21e.chunk.js": "./static/js/4806.cff0f21e.chunk.js", + "static/js/6943.a900c8d8.chunk.js": "./static/js/6943.a900c8d8.chunk.js", + "static/js/4463.e01e2d73.chunk.js": "./static/js/4463.e01e2d73.chunk.js", + "static/js/6395.82fdc368.chunk.js": "./static/js/6395.82fdc368.chunk.js", + "static/js/4779.f5b56e5a.chunk.js": "./static/js/4779.f5b56e5a.chunk.js", + "static/js/753.7b0f09c6.chunk.js": "./static/js/753.7b0f09c6.chunk.js", + "static/js/4563.c214a5dd.chunk.js": "./static/js/4563.c214a5dd.chunk.js", + "static/js/6332.982e12ce.chunk.js": "./static/js/6332.982e12ce.chunk.js", + "static/js/2100.06ecdc28.chunk.js": "./static/js/2100.06ecdc28.chunk.js", + "static/js/2845.2216d105.chunk.js": "./static/js/2845.2216d105.chunk.js", + "static/js/8169.3b607870.chunk.js": "./static/js/8169.3b607870.chunk.js", + "static/js/5257.020aa417.chunk.js": "./static/js/5257.020aa417.chunk.js", + "static/js/1074.8bbb31b1.chunk.js": "./static/js/1074.8bbb31b1.chunk.js", + "static/js/8332.c2056746.chunk.js": "./static/js/8332.c2056746.chunk.js", + "static/js/2216.be9ff335.chunk.js": "./static/js/2216.be9ff335.chunk.js", + "static/js/8726.76eea1ab.chunk.js": "./static/js/8726.76eea1ab.chunk.js", + "static/js/9617.3a063882.chunk.js": "./static/js/9617.3a063882.chunk.js", + "static/js/1971.a2544f07.chunk.js": "./static/js/1971.a2544f07.chunk.js", + "static/js/7076.99b06857.chunk.js": "./static/js/7076.99b06857.chunk.js", + "static/js/1035.4d5fe2fd.chunk.js": "./static/js/1035.4d5fe2fd.chunk.js", + "static/js/9757.1fa8ec24.chunk.js": "./static/js/9757.1fa8ec24.chunk.js", + "static/js/7439.9de88a36.chunk.js": "./static/js/7439.9de88a36.chunk.js", + "static/js/9711.5936b47c.chunk.js": "./static/js/9711.5936b47c.chunk.js", + "static/js/3235.884b5b99.chunk.js": "./static/js/3235.884b5b99.chunk.js", + 
"static/js/3830.4c0547b6.chunk.js": "./static/js/3830.4c0547b6.chunk.js", + "static/js/3997.030699b5.chunk.js": "./static/js/3997.030699b5.chunk.js", + "static/js/9937.2df3d582.chunk.js": "./static/js/9937.2df3d582.chunk.js", + "static/js/2972.f8a04d68.chunk.js": "./static/js/2972.f8a04d68.chunk.js", + "static/js/2804.6549b96e.chunk.js": "./static/js/2804.6549b96e.chunk.js", + "static/js/2070.62b9c9e9.chunk.js": "./static/js/2070.62b9c9e9.chunk.js", + "static/js/1388.dc200448.chunk.js": "./static/js/1388.dc200448.chunk.js", + "static/js/1923.a4cf691b.chunk.js": "./static/js/1923.a4cf691b.chunk.js", + "static/js/7132.d1397593.chunk.js": "./static/js/7132.d1397593.chunk.js", + "static/js/5821.269b9961.chunk.js": "./static/js/5821.269b9961.chunk.js", + "static/js/5957.22d94683.chunk.js": "./static/js/5957.22d94683.chunk.js", + "static/js/3145.cfb85bc6.chunk.js": "./static/js/3145.cfb85bc6.chunk.js", + "static/js/9765.410b7864.chunk.js": "./static/js/9765.410b7864.chunk.js", + "static/js/6170.f5db6881.chunk.js": "./static/js/6170.f5db6881.chunk.js", + "static/js/592.07f568c9.chunk.js": "./static/js/592.07f568c9.chunk.js", + "static/js/2039.674765db.chunk.js": "./static/js/2039.674765db.chunk.js", + "static/js/6070.d22f533e.chunk.js": "./static/js/6070.d22f533e.chunk.js", + "static/js/7446.c434978e.chunk.js": "./static/js/7446.c434978e.chunk.js", + "static/js/3520.5df2f9dc.chunk.js": "./static/js/3520.5df2f9dc.chunk.js", + "static/js/579.ff7e5b95.chunk.js": "./static/js/579.ff7e5b95.chunk.js", + "static/js/6845.e64bd413.chunk.js": "./static/js/6845.e64bd413.chunk.js", + "static/js/323.e36b8268.chunk.js": "./static/js/323.e36b8268.chunk.js", + "static/js/3607.6bedcc44.chunk.js": "./static/js/3607.6bedcc44.chunk.js", + "static/js/9703.c7826405.chunk.js": "./static/js/9703.c7826405.chunk.js", + "static/js/3998.051d103c.chunk.js": "./static/js/3998.051d103c.chunk.js", + "static/js/2974.36362aa2.chunk.js": "./static/js/2974.36362aa2.chunk.js", + 
"static/js/4408.4cb113d4.chunk.js": "./static/js/4408.4cb113d4.chunk.js", + "static/js/2734.2f81aa93.chunk.js": "./static/js/2734.2f81aa93.chunk.js", + "static/js/1236.3b0b47e7.chunk.js": "./static/js/1236.3b0b47e7.chunk.js", + "static/js/4324.8b4dfeeb.chunk.js": "./static/js/4324.8b4dfeeb.chunk.js", + "static/js/5203.dbaf6737.chunk.js": "./static/js/5203.dbaf6737.chunk.js", + "static/js/4006.183d37e4.chunk.js": "./static/js/4006.183d37e4.chunk.js", + "static/js/4413.424607bb.chunk.js": "./static/js/4413.424607bb.chunk.js", + "static/js/7367.37be1f98.chunk.js": "./static/js/7367.37be1f98.chunk.js", + "static/js/3883.9513e6a0.chunk.js": "./static/js/3883.9513e6a0.chunk.js", + "static/js/4018.76940440.chunk.js": "./static/js/4018.76940440.chunk.js", + "static/js/6663.26d7c75f.chunk.js": "./static/js/6663.26d7c75f.chunk.js", + "static/js/6079.95240888.chunk.js": "./static/js/6079.95240888.chunk.js", + "static/js/5070.58b434a0.chunk.js": "./static/js/5070.58b434a0.chunk.js", + "static/js/9685.590d0ab0.chunk.js": "./static/js/9685.590d0ab0.chunk.js", + "static/js/4254.9abe8ed3.chunk.js": "./static/js/4254.9abe8ed3.chunk.js", + "static/js/846.c37c0f60.chunk.js": "./static/js/846.c37c0f60.chunk.js", + "static/js/3495.77665cfd.chunk.js": "./static/js/3495.77665cfd.chunk.js", + "static/js/6625.1d36f68b.chunk.js": "./static/js/6625.1d36f68b.chunk.js", + "static/js/862.e5ac847a.chunk.js": "./static/js/862.e5ac847a.chunk.js", + "static/js/9139.d6e566d7.chunk.js": "./static/js/9139.d6e566d7.chunk.js", + "static/js/7245.79bffd2e.chunk.js": "./static/js/7245.79bffd2e.chunk.js", + "static/js/610.307fc3ff.chunk.js": "./static/js/610.307fc3ff.chunk.js", + "static/js/328.ff68c87b.chunk.js": "./static/js/328.ff68c87b.chunk.js", + "static/js/2670.33b83982.chunk.js": "./static/js/2670.33b83982.chunk.js", + "static/js/7862.970a7935.chunk.js": "./static/js/7862.970a7935.chunk.js", + "static/js/1389.4a6fe794.chunk.js": "./static/js/1389.4a6fe794.chunk.js", + 
"static/js/2455.36823616.chunk.js": "./static/js/2455.36823616.chunk.js", + "static/js/6731.36cc76de.chunk.js": "./static/js/6731.36cc76de.chunk.js", + "static/js/2651.a0376f78.chunk.js": "./static/js/2651.a0376f78.chunk.js", + "static/js/127.031dd9bd.chunk.js": "./static/js/127.031dd9bd.chunk.js", + "static/js/9101.c3da85f5.chunk.js": "./static/js/9101.c3da85f5.chunk.js", + "static/js/9433.0240620f.chunk.js": "./static/js/9433.0240620f.chunk.js", + "static/js/7950.868790bd.chunk.js": "./static/js/7950.868790bd.chunk.js", + "static/js/2421.b0e53cec.chunk.js": "./static/js/2421.b0e53cec.chunk.js", + "static/js/2405.6cb977b5.chunk.js": "./static/js/2405.6cb977b5.chunk.js", + "static/js/7273.1cfa7ba2.chunk.js": "./static/js/7273.1cfa7ba2.chunk.js", + "static/js/7443.85993ad1.chunk.js": "./static/js/7443.85993ad1.chunk.js", + "static/js/178.25784a3a.chunk.js": "./static/js/178.25784a3a.chunk.js", + "static/js/3950.57b0da11.chunk.js": "./static/js/3950.57b0da11.chunk.js", + "static/js/8446.e6a23d4b.chunk.js": "./static/js/8446.e6a23d4b.chunk.js", + "static/js/8864.be2503b0.chunk.js": "./static/js/8864.be2503b0.chunk.js", + "static/js/8828.d910f13b.chunk.js": "./static/js/8828.d910f13b.chunk.js", + "static/js/7994.5c64203a.chunk.js": "./static/js/7994.5c64203a.chunk.js", + "static/js/182.a977025f.chunk.js": "./static/js/182.a977025f.chunk.js", + "static/js/5641.b3b5ddf6.chunk.js": "./static/js/5641.b3b5ddf6.chunk.js", + "static/js/4175.62734866.chunk.js": "./static/js/4175.62734866.chunk.js", + "static/js/3493.b911f175.chunk.js": "./static/js/3493.b911f175.chunk.js", + "static/js/4320.f2de8175.chunk.js": "./static/js/4320.f2de8175.chunk.js", + "static/js/452.004a8d50.chunk.js": "./static/js/452.004a8d50.chunk.js", + "static/js/8505.c733306c.chunk.js": "./static/js/8505.c733306c.chunk.js", + "static/js/4040.6f8f7155.chunk.js": "./static/js/4040.6f8f7155.chunk.js", + "static/js/4442.9a979278.chunk.js": "./static/js/4442.9a979278.chunk.js", + 
"static/js/9821.c0703c78.chunk.js": "./static/js/9821.c0703c78.chunk.js", + "static/js/8731.72990525.chunk.js": "./static/js/8731.72990525.chunk.js", + "static/js/4503.fa229fbe.chunk.js": "./static/js/4503.fa229fbe.chunk.js", + "static/js/2871.3d11e695.chunk.js": "./static/js/2871.3d11e695.chunk.js", + "static/js/9963.fc7b507c.chunk.js": "./static/js/9963.fc7b507c.chunk.js", + "static/js/6789.febacb48.chunk.js": "./static/js/6789.febacb48.chunk.js", + "static/js/6292.fee79b86.chunk.js": "./static/js/6292.fee79b86.chunk.js", + "static/js/2516.44b079ed.chunk.js": "./static/js/2516.44b079ed.chunk.js", + "static/js/5982.b7dba432.chunk.js": "./static/js/5982.b7dba432.chunk.js", + "static/js/2967.b531e7aa.chunk.js": "./static/js/2967.b531e7aa.chunk.js", + "static/js/9350.692675cc.chunk.js": "./static/js/9350.692675cc.chunk.js", + "static/js/9008.d4b4f454.chunk.js": "./static/js/9008.d4b4f454.chunk.js", + "static/js/6660.ad1074e6.chunk.js": "./static/js/6660.ad1074e6.chunk.js", + "static/js/9625.c44ae47a.chunk.js": "./static/js/9625.c44ae47a.chunk.js", + "static/js/8841.d097b94c.chunk.js": "./static/js/8841.d097b94c.chunk.js", + "static/js/5066.a0bc3ca7.chunk.js": "./static/js/5066.a0bc3ca7.chunk.js", + "static/js/5014.51de99f4.chunk.js": "./static/js/5014.51de99f4.chunk.js", + "static/js/7656.527a98cb.chunk.js": "./static/js/7656.527a98cb.chunk.js", + "static/js/4723.b5ccc693.chunk.js": "./static/js/4723.b5ccc693.chunk.js", + "static/js/1577.e3c3298a.chunk.js": "./static/js/1577.e3c3298a.chunk.js", + "static/js/4639.a666f082.chunk.js": "./static/js/4639.a666f082.chunk.js", + "static/js/5748.d17b947f.chunk.js": "./static/js/5748.d17b947f.chunk.js", + "static/js/4712.68848e03.chunk.js": "./static/js/4712.68848e03.chunk.js", + "static/js/6679.5a32e19d.chunk.js": "./static/js/6679.5a32e19d.chunk.js", + "static/js/6927.a0faae6b.chunk.js": "./static/js/6927.a0faae6b.chunk.js", + "static/js/824.c56f39b3.chunk.js": "./static/js/824.c56f39b3.chunk.js", + 
"static/js/7275.84809a86.chunk.js": "./static/js/7275.84809a86.chunk.js", + "static/js/9352.9d3dd18e.chunk.js": "./static/js/9352.9d3dd18e.chunk.js", + "static/js/1728.782bd268.chunk.js": "./static/js/1728.782bd268.chunk.js", + "static/js/1886.e1f61c67.chunk.js": "./static/js/1886.e1f61c67.chunk.js", + "static/js/689.e9ec08fb.chunk.js": "./static/js/689.e9ec08fb.chunk.js", + "static/js/4023.1cf957ab.chunk.js": "./static/js/4023.1cf957ab.chunk.js", + "static/js/225.66110a65.chunk.js": "./static/js/225.66110a65.chunk.js", + "static/js/7199.fa3c4603.chunk.js": "./static/js/7199.fa3c4603.chunk.js", + "static/js/253.35728f09.chunk.js": "./static/js/253.35728f09.chunk.js", + "static/js/4657.793f8fff.chunk.js": "./static/js/4657.793f8fff.chunk.js", + "static/js/2780.3e502f16.chunk.js": "./static/js/2780.3e502f16.chunk.js", + "static/js/8645.08b4eef2.chunk.js": "./static/js/8645.08b4eef2.chunk.js", + "static/css/5426.8f2389ae.chunk.css": "./static/css/5426.8f2389ae.chunk.css", + "static/js/5426.c44b6444.chunk.js": "./static/js/5426.c44b6444.chunk.js", + "static/css/4808.146b1de9.chunk.css": "./static/css/4808.146b1de9.chunk.css", + "static/js/4808.382260ae.chunk.js": "./static/js/4808.382260ae.chunk.js", + "static/css/1001.d55e6f5e.chunk.css": "./static/css/1001.d55e6f5e.chunk.css", + "static/js/1001.2a472a60.chunk.js": "./static/js/1001.2a472a60.chunk.js", + "static/css/3209.28cb5984.chunk.css": "./static/css/3209.28cb5984.chunk.css", + "static/js/3209.c7f538f3.chunk.js": "./static/js/3209.c7f538f3.chunk.js", + "static/css/7512.b17f7aaf.chunk.css": "./static/css/7512.b17f7aaf.chunk.css", + "static/js/7512.923733a2.chunk.js": "./static/js/7512.923733a2.chunk.js", + "static/css/3812.b1faff6b.chunk.css": "./static/css/3812.b1faff6b.chunk.css", + "static/js/3812.62e6074c.chunk.js": "./static/js/3812.62e6074c.chunk.js", + "static/css/7054.44580026.chunk.css": "./static/css/7054.44580026.chunk.css", + "static/js/7054.3701f39e.chunk.js": "./static/js/7054.3701f39e.chunk.js", + 
"static/css/1276.f723033a.chunk.css": "./static/css/1276.f723033a.chunk.css", + "static/js/1276.f7603cad.chunk.js": "./static/js/1276.f7603cad.chunk.js", + "static/css/6866.3cc21bd9.chunk.css": "./static/css/6866.3cc21bd9.chunk.css", + "static/js/6866.ea949d3d.chunk.js": "./static/js/6866.ea949d3d.chunk.js", + "static/js/1072.9a47be2b.chunk.js": "./static/js/1072.9a47be2b.chunk.js", + "static/js/633.834b6a1c.chunk.js": "./static/js/633.834b6a1c.chunk.js", + "static/css/5118.ac57fa01.chunk.css": "./static/css/5118.ac57fa01.chunk.css", + "static/js/4609.28ef81ef.chunk.js": "./static/js/4609.28ef81ef.chunk.js", + "static/js/9530.ada06b35.chunk.js": "./static/js/9530.ada06b35.chunk.js", + "static/js/4535.457b09fd.chunk.js": "./static/js/4535.457b09fd.chunk.js", + "static/js/5682.966a8cc0.chunk.js": "./static/js/5682.966a8cc0.chunk.js", + "static/js/7043.e2958b70.chunk.js": "./static/js/7043.e2958b70.chunk.js", + "static/js/2262.2ed64c2e.chunk.js": "./static/js/2262.2ed64c2e.chunk.js", + "static/js/6062.6eaa8611.chunk.js": "./static/js/6062.6eaa8611.chunk.js", + "static/js/1222.1b644c9d.chunk.js": "./static/js/1222.1b644c9d.chunk.js", + "static/js/4231.14c1bca0.chunk.js": "./static/js/4231.14c1bca0.chunk.js", + "static/js/4628.388e99ac.chunk.js": "./static/js/4628.388e99ac.chunk.js", + "static/js/5685.d6c42c56.chunk.js": "./static/js/5685.d6c42c56.chunk.js", + "static/js/6659.4c0edc60.chunk.js": "./static/js/6659.4c0edc60.chunk.js", + "static/js/4465.a054353e.chunk.js": "./static/js/4465.a054353e.chunk.js", + "static/js/1410.9bec28d3.chunk.js": "./static/js/1410.9bec28d3.chunk.js", + "static/js/6010.23d4bb8d.chunk.js": "./static/js/6010.23d4bb8d.chunk.js", + "static/js/7828.5627775e.chunk.js": "./static/js/7828.5627775e.chunk.js", + "static/js/6435.15f0b61d.chunk.js": "./static/js/6435.15f0b61d.chunk.js", + "static/js/8504.b46947f0.chunk.js": "./static/js/8504.b46947f0.chunk.js", + "static/js/835.fcf4f3fd.chunk.js": "./static/js/835.fcf4f3fd.chunk.js", + 
"static/js/3164.b52af9a7.chunk.js": "./static/js/3164.b52af9a7.chunk.js", + "static/js/7852.6a83a8ff.chunk.js": "./static/js/7852.6a83a8ff.chunk.js", + "static/js/1460.53037d83.chunk.js": "./static/js/1460.53037d83.chunk.js", + "static/js/453.ea7639b5.chunk.js": "./static/js/453.ea7639b5.chunk.js", + "static/js/3230.ce6d3a8f.chunk.js": "./static/js/3230.ce6d3a8f.chunk.js", + "static/js/4859.5bb93a34.chunk.js": "./static/js/4859.5bb93a34.chunk.js", + "static/js/346.f47bbaef.chunk.js": "./static/js/346.f47bbaef.chunk.js", + "static/js/1109.739c032d.chunk.js": "./static/js/1109.739c032d.chunk.js", + "static/js/5467.23a85e74.chunk.js": "./static/js/5467.23a85e74.chunk.js", + "static/js/5319.acb1fb2c.chunk.js": "./static/js/5319.acb1fb2c.chunk.js", + "static/js/6914.3b644505.chunk.js": "./static/js/6914.3b644505.chunk.js", + "static/js/2769.49fdcd1a.chunk.js": "./static/js/2769.49fdcd1a.chunk.js", + "static/js/8297.67ab228e.chunk.js": "./static/js/8297.67ab228e.chunk.js", + "static/js/5484.c5ede353.chunk.js": "./static/js/5484.c5ede353.chunk.js", + "static/js/4087.46473bf5.chunk.js": "./static/js/4087.46473bf5.chunk.js", + "static/js/8427.e5a51e3c.chunk.js": "./static/js/8427.e5a51e3c.chunk.js", + "static/js/5160.17372d9c.chunk.js": "./static/js/5160.17372d9c.chunk.js", + "static/js/682.229cd996.chunk.js": "./static/js/682.229cd996.chunk.js", + "static/js/228.21aa1ffb.chunk.js": "./static/js/228.21aa1ffb.chunk.js", + "static/js/6953.3f8107dd.chunk.js": "./static/js/6953.3f8107dd.chunk.js", + "static/js/7441.8f4b4d65.chunk.js": "./static/js/7441.8f4b4d65.chunk.js", + "static/js/521.4d640434.chunk.js": "./static/js/521.4d640434.chunk.js", + "static/js/2570.8e1850a3.chunk.js": "./static/js/2570.8e1850a3.chunk.js", + "static/js/6698.7c473cb5.chunk.js": "./static/js/6698.7c473cb5.chunk.js", + "static/js/3972.1d8f063d.chunk.js": "./static/js/3972.1d8f063d.chunk.js", + "static/js/1854.ed793d70.chunk.js": "./static/js/1854.ed793d70.chunk.js", + 
"static/js/5124.661ee0ac.chunk.js": "./static/js/5124.661ee0ac.chunk.js", + "static/js/8908.eb7bba4b.chunk.js": "./static/js/8908.eb7bba4b.chunk.js", + "static/js/6289.ce9965dc.chunk.js": "./static/js/6289.ce9965dc.chunk.js", + "static/js/4099.0a311abf.chunk.js": "./static/js/4099.0a311abf.chunk.js", + "static/js/1758.8e77088d.chunk.js": "./static/js/1758.8e77088d.chunk.js", + "static/js/5605.49c4cf85.chunk.js": "./static/js/5605.49c4cf85.chunk.js", + "static/js/9776.c950cc09.chunk.js": "./static/js/9776.c950cc09.chunk.js", + "static/js/2293.c95dbf41.chunk.js": "./static/js/2293.c95dbf41.chunk.js", + "static/js/9396.e8ca3d45.chunk.js": "./static/js/9396.e8ca3d45.chunk.js", + "static/js/7690.ec833ac0.chunk.js": "./static/js/7690.ec833ac0.chunk.js", + "static/js/6246.acd51635.chunk.js": "./static/js/6246.acd51635.chunk.js", + "static/js/778.af57bf4a.chunk.js": "./static/js/778.af57bf4a.chunk.js", + "static/js/3421.1b907a2a.chunk.js": "./static/js/3421.1b907a2a.chunk.js", + "static/js/8835.e0455f9c.chunk.js": "./static/js/8835.e0455f9c.chunk.js", + "static/js/1376.41e671ac.chunk.js": "./static/js/1376.41e671ac.chunk.js", + "static/js/6118.9403a72b.chunk.js": "./static/js/6118.9403a72b.chunk.js", + "static/js/1670.886ee3f9.chunk.js": "./static/js/1670.886ee3f9.chunk.js", + "static/js/5154.e5b3bec6.chunk.js": "./static/js/5154.e5b3bec6.chunk.js", + "static/js/6898.25df1470.chunk.js": "./static/js/6898.25df1470.chunk.js", + "static/js/1957.b5fe44dd.chunk.js": "./static/js/1957.b5fe44dd.chunk.js", + "static/js/8606.3593957e.chunk.js": "./static/js/8606.3593957e.chunk.js", + "static/js/7628.66f1aa47.chunk.js": "./static/js/7628.66f1aa47.chunk.js", + "static/js/8329.e74d4179.chunk.js": "./static/js/8329.e74d4179.chunk.js", + "static/js/7083.04557a73.chunk.js": "./static/js/7083.04557a73.chunk.js", + "static/js/1948.47c6f139.chunk.js": "./static/js/1948.47c6f139.chunk.js", + "static/js/7289.188a7958.chunk.js": "./static/js/7289.188a7958.chunk.js", + 
"static/js/9811.52fb63e7.chunk.js": "./static/js/9811.52fb63e7.chunk.js", + "static/js/8122.fae00246.chunk.js": "./static/js/8122.fae00246.chunk.js", + "static/js/3107.902f71ab.chunk.js": "./static/js/3107.902f71ab.chunk.js", + "static/js/2807.2824c79c.chunk.js": "./static/js/2807.2824c79c.chunk.js", + "static/js/4222.d4ec5a9d.chunk.js": "./static/js/4222.d4ec5a9d.chunk.js", + "static/js/766.70c23808.chunk.js": "./static/js/766.70c23808.chunk.js", + "static/js/4066.de4c1e02.chunk.js": "./static/js/4066.de4c1e02.chunk.js", + "static/js/2869.3192ed9a.chunk.js": "./static/js/2869.3192ed9a.chunk.js", + "static/js/5503.ecc3bcca.chunk.js": "./static/js/5503.ecc3bcca.chunk.js", + "static/js/1640.eae19c57.chunk.js": "./static/js/1640.eae19c57.chunk.js", + "static/js/3166.95b28d63.chunk.js": "./static/js/3166.95b28d63.chunk.js", + "static/js/3010.b103dfa4.chunk.js": "./static/js/3010.b103dfa4.chunk.js", + "static/js/1487.4365b2d5.chunk.js": "./static/js/1487.4365b2d5.chunk.js", + "static/js/3828.a61513fa.chunk.js": "./static/js/3828.a61513fa.chunk.js", + "static/js/2598.77403ca9.chunk.js": "./static/js/2598.77403ca9.chunk.js", + "static/js/856.0c5fe91c.chunk.js": "./static/js/856.0c5fe91c.chunk.js", + "static/js/165.fe88e976.chunk.js": "./static/js/165.fe88e976.chunk.js", + "static/css/5573.3ce054a3.chunk.css": "./static/css/5573.3ce054a3.chunk.css", + "static/js/5573.6b78d499.chunk.js": "./static/js/5573.6b78d499.chunk.js", + "static/js/3366.7739db1f.chunk.js": "./static/js/3366.7739db1f.chunk.js", + "static/js/7981.7e989bc3.chunk.js": "./static/js/7981.7e989bc3.chunk.js", + "static/js/8337.93117b59.chunk.js": "./static/js/8337.93117b59.chunk.js", + "static/css/511.0a2e691a.chunk.css": "./static/css/511.0a2e691a.chunk.css", + "static/js/511.34323faa.chunk.js": "./static/js/511.34323faa.chunk.js", + "static/js/1139.e67ff859.chunk.js": "./static/js/1139.e67ff859.chunk.js", + "static/js/5130.12a9aaae.chunk.js": "./static/js/5130.12a9aaae.chunk.js", + 
"static/js/9368.8f2cf4ea.chunk.js": "./static/js/9368.8f2cf4ea.chunk.js", + "static/css/3295.8975fa00.chunk.css": "./static/css/3295.8975fa00.chunk.css", + "static/js/896.a21d0b2a.chunk.js": "./static/js/896.a21d0b2a.chunk.js", + "static/js/8021.341b37dd.chunk.js": "./static/js/8021.341b37dd.chunk.js", + "static/css/6869.1d0912d9.chunk.css": "./static/css/6869.1d0912d9.chunk.css", + "ts.worker.js": "./ts.worker.js", + "css.worker.js": "./css.worker.js", + "html.worker.js": "./html.worker.js", + "json.worker.js": "./json.worker.js", + "editor.worker.js": "./editor.worker.js", "static/media/codicon.ttf": "./static/media/codicon.762fced46d6cddbda272.ttf", "static/media/thumbsUp.svg": "./static/media/thumbsUp.d4a03fbaa64ce85a0045bf8ba77f8e2b.svg", "static/media/error.svg": "./static/media/error.ca9e31d5d3dc34da07e11a00f7af0842.svg", @@ -255,7 +286,7 @@ "index.html": "./index.html" }, "entrypoints": [ - "static/css/main.c8ce3bba.css", - "static/js/main.62a60ecb.js" + "static/css/main.0bf24cd8.css", + "static/js/main.7cff0321.js" ] } \ No newline at end of file diff --git a/ydb/core/viewer/monitoring/css.worker.js b/ydb/core/viewer/monitoring/css.worker.js new file mode 100644 index 000000000000..c0848a5c36fe --- /dev/null +++ b/ydb/core/viewer/monitoring/css.worker.js @@ -0,0 +1,2 @@ +/*! 
For license information please see css.worker.js.LICENSE.txt */ +(()=>{"use strict";const e=new class{constructor(){this.listeners=[],this.unexpectedErrorHandler=function(e){setTimeout((()=>{if(e.stack){if(a.isErrorNoTelemetry(e))throw new a(e.message+"\n\n"+e.stack);throw new Error(e.message+"\n\n"+e.stack)}throw e}),0)}}emit(e){this.listeners.forEach((t=>{t(e)}))}onUnexpectedError(e){this.unexpectedErrorHandler(e),this.emit(e)}onUnexpectedExternalError(e){this.unexpectedErrorHandler(e)}};function t(t){r(t)||e.onUnexpectedError(t)}function n(e){if(e instanceof Error){const{name:t,message:n}=e;return{$isError:!0,name:t,message:n,stack:e.stacktrace||e.stack,noTelemetry:a.isErrorNoTelemetry(e)}}return e}const i="Canceled";function r(e){return e instanceof o||e instanceof Error&&e.name===i&&e.message===i}class o extends Error{constructor(){super(i),this.name=this.message}}class s extends Error{constructor(e){super("NotSupported"),e&&(this.message=e)}}class a extends Error{constructor(e){super(e),this.name="CodeExpectedError"}static fromError(e){if(e instanceof a)return e;const t=new a;return t.message=e.message,t.stack=e.stack,t}static isErrorNoTelemetry(e){return"CodeExpectedError"===e.name}}class l extends Error{constructor(e){super(e||"An unexpected bug occurred."),Object.setPrototypeOf(this,l.prototype)}}function c(e,t){const n=this;let i,r=!1;return function(){if(r)return i;if(r=!0,t)try{i=e.apply(n,arguments)}finally{t()}else i=e.apply(n,arguments);return i}}var h;!function(e){function t(e){return e&&"object"===typeof e&&"function"===typeof e[Symbol.iterator]}e.is=t;const n=Object.freeze([]);function*i(e){yield e}e.empty=function(){return n},e.single=i,e.wrap=function(e){return t(e)?e:i(e)},e.from=function(e){return e||n},e.reverse=function*(e){for(let t=e.length-1;t>=0;t--)yield e[t]},e.isEmpty=function(e){return!e||!0===e[Symbol.iterator]().next().done},e.first=function(e){return e[Symbol.iterator]().next().value},e.some=function(e,t){for(const n of 
e)if(t(n))return!0;return!1},e.find=function(e,t){for(const n of e)if(t(n))return n},e.filter=function*(e,t){for(const n of e)t(n)&&(yield n)},e.map=function*(e,t){let n=0;for(const i of e)yield t(i,n++)},e.concat=function*(){for(var e=arguments.length,t=new Array(e),n=0;n2&&void 0!==arguments[2]?arguments[2]:e.length;return function*(){for(t<0&&(t+=e.length),n<0?n+=e.length:n>e.length&&(n=e.length);t1&&void 0!==arguments[1]?arguments[1]:Number.POSITIVE_INFINITY;const i=[];if(0===n)return[i,t];const r=t[Symbol.iterator]();for(let o=0;or}]},e.asyncToArray=async function(e){const t=[];for await(const n of e)t.push(n);return Promise.resolve(t)}}(h||(h={}));let d=null;function p(e){return null===d||void 0===d||d.trackDisposable(e),e}function u(e){null===d||void 0===d||d.markAsDisposed(e)}function m(e,t){null===d||void 0===d||d.setParent(e,t)}function f(e){if(h.is(e)){const n=[];for(const i of e)if(i)try{i.dispose()}catch(t){n.push(t)}if(1===n.length)throw n[0];if(n.length>1)throw new AggregateError(n,"Encountered errors while disposing of store");return Array.isArray(e)?[]:e}if(e)return e.dispose(),e}function g(){for(var e=arguments.length,t=new Array(e),n=0;nf(t)));return function(e,t){if(d)for(const n of e)d.setParent(n,t)}(t,i),i}function b(e){const t=p({dispose:c((()=>{u(t),e()}))});return t}class v{constructor(){this._toDispose=new Set,this._isDisposed=!1,p(this)}dispose(){this._isDisposed||(u(this),this._isDisposed=!0,this.clear())}get isDisposed(){return this._isDisposed}clear(){if(0!==this._toDispose.size)try{f(this._toDispose)}finally{this._toDispose.clear()}}add(e){if(!e)return e;if(e===this)throw new Error("Cannot register a disposable on itself!");return m(e,this),this._isDisposed?v.DISABLE_DISPOSED_WARNING||console.warn(new Error("Trying to add a disposable to a DisposableStore that has already been disposed of. 
The added object will be leaked!").stack):this._toDispose.add(e),e}deleteAndLeak(e){e&&this._toDispose.has(e)&&(this._toDispose.delete(e),m(e,null))}}v.DISABLE_DISPOSED_WARNING=!1;class y{constructor(){this._store=new v,p(this),m(this._store,this)}dispose(){u(this),this._store.dispose()}_register(e){if(e===this)throw new Error("Cannot register a disposable on itself!");return this._store.add(e)}}y.None=Object.freeze({dispose(){}});class w{constructor(){this._store=new Map,this._isDisposed=!1,p(this)}dispose(){u(this),this._isDisposed=!0,this.clearAndDisposeAll()}clearAndDisposeAll(){if(this._store.size)try{f(this._store.values())}finally{this._store.clear()}}get(e){return this._store.get(e)}set(e,t){let n=arguments.length>2&&void 0!==arguments[2]&&arguments[2];var i;this._isDisposed&&console.warn(new Error("Trying to add a disposable to a DisposableMap that has already been disposed of. The added object will be leaked!").stack),n||null===this._store.get(e)||void 0===i||i.dispose(),this._store.set(e,t)}deleteAndDispose(e){var t;null===this._store.get(e)||void 0===t||t.dispose(),this._store.delete(e)}[Symbol.iterator](){return this._store[Symbol.iterator]()}}class x{constructor(e){this.element=e,this.next=x.Undefined,this.prev=x.Undefined}}x.Undefined=new x(void 0);class S{constructor(){this._first=x.Undefined,this._last=x.Undefined,this._size=0}get size(){return this._size}isEmpty(){return this._first===x.Undefined}clear(){let e=this._first;for(;e!==x.Undefined;){const t=e.next;e.prev=x.Undefined,e.next=x.Undefined,e=t}this._first=x.Undefined,this._last=x.Undefined,this._size=0}unshift(e){return this._insert(e,!1)}push(e){return this._insert(e,!0)}_insert(e,t){const n=new x(e);if(this._first===x.Undefined)this._first=n,this._last=n;else if(t){const e=this._last;this._last=n,n.prev=e,e.next=n}else{const e=this._first;this._first=n,n.next=e,e.prev=n}this._size+=1;let i=!1;return()=>{i||(i=!0,this._remove(n))}}shift(){if(this._first!==x.Undefined){const 
e=this._first.element;return this._remove(this._first),e}}pop(){if(this._last!==x.Undefined){const e=this._last.element;return this._remove(this._last),e}}_remove(e){if(e.prev!==x.Undefined&&e.next!==x.Undefined){const t=e.prev;t.next=e.next,e.next.prev=t}else e.prev===x.Undefined&&e.next===x.Undefined?(this._first=x.Undefined,this._last=x.Undefined):e.next===x.Undefined?(this._last=this._last.prev,this._last.next=x.Undefined):e.prev===x.Undefined&&(this._first=this._first.next,this._first.prev=x.Undefined);this._size-=1}*[Symbol.iterator](){let e=this._first;for(;e!==x.Undefined;)yield e.element,e=e.next}}const C=globalThis.performance&&"function"===typeof globalThis.performance.now;class _{static create(e){return new _(e)}constructor(e){this._now=C&&!1===e?Date.now:globalThis.performance.now.bind(globalThis.performance),this._startTime=this._now(),this._stopTime=-1}stop(){this._stopTime=this._now()}reset(){this._startTime=this._now(),this._stopTime=-1}elapsed(){return-1!==this._stopTime?this._stopTime-this._startTime:this._now()-this._startTime}}const k=!1;var E;!function(e){function t(e){if(k){const{onDidAddListener:t}=e,n=F.create();let i=0;e.onDidAddListener=()=>{2===++i&&(console.warn("snapshotted emitter LIKELY used public and SHOULD HAVE BEEN created with DisposableStore. 
snapshotted here"),n.print()),null===t||void 0===t||t()}}}function n(e){return function(t){let n,i=arguments.length>1&&void 0!==arguments[1]?arguments[1]:null,r=!1;return n=e((e=>{if(!r)return n?n.dispose():r=!0,t.call(i,e)}),null,arguments.length>2?arguments[2]:void 0),r&&n.dispose(),n}}function i(e,t,n){return o((function(n){let i=arguments.length>1&&void 0!==arguments[1]?arguments[1]:null;return e((e=>n.call(i,t(e))),null,arguments.length>2?arguments[2]:void 0)}),n)}function r(e,t,n){return o((function(n){let i=arguments.length>1&&void 0!==arguments[1]?arguments[1]:null;return e((e=>t(e)&&n.call(i,e)),null,arguments.length>2?arguments[2]:void 0)}),n)}function o(e,n){let i;const r={onWillAddFirstListener(){i=e(o.fire,o)},onDidRemoveLastListener(){null===i||void 0===i||i.dispose()}};n||t(r);const o=new A(r);return null===n||void 0===n||n.add(o),o.event}function s(e,n){let i,r,o,s,a=arguments.length>2&&void 0!==arguments[2]?arguments[2]:100,l=arguments.length>3&&void 0!==arguments[3]&&arguments[3],c=arguments.length>4&&void 0!==arguments[4]&&arguments[4],h=arguments.length>6?arguments[6]:void 0,d=0;const p={leakWarningThreshold:arguments.length>5?arguments[5]:void 0,onWillAddFirstListener(){i=e((e=>{d++,r=n(r,e),l&&!o&&(u.fire(r),r=void 0),s=()=>{const e=r;r=void 0,o=void 0,(!l||d>1)&&u.fire(e),d=0},"number"===typeof a?(clearTimeout(o),o=setTimeout(s,a)):void 0===o&&(o=0,queueMicrotask(s))}))},onWillRemoveListener(){c&&d>0&&(null===s||void 0===s||s())},onDidRemoveLastListener(){s=void 0,i.dispose()}};h||t(p);const u=new A(p);return null===h||void 0===h||h.add(u),u.event}e.None=()=>y.None,e.defer=function(e,t){return s(e,(()=>{}),0,void 0,!0,void 0,t)},e.once=n,e.map=i,e.forEach=function(e,t,n){return o((function(n){let i=arguments.length>1&&void 0!==arguments[1]?arguments[1]:null;return e((e=>{t(e),n.call(i,e)}),null,arguments.length>2?arguments[2]:void 0)}),n)},e.filter=r,e.signal=function(e){return e},e.any=function(){for(var e=arguments.length,t=new 
Array(e),n=0;n1&&void 0!==arguments[1]?arguments[1]:null,i=arguments.length>2?arguments[2]:void 0;return function(e,t){t instanceof Array?t.push(e):t&&t.add(e);return e}(g(...t.map((t=>t((t=>e.call(n,t)))))),i)}},e.reduce=function(e,t,n,r){let o=n;return i(e,(e=>(o=t(o,e),o)),r)},e.debounce=s,e.accumulate=function(t){let n=arguments.length>1&&void 0!==arguments[1]?arguments[1]:0,i=arguments.length>2?arguments[2]:void 0;return e.debounce(t,((e,t)=>e?(e.push(t),e):[t]),n,void 0,!0,void 0,i)},e.latch=function(e){let t,n=arguments.length>1&&void 0!==arguments[1]?arguments[1]:(e,t)=>e===t,i=!0;return r(e,(e=>{const r=i||!n(e,t);return i=!1,t=e,r}),arguments.length>2?arguments[2]:void 0)},e.split=function(t,n,i){return[e.filter(t,n,i),e.filter(t,(e=>!n(e)),i)]},e.buffer=function(e){let t=arguments.length>1&&void 0!==arguments[1]&&arguments[1],n=arguments.length>3?arguments[3]:void 0,i=(arguments.length>2&&void 0!==arguments[2]?arguments[2]:[]).slice(),r=e((e=>{i?i.push(e):s.fire(e)}));n&&n.add(r);const o=()=>{null===i||void 0===i||i.forEach((e=>s.fire(e))),i=null},s=new A({onWillAddFirstListener(){r||(r=e((e=>s.fire(e))),n&&n.add(r))},onDidAddFirstListener(){i&&(t?setTimeout(o):o())},onDidRemoveLastListener(){r&&r.dispose(),r=null}});return n&&n.add(s),s.event},e.chain=function(e,t){return(n,i,r)=>{const o=t(new l);return e((function(e){const t=o.evaluate(e);t!==a&&n.call(i,t)}),void 0,r)}};const a=Symbol("HaltChainable");class l{constructor(){this.steps=[]}map(e){return this.steps.push(e),this}forEach(e){return this.steps.push((t=>(e(t),t))),this}filter(e){return this.steps.push((t=>e(t)?t:a)),this}reduce(e,t){let n=t;return this.steps.push((t=>(n=e(n,t),n))),this}latch(){let e,t=arguments.length>0&&void 0!==arguments[0]?arguments[0]:(e,t)=>e===t,n=!0;return this.steps.push((i=>{const r=n||!t(i,e);return n=!1,e=i,r?i:a})),this}evaluate(e){for(const t of this.steps)if((e=t(e))===a)break;return e}}e.fromNodeEventEmitter=function(e,t){let n=arguments.length>2&&void 
0!==arguments[2]?arguments[2]:e=>e;const i=function(){return r.fire(n(...arguments))},r=new A({onWillAddFirstListener:()=>e.on(t,i),onDidRemoveLastListener:()=>e.removeListener(t,i)});return r.event},e.fromDOMEventEmitter=function(e,t){let n=arguments.length>2&&void 0!==arguments[2]?arguments[2]:e=>e;const i=function(){return r.fire(n(...arguments))},r=new A({onWillAddFirstListener:()=>e.addEventListener(t,i),onDidRemoveLastListener:()=>e.removeEventListener(t,i)});return r.event},e.toPromise=function(e){return new Promise((t=>n(e)(t)))},e.fromPromise=function(e){const t=new A;return e.then((e=>{t.fire(e)}),(()=>{t.fire(void 0)})).finally((()=>{t.dispose()})),t.event},e.runAndSubscribe=function(e,t,n){return t(n),e((e=>t(e)))};class c{constructor(e,n){this._observable=e,this._counter=0,this._hasChanged=!1;const i={onWillAddFirstListener:()=>{e.addObserver(this)},onDidRemoveLastListener:()=>{e.removeObserver(this)}};n||t(i),this.emitter=new A(i),n&&n.add(this.emitter)}beginUpdate(e){this._counter++}handlePossibleChange(e){}handleChange(e,t){this._hasChanged=!0}endUpdate(e){this._counter--,0===this._counter&&(this._observable.reportChanges(),this._hasChanged&&(this._hasChanged=!1,this.emitter.fire(this._observable.get())))}}e.fromObservable=function(e,t){return new c(e,t).emitter.event},e.fromObservableLight=function(e){return(t,n,i)=>{let r=0,o=!1;const s={beginUpdate(){r++},endUpdate(){r--,0===r&&(e.reportChanges(),o&&(o=!1,t.call(n)))},handlePossibleChange(){},handleChange(){o=!0}};e.addObserver(s),e.reportChanges();const a={dispose(){e.removeObserver(s)}};return i instanceof v?i.add(a):Array.isArray(i)&&i.push(a),a}}}(E||(E={}));class R{constructor(e){this.listenerCount=0,this.invocationCount=0,this.elapsedOverall=0,this.durations=[],this.name=`${e}_${R._idPool++}`,R.all.add(this)}start(e){this._stopWatch=new _,this.listenerCount=e}stop(){if(this._stopWatch){const 
e=this._stopWatch.elapsed();this.durations.push(e),this.elapsedOverall+=e,this.invocationCount+=1,this._stopWatch=void 0}}}R.all=new Set,R._idPool=0;class N{constructor(e){let t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:Math.random().toString(18).slice(2,5);this.threshold=e,this.name=t,this._warnCountdown=0}dispose(){var e;null===(e=this._stacks)||void 0===e||e.clear()}check(e,t){const n=this.threshold;if(n<=0||t{const t=this._stacks.get(e.value)||0;this._stacks.set(e.value,t-1)}}}class F{static create(){var e;return new F(null!==(e=(new Error).stack)&&void 0!==e?e:"")}constructor(e){this.value=e}print(){console.warn(this.value.split("\n").slice(2).join("\n"))}}class D{constructor(e){this.value=e}}const T=void 0;class A{constructor(e){var t,n,i,r,o;this._size=0,this._options=e,this._leakageMon=(null===(t=this._options)||void 0===t?void 0:t.leakWarningThreshold)?new N(null!==(i=null===(n=this._options)||void 0===n?void 0:n.leakWarningThreshold)&&void 0!==i?i:-1):void 0,this._perfMon=(null===(r=this._options)||void 0===r?void 0:r._profName)?new R(this._options._profName):void 0,this._deliveryQueue=null===(o=this._options)||void 0===o?void 0:o.deliveryQueue}dispose(){var e,t,n,i;this._disposed||(this._disposed=!0,(null===(e=this._deliveryQueue)||void 0===e?void 0:e.current)===this&&this._deliveryQueue.reset(),this._listeners&&(this._listeners=void 0,this._size=0),null===(n=null===(t=this._options)||void 0===t?void 0:t.onDidRemoveLastListener)||void 0===n||n.call(t),null===(i=this._leakageMon)||void 0===i||i.dispose())}get event(){var e;return null!==(e=this._event)&&void 0!==e||(this._event=(e,t,n)=>{var i,r,o,s,a;if(this._leakageMon&&this._size>3*this._leakageMon.threshold)return console.warn(`[${this._leakageMon.name}] REFUSES to accept new listeners because it exceeded its threshold by far`),y.None;if(this._disposed)return y.None;t&&(e=e.bind(t));const l=new D(e);let 
c;this._leakageMon&&this._size>=Math.ceil(.2*this._leakageMon.threshold)&&(l.stack=F.create(),c=this._leakageMon.check(l.stack,this._size+1)),this._listeners?this._listeners instanceof D?(null!==(a=this._deliveryQueue)&&void 0!==a||(this._deliveryQueue=new M),this._listeners=[this._listeners,l]):this._listeners.push(l):(null===(r=null===(i=this._options)||void 0===i?void 0:i.onWillAddFirstListener)||void 0===r||r.call(i,this),this._listeners=l,null===(s=null===(o=this._options)||void 0===o?void 0:o.onDidAddFirstListener)||void 0===s||s.call(o,this)),this._size++;const h=b((()=>{null===T||void 0===T||T.unregister(h),null===c||void 0===c||c(),this._removeListener(l)}));if(n instanceof v?n.add(h):Array.isArray(n)&&n.push(h),T){const e=(new Error).stack.split("\n").slice(2).join("\n").trim();T.register(h,e,h)}return h}),this._event}_removeListener(e){var t,n,i,r;if(null===(n=null===(t=this._options)||void 0===t?void 0:t.onWillRemoveListener)||void 0===n||n.call(t,this),!this._listeners)return;if(1===this._size)return this._listeners=void 0,null===(r=null===(i=this._options)||void 0===i?void 0:i.onDidRemoveLastListener)||void 0===r||r.call(i,this),void(this._size=0);const o=this._listeners,s=o.indexOf(e);if(-1===s)throw console.log("disposed?",this._disposed),console.log("size?",this._size),console.log("arr?",JSON.stringify(this._listeners)),new Error("Attempted to dispose unknown listener");this._size--,o[s]=void 0;const a=this._deliveryQueue.current===this;if(2*this._size<=o.length){let e=0;for(let t=0;t0}}class M{constructor(){this.i=-1,this.end=0}enqueue(e,t,n){this.i=0,this.end=n,this.current=e,this.value=t}reset(){this.i=this.end,this.current=void 0,this.value=void 0}}Object.prototype.hasOwnProperty;function z(e){const t=[];for(const n of function(e){let t=[];for(;Object.prototype!==e;)t=t.concat(Object.getOwnPropertyNames(e)),e=Object.getPrototypeOf(e);return t}(e))"function"===typeof e[n]&&t.push(n);return t}let I="undefined"!==typeof 
document&&document.location&&document.location.hash.indexOf("pseudo=true")>=0;function L(e,t){let n;return n=0===t.length?e:e.replace(/\{(\d+)\}/g,((e,n)=>{const i=n[0],r=t[i];let o=e;return"string"===typeof r?o=r:"number"!==typeof r&&"boolean"!==typeof r&&void 0!==r&&null!==r||(o=String(r)),o})),I&&(n="\uff3b"+n.replace(/[aouei]/g,"$&$&")+"\uff3d"),n}function P(e,t){for(var n=arguments.length,i=new Array(n>2?n-2:0),r=2;r=0,B=K.indexOf("Macintosh")>=0,X=(K.indexOf("Macintosh")>=0||K.indexOf("iPad")>=0||K.indexOf("iPhone")>=0)&&!!navigator.maxTouchPoints&&navigator.maxTouchPoints>0,j=K.indexOf("Linux")>=0,Q=(null===K||void 0===K?void 0:K.indexOf("Mobi"))>=0,G=!0;U=void P({key:"ensureLoaderPluginIsLoaded",comment:["{Locked}"]},"_")||V,Z=U,ee=navigator.language}let se=0;B?se=1:q?se=3:j&&(se=2);const ae=q,le=B,ce=(G&&"function"===typeof ne.importScripts&&ne.origin,K),he="function"===typeof ne.postMessage&&!ne.importScripts;(()=>{if(he){const e=[];ne.addEventListener("message",(t=>{if(t.data&&t.data.vscodeScheduleAsyncWork)for(let n=0,i=e.length;n{const i=++t;e.push({id:i,callback:n}),ne.postMessage({vscodeScheduleAsyncWork:i},"*")}}})();const de=!!(ce&&ce.indexOf("Chrome")>=0);ce&&ce.indexOf("Firefox"),!de&&ce&&ce.indexOf("Safari"),ce&&ce.indexOf("Edg/"),ce&&ce.indexOf("Android");class pe{constructor(e){this.executor=e,this._didRun=!1}get value(){if(!this._didRun)try{this._value=this.executor()}catch(e){this._error=e}finally{this._didRun=!0}if(this._error)throw this._error;return this._value}get rawValue(){return this._value}}var ue;function me(e){return e.replace(/[\\\{\}\*\+\?\|\^\$\.\[\]\(\)]/g,"\\$&")}function fe(e){return e>=65&&e<=90}function ge(e){return 55296<=e&&e<=56319}function be(e){return 56320<=e&&e<=57343}function ve(e,t){return t-56320+(e-55296<<10)+65536}function ye(e,t,n){const i=e.charCodeAt(n);if(ge(i)&&n+1t[3*i+1]))return t[3*i+2];i=2*i+1}return 0}}xe._INSTANCE=null;class Se{static getInstance(e){return ue.cache.get(Array.from(e))}static 
getLocales(){return ue._locales.value}constructor(e){this.confusableDictionary=e}isAmbiguous(e){return this.confusableDictionary.has(e)}getPrimaryConfusable(e){return this.confusableDictionary.get(e)}getConfusableCodePoints(){return new Set(this.confusableDictionary.keys())}}ue=Se,Se.ambiguousCharacterData=new pe((()=>JSON.parse('{"_common":[8232,32,8233,32,5760,32,8192,32,8193,32,8194,32,8195,32,8196,32,8197,32,8198,32,8200,32,8201,32,8202,32,8287,32,8199,32,8239,32,2042,95,65101,95,65102,95,65103,95,8208,45,8209,45,8210,45,65112,45,1748,45,8259,45,727,45,8722,45,10134,45,11450,45,1549,44,1643,44,8218,44,184,44,42233,44,894,59,2307,58,2691,58,1417,58,1795,58,1796,58,5868,58,65072,58,6147,58,6153,58,8282,58,1475,58,760,58,42889,58,8758,58,720,58,42237,58,451,33,11601,33,660,63,577,63,2429,63,5038,63,42731,63,119149,46,8228,46,1793,46,1794,46,42510,46,68176,46,1632,46,1776,46,42232,46,1373,96,65287,96,8219,96,8242,96,1370,96,1523,96,8175,96,65344,96,900,96,8189,96,8125,96,8127,96,8190,96,697,96,884,96,712,96,714,96,715,96,756,96,699,96,701,96,700,96,702,96,42892,96,1497,96,2036,96,2037,96,5194,96,5836,96,94033,96,94034,96,65339,91,10088,40,10098,40,12308,40,64830,40,65341,93,10089,41,10099,41,12309,41,64831,41,10100,123,119060,123,10101,125,65342,94,8270,42,1645,42,8727,42,66335,42,5941,47,8257,47,8725,47,8260,47,9585,47,10187,47,10744,47,119354,47,12755,47,12339,47,11462,47,20031,47,12035,47,65340,92,65128,92,8726,92,10189,92,10741,92,10745,92,119311,92,119355,92,12756,92,20022,92,12034,92,42872,38,708,94,710,94,5869,43,10133,43,66203,43,8249,60,10094,60,706,60,119350,60,5176,60,5810,60,5120,61,11840,61,12448,61,42239,61,8250,62,10095,62,707,62,119351,62,5171,62,94015,62,8275,126,732,126,8128,126,8764,126,65372,124,65293,45,120784,50,120794,50,120804,50,120814,50,120824,50,130034,50,42842,50,423,50,1000,50,42564,50,5311,50,42735,50,119302,51,120785,51,120795,51,120805,51,120815,51,120825,51,130035,51,42923,51,540,51,439,51,42858,51,11468,51,1248,51,94011,51,71882,51
,120786,52,120796,52,120806,52,120816,52,120826,52,130036,52,5070,52,71855,52,120787,53,120797,53,120807,53,120817,53,120827,53,130037,53,444,53,71867,53,120788,54,120798,54,120808,54,120818,54,120828,54,130038,54,11474,54,5102,54,71893,54,119314,55,120789,55,120799,55,120809,55,120819,55,120829,55,130039,55,66770,55,71878,55,2819,56,2538,56,2666,56,125131,56,120790,56,120800,56,120810,56,120820,56,120830,56,130040,56,547,56,546,56,66330,56,2663,57,2920,57,2541,57,3437,57,120791,57,120801,57,120811,57,120821,57,120831,57,130041,57,42862,57,11466,57,71884,57,71852,57,71894,57,9082,97,65345,97,119834,97,119886,97,119938,97,119990,97,120042,97,120094,97,120146,97,120198,97,120250,97,120302,97,120354,97,120406,97,120458,97,593,97,945,97,120514,97,120572,97,120630,97,120688,97,120746,97,65313,65,119808,65,119860,65,119912,65,119964,65,120016,65,120068,65,120120,65,120172,65,120224,65,120276,65,120328,65,120380,65,120432,65,913,65,120488,65,120546,65,120604,65,120662,65,120720,65,5034,65,5573,65,42222,65,94016,65,66208,65,119835,98,119887,98,119939,98,119991,98,120043,98,120095,98,120147,98,120199,98,120251,98,120303,98,120355,98,120407,98,120459,98,388,98,5071,98,5234,98,5551,98,65314,66,8492,66,119809,66,119861,66,119913,66,120017,66,120069,66,120121,66,120173,66,120225,66,120277,66,120329,66,120381,66,120433,66,42932,66,914,66,120489,66,120547,66,120605,66,120663,66,120721,66,5108,66,5623,66,42192,66,66178,66,66209,66,66305,66,65347,99,8573,99,119836,99,119888,99,119940,99,119992,99,120044,99,120096,99,120148,99,120200,99,120252,99,120304,99,120356,99,120408,99,120460,99,7428,99,1010,99,11429,99,43951,99,66621,99,128844,67,71922,67,71913,67,65315,67,8557,67,8450,67,8493,67,119810,67,119862,67,119914,67,119966,67,120018,67,120174,67,120226,67,120278,67,120330,67,120382,67,120434,67,1017,67,11428,67,5087,67,42202,67,66210,67,66306,67,66581,67,66844,67,8574,100,8518,100,119837,100,119889,100,119941,100,119993,100,120045,100,120097,100,120149,100,120201,100,120253,100,1203
05,100,120357,100,120409,100,120461,100,1281,100,5095,100,5231,100,42194,100,8558,68,8517,68,119811,68,119863,68,119915,68,119967,68,120019,68,120071,68,120123,68,120175,68,120227,68,120279,68,120331,68,120383,68,120435,68,5024,68,5598,68,5610,68,42195,68,8494,101,65349,101,8495,101,8519,101,119838,101,119890,101,119942,101,120046,101,120098,101,120150,101,120202,101,120254,101,120306,101,120358,101,120410,101,120462,101,43826,101,1213,101,8959,69,65317,69,8496,69,119812,69,119864,69,119916,69,120020,69,120072,69,120124,69,120176,69,120228,69,120280,69,120332,69,120384,69,120436,69,917,69,120492,69,120550,69,120608,69,120666,69,120724,69,11577,69,5036,69,42224,69,71846,69,71854,69,66182,69,119839,102,119891,102,119943,102,119995,102,120047,102,120099,102,120151,102,120203,102,120255,102,120307,102,120359,102,120411,102,120463,102,43829,102,42905,102,383,102,7837,102,1412,102,119315,70,8497,70,119813,70,119865,70,119917,70,120021,70,120073,70,120125,70,120177,70,120229,70,120281,70,120333,70,120385,70,120437,70,42904,70,988,70,120778,70,5556,70,42205,70,71874,70,71842,70,66183,70,66213,70,66853,70,65351,103,8458,103,119840,103,119892,103,119944,103,120048,103,120100,103,120152,103,120204,103,120256,103,120308,103,120360,103,120412,103,120464,103,609,103,7555,103,397,103,1409,103,119814,71,119866,71,119918,71,119970,71,120022,71,120074,71,120126,71,120178,71,120230,71,120282,71,120334,71,120386,71,120438,71,1292,71,5056,71,5107,71,42198,71,65352,104,8462,104,119841,104,119945,104,119997,104,120049,104,120101,104,120153,104,120205,104,120257,104,120309,104,120361,104,120413,104,120465,104,1211,104,1392,104,5058,104,65320,72,8459,72,8460,72,8461,72,119815,72,119867,72,119919,72,120023,72,120179,72,120231,72,120283,72,120335,72,120387,72,120439,72,919,72,120494,72,120552,72,120610,72,120668,72,120726,72,11406,72,5051,72,5500,72,42215,72,66255,72,731,105,9075,105,65353,105,8560,105,8505,105,8520,105,119842,105,119894,105,119946,105,119998,105,120050,105,120102,105,120154,
105,120206,105,120258,105,120310,105,120362,105,120414,105,120466,105,120484,105,618,105,617,105,953,105,8126,105,890,105,120522,105,120580,105,120638,105,120696,105,120754,105,1110,105,42567,105,1231,105,43893,105,5029,105,71875,105,65354,106,8521,106,119843,106,119895,106,119947,106,119999,106,120051,106,120103,106,120155,106,120207,106,120259,106,120311,106,120363,106,120415,106,120467,106,1011,106,1112,106,65322,74,119817,74,119869,74,119921,74,119973,74,120025,74,120077,74,120129,74,120181,74,120233,74,120285,74,120337,74,120389,74,120441,74,42930,74,895,74,1032,74,5035,74,5261,74,42201,74,119844,107,119896,107,119948,107,120000,107,120052,107,120104,107,120156,107,120208,107,120260,107,120312,107,120364,107,120416,107,120468,107,8490,75,65323,75,119818,75,119870,75,119922,75,119974,75,120026,75,120078,75,120130,75,120182,75,120234,75,120286,75,120338,75,120390,75,120442,75,922,75,120497,75,120555,75,120613,75,120671,75,120729,75,11412,75,5094,75,5845,75,42199,75,66840,75,1472,108,8739,73,9213,73,65512,73,1633,108,1777,73,66336,108,125127,108,120783,73,120793,73,120803,73,120813,73,120823,73,130033,73,65321,73,8544,73,8464,73,8465,73,119816,73,119868,73,119920,73,120024,73,120128,73,120180,73,120232,73,120284,73,120336,73,120388,73,120440,73,65356,108,8572,73,8467,108,119845,108,119897,108,119949,108,120001,108,120053,108,120105,73,120157,73,120209,73,120261,73,120313,73,120365,73,120417,73,120469,73,448,73,120496,73,120554,73,120612,73,120670,73,120728,73,11410,73,1030,73,1216,73,1493,108,1503,108,1575,108,126464,108,126592,108,65166,108,65165,108,1994,108,11599,73,5825,73,42226,73,93992,73,66186,124,66313,124,119338,76,8556,76,8466,76,119819,76,119871,76,119923,76,120027,76,120079,76,120131,76,120183,76,120235,76,120287,76,120339,76,120391,76,120443,76,11472,76,5086,76,5290,76,42209,76,93974,76,71843,76,71858,76,66587,76,66854,76,65325,77,8559,77,8499,77,119820,77,119872,77,119924,77,120028,77,120080,77,120132,77,120184,77,120236,77,120288,77,120340,77,120392
,77,120444,77,924,77,120499,77,120557,77,120615,77,120673,77,120731,77,1018,77,11416,77,5047,77,5616,77,5846,77,42207,77,66224,77,66321,77,119847,110,119899,110,119951,110,120003,110,120055,110,120107,110,120159,110,120211,110,120263,110,120315,110,120367,110,120419,110,120471,110,1400,110,1404,110,65326,78,8469,78,119821,78,119873,78,119925,78,119977,78,120029,78,120081,78,120185,78,120237,78,120289,78,120341,78,120393,78,120445,78,925,78,120500,78,120558,78,120616,78,120674,78,120732,78,11418,78,42208,78,66835,78,3074,111,3202,111,3330,111,3458,111,2406,111,2662,111,2790,111,3046,111,3174,111,3302,111,3430,111,3664,111,3792,111,4160,111,1637,111,1781,111,65359,111,8500,111,119848,111,119900,111,119952,111,120056,111,120108,111,120160,111,120212,111,120264,111,120316,111,120368,111,120420,111,120472,111,7439,111,7441,111,43837,111,959,111,120528,111,120586,111,120644,111,120702,111,120760,111,963,111,120532,111,120590,111,120648,111,120706,111,120764,111,11423,111,4351,111,1413,111,1505,111,1607,111,126500,111,126564,111,126596,111,65259,111,65260,111,65258,111,65257,111,1726,111,64428,111,64429,111,64427,111,64426,111,1729,111,64424,111,64425,111,64423,111,64422,111,1749,111,3360,111,4125,111,66794,111,71880,111,71895,111,66604,111,1984,79,2534,79,2918,79,12295,79,70864,79,71904,79,120782,79,120792,79,120802,79,120812,79,120822,79,130032,79,65327,79,119822,79,119874,79,119926,79,119978,79,120030,79,120082,79,120134,79,120186,79,120238,79,120290,79,120342,79,120394,79,120446,79,927,79,120502,79,120560,79,120618,79,120676,79,120734,79,11422,79,1365,79,11604,79,4816,79,2848,79,66754,79,42227,79,71861,79,66194,79,66219,79,66564,79,66838,79,9076,112,65360,112,119849,112,119901,112,119953,112,120005,112,120057,112,120109,112,120161,112,120213,112,120265,112,120317,112,120369,112,120421,112,120473,112,961,112,120530,112,120544,112,120588,112,120602,112,120646,112,120660,112,120704,112,120718,112,120762,112,120776,112,11427,112,65328,80,8473,80,119823,80,119875,80,119927,
80,119979,80,120031,80,120083,80,120187,80,120239,80,120291,80,120343,80,120395,80,120447,80,929,80,120504,80,120562,80,120620,80,120678,80,120736,80,11426,80,5090,80,5229,80,42193,80,66197,80,119850,113,119902,113,119954,113,120006,113,120058,113,120110,113,120162,113,120214,113,120266,113,120318,113,120370,113,120422,113,120474,113,1307,113,1379,113,1382,113,8474,81,119824,81,119876,81,119928,81,119980,81,120032,81,120084,81,120188,81,120240,81,120292,81,120344,81,120396,81,120448,81,11605,81,119851,114,119903,114,119955,114,120007,114,120059,114,120111,114,120163,114,120215,114,120267,114,120319,114,120371,114,120423,114,120475,114,43847,114,43848,114,7462,114,11397,114,43905,114,119318,82,8475,82,8476,82,8477,82,119825,82,119877,82,119929,82,120033,82,120189,82,120241,82,120293,82,120345,82,120397,82,120449,82,422,82,5025,82,5074,82,66740,82,5511,82,42211,82,94005,82,65363,115,119852,115,119904,115,119956,115,120008,115,120060,115,120112,115,120164,115,120216,115,120268,115,120320,115,120372,115,120424,115,120476,115,42801,115,445,115,1109,115,43946,115,71873,115,66632,115,65331,83,119826,83,119878,83,119930,83,119982,83,120034,83,120086,83,120138,83,120190,83,120242,83,120294,83,120346,83,120398,83,120450,83,1029,83,1359,83,5077,83,5082,83,42210,83,94010,83,66198,83,66592,83,119853,116,119905,116,119957,116,120009,116,120061,116,120113,116,120165,116,120217,116,120269,116,120321,116,120373,116,120425,116,120477,116,8868,84,10201,84,128872,84,65332,84,119827,84,119879,84,119931,84,119983,84,120035,84,120087,84,120139,84,120191,84,120243,84,120295,84,120347,84,120399,84,120451,84,932,84,120507,84,120565,84,120623,84,120681,84,120739,84,11430,84,5026,84,42196,84,93962,84,71868,84,66199,84,66225,84,66325,84,119854,117,119906,117,119958,117,120010,117,120062,117,120114,117,120166,117,120218,117,120270,117,120322,117,120374,117,120426,117,120478,117,42911,117,7452,117,43854,117,43858,117,651,117,965,117,120534,117,120592,117,120650,117,120708,117,120766,117,1405,117,
66806,117,71896,117,8746,85,8899,85,119828,85,119880,85,119932,85,119984,85,120036,85,120088,85,120140,85,120192,85,120244,85,120296,85,120348,85,120400,85,120452,85,1357,85,4608,85,66766,85,5196,85,42228,85,94018,85,71864,85,8744,118,8897,118,65366,118,8564,118,119855,118,119907,118,119959,118,120011,118,120063,118,120115,118,120167,118,120219,118,120271,118,120323,118,120375,118,120427,118,120479,118,7456,118,957,118,120526,118,120584,118,120642,118,120700,118,120758,118,1141,118,1496,118,71430,118,43945,118,71872,118,119309,86,1639,86,1783,86,8548,86,119829,86,119881,86,119933,86,119985,86,120037,86,120089,86,120141,86,120193,86,120245,86,120297,86,120349,86,120401,86,120453,86,1140,86,11576,86,5081,86,5167,86,42719,86,42214,86,93960,86,71840,86,66845,86,623,119,119856,119,119908,119,119960,119,120012,119,120064,119,120116,119,120168,119,120220,119,120272,119,120324,119,120376,119,120428,119,120480,119,7457,119,1121,119,1309,119,1377,119,71434,119,71438,119,71439,119,43907,119,71919,87,71910,87,119830,87,119882,87,119934,87,119986,87,120038,87,120090,87,120142,87,120194,87,120246,87,120298,87,120350,87,120402,87,120454,87,1308,87,5043,87,5076,87,42218,87,5742,120,10539,120,10540,120,10799,120,65368,120,8569,120,119857,120,119909,120,119961,120,120013,120,120065,120,120117,120,120169,120,120221,120,120273,120,120325,120,120377,120,120429,120,120481,120,5441,120,5501,120,5741,88,9587,88,66338,88,71916,88,65336,88,8553,88,119831,88,119883,88,119935,88,119987,88,120039,88,120091,88,120143,88,120195,88,120247,88,120299,88,120351,88,120403,88,120455,88,42931,88,935,88,120510,88,120568,88,120626,88,120684,88,120742,88,11436,88,11613,88,5815,88,42219,88,66192,88,66228,88,66327,88,66855,88,611,121,7564,121,65369,121,119858,121,119910,121,119962,121,120014,121,120066,121,120118,121,120170,121,120222,121,120274,121,120326,121,120378,121,120430,121,120482,121,655,121,7935,121,43866,121,947,121,8509,121,120516,121,120574,121,120632,121,120690,121,120748,121,1199,121,4327,121,
71900,121,65337,89,119832,89,119884,89,119936,89,119988,89,120040,89,120092,89,120144,89,120196,89,120248,89,120300,89,120352,89,120404,89,120456,89,933,89,978,89,120508,89,120566,89,120624,89,120682,89,120740,89,11432,89,1198,89,5033,89,5053,89,42220,89,94019,89,71844,89,66226,89,119859,122,119911,122,119963,122,120015,122,120067,122,120119,122,120171,122,120223,122,120275,122,120327,122,120379,122,120431,122,120483,122,7458,122,43923,122,71876,122,66293,90,71909,90,65338,90,8484,90,8488,90,119833,90,119885,90,119937,90,119989,90,120041,90,120197,90,120249,90,120301,90,120353,90,120405,90,120457,90,918,90,120493,90,120551,90,120609,90,120667,90,120725,90,5059,90,42204,90,71849,90,65282,34,65284,36,65285,37,65286,38,65290,42,65291,43,65294,46,65295,47,65296,48,65297,49,65298,50,65299,51,65300,52,65301,53,65302,54,65303,55,65304,56,65305,57,65308,60,65309,61,65310,62,65312,64,65316,68,65318,70,65319,71,65324,76,65329,81,65330,82,65333,85,65334,86,65335,87,65343,95,65346,98,65348,100,65350,102,65355,107,65357,109,65358,110,65361,113,65362,114,65364,116,65365,117,65367,119,65370,122,65371,123,65373,125,119846,109],"_default":[160,32,8211,45,65374,126,65306,58,65281,33,8216,96,8217,96,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],"cs":[65374,126,65306,58,65281,33,8216,96,8217,96,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],"de":[65374,126,65306,58,65281,33,8216,96,8217,96,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1
053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],"es":[8211,45,65374,126,65306,58,65281,33,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],"fr":[65374,126,65306,58,65281,33,8216,96,8245,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],"it":[160,32,8211,45,65374,126,65306,58,65281,33,8216,96,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],"ja":[8211,45,65306,58,65281,33,8216,96,8217,96,8245,96,180,96,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65292,44,65307,59],"ko":[8211,45,65374,126,65306,58,65281,33,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],"pl":[65374,126,65306,58,65281,33,8216,96,8217,96,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,
99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],"pt-BR":[65374,126,65306,58,65281,33,8216,96,8217,96,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],"qps-ploc":[160,32,8211,45,65374,126,65306,58,65281,33,8216,96,8217,96,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],"ru":[65374,126,65306,58,65281,33,8216,96,8217,96,8245,96,180,96,12494,47,305,105,921,73,1009,112,215,120,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],"tr":[160,32,8211,45,65374,126,65306,58,65281,33,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65283,35,65288,40,65289,41,65292,44,65307,59,65311,63],"zh-hans":[65374,126,65306,58,65281,33,8245,96,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,1091,121,1059,89,65288,40,65289,41],"zh-hant":[8211,45,65374,126,180,96,12494,47,1047,51,1073,54,1072,97,1040,65,1068,98,1042,66,1089,99,1057,67,1077,101,1045,69,1053,72,305,105,1050,75,921,73,1052,77,1086,111,1054,79,1009,112,1088,112,1056,80,1075,114,1058,84,215,120,1093,120,1061,88,10
91,121,1059,89,65283,35,65307,59]}'))),Se.cache=new class{constructor(e){let t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:JSON.stringify;this.fn=e,this._computeKey=t,this.lastCache=void 0,this.lastArgKey=void 0}get(e){const t=this._computeKey(e);return this.lastArgKey!==t&&(this.lastArgKey=t,this.lastCache=this.fn(e)),this.lastCache}}((e=>{function t(e){const t=new Map;for(let n=0;n!e.startsWith("_")&&e in i));0===o.length&&(o=["_default"]);for(const a of o){r=n(r,t(i[a]))}const s=function(e,t){const n=new Map(e);for(const[i,r]of t)n.set(i,r);return n}(t(i._common),r);return new ue(s)})),Se._locales=new pe((()=>Object.keys(ue.ambiguousCharacterData.value).filter((e=>!e.startsWith("_")))));class Ce{static getRawData(){return JSON.parse("[9,10,11,12,13,32,127,160,173,847,1564,4447,4448,6068,6069,6155,6156,6157,6158,7355,7356,8192,8193,8194,8195,8196,8197,8198,8199,8200,8201,8202,8203,8204,8205,8206,8207,8234,8235,8236,8237,8238,8239,8287,8288,8289,8290,8291,8292,8293,8294,8295,8296,8297,8298,8299,8300,8301,8302,8303,10240,12288,12644,65024,65025,65026,65027,65028,65029,65030,65031,65032,65033,65034,65035,65036,65037,65038,65039,65279,65440,65520,65521,65522,65523,65524,65525,65526,65527,65528,65532,78844,119155,119156,119157,119158,119159,119160,119161,119162,917504,917505,917506,917507,917508,917509,917510,917511,917512,917513,917514,917515,917516,917517,917518,917519,917520,917521,917522,917523,917524,917525,917526,917527,917528,917529,917530,917531,917532,917533,917534,917535,917536,917537,917538,917539,917540,917541,917542,917543,917544,917545,917546,917547,917548,917549,917550,917551,917552,917553,917554,917555,917556,917557,917558,917559,917560,917561,917562,917563,917564,917565,917566,917567,917568,917569,917570,917571,917572,917573,917574,917575,917576,917577,917578,917579,917580,917581,917582,917583,917584,917585,917586,917587,917588,917589,917590,917591,917592,917593,917594,917595,917596,917597,917598,917599,917600,917601,917602,917603,917604,917
605,917606,917607,917608,917609,917610,917611,917612,917613,917614,917615,917616,917617,917618,917619,917620,917621,917622,917623,917624,917625,917626,917627,917628,917629,917630,917631,917760,917761,917762,917763,917764,917765,917766,917767,917768,917769,917770,917771,917772,917773,917774,917775,917776,917777,917778,917779,917780,917781,917782,917783,917784,917785,917786,917787,917788,917789,917790,917791,917792,917793,917794,917795,917796,917797,917798,917799,917800,917801,917802,917803,917804,917805,917806,917807,917808,917809,917810,917811,917812,917813,917814,917815,917816,917817,917818,917819,917820,917821,917822,917823,917824,917825,917826,917827,917828,917829,917830,917831,917832,917833,917834,917835,917836,917837,917838,917839,917840,917841,917842,917843,917844,917845,917846,917847,917848,917849,917850,917851,917852,917853,917854,917855,917856,917857,917858,917859,917860,917861,917862,917863,917864,917865,917866,917867,917868,917869,917870,917871,917872,917873,917874,917875,917876,917877,917878,917879,917880,917881,917882,917883,917884,917885,917886,917887,917888,917889,917890,917891,917892,917893,917894,917895,917896,917897,917898,917899,917900,917901,917902,917903,917904,917905,917906,917907,917908,917909,917910,917911,917912,917913,917914,917915,917916,917917,917918,917919,917920,917921,917922,917923,917924,917925,917926,917927,917928,917929,917930,917931,917932,917933,917934,917935,917936,917937,917938,917939,917940,917941,917942,917943,917944,917945,917946,917947,917948,917949,917950,917951,917952,917953,917954,917955,917956,917957,917958,917959,917960,917961,917962,917963,917964,917965,917966,917967,917968,917969,917970,917971,917972,917973,917974,917975,917976,917977,917978,917979,917980,917981,917982,917983,917984,917985,917986,917987,917988,917989,917990,917991,917992,917993,917994,917995,917996,917997,917998,917999]")}static getData(){return this._data||(this._data=new Set(Ce.getRawData())),this._data}static isInvisibleCharacter(e){return 
Ce.getData().has(e)}static get codePoints(){return Ce.getData()}}Ce._data=void 0;const _e="$initialize";let ke;class Ee{constructor(e,t,n,i){this.vsWorker=e,this.req=t,this.method=n,this.args=i,this.type=0}}class Re{constructor(e,t,n,i){this.vsWorker=e,this.seq=t,this.res=n,this.err=i,this.type=1}}class Ne{constructor(e,t,n,i){this.vsWorker=e,this.req=t,this.eventName=n,this.arg=i,this.type=2}}class Fe{constructor(e,t,n){this.vsWorker=e,this.req=t,this.event=n,this.type=3}}class De{constructor(e,t){this.vsWorker=e,this.req=t,this.type=4}}class Te{constructor(e){this._workerId=-1,this._handler=e,this._lastSentReq=0,this._pendingReplies=Object.create(null),this._pendingEmitters=new Map,this._pendingEvents=new Map}setWorkerId(e){this._workerId=e}sendMessage(e,t){const n=String(++this._lastSentReq);return new Promise(((i,r)=>{this._pendingReplies[n]={resolve:i,reject:r},this._send(new Ee(this._workerId,n,e,t))}))}listen(e,t){let n=null;const i=new A({onWillAddFirstListener:()=>{n=String(++this._lastSentReq),this._pendingEmitters.set(n,i),this._send(new Ne(this._workerId,n,e,t))},onDidRemoveLastListener:()=>{this._pendingEmitters.delete(n),this._send(new De(this._workerId,n)),n=null}});return i.event}handleMessage(e){e&&e.vsWorker&&(-1!==this._workerId&&e.vsWorker!==this._workerId||this._handleMessage(e))}_handleMessage(e){switch(e.type){case 1:return this._handleReplyMessage(e);case 0:return this._handleRequestMessage(e);case 2:return this._handleSubscribeEventMessage(e);case 3:return this._handleEventMessage(e);case 4:return this._handleUnsubscribeEventMessage(e)}}_handleReplyMessage(e){if(!this._pendingReplies[e.seq])return void console.warn("Got reply to unknown seq");const t=this._pendingReplies[e.seq];if(delete this._pendingReplies[e.seq],e.err){let n=e.err;return e.err.$isError&&(n=new Error,n.name=e.err.name,n.message=e.err.message,n.stack=e.err.stack),void t.reject(n)}t.resolve(e.res)}_handleRequestMessage(e){const 
t=e.req;this._handler.handleMessage(e.method,e.args).then((e=>{this._send(new Re(this._workerId,t,e,void 0))}),(e=>{e.detail instanceof Error&&(e.detail=n(e.detail)),this._send(new Re(this._workerId,t,void 0,n(e)))}))}_handleSubscribeEventMessage(e){const t=e.req,n=this._handler.handleEvent(e.eventName,e.arg)((e=>{this._send(new Fe(this._workerId,t,e))}));this._pendingEvents.set(t,n)}_handleEventMessage(e){this._pendingEmitters.has(e.req)?this._pendingEmitters.get(e.req).fire(e.event):console.warn("Got event for unknown req")}_handleUnsubscribeEventMessage(e){this._pendingEvents.has(e.req)?(this._pendingEvents.get(e.req).dispose(),this._pendingEvents.delete(e.req)):console.warn("Got unsubscribe for unknown req")}_send(e){const t=[];if(0===e.type)for(let n=0;nfunction(){const n=Array.prototype.slice.call(arguments,0);return t(e,n)},r=e=>function(t){return n(e,t)},o={};for(const s of e)Me(s)?o[s]=r(s):Ae(s)?o[s]=n(s,void 0):o[s]=i(s);return o}class Ie{constructor(e,t){this._requestHandlerFactory=t,this._requestHandler=null,this._protocol=new Te({sendMessage:(t,n)=>{e(t,n)},handleMessage:(e,t)=>this._handleMessage(e,t),handleEvent:(e,t)=>this._handleEvent(e,t)})}onmessage(e){this._protocol.handleMessage(e)}_handleMessage(e,t){if(e===_e)return this.initialize(t[0],t[1],t[2],t[3]);if(!this._requestHandler||"function"!==typeof this._requestHandler[e])return Promise.reject(new Error("Missing requestHandler or method: "+e));try{return Promise.resolve(this._requestHandler[e].apply(this._requestHandler,t))}catch(Du){return Promise.reject(Du)}}_handleEvent(e,t){if(!this._requestHandler)throw new Error("Missing requestHandler");if(Me(e)){const n=this._requestHandler[e].call(this._requestHandler,t);if("function"!==typeof n)throw new Error(`Missing dynamic event ${e} on request handler.`);return n}if(Ae(e)){const t=this._requestHandler[e];if("function"!==typeof t)throw new Error(`Missing event ${e} on request handler.`);return t}throw new Error(`Malformed event name 
${e}`)}initialize(e,t,n,i){this._protocol.setWorkerId(e);const r=ze(i,((e,t)=>this._protocol.sendMessage(e,t)),((e,t)=>this._protocol.listen(e,t)));return this._requestHandlerFactory?(this._requestHandler=this._requestHandlerFactory(r),Promise.resolve(z(this._requestHandler))):(t&&("undefined"!==typeof t.baseUrl&&delete t.baseUrl,"undefined"!==typeof t.paths&&"undefined"!==typeof t.paths.vs&&delete t.paths.vs,"undefined"!==typeof t.trustedTypesPolicy&&delete t.trustedTypesPolicy,t.catchError=!0,globalThis.require.config(t)),new Promise(((e,t)=>{(0,globalThis.require)([n],(n=>{this._requestHandler=n.create(r),this._requestHandler?e(z(this._requestHandler)):t(new Error("No RequestHandler!"))}),t)})))}}class Le{constructor(e,t,n,i){this.originalStart=e,this.originalLength=t,this.modifiedStart=n,this.modifiedLength=i}getOriginalEnd(){return this.originalStart+this.originalLength}getModifiedEnd(){return this.modifiedStart+this.modifiedLength}}function Pe(e,t){return(t<<5)-t+e|0}function Oe(e,t){t=Pe(149417,t);for(let n=0,i=e.length;n2&&void 0!==arguments[2]?arguments[2]:32)-t;return(e<>>n)>>>0}function Ve(e){let t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:0,n=arguments.length>2&&void 0!==arguments[2]?arguments[2]:e.byteLength,i=arguments.length>3&&void 0!==arguments[3]?arguments[3]:0;for(let r=0;r1&&void 0!==arguments[1]?arguments[1]:32;return e instanceof ArrayBuffer?Array.from(new Uint8Array(e)).map((e=>e.toString(16).padStart(2,"0"))).join(""):function(e,t){let n=arguments.length>2&&void 0!==arguments[2]?arguments[2]:"0";for(;e.length>>0).toString(16),t/4)}class Ke{constructor(){this._h0=1732584193,this._h1=4023233417,this._h2=2562383102,this._h3=271733878,this._h4=3285377520,this._buff=new Uint8Array(67),this._buffDV=new DataView(this._buff.buffer),this._buffLen=0,this._totalLen=0,this._leftoverHighSurrogate=0,this._finished=!1}update(e){const t=e.length;if(0===t)return;const n=this._buff;let 
i,r,o=this._buffLen,s=this._leftoverHighSurrogate;for(0!==s?(i=s,r=-1,s=0):(i=e.charCodeAt(0),r=0);;){let a=i;if(ge(i)){if(!(r+1>>6,e[t++]=128|(63&n)>>>0):n<65536?(e[t++]=224|(61440&n)>>>12,e[t++]=128|(4032&n)>>>6,e[t++]=128|(63&n)>>>0):(e[t++]=240|(1835008&n)>>>18,e[t++]=128|(258048&n)>>>12,e[t++]=128|(4032&n)>>>6,e[t++]=128|(63&n)>>>0),t>=64&&(this._step(),t-=64,this._totalLen+=64,e[0]=e[64],e[1]=e[65],e[2]=e[66]),t}digest(){return this._finished||(this._finished=!0,this._leftoverHighSurrogate&&(this._leftoverHighSurrogate=0,this._buffLen=this._push(this._buff,this._buffLen,65533)),this._totalLen+=this._buffLen,this._wrapUp()),Ue(this._h0)+Ue(this._h1)+Ue(this._h2)+Ue(this._h3)+Ue(this._h4)}_wrapUp(){this._buff[this._buffLen++]=128,Ve(this._buff,this._buffLen),this._buffLen>56&&(this._step(),Ve(this._buff));const e=8*this._totalLen;this._buffDV.setUint32(56,Math.floor(e/4294967296),!1),this._buffDV.setUint32(60,e%4294967296,!1),this._step()}_step(){const e=Ke._bigBlock32,t=this._buffDV;for(let h=0;h<64;h+=4)e.setUint32(h,t.getUint32(h,!1),!1);for(let h=64;h<320;h+=4)e.setUint32(h,We(e.getUint32(h-12,!1)^e.getUint32(h-32,!1)^e.getUint32(h-56,!1)^e.getUint32(h-64,!1),1),!1);let n,i,r,o=this._h0,s=this._h1,a=this._h2,l=this._h3,c=this._h4;for(let h=0;h<80;h++)h<20?(n=s&a|~s&l,i=1518500249):h<40?(n=s^a^l,i=1859775393):h<60?(n=s&a|s&l|a&l,i=2400959708):(n=s^a^l,i=3395469782),r=We(o,5)+n+c+i+e.getUint32(4*h,!1)&4294967295,c=l,l=a,a=We(s,30),s=o,o=r;this._h0=this._h0+o&4294967295,this._h1=this._h1+s&4294967295,this._h2=this._h2+a&4294967295,this._h3=this._h3+l&4294967295,this._h4=this._h4+c&4294967295}}Ke._bigBlock32=new DataView(new ArrayBuffer(320));class qe{constructor(e){this.source=e}getElements(){const e=this.source,t=new Int32Array(e.length);for(let n=0,i=e.length;n0||this.m_modifiedCount>0)&&this.m_changes.push(new 
Le(this.m_originalStart,this.m_originalCount,this.m_modifiedStart,this.m_modifiedCount)),this.m_originalCount=0,this.m_modifiedCount=0,this.m_originalStart=1073741824,this.m_modifiedStart=1073741824}AddOriginalElement(e,t){this.m_originalStart=Math.min(this.m_originalStart,e),this.m_modifiedStart=Math.min(this.m_modifiedStart,t),this.m_originalCount++}AddModifiedElement(e,t){this.m_originalStart=Math.min(this.m_originalStart,e),this.m_modifiedStart=Math.min(this.m_modifiedStart,t),this.m_modifiedCount++}getChanges(){return(this.m_originalCount>0||this.m_modifiedCount>0)&&this.MarkNextChange(),this.m_changes}getReverseChanges(){return(this.m_originalCount>0||this.m_modifiedCount>0)&&this.MarkNextChange(),this.m_changes.reverse(),this.m_changes}}class Ge{constructor(e,t){let n=arguments.length>2&&void 0!==arguments[2]?arguments[2]:null;this.ContinueProcessingPredicate=n,this._originalSequence=e,this._modifiedSequence=t;const[i,r,o]=Ge._getElements(e),[s,a,l]=Ge._getElements(t);this._hasStrings=o&&l,this._originalStringElements=i,this._originalElementsOrHash=r,this._modifiedStringElements=s,this._modifiedElementsOrHash=a,this.m_forwardHistory=[],this.m_reverseHistory=[]}static _isStringArray(e){return e.length>0&&"string"===typeof e[0]}static _getElements(e){const t=e.getElements();if(Ge._isStringArray(t)){const e=new Int32Array(t.length);for(let n=0,i=t.length;n=e&&i>=n&&this.ElementsAreEqual(t,i);)t--,i--;if(e>t||n>i){let r;return n<=i?(je.Assert(e===t+1,"originalStart should only be one more than originalEnd"),r=[new Le(e,0,n,i-n+1)]):e<=t?(je.Assert(n===i+1,"modifiedStart should only be one more than modifiedEnd"),r=[new Le(e,t-e+1,n,0)]):(je.Assert(e===t+1,"originalStart should only be one more than originalEnd"),je.Assert(n===i+1,"modifiedStart should only be one more than modifiedEnd"),r=[]),r}const o=[0],s=[0],a=this.ComputeRecursionPoint(e,t,n,i,o,s,r),l=o[0],c=s[0];if(null!==a)return a;if(!r[0]){const o=this.ComputeDiffRecursive(e,l,n,c,r);let s=[];return 
s=r[0]?[new Le(l+1,t-(l+1)+1,c+1,i-(c+1)+1)]:this.ComputeDiffRecursive(l+1,t,c+1,i,r),this.ConcatenateChanges(o,s)}return[new Le(e,t-e+1,n,i-n+1)]}WALKTRACE(e,t,n,i,r,o,s,a,l,c,h,d,p,u,m,f,g,b){let v=null,y=null,w=new He,x=t,S=n,C=p[0]-f[0]-i,_=-1073741824,k=this.m_forwardHistory.length-1;do{const t=C+e;t===x||t=0&&(e=(l=this.m_forwardHistory[k])[0],x=1,S=l.length-1)}while(--k>=-1);if(v=w.getReverseChanges(),b[0]){let e=p[0]+1,t=f[0]+1;if(null!==v&&v.length>0){const n=v[v.length-1];e=Math.max(e,n.getOriginalEnd()),t=Math.max(t,n.getModifiedEnd())}y=[new Le(e,d-e+1,t,m-t+1)]}else{w=new He,x=o,S=s,C=p[0]-f[0]-a,_=1073741824,k=g?this.m_reverseHistory.length-1:this.m_reverseHistory.length-2;do{const e=C+r;e===x||e=c[e+1]?(u=(h=c[e+1]-1)-C-a,h>_&&w.MarkNextChange(),_=h+1,w.AddOriginalElement(h+1,u+1),C=e+1-r):(u=(h=c[e-1])-C-a,h>_&&w.MarkNextChange(),_=h,w.AddModifiedElement(h+1,u+1),C=e-1-r),k>=0&&(r=(c=this.m_reverseHistory[k])[0],x=1,S=c.length-1)}while(--k>=-1);y=w.getChanges()}return this.ConcatenateChanges(v,y)}ComputeRecursionPoint(e,t,n,i,r,o,s){let a=0,l=0,c=0,h=0,d=0,p=0;e--,n--,r[0]=0,o[0]=0,this.m_forwardHistory=[],this.m_reverseHistory=[];const u=t-e+(i-n),m=u+1,f=new Int32Array(m),g=new Int32Array(m),b=i-n,v=t-e,y=e-n,w=t-i,x=(v-b)%2===0;f[b]=e,g[v]=t,s[0]=!1;for(let S=1;S<=u/2+1;S++){let u=0,C=0;c=this.ClipDiagonalBound(b-S,S,b,m),h=this.ClipDiagonalBound(b+S,S,b,m);for(let e=c;e<=h;e+=2){a=e===c||eu+C&&(u=a,C=l),!x&&Math.abs(e-v)<=S-1&&a>=g[e])return r[0]=a,o[0]=l,n<=g[e]&&S<=1448?this.WALKTRACE(b,c,h,y,v,d,p,w,f,g,a,t,r,l,i,o,x,s):null}const _=(u-e+(C-n)-S)/2;if(null!==this.ContinueProcessingPredicate&&!this.ContinueProcessingPredicate(u,_))return s[0]=!0,r[0]=u,o[0]=C,_>0&&S<=1448?this.WALKTRACE(b,c,h,y,v,d,p,w,f,g,a,t,r,l,i,o,x,s):(e++,n++,[new Le(e,t-e+1,n,i-n+1)]);d=this.ClipDiagonalBound(v-S,S,v,m),p=this.ClipDiagonalBound(v+S,S,v,m);for(let m=d;m<=p;m+=2){a=m===d||m=g[m+1]?g[m+1]-1:g[m-1],l=a-(m-v)-w;const 
u=a;for(;a>e&&l>n&&this.ElementsAreEqual(a,l);)a--,l--;if(g[m]=a,x&&Math.abs(m-b)<=S&&a<=f[m])return r[0]=a,o[0]=l,u>=f[m]&&S<=1448?this.WALKTRACE(b,c,h,y,v,d,p,w,f,g,a,t,r,l,i,o,x,s):null}if(S<=1447){let e=new Int32Array(h-c+2);e[0]=b-c+1,$e.Copy2(f,c,e,1,h-c+1),this.m_forwardHistory.push(e),e=new Int32Array(p-d+2),e[0]=v-d+1,$e.Copy2(g,d,e,1,p-d+1),this.m_reverseHistory.push(e)}}return this.WALKTRACE(b,c,h,y,v,d,p,w,f,g,a,t,r,l,i,o,x,s)}PrettifyChanges(e){for(let t=0;t0,s=n.modifiedLength>0;for(;n.originalStart+n.originalLength=0;t--){const n=e[t];let i=0,r=0;if(t>0){const n=e[t-1];i=n.originalStart+n.originalLength,r=n.modifiedStart+n.modifiedLength}const o=n.originalLength>0,s=n.modifiedLength>0;let a=0,l=this._boundaryScore(n.originalStart,n.originalLength,n.modifiedStart,n.modifiedLength);for(let e=1;;e++){const t=n.originalStart-e,c=n.modifiedStart-e;if(tl&&(l=h,a=e)}n.originalStart-=a,n.modifiedStart-=a;const c=[null];t>0&&this.ChangesOverlap(e[t-1],e[t],c)&&(e[t-1]=c[0],e.splice(t,1),t++)}if(this._hasStrings)for(let t=1,n=e.length;t0&&t>a&&(a=t,l=h,c=e)}return a>0?[l,c]:null}_contiguousSequenceScore(e,t,n){let i=0;for(let r=0;r=this._originalElementsOrHash.length-1||this._hasStrings&&/^\s*$/.test(this._originalStringElements[e])}_OriginalRegionIsBoundary(e,t){if(this._OriginalIsBoundary(e)||this._OriginalIsBoundary(e-1))return!0;if(t>0){const n=e+t;if(this._OriginalIsBoundary(n-1)||this._OriginalIsBoundary(n))return!0}return!1}_ModifiedIsBoundary(e){return e<=0||e>=this._modifiedElementsOrHash.length-1||this._hasStrings&&/^\s*$/.test(this._modifiedStringElements[e])}_ModifiedRegionIsBoundary(e,t){if(this._ModifiedIsBoundary(e)||this._ModifiedIsBoundary(e-1))return!0;if(t>0){const n=e+t;if(this._ModifiedIsBoundary(n-1)||this._ModifiedIsBoundary(n))return!0}return!1}_boundaryScore(e,t,n,i){return(this._OriginalRegionIsBoundary(e,t)?1:0)+(this._ModifiedRegionIsBoundary(n,i)?1:0)}ConcatenateChanges(e,t){const n=[];if(0===e.length||0===t.length)return 
t.length>0?t:e;if(this.ChangesOverlap(e[e.length-1],t[0],n)){const i=new Array(e.length+t.length-1);return $e.Copy(e,0,i,0,e.length-1),i[e.length-1]=n[0],$e.Copy(t,1,i,e.length,t.length-1),i}{const n=new Array(e.length+t.length);return $e.Copy(e,0,n,0,e.length),$e.Copy(t,0,n,e.length,t.length),n}}ChangesOverlap(e,t,n){if(je.Assert(e.originalStart<=t.originalStart,"Left change is not less than or equal to right change"),je.Assert(e.modifiedStart<=t.modifiedStart,"Left change is not less than or equal to right change"),e.originalStart+e.originalLength>=t.originalStart||e.modifiedStart+e.modifiedLength>=t.modifiedStart){const i=e.originalStart;let r=e.originalLength;const o=e.modifiedStart;let s=e.modifiedLength;return e.originalStart+e.originalLength>=t.originalStart&&(r=t.originalStart+t.originalLength-e.originalStart),e.modifiedStart+e.modifiedLength>=t.modifiedStart&&(s=t.modifiedStart+t.modifiedLength-e.modifiedStart),n[0]=new Le(i,r,o,s),!0}return n[0]=null,!1}ClipDiagonalBound(e,t,n,i){if(e>=0&&ee.cwd()}}else ke="undefined"!==typeof process?{get platform(){return process.platform},get arch(){return process.arch},get env(){return{NODE_ENV:"production",PUBLIC_URL:".",WDS_SOCKET_HOST:void 0,WDS_SOCKET_PATH:void 0,WDS_SOCKET_PORT:void 0,FAST_REFRESH:!0,REACT_APP_BACKEND:"http://localhost:8765",REACT_APP_META_BACKEND:"undefined"}},cwd:()=>({NODE_ENV:"production",PUBLIC_URL:".",WDS_SOCKET_HOST:void 0,WDS_SOCKET_PATH:void 0,WDS_SOCKET_PORT:void 0,FAST_REFRESH:!0,REACT_APP_BACKEND:"http://localhost:8765",REACT_APP_META_BACKEND:"undefined"}.VSCODE_CWD||process.cwd())}:{get platform(){return ae?"win32":le?"darwin":"linux"},get arch(){},get env(){return{}},cwd:()=>"/"};const Xe=ke.cwd,Ye=(ke.env,ke.platform),Qe=65,Ze=97,et=90,tt=122,nt=46,it=47,rt=92,ot=58;class st extends Error{constructor(e,t,n){let i;"string"===typeof t&&0===t.indexOf("not ")?(i="must not be",t=t.replace(/^not /,"")):i="must be";const r=-1!==e.indexOf(".")?"property":"argument";let o=`The "${e}" ${r} 
${i} of type ${t}`;o+=". Received type "+typeof n,super(o),this.code="ERR_INVALID_ARG_TYPE"}}function at(e,t){if("string"!==typeof e)throw new st(t,"string",e)}const lt="win32"===Ye;function ct(e){return e===it||e===rt}function ht(e){return e===it}function dt(e){return e>=Qe&&e<=et||e>=Ze&&e<=tt}function pt(e,t,n,i){let r="",o=0,s=-1,a=0,l=0;for(let c=0;c<=e.length;++c){if(c2){const e=r.lastIndexOf(n);-1===e?(r="",o=0):(r=r.slice(0,e),o=r.length-1-r.lastIndexOf(n)),s=c,a=0;continue}if(0!==r.length){r="",o=0,s=c,a=0;continue}}t&&(r+=r.length>0?`${n}..`:"..",o=2)}else r.length>0?r+=`${n}${e.slice(s+1,c)}`:r=e.slice(s+1,c),o=c-s-1;s=c,a=0}else l===nt&&-1!==a?++a:a=-1}return r}function ut(e,t){!function(e,t){if(null===e||"object"!==typeof e)throw new st(t,"Object",e)}(t,"pathObject");const n=t.dir||t.root,i=t.base||`${t.name||""}${t.ext||""}`;return n?n===t.root?`${n}${i}`:`${n}${e}${i}`:i}const mt={resolve(){let e="",t="",n=!1;for(let i=arguments.length-1;i>=-1;i--){let r;if(i>=0){if(r=i<0||arguments.length<=i?void 0:arguments[i],at(r,"path"),0===r.length)continue}else 0===e.length?r=Xe():(r={NODE_ENV:"production",PUBLIC_URL:".",WDS_SOCKET_HOST:void 0,WDS_SOCKET_PATH:void 0,WDS_SOCKET_PORT:void 0,FAST_REFRESH:!0,REACT_APP_BACKEND:"http://localhost:8765",REACT_APP_META_BACKEND:"undefined"}[`=${e}`]||Xe(),(void 0===r||r.slice(0,2).toLowerCase()!==e.toLowerCase()&&r.charCodeAt(2)===rt)&&(r=`${e}\\`));const o=r.length;let s=0,a="",l=!1;const c=r.charCodeAt(0);if(1===o)ct(c)&&(s=1,l=!0);else if(ct(c))if(l=!0,ct(r.charCodeAt(1))){let e=2,t=e;for(;e2&&ct(r.charCodeAt(2))&&(l=!0,s=3));if(a.length>0)if(e.length>0){if(a.toLowerCase()!==e.toLowerCase())continue}else e=a;if(n){if(e.length>0)break}else if(t=`${r.slice(s)}\\${t}`,n=l,l&&e.length>0)break}return t=pt(t,!n,"\\",ct),n?`${e}\\${t}`:`${e}${t}`||"."},normalize(e){at(e,"path");const t=e.length;if(0===t)return".";let n,i=0,r=!1;const o=e.charCodeAt(0);if(1===t)return ht(o)?"\\":e;if(ct(o))if(r=!0,ct(e.charCodeAt(1))){let 
r=2,o=r;for(;r2&&ct(e.charCodeAt(2))&&(r=!0,i=3));let s=i0&&ct(e.charCodeAt(t-1))&&(s+="\\"),void 0===n?r?`\\${s}`:s:r?`${n}\\${s}`:`${n}${s}`},isAbsolute(e){at(e,"path");const t=e.length;if(0===t)return!1;const n=e.charCodeAt(0);return ct(n)||t>2&&dt(n)&&e.charCodeAt(1)===ot&&ct(e.charCodeAt(2))},join(){if(0===arguments.length)return".";let e,t;for(let r=0;r0&&(void 0===e?e=t=n:e+=`\\${n}`)}if(void 0===e)return".";let n=!0,i=0;if("string"===typeof t&&ct(t.charCodeAt(0))){++i;const e=t.length;e>1&&ct(t.charCodeAt(1))&&(++i,e>2&&(ct(t.charCodeAt(2))?++i:n=!1))}if(n){for(;i=2&&(e=`\\${e.slice(i)}`)}return mt.normalize(e)},relative(e,t){if(at(e,"from"),at(t,"to"),e===t)return"";const n=mt.resolve(e),i=mt.resolve(t);if(n===i)return"";if((e=n.toLowerCase())===(t=i.toLowerCase()))return"";let r=0;for(;rr&&e.charCodeAt(o-1)===rt;)o--;const s=o-r;let a=0;for(;aa&&t.charCodeAt(l-1)===rt;)l--;const c=l-a,h=sh){if(t.charCodeAt(a+p)===rt)return i.slice(a+p+1);if(2===p)return i.slice(a+p)}s>h&&(e.charCodeAt(r+p)===rt?d=p:2===p&&(d=3)),-1===d&&(d=0)}let u="";for(p=r+d+1;p<=o;++p)p!==o&&e.charCodeAt(p)!==rt||(u+=0===u.length?"..":"\\..");return a+=d,u.length>0?`${u}${i.slice(a,l)}`:(i.charCodeAt(a)===rt&&++a,i.slice(a,l))},toNamespacedPath(e){if("string"!==typeof e||0===e.length)return e;const t=mt.resolve(e);if(t.length<=2)return e;if(t.charCodeAt(0)===rt){if(t.charCodeAt(1)===rt){const e=t.charCodeAt(2);if(63!==e&&e!==nt)return`\\\\?\\UNC\\${t.slice(2)}`}}else if(dt(t.charCodeAt(0))&&t.charCodeAt(1)===ot&&t.charCodeAt(2)===rt)return`\\\\?\\${t}`;return e},dirname(e){at(e,"path");const t=e.length;if(0===t)return".";let n=-1,i=0;const r=e.charCodeAt(0);if(1===t)return ct(r)?e:".";if(ct(r)){if(n=i=1,ct(e.charCodeAt(1))){let r=2,o=r;for(;r2&&ct(e.charCodeAt(2))?3:2,i=n);let o=-1,s=!0;for(let a=t-1;a>=i;--a)if(ct(e.charCodeAt(a))){if(!s){o=a;break}}else s=!1;if(-1===o){if(-1===n)return".";o=n}return e.slice(0,o)},basename(e,t){void 0!==t&&at(t,"ext"),at(e,"path");let 
n,i=0,r=-1,o=!0;if(e.length>=2&&dt(e.charCodeAt(0))&&e.charCodeAt(1)===ot&&(i=2),void 0!==t&&t.length>0&&t.length<=e.length){if(t===e)return"";let s=t.length-1,a=-1;for(n=e.length-1;n>=i;--n){const l=e.charCodeAt(n);if(ct(l)){if(!o){i=n+1;break}}else-1===a&&(o=!1,a=n+1),s>=0&&(l===t.charCodeAt(s)?-1===--s&&(r=n):(s=-1,r=a))}return i===r?r=a:-1===r&&(r=e.length),e.slice(i,r)}for(n=e.length-1;n>=i;--n)if(ct(e.charCodeAt(n))){if(!o){i=n+1;break}}else-1===r&&(o=!1,r=n+1);return-1===r?"":e.slice(i,r)},extname(e){at(e,"path");let t=0,n=-1,i=0,r=-1,o=!0,s=0;e.length>=2&&e.charCodeAt(1)===ot&&dt(e.charCodeAt(0))&&(t=i=2);for(let a=e.length-1;a>=t;--a){const t=e.charCodeAt(a);if(ct(t)){if(!o){i=a+1;break}}else-1===r&&(o=!1,r=a+1),t===nt?-1===n?n=a:1!==s&&(s=1):-1!==n&&(s=-1)}return-1===n||-1===r||0===s||1===s&&n===r-1&&n===i+1?"":e.slice(n,r)},format:ut.bind(null,"\\"),parse(e){at(e,"path");const t={root:"",dir:"",base:"",ext:"",name:""};if(0===e.length)return t;const n=e.length;let i=0,r=e.charCodeAt(0);if(1===n)return ct(r)?(t.root=t.dir=e,t):(t.base=t.name=e,t);if(ct(r)){if(i=1,ct(e.charCodeAt(1))){let t=2,r=t;for(;t0&&(t.root=e.slice(0,i));let o=-1,s=i,a=-1,l=!0,c=e.length-1,h=0;for(;c>=i;--c)if(r=e.charCodeAt(c),ct(r)){if(!l){s=c+1;break}}else-1===a&&(l=!1,a=c+1),r===nt?-1===o?o=c:1!==h&&(h=1):-1!==o&&(h=-1);return-1!==a&&(-1===o||0===h||1===h&&o===a-1&&o===s+1?t.base=t.name=e.slice(s,a):(t.name=e.slice(s,o),t.base=e.slice(s,a),t.ext=e.slice(o,a))),t.dir=s>0&&s!==i?e.slice(0,s-1):t.root,t},sep:"\\",delimiter:";",win32:null,posix:null},ft=(()=>{if(lt){const e=/\\/g;return()=>{const t=Xe().replace(e,"/");return t.slice(t.indexOf("/"))}}return()=>Xe()})(),gt={resolve(){let e="",t=!1;for(let n=arguments.length-1;n>=-1&&!t;n--){const i=n>=0?n<0||arguments.length<=n?void 0:arguments[n]:ft();at(i,"path"),0!==i.length&&(e=`${i}/${e}`,t=i.charCodeAt(0)===it)}return e=pt(e,!t,"/",ht),t?`/${e}`:e.length>0?e:"."},normalize(e){if(at(e,"path"),0===e.length)return".";const 
t=e.charCodeAt(0)===it,n=e.charCodeAt(e.length-1)===it;return 0===(e=pt(e,!t,"/",ht)).length?t?"/":n?"./":".":(n&&(e+="/"),t?`/${e}`:e)},isAbsolute:e=>(at(e,"path"),e.length>0&&e.charCodeAt(0)===it),join(){if(0===arguments.length)return".";let e;for(let t=0;t0&&(void 0===e?e=n:e+=`/${n}`)}return void 0===e?".":gt.normalize(e)},relative(e,t){if(at(e,"from"),at(t,"to"),e===t)return"";if((e=gt.resolve(e))===(t=gt.resolve(t)))return"";const n=e.length,i=n-1,r=t.length-1,o=io){if(t.charCodeAt(1+a)===it)return t.slice(1+a+1);if(0===a)return t.slice(1+a)}else i>o&&(e.charCodeAt(1+a)===it?s=a:0===a&&(s=0));let l="";for(a=1+s+1;a<=n;++a)a!==n&&e.charCodeAt(a)!==it||(l+=0===l.length?"..":"/..");return`${l}${t.slice(1+s)}`},toNamespacedPath:e=>e,dirname(e){if(at(e,"path"),0===e.length)return".";const t=e.charCodeAt(0)===it;let n=-1,i=!0;for(let r=e.length-1;r>=1;--r)if(e.charCodeAt(r)===it){if(!i){n=r;break}}else i=!1;return-1===n?t?"/":".":t&&1===n?"//":e.slice(0,n)},basename(e,t){void 0!==t&&at(t,"ext"),at(e,"path");let n,i=0,r=-1,o=!0;if(void 0!==t&&t.length>0&&t.length<=e.length){if(t===e)return"";let s=t.length-1,a=-1;for(n=e.length-1;n>=0;--n){const l=e.charCodeAt(n);if(l===it){if(!o){i=n+1;break}}else-1===a&&(o=!1,a=n+1),s>=0&&(l===t.charCodeAt(s)?-1===--s&&(r=n):(s=-1,r=a))}return i===r?r=a:-1===r&&(r=e.length),e.slice(i,r)}for(n=e.length-1;n>=0;--n)if(e.charCodeAt(n)===it){if(!o){i=n+1;break}}else-1===r&&(o=!1,r=n+1);return-1===r?"":e.slice(i,r)},extname(e){at(e,"path");let t=-1,n=0,i=-1,r=!0,o=0;for(let s=e.length-1;s>=0;--s){const a=e.charCodeAt(s);if(a!==it)-1===i&&(r=!1,i=s+1),a===nt?-1===t?t=s:1!==o&&(o=1):-1!==t&&(o=-1);else if(!r){n=s+1;break}}return-1===t||-1===i||0===o||1===o&&t===i-1&&t===n+1?"":e.slice(t,i)},format:ut.bind(null,"/"),parse(e){at(e,"path");const t={root:"",dir:"",base:"",ext:"",name:""};if(0===e.length)return t;const n=e.charCodeAt(0)===it;let i;n?(t.root="/",i=1):i=0;let r=-1,o=0,s=-1,a=!0,l=e.length-1,c=0;for(;l>=i;--l){const 
t=e.charCodeAt(l);if(t!==it)-1===s&&(a=!1,s=l+1),t===nt?-1===r?r=l:1!==c&&(c=1):-1!==r&&(c=-1);else if(!a){o=l+1;break}}if(-1!==s){const i=0===o&&n?1:o;-1===r||0===c||1===c&&r===s-1&&r===o+1?t.base=t.name=e.slice(i,s):(t.name=e.slice(i,r),t.base=e.slice(i,s),t.ext=e.slice(r,s))}return o>0?t.dir=e.slice(0,o-1):n&&(t.dir="/"),t},sep:"/",delimiter:":",win32:null,posix:null};gt.win32=mt.win32=mt,gt.posix=mt.posix=gt;lt?mt.normalize:gt.normalize,lt?mt.resolve:gt.resolve,lt?mt.relative:gt.relative,lt?mt.dirname:gt.dirname,lt?mt.basename:gt.basename,lt?mt.extname:gt.extname,lt?mt.sep:gt.sep;const bt=/^\w[\w\d+.-]*$/,vt=/^\//,yt=/^\/\//;const wt="",xt="/",St=/^(([^:/?#]+?):)?(\/\/([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?/;class Ct{static isUri(e){return e instanceof Ct||!!e&&("string"===typeof e.authority&&"string"===typeof e.fragment&&"string"===typeof e.path&&"string"===typeof e.query&&"string"===typeof e.scheme&&"string"===typeof e.fsPath&&"function"===typeof e.with&&"function"===typeof e.toString)}constructor(e,t,n,i,r){let o=arguments.length>5&&void 0!==arguments[5]&&arguments[5];"object"===typeof e?(this.scheme=e.scheme||wt,this.authority=e.authority||wt,this.path=e.path||wt,this.query=e.query||wt,this.fragment=e.fragment||wt):(this.scheme=function(e,t){return e||t?e:"file"}(e,o),this.authority=t||wt,this.path=function(e,t){switch(e){case"https":case"http":case"file":t?t[0]!==xt&&(t=xt+t):t=xt}return t}(this.scheme,n||wt),this.query=i||wt,this.fragment=r||wt,function(e,t){if(!e.scheme&&t)throw new Error(`[UriError]: Scheme is missing: {scheme: "", authority: "${e.authority}", path: "${e.path}", query: "${e.query}", fragment: "${e.fragment}"}`);if(e.scheme&&!bt.test(e.scheme))throw new Error("[UriError]: Scheme contains illegal characters.");if(e.path)if(e.authority){if(!vt.test(e.path))throw new Error('[UriError]: If a URI contains an authority component, then the path component must either be empty or begin with a slash ("/") character')}else if(yt.test(e.path))throw 
new Error('[UriError]: If a URI does not contain an authority component, then the path cannot begin with two slash characters ("//")')}(this,o))}get fsPath(){return Ft(this,!1)}with(e){if(!e)return this;let{scheme:t,authority:n,path:i,query:r,fragment:o}=e;return void 0===t?t=this.scheme:null===t&&(t=wt),void 0===n?n=this.authority:null===n&&(n=wt),void 0===i?i=this.path:null===i&&(i=wt),void 0===r?r=this.query:null===r&&(r=wt),void 0===o?o=this.fragment:null===o&&(o=wt),t===this.scheme&&n===this.authority&&i===this.path&&r===this.query&&o===this.fragment?this:new kt(t,n,i,r,o)}static parse(e){let t=arguments.length>1&&void 0!==arguments[1]&&arguments[1];const n=St.exec(e);return n?new kt(n[2]||wt,Mt(n[4]||wt),Mt(n[5]||wt),Mt(n[7]||wt),Mt(n[9]||wt),t):new kt(wt,wt,wt,wt,wt)}static file(e){let t=wt;if(ae&&(e=e.replace(/\\/g,xt)),e[0]===xt&&e[1]===xt){const n=e.indexOf(xt,2);-1===n?(t=e.substring(2),e=xt):(t=e.substring(2,n),e=e.substring(n)||xt)}return new kt("file",t,e,wt,wt)}static from(e,t){return new kt(e.scheme,e.authority,e.path,e.query,e.fragment,t)}static joinPath(e){if(!e.path)throw new Error("[UriError]: cannot call joinPath on URI without path");let t;for(var n=arguments.length,i=new Array(n>1?n-1:0),r=1;r0&&void 0!==arguments[0]&&arguments[0])}toJSON(){return this}static revive(e){var t,n;if(e){if(e instanceof Ct)return e;{const i=new kt(e);return i._formatted=null!==(t=e.external)&&void 0!==t?t:null,i._fsPath=e._sep===_t&&null!==(n=e.fsPath)&&void 0!==n?n:null,i}}return e}}const _t=ae?1:void 0;class kt extends Ct{constructor(){super(...arguments),this._formatted=null,this._fsPath=null}get fsPath(){return this._fsPath||(this._fsPath=Ft(this,!1)),this._fsPath}toString(){return arguments.length>0&&void 0!==arguments[0]&&arguments[0]?Dt(this,!0):(this._formatted||(this._formatted=Dt(this,!1)),this._formatted)}toJSON(){const e={$mid:1};return 
this._fsPath&&(e.fsPath=this._fsPath,e._sep=_t),this._formatted&&(e.external=this._formatted),this.path&&(e.path=this.path),this.scheme&&(e.scheme=this.scheme),this.authority&&(e.authority=this.authority),this.query&&(e.query=this.query),this.fragment&&(e.fragment=this.fragment),e}}const Et={58:"%3A",47:"%2F",63:"%3F",35:"%23",91:"%5B",93:"%5D",64:"%40",33:"%21",36:"%24",38:"%26",39:"%27",40:"%28",41:"%29",42:"%2A",43:"%2B",44:"%2C",59:"%3B",61:"%3D",32:"%20"};function Rt(e,t,n){let i,r=-1;for(let o=0;o=97&&s<=122||s>=65&&s<=90||s>=48&&s<=57||45===s||46===s||95===s||126===s||t&&47===s||n&&91===s||n&&93===s||n&&58===s)-1!==r&&(i+=encodeURIComponent(e.substring(r,o)),r=-1),void 0!==i&&(i+=e.charAt(o));else{void 0===i&&(i=e.substr(0,o));const t=Et[s];void 0!==t?(-1!==r&&(i+=encodeURIComponent(e.substring(r,o)),r=-1),i+=t):-1===r&&(r=o)}}return-1!==r&&(i+=encodeURIComponent(e.substring(r))),void 0!==i?i:e}function Nt(e){let t;for(let n=0;n1&&"file"===e.scheme?`//${e.authority}${e.path}`:47===e.path.charCodeAt(0)&&(e.path.charCodeAt(1)>=65&&e.path.charCodeAt(1)<=90||e.path.charCodeAt(1)>=97&&e.path.charCodeAt(1)<=122)&&58===e.path.charCodeAt(2)?t?e.path.substr(1):e.path[1].toLowerCase()+e.path.substr(2):e.path,ae&&(n=n.replace(/\//g,"\\")),n}function Dt(e,t){const n=t?Nt:Rt;let i="",{scheme:r,authority:o,path:s,query:a,fragment:l}=e;if(r&&(i+=r,i+=":"),(o||"file"===r)&&(i+=xt,i+=xt),o){let e=o.indexOf("@");if(-1!==e){const t=o.substr(0,e);o=o.substr(e+1),e=t.lastIndexOf(":"),-1===e?i+=n(t,!1,!1):(i+=n(t.substr(0,e),!1,!1),i+=":",i+=n(t.substr(e+1),!1,!0)),i+="@"}o=o.toLowerCase(),e=o.lastIndexOf(":"),-1===e?i+=n(o,!1,!0):(i+=n(o.substr(0,e),!1,!0),i+=o.substr(e))}if(s){if(s.length>=3&&47===s.charCodeAt(0)&&58===s.charCodeAt(2)){const e=s.charCodeAt(1);e>=65&&e<=90&&(s=`/${String.fromCharCode(e+32)}:${s.substr(3)}`)}else if(s.length>=2&&58===s.charCodeAt(1)){const e=s.charCodeAt(0);e>=65&&e<=90&&(s=`${String.fromCharCode(e+32)}:${s.substr(2)}`)}i+=n(s,!0,!1)}return 
a&&(i+="?",i+=n(a,!1,!1)),l&&(i+="#",i+=t?l:Rt(l,!1,!1)),i}function Tt(e){try{return decodeURIComponent(e)}catch(O){return e.length>3?e.substr(0,3)+Tt(e.substr(3)):e}}const At=/(%[0-9A-Za-z][0-9A-Za-z])+/g;function Mt(e){return e.match(At)?e.replace(At,(e=>Tt(e))):e}class zt{constructor(e,t){this.lineNumber=e,this.column=t}with(){let e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:this.lineNumber,t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:this.column;return e===this.lineNumber&&t===this.column?this:new zt(e,t)}delta(){let e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:0,t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:0;return this.with(this.lineNumber+e,this.column+t)}equals(e){return zt.equals(this,e)}static equals(e,t){return!e&&!t||!!e&&!!t&&e.lineNumber===t.lineNumber&&e.column===t.column}isBefore(e){return zt.isBefore(this,e)}static isBefore(e,t){return e.lineNumbern||e===n&&t>i?(this.startLineNumber=n,this.startColumn=i,this.endLineNumber=e,this.endColumn=t):(this.startLineNumber=e,this.startColumn=t,this.endLineNumber=n,this.endColumn=i)}isEmpty(){return It.isEmpty(this)}static isEmpty(e){return e.startLineNumber===e.endLineNumber&&e.startColumn===e.endColumn}containsPosition(e){return It.containsPosition(this,e)}static containsPosition(e,t){return!(t.lineNumbere.endLineNumber)&&(!(t.lineNumber===e.startLineNumber&&t.columne.endColumn))}static strictContainsPosition(e,t){return!(t.lineNumbere.endLineNumber)&&(!(t.lineNumber===e.startLineNumber&&t.column<=e.startColumn)&&!(t.lineNumber===e.endLineNumber&&t.column>=e.endColumn))}containsRange(e){return It.containsRange(this,e)}static containsRange(e,t){return!(t.startLineNumbere.endLineNumber||t.endLineNumber>e.endLineNumber)&&(!(t.startLineNumber===e.startLineNumber&&t.startColumne.endColumn)))}strictContainsRange(e){return It.strictContainsRange(this,e)}static 
strictContainsRange(e,t){return!(t.startLineNumbere.endLineNumber||t.endLineNumber>e.endLineNumber)&&(!(t.startLineNumber===e.startLineNumber&&t.startColumn<=e.startColumn)&&!(t.endLineNumber===e.endLineNumber&&t.endColumn>=e.endColumn)))}/* plusRange builds a new It from the two inputs; when both end on the same line it keeps the larger end column. NOTE(review): the start-side comparison is truncated in this copy. */plusRange(e){return It.plusRange(this,e)}static plusRange(e,t){let n,i,r,o;return t.startLineNumbere.endLineNumber?(r=t.endLineNumber,o=t.endColumn):t.endLineNumber===e.endLineNumber?(r=t.endLineNumber,o=Math.max(t.endColumn,e.endColumn)):(r=e.endLineNumber,o=e.endColumn),new It(n,i,r,o)}/* intersectRanges returns null when the clipped start ends up after the clipped end, otherwise a new It. NOTE(review): the clipping expression after return is truncated in this copy. */intersectRanges(e){return It.intersectRanges(this,e)}static intersectRanges(e,t){let n=e.startLineNumber,i=e.startColumn,r=e.endLineNumber,o=e.endColumn;const s=t.startLineNumber,a=t.startColumn,l=t.endLineNumber,c=t.endColumn;return nl?(r=l,o=c):r===l&&(o=Math.min(o,c)),n>r||n===r&&i>o?null:new It(n,i,r,o)}/* equalsRange is true when both arguments are falsy, or both are truthy with identical start and end coordinates. */equalsRange(e){return It.equalsRange(this,e)}static equalsRange(e,t){return!e&&!t||!!e&&!!t&&e.startLineNumber===t.startLineNumber&&e.startColumn===t.startColumn&&e.endLineNumber===t.endLineNumber&&e.endColumn===t.endColumn}/* getEndPosition and getStartPosition return new zt positions. */getEndPosition(){return It.getEndPosition(this)}static getEndPosition(e){return new zt(e.endLineNumber,e.endColumn)}getStartPosition(){return It.getStartPosition(this)}static getStartPosition(e){return new zt(e.startLineNumber,e.startColumn)}toString(){return"["+this.startLineNumber+","+this.startColumn+" -> "+this.endLineNumber+","+this.endColumn+"]"}/* setEndPosition, setStartPosition, collapseToStart, collapseToEnd and delta all return a new It instead of mutating this; delta shifts only the two line numbers. */setEndPosition(e,t){return new It(this.startLineNumber,this.startColumn,e,t)}setStartPosition(e,t){return new It(e,t,this.endLineNumber,this.endColumn)}collapseToStart(){return It.collapseToStart(this)}static collapseToStart(e){return new It(e.startLineNumber,e.startColumn,e.startLineNumber,e.startColumn)}collapseToEnd(){return It.collapseToEnd(this)}static collapseToEnd(e){return new It(e.endLineNumber,e.endColumn,e.endLineNumber,e.endColumn)}delta(e){return new It(this.startLineNumber+e,this.startColumn,this.endLineNumber+e,this.endColumn)}static fromPositions(e){let 
t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:e;return new It(e.lineNumber,e.column,t.lineNumber,t.column)}/* lift copies a range-like object into an It and returns null for falsy input. */static lift(e){return e?new It(e.startLineNumber,e.startColumn,e.endLineNumber,e.endColumn):null}/* isIRange is truthy only when e is truthy and all four coordinates are numbers. */static isIRange(e){return e&&"number"===typeof e.startLineNumber&&"number"===typeof e.startColumn&&"number"===typeof e.endLineNumber&&"number"===typeof e.endColumn}/* NOTE(review): the body of areIntersectingOrTouching below is truncated in this copy - verify against the upstream bundle. */static areIntersectingOrTouching(e,t){return!(e.endLineNumbere.startLineNumber}toJSON(){return this}}var Lt;/* Pt(e,t) returns a comparator that applies selector e to both arguments and then compares the results with t. */function Pt(e,t){return(n,i)=>t(e(n),e(i))}/* Lt: helpers for reading a comparator result (negative is less, 0 is neither, positive is greater) plus the constants 1, -1 and 0. */!function(e){e.isLessThan=function(e){return e<0},e.isLessThanOrEqual=function(e){return e<=0},e.isGreaterThan=function(e){return e>0},e.isNeitherLessOrGreaterThan=function(e){return 0===e},e.greaterThan=1,e.lessThan=-1,e.neitherLessOrGreaterThan=0}(Lt||(Lt={}));/* Ot: numeric comparator returning e-t. */const Ot=(e,t)=>e-t;/* Wt wraps an iterate(callback) function; toArray, filter, map, findLast and findLastMaxBy are all built on that single function. findLastMaxBy keeps the first item and then any later item for which the comparator e(item, best) is greater than 0. */class Wt{constructor(e){this.iterate=e}toArray(){const e=[];return this.iterate((t=>(e.push(t),!0))),e}filter(e){return new Wt((t=>this.iterate((n=>!e(n)||t(n)))))}map(e){return new Wt((t=>this.iterate((n=>t(e(n))))))}findLast(e){let t;return this.iterate((n=>(e(n)&&(t=n),!0))),t}findLastMaxBy(e){let t,n=!0;return this.iterate((i=>((n||Lt.isGreaterThan(e(i,t)))&&(n=!1,t=i),!0))),t}}Wt.empty=new Wt((e=>{}));/* Vt clamps to 0..255 and Ut clamps to 0..4294967295; values inside the range are passed through the bitwise-or-zero conversion. */function Vt(e){return e<0?0:e>255?255:0|e}function Ut(e){return e<0?0:e>4294967295?4294967295:0|e}/* Kt keeps a values array together with a lazily filled prefix-sum cache; prefixSumValidIndex[0] is the last index whose cached sum is valid and starts at -1. NOTE(review): the cache-invalidation expressions in insertValues and setValue, and the head of the removal method, are truncated in this copy. */class Kt{constructor(e){this.values=e,this.prefixSum=new Uint32Array(e.length),this.prefixSumValidIndex=new Int32Array(1),this.prefixSumValidIndex[0]=-1}insertValues(e,t){e=Ut(e);const n=this.values,i=this.prefixSum,r=t.length;return 0!==r&&(this.values=new Uint32Array(n.length+r),this.values.set(n.subarray(0,e),0),this.values.set(n.subarray(e),e+r),this.values.set(t,e),e-1=0&&this.prefixSum.set(i.subarray(0,this.prefixSumValidIndex[0]+1)),!0)}setValue(e,t){return e=Ut(e),t=Ut(t),this.values[e]!==t&&(this.values[e]=t,e-1=n.length)return!1;const r=n.length-e;return t>=r&&(t=r),0!==t&&(this.values=new 
Uint32Array(n.length-t),this.values.set(n.subarray(0,e),0),this.values.set(n.subarray(e+t),e),this.prefixSum=new Uint32Array(this.values.length),e-1=0&&this.prefixSum.set(i.subarray(0,this.prefixSumValidIndex[0]+1)),!0)}/* getTotalSum is the prefix sum at the last index, or 0 when there are no values. */getTotalSum(){return 0===this.values.length?0:this._getPrefixSum(this.values.length-1)}/* getPrefixSum returns 0 for a negative index and otherwise clamps the index with Ut. */getPrefixSum(e){return e<0?0:(e=Ut(e),this._getPrefixSum(e))}/* _getPrefixSum returns the cached sum when index e is already valid; otherwise it extends the cache from the first invalid index up to e (clamped to the last index) and records the new valid index. */_getPrefixSum(e){if(e<=this.prefixSumValidIndex[0])return this.prefixSum[e];let t=this.prefixSumValidIndex[0]+1;0===t&&(this.prefixSum[0]=this.values[0],t++),e>=this.values.length&&(e=this.values.length-1);for(let n=t;n<=e;n++)this.prefixSum[n]=this.prefixSum[n-1]+this.values[n];return this.prefixSumValidIndex[0]=Math.max(this.prefixSumValidIndex[0],e),this.prefixSum[e]}/* getIndexOf floors e, calls getTotalSum so the whole cache is filled, then binary-searches the prefix sums and returns a qt holding the index and the remainder inside that entry. NOTE(review): the comparison inside the loop is truncated in this copy. */getIndexOf(e){e=Math.floor(e),this.getTotalSum();let t=0,n=this.values.length-1,i=0,r=0,o=0;for(;t<=n;)if(i=t+(n-t)/2|0,r=this.prefixSum[i],o=r-this.values[i],e=r))break;t=i+1}return new qt(i,e-o)}}/* qt: the (index, remainder) pair returned by Kt.getIndexOf. */class qt{constructor(e,t){this.index=e,this.remainder=t,this._prefixSumIndexOfResultBrand=void 0,this.index=e,this.remainder=t}}/* Bt mirrors a text model as an array of lines plus EOL string and version id. getText caches the joined text; onEvents applies each change as a delete followed by an insert, then updates the version and drops the cache. */class Bt{constructor(e,t,n,i){this._uri=e,this._lines=t,this._eol=n,this._versionId=i,this._lineStarts=null,this._cachedTextValue=null}dispose(){this._lines.length=0}get version(){return this._versionId}getText(){return null===this._cachedTextValue&&(this._cachedTextValue=this._lines.join(this._eol)),this._cachedTextValue}onEvents(e){e.eol&&e.eol!==this._eol&&(this._eol=e.eol,this._lineStarts=null);const t=e.changes;for(const n of t)this._acceptDeleteRange(n.range),this._acceptInsertText(new zt(n.range.startLineNumber,n.range.startColumn),n.text);this._versionId=e.versionId,this._cachedTextValue=null}/* NOTE(review): from the loop inside _ensureLineStarts onward a large span is missing in this copy (the rest of Bt and the start of a separator-characters string); the remainder of this line builds the default word regex $t - verify against the upstream bundle. */_ensureLineStarts(){if(!this._lineStarts){const e=this._eol.length,t=this._lines.length,n=new Uint32Array(t);for(let i=0;i/?";const $t=function(){let e=arguments.length>0&&void 0!==arguments[0]?arguments[0]:"",t="(-?\\d*\\.\\d\\w*)|([^";for(const n of jt)e.indexOf(n)>=0||(t+="\\"+n);return t+="\\s]+)",new 
RegExp(t,"g")}();/* Ht(e) picks the regex used for word matching: e itself when it is a global RegExp, a global copy of e (keeping the i, m and u flags) when it is a non-global RegExp, otherwise the default $t. lastIndex is reset to 0 before returning. */function Ht(e){let t=$t;if(e&&e instanceof RegExp)if(e.global)t=e;else{let n="g";e.ignoreCase&&(n+="i"),e.multiline&&(n+="m"),e.unicode&&(n+="u"),t=new RegExp(e.source,n)}return t.lastIndex=0,t}/* Gt holds word-search configs; presumably a list-like container (S is defined elsewhere), with the default config unshifted onto it further below. */const Gt=new S;/* Jt(e,t,n,i,r) looks for the word at column e in text n, where i is the offset of n within the full line, and returns an object with word, startColumn and endColumn, or null. Text longer than r.maxLen is first cut to a window around e. The search then walks backwards in steps of r.windowSize and stops once r.timeBudget milliseconds have elapsed. */function Jt(e,t,n,i,r){if(t=Ht(t),r||(r=h.first(Gt)),n.length>r.maxLen){let o=e-r.maxLen/2;return o<0?o=0:i+=o,Jt(e,t,n=n.substring(o,e+r.maxLen/2),i,r)}const o=Date.now(),s=e-1-i;let a=-1,l=null;for(let c=1;!(Date.now()-o>=r.timeBudget);c++){const e=s-r.windowSize*c;t.lastIndex=Math.max(0,e);const i=Xt(t,n,s,a);if(!i&&l)break;if(l=i,e<=0)break;a=e}if(l){const e={word:l[0],startColumn:i+1+l.index,endColumn:i+1+l.index+l[0].length};return t.lastIndex=0,e}return null}/* Xt(e,t,n,i) runs regex e over t and returns the first match that spans position n; it returns null once a match starts after i (only when i is greater than 0) or when the matches run out. */function Xt(e,t,n,i){let r;for(;r=e.exec(t);){const t=r.index||0;if(t<=n&&e.lastIndex>=n)return r;if(i>0&&t>i)return null}return null}Gt.unshift({maxLen:1e3,windowSize:15,timeBudget:150});/* Yt maps char codes to small integers: codes 0..255 live in a Uint8Array, all others in a Map, and get falls back to the default value for missing (or zero) Map entries. Stored values are clamped by Vt. */class Yt{constructor(e){const t=Vt(e);this._defaultValue=t,this._asciiMap=Yt._createAsciiMap(t),this._map=new Map}static _createAsciiMap(e){const t=new Uint8Array(256);return t.fill(e),t}set(e,t){const n=Vt(t);e>=0&&e<256?this._asciiMap[e]=n:this._map.set(e,n)}get(e){return e>=0&&e<256?this._asciiMap[e]:this._map.get(e)||this._defaultValue}clear(){this._asciiMap.fill(this._defaultValue),this._map.clear()}}/* Qt allocates a Uint8Array of e*t cells. NOTE(review): the rest of Qt and the head of the following state-machine class are truncated in this copy - verify against the upstream bundle. */class Qt{constructor(e,t,n){const i=new Uint8Array(e*t);for(let r=0,o=e*t;rt&&(t=o),i>n&&(n=i),s>n&&(n=s)}t++,n++;const i=new Qt(n,t,0);for(let r=0,o=e.length;r=this._maxCharCode?0:this._states.get(e,t)}}/* en and tn are lazily created singletons used by nn.computeLinks (the state machine and the character classifier). */let en=null;let tn=null;/* nn: link detection. _createLink walks back from the end while characters are classified as 2, drops a closing bracket that pairs with the character just before the link start, and returns the one-based range plus the url substring. */class nn{static _createLink(e,t,n,i,r){let o=r-1;do{const n=t.charCodeAt(o);if(2!==e.get(n))break;o--}while(o>i);if(i>0){const e=t.charCodeAt(i-1),n=t.charCodeAt(o);(40===e&&41===n||91===e&&93===n||123===e&&125===n)&&o--}return{range:{startLineNumber:n,startColumn:i+1,endLineNumber:n,endColumn:o+2},url:t.substring(i,o+1)}}static computeLinks(e){let t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:(null===en&&(en=new 
Zt([[1,104,2],[1,72,2],[1,102,6],[1,70,6],[2,116,3],[2,84,3],[3,116,4],[3,84,4],[4,112,5],[4,80,5],[5,115,9],[5,83,9],[5,58,10],[6,105,7],[6,73,7],[7,108,8],[7,76,8],[8,101,9],[8,69,9],[9,58,10],[10,47,11],[11,47,12]])),en);/* The triples above read as (from state, char code, to state) and spell http, https and file in either case, followed by a colon and two slashes. The function below lazily builds the character classifier tn. NOTE(review): the text after the loop header n=0 is missing in this copy, up to the tail of a class assigned to rn. */const n=function(){if(null===tn){tn=new Yt(0);const e=" \t<>'\"\u3001\u3002\uff61\uff64\uff0c\uff0e\uff1a\uff1b\u2018\u3008\u300c\u300e\u3014\uff08\uff3b\uff5b\uff62\uff63\uff5d\uff3d\uff09\u3015\u300f\u300d\u3009\u2019\uff40\uff5e\u2026";for(let n=0;n=0?(i+=n?1:-1,i<0?i=e.length-1:i%=e.length,e[i]):null}}rn.INSTANCE=new rn;/* on is an event function that calls the listener once via setTimeout with delay 0 and returns a disposable that clears the timeout; it serves as onCancellationRequested for already-cancelled tokens. */const on=Object.freeze((function(e,t){const n=setTimeout(e.bind(t),0);return{dispose(){clearTimeout(n)}}}));var sn;/* sn: cancellation-token namespace with the frozen None and Cancelled tokens and the isCancellationToken check, which accepts those two, instances of an, or any object with a boolean isCancellationRequested and a function onCancellationRequested. */!function(e){e.isCancellationToken=function(t){return t===e.None||t===e.Cancelled||(t instanceof an||!(!t||"object"!==typeof t)&&("boolean"===typeof t.isCancellationRequested&&"function"===typeof t.onCancellationRequested))},e.None=Object.freeze({isCancellationRequested:!1,onCancellationRequested:E.None}),e.Cancelled=Object.freeze({isCancellationRequested:!0,onCancellationRequested:on})}(sn||(sn={}));/* an: mutable token. cancel() takes effect only once; it fires the emitter (if one was created) and disposes it. The emitter is created on first access of onCancellationRequested. */class an{constructor(){this._isCancelled=!1,this._emitter=null}cancel(){this._isCancelled||(this._isCancelled=!0,this._emitter&&(this._emitter.fire(void 0),this.dispose()))}get isCancellationRequested(){return this._isCancelled}get onCancellationRequested(){return this._isCancelled?on:(this._emitter||(this._emitter=new A),this._emitter.event)}dispose(){this._emitter&&(this._emitter.dispose(),this._emitter=null)}}/* ln: token source. The token is created lazily, so cancel() before first access just stores sn.Cancelled. An optional parent token e is subscribed so that its cancellation calls cancel() here. dispose(true) cancels first, then drops the parent listener. */class ln{constructor(e){this._token=void 0,this._parentListener=void 0,this._parentListener=e&&e.onCancellationRequested(this.cancel,this)}get token(){return this._token||(this._token=new an),this._token}cancel(){this._token?this._token instanceof an&&this._token.cancel():this._token=sn.Cancelled}dispose(){var e;arguments.length>0&&void 0!==arguments[0]&&arguments[0]&&this.cancel(),null===(e=this._parentListener)||void 0===e||e.dispose(),this._token?this._token instanceof 
an&&this._token.dispose():this._token=sn.None}}/* cn: two-way table between key codes and names. Name lookup is case-insensitive and returns 0 for unknown names. */class cn{constructor(){this._keyCodeToStr=[],this._strToKeyCode=Object.create(null)}define(e,t){this._keyCodeToStr[e]=t,this._strToKeyCode[t.toLowerCase()]=e}keyCodeToStr(e){return this._keyCodeToStr[e]}strToKeyCode(e){return this._strToKeyCode[e.toLowerCase()]||0}}/* Registries filled by the loop that follows the key table: hn, dn and pn are name tables (plain, US user-settings and general user-settings names); un maps numeric event key codes and mn maps VK_ names to key codes; fn, gn and bn map scan codes to and from their names; vn (scan code to key code) and yn (key code to scan code) start out filled with -1. */const hn=new cn,dn=new cn,pn=new cn,un=new Array(230),mn={},fn=[],gn=Object.create(null),bn=Object.create(null),vn=[],yn=[];for(let Tu=0;Tu<=193;Tu++)vn[Tu]=-1;for(let Tu=0;Tu<=132;Tu++)yn[Tu]=-1;var wn;/* Key table. Each row is read by the loop after the table as: flag, scan code, scan code name, key code, key code name, numeric event key code, VK_ name, US label, general label. Column meanings are inferred from that destructuring and from the error message it throws - confirm against upstream. */!function(){const e="",t=[[1,0,"None",0,"unknown",0,"VK_UNKNOWN",e,e],[1,1,"Hyper",0,e,0,e,e,e],[1,2,"Super",0,e,0,e,e,e],[1,3,"Fn",0,e,0,e,e,e],[1,4,"FnLock",0,e,0,e,e,e],[1,5,"Suspend",0,e,0,e,e,e],[1,6,"Resume",0,e,0,e,e,e],[1,7,"Turbo",0,e,0,e,e,e],[1,8,"Sleep",0,e,0,"VK_SLEEP",e,e],[1,9,"WakeUp",0,e,0,e,e,e],[0,10,"KeyA",31,"A",65,"VK_A",e,e],[0,11,"KeyB",32,"B",66,"VK_B",e,e],[0,12,"KeyC",33,"C",67,"VK_C",e,e],[0,13,"KeyD",34,"D",68,"VK_D",e,e],[0,14,"KeyE",35,"E",69,"VK_E",e,e],[0,15,"KeyF",36,"F",70,"VK_F",e,e],[0,16,"KeyG",37,"G",71,"VK_G",e,e],[0,17,"KeyH",38,"H",72,"VK_H",e,e],[0,18,"KeyI",39,"I",73,"VK_I",e,e],[0,19,"KeyJ",40,"J",74,"VK_J",e,e],[0,20,"KeyK",41,"K",75,"VK_K",e,e],[0,21,"KeyL",42,"L",76,"VK_L",e,e],[0,22,"KeyM",43,"M",77,"VK_M",e,e],[0,23,"KeyN",44,"N",78,"VK_N",e,e],[0,24,"KeyO",45,"O",79,"VK_O",e,e],[0,25,"KeyP",46,"P",80,"VK_P",e,e],[0,26,"KeyQ",47,"Q",81,"VK_Q",e,e],[0,27,"KeyR",48,"R",82,"VK_R",e,e],[0,28,"KeyS",49,"S",83,"VK_S",e,e],[0,29,"KeyT",50,"T",84,"VK_T",e,e],[0,30,"KeyU",51,"U",85,"VK_U",e,e],[0,31,"KeyV",52,"V",86,"VK_V",e,e],[0,32,"KeyW",53,"W",87,"VK_W",e,e],[0,33,"KeyX",54,"X",88,"VK_X",e,e],[0,34,"KeyY",55,"Y",89,"VK_Y",e,e],[0,35,"KeyZ",56,"Z",90,"VK_Z",e,e],[0,36,"Digit1",22,"1",49,"VK_1",e,e],[0,37,"Digit2",23,"2",50,"VK_2",e,e],[0,38,"Digit3",24,"3",51,"VK_3",e,e],[0,39,"Digit4",25,"4",52,"VK_4",e,e],[0,40,"Digit5",26,"5",53,"VK_5",e,e],[0,41,"Digit6",27,"6",54,"VK_6",e,e],[0,42,"Digit7",28,"7",55,"VK_7",e,e],
[0,43,"Digit8",29,"8",56,"VK_8",e,e],[0,44,"Digit9",30,"9",57,"VK_9",e,e],[0,45,"Digit0",21,"0",48,"VK_0",e,e],[1,46,"Enter",3,"Enter",13,"VK_RETURN",e,e],[1,47,"Escape",9,"Escape",27,"VK_ESCAPE",e,e],[1,48,"Backspace",1,"Backspace",8,"VK_BACK",e,e],[1,49,"Tab",2,"Tab",9,"VK_TAB",e,e],[1,50,"Space",10,"Space",32,"VK_SPACE",e,e],[0,51,"Minus",88,"-",189,"VK_OEM_MINUS","-","OEM_MINUS"],[0,52,"Equal",86,"=",187,"VK_OEM_PLUS","=","OEM_PLUS"],[0,53,"BracketLeft",92,"[",219,"VK_OEM_4","[","OEM_4"],[0,54,"BracketRight",94,"]",221,"VK_OEM_6","]","OEM_6"],[0,55,"Backslash",93,"\\",220,"VK_OEM_5","\\","OEM_5"],[0,56,"IntlHash",0,e,0,e,e,e],[0,57,"Semicolon",85,";",186,"VK_OEM_1",";","OEM_1"],[0,58,"Quote",95,"'",222,"VK_OEM_7","'","OEM_7"],[0,59,"Backquote",91,"`",192,"VK_OEM_3","`","OEM_3"],[0,60,"Comma",87,",",188,"VK_OEM_COMMA",",","OEM_COMMA"],[0,61,"Period",89,".",190,"VK_OEM_PERIOD",".","OEM_PERIOD"],[0,62,"Slash",90,"/",191,"VK_OEM_2","/","OEM_2"],[1,63,"CapsLock",8,"CapsLock",20,"VK_CAPITAL",e,e],[1,64,"F1",59,"F1",112,"VK_F1",e,e],[1,65,"F2",60,"F2",113,"VK_F2",e,e],[1,66,"F3",61,"F3",114,"VK_F3",e,e],[1,67,"F4",62,"F4",115,"VK_F4",e,e],[1,68,"F5",63,"F5",116,"VK_F5",e,e],[1,69,"F6",64,"F6",117,"VK_F6",e,e],[1,70,"F7",65,"F7",118,"VK_F7",e,e],[1,71,"F8",66,"F8",119,"VK_F8",e,e],[1,72,"F9",67,"F9",120,"VK_F9",e,e],[1,73,"F10",68,"F10",121,"VK_F10",e,e],[1,74,"F11",69,"F11",122,"VK_F11",e,e],[1,75,"F12",70,"F12",123,"VK_F12",e,e],[1,76,"PrintScreen",0,e,0,e,e,e],[1,77,"ScrollLock",84,"ScrollLock",145,"VK_SCROLL",e,e],[1,78,"Pause",7,"PauseBreak",19,"VK_PAUSE",e,e],[1,79,"Insert",19,"Insert",45,"VK_INSERT",e,e],[1,80,"Home",14,"Home",36,"VK_HOME",e,e],[1,81,"PageUp",11,"PageUp",33,"VK_PRIOR",e,e],[1,82,"Delete",20,"Delete",46,"VK_DELETE",e,e],[1,83,"End",13,"End",35,"VK_END",e,e],[1,84,"PageDown",12,"PageDown",34,"VK_NEXT",e,e],[1,85,"ArrowRight",17,"RightArrow",39,"VK_RIGHT","Right",e],[1,86,"ArrowLeft",15,"LeftArrow",37,"VK_LEFT","Left",e],[1,87,"ArrowDown",18,"DownAr
row",40,"VK_DOWN","Down",e],[1,88,"ArrowUp",16,"UpArrow",38,"VK_UP","Up",e],[1,89,"NumLock",83,"NumLock",144,"VK_NUMLOCK",e,e],[1,90,"NumpadDivide",113,"NumPad_Divide",111,"VK_DIVIDE",e,e],[1,91,"NumpadMultiply",108,"NumPad_Multiply",106,"VK_MULTIPLY",e,e],[1,92,"NumpadSubtract",111,"NumPad_Subtract",109,"VK_SUBTRACT",e,e],[1,93,"NumpadAdd",109,"NumPad_Add",107,"VK_ADD",e,e],[1,94,"NumpadEnter",3,e,0,e,e,e],[1,95,"Numpad1",99,"NumPad1",97,"VK_NUMPAD1",e,e],[1,96,"Numpad2",100,"NumPad2",98,"VK_NUMPAD2",e,e],[1,97,"Numpad3",101,"NumPad3",99,"VK_NUMPAD3",e,e],[1,98,"Numpad4",102,"NumPad4",100,"VK_NUMPAD4",e,e],[1,99,"Numpad5",103,"NumPad5",101,"VK_NUMPAD5",e,e],[1,100,"Numpad6",104,"NumPad6",102,"VK_NUMPAD6",e,e],[1,101,"Numpad7",105,"NumPad7",103,"VK_NUMPAD7",e,e],[1,102,"Numpad8",106,"NumPad8",104,"VK_NUMPAD8",e,e],[1,103,"Numpad9",107,"NumPad9",105,"VK_NUMPAD9",e,e],[1,104,"Numpad0",98,"NumPad0",96,"VK_NUMPAD0",e,e],[1,105,"NumpadDecimal",112,"NumPad_Decimal",110,"VK_DECIMAL",e,e],[0,106,"IntlBackslash",97,"OEM_102",226,"VK_OEM_102",e,e],[1,107,"ContextMenu",58,"ContextMenu",93,e,e,e],[1,108,"Power",0,e,0,e,e,e],[1,109,"NumpadEqual",0,e,0,e,e,e],[1,110,"F13",71,"F13",124,"VK_F13",e,e],[1,111,"F14",72,"F14",125,"VK_F14",e,e],[1,112,"F15",73,"F15",126,"VK_F15",e,e],[1,113,"F16",74,"F16",127,"VK_F16",e,e],[1,114,"F17",75,"F17",128,"VK_F17",e,e],[1,115,"F18",76,"F18",129,"VK_F18",e,e],[1,116,"F19",77,"F19",130,"VK_F19",e,e],[1,117,"F20",78,"F20",131,"VK_F20",e,e],[1,118,"F21",79,"F21",132,"VK_F21",e,e],[1,119,"F22",80,"F22",133,"VK_F22",e,e],[1,120,"F23",81,"F23",134,"VK_F23",e,e],[1,121,"F24",82,"F24",135,"VK_F24",e,e],[1,122,"Open",0,e,0,e,e,e],[1,123,"Help",0,e,0,e,e,e],[1,124,"Select",0,e,0,e,e,e],[1,125,"Again",0,e,0,e,e,e],[1,126,"Undo",0,e,0,e,e,e],[1,127,"Cut",0,e,0,e,e,e],[1,128,"Copy",0,e,0,e,e,e],[1,129,"Paste",0,e,0,e,e,e],[1,130,"Find",0,e,0,e,e,e],[1,131,"AudioVolumeMute",117,"AudioVolumeMute",173,"VK_VOLUME_MUTE",e,e],[1,132,"AudioVolumeUp",118,"AudioVolu
meUp",175,"VK_VOLUME_UP",e,e],[1,133,"AudioVolumeDown",119,"AudioVolumeDown",174,"VK_VOLUME_DOWN",e,e],[1,134,"NumpadComma",110,"NumPad_Separator",108,"VK_SEPARATOR",e,e],[0,135,"IntlRo",115,"ABNT_C1",193,"VK_ABNT_C1",e,e],[1,136,"KanaMode",0,e,0,e,e,e],[0,137,"IntlYen",0,e,0,e,e,e],[1,138,"Convert",0,e,0,e,e,e],[1,139,"NonConvert",0,e,0,e,e,e],[1,140,"Lang1",0,e,0,e,e,e],[1,141,"Lang2",0,e,0,e,e,e],[1,142,"Lang3",0,e,0,e,e,e],[1,143,"Lang4",0,e,0,e,e,e],[1,144,"Lang5",0,e,0,e,e,e],[1,145,"Abort",0,e,0,e,e,e],[1,146,"Props",0,e,0,e,e,e],[1,147,"NumpadParenLeft",0,e,0,e,e,e],[1,148,"NumpadParenRight",0,e,0,e,e,e],[1,149,"NumpadBackspace",0,e,0,e,e,e],[1,150,"NumpadMemoryStore",0,e,0,e,e,e],[1,151,"NumpadMemoryRecall",0,e,0,e,e,e],[1,152,"NumpadMemoryClear",0,e,0,e,e,e],[1,153,"NumpadMemoryAdd",0,e,0,e,e,e],[1,154,"NumpadMemorySubtract",0,e,0,e,e,e],[1,155,"NumpadClear",131,"Clear",12,"VK_CLEAR",e,e],[1,156,"NumpadClearEntry",0,e,0,e,e,e],[1,0,e,5,"Ctrl",17,"VK_CONTROL",e,e],[1,0,e,4,"Shift",16,"VK_SHIFT",e,e],[1,0,e,6,"Alt",18,"VK_MENU",e,e],[1,0,e,57,"Meta",91,"VK_COMMAND",e,e],[1,157,"ControlLeft",5,e,0,"VK_LCONTROL",e,e],[1,158,"ShiftLeft",4,e,0,"VK_LSHIFT",e,e],[1,159,"AltLeft",6,e,0,"VK_LMENU",e,e],[1,160,"MetaLeft",57,e,0,"VK_LWIN",e,e],[1,161,"ControlRight",5,e,0,"VK_RCONTROL",e,e],[1,162,"ShiftRight",4,e,0,"VK_RSHIFT",e,e],[1,163,"AltRight",6,e,0,"VK_RMENU",e,e],[1,164,"MetaRight",57,e,0,"VK_RWIN",e,e],[1,165,"BrightnessUp",0,e,0,e,e,e],[1,166,"BrightnessDown",0,e,0,e,e,e],[1,167,"MediaPlay",0,e,0,e,e,e],[1,168,"MediaRecord",0,e,0,e,e,e],[1,169,"MediaFastForward",0,e,0,e,e,e],[1,170,"MediaRewind",0,e,0,e,e,e],[1,171,"MediaTrackNext",124,"MediaTrackNext",176,"VK_MEDIA_NEXT_TRACK",e,e],[1,172,"MediaTrackPrevious",125,"MediaTrackPrevious",177,"VK_MEDIA_PREV_TRACK",e,e],[1,173,"MediaStop",126,"MediaStop",178,"VK_MEDIA_STOP",e,e],[1,174,"Eject",0,e,0,e,e,e],[1,175,"MediaPlayPause",127,"MediaPlayPause",179,"VK_MEDIA_PLAY_PAUSE",e,e],[1,176,"MediaSelect",128,"Launch
MediaPlayer",181,"VK_MEDIA_LAUNCH_MEDIA_SELECT",e,e],[1,177,"LaunchMail",129,"LaunchMail",180,"VK_MEDIA_LAUNCH_MAIL",e,e],[1,178,"LaunchApp2",130,"LaunchApp2",183,"VK_MEDIA_LAUNCH_APP2",e,e],[1,179,"LaunchApp1",0,e,0,"VK_MEDIA_LAUNCH_APP1",e,e],[1,180,"SelectTask",0,e,0,e,e,e],[1,181,"LaunchScreenSaver",0,e,0,e,e,e],[1,182,"BrowserSearch",120,"BrowserSearch",170,"VK_BROWSER_SEARCH",e,e],[1,183,"BrowserHome",121,"BrowserHome",172,"VK_BROWSER_HOME",e,e],[1,184,"BrowserBack",122,"BrowserBack",166,"VK_BROWSER_BACK",e,e],[1,185,"BrowserForward",123,"BrowserForward",167,"VK_BROWSER_FORWARD",e,e],[1,186,"BrowserStop",0,e,0,"VK_BROWSER_STOP",e,e],[1,187,"BrowserRefresh",0,e,0,"VK_BROWSER_REFRESH",e,e],[1,188,"BrowserFavorites",0,e,0,"VK_BROWSER_FAVORITES",e,e],[1,189,"ZoomToggle",0,e,0,e,e,e],[1,190,"MailReply",0,e,0,e,e,e],[1,191,"MailForward",0,e,0,e,e,e],[1,192,"MailSend",0,e,0,e,e,e],[1,0,e,114,"KeyInComposition",229,e,e,e],[1,0,e,116,"ABNT_C2",194,"VK_ABNT_C2",e,e],[1,0,e,96,"OEM_8",223,"VK_OEM_8",e,e],[1,0,e,0,e,0,"VK_KANA",e,e],[1,0,e,0,e,0,"VK_HANGUL",e,e],[1,0,e,0,e,0,"VK_JUNJA",e,e],[1,0,e,0,e,0,"VK_FINAL",e,e],[1,0,e,0,e,0,"VK_HANJA",e,e],[1,0,e,0,e,0,"VK_KANJI",e,e],[1,0,e,0,e,0,"VK_CONVERT",e,e],[1,0,e,0,e,0,"VK_NONCONVERT",e,e],[1,0,e,0,e,0,"VK_ACCEPT",e,e],[1,0,e,0,e,0,"VK_MODECHANGE",e,e],[1,0,e,0,e,0,"VK_SELECT",e,e],[1,0,e,0,e,0,"VK_PRINT",e,e],[1,0,e,0,e,0,"VK_EXECUTE",e,e],[1,0,e,0,e,0,"VK_SNAPSHOT",e,e],[1,0,e,0,e,0,"VK_HELP",e,e],[1,0,e,0,e,0,"VK_APPS",e,e],[1,0,e,0,e,0,"VK_PROCESSKEY",e,e],[1,0,e,0,e,0,"VK_PACKET",e,e],[1,0,e,0,e,0,"VK_DBE_SBCSCHAR",e,e],[1,0,e,0,e,0,"VK_DBE_DBCSCHAR",e,e],[1,0,e,0,e,0,"VK_ATTN",e,e],[1,0,e,0,e,0,"VK_CRSEL",e,e],[1,0,e,0,e,0,"VK_EXSEL",e,e],[1,0,e,0,e,0,"VK_EREOF",e,e],[1,0,e,0,e,0,"VK_PLAY",e,e],[1,0,e,0,e,0,"VK_ZOOM",e,e],[1,0,e,0,e,0,"VK_NONAME",e,e],[1,0,e,0,e,0,"VK_PA1",e,e],[1,0,e,0,e,0,"VK_OEM_CLEAR",e,e]],n=[],i=[];for(const r of 
t){/* Per row: the scan-code side (fn, gn, bn, vn, yn) is registered only the first time a scan code t is seen, and the key-code names (hn, dn, pn) only the first time a key code s is seen. vn and yn are written only when the row flag e is truthy, and yn skips key codes 0, 3, 4, 5, 6 and 57. A first-seen key code without a name is a hard error. */const[e,t,o,s,a,l,c,h,d]=r;if(i[t]||(i[t]=!0,fn[t]=o,gn[o]=t,bn[o.toLowerCase()]=t,e&&(vn[t]=s,0!==s&&3!==s&&5!==s&&4!==s&&6!==s&&57!==s&&(yn[s]=t))),!n[s]){if(n[s]=!0,!a)throw new Error(`String representation missing for key code ${s} around scan code ${o}`);hn.define(s,a),dn.define(s,h||a),pn.define(s,d||h||a)}l&&(un[l]=s),c&&(mn[c]=s)}/* Key code 3 is mapped explicitly to scan code 46 (the Enter row), since the loop above skips key code 3 when filling yn. */yn[3]=46}(),/* wn: public key-code helpers on top of the hn, dn and pn tables. fromUserSettings tries the US table first and then the general one. toElectronAccelerator returns null for key codes 98 to 113 and the short names Up, Down, Left, Right for 16, 18, 15 and 17. */function(e){e.toString=function(e){return hn.keyCodeToStr(e)},e.fromString=function(e){return hn.strToKeyCode(e)},e.toUserSettingsUS=function(e){return dn.keyCodeToStr(e)},e.toUserSettingsGeneral=function(e){return pn.keyCodeToStr(e)},e.fromUserSettings=function(e){return dn.strToKeyCode(e)||pn.strToKeyCode(e)},e.toElectronAccelerator=function(e){if(e>=98&&e<=113)return null;switch(e){case 16:return"Up";case 18:return"Down";case 15:return"Left";case 17:return"Right"}return hn.keyCodeToStr(e)}}(wn||(wn={}));/* xn: a range (It) that also records where the selection started and where the position (cursor) is. getDirection is 0 when the selection start coincides with the range start and 1 otherwise; setEndPosition and setStartPosition return a new xn that keeps that direction. */class xn extends It{constructor(e,t,n,i){super(e,t,n,i),this.selectionStartLineNumber=e,this.selectionStartColumn=t,this.positionLineNumber=n,this.positionColumn=i}toString(){return"["+this.selectionStartLineNumber+","+this.selectionStartColumn+" -> "+this.positionLineNumber+","+this.positionColumn+"]"}equalsSelection(e){return xn.selectionsEqual(this,e)}static selectionsEqual(e,t){return e.selectionStartLineNumber===t.selectionStartLineNumber&&e.selectionStartColumn===t.selectionStartColumn&&e.positionLineNumber===t.positionLineNumber&&e.positionColumn===t.positionColumn}getDirection(){return this.selectionStartLineNumber===this.startLineNumber&&this.selectionStartColumn===this.startColumn?0:1}setEndPosition(e,t){return 0===this.getDirection()?new xn(this.startLineNumber,this.startColumn,e,t):new xn(e,t,this.startLineNumber,this.startColumn)}getPosition(){return new zt(this.positionLineNumber,this.positionColumn)}getSelectionStart(){return new zt(this.selectionStartLineNumber,this.selectionStartColumn)}setStartPosition(e,t){return 0===this.getDirection()?new xn(e,t,this.endLineNumber,this.endColumn):new 
xn(this.endLineNumber,this.endColumn,e,t)}/* fromPositions uses e for both ends when the second argument is omitted. */static fromPositions(e){let t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:e;return new xn(e.lineNumber,e.column,t.lineNumber,t.column)}/* fromRange(e,t): t equal to 0 keeps the start-to-end orientation of e; any other t swaps start and end. */static fromRange(e,t){return 0===t?new xn(e.startLineNumber,e.startColumn,e.endLineNumber,e.endColumn):new xn(e.endLineNumber,e.endColumn,e.startLineNumber,e.startColumn)}static liftSelection(e){return new xn(e.selectionStartLineNumber,e.selectionStartColumn,e.positionLineNumber,e.positionColumn)}/* selectionsArrEqual: false when exactly one argument is falsy or the lengths differ, true when both are falsy. NOTE(review): the element loop and everything up to the tokenization-registry methods below are missing in this copy - verify against the upstream bundle. */static selectionsArrEqual(e,t){if(e&&!t||!e&&t)return!1;if(!e&&!t)return!0;if(e.length!==t.length)return!1;for(let n=0,i=e.length;n{this._tokenizationSupports.get(e)===t&&(this._tokenizationSupports.delete(e),this.handleChange([e]))}))}/* get returns the registered tokenization support for language e, or null. */get(e){return this._tokenizationSupports.get(e)||null}/* registerFactory disposes any factory previously registered for e and stores a new En. The callback passed to b removes and disposes the factory only if it is still the one registered by this call. */registerFactory(e,t){var n;null===(n=this._factories.get(e))||void 0===n||n.dispose();const i=new En(this,e,t);return this._factories.set(e,i),b((()=>{const t=this._factories.get(e);t&&t===i&&(this._factories.delete(e),t.dispose())}))}/* getOrCreate returns an existing support; otherwise it awaits the unresolved factory and looks again. It resolves to null when there is no factory or the factory is already resolved. */async getOrCreate(e){const t=this.get(e);if(t)return t;const n=this._factories.get(e);return!n||n.isResolved?null:(await n.resolve(),this.get(e))}/* isResolved is false only when no support is registered and an unresolved factory exists. */isResolved(e){if(this.get(e))return!0;const t=this._factories.get(e);return!(t&&!t.isResolved)}/* setColorMap stores the map and fires a change event listing every registered language with changedColorMap set. */setColorMap(e){this._colorMap=e,this._onDidChange.fire({changedLanguages:Array.from(this._tokenizationSupports.keys()),changedColorMap:!0})}getColorMap(){return this._colorMap}/* getDefaultBackground returns entry 2 of the colour map, or null when the map is unset or has 2 or fewer entries. */getDefaultBackground(){return this._colorMap&&this._colorMap.length>2?this._colorMap[2]:null}};var 
On,Wn,Vn,Un,Kn,qn,Bn,jn,$n,Hn,Gn,Jn,Xn,Yn,Qn,Zn,ei,ti,ni,ii,ri,oi,si,ai,li,ci,hi,di,pi,ui,mi,fi,gi,bi,vi,yi,wi,xi,Si,Ci,_i,ki,Ei,Ri,Ni,Fi,Di;!function(e){e[e.Invoke=0]="Invoke",e[e.Automatic=1]="Automatic"}(On||(On={})),function(e){e[e.Unknown=0]="Unknown",e[e.Disabled=1]="Disabled",e[e.Enabled=2]="Enabled"}(Wn||(Wn={})),function(e){e[e.Invoke=1]="Invoke",e[e.Auto=2]="Auto"}(Vn||(Vn={})),function(e){e[e.None=0]="None",e[e.KeepWhitespace=1]="KeepWhitespace",e[e.InsertAsSnippet=4]="InsertAsSnippet"}(Un||(Un={})),function(e){e[e.Method=0]="Method",e[e.Function=1]="Function",e[e.Constructor=2]="Constructor",e[e.Field=3]="Field",e[e.Variable=4]="Variable",e[e.Class=5]="Class",e[e.Struct=6]="Struct",e[e.Interface=7]="Interface",e[e.Module=8]="Module",e[e.Property=9]="Property",e[e.Event=10]="Event",e[e.Operator=11]="Operator",e[e.Unit=12]="Unit",e[e.Value=13]="Value",e[e.Constant=14]="Constant",e[e.Enum=15]="Enum",e[e.EnumMember=16]="EnumMember",e[e.Keyword=17]="Keyword",e[e.Text=18]="Text",e[e.Color=19]="Color",e[e.File=20]="File",e[e.Reference=21]="Reference",e[e.Customcolor=22]="Customcolor",e[e.Folder=23]="Folder",e[e.TypeParameter=24]="TypeParameter",e[e.User=25]="User",e[e.Issue=26]="Issue",e[e.Snippet=27]="Snippet"}(Kn||(Kn={})),function(e){e[e.Deprecated=1]="Deprecated"}(qn||(qn={})),function(e){e[e.Invoke=0]="Invoke",e[e.TriggerCharacter=1]="TriggerCharacter",e[e.TriggerForIncompleteCompletions=2]="TriggerForIncompleteCompletions"}(Bn||(Bn={})),function(e){e[e.EXACT=0]="EXACT",e[e.ABOVE=1]="ABOVE",e[e.BELOW=2]="BELOW"}(jn||(jn={})),function(e){e[e.NotSet=0]="NotSet",e[e.ContentFlush=1]="ContentFlush",e[e.RecoverFromMarkers=2]="RecoverFromMarkers",e[e.Explicit=3]="Explicit",e[e.Paste=4]="Paste",e[e.Undo=5]="Undo",e[e.Redo=6]="Redo"}($n||($n={})),function(e){e[e.LF=1]="LF",e[e.CRLF=2]="CRLF"}(Hn||(Hn={})),function(e){e[e.Text=0]="Text",e[e.Read=1]="Read",e[e.Write=2]="Write"}(Gn||(Gn={})),function(e){e[e.None=0]="None",e[e.Keep=1]="Keep",e[e.Brackets=2]="Brackets",
e[e.Advanced=3]="Advanced",e[e.Full=4]="Full"}(Jn||(Jn={})),function(e){e[e.acceptSuggestionOnCommitCharacter=0]="acceptSuggestionOnCommitCharacter",e[e.acceptSuggestionOnEnter=1]="acceptSuggestionOnEnter",e[e.accessibilitySupport=2]="accessibilitySupport",e[e.accessibilityPageSize=3]="accessibilityPageSize",e[e.ariaLabel=4]="ariaLabel",e[e.ariaRequired=5]="ariaRequired",e[e.autoClosingBrackets=6]="autoClosingBrackets",e[e.autoClosingComments=7]="autoClosingComments",e[e.screenReaderAnnounceInlineSuggestion=8]="screenReaderAnnounceInlineSuggestion",e[e.autoClosingDelete=9]="autoClosingDelete",e[e.autoClosingOvertype=10]="autoClosingOvertype",e[e.autoClosingQuotes=11]="autoClosingQuotes",e[e.autoIndent=12]="autoIndent",e[e.automaticLayout=13]="automaticLayout",e[e.autoSurround=14]="autoSurround",e[e.bracketPairColorization=15]="bracketPairColorization",e[e.guides=16]="guides",e[e.codeLens=17]="codeLens",e[e.codeLensFontFamily=18]="codeLensFontFamily",e[e.codeLensFontSize=19]="codeLensFontSize",e[e.colorDecorators=20]="colorDecorators",e[e.colorDecoratorsLimit=21]="colorDecoratorsLimit",e[e.columnSelection=22]="columnSelection",e[e.comments=23]="comments",e[e.contextmenu=24]="contextmenu",e[e.copyWithSyntaxHighlighting=25]="copyWithSyntaxHighlighting",e[e.cursorBlinking=26]="cursorBlinking",e[e.cursorSmoothCaretAnimation=27]="cursorSmoothCaretAnimation",e[e.cursorStyle=28]="cursorStyle",e[e.cursorSurroundingLines=29]="cursorSurroundingLines",e[e.cursorSurroundingLinesStyle=30]="cursorSurroundingLinesStyle",e[e.cursorWidth=31]="cursorWidth",e[e.disableLayerHinting=32]="disableLayerHinting",e[e.disableMonospaceOptimizations=33]="disableMonospaceOptimizations",e[e.domReadOnly=34]="domReadOnly",e[e.dragAndDrop=35]="dragAndDrop",e[e.dropIntoEditor=36]="dropIntoEditor",e[e.emptySelectionClipboard=37]="emptySelectionClipboard",e[e.experimentalWhitespaceRendering=38]="experimentalWhitespaceRendering",e[e.extraEditorClassName=39]="extraEditorClassName",e[e.fastScrollSensitivit
y=40]="fastScrollSensitivity",e[e.find=41]="find",e[e.fixedOverflowWidgets=42]="fixedOverflowWidgets",e[e.folding=43]="folding",e[e.foldingStrategy=44]="foldingStrategy",e[e.foldingHighlight=45]="foldingHighlight",e[e.foldingImportsByDefault=46]="foldingImportsByDefault",e[e.foldingMaximumRegions=47]="foldingMaximumRegions",e[e.unfoldOnClickAfterEndOfLine=48]="unfoldOnClickAfterEndOfLine",e[e.fontFamily=49]="fontFamily",e[e.fontInfo=50]="fontInfo",e[e.fontLigatures=51]="fontLigatures",e[e.fontSize=52]="fontSize",e[e.fontWeight=53]="fontWeight",e[e.fontVariations=54]="fontVariations",e[e.formatOnPaste=55]="formatOnPaste",e[e.formatOnType=56]="formatOnType",e[e.glyphMargin=57]="glyphMargin",e[e.gotoLocation=58]="gotoLocation",e[e.hideCursorInOverviewRuler=59]="hideCursorInOverviewRuler",e[e.hover=60]="hover",e[e.inDiffEditor=61]="inDiffEditor",e[e.inlineSuggest=62]="inlineSuggest",e[e.inlineEdit=63]="inlineEdit",e[e.letterSpacing=64]="letterSpacing",e[e.lightbulb=65]="lightbulb",e[e.lineDecorationsWidth=66]="lineDecorationsWidth",e[e.lineHeight=67]="lineHeight",e[e.lineNumbers=68]="lineNumbers",e[e.lineNumbersMinChars=69]="lineNumbersMinChars",e[e.linkedEditing=70]="linkedEditing",e[e.links=71]="links",e[e.matchBrackets=72]="matchBrackets",e[e.minimap=73]="minimap",e[e.mouseStyle=74]="mouseStyle",e[e.mouseWheelScrollSensitivity=75]="mouseWheelScrollSensitivity",e[e.mouseWheelZoom=76]="mouseWheelZoom",e[e.multiCursorMergeOverlapping=77]="multiCursorMergeOverlapping",e[e.multiCursorModifier=78]="multiCursorModifier",e[e.multiCursorPaste=79]="multiCursorPaste",e[e.multiCursorLimit=80]="multiCursorLimit",e[e.occurrencesHighlight=81]="occurrencesHighlight",e[e.overviewRulerBorder=82]="overviewRulerBorder",e[e.overviewRulerLanes=83]="overviewRulerLanes",e[e.padding=84]="padding",e[e.pasteAs=85]="pasteAs",e[e.parameterHints=86]="parameterHints",e[e.peekWidgetDefaultFocus=87]="peekWidgetDefaultFocus",e[e.definitionLinkOpensInPeek=88]="definitionLinkOpensInPeek",e[e.quickSugge
stions=89]="quickSuggestions",e[e.quickSuggestionsDelay=90]="quickSuggestionsDelay",e[e.readOnly=91]="readOnly",e[e.readOnlyMessage=92]="readOnlyMessage",e[e.renameOnType=93]="renameOnType",e[e.renderControlCharacters=94]="renderControlCharacters",e[e.renderFinalNewline=95]="renderFinalNewline",e[e.renderLineHighlight=96]="renderLineHighlight",e[e.renderLineHighlightOnlyWhenFocus=97]="renderLineHighlightOnlyWhenFocus",e[e.renderValidationDecorations=98]="renderValidationDecorations",e[e.renderWhitespace=99]="renderWhitespace",e[e.revealHorizontalRightPadding=100]="revealHorizontalRightPadding",e[e.roundedSelection=101]="roundedSelection",e[e.rulers=102]="rulers",e[e.scrollbar=103]="scrollbar",e[e.scrollBeyondLastColumn=104]="scrollBeyondLastColumn",e[e.scrollBeyondLastLine=105]="scrollBeyondLastLine",e[e.scrollPredominantAxis=106]="scrollPredominantAxis",e[e.selectionClipboard=107]="selectionClipboard",e[e.selectionHighlight=108]="selectionHighlight",e[e.selectOnLineNumbers=109]="selectOnLineNumbers",e[e.showFoldingControls=110]="showFoldingControls",e[e.showUnused=111]="showUnused",e[e.snippetSuggestions=112]="snippetSuggestions",e[e.smartSelect=113]="smartSelect",e[e.smoothScrolling=114]="smoothScrolling",e[e.stickyScroll=115]="stickyScroll",e[e.stickyTabStops=116]="stickyTabStops",e[e.stopRenderingLineAfter=117]="stopRenderingLineAfter",e[e.suggest=118]="suggest",e[e.suggestFontSize=119]="suggestFontSize",e[e.suggestLineHeight=120]="suggestLineHeight",e[e.suggestOnTriggerCharacters=121]="suggestOnTriggerCharacters",e[e.suggestSelection=122]="suggestSelection",e[e.tabCompletion=123]="tabCompletion",e[e.tabIndex=124]="tabIndex",e[e.unicodeHighlighting=125]="unicodeHighlighting",e[e.unusualLineTerminators=126]="unusualLineTerminators",e[e.useShadowDOM=127]="useShadowDOM",e[e.useTabStops=128]="useTabStops",e[e.wordBreak=129]="wordBreak",e[e.wordSegmenterLocales=130]="wordSegmenterLocales",e[e.wordSeparators=131]="wordSeparators",e[e.wordWrap=132]="wordWrap",e[e.wordW
rapBreakAfterCharacters=133]="wordWrapBreakAfterCharacters",e[e.wordWrapBreakBeforeCharacters=134]="wordWrapBreakBeforeCharacters",e[e.wordWrapColumn=135]="wordWrapColumn",e[e.wordWrapOverride1=136]="wordWrapOverride1",e[e.wordWrapOverride2=137]="wordWrapOverride2",e[e.wrappingIndent=138]="wrappingIndent",e[e.wrappingStrategy=139]="wrappingStrategy",e[e.showDeprecated=140]="showDeprecated",e[e.inlayHints=141]="inlayHints",e[e.editorClassName=142]="editorClassName",e[e.pixelRatio=143]="pixelRatio",e[e.tabFocusMode=144]="tabFocusMode",e[e.layoutInfo=145]="layoutInfo",e[e.wrappingInfo=146]="wrappingInfo",e[e.defaultColorDecorators=147]="defaultColorDecorators",e[e.colorDecoratorsActivatedOn=148]="colorDecoratorsActivatedOn",e[e.inlineCompletionsAccessibilityVerbose=149]="inlineCompletionsAccessibilityVerbose"}(Xn||(Xn={})),function(e){e[e.TextDefined=0]="TextDefined",e[e.LF=1]="LF",e[e.CRLF=2]="CRLF"}(Yn||(Yn={})),function(e){e[e.LF=0]="LF",e[e.CRLF=1]="CRLF"}(Qn||(Qn={})),function(e){e[e.Left=1]="Left",e[e.Center=2]="Center",e[e.Right=3]="Right"}(Zn||(Zn={})),function(e){e[e.None=0]="None",e[e.Indent=1]="Indent",e[e.IndentOutdent=2]="IndentOutdent",e[e.Outdent=3]="Outdent"}(ei||(ei={})),function(e){e[e.Both=0]="Both",e[e.Right=1]="Right",e[e.Left=2]="Left",e[e.None=3]="None"}(ti||(ti={})),function(e){e[e.Type=1]="Type",e[e.Parameter=2]="Parameter"}(ni||(ni={})),function(e){e[e.Automatic=0]="Automatic",e[e.Explicit=1]="Explicit"}(ii||(ii={})),function(e){e[e.Invoke=0]="Invoke",e[e.Automatic=1]="Automatic"}(ri||(ri={})),function(e){e[e.DependsOnKbLayout=-1]="DependsOnKbLayout",e[e.Unknown=0]="Unknown",e[e.Backspace=1]="Backspace",e[e.Tab=2]="Tab",e[e.Enter=3]="Enter",e[e.Shift=4]="Shift",e[e.Ctrl=5]="Ctrl",e[e.Alt=6]="Alt",e[e.PauseBreak=7]="PauseBreak",e[e.CapsLock=8]="CapsLock",e[e.Escape=9]="Escape",e[e.Space=10]="Space",e[e.PageUp=11]="PageUp",e[e.PageDown=12]="PageDown",e[e.End=13]="End",e[e.Home=14]="Home",e[e.LeftArrow=15]="LeftArrow",e[e.UpArrow=16]="UpArrow",e[
e.RightArrow=17]="RightArrow",e[e.DownArrow=18]="DownArrow",e[e.Insert=19]="Insert",e[e.Delete=20]="Delete",e[e.Digit0=21]="Digit0",e[e.Digit1=22]="Digit1",e[e.Digit2=23]="Digit2",e[e.Digit3=24]="Digit3",e[e.Digit4=25]="Digit4",e[e.Digit5=26]="Digit5",e[e.Digit6=27]="Digit6",e[e.Digit7=28]="Digit7",e[e.Digit8=29]="Digit8",e[e.Digit9=30]="Digit9",e[e.KeyA=31]="KeyA",e[e.KeyB=32]="KeyB",e[e.KeyC=33]="KeyC",e[e.KeyD=34]="KeyD",e[e.KeyE=35]="KeyE",e[e.KeyF=36]="KeyF",e[e.KeyG=37]="KeyG",e[e.KeyH=38]="KeyH",e[e.KeyI=39]="KeyI",e[e.KeyJ=40]="KeyJ",e[e.KeyK=41]="KeyK",e[e.KeyL=42]="KeyL",e[e.KeyM=43]="KeyM",e[e.KeyN=44]="KeyN",e[e.KeyO=45]="KeyO",e[e.KeyP=46]="KeyP",e[e.KeyQ=47]="KeyQ",e[e.KeyR=48]="KeyR",e[e.KeyS=49]="KeyS",e[e.KeyT=50]="KeyT",e[e.KeyU=51]="KeyU",e[e.KeyV=52]="KeyV",e[e.KeyW=53]="KeyW",e[e.KeyX=54]="KeyX",e[e.KeyY=55]="KeyY",e[e.KeyZ=56]="KeyZ",e[e.Meta=57]="Meta",e[e.ContextMenu=58]="ContextMenu",e[e.F1=59]="F1",e[e.F2=60]="F2",e[e.F3=61]="F3",e[e.F4=62]="F4",e[e.F5=63]="F5",e[e.F6=64]="F6",e[e.F7=65]="F7",e[e.F8=66]="F8",e[e.F9=67]="F9",e[e.F10=68]="F10",e[e.F11=69]="F11",e[e.F12=70]="F12",e[e.F13=71]="F13",e[e.F14=72]="F14",e[e.F15=73]="F15",e[e.F16=74]="F16",e[e.F17=75]="F17",e[e.F18=76]="F18",e[e.F19=77]="F19",e[e.F20=78]="F20",e[e.F21=79]="F21",e[e.F22=80]="F22",e[e.F23=81]="F23",e[e.F24=82]="F24",e[e.NumLock=83]="NumLock",e[e.ScrollLock=84]="ScrollLock",e[e.Semicolon=85]="Semicolon",e[e.Equal=86]="Equal",e[e.Comma=87]="Comma",e[e.Minus=88]="Minus",e[e.Period=89]="Period",e[e.Slash=90]="Slash",e[e.Backquote=91]="Backquote",e[e.BracketLeft=92]="BracketLeft",e[e.Backslash=93]="Backslash",e[e.BracketRight=94]="BracketRight",e[e.Quote=95]="Quote",e[e.OEM_8=96]="OEM_8",e[e.IntlBackslash=97]="IntlBackslash",e[e.Numpad0=98]="Numpad0",e[e.Numpad1=99]="Numpad1",e[e.Numpad2=100]="Numpad2",e[e.Numpad3=101]="Numpad3",e[e.Numpad4=102]="Numpad4",e[e.Numpad5=103]="Numpad5",e[e.Numpad6=104]="Numpad6",e[e.Numpad7=105]="Numpad7",e[e.Numpad8=106]="Numpad8",e[e.Numpad9
=107]="Numpad9",e[e.NumpadMultiply=108]="NumpadMultiply",e[e.NumpadAdd=109]="NumpadAdd",e[e.NUMPAD_SEPARATOR=110]="NUMPAD_SEPARATOR",e[e.NumpadSubtract=111]="NumpadSubtract",e[e.NumpadDecimal=112]="NumpadDecimal",e[e.NumpadDivide=113]="NumpadDivide",e[e.KEY_IN_COMPOSITION=114]="KEY_IN_COMPOSITION",e[e.ABNT_C1=115]="ABNT_C1",e[e.ABNT_C2=116]="ABNT_C2",e[e.AudioVolumeMute=117]="AudioVolumeMute",e[e.AudioVolumeUp=118]="AudioVolumeUp",e[e.AudioVolumeDown=119]="AudioVolumeDown",e[e.BrowserSearch=120]="BrowserSearch",e[e.BrowserHome=121]="BrowserHome",e[e.BrowserBack=122]="BrowserBack",e[e.BrowserForward=123]="BrowserForward",e[e.MediaTrackNext=124]="MediaTrackNext",e[e.MediaTrackPrevious=125]="MediaTrackPrevious",e[e.MediaStop=126]="MediaStop",e[e.MediaPlayPause=127]="MediaPlayPause",e[e.LaunchMediaPlayer=128]="LaunchMediaPlayer",e[e.LaunchMail=129]="LaunchMail",e[e.LaunchApp2=130]="LaunchApp2",e[e.Clear=131]="Clear",e[e.MAX_VALUE=132]="MAX_VALUE"}(oi||(oi={})),function(e){e[e.Hint=1]="Hint",e[e.Info=2]="Info",e[e.Warning=4]="Warning",e[e.Error=8]="Error"}(si||(si={})),function(e){e[e.Unnecessary=1]="Unnecessary",e[e.Deprecated=2]="Deprecated"}(ai||(ai={})),function(e){e[e.Inline=1]="Inline",e[e.Gutter=2]="Gutter"}(li||(li={})),function(e){e[e.Normal=1]="Normal",e[e.Underlined=2]="Underlined"}(ci||(ci={})),function(e){e[e.UNKNOWN=0]="UNKNOWN",e[e.TEXTAREA=1]="TEXTAREA",e[e.GUTTER_GLYPH_MARGIN=2]="GUTTER_GLYPH_MARGIN",e[e.GUTTER_LINE_NUMBERS=3]="GUTTER_LINE_NUMBERS",e[e.GUTTER_LINE_DECORATIONS=4]="GUTTER_LINE_DECORATIONS",e[e.GUTTER_VIEW_ZONE=5]="GUTTER_VIEW_ZONE",e[e.CONTENT_TEXT=6]="CONTENT_TEXT",e[e.CONTENT_EMPTY=7]="CONTENT_EMPTY",e[e.CONTENT_VIEW_ZONE=8]="CONTENT_VIEW_ZONE",e[e.CONTENT_WIDGET=9]="CONTENT_WIDGET",e[e.OVERVIEW_RULER=10]="OVERVIEW_RULER",e[e.SCROLLBAR=11]="SCROLLBAR",e[e.OVERLAY_WIDGET=12]="OVERLAY_WIDGET",e[e.OUTSIDE_EDITOR=13]="OUTSIDE_EDITOR"}(hi||(hi={})),function(e){e[e.AIGenerated=1]="AIGenerated"}(di||(di={})),function(e){e[e.TOP_RIGHT_CORNER=0]=
"TOP_RIGHT_CORNER",e[e.BOTTOM_RIGHT_CORNER=1]="BOTTOM_RIGHT_CORNER",e[e.TOP_CENTER=2]="TOP_CENTER"}(pi||(pi={})),function(e){e[e.Left=1]="Left",e[e.Center=2]="Center",e[e.Right=4]="Right",e[e.Full=7]="Full"}(ui||(ui={})),function(e){e[e.Word=0]="Word",e[e.Line=1]="Line",e[e.Suggest=2]="Suggest"}(mi||(mi={})),function(e){e[e.Left=0]="Left",e[e.Right=1]="Right",e[e.None=2]="None",e[e.LeftOfInjectedText=3]="LeftOfInjectedText",e[e.RightOfInjectedText=4]="RightOfInjectedText"}(fi||(fi={})),function(e){e[e.Off=0]="Off",e[e.On=1]="On",e[e.Relative=2]="Relative",e[e.Interval=3]="Interval",e[e.Custom=4]="Custom"}(gi||(gi={})),function(e){e[e.None=0]="None",e[e.Text=1]="Text",e[e.Blocks=2]="Blocks"}(bi||(bi={})),function(e){e[e.Smooth=0]="Smooth",e[e.Immediate=1]="Immediate"}(vi||(vi={})),function(e){e[e.Auto=1]="Auto",e[e.Hidden=2]="Hidden",e[e.Visible=3]="Visible"}(yi||(yi={})),function(e){e[e.LTR=0]="LTR",e[e.RTL=1]="RTL"}(wi||(wi={})),function(e){e.Off="off",e.OnCode="onCode",e.On="on"}(xi||(xi={})),function(e){e[e.Invoke=1]="Invoke",e[e.TriggerCharacter=2]="TriggerCharacter",e[e.ContentChange=3]="ContentChange"}(Si||(Si={})),function(e){e[e.File=0]="File",e[e.Module=1]="Module",e[e.Namespace=2]="Namespace",e[e.Package=3]="Package",e[e.Class=4]="Class",e[e.Method=5]="Method",e[e.Property=6]="Property",e[e.Field=7]="Field",e[e.Constructor=8]="Constructor",e[e.Enum=9]="Enum",e[e.Interface=10]="Interface",e[e.Function=11]="Function",e[e.Variable=12]="Variable",e[e.Constant=13]="Constant",e[e.String=14]="String",e[e.Number=15]="Number",e[e.Boolean=16]="Boolean",e[e.Array=17]="Array",e[e.Object=18]="Object",e[e.Key=19]="Key",e[e.Null=20]="Null",e[e.EnumMember=21]="EnumMember",e[e.Struct=22]="Struct",e[e.Event=23]="Event",e[e.Operator=24]="Operator",e[e.TypeParameter=25]="TypeParameter"}(Ci||(Ci={})),function(e){e[e.Deprecated=1]="Deprecated"}(_i||(_i={})),function(e){e[e.Hidden=0]="Hidden",e[e.Blink=1]="Blink",e[e.Smooth=2]="Smooth",e[e.Phase=3]="Phase",e[e.Expand=4]="Expand"
,e[e.Solid=5]="Solid"}(ki||(ki={})),function(e){e[e.Line=1]="Line",e[e.Block=2]="Block",e[e.Underline=3]="Underline",e[e.LineThin=4]="LineThin",e[e.BlockOutline=5]="BlockOutline",e[e.UnderlineThin=6]="UnderlineThin"}(Ei||(Ei={})),function(e){e[e.AlwaysGrowsWhenTypingAtEdges=0]="AlwaysGrowsWhenTypingAtEdges",e[e.NeverGrowsWhenTypingAtEdges=1]="NeverGrowsWhenTypingAtEdges",e[e.GrowsOnlyWhenTypingBefore=2]="GrowsOnlyWhenTypingBefore",e[e.GrowsOnlyWhenTypingAfter=3]="GrowsOnlyWhenTypingAfter"}(Ri||(Ri={})),function(e){e[e.None=0]="None",e[e.Same=1]="Same",e[e.Indent=2]="Indent",e[e.DeepIndent=3]="DeepIndent"}(Ni||(Ni={}));class Ti{static chord(e,t){return function(e,t){return(e|(65535&t)<<16>>>0)>>>0}(e,t)}}Ti.CtrlCmd=2048,Ti.Shift=1024,Ti.Alt=512,Ti.WinCtrl=256;class Ai{constructor(e,t){this.uri=e,this.value=t}}class Mi{constructor(e,t){if(this[Fi]="ResourceMap",e instanceof Mi)this.map=new Map(e.map),this.toKey=null!==t&&void 0!==t?t:Mi.defaultToKey;else if(function(e){return Array.isArray(e)}(e)){this.map=new Map,this.toKey=null!==t&&void 0!==t?t:Mi.defaultToKey;for(const[t,n]of e)this.set(t,n)}else this.map=new Map,this.toKey=null!==e&&void 0!==e?e:Mi.defaultToKey}set(e,t){return this.map.set(this.toKey(e),new Ai(e,t)),this}get(e){var t;return null===(t=this.map.get(this.toKey(e)))||void 0===t?void 0:t.value}has(e){return this.map.has(this.toKey(e))}get size(){return this.map.size}clear(){this.map.clear()}delete(e){return this.map.delete(this.toKey(e))}forEach(e,t){"undefined"!==typeof t&&(e=e.bind(t));for(const[n,i]of this.map)e(i.value,i.uri,this)}*values(){for(const e of this.map.values())yield e.value}*keys(){for(const e of this.map.values())yield e.uri}*entries(){for(const e of this.map.values())yield[e.uri,e.value]}*[(Fi=Symbol.toStringTag,Symbol.iterator)](){for(const[,e]of this.map)yield[e.uri,e.value]}}Mi.defaultToKey=e=>e.toString();class zi{constructor(){this[Di]="LinkedMap",this._map=new Map,this._head=void 0,this._tail=void 
0,this._size=0,this._state=0}clear(){this._map.clear(),this._head=void 0,this._tail=void 0,this._size=0,this._state++}isEmpty(){return!this._head&&!this._tail}get size(){return this._size}get first(){var e;return null===(e=this._head)||void 0===e?void 0:e.value}get last(){var e;return null===(e=this._tail)||void 0===e?void 0:e.value}has(e){return this._map.has(e)}get(e){let t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:0;const n=this._map.get(e);if(n)return 0!==t&&this.touch(n,t),n.value}set(e,t){let n=arguments.length>2&&void 0!==arguments[2]?arguments[2]:0,i=this._map.get(e);if(i)i.value=t,0!==n&&this.touch(i,n);else{switch(i={key:e,value:t,next:void 0,previous:void 0},n){case 0:case 2:default:this.addItemLast(i);break;case 1:this.addItemFirst(i)}this._map.set(e,i),this._size++}return this}delete(e){return!!this.remove(e)}remove(e){const t=this._map.get(e);if(t)return this._map.delete(e),this.removeItem(t),this._size--,t.value}shift(){if(!this._head&&!this._tail)return;if(!this._head||!this._tail)throw new Error("Invalid list");const e=this._head;return this._map.delete(e.key),this.removeItem(e),this._size--,e.value}forEach(e,t){const n=this._state;let i=this._head;for(;i;){if(t?e.bind(t)(i.value,i.key,this):e(i.value,i.key,this),this._state!==n)throw new Error("LinkedMap got modified during iteration.");i=i.next}}keys(){const e=this,t=this._state;let n=this._head;const i={[Symbol.iterator]:()=>i,next(){if(e._state!==t)throw new Error("LinkedMap got modified during iteration.");if(n){const e={value:n.key,done:!1};return n=n.next,e}return{value:void 0,done:!0}}};return i}values(){const e=this,t=this._state;let n=this._head;const i={[Symbol.iterator]:()=>i,next(){if(e._state!==t)throw new Error("LinkedMap got modified during iteration.");if(n){const e={value:n.value,done:!1};return n=n.next,e}return{value:void 0,done:!0}}};return i}entries(){const e=this,t=this._state;let n=this._head;const i={[Symbol.iterator]:()=>i,next(){if(e._state!==t)throw new 
Error("LinkedMap got modified during iteration.");if(n){const e={value:[n.key,n.value],done:!1};return n=n.next,e}return{value:void 0,done:!0}}};return i}[(Di=Symbol.toStringTag,Symbol.iterator)](){return this.entries()}trimOld(e){if(e>=this.size)return;if(0===e)return void this.clear();let t=this._head,n=this.size;for(;t&&n>e;)this._map.delete(t.key),t=t.next,n--;this._head=t,this._size=n,t&&(t.previous=void 0),this._state++}addItemFirst(e){if(this._head||this._tail){if(!this._head)throw new Error("Invalid list");e.next=this._head,this._head.previous=e}else this._tail=e;this._head=e,this._state++}addItemLast(e){if(this._head||this._tail){if(!this._tail)throw new Error("Invalid list");e.previous=this._tail,this._tail.next=e}else this._head=e;this._tail=e,this._state++}removeItem(e){if(e===this._head&&e===this._tail)this._head=void 0,this._tail=void 0;else if(e===this._head){if(!e.next)throw new Error("Invalid list");e.next.previous=void 0,this._head=e.next}else if(e===this._tail){if(!e.previous)throw new Error("Invalid list");e.previous.next=void 0,this._tail=e.previous}else{const t=e.next,n=e.previous;if(!t||!n)throw new Error("Invalid list");t.previous=n,n.next=t}e.next=void 0,e.previous=void 0,this._state++}touch(e,t){if(!this._head||!this._tail)throw new Error("Invalid list");if(1===t||2===t)if(1===t){if(e===this._head)return;const t=e.next,n=e.previous;e===this._tail?(n.next=void 0,this._tail=n):(t.previous=n,n.next=t),e.previous=void 0,e.next=this._head,this._head.previous=e,this._head=e,this._state++}else if(2===t){if(e===this._tail)return;const t=e.next,n=e.previous;e===this._head?(t.previous=void 0,this._head=t):(t.previous=n,n.next=t),e.next=void 0,e.previous=this._tail,this._tail.next=e,this._tail=e,this._state++}}toJSON(){const e=[];return this.forEach(((t,n)=>{e.push([n,t])})),e}fromJSON(e){this.clear();for(const[t,n]of e)this.set(t,n)}}class Ii{constructor(){this.map=new Map}add(e,t){let n=this.map.get(e);n||(n=new 
Set,this.map.set(e,n)),n.add(t)}delete(e,t){const n=this.map.get(e);n&&(n.delete(t),0===n.size&&this.map.delete(e))}forEach(e,t){const n=this.map.get(e);n&&n.forEach(t)}get(e){const t=this.map.get(e);return t||new Set}}new class extends zi{constructor(e){let t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:1;super(),this._limit=e,this._ratio=Math.min(Math.max(0,t),1)}get limit(){return this._limit}set limit(e){this._limit=e,this.checkTrim()}get(e){let t=arguments.length>1&&void 0!==arguments[1]?arguments[1]:2;return super.get(e,t)}peek(e){return super.get(e,0)}set(e,t){return super.set(e,t,2),this.checkTrim(),this}checkTrim(){this.size>this._limit&&this.trimOld(Math.round(this._limit*this._ratio))}}(10);var Li,Pi,Oi;!function(e){e[e.Left=1]="Left",e[e.Center=2]="Center",e[e.Right=4]="Right",e[e.Full=7]="Full"}(Li||(Li={})),function(e){e[e.Left=1]="Left",e[e.Center=2]="Center",e[e.Right=3]="Right"}(Pi||(Pi={})),function(e){e[e.Both=0]="Both",e[e.Right=1]="Right",e[e.Left=2]="Left",e[e.None=3]="None"}(Oi||(Oi={}));function Wi(e,t,n,i,r){return function(e,t,n,i,r){if(0===i)return!0;const o=t.charCodeAt(i-1);if(0!==e.get(o))return!0;if(13===o||10===o)return!0;if(r>0){const n=t.charCodeAt(i);if(0!==e.get(n))return!0}return!1}(e,t,0,i,r)&&function(e,t,n,i,r){if(i+r===n)return!0;const o=t.charCodeAt(i+r);if(0!==e.get(o))return!0;if(13===o||10===o)return!0;if(r>0){const n=t.charCodeAt(i+r-1);if(0!==e.get(n))return!0}return!1}(e,t,n,i,r)}class Vi{constructor(e,t){this._wordSeparators=e,this._searchRegex=t,this._prevMatchStartIndex=-1,this._prevMatchLength=0}reset(e){this._searchRegex.lastIndex=e,this._prevMatchStartIndex=-1,this._prevMatchLength=0}next(e){const t=e.length;let n;do{if(this._prevMatchStartIndex+this._prevMatchLength===t)return null;if(n=this._searchRegex.exec(e),!n)return null;const 
i=n.index,r=n[0].length;if(i===this._prevMatchStartIndex&&r===this._prevMatchLength){if(0===r){ye(e,t,this._searchRegex.lastIndex)>65535?this._searchRegex.lastIndex+=2:this._searchRegex.lastIndex+=1;continue}return null}if(this._prevMatchStartIndex=i,this._prevMatchLength=r,!this._wordSeparators||Wi(this._wordSeparators,e,t,i,r))return n}while(n);return null}}function Ui(e){throw new Error(arguments.length>1&&void 0!==arguments[1]?arguments[1]:"Unreachable")}function Ki(e){e()||(e(),t(new l("Assertion Failed")))}function qi(e,t){let n=0;for(;nString.fromCodePoint(e))).join(""))}]`,"g");const l=new Vi(null,a),c=[];let h,d=!1,p=0,u=0,m=0;e:for(let f=i,g=r;f<=g;f++){const t=e.getLineContent(f),n=t.length;l.reset(0);do{if(h=l.next(t),h){let e=h.index,i=h.index+h[0].length;if(e>0){ge(t.charCodeAt(e-1))&&e--}if(i+1=t){d=!0;break e}c.push(new It(f,e+1,f,i+1))}}}while(h)}return{ranges:c,hasMore:d,ambiguousCharacterCount:p,invisibleCharacterCount:u,nonBasicAsciiCharacterCount:m}}static computeUnicodeHighlightReason(e,t){const n=new ji(t);switch(n.shouldHighlightNonBasicASCII(e,null)){case 0:return null;case 2:return{kind:1};case 3:{const i=e.codePointAt(0),r=n.ambiguousCharacters.getPrimaryConfusable(i),o=Se.getLocales().filter((e=>!Se.getInstance(new Set([...t.allowedLocales,e])).isAmbiguous(i)));return{kind:0,confusableWith:String.fromCodePoint(r),notAmbiguousInLocales:o}}case 1:return{kind:2}}}}class ji{constructor(e){this.options=e,this.allowedCodePoints=new Set(e.allowedCodePoints),this.ambiguousCharacters=Se.getInstance(new Set(e.allowedLocales))}getCandidateCodePoints(){if(this.options.nonBasicASCII)return"allNonBasicAscii";const e=new Set;if(this.options.invisibleCharacters)for(const t of Ce.codePoints)$i(String.fromCodePoint(t))||e.add(t);if(this.options.ambiguousCharacters)for(const t of this.ambiguousCharacters.getConfusableCodePoints())e.add(t);for(const t of this.allowedCodePoints)e.delete(t);return e}shouldHighlightNonBasicASCII(e,t){const 
n=e.codePointAt(0);if(this.allowedCodePoints.has(n))return 0;if(this.options.nonBasicASCII)return 1;let i=!1,r=!1;if(t)for(const s of t){const e=s.codePointAt(0),t=(o=s,we.test(o));i=i||t,t||this.ambiguousCharacters.isAmbiguous(e)||Ce.isInvisibleCharacter(e)||(r=!0)}var o;return!i&&r?0:this.options.invisibleCharacters&&!$i(e)&&Ce.isInvisibleCharacter(n)?2:this.options.ambiguousCharacters&&this.ambiguousCharacters.isAmbiguous(n)?3:0}}function $i(e){return" "===e||"\n"===e||"\t"===e}class Hi{constructor(e,t,n){this.changes=e,this.moves=t,this.hitTimeout=n}}class Gi{constructor(e,t){this.lineRangeMapping=e,this.changes=t}}class Ji{static addRange(e,t){let n=0;for(;nt))return new Ji(e,t)}static ofLength(e){return new Ji(0,e)}static ofStartAndLength(e,t){return new Ji(e,e+t)}constructor(e,t){if(this.start=e,this.endExclusive=t,e>t)throw new l(`Invalid range: ${this.toString()}`)}get isEmpty(){return this.start===this.endExclusive}delta(e){return new Ji(this.start+e,this.endExclusive+e)}deltaStart(e){return new Ji(this.start+e,this.endExclusive)}deltaEnd(e){return new Ji(this.start,this.endExclusive+e)}get length(){return this.endExclusive-this.start}toString(){return`[${this.start}, ${this.endExclusive})`}contains(e){return this.start<=e&&e=e.endExclusive}slice(e){return e.slice(this.start,this.endExclusive)}substring(e){return e.substring(this.start,this.endExclusive)}clip(e){if(this.isEmpty)throw new l(`Invalid clipping range: ${this.toString()}`);return Math.max(this.start,Math.min(this.endExclusive-1,e))}clipCyclic(e){if(this.isEmpty)throw new l(`Invalid clipping range: ${this.toString()}`);return e=this.endExclusive?this.start+(e-this.start)%this.length:e}forEach(e){for(let t=this.start;t2&&void 0!==arguments[2]?arguments[2]:0,i=arguments.length>3&&void 0!==arguments[3]?arguments[3]:e.length;for(;n2&&void 0!==arguments[2]?arguments[2]:0,i=arguments.length>3&&void 0!==arguments[3]?arguments[3]:e.length;for(;nt)throw new l(`startLineNumber ${e} cannot be after 
endLineNumberExclusive ${t}`);this.startLineNumber=e,this.endLineNumberExclusive=t}contains(e){return this.startLineNumber<=e&&e0&&void 0!==arguments[0]?arguments[0]:[];this._normalizedRanges=e}get ranges(){return this._normalizedRanges}addRange(e){if(0===e.length)return;const t=Qi(this._normalizedRanges,(t=>t.endLineNumberExclusive>=e.startLineNumber)),n=Yi(this._normalizedRanges,(t=>t.startLineNumber<=e.endLineNumberExclusive))+1;if(t===n)this._normalizedRanges.splice(t,0,e);else if(t===n-1){const n=this._normalizedRanges[t];this._normalizedRanges[t]=n.join(e)}else{const i=this._normalizedRanges[t].join(this._normalizedRanges[n-1]).join(e);this._normalizedRanges.splice(t,n-t,i)}}contains(e){const t=Xi(this._normalizedRanges,(t=>t.startLineNumber<=e));return!!t&&t.endLineNumberExclusive>e}intersects(e){const t=Xi(this._normalizedRanges,(t=>t.startLineNumbere.startLineNumber}getUnion(e){if(0===this._normalizedRanges.length)return e;if(0===e._normalizedRanges.length)return this;const t=[];let n=0,i=0,r=null;for(;n=o.startLineNumber?r=new er(r.startLineNumber,Math.max(r.endLineNumberExclusive,o.endLineNumberExclusive)):(t.push(r),r=o)}return null!==r&&t.push(r),new tr(t)}subtractFrom(e){const t=Qi(this._normalizedRanges,(t=>t.endLineNumberExclusive>=e.startLineNumber)),n=Yi(this._normalizedRanges,(t=>t.startLineNumber<=e.endLineNumberExclusive))+1;if(t===n)return new tr([e]);const i=[];let r=e.startLineNumber;for(let o=t;or&&i.push(new er(r,e.startLineNumber)),r=e.endLineNumberExclusive}return re.toString())).join(", ")}getIntersection(e){const t=[];let n=0,i=0;for(;nt.delta(e))))}}class nr{static inverse(e,t,n){const i=[];let r=1,o=1;for(const a of e){const e=new nr(new er(r,a.original.startLineNumber),new er(o,a.modified.startLineNumber));e.modified.isEmpty||i.push(e),r=a.original.endLineNumberExclusive,o=a.modified.endLineNumberExclusive}const s=new nr(new er(r,t+1),new er(o,n+1));return s.modified.isEmpty||i.push(s),i}static clip(e,t,n){const i=[];for(const r of 
e){const e=r.original.intersect(t),o=r.modified.intersect(n);e&&!e.isEmpty&&o&&!o.isEmpty&&i.push(new nr(e,o))}return i}constructor(e,t){this.original=e,this.modified=t}toString(){return`{${this.original.toString()}->${this.modified.toString()}}`}flip(){return new nr(this.modified,this.original)}join(e){return new nr(this.original.join(e.original),this.modified.join(e.modified))}}class ir extends nr{static fromRangeMappings(e){const t=er.join(e.map((e=>er.fromRangeInclusive(e.originalRange)))),n=er.join(e.map((e=>er.fromRangeInclusive(e.modifiedRange))));return new ir(t,n,e)}constructor(e,t,n){super(e,t),this.innerChanges=n}flip(){var e;return new ir(this.modified,this.original,null===(e=this.innerChanges)||void 0===e?void 0:e.map((e=>e.flip())))}withInnerChangesFromLineRanges(){return new ir(this.original,this.modified,[new rr(this.original.toExclusiveRange(),this.modified.toExclusiveRange())])}}class rr{constructor(e,t){this.originalRange=e,this.modifiedRange=t}toString(){return`{${this.originalRange.toString()}->${this.modifiedRange.toString()}}`}flip(){return new rr(this.modifiedRange,this.originalRange)}}const or=3;class sr{computeDiff(e,t,n){var i;const r=new pr(e,t,{maxComputationTime:n.maxComputationTimeMs,shouldIgnoreTrimWhitespace:n.ignoreTrimWhitespace,shouldComputeCharChanges:!0,shouldMakePrettyDiff:!0,shouldPostProcessCharChanges:!0}).computeDiff(),o=[];let s=null;for(const a of r.changes){let e,t;e=0===a.originalEndLineNumber?new er(a.originalStartLineNumber+1,a.originalStartLineNumber+1):new er(a.originalStartLineNumber,a.originalEndLineNumber+1),t=0===a.modifiedEndLineNumber?new er(a.modifiedStartLineNumber+1,a.modifiedStartLineNumber+1):new er(a.modifiedStartLineNumber,a.modifiedEndLineNumber+1);let n=new ir(e,t,null===(i=a.charChanges)||void 0===i?void 0:i.map((e=>new rr(new It(e.originalStartLineNumber,e.originalStartColumn,e.originalEndLineNumber,e.originalEndColumn),new 
It(e.modifiedStartLineNumber,e.modifiedStartColumn,e.modifiedEndLineNumber,e.modifiedEndColumn)))));s&&(s.modified.endLineNumberExclusive!==n.modified.startLineNumber&&s.original.endLineNumberExclusive!==n.original.startLineNumber||(n=new ir(s.original.join(n.original),s.modified.join(n.modified),s.innerChanges&&n.innerChanges?s.innerChanges.concat(n.innerChanges):void 0),o.pop())),o.push(n),s=n}return Ki((()=>qi(o,((e,t)=>t.original.startLineNumber-e.original.endLineNumberExclusive===t.modified.startLineNumber-e.modified.endLineNumberExclusive&&e.original.endLineNumberExclusive(10===e?"\\n":String.fromCharCode(e))+`-(${this._lineNumbers[t]},${this._columns[t]})`)).join(", ")+"]"}_assertIndex(e,t){if(e<0||e>=t.length)throw new Error("Illegal index")}getElements(){return this._charCodes}getStartLineNumber(e){return e>0&&e===this._lineNumbers.length?this.getEndLineNumber(e-1):(this._assertIndex(e,this._lineNumbers),this._lineNumbers[e])}getEndLineNumber(e){return-1===e?this.getStartLineNumber(e+1):(this._assertIndex(e,this._lineNumbers),10===this._charCodes[e]?this._lineNumbers[e]+1:this._lineNumbers[e])}getStartColumn(e){return e>0&&e===this._columns.length?this.getEndColumn(e-1):(this._assertIndex(e,this._columns),this._columns[e])}getEndColumn(e){return-1===e?this.getStartColumn(e+1):(this._assertIndex(e,this._columns),10===this._charCodes[e]?1:this._columns[e]+1)}}class hr{constructor(e,t,n,i,r,o,s,a){this.originalStartLineNumber=e,this.originalStartColumn=t,this.originalEndLineNumber=n,this.originalEndColumn=i,this.modifiedStartLineNumber=r,this.modifiedStartColumn=o,this.modifiedEndLineNumber=s,this.modifiedEndColumn=a}static createFromDiffChange(e,t,n){const 
i=t.getStartLineNumber(e.originalStart),r=t.getStartColumn(e.originalStart),o=t.getEndLineNumber(e.originalStart+e.originalLength-1),s=t.getEndColumn(e.originalStart+e.originalLength-1),a=n.getStartLineNumber(e.modifiedStart),l=n.getStartColumn(e.modifiedStart),c=n.getEndLineNumber(e.modifiedStart+e.modifiedLength-1),h=n.getEndColumn(e.modifiedStart+e.modifiedLength-1);return new hr(i,r,o,s,a,l,c,h)}}class dr{constructor(e,t,n,i,r){this.originalStartLineNumber=e,this.originalEndLineNumber=t,this.modifiedStartLineNumber=n,this.modifiedEndLineNumber=i,this.charChanges=r}static createFromDiffResult(e,t,n,i,r,o,s){let a,l,c,h,d;if(0===t.originalLength?(a=n.getStartLineNumber(t.originalStart)-1,l=0):(a=n.getStartLineNumber(t.originalStart),l=n.getEndLineNumber(t.originalStart+t.originalLength-1)),0===t.modifiedLength?(c=i.getStartLineNumber(t.modifiedStart)-1,h=0):(c=i.getStartLineNumber(t.modifiedStart),h=i.getEndLineNumber(t.modifiedStart+t.modifiedLength-1)),o&&t.originalLength>0&&t.originalLength<20&&t.modifiedLength>0&&t.modifiedLength<20&&r()){const o=n.createCharSequence(e,t.originalStart,t.originalStart+t.originalLength-1),a=i.createCharSequence(e,t.modifiedStart,t.modifiedStart+t.modifiedLength-1);if(o.getElements().length>0&&a.getElements().length>0){let e=ar(o,a,r,!0).changes;s&&(e=function(e){if(e.length<=1)return e;const t=[e[0]];let n=t[0];for(let i=1,r=e.length;i1&&s>1;){if(e.charCodeAt(n-2)!==t.charCodeAt(s-2))break;n--,s--}(n>1||s>1)&&this._pushTrimWhitespaceCharChange(i,r+1,1,n,o+1,1,s)}{let n=mr(e,1),s=mr(t,1);const a=e.length+1,l=t.length+1;for(;n1&&void 0!==arguments[1]?arguments[1]:e.length-1;t>=0;t--){const n=e.charCodeAt(t);if(32!==n&&9!==n)return t}return-1}(e);return-1===n?t:n+2}function fr(e){if(0===e)return()=>!0;const t=Date.now();return()=>Date.now()-t{n.push(br.fromOffsetPairs(e?e.getEndExclusives():vr.zero,i?i.getStarts():new vr(t,(e?e.seq2Range.endExclusive-e.seq1Range.endExclusive:0)+t)))})),n}static fromOffsetPairs(e,t){return new 
br(new Ji(e.offset1,t.offset1),new Ji(e.offset2,t.offset2))}constructor(e,t){this.seq1Range=e,this.seq2Range=t}swap(){return new br(this.seq2Range,this.seq1Range)}toString(){return`${this.seq1Range} <-> ${this.seq2Range}`}join(e){return new br(this.seq1Range.join(e.seq1Range),this.seq2Range.join(e.seq2Range))}delta(e){return 0===e?this:new br(this.seq1Range.delta(e),this.seq2Range.delta(e))}deltaStart(e){return 0===e?this:new br(this.seq1Range.deltaStart(e),this.seq2Range.deltaStart(e))}deltaEnd(e){return 0===e?this:new br(this.seq1Range.deltaEnd(e),this.seq2Range.deltaEnd(e))}intersect(e){const t=this.seq1Range.intersect(e.seq1Range),n=this.seq2Range.intersect(e.seq2Range);if(t&&n)return new br(t,n)}getStarts(){return new vr(this.seq1Range.start,this.seq2Range.start)}getEndExclusives(){return new vr(this.seq1Range.endExclusive,this.seq2Range.endExclusive)}}class vr{constructor(e,t){this.offset1=e,this.offset2=t}toString(){return`${this.offset1} <-> ${this.offset2}`}delta(e){return 0===e?this:new vr(this.offset1+e,this.offset2+e)}equals(e){return this.offset1===e.offset1&&this.offset2===e.offset2}}vr.zero=new vr(0,0),vr.max=new vr(Number.MAX_SAFE_INTEGER,Number.MAX_SAFE_INTEGER);class yr{isValid(){return!0}}yr.instance=new yr;class wr{constructor(e){if(this.timeout=e,this.startTime=Date.now(),this.valid=!0,e<=0)throw new l("timeout must be positive")}isValid(){return!(Date.now()-this.startTime2&&void 0!==arguments[2]?arguments[2]:yr.instance,i=arguments.length>3?arguments[3]:void 0;if(0===e.length||0===t.length)return gr.trivial(e,t);const r=new xr(e.length,t.length),o=new xr(e.length,t.length),s=new xr(e.length,t.length);for(let u=0;u0&&a>0&&3===o.get(u-1,a-1)&&(h+=s.get(u-1,a-1)),h+=i?i(u,a):1):h=-1;const d=Math.max(l,c,h);if(d===h){const e=u>0&&a>0?s.get(u-1,a-1):0;s.set(u,a,e+1),o.set(u,a,3)}else d===l?(s.set(u,a,0),o.set(u,a,1)):d===c&&(s.set(u,a,0),o.set(u,a,2));r.set(u,a,d)}const a=[];let l=e.length,c=t.length;function h(e,t){e+1===l&&t+1===c||a.push(new 
br(new Ji(e+1,l),new Ji(t+1,c))),l=e,c=t}let d=e.length-1,p=t.length-1;for(;d>=0&&p>=0;)3===o.get(d,p)?(h(d,p),d--,p--):1===o.get(d,p)?d--:p--;return h(-1,-1),a.reverse(),new gr(a,!1)}}class kr{compute(e,t){let n=arguments.length>2&&void 0!==arguments[2]?arguments[2]:yr.instance;if(0===e.length||0===t.length)return gr.trivial(e,t);const i=e,r=t;function o(e,t){for(;ei.length||p>r.length)continue;const u=o(d,p);a.set(c,u);const m=d===s?l.get(c+1):l.get(c-1);if(l.set(c,u!==d?new Er(m,d,p,u-d):m),a.get(c)===i.length&&a.get(c)-c===r.length)break e}}let h=l.get(c);const d=[];let p=i.length,u=r.length;for(;;){const e=h?h.x+h.length:0,t=h?h.y+h.length:0;if(e===p&&t===u||d.push(new br(new Ji(e,p),new Ji(t,u))),!h)break;p=h.x,u=h.y,h=h.prev}return d.reverse(),new gr(d,!1)}}class Er{constructor(e,t,n,i){this.prev=e,this.x=t,this.y=n,this.length=i}}class Rr{constructor(){this.positiveArr=new Int32Array(10),this.negativeArr=new Int32Array(10)}get(e){return e<0?(e=-e-1,this.negativeArr[e]):this.positiveArr[e]}set(e,t){if(e<0){if((e=-e-1)>=this.negativeArr.length){const e=this.negativeArr;this.negativeArr=new Int32Array(2*e.length),this.negativeArr.set(e)}this.negativeArr[e]=t}else{if(e>=this.positiveArr.length){const e=this.positiveArr;this.positiveArr=new Int32Array(2*e.length),this.positiveArr.set(e)}this.positiveArr[e]=t}}}class Nr{constructor(){this.positiveArr=[],this.negativeArr=[]}get(e){return e<0?(e=-e-1,this.negativeArr[e]):this.positiveArr[e]}set(e,t){e<0?(e=-e-1,this.negativeArr[e]=t):this.positiveArr[e]=t}}class Fr{constructor(e,t,n){this.lines=e,this.considerWhitespaceChanges=n,this.elements=[],this.firstCharOffsetByLine=[],this.additionalOffsetByLine=[];let i=!1;t.start>0&&t.endExclusive>=e.length&&(t=new Ji(t.start-1,t.endExclusive),i=!0),this.lineRange=t,this.firstCharOffsetByLine[0]=0;for(let r=this.lineRange.start;rString.fromCharCode(e))).join("")}getElement(e){return this.elements[e]}get length(){return this.elements.length}getBoundaryScore(e){const 
t=Mr(e>0?this.elements[e-1]:-1),n=Mr(et<=e));return new zt(this.lineRange.start+t+1,e-this.firstCharOffsetByLine[t]+this.additionalOffsetByLine[t]+1)}translateRange(e){return It.fromPositions(this.translateOffset(e.start),this.translateOffset(e.endExclusive))}findWordContaining(e){if(e<0||e>=this.elements.length)return;if(!Dr(this.elements[e]))return;let t=e;for(;t>0&&Dr(this.elements[t-1]);)t--;let n=e;for(;nt<=e.start)))&&void 0!==t?t:0,r=null!==(n=function(e,t){const n=Qi(e,t);return n===e.length?void 0:e[n]}(this.firstCharOffsetByLine,(t=>e.endExclusive<=t)))&&void 0!==n?n:this.elements.length;return new Ji(i,r)}}function Dr(e){return e>=97&&e<=122||e>=65&&e<=90||e>=48&&e<=57}const Tr={0:0,1:0,2:0,3:10,4:2,5:30,6:3,7:10,8:10};function Ar(e){return Tr[e]}function Mr(e){return 10===e?8:13===e?7:Sr(e)?6:e>=97&&e<=122?0:e>=65&&e<=90?1:e>=48&&e<=57?2:-1===e?3:44===e||59===e?5:4}function zr(e,t,n,i,r,o){let{moves:s,excludedChanges:a}=function(e,t,n,i){const r=[],o=e.filter((e=>e.modified.isEmpty&&e.original.length>=3)).map((e=>new Cr(e.original,t,e))),s=new Set(e.filter((e=>e.original.isEmpty&&e.modified.length>=3)).map((e=>new Cr(e.modified,n,e)))),a=new Set;for(const l of o){let e,t=-1;for(const n of s){const i=l.computeSimilarity(n);i>t&&(t=i,e=n)}if(t>.9&&e&&(s.delete(e),r.push(new nr(l.range,e.range)),a.add(l.source),a.add(e.source)),!i.isValid())return{moves:r,excludedChanges:a}}return{moves:r,excludedChanges:a}}(e,t,n,o);if(!o.isValid())return[];const l=function(e,t,n,i,r,o){const s=[],a=new Ii;for(const u of e)for(let e=u.original.startLineNumber;ee.modified.startLineNumber),Ot));for(const u of e){let e=[];for(let t=u.modified.startLineNumber;t{let{range:n}=t;for(const s of e)if(s.originalLineRange.endLineNumberExclusive+1===n.endLineNumberExclusive&&s.modifiedLineRange.endLineNumberExclusive+1===r.endLineNumberExclusive)return s.originalLineRange=new er(s.originalLineRange.startLineNumber,n.endLineNumberExclusive),s.modifiedLineRange=new 
er(s.modifiedLineRange.startLineNumber,r.endLineNumberExclusive),void o.push(s);const i={modifiedLineRange:r,originalLineRange:n};l.push(i),o.push(i)})),e=o}if(!o.isValid())return[]}l.sort((c=Pt((e=>e.modifiedLineRange.length),Ot),(e,t)=>-c(e,t)));var c;const h=new tr,d=new tr;for(const u of l){const e=u.modifiedLineRange.startLineNumber-u.originalLineRange.startLineNumber,t=h.subtractFrom(u.modifiedLineRange),n=d.subtractFrom(u.originalLineRange).getWithDelta(e),i=t.getIntersection(n);for(const r of i.ranges){if(r.length<3)continue;const t=r,n=r.delta(-e);s.push(new nr(n,t)),h.addRange(t),d.addRange(n)}}s.sort(Pt((e=>e.original.startLineNumber),Ot));const p=new Zi(e);for(let u=0;ue.original.startLineNumber<=t.original.startLineNumber)),a=Xi(e,(e=>e.modified.startLineNumber<=t.modified.startLineNumber)),l=Math.max(t.original.startLineNumber-n.original.startLineNumber,t.modified.startLineNumber-a.modified.startLineNumber),c=p.findLastMonotonous((e=>e.original.startLineNumbere.modified.startLineNumberi.length||n>r.length)break;if(h.contains(n)||d.contains(e))break;if(!Ir(i[e-1],r[n-1],o))break}for(g>0&&(d.addRange(new er(t.original.startLineNumber-g,t.original.startLineNumber)),h.addRange(new er(t.modified.startLineNumber-g,t.modified.startLineNumber))),b=0;bi.length||n>r.length)break;if(h.contains(n)||d.contains(e))break;if(!Ir(i[e-1],r[n-1],o))break}b>0&&(d.addRange(new er(t.original.endLineNumberExclusive,t.original.endLineNumberExclusive+b)),h.addRange(new er(t.modified.endLineNumberExclusive,t.modified.endLineNumberExclusive+b))),(g>0||b>0)&&(s[u]=new nr(new er(t.original.startLineNumber-g,t.original.endLineNumberExclusive+b),new er(t.modified.startLineNumber-g,t.modified.endLineNumberExclusive+b)))}return s}(e.filter((e=>!a.has(e))),i,r,t,n,o);return function(e,t){for(const n of t)e.push(n)}(s,l),s=function(e){if(0===e.length)return e;e.sort(Pt((e=>e.original.startLineNumber),Ot));const t=[e[0]];for(let 
n=1;n=0&&s>=0&&o+s<=2?t[t.length-1]=i.join(r):t.push(r)}return t}(s),s=s.filter((e=>{const n=e.original.toOffsetRange().slice(t).map((e=>e.trim()));return n.join("\n").length>=15&&function(e,t){let n=0;for(const i of e)t(i)&&n++;return n}(n,(e=>e.length>=2))>=2})),s=function(e,t){const n=new Zi(e);return t=t.filter((t=>(n.findLastMonotonous((e=>e.original.startLineNumbere.modified.startLineNumber300&&t.length>300)return!1;const i=(new kr).compute(new Fr([e],new Ji(0,1),!1),new Fr([t],new Ji(0,1),!1),n);let r=0;const o=br.invert(i.diffs,e.length);for(const a of o)a.seq1Range.forEach((t=>{Sr(e.charCodeAt(t))||r++}));const s=function(t){let n=0;for(let i=0;it.length?e:t);return r/s>.6&&s>10}function Lr(e,t,n){let i=n;return i=Pr(e,t,i),i=Pr(e,t,i),i=function(e,t,n){if(!e.getBoundaryScore||!t.getBoundaryScore)return n;for(let i=0;i0?n[i-1]:void 0,o=n[i],s=i+10&&(s=s.delta(a))}r.push(s)}return i.length>0&&r.push(i[i.length-1]),r}function Or(e,t,n,i,r){let o=1;for(;e.seq1Range.start-o>=i.start&&e.seq2Range.start-o>=r.start&&n.isStronglyEqual(e.seq2Range.start-o,e.seq2Range.endExclusive-o)&&o<100;)o++;o--;let s=0;for(;e.seq1Range.start+sl&&(l=s,a=c)}return e.delta(a)}class Wr{constructor(e,t){this.trimmedHash=e,this.lines=t}getElement(e){return this.trimmedHash[e]}get length(){return this.trimmedHash.length}getBoundaryScore(e){return 1e3-((0===e?0:Vr(this.lines[e-1]))+(e===this.lines.length?0:Vr(this.lines[e])))}getText(e){return this.lines.slice(e.start,e.endExclusive).join("\n")}isStronglyEqual(e,t){return this.lines[e]===this.lines[t]}}function Vr(e){let t=0;for(;t2&&void 0!==arguments[2]?arguments[2]:(e,t)=>e===t;if(e===t)return!0;if(!e||!t)return!1;if(e.length!==t.length)return!1;for(let i=0,r=e.length;ie===t)))return new Hi([],[],!1);if(1===e.length&&0===e[0].length||1===t.length&&0===t[0].length)return new Hi([new ir(new er(1,e.length+1),new er(1,t.length+1),[new rr(new It(1,1,e.length,e[0].length+1),new It(1,1,t.length,t[0].length+1))])],[],!1);const 
i=0===n.maxComputationTimeMs?yr.instance:new wr(n.maxComputationTimeMs),r=!n.ignoreTrimWhitespace,o=new Map;function s(e){let t=o.get(e);return void 0===t&&(t=o.size,o.set(e,t)),t}const a=e.map((e=>s(e.trim()))),l=t.map((e=>s(e.trim()))),c=new Wr(a,e),h=new Wr(l,t),d=(()=>c.length+h.length<1700?this.dynamicProgrammingDiffing.compute(c,h,i,((n,i)=>e[n]===t[i]?0===t[i].length?.1:1+Math.log(1+t[i].length):.99)):this.myersDiffingAlgorithm.compute(c,h))();let p=d.diffs,u=d.hitTimeout;p=Lr(c,h,p),p=function(e,t,n){let i=n;if(0===i.length)return i;let r,o=0;do{r=!1;const s=[i[0]];for(let a=1;a5||n.seq1Range.length+n.seq2Range.length>5)}h(c,l)?(r=!0,s[s.length-1]=s[s.length-1].join(l)):s.push(l)}i=s}while(o++<10&&r);return i}(c,0,p);const m=[],f=n=>{if(r)for(let o=0;ow.seq1Range.start-g===w.seq2Range.start-b));f(w.seq1Range.start-g),g=w.seq1Range.endExclusive,b=w.seq2Range.endExclusive;const n=this.refineDiff(e,t,w,i,r);n.hitTimeout&&(u=!0);for(const e of n.mappings)m.push(e)}f(e.length-g);const v=Kr(m,e,t);let y=[];return n.computeMoves&&(y=this.computeMoves(v,e,t,a,l,i,r)),Ki((()=>{function n(e,t){if(e.lineNumber<1||e.lineNumber>t.length)return!1;const n=t[e.lineNumber-1];return!(e.column<1||e.column>n.length+1)}function i(e,t){return!(e.startLineNumber<1||e.startLineNumber>t.length+1)&&!(e.endLineNumberExclusive<1||e.endLineNumberExclusive>t.length+1)}for(const r of v){if(!r.innerChanges)return!1;for(const i of r.innerChanges){if(!(n(i.modifiedRange.getStartPosition(),t)&&n(i.modifiedRange.getEndPosition(),t)&&n(i.originalRange.getStartPosition(),e)&&n(i.originalRange.getEndPosition(),e)))return!1}if(!i(r.modified,t)||!i(r.original,e))return!1}return!0})),new Hi(v,y,u)}computeMoves(e,t,n,i,r,o,s){return zr(e,t,n,i,r,o).map((e=>{const i=Kr(this.refineDiff(t,n,new br(e.original.toOffsetRange(),e.modified.toOffsetRange()),o,s).mappings,t,n,!0);return new Gi(e,i)}))}refineDiff(e,t,n,i,r){const o=new Fr(e,n.seq1Range,r),s=new 
Fr(t,n.seq2Range,r),a=o.length+s.length<500?this.dynamicProgrammingDiffing.compute(o,s,i):this.myersDiffingAlgorithm.compute(o,s,i);let l=a.diffs;l=Lr(o,s,l),l=function(e,t,n){const i=br.invert(n,e.length),r=[];let o=new vr(0,0);function s(n,s){if(n.offset10;){const n=i[0];if(!n.seq1Range.intersects(c.seq1Range)&&!n.seq2Range.intersects(c.seq2Range))break;const r=e.findWordContaining(n.seq1Range.start),o=t.findWordContaining(n.seq2Range.start),s=new br(r,o),a=s.intersect(n);if(d+=a.seq1Range.length,p+=a.seq2Range.length,c=c.join(s),!(c.seq1Range.endExclusive>=n.seq1Range.endExclusive))break;i.shift()}d+p<2*(c.seq1Range.length+c.seq2Range.length)/3&&r.push(c),o=c.getEndExclusives()}for(;i.length>0;){const e=i.shift();e.seq1Range.isEmpty||(s(e.getStarts(),e),s(e.getEndExclusives().delta(-1),e))}return function(e,t){const n=[];for(;e.length>0||t.length>0;){const i=e[0],r=t[0];let o;o=i&&(!r||i.seq1Range.start0&&n[n.length-1].seq1Range.endExclusive>=o.seq1Range.start?n[n.length-1]=n[n.length-1].join(o):n.push(o)}return n}(n,r)}(o,s,l),l=function(e,t,n){const i=[];for(const r of n){const e=i[i.length-1];e&&(r.seq1Range.start-e.seq1Range.endExclusive<=2||r.seq2Range.start-e.seq2Range.endExclusive<=2)?i[i.length-1]=new br(e.seq1Range.join(r.seq1Range),e.seq2Range.join(r.seq2Range)):i.push(r)}return i}(0,0,l),l=function(e,t,n){let i=n;if(0===i.length)return i;let r,o=0;do{r=!1;const a=[i[0]];for(let l=1;l5||r.length>500)return!1;const o=e.getText(r).trim();if(o.length>20||o.split(/\r\n|\r|\n/).length>1)return!1;const s=e.countLinesIn(n.seq1Range),a=n.seq1Range.length,l=t.countLinesIn(n.seq2Range),d=n.seq2Range.length,p=e.countLinesIn(i.seq1Range),u=i.seq1Range.length,m=t.countLinesIn(i.seq2Range),f=i.seq2Range.length,g=130;function b(e){return Math.min(e,g)}return 
Math.pow(Math.pow(b(40*s+a),1.5)+Math.pow(b(40*l+d),1.5),1.5)+Math.pow(Math.pow(b(40*p+u),1.5)+Math.pow(b(40*m+f),1.5),1.5)>(g**1.5)**1.5*1.3}d(h,c)?(r=!0,a[a.length-1]=a[a.length-1].join(c)):a.push(c)}i=a}while(o++<10&&r);const s=[];return function(e,t){for(let n=0;n{let r=n;function o(e){return e.length>0&&e.trim().length<=3&&n.seq1Range.length+n.seq2Range.length>100}const a=e.extendToFullLines(n.seq1Range),l=e.getText(new Ji(a.start,n.seq1Range.start));o(l)&&(r=r.deltaStart(-l.length));const c=e.getText(new Ji(n.seq1Range.endExclusive,a.endExclusive));o(c)&&(r=r.deltaEnd(c.length));const h=br.fromOffsetPairs(t?t.getEndExclusives():vr.zero,i?i.getStarts():vr.max),d=r.intersect(h);s.length>0&&d.getStarts().equals(s[s.length-1].getEndExclusives())?s[s.length-1]=s[s.length-1].join(d):s.push(d)})),s}(o,s,l);return{mappings:l.map((e=>new rr(o.translateRange(e.seq1Range),s.translateRange(e.seq2Range)))),hitTimeout:a.hitTimeout}}}function Kr(e,t,n){let i=arguments.length>3&&void 0!==arguments[3]&&arguments[3];const r=[];for(const o of function*(e,t){let n,i;for(const r of e)void 0!==i&&t(i,r)?n.push(r):(n&&(yield n),n=[r]),i=r;n&&(yield n)}(e.map((e=>function(e,t,n){let i=0,r=0;1===e.modifiedRange.endColumn&&1===e.originalRange.endColumn&&e.originalRange.startLineNumber+i<=e.originalRange.endLineNumber&&e.modifiedRange.startLineNumber+i<=e.modifiedRange.endLineNumber&&(r=-1);e.modifiedRange.startColumn-1>=n[e.modifiedRange.startLineNumber-1].length&&e.originalRange.startColumn-1>=t[e.originalRange.startLineNumber-1].length&&e.originalRange.startLineNumber<=e.originalRange.endLineNumber+r&&e.modifiedRange.startLineNumber<=e.modifiedRange.endLineNumber+r&&(i=1);const o=new er(e.originalRange.startLineNumber+i,e.originalRange.endLineNumber+1+r),s=new er(e.modifiedRange.startLineNumber+i,e.modifiedRange.endLineNumber+1+r);return new ir(o,s,[e])}(e,t,n))),((e,t)=>e.original.overlapOrTouch(t.original)||e.modified.overlapOrTouch(t.modified)))){const 
e=o[0],t=o[o.length-1];r.push(new ir(e.original.join(t.original),e.modified.join(t.modified),o.map((e=>e.innerChanges[0]))))}return Ki((()=>{if(!i&&r.length>0){if(r[0].modified.startLineNumber!==r[0].original.startLineNumber)return!1;if(n.length-r[r.length-1].modified.endLineNumberExclusive!==t.length-r[r.length-1].original.endLineNumberExclusive)return!1}return qi(r,((e,t)=>t.original.startLineNumber-e.original.endLineNumberExclusive===t.modified.startLineNumber-e.modified.endLineNumberExclusive&&e.original.endLineNumberExclusivenew sr,Br=()=>new Ur;function jr(e,t){const n=Math.pow(10,t);return Math.round(e*n)/n}class $r{constructor(e,t,n){let i=arguments.length>3&&void 0!==arguments[3]?arguments[3]:1;this._rgbaBrand=void 0,this.r=0|Math.min(255,Math.max(0,e)),this.g=0|Math.min(255,Math.max(0,t)),this.b=0|Math.min(255,Math.max(0,n)),this.a=jr(Math.max(Math.min(1,i),0),3)}static equals(e,t){return e.r===t.r&&e.g===t.g&&e.b===t.b&&e.a===t.a}}class Hr{constructor(e,t,n,i){this._hslaBrand=void 0,this.h=0|Math.max(Math.min(360,e),0),this.s=jr(Math.max(Math.min(1,t),0),3),this.l=jr(Math.max(Math.min(1,n),0),3),this.a=jr(Math.max(Math.min(1,i),0),3)}static equals(e,t){return e.h===t.h&&e.s===t.s&&e.l===t.l&&e.a===t.a}static fromRGBA(e){const t=e.r/255,n=e.g/255,i=e.b/255,r=e.a,o=Math.max(t,n,i),s=Math.min(t,n,i);let a=0,l=0;const c=(s+o)/2,h=o-s;if(h>0){switch(l=Math.min(c<=.5?h/(2*c):h/(2-2*c),1),o){case t:a=(n-i)/h+(n1&&(n-=1),n<1/6?e+6*(t-e)*n:n<.5?t:n<2/3?e+(t-e)*(2/3-n)*6:e}static toRGBA(e){const t=e.h/360,{s:n,l:i,a:r}=e;let o,s,a;if(0===n)o=s=a=i;else{const e=i<.5?i*(1+n):i+n-i*n,r=2*i-e;o=Hr._hue2rgb(r,e,t+1/3),s=Hr._hue2rgb(r,e,t),a=Hr._hue2rgb(r,e,t-1/3)}return new $r(Math.round(255*o),Math.round(255*s),Math.round(255*a),r)}}class Gr{constructor(e,t,n,i){this._hsvaBrand=void 0,this.h=0|Math.max(Math.min(360,e),0),this.s=jr(Math.max(Math.min(1,t),0),3),this.v=jr(Math.max(Math.min(1,n),0),3),this.a=jr(Math.max(Math.min(1,i),0),3)}static equals(e,t){return 
e.h===t.h&&e.s===t.s&&e.v===t.v&&e.a===t.a}static fromRGBA(e){const t=e.r/255,n=e.g/255,i=e.b/255,r=Math.max(t,n,i),o=r-Math.min(t,n,i),s=0===r?0:o/r;let a;return a=0===o?0:r===t?((n-i)/o%6+6)%6:r===n?(i-t)/o+2:(t-n)/o+4,new Gr(Math.round(60*a),s,r,e.a)}static toRGBA(e){const{h:t,s:n,v:i,a:r}=e,o=i*n,s=o*(1-Math.abs(t/60%2-1)),a=i-o;let[l,c,h]=[0,0,0];return t<60?(l=o,c=s):t<120?(l=s,c=o):t<180?(c=o,h=s):t<240?(c=s,h=o):t<300?(l=s,h=o):t<=360&&(l=o,h=s),l=Math.round(255*(l+a)),c=Math.round(255*(c+a)),h=Math.round(255*(h+a)),new $r(l,c,h,r)}}class Jr{static fromHex(e){return Jr.Format.CSS.parseHex(e)||Jr.red}static equals(e,t){return!e&&!t||!(!e||!t)&&e.equals(t)}get hsla(){return this._hsla?this._hsla:Hr.fromRGBA(this.rgba)}get hsva(){return this._hsva?this._hsva:Gr.fromRGBA(this.rgba)}constructor(e){if(!e)throw new Error("Color needs a value");if(e instanceof $r)this.rgba=e;else if(e instanceof Hr)this._hsla=e,this.rgba=Hr.toRGBA(e);else{if(!(e instanceof Gr))throw new Error("Invalid color ctor argument");this._hsva=e,this.rgba=Gr.toRGBA(e)}}equals(e){return!!e&&$r.equals(this.rgba,e.rgba)&&Hr.equals(this.hsla,e.hsla)&&Gr.equals(this.hsva,e.hsva)}getRelativeLuminance(){return jr(.2126*Jr._relativeLuminanceForComponent(this.rgba.r)+.7152*Jr._relativeLuminanceForComponent(this.rgba.g)+.0722*Jr._relativeLuminanceForComponent(this.rgba.b),4)}static _relativeLuminanceForComponent(e){const t=e/255;return t<=.03928?t/12.92:Math.pow((t+.055)/1.055,2.4)}isLighter(){return(299*this.rgba.r+587*this.rgba.g+114*this.rgba.b)/1e3>=128}isLighterThan(e){return this.getRelativeLuminance()>e.getRelativeLuminance()}isDarkerThan(e){return this.getRelativeLuminance()0)for(const i of n){const n=i.filter((e=>void 0!==e)),r=n[1],o=n[2];if(!o)continue;let s;if("rgb"===r){const t=/^\(\s*(25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])\s*,\s*(25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])\s*,\s*(25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])\s*\)$/gm;s=eo(Qr(e,i),no(o,t),!1)}else 
if("rgba"===r){const t=/^\(\s*(25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])\s*,\s*(25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])\s*,\s*(25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])\s*,\s*(0[.][0-9]+|[.][0-9]+|[01][.]|[01])\s*\)$/gm;s=eo(Qr(e,i),no(o,t),!0)}else if("hsl"===r){const t=/^\(\s*(36[0]|3[0-5][0-9]|[12][0-9][0-9]|[1-9]?[0-9])\s*,\s*(100|\d{1,2}[.]\d*|\d{1,2})%\s*,\s*(100|\d{1,2}[.]\d*|\d{1,2})%\s*\)$/gm;s=to(Qr(e,i),no(o,t),!1)}else if("hsla"===r){const t=/^\(\s*(36[0]|3[0-5][0-9]|[12][0-9][0-9]|[1-9]?[0-9])\s*,\s*(100|\d{1,2}[.]\d*|\d{1,2})%\s*,\s*(100|\d{1,2}[.]\d*|\d{1,2})%\s*,\s*(0[.][0-9]+|[.][0-9]+|[01][.]|[01])\s*\)$/gm;s=to(Qr(e,i),no(o,t),!0)}else"#"===r&&(s=Zr(Qr(e,i),r+o));s&&t.push(s)}return t}(e):[]}Jr.white=new Jr(new $r(255,255,255,1)),Jr.black=new Jr(new $r(0,0,0,1)),Jr.red=new Jr(new $r(255,0,0,1)),Jr.blue=new Jr(new $r(0,0,255,1)),Jr.green=new Jr(new $r(0,255,0,1)),Jr.cyan=new Jr(new $r(0,255,255,1)),Jr.lightgrey=new Jr(new $r(211,211,211,1)),Jr.transparent=new Jr(new $r(0,0,0,0)),function(e){let t;!function(t){let n;!function(t){function n(e){const t=e.toString(16);return 2!==t.length?"0"+t:t}function i(e){switch(e){case 48:return 0;case 49:return 1;case 50:return 2;case 51:return 3;case 52:return 4;case 53:return 5;case 54:return 6;case 55:return 7;case 56:return 8;case 57:return 9;case 97:case 65:return 10;case 98:case 66:return 11;case 99:case 67:return 12;case 100:case 68:return 13;case 101:case 69:return 14;case 102:case 70:return 15}return 0}t.formatRGB=function(t){return 1===t.rgba.a?`rgb(${t.rgba.r}, ${t.rgba.g}, ${t.rgba.b})`:e.Format.CSS.formatRGBA(t)},t.formatRGBA=function(e){return`rgba(${e.rgba.r}, ${e.rgba.g}, ${e.rgba.b}, ${+e.rgba.a.toFixed(2)})`},t.formatHSL=function(t){return 1===t.hsla.a?`hsl(${t.hsla.h}, ${(100*t.hsla.s).toFixed(2)}%, ${(100*t.hsla.l).toFixed(2)}%)`:e.Format.CSS.formatHSLA(t)},t.formatHSLA=function(e){return`hsla(${e.hsla.h}, ${(100*e.hsla.s).toFixed(2)}%, ${(100*e.hsla.l).toFixed(2)}%, 
${e.hsla.a.toFixed(2)})`},t.formatHex=function(e){return`#${n(e.rgba.r)}${n(e.rgba.g)}${n(e.rgba.b)}`},t.formatHexA=function(t){return arguments.length>1&&void 0!==arguments[1]&&arguments[1]&&1===t.rgba.a?e.Format.CSS.formatHex(t):`#${n(t.rgba.r)}${n(t.rgba.g)}${n(t.rgba.b)}${n(Math.round(255*t.rgba.a))}`},t.format=function(t){return t.isOpaque()?e.Format.CSS.formatHex(t):e.Format.CSS.formatRGBA(t)},t.parseHex=function(t){const n=t.length;if(0===n)return null;if(35!==t.charCodeAt(0))return null;if(7===n){const n=16*i(t.charCodeAt(1))+i(t.charCodeAt(2)),r=16*i(t.charCodeAt(3))+i(t.charCodeAt(4)),o=16*i(t.charCodeAt(5))+i(t.charCodeAt(6));return new e(new $r(n,r,o,1))}if(9===n){const n=16*i(t.charCodeAt(1))+i(t.charCodeAt(2)),r=16*i(t.charCodeAt(3))+i(t.charCodeAt(4)),o=16*i(t.charCodeAt(5))+i(t.charCodeAt(6)),s=16*i(t.charCodeAt(7))+i(t.charCodeAt(8));return new e(new $r(n,r,o,s/255))}if(4===n){const n=i(t.charCodeAt(1)),r=i(t.charCodeAt(2)),o=i(t.charCodeAt(3));return new e(new $r(16*n+n,16*r+r,16*o+o))}if(5===n){const n=i(t.charCodeAt(1)),r=i(t.charCodeAt(2)),o=i(t.charCodeAt(3)),s=i(t.charCodeAt(4));return new e(new $r(16*n+n,16*r+r,16*o+o,(16*s+s)/255))}return null}}(n=t.CSS||(t.CSS={}))}(t=e.Format||(e.Format={}))}(Jr||(Jr={}));const ro=/\bMARK:\s*(.*)$/d,oo=/^-+|-+$/g;function so(e,t){var n;let i=[];if(t.findRegionSectionHeaders&&(null===(n=t.foldingRules)||void 0===n?void 0:n.markers)){const n=function(e,t){const n=[],i=e.getLineCount();for(let r=1;r<=i;r++){const i=e.getLineContent(r),o=i.match(t.foldingRules.markers.start);if(o){const e={startLineNumber:r,startColumn:o[0].length+1,endLineNumber:r,endColumn:i.length+1};if(e.endColumn>e.startColumn){const t={range:e,...lo(i.substring(o[0].length)),shouldBeInComments:!1};(t.text||t.hasSeparatorLine)&&n.push(t)}}}return n}(e,t);i=i.concat(n)}if(t.findMarkSectionHeaders){const t=function(e){const t=[],n=e.getLineCount();for(let i=1;i<=n;i++){ao(e.getLineContent(i),i,t)}return t}(e);i=i.concat(t)}return 
i}function ao(e,t,n){ro.lastIndex=0;const i=ro.exec(e);if(i){const e={startLineNumber:t,startColumn:i.indices[1][0]+1,endLineNumber:t,endColumn:i.indices[1][1]+1};if(e.endColumn>e.startColumn){const t={range:e,...lo(i[1]),shouldBeInComments:!0};(t.text||t.hasSeparatorLine)&&n.push(t)}}}function lo(e){const t=(e=e.trim()).startsWith("-");return{text:e=e.replace(oo,""),hasSeparatorLine:t}}class co extends Bt{get uri(){return this._uri}get eol(){return this._eol}getValue(){return this.getText()}findMatches(e){const t=[];for(let n=0;nthis._lines.length)t=this._lines.length,n=this._lines[t-1].length+1,i=!0;else{const e=this._lines[t-1].length+1;n<1?(n=1,i=!0):n>e&&(n=e,i=!0)}return i?{lineNumber:t,column:n}:e}}class ho{constructor(e,t){this._host=e,this._models=Object.create(null),this._foreignModuleFactory=t,this._foreignModule=null}dispose(){this._models=Object.create(null)}_getModel(e){return this._models[e]}_getModels(){const e=[];return Object.keys(this._models).forEach((t=>e.push(this._models[t]))),e}acceptNewModel(e){this._models[e.url]=new co(Ct.parse(e.url),e.lines,e.EOL,e.versionId)}acceptModelChanged(e,t){if(!this._models[e])return;this._models[e].onEvents(t)}acceptRemovedModel(e){this._models[e]&&delete this._models[e]}async computeUnicodeHighlights(e,t,n){const i=this._getModel(e);return i?Bi.computeUnicodeHighlights(i,t,n):{ranges:[],hasMore:!1,ambiguousCharacterCount:0,invisibleCharacterCount:0,nonBasicAsciiCharacterCount:0}}async findSectionHeaders(e,t){const n=this._getModel(e);return n?so(n,t):[]}async computeDiff(e,t,n,i){const r=this._getModel(e),o=this._getModel(t);if(!r||!o)return null;return ho.computeDiff(r,o,n,i)}static computeDiff(e,t,n,i){const r="advanced"===i?Br():qr(),o=e.getLinesContent(),s=t.getLinesContent(),a=r.computeDiff(o,s,n);function l(e){return e.map((e=>{var t;return[e.original.startLineNumber,e.original.endLineNumberExclusive,e.modified.startLineNumber,e.modified.endLineNumberExclusive,null===(t=e.innerChanges)||void 0===t?void 
0:t.map((e=>[e.originalRange.startLineNumber,e.originalRange.startColumn,e.originalRange.endLineNumber,e.originalRange.endColumn,e.modifiedRange.startLineNumber,e.modifiedRange.startColumn,e.modifiedRange.endLineNumber,e.modifiedRange.endColumn]))]}))}return{identical:!(a.changes.length>0)&&this._modelsAreIdentical(e,t),quitEarly:a.hitTimeout,changes:l(a.changes),moves:a.moves.map((e=>[e.lineRangeMapping.original.startLineNumber,e.lineRangeMapping.original.endLineNumberExclusive,e.lineRangeMapping.modified.startLineNumber,e.lineRangeMapping.modified.endLineNumberExclusive,l(e.changes)]))}}static _modelsAreIdentical(e,t){const n=e.getLineCount();if(n!==t.getLineCount())return!1;for(let i=1;i<=n;i++){if(e.getLineContent(i)!==t.getLineContent(i))return!1}return!0}async computeMoreMinimalEdits(e,t,n){const i=this._getModel(e);if(!i)return t;const r=[];let o;t=t.slice(0).sort(((e,t)=>{if(e.range&&t.range)return It.compareRangesUsingStarts(e.range,t.range);return(e.range?0:1)-(t.range?0:1)}));let s=0;for(let a=1;aho._diffLimit){r.push({range:a,text:l});continue}const t=Be(e,l,n),s=i.offsetAt(It.lift(a).getStartPosition());for(const n of t){const e=i.positionAt(s+n.originalStart),t=i.positionAt(s+n.originalStart+n.originalLength),o={text:l.substr(n.modifiedStart,n.modifiedLength),range:{startLineNumber:e.lineNumber,startColumn:e.column,endLineNumber:t.lineNumber,endColumn:t.column}};i.getValueInRange(o.range)!==o.text&&r.push(o)}}return"number"===typeof o&&r.push({eol:o,text:"",range:{startLineNumber:0,startColumn:0,endLineNumber:0,endColumn:0}}),r}async computeLinks(e){const t=this._getModel(e);return t?function(e){return e&&"function"===typeof e.getLineCount&&"function"===typeof e.getLineContent?nn.computeLinks(e):[]}(t):null}async computeDefaultDocumentColors(e){const t=this._getModel(e);return t?io(t):null}async textualSuggest(e,t,n,i){const r=new _,o=new RegExp(n,i),s=new Set;e:for(const a of e){const e=this._getModel(a);if(e)for(const n of 
e.words(o))if(n!==t&&isNaN(Number(n))&&(s.add(n),s.size>ho._suggestionsLimit))break e}return{words:Array.from(s),duration:r.elapsed()}}async computeWordRanges(e,t,n,i){const r=this._getModel(e);if(!r)return Object.create(null);const o=new RegExp(n,i),s=Object.create(null);for(let a=t.startLineNumber;afunction(){const n=Array.prototype.slice.call(arguments,0);return t(e,n)},i={};for(const r of e)i[r]=n(r);return i}(n,((e,t)=>this._host.fhr(e,t))),r={host:i,getMirrorModels:()=>this._getModels()};return this._foreignModuleFactory?(this._foreignModule=this._foreignModuleFactory(r,t),Promise.resolve(z(this._foreignModule))):Promise.reject(new Error("Unexpected usage"))}fmr(e,t){if(!this._foreignModule||"function"!==typeof this._foreignModule[e])return Promise.reject(new Error("Missing requestHandler or method: "+e));try{return Promise.resolve(this._foreignModule[e].apply(this._foreignModule,t))}catch(Du){return Promise.reject(Du)}}}ho._diffLimit=1e5,ho._suggestionsLimit=1e4,"function"===typeof importScripts&&(globalThis.monaco={editor:void 0,languages:void 0,CancellationTokenSource:ln,Emitter:A,KeyCode:oi,KeyMod:Ti,Position:zt,Range:It,Selection:xn,SelectionDirection:wi,MarkerSeverity:si,MarkerTag:ai,Uri:Ct,Token:Rn});let po=!1;function uo(e){if(po)return;po=!0;const t=new Ie((e=>{globalThis.postMessage(e)}),(t=>new ho(t,e)));globalThis.onmessage=e=>{t.onmessage(e.data)}}var 
mo,fo;globalThis.onmessage=e=>{po||uo(null)},(fo=mo||(mo={}))[fo.Ident=0]="Ident",fo[fo.AtKeyword=1]="AtKeyword",fo[fo.String=2]="String",fo[fo.BadString=3]="BadString",fo[fo.UnquotedString=4]="UnquotedString",fo[fo.Hash=5]="Hash",fo[fo.Num=6]="Num",fo[fo.Percentage=7]="Percentage",fo[fo.Dimension=8]="Dimension",fo[fo.UnicodeRange=9]="UnicodeRange",fo[fo.CDO=10]="CDO",fo[fo.CDC=11]="CDC",fo[fo.Colon=12]="Colon",fo[fo.SemiColon=13]="SemiColon",fo[fo.CurlyL=14]="CurlyL",fo[fo.CurlyR=15]="CurlyR",fo[fo.ParenthesisL=16]="ParenthesisL",fo[fo.ParenthesisR=17]="ParenthesisR",fo[fo.BracketL=18]="BracketL",fo[fo.BracketR=19]="BracketR",fo[fo.Whitespace=20]="Whitespace",fo[fo.Includes=21]="Includes",fo[fo.Dashmatch=22]="Dashmatch",fo[fo.SubstringOperator=23]="SubstringOperator",fo[fo.PrefixOperator=24]="PrefixOperator",fo[fo.SuffixOperator=25]="SuffixOperator",fo[fo.Delim=26]="Delim",fo[fo.EMS=27]="EMS",fo[fo.EXS=28]="EXS",fo[fo.Length=29]="Length",fo[fo.Angle=30]="Angle",fo[fo.Time=31]="Time",fo[fo.Freq=32]="Freq",fo[fo.Exclamation=33]="Exclamation",fo[fo.Resolution=34]="Resolution",fo[fo.Comma=35]="Comma",fo[fo.Charset=36]="Charset",fo[fo.EscapedJavaScript=37]="EscapedJavaScript",fo[fo.BadEscapedJavaScript=38]="BadEscapedJavaScript",fo[fo.Comment=39]="Comment",fo[fo.SingleLineComment=40]="SingleLineComment",fo[fo.EOF=41]="EOF",fo[fo.CustomToken=42]="CustomToken";var go=function(){function e(e){this.source=e,this.len=e.length,this.position=0}return e.prototype.substring=function(e,t){return void 0===t&&(t=this.position),this.source.substring(e,t)},e.prototype.eos=function(){return this.len<=this.position},e.prototype.pos=function(){return this.position},e.prototype.goBackTo=function(e){this.position=e},e.prototype.goBack=function(e){this.position-=e},e.prototype.advance=function(e){this.position+=e},e.prototype.nextChar=function(){return this.source.charCodeAt(this.position++)||0},e.prototype.peekChar=function(e){return void 
0===e&&(e=0),this.source.charCodeAt(this.position+e)||0},e.prototype.lookbackChar=function(e){return void 0===e&&(e=0),this.source.charCodeAt(this.position-e)||0},e.prototype.advanceIfChar=function(e){return e===this.source.charCodeAt(this.position)&&(this.position++,!0)},e.prototype.advanceIfChars=function(e){if(this.position+e.length>this.source.length)return!1;for(var t=0;t".charCodeAt(0),Po="@".charCodeAt(0),Oo="#".charCodeAt(0),Wo="$".charCodeAt(0),Vo="\\".charCodeAt(0),Uo="/".charCodeAt(0),Ko="\n".charCodeAt(0),qo="\r".charCodeAt(0),Bo="\f".charCodeAt(0),jo='"'.charCodeAt(0),$o="'".charCodeAt(0),Ho=" ".charCodeAt(0),Go="\t".charCodeAt(0),Jo=";".charCodeAt(0),Xo=":".charCodeAt(0),Yo="{".charCodeAt(0),Qo="}".charCodeAt(0),Zo="[".charCodeAt(0),es="]".charCodeAt(0),ts=",".charCodeAt(0),ns=".".charCodeAt(0),is="!".charCodeAt(0),rs="?".charCodeAt(0),os="+".charCodeAt(0),ss={};ss[Jo]=mo.SemiColon,ss[Xo]=mo.Colon,ss[Yo]=mo.CurlyL,ss[Qo]=mo.CurlyR,ss[es]=mo.BracketR,ss[Zo]=mo.BracketL,ss[Mo]=mo.ParenthesisL,ss[zo]=mo.ParenthesisR,ss[ts]=mo.Comma;var as={};as.em=mo.EMS,as.ex=mo.EXS,as.px=mo.Length,as.cm=mo.Length,as.mm=mo.Length,as.in=mo.Length,as.pt=mo.Length,as.pc=mo.Length,as.deg=mo.Angle,as.rad=mo.Angle,as.grad=mo.Angle,as.ms=mo.Time,as.s=mo.Time,as.hz=mo.Freq,as.khz=mo.Freq,as["%"]=mo.Percentage,as.fr=mo.Percentage,as.dpi=mo.Resolution,as.dpcm=mo.Resolution;var ls=function(){function e(){this.stream=new go(""),this.ignoreComment=!0,this.ignoreWhitespace=!0,this.inURL=!1}return e.prototype.setSource=function(e){this.stream=new go(e)},e.prototype.finishToken=function(e,t,n){return{offset:e,len:this.stream.pos()-e,type:t,text:n||this.stream.substring(e)}},e.prototype.substring=function(e,t){return this.stream.substring(e,e+t)},e.prototype.pos=function(){return this.stream.pos()},e.prototype.goBackTo=function(e){this.stream.goBackTo(e)},e.prototype.scanUnquotedString=function(){var e=this.stream.pos(),t=[];return 
this._unquotedString(t)?this.finishToken(e,mo.UnquotedString,t.join("")):null},e.prototype.scan=function(){var e=this.trivia();if(null!==e)return e;var t=this.stream.pos();return this.stream.eos()?this.finishToken(t,mo.EOF):this.scanNext(t)},e.prototype.tryScanUnicode=function(){var e=this.stream.pos();if(!this.stream.eos()&&this._unicodeRange())return this.finishToken(e,mo.UnicodeRange);this.stream.goBackTo(e)},e.prototype.scanNext=function(e){if(this.stream.advanceIfChars([Io,is,Fo,Fo]))return this.finishToken(e,mo.CDO);if(this.stream.advanceIfChars([Fo,Fo,Lo]))return this.finishToken(e,mo.CDC);var t=[];if(this.ident(t))return this.finishToken(e,mo.Ident,t.join(""));if(this.stream.advanceIfChar(Po)){if(t=["@"],this._name(t)){var n=t.join("");return"@charset"===n?this.finishToken(e,mo.Charset,n):this.finishToken(e,mo.AtKeyword,n)}return this.finishToken(e,mo.Delim)}if(this.stream.advanceIfChar(Oo))return t=["#"],this._name(t)?this.finishToken(e,mo.Hash,t.join("")):this.finishToken(e,mo.Delim);if(this.stream.advanceIfChar(is))return this.finishToken(e,mo.Exclamation);if(this._number()){var i=this.stream.pos();if(t=[this.stream.substring(e,i)],this.stream.advanceIfChar(To))return this.finishToken(e,mo.Percentage);if(this.ident(t)){var r=this.stream.substring(i).toLowerCase(),o=as[r];return"undefined"!==typeof o?this.finishToken(e,o,t.join("")):this.finishToken(e,mo.Dimension,t.join(""))}return this.finishToken(e,mo.Num)}t=[];var s=this._string(t);return 
null!==s?this.finishToken(e,s,t.join("")):"undefined"!==typeof(s=ss[this.stream.peekChar()])?(this.stream.advance(1),this.finishToken(e,s)):this.stream.peekChar(0)===ko&&this.stream.peekChar(1)===Ro?(this.stream.advance(2),this.finishToken(e,mo.Includes)):this.stream.peekChar(0)===No&&this.stream.peekChar(1)===Ro?(this.stream.advance(2),this.finishToken(e,mo.Dashmatch)):this.stream.peekChar(0)===Ao&&this.stream.peekChar(1)===Ro?(this.stream.advance(2),this.finishToken(e,mo.SubstringOperator)):this.stream.peekChar(0)===Eo&&this.stream.peekChar(1)===Ro?(this.stream.advance(2),this.finishToken(e,mo.PrefixOperator)):this.stream.peekChar(0)===Wo&&this.stream.peekChar(1)===Ro?(this.stream.advance(2),this.finishToken(e,mo.SuffixOperator)):(this.stream.nextChar(),this.finishToken(e,mo.Delim))},e.prototype.trivia=function(){for(;;){var e=this.stream.pos();if(this._whitespace()){if(!this.ignoreWhitespace)return this.finishToken(e,mo.Whitespace)}else{if(!this.comment())return null;if(!this.ignoreComment)return this.finishToken(e,mo.Comment)}}},e.prototype.comment=function(){if(this.stream.advanceIfChars([Uo,Ao])){var e=!1,t=!1;return this.stream.advanceWhileChar((function(n){return t&&n===Uo?(e=!0,!1):(t=n===Ao,!0)})),e&&this.stream.advance(1),!0}return!1},e.prototype._number=function(){var e,t=0;return this.stream.peekChar()===ns&&(t=1),(e=this.stream.peekChar(t))>=Co&&e<=_o&&(this.stream.advance(t+1),this.stream.advanceWhileChar((function(e){return e>=Co&&e<=_o||0===t&&e===ns})),!0)},e.prototype._newline=function(e){var t=this.stream.peekChar();switch(t){case qo:case Bo:case Ko:return this.stream.advance(1),e.push(String.fromCharCode(t)),t===qo&&this.stream.advanceIfChar(Ko)&&e.push("\n"),!0}return!1},e.prototype._escape=function(e,t){var n=this.stream.peekChar();if(n===Vo){this.stream.advance(1),n=this.stream.peekChar();for(var i=0;i<6&&(n>=Co&&n<=_o||n>=bo&&n<=vo||n>=wo&&n<=xo);)this.stream.advance(1),n=this.stream.peekChar(),i++;if(i>0){try{var 
r=parseInt(this.stream.substring(this.stream.pos()-i),16);r&&e.push(String.fromCharCode(r))}catch(Du){}return n===Ho||n===Go?this.stream.advance(1):this._newline([]),!0}if(n!==qo&&n!==Bo&&n!==Ko)return this.stream.advance(1),e.push(String.fromCharCode(n)),!0;if(t)return this._newline(e)}return!1},e.prototype._stringChar=function(e,t){var n=this.stream.peekChar();return 0!==n&&n!==e&&n!==Vo&&n!==qo&&n!==Bo&&n!==Ko&&(this.stream.advance(1),t.push(String.fromCharCode(n)),!0)},e.prototype._string=function(e){if(this.stream.peekChar()===$o||this.stream.peekChar()===jo){var t=this.stream.nextChar();for(e.push(String.fromCharCode(t));this._stringChar(t,e)||this._escape(e,!0););return this.stream.peekChar()===t?(this.stream.nextChar(),e.push(String.fromCharCode(t)),mo.String):mo.BadString}return null},e.prototype._unquotedChar=function(e){var t=this.stream.peekChar();return 0!==t&&t!==Vo&&t!==$o&&t!==jo&&t!==Mo&&t!==zo&&t!==Ho&&t!==Go&&t!==Ko&&t!==Bo&&t!==qo&&(this.stream.advance(1),e.push(String.fromCharCode(t)),!0)},e.prototype._unquotedString=function(e){for(var t=!1;this._unquotedChar(e)||this._escape(e);)t=!0;return t},e.prototype._whitespace=function(){return this.stream.advanceWhileChar((function(e){return e===Ho||e===Go||e===Ko||e===Bo||e===qo}))>0},e.prototype._name=function(e){for(var t=!1;this._identChar(e)||this._escape(e);)t=!0;return t},e.prototype.ident=function(e){var t=this.stream.pos();if(this._minus(e)){if(this._minus(e)||this._identFirstChar(e)||this._escape(e)){for(;this._identChar(e)||this._escape(e););return!0}}else if(this._identFirstChar(e)||this._escape(e)){for(;this._identChar(e)||this._escape(e););return!0}return this.stream.goBackTo(t),!1},e.prototype._identFirstChar=function(e){var t=this.stream.peekChar();return(t===Do||t>=bo&&t<=yo||t>=wo&&t<=So||t>=128&&t<=65535)&&(this.stream.advance(1),e.push(String.fromCharCode(t)),!0)},e.prototype._minus=function(e){var t=this.stream.peekChar();return 
t===Fo&&(this.stream.advance(1),e.push(String.fromCharCode(t)),!0)},e.prototype._identChar=function(e){var t=this.stream.peekChar();return(t===Do||t===Fo||t>=bo&&t<=yo||t>=wo&&t<=So||t>=Co&&t<=_o||t>=128&&t<=65535)&&(this.stream.advance(1),e.push(String.fromCharCode(t)),!0)},e.prototype._unicodeRange=function(){if(this.stream.advanceIfChar(os)){var e=function(e){return e>=Co&&e<=_o||e>=bo&&e<=vo||e>=wo&&e<=xo},t=this.stream.advanceWhileChar(e)+this.stream.advanceWhileChar((function(e){return e===rs}));if(t>=1&&t<=6){if(!this.stream.advanceIfChar(Fo))return!0;var n=this.stream.advanceWhileChar(e);if(n>=1&&n<=6)return!0}}return!1},e}();function cs(e,t){if(e.length0?e.lastIndexOf(t)===n:0===n&&e===t}function ds(e,t){return void 0===t&&(t=!0),e?e.length<140?e:e.slice(0,140)+(t?"\u2026":""):""}function ps(e,t){for(var n="";t>0;)1===(1&t)&&(n+=e),e+=e,t>>>=1;return n}var us,ms,fs,gs,bs=function(){var e=function(t,n){return(e=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(e,t){e.__proto__=t}||function(e,t){for(var n in t)Object.prototype.hasOwnProperty.call(t,n)&&(e[n]=t[n])})(t,n)};return function(t,n){if("function"!==typeof n&&null!==n)throw new TypeError("Class extends value "+String(n)+" is not a constructor or null");function i(){this.constructor=t}e(t,n),t.prototype=null===n?Object.create(n):(i.prototype=n.prototype,new i)}}();function vs(e,t){var n=null;return!e||te.end?null:(e.accept((function(e){return-1===e.offset&&-1===e.length||e.offset<=t&&e.end>=t&&(n?e.length<=n.length&&(n=e):n=e,!0)})),n)}function ys(e,t){for(var n=vs(e,t),i=[];n;)i.unshift(n),n=n.parent;return 
i}(ms=us||(us={}))[ms.Undefined=0]="Undefined",ms[ms.Identifier=1]="Identifier",ms[ms.Stylesheet=2]="Stylesheet",ms[ms.Ruleset=3]="Ruleset",ms[ms.Selector=4]="Selector",ms[ms.SimpleSelector=5]="SimpleSelector",ms[ms.SelectorInterpolation=6]="SelectorInterpolation",ms[ms.SelectorCombinator=7]="SelectorCombinator",ms[ms.SelectorCombinatorParent=8]="SelectorCombinatorParent",ms[ms.SelectorCombinatorSibling=9]="SelectorCombinatorSibling",ms[ms.SelectorCombinatorAllSiblings=10]="SelectorCombinatorAllSiblings",ms[ms.SelectorCombinatorShadowPiercingDescendant=11]="SelectorCombinatorShadowPiercingDescendant",ms[ms.Page=12]="Page",ms[ms.PageBoxMarginBox=13]="PageBoxMarginBox",ms[ms.ClassSelector=14]="ClassSelector",ms[ms.IdentifierSelector=15]="IdentifierSelector",ms[ms.ElementNameSelector=16]="ElementNameSelector",ms[ms.PseudoSelector=17]="PseudoSelector",ms[ms.AttributeSelector=18]="AttributeSelector",ms[ms.Declaration=19]="Declaration",ms[ms.Declarations=20]="Declarations",ms[ms.Property=21]="Property",ms[ms.Expression=22]="Expression",ms[ms.BinaryExpression=23]="BinaryExpression",ms[ms.Term=24]="Term",ms[ms.Operator=25]="Operator",ms[ms.Value=26]="Value",ms[ms.StringLiteral=27]="StringLiteral",ms[ms.URILiteral=28]="URILiteral",ms[ms.EscapedValue=29]="EscapedValue",ms[ms.Function=30]="Function",ms[ms.NumericValue=31]="NumericValue",ms[ms.HexColorValue=32]="HexColorValue",ms[ms.RatioValue=33]="RatioValue",ms[ms.MixinDeclaration=34]="MixinDeclaration",ms[ms.MixinReference=35]="MixinReference",ms[ms.VariableName=36]="VariableName",ms[ms.VariableDeclaration=37]="VariableDeclaration",ms[ms.Prio=38]="Prio",ms[ms.Interpolation=39]="Interpolation",ms[ms.NestedProperties=40]="NestedProperties",ms[ms.ExtendsReference=41]="ExtendsReference",ms[ms.SelectorPlaceholder=42]="SelectorPlaceholder",ms[ms.Debug=43]="Debug",ms[ms.If=44]="If",ms[ms.Else=45]="Else",ms[ms.For=46]="For",ms[ms.Each=47]="Each",ms[ms.While=48]="While",ms[ms.MixinContentReference=49]="MixinContentReference",ms[ms.Mi
xinContentDeclaration=50]="MixinContentDeclaration",ms[ms.Media=51]="Media",ms[ms.Keyframe=52]="Keyframe",ms[ms.FontFace=53]="FontFace",ms[ms.Import=54]="Import",ms[ms.Namespace=55]="Namespace",ms[ms.Invocation=56]="Invocation",ms[ms.FunctionDeclaration=57]="FunctionDeclaration",ms[ms.ReturnStatement=58]="ReturnStatement",ms[ms.MediaQuery=59]="MediaQuery",ms[ms.MediaCondition=60]="MediaCondition",ms[ms.MediaFeature=61]="MediaFeature",ms[ms.FunctionParameter=62]="FunctionParameter",ms[ms.FunctionArgument=63]="FunctionArgument",ms[ms.KeyframeSelector=64]="KeyframeSelector",ms[ms.ViewPort=65]="ViewPort",ms[ms.Document=66]="Document",ms[ms.AtApplyRule=67]="AtApplyRule",ms[ms.CustomPropertyDeclaration=68]="CustomPropertyDeclaration",ms[ms.CustomPropertySet=69]="CustomPropertySet",ms[ms.ListEntry=70]="ListEntry",ms[ms.Supports=71]="Supports",ms[ms.SupportsCondition=72]="SupportsCondition",ms[ms.NamespacePrefix=73]="NamespacePrefix",ms[ms.GridLine=74]="GridLine",ms[ms.Plugin=75]="Plugin",ms[ms.UnknownAtRule=76]="UnknownAtRule",ms[ms.Use=77]="Use",ms[ms.ModuleConfiguration=78]="ModuleConfiguration",ms[ms.Forward=79]="Forward",ms[ms.ForwardVisibility=80]="ForwardVisibility",ms[ms.Module=81]="Module",ms[ms.UnicodeRange=82]="UnicodeRange",(gs=fs||(fs={}))[gs.Mixin=0]="Mixin",gs[gs.Rule=1]="Rule",gs[gs.Variable=2]="Variable",gs[gs.Function=3]="Function",gs[gs.Keyframe=4]="Keyframe",gs[gs.Unknown=5]="Unknown",gs[gs.Module=6]="Module",gs[gs.Forward=7]="Forward",gs[gs.ForwardVisibility=8]="ForwardVisibility";var ws,xs,Ss=function(){function e(e,t,n){void 0===e&&(e=-1),void 0===t&&(t=-1),this.parent=null,this.offset=e,this.length=t,n&&(this.nodeType=n)}return Object.defineProperty(e.prototype,"end",{get:function(){return this.offset+this.length},enumerable:!1,configurable:!0}),Object.defineProperty(e.prototype,"type",{get:function(){return this.nodeType||us.Undefined},set:function(e){this.nodeType=e},enumerable:!1,configurable:!0}),e.prototype.getTextProvider=function(){for(var 
e=this;e&&!e.textProvider;)e=e.parent;return e?e.textProvider:function(){return"unknown"}},e.prototype.getText=function(){return this.getTextProvider()(this.offset,this.length)},e.prototype.matches=function(e){return this.length===e.length&&this.getTextProvider()(this.offset,this.length)===e},e.prototype.startsWith=function(e){return this.length>=e.length&&this.getTextProvider()(this.offset,e.length)===e},e.prototype.endsWith=function(e){return this.length>=e.length&&this.getTextProvider()(this.end-e.length,e.length)===e},e.prototype.accept=function(e){if(e(this)&&this.children)for(var t=0,n=this.children;t=0&&e.parent.children.splice(n,1)}e.parent=this;var i=this.children;return i||(i=this.children=[]),-1!==t?i.splice(t,0,e):i.push(e),e},e.prototype.attachTo=function(e,t){return void 0===t&&(t=-1),e&&e.adoptChild(this,t),this},e.prototype.collectIssues=function(e){this.issues&&e.push.apply(e,this.issues)},e.prototype.addIssue=function(e){this.issues||(this.issues=[]),this.issues.push(e)},e.prototype.hasIssue=function(e){return Array.isArray(this.issues)&&this.issues.some((function(t){return t.getRule()===e}))},e.prototype.isErroneous=function(e){return void 0===e&&(e=!1),!!(this.issues&&this.issues.length>0)||e&&Array.isArray(this.children)&&this.children.some((function(e){return e.isErroneous(!0)}))},e.prototype.setNode=function(e,t,n){return void 0===n&&(n=-1),!!t&&(t.attachTo(this,n),this[e]=t,!0)},e.prototype.addChild=function(e){return!!e&&(this.children||(this.children=[]),e.attachTo(this),this.updateOffsetAndLength(e),!0)},e.prototype.updateOffsetAndLength=function(e){(e.offsetthis.end||-1===this.length)&&(this.length=t-this.offset)},e.prototype.hasChildren=function(){return!!this.children&&this.children.length>0},e.prototype.getChildren=function(){return this.children?this.children.slice(0):[]},e.prototype.getChild=function(e){return this.children&&e=0;n--)if((t=this.children[n]).offset<=e)return t;return 
null},e.prototype.findChildAtOffset=function(e,t){var n=this.findFirstChildBeforeOffset(e);return n&&n.end>=e?t&&n.findChildAtOffset(e,!0)||n:null},e.prototype.encloses=function(e){return this.offset<=e.offset&&this.offset+this.length>=e.offset+e.length},e.prototype.getParent=function(){for(var e=this.parent;e instanceof Cs;)e=e.parent;return e},e.prototype.findParent=function(e){for(var t=this;t&&t.type!==e;)t=t.parent;return t},e.prototype.findAParent=function(){for(var e=[],t=0;t2?n-2:0),r=2;r{let i=n[0];return"undefined"!==typeof t[i]?t[i]:e})),n}(t,i)}function Wa(e){return Oa}var Va,Ua,Ka,qa,Ba,ja,$a,Ha,Ga,Ja,Xa,Ya,Qa,Za,el,tl,nl,il,rl,ol,sl,al,ll,cl,hl,dl,pl,ul,ml,fl,gl,bl,vl,yl,wl,xl,Sl,Cl,_l,kl,El,Rl,Nl,Fl,Dl,Tl,Al,Ml,zl,Il=Wa(),Ll=function(){return function(e,t){this.id=e,this.message=t}}(),Pl={NumberExpected:new Ll("css-numberexpected",Il("expected.number","number expected")),ConditionExpected:new Ll("css-conditionexpected",Il("expected.condt","condition expected")),RuleOrSelectorExpected:new Ll("css-ruleorselectorexpected",Il("expected.ruleorselector","at-rule or selector expected")),DotExpected:new Ll("css-dotexpected",Il("expected.dot","dot expected")),ColonExpected:new Ll("css-colonexpected",Il("expected.colon","colon expected")),SemiColonExpected:new Ll("css-semicolonexpected",Il("expected.semicolon","semi-colon expected")),TermExpected:new Ll("css-termexpected",Il("expected.term","term expected")),ExpressionExpected:new Ll("css-expressionexpected",Il("expected.expression","expression expected")),OperatorExpected:new Ll("css-operatorexpected",Il("expected.operator","operator expected")),IdentifierExpected:new Ll("css-identifierexpected",Il("expected.ident","identifier expected")),PercentageExpected:new Ll("css-percentageexpected",Il("expected.percentage","percentage expected")),URIOrStringExpected:new Ll("css-uriorstringexpected",Il("expected.uriorstring","uri or string expected")),URIExpected:new Ll("css-uriexpected",Il("expected.uri","URI 
expected")),VariableNameExpected:new Ll("css-varnameexpected",Il("expected.varname","variable name expected")),VariableValueExpected:new Ll("css-varvalueexpected",Il("expected.varvalue","variable value expected")),PropertyValueExpected:new Ll("css-propertyvalueexpected",Il("expected.propvalue","property value expected")),LeftCurlyExpected:new Ll("css-lcurlyexpected",Il("expected.lcurly","{ expected")),RightCurlyExpected:new Ll("css-rcurlyexpected",Il("expected.rcurly","} expected")),LeftSquareBracketExpected:new Ll("css-rbracketexpected",Il("expected.lsquare","[ expected")),RightSquareBracketExpected:new Ll("css-lbracketexpected",Il("expected.rsquare","] expected")),LeftParenthesisExpected:new Ll("css-lparentexpected",Il("expected.lparen","( expected")),RightParenthesisExpected:new Ll("css-rparentexpected",Il("expected.rparent",") expected")),CommaExpected:new Ll("css-commaexpected",Il("expected.comma","comma expected")),PageDirectiveOrDeclarationExpected:new Ll("css-pagedirordeclexpected",Il("expected.pagedirordecl","page directive or declaraton expected")),UnknownAtRule:new Ll("css-unknownatrule",Il("unknown.atrule","at-rule unknown")),UnknownKeyword:new Ll("css-unknownkeyword",Il("unknown.keyword","unknown keyword")),SelectorExpected:new Ll("css-selectorexpected",Il("expected.selector","selector expected")),StringLiteralExpected:new Ll("css-stringliteralexpected",Il("expected.stringliteral","string literal expected")),WhitespaceExpected:new Ll("css-whitespaceexpected",Il("expected.whitespace","whitespace expected")),MediaQueryExpected:new Ll("css-mediaqueryexpected",Il("expected.mediaquery","media query expected")),IdentifierOrWildcardExpected:new Ll("css-idorwildcardexpected",Il("expected.idorwildcard","identifier or wildcard expected")),WildcardExpected:new Ll("css-wildcardexpected",Il("expected.wildcard","wildcard expected")),IdentifierOrVariableExpected:new Ll("css-idorvarexpected",Il("expected.idorvar","identifier or variable 
expected"))};(Ua=Va||(Va={})).MIN_VALUE=-2147483648,Ua.MAX_VALUE=2147483647,(qa=Ka||(Ka={})).MIN_VALUE=0,qa.MAX_VALUE=2147483647,(ja=Ba||(Ba={})).create=function(e,t){return e===Number.MAX_VALUE&&(e=Ka.MAX_VALUE),t===Number.MAX_VALUE&&(t=Ka.MAX_VALUE),{line:e,character:t}},ja.is=function(e){var t=e;return Pc.objectLiteral(t)&&Pc.uinteger(t.line)&&Pc.uinteger(t.character)},(Ha=$a||($a={})).create=function(e,t,n,i){if(Pc.uinteger(e)&&Pc.uinteger(t)&&Pc.uinteger(n)&&Pc.uinteger(i))return{start:Ba.create(e,t),end:Ba.create(n,i)};if(Ba.is(e)&&Ba.is(t))return{start:e,end:t};throw new Error("Range#create called with invalid arguments["+e+", "+t+", "+n+", "+i+"]")},Ha.is=function(e){var t=e;return Pc.objectLiteral(t)&&Ba.is(t.start)&&Ba.is(t.end)},(Ja=Ga||(Ga={})).create=function(e,t){return{uri:e,range:t}},Ja.is=function(e){var t=e;return Pc.defined(t)&&$a.is(t.range)&&(Pc.string(t.uri)||Pc.undefined(t.uri))},(Ya=Xa||(Xa={})).create=function(e,t,n,i){return{targetUri:e,targetRange:t,targetSelectionRange:n,originSelectionRange:i}},Ya.is=function(e){var t=e;return Pc.defined(t)&&$a.is(t.targetRange)&&Pc.string(t.targetUri)&&($a.is(t.targetSelectionRange)||Pc.undefined(t.targetSelectionRange))&&($a.is(t.originSelectionRange)||Pc.undefined(t.originSelectionRange))},(Za=Qa||(Qa={})).create=function(e,t,n,i){return{red:e,green:t,blue:n,alpha:i}},Za.is=function(e){var t=e;return Pc.numberRange(t.red,0,1)&&Pc.numberRange(t.green,0,1)&&Pc.numberRange(t.blue,0,1)&&Pc.numberRange(t.alpha,0,1)},(tl=el||(el={})).create=function(e,t){return{range:e,color:t}},tl.is=function(e){var t=e;return $a.is(t.range)&&Qa.is(t.color)},(il=nl||(nl={})).create=function(e,t,n){return{label:e,textEdit:t,additionalTextEdits:n}},il.is=function(e){var t=e;return 
Pc.string(t.label)&&(Pc.undefined(t.textEdit)||yl.is(t))&&(Pc.undefined(t.additionalTextEdits)||Pc.typedArray(t.additionalTextEdits,yl.is))},(ol=rl||(rl={})).Comment="comment",ol.Imports="imports",ol.Region="region",(al=sl||(sl={})).create=function(e,t,n,i,r){var o={startLine:e,endLine:t};return Pc.defined(n)&&(o.startCharacter=n),Pc.defined(i)&&(o.endCharacter=i),Pc.defined(r)&&(o.kind=r),o},al.is=function(e){var t=e;return Pc.uinteger(t.startLine)&&Pc.uinteger(t.startLine)&&(Pc.undefined(t.startCharacter)||Pc.uinteger(t.startCharacter))&&(Pc.undefined(t.endCharacter)||Pc.uinteger(t.endCharacter))&&(Pc.undefined(t.kind)||Pc.string(t.kind))},(cl=ll||(ll={})).create=function(e,t){return{location:e,message:t}},cl.is=function(e){var t=e;return Pc.defined(t)&&Ga.is(t.location)&&Pc.string(t.message)},(dl=hl||(hl={})).Error=1,dl.Warning=2,dl.Information=3,dl.Hint=4,(ul=pl||(pl={})).Unnecessary=1,ul.Deprecated=2,(ml||(ml={})).is=function(e){var t=e;return void 0!==t&&null!==t&&Pc.string(t.href)},(gl=fl||(fl={})).create=function(e,t,n,i,r,o){var s={range:e,message:t};return Pc.defined(n)&&(s.severity=n),Pc.defined(i)&&(s.code=i),Pc.defined(r)&&(s.source=r),Pc.defined(o)&&(s.relatedInformation=o),s},gl.is=function(e){var t,n=e;return Pc.defined(n)&&$a.is(n.range)&&Pc.string(n.message)&&(Pc.number(n.severity)||Pc.undefined(n.severity))&&(Pc.integer(n.code)||Pc.string(n.code)||Pc.undefined(n.code))&&(Pc.undefined(n.codeDescription)||Pc.string(null===(t=n.codeDescription)||void 0===t?void 0:t.href))&&(Pc.string(n.source)||Pc.undefined(n.source))&&(Pc.undefined(n.relatedInformation)||Pc.typedArray(n.relatedInformation,ll.is))},(vl=bl||(bl={})).create=function(e,t){for(var n=[],i=2;i0&&(r.arguments=n),r},vl.is=function(e){var t=e;return 
Pc.defined(t)&&Pc.string(t.title)&&Pc.string(t.command)},(wl=yl||(yl={})).replace=function(e,t){return{range:e,newText:t}},wl.insert=function(e,t){return{range:{start:e,end:e},newText:t}},wl.del=function(e){return{range:e,newText:""}},wl.is=function(e){var t=e;return Pc.objectLiteral(t)&&Pc.string(t.newText)&&$a.is(t.range)},(Sl=xl||(xl={})).create=function(e,t,n){var i={label:e};return void 0!==t&&(i.needsConfirmation=t),void 0!==n&&(i.description=n),i},Sl.is=function(e){var t=e;return void 0!==t&&Pc.objectLiteral(t)&&Pc.string(t.label)&&(Pc.boolean(t.needsConfirmation)||void 0===t.needsConfirmation)&&(Pc.string(t.description)||void 0===t.description)},(Cl||(Cl={})).is=function(e){return"string"===typeof e},(kl=_l||(_l={})).replace=function(e,t,n){return{range:e,newText:t,annotationId:n}},kl.insert=function(e,t,n){return{range:{start:e,end:e},newText:t,annotationId:n}},kl.del=function(e,t){return{range:e,newText:"",annotationId:t}},kl.is=function(e){var t=e;return yl.is(t)&&(xl.is(t.annotationId)||Cl.is(t.annotationId))},(Rl=El||(El={})).create=function(e,t){return{textDocument:e,edits:t}},Rl.is=function(e){var t=e;return Pc.defined(t)&&Kl.is(t.textDocument)&&Array.isArray(t.edits)},(Fl=Nl||(Nl={})).create=function(e,t,n){var i={kind:"create",uri:e};return void 0===t||void 0===t.overwrite&&void 0===t.ignoreIfExists||(i.options=t),void 0!==n&&(i.annotationId=n),i},Fl.is=function(e){var t=e;return t&&"create"===t.kind&&Pc.string(t.uri)&&(void 0===t.options||(void 0===t.options.overwrite||Pc.boolean(t.options.overwrite))&&(void 0===t.options.ignoreIfExists||Pc.boolean(t.options.ignoreIfExists)))&&(void 0===t.annotationId||Cl.is(t.annotationId))},(Tl=Dl||(Dl={})).create=function(e,t,n,i){var r={kind:"rename",oldUri:e,newUri:t};return void 0===n||void 0===n.overwrite&&void 0===n.ignoreIfExists||(r.options=n),void 0!==i&&(r.annotationId=i),r},Tl.is=function(e){var t=e;return t&&"rename"===t.kind&&Pc.string(t.oldUri)&&Pc.string(t.newUri)&&(void 0===t.options||(void 
0===t.options.overwrite||Pc.boolean(t.options.overwrite))&&(void 0===t.options.ignoreIfExists||Pc.boolean(t.options.ignoreIfExists)))&&(void 0===t.annotationId||Cl.is(t.annotationId))},(Ml=Al||(Al={})).create=function(e,t,n){var i={kind:"delete",uri:e};return void 0===t||void 0===t.recursive&&void 0===t.ignoreIfNotExists||(i.options=t),void 0!==n&&(i.annotationId=n),i},Ml.is=function(e){var t=e;return t&&"delete"===t.kind&&Pc.string(t.uri)&&(void 0===t.options||(void 0===t.options.recursive||Pc.boolean(t.options.recursive))&&(void 0===t.options.ignoreIfNotExists||Pc.boolean(t.options.ignoreIfNotExists)))&&(void 0===t.annotationId||Cl.is(t.annotationId))},(zl||(zl={})).is=function(e){var t=e;return t&&(void 0!==t.changes||void 0!==t.documentChanges)&&(void 0===t.documentChanges||t.documentChanges.every((function(e){return Pc.string(e.kind)?Nl.is(e)||Dl.is(e)||Al.is(e):El.is(e)})))};var Ol,Wl,Vl,Ul,Kl,ql,Bl,jl,$l,Hl,Gl,Jl,Xl,Yl,Ql,Zl,ec,tc,nc,ic,rc,oc,sc,ac,lc,cc,hc,dc,pc,uc,mc,fc,gc,bc,vc,yc,wc,xc,Sc,Cc,_c,kc,Ec,Rc,Nc,Fc,Dc,Tc,Ac,Mc,zc,Ic=function(){function e(e,t){this.edits=e,this.changeAnnotations=t}return e.prototype.insert=function(e,t,n){var i,r;if(void 0===n?i=yl.insert(e,t):Cl.is(n)?(r=n,i=_l.insert(e,t,n)):(this.assertChangeAnnotations(this.changeAnnotations),r=this.changeAnnotations.manage(n),i=_l.insert(e,t,r)),this.edits.push(i),void 0!==r)return r},e.prototype.replace=function(e,t,n){var i,r;if(void 0===n?i=yl.replace(e,t):Cl.is(n)?(r=n,i=_l.replace(e,t,n)):(this.assertChangeAnnotations(this.changeAnnotations),r=this.changeAnnotations.manage(n),i=_l.replace(e,t,r)),this.edits.push(i),void 0!==r)return r},e.prototype.delete=function(e,t){var n,i;if(void 0===t?n=yl.del(e):Cl.is(t)?(i=t,n=_l.del(e,t)):(this.assertChangeAnnotations(this.changeAnnotations),i=this.changeAnnotations.manage(t),n=_l.del(e,i)),this.edits.push(n),void 0!==i)return i},e.prototype.add=function(e){this.edits.push(e)},e.prototype.all=function(){return 
this.edits},e.prototype.clear=function(){this.edits.splice(0,this.edits.length)},e.prototype.assertChangeAnnotations=function(e){if(void 0===e)throw new Error("Text edit change is not configured to manage change annotations.")},e}(),Lc=function(){function e(e){this._annotations=void 0===e?Object.create(null):e,this._counter=0,this._size=0}return e.prototype.all=function(){return this._annotations},Object.defineProperty(e.prototype,"size",{get:function(){return this._size},enumerable:!1,configurable:!0}),e.prototype.manage=function(e,t){var n;if(Cl.is(e)?n=e:(n=this.nextId(),t=e),void 0!==this._annotations[n])throw new Error("Id "+n+" is already in use.");if(void 0===t)throw new Error("No annotation provided for id "+n);return this._annotations[n]=t,this._size++,n},e.prototype.nextId=function(){return this._counter++,this._counter.toString()},e}();!function(){function e(e){var t=this;this._textEditChanges=Object.create(null),void 0!==e?(this._workspaceEdit=e,e.documentChanges?(this._changeAnnotations=new Lc(e.changeAnnotations),e.changeAnnotations=this._changeAnnotations.all(),e.documentChanges.forEach((function(e){if(El.is(e)){var n=new Ic(e.edits,t._changeAnnotations);t._textEditChanges[e.textDocument.uri]=n}}))):e.changes&&Object.keys(e.changes).forEach((function(n){var i=new Ic(e.changes[n]);t._textEditChanges[n]=i}))):this._workspaceEdit={}}Object.defineProperty(e.prototype,"edit",{get:function(){return this.initDocumentChanges(),void 0!==this._changeAnnotations&&(0===this._changeAnnotations.size?this._workspaceEdit.changeAnnotations=void 0:this._workspaceEdit.changeAnnotations=this._changeAnnotations.all()),this._workspaceEdit},enumerable:!1,configurable:!0}),e.prototype.getTextEditChange=function(e){if(Kl.is(e)){if(this.initDocumentChanges(),void 0===this._workspaceEdit.documentChanges)throw new Error("Workspace edit is not configured for document changes.");var t={uri:e.uri,version:e.version};if(!(i=this._textEditChanges[t.uri])){var 
n={textDocument:t,edits:r=[]};this._workspaceEdit.documentChanges.push(n),i=new Ic(r,this._changeAnnotations),this._textEditChanges[t.uri]=i}return i}if(this.initChanges(),void 0===this._workspaceEdit.changes)throw new Error("Workspace edit is not configured for normal text edit changes.");var i;if(!(i=this._textEditChanges[e])){var r=[];this._workspaceEdit.changes[e]=r,i=new Ic(r),this._textEditChanges[e]=i}return i},e.prototype.initDocumentChanges=function(){void 0===this._workspaceEdit.documentChanges&&void 0===this._workspaceEdit.changes&&(this._changeAnnotations=new Lc,this._workspaceEdit.documentChanges=[],this._workspaceEdit.changeAnnotations=this._changeAnnotations.all())},e.prototype.initChanges=function(){void 0===this._workspaceEdit.documentChanges&&void 0===this._workspaceEdit.changes&&(this._workspaceEdit.changes=Object.create(null))},e.prototype.createFile=function(e,t,n){if(this.initDocumentChanges(),void 0===this._workspaceEdit.documentChanges)throw new Error("Workspace edit is not configured for document changes.");var i,r,o;if(xl.is(t)||Cl.is(t)?i=t:n=t,void 0===i?r=Nl.create(e,n):(o=Cl.is(i)?i:this._changeAnnotations.manage(i),r=Nl.create(e,n,o)),this._workspaceEdit.documentChanges.push(r),void 0!==o)return o},e.prototype.renameFile=function(e,t,n,i){if(this.initDocumentChanges(),void 0===this._workspaceEdit.documentChanges)throw new Error("Workspace edit is not configured for document changes.");var r,o,s;if(xl.is(n)||Cl.is(n)?r=n:i=n,void 0===r?o=Dl.create(e,t,i):(s=Cl.is(r)?r:this._changeAnnotations.manage(r),o=Dl.create(e,t,i,s)),this._workspaceEdit.documentChanges.push(o),void 0!==s)return s},e.prototype.deleteFile=function(e,t,n){if(this.initDocumentChanges(),void 0===this._workspaceEdit.documentChanges)throw new Error("Workspace edit is not configured for document changes.");var i,r,o;if(xl.is(t)||Cl.is(t)?i=t:n=t,void 
0===i?r=Al.create(e,n):(o=Cl.is(i)?i:this._changeAnnotations.manage(i),r=Al.create(e,n,o)),this._workspaceEdit.documentChanges.push(r),void 0!==o)return o}}();(Wl=Ol||(Ol={})).create=function(e){return{uri:e}},Wl.is=function(e){var t=e;return Pc.defined(t)&&Pc.string(t.uri)},(Ul=Vl||(Vl={})).create=function(e,t){return{uri:e,version:t}},Ul.is=function(e){var t=e;return Pc.defined(t)&&Pc.string(t.uri)&&Pc.integer(t.version)},(ql=Kl||(Kl={})).create=function(e,t){return{uri:e,version:t}},ql.is=function(e){var t=e;return Pc.defined(t)&&Pc.string(t.uri)&&(null===t.version||Pc.integer(t.version))},(jl=Bl||(Bl={})).create=function(e,t,n,i){return{uri:e,languageId:t,version:n,text:i}},jl.is=function(e){var t=e;return Pc.defined(t)&&Pc.string(t.uri)&&Pc.string(t.languageId)&&Pc.integer(t.version)&&Pc.string(t.text)},(Hl=$l||($l={})).PlainText="plaintext",Hl.Markdown="markdown",function(e){e.is=function(t){var n=t;return n===e.PlainText||n===e.Markdown}}($l||($l={})),(Gl||(Gl={})).is=function(e){var t=e;return Pc.objectLiteral(e)&&$l.is(t.kind)&&Pc.string(t.value)},(Xl=Jl||(Jl={})).Text=1,Xl.Method=2,Xl.Function=3,Xl.Constructor=4,Xl.Field=5,Xl.Variable=6,Xl.Class=7,Xl.Interface=8,Xl.Module=9,Xl.Property=10,Xl.Unit=11,Xl.Value=12,Xl.Enum=13,Xl.Keyword=14,Xl.Snippet=15,Xl.Color=16,Xl.File=17,Xl.Reference=18,Xl.Folder=19,Xl.EnumMember=20,Xl.Constant=21,Xl.Struct=22,Xl.Event=23,Xl.Operator=24,Xl.TypeParameter=25,(Ql=Yl||(Yl={})).PlainText=1,Ql.Snippet=2,(Zl||(Zl={})).Deprecated=1,(tc=ec||(ec={})).create=function(e,t,n){return{newText:e,insert:t,replace:n}},tc.is=function(e){var t=e;return t&&Pc.string(t.newText)&&$a.is(t.insert)&&$a.is(t.replace)},(ic=nc||(nc={})).asIs=1,ic.adjustIndentation=2,(rc||(rc={})).create=function(e){return{label:e}},(oc||(oc={})).create=function(e,t){return{items:e||[],isIncomplete:!!t}},(ac=sc||(sc={})).fromPlainText=function(e){return e.replace(/[\\`*_{}[\]()#+\-.!]/g,"\\$&")},ac.is=function(e){var t=e;return 
Pc.string(t)||Pc.objectLiteral(t)&&Pc.string(t.language)&&Pc.string(t.value)},(lc||(lc={})).is=function(e){var t=e;return!!t&&Pc.objectLiteral(t)&&(Gl.is(t.contents)||sc.is(t.contents)||Pc.typedArray(t.contents,sc.is))&&(void 0===e.range||$a.is(e.range))},(cc||(cc={})).create=function(e,t){return t?{label:e,documentation:t}:{label:e}},(hc||(hc={})).create=function(e,t){for(var n=[],i=2;i=0;s--){var a=r[s],l=e.offsetAt(a.range.start),c=e.offsetAt(a.range.end);if(!(c<=o))throw new Error("Overlapping edit");i=i.substring(0,l)+a.newText+i.substring(c,i.length),o=l}return i}}(zc||(zc={}));var Pc,Oc=function(){function e(e,t,n,i){this._uri=e,this._languageId=t,this._version=n,this._content=i,this._lineOffsets=void 0}return Object.defineProperty(e.prototype,"uri",{get:function(){return this._uri},enumerable:!1,configurable:!0}),Object.defineProperty(e.prototype,"languageId",{get:function(){return this._languageId},enumerable:!1,configurable:!0}),Object.defineProperty(e.prototype,"version",{get:function(){return this._version},enumerable:!1,configurable:!0}),e.prototype.getText=function(e){if(e){var t=this.offsetAt(e.start),n=this.offsetAt(e.end);return this._content.substring(t,n)}return this._content},e.prototype.update=function(e,t){this._content=e.text,this._version=t,this._lineOffsets=void 0},e.prototype.getLineOffsets=function(){if(void 0===this._lineOffsets){for(var e=[],t=this._content,n=!0,i=0;i0&&e.push(t.length),this._lineOffsets=e}return this._lineOffsets},e.prototype.positionAt=function(e){e=Math.max(Math.min(e,this._content.length),0);var t=this.getLineOffsets(),n=0,i=t.length;if(0===i)return Ba.create(0,e);for(;ne?i=r:n=r+1}var o=n-1;return Ba.create(o,e-t[o])},e.prototype.offsetAt=function(e){var t=this.getLineOffsets();if(e.line>=t.length)return this._content.length;if(e.line<0)return 0;var n=t[e.line],i=e.line+1e?i=r:n=r+1}let r=n-1;return{line:r,character:e-t[r]}}offsetAt(e){let t=this.getLineOffsets();if(e.line>=t.length)return 
this._content.length;if(e.line<0)return 0;let n=t[e.line],i=e.line+12&&void 0!==arguments[2]?arguments[2]:0;const i=t?[n]:[];for(let r=0;rn.line||t.line===n.line&&t.character>n.character?{start:n,end:t}:e}function Gc(e){const t=Hc(e.range);return t!==e.range?{newText:e.newText,range:t}:e}(Vc=Wc||(Wc={})).create=function(e,t,n,i){return new Bc(e,t,n,i)},Vc.update=function(e,t,n){if(e instanceof Bc)return e.update(t,n),e;throw new Error("TextDocument.update: document must be created by TextDocument.create")},Vc.applyEdits=function(e,t){let n=e.getText(),i=jc(t.map(Gc),((e,t)=>{let n=e.range.start.line-t.range.start.line;return 0===n?e.range.start.character-t.range.start.character:n})),r=0;const o=[];for(const s of i){let t=e.offsetAt(s.range.start);if(tr&&o.push(n.substring(r,t)),s.newText.length&&o.push(s.newText),r=e.offsetAt(s.range.end)}return o.push(n.substr(r)),o.join("")},(Uc||(Uc={})).LATEST={textDocument:{completion:{completionItem:{documentationFormat:[$l.Markdown,$l.PlainText]}},hover:{contentFormat:[$l.Markdown,$l.PlainText]}}},(qc=Kc||(Kc={}))[qc.Unknown=0]="Unknown",qc[qc.File=1]="File",qc[qc.Directory=2]="Directory",qc[qc.SymbolicLink=64]="SymbolicLink";var Jc={E:"Edge",FF:"Firefox",S:"Safari",C:"Chrome",IE:"IE",O:"Opera"};function Xc(e){switch(e){case"experimental":return"\u26a0\ufe0f Property is experimental. Be cautious when using it.\ufe0f\n\n";case"nonstandard":return"\ud83d\udea8\ufe0f Property is nonstandard. Avoid using it.\n\n";case"obsolete":return"\ud83d\udea8\ufe0f\ufe0f\ufe0f Property is obsolete. 
Avoid using it.\n\n";default:return""}}function Yc(e,t,n){var i;if(""!==(i=t?{kind:"markdown",value:eh(e,n)}:{kind:"plaintext",value:Zc(e,n)}).value)return i}function Qc(e){return(e=e.replace(/[\\`*_{}[\]()#+\-.!]/g,"\\$&")).replace(//g,">")}function Zc(e,t){if(!e.description||""===e.description)return"";if("string"!==typeof e.description)return e.description.value;var n="";if(!1!==(null===t||void 0===t?void 0:t.documentation)){e.status&&(n+=Xc(e.status)),n+=e.description;var i=th(e.browsers);i&&(n+="\n("+i+")"),"syntax"in e&&(n+="\n\nSyntax: ".concat(e.syntax))}return e.references&&e.references.length>0&&!1!==(null===t||void 0===t?void 0:t.references)&&(n.length>0&&(n+="\n\n"),n+=e.references.map((function(e){return"".concat(e.name,": ").concat(e.url)})).join(" | ")),n}function eh(e,t){if(!e.description||""===e.description)return"";var n="";if(!1!==(null===t||void 0===t?void 0:t.documentation)){e.status&&(n+=Xc(e.status)),"string"===typeof e.description?n+=Qc(e.description):n+=e.description.kind===$l.Markdown?e.description.value:Qc(e.description.value);var i=th(e.browsers);i&&(n+="\n\n("+Qc(i)+")"),"syntax"in e&&e.syntax&&(n+="\n\nSyntax: ".concat(Qc(e.syntax)))}return e.references&&e.references.length>0&&!1!==(null===t||void 0===t?void 0:t.references)&&(n.length>0&&(n+="\n\n"),n+=e.references.map((function(e){return"[".concat(e.name,"](").concat(e.url,")")})).join(" | ")),n}function th(e){return void 0===e&&(e=[]),0===e.length?null:e.map((function(e){var t="",n=e.match(/([A-Z]+)(\d+)?/),i=n[1],r=n[2];return i in Jc&&(t+=Jc[i]),r&&(t+=" "+r),t})).join(", ")}var nh=Wa(),ih=[{func:"rgb($red, $green, $blue)",desc:nh("css.builtin.rgb","Creates a Color from red, green, and blue values.")},{func:"rgba($red, $green, $blue, $alpha)",desc:nh("css.builtin.rgba","Creates a Color from red, green, blue, and alpha values.")},{func:"hsl($hue, $saturation, $lightness)",desc:nh("css.builtin.hsl","Creates a Color from hue, saturation, and lightness values.")},{func:"hsla($hue, 
$saturation, $lightness, $alpha)",desc:nh("css.builtin.hsla","Creates a Color from hue, saturation, lightness, and alpha values.")},{func:"hwb($hue $white $black)",desc:nh("css.builtin.hwb","Creates a Color from hue, white and black.")}],rh={aliceblue:"#f0f8ff",antiquewhite:"#faebd7",aqua:"#00ffff",aquamarine:"#7fffd4",azure:"#f0ffff",beige:"#f5f5dc",bisque:"#ffe4c4",black:"#000000",blanchedalmond:"#ffebcd",blue:"#0000ff",blueviolet:"#8a2be2",brown:"#a52a2a",burlywood:"#deb887",cadetblue:"#5f9ea0",chartreuse:"#7fff00",chocolate:"#d2691e",coral:"#ff7f50",cornflowerblue:"#6495ed",cornsilk:"#fff8dc",crimson:"#dc143c",cyan:"#00ffff",darkblue:"#00008b",darkcyan:"#008b8b",darkgoldenrod:"#b8860b",darkgray:"#a9a9a9",darkgrey:"#a9a9a9",darkgreen:"#006400",darkkhaki:"#bdb76b",darkmagenta:"#8b008b",darkolivegreen:"#556b2f",darkorange:"#ff8c00",darkorchid:"#9932cc",darkred:"#8b0000",darksalmon:"#e9967a",darkseagreen:"#8fbc8f",darkslateblue:"#483d8b",darkslategray:"#2f4f4f",darkslategrey:"#2f4f4f",darkturquoise:"#00ced1",darkviolet:"#9400d3",deeppink:"#ff1493",deepskyblue:"#00bfff",dimgray:"#696969",dimgrey:"#696969",dodgerblue:"#1e90ff",firebrick:"#b22222",floralwhite:"#fffaf0",forestgreen:"#228b22",fuchsia:"#ff00ff",gainsboro:"#dcdcdc",ghostwhite:"#f8f8ff",gold:"#ffd700",goldenrod:"#daa520",gray:"#808080",grey:"#808080",green:"#008000",greenyellow:"#adff2f",honeydew:"#f0fff0",hotpink:"#ff69b4",indianred:"#cd5c5c",indigo:"#4b0082",ivory:"#fffff0",khaki:"#f0e68c",lavender:"#e6e6fa",lavenderblush:"#fff0f5",lawngreen:"#7cfc00",lemonchiffon:"#fffacd",lightblue:"#add8e6",lightcoral:"#f08080",lightcyan:"#e0ffff",lightgoldenrodyellow:"#fafad2",lightgray:"#d3d3d3",lightgrey:"#d3d3d3",lightgreen:"#90ee90",lightpink:"#ffb6c1",lightsalmon:"#ffa07a",lightseagreen:"#20b2aa",lightskyblue:"#87cefa",lightslategray:"#778899",lightslategrey:"#778899",lightsteelblue:"#b0c4de",lightyellow:"#ffffe0",lime:"#00ff00",limegreen:"#32cd32",linen:"#faf0e6",magenta:"#ff00ff",maroon:"#800000",mediumaquamari
ne:"#66cdaa",mediumblue:"#0000cd",mediumorchid:"#ba55d3",mediumpurple:"#9370d8",mediumseagreen:"#3cb371",mediumslateblue:"#7b68ee",mediumspringgreen:"#00fa9a",mediumturquoise:"#48d1cc",mediumvioletred:"#c71585",midnightblue:"#191970",mintcream:"#f5fffa",mistyrose:"#ffe4e1",moccasin:"#ffe4b5",navajowhite:"#ffdead",navy:"#000080",oldlace:"#fdf5e6",olive:"#808000",olivedrab:"#6b8e23",orange:"#ffa500",orangered:"#ff4500",orchid:"#da70d6",palegoldenrod:"#eee8aa",palegreen:"#98fb98",paleturquoise:"#afeeee",palevioletred:"#d87093",papayawhip:"#ffefd5",peachpuff:"#ffdab9",peru:"#cd853f",pink:"#ffc0cb",plum:"#dda0dd",powderblue:"#b0e0e6",purple:"#800080",red:"#ff0000",rebeccapurple:"#663399",rosybrown:"#bc8f8f",royalblue:"#4169e1",saddlebrown:"#8b4513",salmon:"#fa8072",sandybrown:"#f4a460",seagreen:"#2e8b57",seashell:"#fff5ee",sienna:"#a0522d",silver:"#c0c0c0",skyblue:"#87ceeb",slateblue:"#6a5acd",slategray:"#708090",slategrey:"#708090",snow:"#fffafa",springgreen:"#00ff7f",steelblue:"#4682b4",tan:"#d2b48c",teal:"#008080",thistle:"#d8bfd8",tomato:"#ff6347",turquoise:"#40e0d0",violet:"#ee82ee",wheat:"#f5deb3",white:"#ffffff",whitesmoke:"#f5f5f5",yellow:"#ffff00",yellowgreen:"#9acd32"},oh={currentColor:"The value of the 'color' property. The computed value of the 'currentColor' keyword is the computed value of the 'color' property. If the 'currentColor' keyword is set on the 'color' property itself, it is treated as 'color:inherit' at parse time.",transparent:"Fully transparent. 
This keyword can be considered a shorthand for rgba(0,0,0,0) which is its computed value."};function sh(e,t){var n=e.getText().match(/^([-+]?[0-9]*\.?[0-9]+)(%?)$/);if(n){n[2]&&(t=100);var i=parseFloat(n[1])/t;if(i>=0&&i<=1)return i}throw new Error}function ah(e){var t=e.getText(),n=t.match(/^([-+]?[0-9]*\.?[0-9]+)(deg|rad|grad|turn)?$/);if(n)switch(n[2]){case"deg":return parseFloat(t)%360;case"rad":return 180*parseFloat(t)/Math.PI%360;case"grad":return.9*parseFloat(t)%360;case"turn":return 360*parseFloat(t)%360;default:if("undefined"===typeof n[2])return parseFloat(t)%360}throw new Error}var lh=48,ch=57,hh=65,dh=97,ph=102;function uh(e){return e=dh&&e<=ph?e-dh+10:0)}function mh(e){if("#"!==e[0])return null;switch(e.length){case 4:return{red:17*uh(e.charCodeAt(1))/255,green:17*uh(e.charCodeAt(2))/255,blue:17*uh(e.charCodeAt(3))/255,alpha:1};case 5:return{red:17*uh(e.charCodeAt(1))/255,green:17*uh(e.charCodeAt(2))/255,blue:17*uh(e.charCodeAt(3))/255,alpha:17*uh(e.charCodeAt(4))/255};case 7:return{red:(16*uh(e.charCodeAt(1))+uh(e.charCodeAt(2)))/255,green:(16*uh(e.charCodeAt(3))+uh(e.charCodeAt(4)))/255,blue:(16*uh(e.charCodeAt(5))+uh(e.charCodeAt(6)))/255,alpha:1};case 9:return{red:(16*uh(e.charCodeAt(1))+uh(e.charCodeAt(2)))/255,green:(16*uh(e.charCodeAt(3))+uh(e.charCodeAt(4)))/255,blue:(16*uh(e.charCodeAt(5))+uh(e.charCodeAt(6)))/255,alpha:(16*uh(e.charCodeAt(7))+uh(e.charCodeAt(8)))/255}}return null}function fh(e,t,n,i){if(void 0===i&&(i=1),0===t)return{red:n,green:n,blue:n,alpha:i};var r=function(e,t,n){for(;n<0;)n+=6;for(;n>=6;)n-=6;return n<1?(t-e)*n+e:n<3?t:n<4?(t-e)*(4-n)+e:e},o=n<=.5?n*(t+1):n+t-n*t,s=2*n-o;return{red:r(s,o,(e/=60)+2),green:r(s,o,e),blue:r(s,o,e-2),alpha:i}}function gh(e){var t=e.red,n=e.green,i=e.blue,r=e.alpha,o=Math.max(t,n,i),s=Math.min(t,n,i),a=0,l=0,c=(s+o)/2,h=o-s;if(h>0){switch(l=Math.min(c<=.5?h/(2*c):h/(2-2*c),1),o){case t:a=(n-i)/h+(n4)return null;try{var 
c=4===i.length?sh(i[3],1):1;if("rgb"===n||"rgba"===n)return{red:sh(i[0],255),green:sh(i[1],255),blue:sh(i[2],255),alpha:c};if("hsl"===n||"hsla"===n)return fh(ah(i[0]),sh(i[1],100),sh(i[2],100),c);if("hwb"===n)return function(e,t,n,i){if(void 0===i&&(i=1),t+n>=1){var r=t/(t+n);return{red:r,green:r,blue:r,alpha:i}}var o=fh(e,1,.5,i),s=o.red;s*=1-t-n,s+=t;var a=o.green;a*=1-t-n,a+=t;var l=o.blue;return l*=1-t-n,{red:s,green:a,blue:l+=t,alpha:i}}(ah(i[0]),sh(i[1],100),sh(i[2],100),c)}catch(Du){return null}}else if(e.type===us.Identifier){if(e.parent&&e.parent.type!==us.Term)return null;var h=e.parent;if(h&&h.parent&&h.parent.type===us.BinaryExpression){var d=h.parent;if(d.parent&&d.parent.type===us.ListEntry&&d.parent.key===d)return null}var p=e.getText().toLowerCase();if("none"===p)return null;var u=rh[p];if(u)return mh(u)}return null}var vh={bottom:"Computes to \u2018100%\u2019 for the vertical position if one or two values are given, otherwise specifies the bottom edge as the origin for the next offset.",center:"Computes to \u201850%\u2019 (\u2018left 50%\u2019) for the horizontal position if the horizontal position is not otherwise specified, or \u201850%\u2019 (\u2018top 50%\u2019) for the vertical position if it is.",left:"Computes to \u20180%\u2019 for the horizontal position if one or two values are given, otherwise specifies the left edge as the origin for the next offset.",right:"Computes to \u2018100%\u2019 for the horizontal position if one or two values are given, otherwise specifies the right edge as the origin for the next offset.",top:"Computes to \u20180%\u2019 for the vertical position if one or two values are given, otherwise specifies the top edge as the origin for the next offset."},yh={"no-repeat":"Placed once and not repeated in this direction.",repeat:"Repeated in this direction as often as needed to cover the background painting area.","repeat-x":"Computes to \u2018repeat no-repeat\u2019.","repeat-y":"Computes to \u2018no-repeat 
repeat\u2019.",round:"Repeated as often as will fit within the background positioning area. If it doesn\u2019t fit a whole number of times, it is rescaled so that it does.",space:"Repeated as often as will fit within the background positioning area without being clipped and then the images are spaced out to fill the area."},wh={dashed:"A series of square-ended dashes.",dotted:"A series of round dots.",double:"Two parallel solid lines with some space between them.",groove:"Looks as if it were carved in the canvas.",hidden:"Same as \u2018none\u2019, but has different behavior in the border conflict resolution rules for border-collapsed tables.",inset:"Looks as if the content on the inside of the border is sunken into the canvas.",none:"No border. Color and width are ignored.",outset:"Looks as if the content on the inside of the border is coming out of the canvas.",ridge:"Looks as if it were coming out of the canvas.",solid:"A single line segment."},xh=["medium","thick","thin"],Sh={"border-box":"The background is painted within (clipped to) the border box.","content-box":"The background is painted within (clipped to) the content box.","padding-box":"The background is painted within (clipped to) the padding box."},Ch={"margin-box":"Uses the margin box as reference box.","fill-box":"Uses the object bounding box as reference box.","stroke-box":"Uses the stroke bounding box as reference box.","view-box":"Uses the nearest SVG viewport as reference box."},_h={initial:"Represents the value specified as the property\u2019s initial value.",inherit:"Represents the computed value of the property on the element\u2019s parent.",unset:"Acts as either `inherit` or `initial`, depending on whether the property is inherited or not."},kh={"var()":"Evaluates the value of a custom variable.","calc()":"Evaluates an mathematical expression. 
The following operators can be used: + - * /."},Eh={"url()":"Reference an image file by URL","image()":"Provide image fallbacks and annotations.","-webkit-image-set()":"Provide multiple resolutions. Remember to use unprefixed image-set() in addition.","image-set()":"Provide multiple resolutions of an image and const the UA decide which is most appropriate in a given situation.","-moz-element()":"Use an element in the document as an image. Remember to use unprefixed element() in addition.","element()":"Use an element in the document as an image.","cross-fade()":"Indicates the two images to be combined and how far along in the transition the combination is.","-webkit-gradient()":"Deprecated. Use modern linear-gradient() or radial-gradient() instead.","-webkit-linear-gradient()":"Linear gradient. Remember to use unprefixed version in addition.","-moz-linear-gradient()":"Linear gradient. Remember to use unprefixed version in addition.","-o-linear-gradient()":"Linear gradient. Remember to use unprefixed version in addition.","linear-gradient()":"A linear gradient is created by specifying a straight gradient line, and then several colors placed along that line.","-webkit-repeating-linear-gradient()":"Repeating Linear gradient. Remember to use unprefixed version in addition.","-moz-repeating-linear-gradient()":"Repeating Linear gradient. Remember to use unprefixed version in addition.","-o-repeating-linear-gradient()":"Repeating Linear gradient. Remember to use unprefixed version in addition.","repeating-linear-gradient()":"Same as linear-gradient, except the color-stops are repeated infinitely in both directions, with their positions shifted by multiples of the difference between the last specified color-stop\u2019s position and the first specified color-stop\u2019s position.","-webkit-radial-gradient()":"Radial gradient. Remember to use unprefixed version in addition.","-moz-radial-gradient()":"Radial gradient. 
Remember to use unprefixed version in addition.","radial-gradient()":"Colors emerge from a single point and smoothly spread outward in a circular or elliptical shape.","-webkit-repeating-radial-gradient()":"Repeating radial gradient. Remember to use unprefixed version in addition.","-moz-repeating-radial-gradient()":"Repeating radial gradient. Remember to use unprefixed version in addition.","repeating-radial-gradient()":"Same as radial-gradient, except the color-stops are repeated infinitely in both directions, with their positions shifted by multiples of the difference between the last specified color-stop\u2019s position and the first specified color-stop\u2019s position."},Rh={ease:"Equivalent to cubic-bezier(0.25, 0.1, 0.25, 1.0).","ease-in":"Equivalent to cubic-bezier(0.42, 0, 1.0, 1.0).","ease-in-out":"Equivalent to cubic-bezier(0.42, 0, 0.58, 1.0).","ease-out":"Equivalent to cubic-bezier(0, 0, 0.58, 1.0).",linear:"Equivalent to cubic-bezier(0.0, 0.0, 1.0, 1.0).","step-end":"Equivalent to steps(1, end).","step-start":"Equivalent to steps(1, start).","steps()":"The first parameter specifies the number of intervals in the function. The second parameter, which is optional, is either the value \u201cstart\u201d or \u201cend\u201d.","cubic-bezier()":"Specifies a cubic-bezier curve. The four values specify points P1 and P2 of the curve as (x1, y1, x2, y2).","cubic-bezier(0.6, -0.28, 0.735, 0.045)":"Ease-in Back. Overshoots.","cubic-bezier(0.68, -0.55, 0.265, 1.55)":"Ease-in-out Back. Overshoots.","cubic-bezier(0.175, 0.885, 0.32, 1.275)":"Ease-out Back. Overshoots.","cubic-bezier(0.6, 0.04, 0.98, 0.335)":"Ease-in Circular. Based on half circle.","cubic-bezier(0.785, 0.135, 0.15, 0.86)":"Ease-in-out Circular. Based on half circle.","cubic-bezier(0.075, 0.82, 0.165, 1)":"Ease-out Circular. Based on half circle.","cubic-bezier(0.55, 0.055, 0.675, 0.19)":"Ease-in Cubic. Based on power of three.","cubic-bezier(0.645, 0.045, 0.355, 1)":"Ease-in-out Cubic. 
Based on power of three.","cubic-bezier(0.215, 0.610, 0.355, 1)":"Ease-out Cubic. Based on power of three.","cubic-bezier(0.95, 0.05, 0.795, 0.035)":"Ease-in Exponential. Based on two to the power ten.","cubic-bezier(1, 0, 0, 1)":"Ease-in-out Exponential. Based on two to the power ten.","cubic-bezier(0.19, 1, 0.22, 1)":"Ease-out Exponential. Based on two to the power ten.","cubic-bezier(0.47, 0, 0.745, 0.715)":"Ease-in Sine.","cubic-bezier(0.445, 0.05, 0.55, 0.95)":"Ease-in-out Sine.","cubic-bezier(0.39, 0.575, 0.565, 1)":"Ease-out Sine.","cubic-bezier(0.55, 0.085, 0.68, 0.53)":"Ease-in Quadratic. Based on power of two.","cubic-bezier(0.455, 0.03, 0.515, 0.955)":"Ease-in-out Quadratic. Based on power of two.","cubic-bezier(0.25, 0.46, 0.45, 0.94)":"Ease-out Quadratic. Based on power of two.","cubic-bezier(0.895, 0.03, 0.685, 0.22)":"Ease-in Quartic. Based on power of four.","cubic-bezier(0.77, 0, 0.175, 1)":"Ease-in-out Quartic. Based on power of four.","cubic-bezier(0.165, 0.84, 0.44, 1)":"Ease-out Quartic. Based on power of four.","cubic-bezier(0.755, 0.05, 0.855, 0.06)":"Ease-in Quintic. Based on power of five.","cubic-bezier(0.86, 0, 0.07, 1)":"Ease-in-out Quintic. Based on power of five.","cubic-bezier(0.23, 1, 0.320, 1)":"Ease-out Quintic. 
Based on power of five."},Nh={"circle()":"Defines a circle.","ellipse()":"Defines an ellipse.","inset()":"Defines an inset rectangle.","polygon()":"Defines a polygon."},Fh={length:["em","rem","ex","px","cm","mm","in","pt","pc","ch","vw","vh","vmin","vmax"],angle:["deg","rad","grad","turn"],time:["ms","s"],frequency:["Hz","kHz"],resolution:["dpi","dpcm","dppx"],percentage:["%","fr"]},Dh=["a","abbr","address","area","article","aside","audio","b","base","bdi","bdo","blockquote","body","br","button","canvas","caption","cite","code","col","colgroup","data","datalist","dd","del","details","dfn","dialog","div","dl","dt","em","embed","fieldset","figcaption","figure","footer","form","h1","h2","h3","h4","h5","h6","head","header","hgroup","hr","html","i","iframe","img","input","ins","kbd","keygen","label","legend","li","link","main","map","mark","menu","menuitem","meta","meter","nav","noscript","object","ol","optgroup","option","output","p","param","picture","pre","progress","q","rb","rp","rt","rtc","ruby","s","samp","script","section","select","small","source","span","strong","style","sub","summary","sup","table","tbody","td","template","textarea","tfoot","th","thead","time","title","tr","track","u","ul","const","video","wbr"],Th=["circle","clipPath","cursor","defs","desc","ellipse","feBlend","feColorMatrix","feComponentTransfer","feComposite","feConvolveMatrix","feDiffuseLighting","feDisplacementMap","feDistantLight","feDropShadow","feFlood","feFuncA","feFuncB","feFuncG","feFuncR","feGaussianBlur","feImage","feMerge","feMergeNode","feMorphology","feOffset","fePointLight","feSpecularLighting","feSpotLight","feTile","feTurbulence","filter","foreignObject","g","hatch","hatchpath","image","line","linearGradient","marker","mask","mesh","meshpatch","meshrow","metadata","mpath","path","pattern","polygon","polyline","radialGradient","rect","set","solidcolor","stop","svg","switch","symbol","text","textPath","tspan","use","view"],Ah=["@bottom-center","@bottom-left","@bottom-left-corne
r","@bottom-right","@bottom-right-corner","@left-bottom","@left-middle","@left-top","@right-bottom","@right-middle","@right-top","@top-center","@top-left","@top-left-corner","@top-right","@top-right-corner"];function Mh(e){return Object.keys(e).map((function(t){return e[t]}))}function zh(e){return"undefined"!==typeof e}var Ih=function(e,t,n){if(n||2===arguments.length)for(var i,r=0,o=t.length;re.offset?r-e.offset:0}return e},e.prototype.markError=function(e,t,n,i){this.token!==this.lastErrorToken&&(e.addIssue(new La(e,t,ws.Error,void 0,this.token.offset,this.token.len)),this.lastErrorToken=this.token),(n||i)&&this.resync(n,i)},e.prototype.parseStylesheet=function(e){var t=e.version,n=e.getText();return this.internalParse(n,this._parseStylesheet,(function(i,r){if(e.version!==t)throw new Error("Underlying model has changed, AST is no longer valid");return n.substr(i,r)}))},e.prototype.internalParse=function(e,t,n){this.scanner.setSource(e),this.token=this.scanner.scan();var i=t.bind(this)();return i&&(i.textProvider=n||function(t,n){return e.substr(t,n)}),i},e.prototype._parseStylesheet=function(){for(var e=this.create(Es);e.addChild(this._parseStylesheetStart()););var t=!1;do{var n=!1;do{n=!1;var i=this._parseStylesheetStatement();for(i&&(e.addChild(i),n=!0,t=!1,this.peek(mo.EOF)||!this._needsSemicolonAfter(i)||this.accept(mo.SemiColon)||this.markError(e,Pl.SemiColonExpected));this.accept(mo.SemiColon)||this.accept(mo.CDO)||this.accept(mo.CDC);)n=!0,t=!1}while(n);if(this.peek(mo.EOF))break;t||(this.peek(mo.AtKeyword)?this.markError(e,Pl.UnknownAtRule):this.markError(e,Pl.RuleOrSelectorExpected),t=!0),this.consumeToken()}while(!this.peek(mo.EOF));return this.finish(e)},e.prototype._parseStylesheetStart=function(){return this._parseCharset()},e.prototype._parseStylesheetStatement=function(e){return void 0===e&&(e=!1),this.peek(mo.AtKeyword)?this._parseStylesheetAtStatement(e):this._parseRuleset(e)},e.prototype._parseStylesheetAtStatement=function(e){return void 
0===e&&(e=!1),this._parseImport()||this._parseMedia(e)||this._parsePage()||this._parseFontFace()||this._parseKeyframe()||this._parseSupports(e)||this._parseViewPort()||this._parseNamespace()||this._parseDocument()||this._parseUnknownAtRule()},e.prototype._tryParseRuleset=function(e){var t=this.mark();if(this._parseSelector(e)){for(;this.accept(mo.Comma)&&this._parseSelector(e););if(this.accept(mo.CurlyL))return this.restoreAtMark(t),this._parseRuleset(e)}return this.restoreAtMark(t),null},e.prototype._parseRuleset=function(e){void 0===e&&(e=!1);var t=this.create(Fs),n=t.getSelectors();if(!n.addChild(this._parseSelector(e)))return null;for(;this.accept(mo.Comma);)if(!n.addChild(this._parseSelector(e)))return this.finish(t,Pl.SelectorExpected);return this._parseBody(t,this._parseRuleSetDeclaration.bind(this))},e.prototype._parseRuleSetDeclarationAtStatement=function(){return this._parseUnknownAtRule()},e.prototype._parseRuleSetDeclaration=function(){return this.peek(mo.AtKeyword)?this._parseRuleSetDeclarationAtStatement():this._parseDeclaration()},e.prototype._needsSemicolonAfter=function(e){switch(e.type){case us.Keyframe:case us.ViewPort:case us.Media:case us.Ruleset:case us.Namespace:case us.If:case us.For:case us.Each:case us.While:case us.MixinDeclaration:case us.FunctionDeclaration:case us.MixinContentDeclaration:return!1;case us.ExtendsReference:case us.MixinContentReference:case us.ReturnStatement:case us.MediaQuery:case us.Debug:case us.Import:case us.AtApplyRule:case us.CustomPropertyDeclaration:return!0;case us.VariableDeclaration:return e.needsSemicolon;case us.MixinReference:return!e.getContent();case us.Declaration:return!e.getNestedProperties()}return!1},e.prototype._parseDeclarations=function(e){var t=this.create(Rs);if(!this.accept(mo.CurlyL))return null;for(var n=e();t.addChild(n)&&!this.peek(mo.CurlyR);){if(this._needsSemicolonAfter(n)&&!this.accept(mo.SemiColon))return 
this.finish(t,Pl.SemiColonExpected,[mo.SemiColon,mo.CurlyR]);for(n&&this.prevToken&&this.prevToken.type===mo.SemiColon&&(n.semicolonPosition=this.prevToken.offset);this.accept(mo.SemiColon););n=e()}return this.accept(mo.CurlyR)?this.finish(t):this.finish(t,Pl.RightCurlyExpected,[mo.CurlyR,mo.SemiColon])},e.prototype._parseBody=function(e,t){return e.setDeclarations(this._parseDeclarations(t))?this.finish(e):this.finish(e,Pl.LeftCurlyExpected,[mo.CurlyR,mo.SemiColon])},e.prototype._parseSelector=function(e){var t=this.create(Ds),n=!1;for(e&&(n=t.addChild(this._parseCombinator()));t.addChild(this._parseSimpleSelector());)n=!0,t.addChild(this._parseCombinator());return n?this.finish(t):null},e.prototype._parseDeclaration=function(e){var t=this._tryParseCustomPropertyDeclaration(e);if(t)return t;var n=this.create(zs);return n.setProperty(this._parseProperty())?this.accept(mo.Colon)?(this.prevToken&&(n.colonPosition=this.prevToken.offset),n.setValue(this._parseExpr())?(n.addChild(this._parsePrio()),this.peek(mo.SemiColon)&&(n.semicolonPosition=this.token.offset),this.finish(n)):this.finish(n,Pl.PropertyValueExpected)):this.finish(n,Pl.ColonExpected,[mo.Colon],e||[mo.SemiColon]):null},e.prototype._tryParseCustomPropertyDeclaration=function(e){if(!this.peekRegExp(mo.Ident,/^--/))return null;var t=this.create(Is);if(!t.setProperty(this._parseProperty()))return null;if(!this.accept(mo.Colon))return this.finish(t,Pl.ColonExpected,[mo.Colon]);this.prevToken&&(t.colonPosition=this.prevToken.offset);var n=this.mark();if(this.peek(mo.CurlyL)){var i=this.create(Ms),r=this._parseDeclarations(this._parseRuleSetDeclaration.bind(this));if(i.setDeclarations(r)&&!r.isErroneous(!0)&&(i.addChild(this._parsePrio()),this.peek(mo.SemiColon)))return this.finish(i),t.setPropertySet(i),t.semicolonPosition=this.token.offset,this.finish(t);this.restoreAtMark(n)}var o=this._parseExpr();return 
o&&!o.isErroneous(!0)&&(this._parsePrio(),this.peekOne.apply(this,Ih(Ih([],e||[],!1),[mo.SemiColon,mo.EOF],!1)))?(t.setValue(o),this.peek(mo.SemiColon)&&(t.semicolonPosition=this.token.offset),this.finish(t)):(this.restoreAtMark(n),t.addChild(this._parseCustomPropertyValue(e)),t.addChild(this._parsePrio()),zh(t.colonPosition)&&this.token.offset===t.colonPosition+1?this.finish(t,Pl.PropertyValueExpected):this.finish(t))},e.prototype._parseCustomPropertyValue=function(e){var t=this;void 0===e&&(e=[mo.CurlyR]);var n=this.create(Ss),i=function(){return 0===o&&0===s&&0===a},r=function(){return-1!==e.indexOf(t.token.type)},o=0,s=0,a=0;e:for(;;){switch(this.token.type){case mo.SemiColon:case mo.Exclamation:if(i())break e;break;case mo.CurlyL:o++;break;case mo.CurlyR:if(--o<0){if(r()&&0===s&&0===a)break e;return this.finish(n,Pl.LeftCurlyExpected)}break;case mo.ParenthesisL:s++;break;case mo.ParenthesisR:if(--s<0){if(r()&&0===a&&0===o)break e;return this.finish(n,Pl.LeftParenthesisExpected)}break;case mo.BracketL:a++;break;case mo.BracketR:if(--a<0)return this.finish(n,Pl.LeftSquareBracketExpected);break;case mo.BadString:break e;case mo.EOF:var l=Pl.RightCurlyExpected;return a>0?l=Pl.RightSquareBracketExpected:s>0&&(l=Pl.RightParenthesisExpected),this.finish(n,l)}this.consumeToken()}return this.finish(n)},e.prototype._tryToParseDeclaration=function(e){var t=this.mark();return this._parseProperty()&&this.accept(mo.Colon)?(this.restoreAtMark(t),this._parseDeclaration(e)):(this.restoreAtMark(t),null)},e.prototype._parseProperty=function(){var e=this.create(Ls),t=this.mark();return(this.acceptDelim("*")||this.acceptDelim("_"))&&this.hasWhitespace()?(this.restoreAtMark(t),null):e.setIdentifier(this._parsePropertyIdentifier())?this.finish(e):null},e.prototype._parsePropertyIdentifier=function(){return this._parseIdent()},e.prototype._parseCharset=function(){if(!this.peek(mo.Charset))return null;var e=this.create(Ss);return 
this.consumeToken(),this.accept(mo.String)?this.accept(mo.SemiColon)?this.finish(e):this.finish(e,Pl.SemiColonExpected):this.finish(e,Pl.IdentifierExpected)},e.prototype._parseImport=function(){if(!this.peekKeyword("@import"))return null;var e=this.create(Ys);return this.consumeToken(),e.addChild(this._parseURILiteral())||e.addChild(this._parseStringLiteral())?(this.peek(mo.SemiColon)||this.peek(mo.EOF)||e.setMedialist(this._parseMediaQueryList()),this.finish(e)):this.finish(e,Pl.URIOrStringExpected)},e.prototype._parseNamespace=function(){if(!this.peekKeyword("@namespace"))return null;var e=this.create(na);return this.consumeToken(),e.addChild(this._parseURILiteral())||(e.addChild(this._parseIdent()),e.addChild(this._parseURILiteral())||e.addChild(this._parseStringLiteral()))?this.accept(mo.SemiColon)?this.finish(e):this.finish(e,Pl.SemiColonExpected):this.finish(e,Pl.URIExpected,[mo.SemiColon])},e.prototype._parseFontFace=function(){if(!this.peekKeyword("@font-face"))return null;var e=this.create(Hs);return this.consumeToken(),this._parseBody(e,this._parseRuleSetDeclaration.bind(this))},e.prototype._parseViewPort=function(){if(!this.peekKeyword("@-ms-viewport")&&!this.peekKeyword("@-o-viewport")&&!this.peekKeyword("@viewport"))return null;var e=this.create($s);return this.consumeToken(),this._parseBody(e,this._parseRuleSetDeclaration.bind(this))},e.prototype._parseKeyframe=function(){if(!this.peekRegExp(mo.AtKeyword,this.keyframeRegex))return null;var e=this.create(Js),t=this.create(Ss);return this.consumeToken(),e.setKeyword(this.finish(t)),t.matches("@-ms-keyframes")&&this.markError(t,Pl.UnknownKeyword),e.setIdentifier(this._parseKeyframeIdent())?this._parseBody(e,this._parseKeyframeSelector.bind(this)):this.finish(e,Pl.IdentifierExpected,[mo.CurlyR])},e.prototype._parseKeyframeIdent=function(){return this._parseIdent([fs.Keyframe])},e.prototype._parseKeyframeSelector=function(){var 
e=this.create(Xs);if(!e.addChild(this._parseIdent())&&!this.accept(mo.Percentage))return null;for(;this.accept(mo.Comma);)if(!e.addChild(this._parseIdent())&&!this.accept(mo.Percentage))return this.finish(e,Pl.PercentageExpected);return this._parseBody(e,this._parseRuleSetDeclaration.bind(this))},e.prototype._tryParseKeyframeSelector=function(){var e=this.create(Xs),t=this.mark();if(!e.addChild(this._parseIdent())&&!this.accept(mo.Percentage))return null;for(;this.accept(mo.Comma);)if(!e.addChild(this._parseIdent())&&!this.accept(mo.Percentage))return this.restoreAtMark(t),null;return this.peek(mo.CurlyL)?this._parseBody(e,this._parseRuleSetDeclaration.bind(this)):(this.restoreAtMark(t),null)},e.prototype._parseSupports=function(e){if(void 0===e&&(e=!1),!this.peekKeyword("@supports"))return null;var t=this.create(ra);return this.consumeToken(),t.addChild(this._parseSupportsCondition()),this._parseBody(t,this._parseSupportsDeclaration.bind(this,e))},e.prototype._parseSupportsDeclaration=function(e){return void 0===e&&(e=!1),e?this._tryParseRuleset(!0)||this._tryToParseDeclaration()||this._parseStylesheetStatement(!0):this._parseStylesheetStatement(!1)},e.prototype._parseSupportsCondition=function(){var e=this.create(ha);if(this.acceptIdent("not"))e.addChild(this._parseSupportsConditionInParens());else if(e.addChild(this._parseSupportsConditionInParens()),this.peekRegExp(mo.Ident,/^(and|or)$/i))for(var t=this.token.text.toLowerCase();this.acceptIdent(t);)e.addChild(this._parseSupportsConditionInParens());return this.finish(e)},e.prototype._parseSupportsConditionInParens=function(){var e=this.create(ha);if(this.accept(mo.ParenthesisL))return 
this.prevToken&&(e.lParent=this.prevToken.offset),e.addChild(this._tryToParseDeclaration([mo.ParenthesisR]))||this._parseSupportsCondition()?this.accept(mo.ParenthesisR)?(this.prevToken&&(e.rParent=this.prevToken.offset),this.finish(e)):this.finish(e,Pl.RightParenthesisExpected,[mo.ParenthesisR],[]):this.finish(e,Pl.ConditionExpected);if(this.peek(mo.Ident)){var t=this.mark();if(this.consumeToken(),!this.hasWhitespace()&&this.accept(mo.ParenthesisL)){for(var n=1;this.token.type!==mo.EOF&&0!==n;)this.token.type===mo.ParenthesisL?n++:this.token.type===mo.ParenthesisR&&n--,this.consumeToken();return this.finish(e)}this.restoreAtMark(t)}return this.finish(e,Pl.LeftParenthesisExpected,[],[mo.ParenthesisL])},e.prototype._parseMediaDeclaration=function(e){return void 0===e&&(e=!1),e?this._tryParseRuleset(!0)||this._tryToParseDeclaration()||this._parseStylesheetStatement(!0):this._parseStylesheetStatement(!1)},e.prototype._parseMedia=function(e){if(void 0===e&&(e=!1),!this.peekKeyword("@media"))return null;var t=this.create(ia);return this.consumeToken(),t.addChild(this._parseMediaQueryList())?this._parseBody(t,this._parseMediaDeclaration.bind(this,e)):this.finish(t,Pl.MediaQueryExpected)},e.prototype._parseMediaQueryList=function(){var e=this.create(sa);if(!e.addChild(this._parseMediaQuery()))return this.finish(e,Pl.MediaQueryExpected);for(;this.accept(mo.Comma);)if(!e.addChild(this._parseMediaQuery()))return this.finish(e,Pl.MediaQueryExpected);return this.finish(e)},e.prototype._parseMediaQuery=function(){var e=this.create(aa),t=this.mark();if(this.acceptIdent("not"),this.peek(mo.ParenthesisL))this.restoreAtMark(t),e.addChild(this._parseMediaCondition());else{if(this.acceptIdent("only"),!e.addChild(this._parseIdent()))return null;this.acceptIdent("and")&&e.addChild(this._parseMediaCondition())}return this.finish(e)},e.prototype._parseRatio=function(){var e=this.mark(),t=this.create(va);return 
this._parseNumeric()?this.acceptDelim("/")?this._parseNumeric()?this.finish(t):this.finish(t,Pl.NumberExpected):(this.restoreAtMark(e),null):null},e.prototype._parseMediaCondition=function(){var e=this.create(la);this.acceptIdent("not");for(var t=!0;t;){if(!this.accept(mo.ParenthesisL))return this.finish(e,Pl.LeftParenthesisExpected,[],[mo.CurlyL]);if(this.peek(mo.ParenthesisL)||this.peekIdent("not")?e.addChild(this._parseMediaCondition()):e.addChild(this._parseMediaFeature()),!this.accept(mo.ParenthesisR))return this.finish(e,Pl.RightParenthesisExpected,[],[mo.CurlyL]);t=this.acceptIdent("and")||this.acceptIdent("or")}return this.finish(e)},e.prototype._parseMediaFeature=function(){var e=this,t=[mo.ParenthesisR],n=this.create(ca),i=function(){return e.acceptDelim("<")||e.acceptDelim(">")?(e.hasWhitespace()||e.acceptDelim("="),!0):!!e.acceptDelim("=")};if(n.addChild(this._parseMediaFeatureName())){if(this.accept(mo.Colon)){if(!n.addChild(this._parseMediaFeatureValue()))return this.finish(n,Pl.TermExpected,[],t)}else if(i()){if(!n.addChild(this._parseMediaFeatureValue()))return this.finish(n,Pl.TermExpected,[],t);if(i()&&!n.addChild(this._parseMediaFeatureValue()))return this.finish(n,Pl.TermExpected,[],t)}}else{if(!n.addChild(this._parseMediaFeatureValue()))return this.finish(n,Pl.IdentifierExpected,[],t);if(!i())return this.finish(n,Pl.OperatorExpected,[],t);if(!n.addChild(this._parseMediaFeatureName()))return this.finish(n,Pl.IdentifierExpected,[],t);if(i()&&!n.addChild(this._parseMediaFeatureValue()))return this.finish(n,Pl.TermExpected,[],t)}return this.finish(n)},e.prototype._parseMediaFeatureName=function(){return this._parseIdent()},e.prototype._parseMediaFeatureValue=function(){return this._parseRatio()||this._parseTermExpression()},e.prototype._parseMedium=function(){var e=this.create(Ss);return e.addChild(this._parseIdent())?this.finish(e):null},e.prototype._parsePageDeclaration=function(){return 
this._parsePageMarginBox()||this._parseRuleSetDeclaration()},e.prototype._parsePage=function(){if(!this.peekKeyword("@page"))return null;var e=this.create(da);if(this.consumeToken(),e.addChild(this._parsePageSelector()))for(;this.accept(mo.Comma);)if(!e.addChild(this._parsePageSelector()))return this.finish(e,Pl.IdentifierExpected);return this._parseBody(e,this._parsePageDeclaration.bind(this))},e.prototype._parsePageMarginBox=function(){if(!this.peek(mo.AtKeyword))return null;var e=this.create(pa);return this.acceptOneKeyword(Ah)||this.markError(e,Pl.UnknownAtRule,[],[mo.CurlyL]),this._parseBody(e,this._parseRuleSetDeclaration.bind(this))},e.prototype._parsePageSelector=function(){if(!this.peek(mo.Ident)&&!this.peek(mo.Colon))return null;var e=this.create(Ss);return e.addChild(this._parseIdent()),this.accept(mo.Colon)&&!e.addChild(this._parseIdent())?this.finish(e,Pl.IdentifierExpected):this.finish(e)},e.prototype._parseDocument=function(){if(!this.peekKeyword("@-moz-document"))return null;var e=this.create(oa);return this.consumeToken(),this.resync([],[mo.CurlyL]),this._parseBody(e,this._parseStylesheetStatement.bind(this))},e.prototype._parseUnknownAtRule=function(){if(!this.peek(mo.AtKeyword))return null;var e=this.create(Ta);e.addChild(this._parseUnknownAtRuleName());var t=0,n=0,i=0,r=0;e:for(;;){switch(this.token.type){case mo.SemiColon:if(0===n&&0===i&&0===r)break e;break;case mo.EOF:return n>0?this.finish(e,Pl.RightCurlyExpected):r>0?this.finish(e,Pl.RightSquareBracketExpected):i>0?this.finish(e,Pl.RightParenthesisExpected):this.finish(e);case mo.CurlyL:t++,n++;break;case mo.CurlyR:if(n--,t>0&&0===n){if(this.consumeToken(),r>0)return this.finish(e,Pl.RightSquareBracketExpected);if(i>0)return this.finish(e,Pl.RightParenthesisExpected);break e}if(n<0){if(0===i&&0===r)break e;return this.finish(e,Pl.LeftCurlyExpected)}break;case mo.ParenthesisL:i++;break;case mo.ParenthesisR:if(--i<0)return this.finish(e,Pl.LeftParenthesisExpected);break;case 
mo.BracketL:r++;break;case mo.BracketR:if(--r<0)return this.finish(e,Pl.LeftSquareBracketExpected)}this.consumeToken()}return e},e.prototype._parseUnknownAtRuleName=function(){var e=this.create(Ss);return this.accept(mo.AtKeyword)?this.finish(e):e},e.prototype._parseOperator=function(){if(this.peekDelim("/")||this.peekDelim("*")||this.peekDelim("+")||this.peekDelim("-")||this.peek(mo.Dashmatch)||this.peek(mo.Includes)||this.peek(mo.SubstringOperator)||this.peek(mo.PrefixOperator)||this.peek(mo.SuffixOperator)||this.peekDelim("=")){var e=this.createNode(us.Operator);return this.consumeToken(),this.finish(e)}return null},e.prototype._parseUnaryOperator=function(){if(!this.peekDelim("+")&&!this.peekDelim("-"))return null;var e=this.create(Ss);return this.consumeToken(),this.finish(e)},e.prototype._parseCombinator=function(){if(this.peekDelim(">")){var e=this.create(Ss);this.consumeToken();var t=this.mark();if(!this.hasWhitespace()&&this.acceptDelim(">")){if(!this.hasWhitespace()&&this.acceptDelim(">"))return e.type=us.SelectorCombinatorShadowPiercingDescendant,this.finish(e);this.restoreAtMark(t)}return e.type=us.SelectorCombinatorParent,this.finish(e)}if(this.peekDelim("+")){e=this.create(Ss);return this.consumeToken(),e.type=us.SelectorCombinatorSibling,this.finish(e)}if(this.peekDelim("~")){e=this.create(Ss);return this.consumeToken(),e.type=us.SelectorCombinatorAllSiblings,this.finish(e)}if(this.peekDelim("/")){e=this.create(Ss);this.consumeToken();t=this.mark();if(!this.hasWhitespace()&&this.acceptIdent("deep")&&!this.hasWhitespace()&&this.acceptDelim("/"))return e.type=us.SelectorCombinatorShadowPiercingDescendant,this.finish(e);this.restoreAtMark(t)}return null},e.prototype._parseSimpleSelector=function(){var e=this.create(Ts),t=0;for(e.addChild(this._parseElementName())&&t++;(0===t||!this.hasWhitespace())&&e.addChild(this._parseSimpleSelectorBody());)t++;return t>0?this.finish(e):null},e.prototype._parseSimpleSelectorBody=function(){return 
this._parsePseudo()||this._parseHash()||this._parseClass()||this._parseAttrib()},e.prototype._parseSelectorIdent=function(){return this._parseIdent()},e.prototype._parseHash=function(){if(!this.peek(mo.Hash)&&!this.peekDelim("#"))return null;var e=this.createNode(us.IdentifierSelector);if(this.acceptDelim("#")){if(this.hasWhitespace()||!e.addChild(this._parseSelectorIdent()))return this.finish(e,Pl.IdentifierExpected)}else this.consumeToken();return this.finish(e)},e.prototype._parseClass=function(){if(!this.peekDelim("."))return null;var e=this.createNode(us.ClassSelector);return this.consumeToken(),this.hasWhitespace()||!e.addChild(this._parseSelectorIdent())?this.finish(e,Pl.IdentifierExpected):this.finish(e)},e.prototype._parseElementName=function(){var e=this.mark(),t=this.createNode(us.ElementNameSelector);return t.addChild(this._parseNamespacePrefix()),t.addChild(this._parseSelectorIdent())||this.acceptDelim("*")?this.finish(t):(this.restoreAtMark(e),null)},e.prototype._parseNamespacePrefix=function(){var e=this.mark(),t=this.createNode(us.NamespacePrefix);return!t.addChild(this._parseIdent())&&this.acceptDelim("*"),this.acceptDelim("|")?this.finish(t):(this.restoreAtMark(e),null)},e.prototype._parseAttrib=function(){if(!this.peek(mo.BracketL))return null;var e=this.create(ga);return this.consumeToken(),e.setNamespacePrefix(this._parseNamespacePrefix()),e.setIdentifier(this._parseIdent())?(e.setOperator(this._parseOperator())&&(e.setValue(this._parseBinaryExpr()),this.acceptIdent("i"),this.acceptIdent("s")),this.accept(mo.BracketR)?this.finish(e):this.finish(e,Pl.RightSquareBracketExpected)):this.finish(e,Pl.IdentifierExpected)},e.prototype._parsePseudo=function(){var e=this,t=this._tryParsePseudoIdentifier();if(t){if(!this.hasWhitespace()&&this.accept(mo.ParenthesisL)){if(t.addChild(this.try((function(){var t=e.create(Ss);if(!t.addChild(e._parseSelector(!1)))return null;for(;e.accept(mo.Comma)&&t.addChild(e._parseSelector(!1)););return 
e.peek(mo.ParenthesisR)?e.finish(t):null}))||this._parseBinaryExpr()),!this.accept(mo.ParenthesisR))return this.finish(t,Pl.RightParenthesisExpected)}return this.finish(t)}return null},e.prototype._tryParsePseudoIdentifier=function(){if(!this.peek(mo.Colon))return null;var e=this.mark(),t=this.createNode(us.PseudoSelector);return this.consumeToken(),this.hasWhitespace()?(this.restoreAtMark(e),null):(this.accept(mo.Colon),this.hasWhitespace()||!t.addChild(this._parseIdent())?this.finish(t,Pl.IdentifierExpected):this.finish(t))},e.prototype._tryParsePrio=function(){var e=this.mark(),t=this._parsePrio();return t||(this.restoreAtMark(e),null)},e.prototype._parsePrio=function(){if(!this.peek(mo.Exclamation))return null;var e=this.createNode(us.Prio);return this.accept(mo.Exclamation)&&this.acceptIdent("important")?this.finish(e):null},e.prototype._parseExpr=function(e){void 0===e&&(e=!1);var t=this.create(ua);if(!t.addChild(this._parseBinaryExpr()))return null;for(;;){if(this.peek(mo.Comma)){if(e)return this.finish(t);this.consumeToken()}else if(!this.hasWhitespace())break;if(!t.addChild(this._parseBinaryExpr()))break}return this.finish(t)},e.prototype._parseUnicodeRange=function(){if(!this.peekIdent("u"))return null;var e=this.create(_s);return this.acceptUnicodeRange()?this.finish(e):null},e.prototype._parseNamedLine=function(){if(!this.peek(mo.BracketL))return null;var e=this.createNode(us.GridLine);for(this.consumeToken();e.addChild(this._parseIdent()););return this.accept(mo.BracketR)?this.finish(e):this.finish(e,Pl.RightSquareBracketExpected)},e.prototype._parseBinaryExpr=function(e,t){var n=this.create(ma);if(!n.setLeft(e||this._parseTerm()))return null;if(!n.setOperator(t||this._parseOperator()))return this.finish(n);if(!n.setRight(this._parseTerm()))return this.finish(n,Pl.TermExpected);n=this.finish(n);var i=this._parseOperator();return i&&(n=this._parseBinaryExpr(n,i)),this.finish(n)},e.prototype._parseTerm=function(){var e=this.create(fa);return 
e.setOperator(this._parseUnaryOperator()),e.setExpression(this._parseTermExpression())?this.finish(e):null},e.prototype._parseTermExpression=function(){return this._parseURILiteral()||this._parseUnicodeRange()||this._parseFunction()||this._parseIdent()||this._parseStringLiteral()||this._parseNumeric()||this._parseHexColor()||this._parseOperation()||this._parseNamedLine()},e.prototype._parseOperation=function(){if(!this.peek(mo.ParenthesisL))return null;var e=this.create(Ss);return this.consumeToken(),e.addChild(this._parseExpr()),this.accept(mo.ParenthesisR)?this.finish(e):this.finish(e,Pl.RightParenthesisExpected)},e.prototype._parseNumeric=function(){if(this.peek(mo.Num)||this.peek(mo.Percentage)||this.peek(mo.Resolution)||this.peek(mo.Length)||this.peek(mo.EMS)||this.peek(mo.EXS)||this.peek(mo.Angle)||this.peek(mo.Time)||this.peek(mo.Dimension)||this.peek(mo.Freq)){var e=this.create(Sa);return this.consumeToken(),this.finish(e)}return null},e.prototype._parseStringLiteral=function(){if(!this.peek(mo.String)&&!this.peek(mo.BadString))return null;var e=this.createNode(us.StringLiteral);return this.consumeToken(),this.finish(e)},e.prototype._parseURILiteral=function(){if(!this.peekRegExp(mo.Ident,/^url(-prefix)?$/i))return null;var e=this.mark(),t=this.createNode(us.URILiteral);return this.accept(mo.Ident),this.hasWhitespace()||!this.peek(mo.ParenthesisL)?(this.restoreAtMark(e),null):(this.scanner.inURL=!0,this.consumeToken(),t.addChild(this._parseURLArgument()),this.scanner.inURL=!1,this.accept(mo.ParenthesisR)?this.finish(t):this.finish(t,Pl.RightParenthesisExpected))},e.prototype._parseURLArgument=function(){var e=this.create(Ss);return this.accept(mo.String)||this.accept(mo.BadString)||this.acceptUnquotedString()?this.finish(e):null},e.prototype._parseIdent=function(e){if(!this.peek(mo.Ident))return null;var t=this.create(ks);return 
e&&(t.referenceTypes=e),t.isCustomProperty=this.peekRegExp(mo.Ident,/^--/),this.consumeToken(),this.finish(t)},e.prototype._parseFunction=function(){var e=this.mark(),t=this.create(Ps);if(!t.setIdentifier(this._parseFunctionIdentifier()))return null;if(this.hasWhitespace()||!this.accept(mo.ParenthesisL))return this.restoreAtMark(e),null;if(t.getArguments().addChild(this._parseFunctionArgument()))for(;this.accept(mo.Comma)&&!this.peek(mo.ParenthesisR);)t.getArguments().addChild(this._parseFunctionArgument())||this.markError(t,Pl.ExpressionExpected);return this.accept(mo.ParenthesisR)?this.finish(t):this.finish(t,Pl.RightParenthesisExpected)},e.prototype._parseFunctionIdentifier=function(){if(!this.peek(mo.Ident))return null;var e=this.create(ks);if(e.referenceTypes=[fs.Function],this.acceptIdent("progid")){if(this.accept(mo.Colon))for(;this.accept(mo.Ident)&&this.acceptDelim("."););return this.finish(e)}return this.consumeToken(),this.finish(e)},e.prototype._parseFunctionArgument=function(){var e=this.create(Ws);return e.setValue(this._parseExpr(!0))?this.finish(e):null},e.prototype._parseHexColor=function(){if(this.peekRegExp(mo.Hash,/^#([A-Fa-f0-9]{3}|[A-Fa-f0-9]{4}|[A-Fa-f0-9]{6}|[A-Fa-f0-9]{8})$/g)){var e=this.create(ba);return this.consumeToken(),this.finish(e)}return null},e}();function Ph(e,t){return-1!==e.indexOf(t)}function Oh(){for(var e=[],t=0;te+t||this.offset===e&&this.length===t?this.findInScope(e,t):null},e.prototype.findInScope=function(e,t){void 0===t&&(t=0);var n=e+t,i=function(e,t){var n=0,i=e.length;if(0===i)return 0;for(;nn}));if(0===i)return this;var r=this.children[i-1];return r.offset<=e&&r.offset+r.length>=e+t?r.findInScope(e,t):this},e.prototype.addSymbol=function(e){this.symbols.push(e)},e.prototype.getSymbol=function(e,t){for(var n=0;n{var e={470:e=>{function t(e){if("string"!=typeof e)throw new TypeError("Path must be a string. 
Received "+JSON.stringify(e))}function n(e,t){for(var n,i="",r=0,o=-1,s=0,a=0;a<=e.length;++a){if(a2){var l=i.lastIndexOf("/");if(l!==i.length-1){-1===l?(i="",r=0):r=(i=i.slice(0,l)).length-1-i.lastIndexOf("/"),o=a,s=0;continue}}else if(2===i.length||1===i.length){i="",r=0,o=a,s=0;continue}t&&(i.length>0?i+="/..":i="..",r=2)}else i.length>0?i+="/"+e.slice(o+1,a):i=e.slice(o+1,a),r=a-o-1;o=a,s=0}else 46===n&&-1!==s?++s:s=-1}return i}var i={resolve:function(){for(var e,i="",r=!1,o=arguments.length-1;o>=-1&&!r;o--){var s;o>=0?s=arguments[o]:(void 0===e&&(e=process.cwd()),s=e),t(s),0!==s.length&&(i=s+"/"+i,r=47===s.charCodeAt(0))}return i=n(i,!r),r?i.length>0?"/"+i:"/":i.length>0?i:"."},normalize:function(e){if(t(e),0===e.length)return".";var i=47===e.charCodeAt(0),r=47===e.charCodeAt(e.length-1);return 0!==(e=n(e,!i)).length||i||(e="."),e.length>0&&r&&(e+="/"),i?"/"+e:e},isAbsolute:function(e){return t(e),e.length>0&&47===e.charCodeAt(0)},join:function(){if(0===arguments.length)return".";for(var e,n=0;n0&&(void 0===e?e=r:e+="/"+r)}return void 0===e?".":i.normalize(e)},relative:function(e,n){if(t(e),t(n),e===n)return"";if((e=i.resolve(e))===(n=i.resolve(n)))return"";for(var r=1;rc){if(47===n.charCodeAt(a+d))return n.slice(a+d+1);if(0===d)return n.slice(a+d)}else s>c&&(47===e.charCodeAt(r+d)?h=d:0===d&&(h=0));break}var p=e.charCodeAt(r+d);if(p!==n.charCodeAt(a+d))break;47===p&&(h=d)}var u="";for(d=r+h+1;d<=o;++d)d!==o&&47!==e.charCodeAt(d)||(0===u.length?u+="..":u+="/..");return u.length>0?u+n.slice(a+h):(a+=h,47===n.charCodeAt(a)&&++a,n.slice(a))},_makeLong:function(e){return e},dirname:function(e){if(t(e),0===e.length)return".";for(var n=e.charCodeAt(0),i=47===n,r=-1,o=!0,s=e.length-1;s>=1;--s)if(47===(n=e.charCodeAt(s))){if(!o){r=s;break}}else o=!1;return-1===r?i?"/":".":i&&1===r?"//":e.slice(0,r)},basename:function(e,n){if(void 0!==n&&"string"!=typeof n)throw new TypeError('"ext" argument must be a string');t(e);var i,r=0,o=-1,s=!0;if(void 
0!==n&&n.length>0&&n.length<=e.length){if(n.length===e.length&&n===e)return"";var a=n.length-1,l=-1;for(i=e.length-1;i>=0;--i){var c=e.charCodeAt(i);if(47===c){if(!s){r=i+1;break}}else-1===l&&(s=!1,l=i+1),a>=0&&(c===n.charCodeAt(a)?-1==--a&&(o=i):(a=-1,o=l))}return r===o?o=l:-1===o&&(o=e.length),e.slice(r,o)}for(i=e.length-1;i>=0;--i)if(47===e.charCodeAt(i)){if(!s){r=i+1;break}}else-1===o&&(s=!1,o=i+1);return-1===o?"":e.slice(r,o)},extname:function(e){t(e);for(var n=-1,i=0,r=-1,o=!0,s=0,a=e.length-1;a>=0;--a){var l=e.charCodeAt(a);if(47!==l)-1===r&&(o=!1,r=a+1),46===l?-1===n?n=a:1!==s&&(s=1):-1!==n&&(s=-1);else if(!o){i=a+1;break}}return-1===n||-1===r||0===s||1===s&&n===r-1&&n===i+1?"":e.slice(n,r)},format:function(e){if(null===e||"object"!=typeof e)throw new TypeError('The "pathObject" argument must be of type Object. Received type '+typeof e);return function(e,t){var n=t.dir||t.root,i=t.base||(t.name||"")+(t.ext||"");return n?n===t.root?n+i:n+"/"+i:i}(0,e)},parse:function(e){t(e);var n={root:"",dir:"",base:"",ext:"",name:""};if(0===e.length)return n;var i,r=e.charCodeAt(0),o=47===r;o?(n.root="/",i=1):i=0;for(var s=-1,a=0,l=-1,c=!0,h=e.length-1,d=0;h>=i;--h)if(47!==(r=e.charCodeAt(h)))-1===l&&(c=!1,l=h+1),46===r?-1===s?s=h:1!==d&&(d=1):-1!==s&&(d=-1);else if(!c){a=h+1;break}return-1===s||-1===l||0===d||1===d&&s===l-1&&s===a+1?-1!==l&&(n.base=n.name=0===a&&o?e.slice(1,l):e.slice(a,l)):(0===a&&o?(n.name=e.slice(1,s),n.base=e.slice(1,l)):(n.name=e.slice(a,s),n.base=e.slice(a,l)),n.ext=e.slice(s,l)),a>0?n.dir=e.slice(0,a-1):o&&(n.dir="/"),n},sep:"/",delimiter:":",win32:null,posix:null};i.posix=i,e.exports=i},447:(e,t,n)=>{var i;if(n.r(t),n.d(t,{URI:()=>f,Utils:()=>E}),"object"==typeof process)i="win32"===process.platform;else if("object"==typeof navigator){var r=navigator.userAgent;i=r.indexOf("Windows")>=0}var o,s,a=(o=function(e,t){return(o=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(e,t){e.__proto__=t}||function(e,t){for(var n in 
t)Object.prototype.hasOwnProperty.call(t,n)&&(e[n]=t[n])})(e,t)},function(e,t){if("function"!=typeof t&&null!==t)throw new TypeError("Class extends value "+String(t)+" is not a constructor or null");function n(){this.constructor=e}o(e,t),e.prototype=null===t?Object.create(t):(n.prototype=t.prototype,new n)}),l=/^\w[\w\d+.-]*$/,c=/^\//,h=/^\/\//;function d(e,t){if(!e.scheme&&t)throw new Error('[UriError]: Scheme is missing: {scheme: "", authority: "'.concat(e.authority,'", path: "').concat(e.path,'", query: "').concat(e.query,'", fragment: "').concat(e.fragment,'"}'));if(e.scheme&&!l.test(e.scheme))throw new Error("[UriError]: Scheme contains illegal characters.");if(e.path)if(e.authority){if(!c.test(e.path))throw new Error('[UriError]: If a URI contains an authority component, then the path component must either be empty or begin with a slash ("/") character')}else if(h.test(e.path))throw new Error('[UriError]: If a URI does not contain an authority component, then the path cannot begin with two slash characters ("//")')}var p="",u="/",m=/^(([^:/?#]+?):)?(\/\/([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?/,f=function(){function e(e,t,n,i,r,o){void 0===o&&(o=!1),"object"==typeof e?(this.scheme=e.scheme||p,this.authority=e.authority||p,this.path=e.path||p,this.query=e.query||p,this.fragment=e.fragment||p):(this.scheme=function(e,t){return e||t?e:"file"}(e,o),this.authority=t||p,this.path=function(e,t){switch(e){case"https":case"http":case"file":t?t[0]!==u&&(t=u+t):t=u}return t}(this.scheme,n||p),this.query=i||p,this.fragment=r||p,d(this,o))}return e.isUri=function(t){return t instanceof e||!!t&&"string"==typeof t.authority&&"string"==typeof t.fragment&&"string"==typeof t.path&&"string"==typeof t.query&&"string"==typeof t.scheme&&"string"==typeof t.fsPath&&"function"==typeof t.with&&"function"==typeof t.toString},Object.defineProperty(e.prototype,"fsPath",{get:function(){return x(this,!1)},enumerable:!1,configurable:!0}),e.prototype.with=function(e){if(!e)return this;var 
t=e.scheme,n=e.authority,i=e.path,r=e.query,o=e.fragment;return void 0===t?t=this.scheme:null===t&&(t=p),void 0===n?n=this.authority:null===n&&(n=p),void 0===i?i=this.path:null===i&&(i=p),void 0===r?r=this.query:null===r&&(r=p),void 0===o?o=this.fragment:null===o&&(o=p),t===this.scheme&&n===this.authority&&i===this.path&&r===this.query&&o===this.fragment?this:new b(t,n,i,r,o)},e.parse=function(e,t){void 0===t&&(t=!1);var n=m.exec(e);return n?new b(n[2]||p,k(n[4]||p),k(n[5]||p),k(n[7]||p),k(n[9]||p),t):new b(p,p,p,p,p)},e.file=function(e){var t=p;if(i&&(e=e.replace(/\\/g,u)),e[0]===u&&e[1]===u){var n=e.indexOf(u,2);-1===n?(t=e.substring(2),e=u):(t=e.substring(2,n),e=e.substring(n)||u)}return new b("file",t,e,p,p)},e.from=function(e){var t=new b(e.scheme,e.authority,e.path,e.query,e.fragment);return d(t,!0),t},e.prototype.toString=function(e){return void 0===e&&(e=!1),S(this,e)},e.prototype.toJSON=function(){return this},e.revive=function(t){if(t){if(t instanceof e)return t;var n=new b(t);return n._formatted=t.external,n._fsPath=t._sep===g?t.fsPath:null,n}return t},e}(),g=i?1:void 0,b=function(e){function t(){var t=null!==e&&e.apply(this,arguments)||this;return t._formatted=null,t._fsPath=null,t}return a(t,e),Object.defineProperty(t.prototype,"fsPath",{get:function(){return this._fsPath||(this._fsPath=x(this,!1)),this._fsPath},enumerable:!1,configurable:!0}),t.prototype.toString=function(e){return void 0===e&&(e=!1),e?S(this,!0):(this._formatted||(this._formatted=S(this,!1)),this._formatted)},t.prototype.toJSON=function(){var e={$mid:1};return 
this._fsPath&&(e.fsPath=this._fsPath,e._sep=g),this._formatted&&(e.external=this._formatted),this.path&&(e.path=this.path),this.scheme&&(e.scheme=this.scheme),this.authority&&(e.authority=this.authority),this.query&&(e.query=this.query),this.fragment&&(e.fragment=this.fragment),e},t}(f),v=((s={})[58]="%3A",s[47]="%2F",s[63]="%3F",s[35]="%23",s[91]="%5B",s[93]="%5D",s[64]="%40",s[33]="%21",s[36]="%24",s[38]="%26",s[39]="%27",s[40]="%28",s[41]="%29",s[42]="%2A",s[43]="%2B",s[44]="%2C",s[59]="%3B",s[61]="%3D",s[32]="%20",s);function y(e,t){for(var n=void 0,i=-1,r=0;r=97&&o<=122||o>=65&&o<=90||o>=48&&o<=57||45===o||46===o||95===o||126===o||t&&47===o)-1!==i&&(n+=encodeURIComponent(e.substring(i,r)),i=-1),void 0!==n&&(n+=e.charAt(r));else{void 0===n&&(n=e.substr(0,r));var s=v[o];void 0!==s?(-1!==i&&(n+=encodeURIComponent(e.substring(i,r)),i=-1),n+=s):-1===i&&(i=r)}}return-1!==i&&(n+=encodeURIComponent(e.substring(i))),void 0!==n?n:e}function w(e){for(var t=void 0,n=0;n1&&"file"===e.scheme?"//".concat(e.authority).concat(e.path):47===e.path.charCodeAt(0)&&(e.path.charCodeAt(1)>=65&&e.path.charCodeAt(1)<=90||e.path.charCodeAt(1)>=97&&e.path.charCodeAt(1)<=122)&&58===e.path.charCodeAt(2)?t?e.path.substr(1):e.path[1].toLowerCase()+e.path.substr(2):e.path,i&&(n=n.replace(/\//g,"\\")),n}function S(e,t){var n=t?w:y,i="",r=e.scheme,o=e.authority,s=e.path,a=e.query,l=e.fragment;if(r&&(i+=r,i+=":"),(o||"file"===r)&&(i+=u,i+=u),o){var c=o.indexOf("@");if(-1!==c){var h=o.substr(0,c);o=o.substr(c+1),-1===(c=h.indexOf(":"))?i+=n(h,!1):(i+=n(h.substr(0,c),!1),i+=":",i+=n(h.substr(c+1),!1)),i+="@"}-1===(c=(o=o.toLowerCase()).indexOf(":"))?i+=n(o,!1):(i+=n(o.substr(0,c),!1),i+=o.substr(c))}if(s){if(s.length>=3&&47===s.charCodeAt(0)&&58===s.charCodeAt(2))(d=s.charCodeAt(1))>=65&&d<=90&&(s="/".concat(String.fromCharCode(d+32),":").concat(s.substr(3)));else if(s.length>=2&&58===s.charCodeAt(1)){var 
d;(d=s.charCodeAt(0))>=65&&d<=90&&(s="".concat(String.fromCharCode(d+32),":").concat(s.substr(2)))}i+=n(s,!0)}return a&&(i+="?",i+=n(a,!1)),l&&(i+="#",i+=t?l:y(l,!1)),i}function C(e){try{return decodeURIComponent(e)}catch(t){return e.length>3?e.substr(0,3)+C(e.substr(3)):e}}var _=/(%[0-9A-Za-z][0-9A-Za-z])+/g;function k(e){return e.match(_)?e.replace(_,(function(e){return C(e)})):e}var E,R,N=n(470),F=function(e,t,n){if(n||2===arguments.length)for(var i,r=0,o=t.length;r{for(var i in t)n.o(t,i)&&!n.o(e,i)&&Object.defineProperty(e,i,{enumerable:!0,get:t[i]})},n.o=(e,t)=>Object.prototype.hasOwnProperty.call(e,t),n.r=e=>{"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(e,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(e,"__esModule",{value:!0})},n(447)})();var{URI:$h,Utils:Hh}=Wh,Gh=function(e,t,n){if(n||2===arguments.length)for(var i,r=0,o=t.length;r0&&r[r.length-1])&&(6===o[0]||2===o[0])){s=0;continue}if(3===o[0]&&(!r||o[1]>r[0]&&o[1]0&&r[r.length-1])&&(6===o[0]||2===o[0])){s=0;continue}if(3===o[0]&&(!r||o[1]>r[0]&&o[1]=0&&-1===' \t\n\r":{[()]},*>+'.indexOf(i.charAt(n));)n--;return i.substring(n+1,t)}(e,this.offset),this.defaultReplaceRange=$a.create(Ba.create(this.position.line,this.position.character-this.currentWord.length),this.position),this.textDocument=e,this.styleSheet=n,this.documentSettings=i;try{var r={isIncomplete:!1,items:[]};this.nodePath=ys(this.styleSheet,this.offset);for(var o=this.nodePath.length-1;o>=0;o--){var s=this.nodePath[o];if(s instanceof Ls)this.getCompletionsForDeclarationProperty(s.getParent(),r);else if(s instanceof ua)s.parent instanceof _a?this.getVariableProposals(null,r):this.getCompletionsForExpression(s,r);else if(s instanceof Ts){var a=s.findAParent(us.ExtendsReference,us.Ruleset);if(a)if(a.type===us.ExtendsReference)this.getCompletionsForExtendsReference(a,s,r);else{var l=a;this.getCompletionsForSelector(l,l&&l.isNested(),r)}}else if(s instanceof 
Ws)this.getCompletionsForFunctionArgument(s,s.getParent(),r);else if(s instanceof Rs)this.getCompletionsForDeclarations(s,r);else if(s instanceof Ca)this.getCompletionsForVariableDeclaration(s,r);else if(s instanceof Fs)this.getCompletionsForRuleSet(s,r);else if(s instanceof _a)this.getCompletionsForInterpolation(s,r);else if(s instanceof js)this.getCompletionsForFunctionDeclaration(s,r);else if(s instanceof Fa)this.getCompletionsForMixinReference(s,r);else if(s instanceof Ps)this.getCompletionsForFunctionArgument(null,s,r);else if(s instanceof ra)this.getCompletionsForSupports(s,r);else if(s instanceof ha)this.getCompletionsForSupportsCondition(s,r);else if(s instanceof Ea)this.getCompletionsForExtendsReference(s,null,r);else if(s.type===us.URILiteral)this.getCompletionForUriLiteralValue(s,r);else if(null===s.parent)this.getCompletionForTopLevel(r);else{if(s.type!==us.StringLiteral||!this.isImportPathParent(s.parent.type))continue;this.getCompletionForImportPath(s,r)}if(r.items.length>0||this.offset>s.offset)return this.finalize(r)}return this.getCompletionsForStylesheet(r),0===r.items.length&&this.variablePrefix&&0===this.currentWord.indexOf(this.variablePrefix)&&this.getVariableProposals(null,r),this.finalize(r)}finally{this.position=null,this.currentWord=null,this.textDocument=null,this.styleSheet=null,this.symbolContext=null,this.defaultReplaceRange=null,this.nodePath=null}},e.prototype.isImportPathParent=function(e){return e===us.Import},e.prototype.finalize=function(e){return e},e.prototype.findInNodePath=function(){for(var e=[],t=0;t=0;n--){var i=this.nodePath[n];if(-1!==e.indexOf(i.type))return i}return null},e.prototype.getCompletionsForDeclarationProperty=function(e,t){return this.getPropertyProposals(e,t)},e.prototype.getPropertyProposals=function(e,t){var n=this,i=this.isTriggerPropertyValueCompletionEnabled,r=this.isCompletePropertyWithSemicolonEnabled;return this.cssDataManager.getProperties().forEach((function(o){var 
s,a,l=!1;e?(s=n.getCompletionRange(e.getProperty()),a=o.name,zh(e.colonPosition)||(a+=": ",l=!0)):(s=n.getCompletionRange(null),a=o.name+": ",l=!0),!e&&r&&(a+="$0;"),e&&!e.semicolonPosition&&r&&n.offset>=n.textDocument.offsetAt(s.end)&&(a+="$0;");var c={label:o.name,documentation:Yc(o,n.doesSupportMarkdown()),tags:ud(o)?[Zl.Deprecated]:[],textEdit:yl.replace(s,a),insertTextFormat:Yl.Snippet,kind:Jl.Property};o.restrictions||(l=!1),i&&l&&(c.command=dd);var h=(255-("number"===typeof o.relevance?Math.min(Math.max(o.relevance,0),99):50)).toString(16),d=cs(o.name,"-")?od.VendorPrefixed:od.Normal;c.sortText=d+"_"+h,t.items.push(c)})),this.completionParticipants.forEach((function(e){e.onCssProperty&&e.onCssProperty({propertyName:n.currentWord,range:n.defaultReplaceRange})})),t},Object.defineProperty(e.prototype,"isTriggerPropertyValueCompletionEnabled",{get:function(){var e,t;return null===(t=null===(e=this.documentSettings)||void 0===e?void 0:e.triggerPropertyValueCompletion)||void 0===t||t},enumerable:!1,configurable:!0}),Object.defineProperty(e.prototype,"isCompletePropertyWithSemicolonEnabled",{get:function(){var e,t;return null===(t=null===(e=this.documentSettings)||void 0===e?void 0:e.completePropertyWithSemicolon)||void 0===t||t},enumerable:!1,configurable:!0}),e.prototype.getCompletionsForDeclarationValue=function(e,t){for(var n=this,i=e.getFullPropertyName(),r=this.cssDataManager.getProperty(i),o=e.getValue()||null;o&&o.hasChildren();)o=o.findChildAtOffset(this.offset,!1);if(this.completionParticipants.forEach((function(e){e.onCssPropertyValue&&e.onCssPropertyValue({propertyName:i,propertyValue:n.currentWord,range:n.getCompletionRange(o)})})),r){if(r.restrictions)for(var s=0,a=r.restrictions;s=e.offset+2&&this.getVariableProposals(null,t),t},e.prototype.getVariableProposals=function(e,t){for(var n=0,i=this.getSymbolContext().findSymbolsAtOffset(this.offset,fs.Variable);n0){var 
r=this.currentWord.match(/^-?\d[\.\d+]*/);r&&(i=r[0],n.isIncomplete=i.length===this.currentWord.length)}else 0===this.currentWord.length&&(n.isIncomplete=!0);if(t&&t.parent&&t.parent.type===us.Term&&(t=t.getParent()),e.restrictions)for(var o=0,s=e.restrictions;o=n.end?this.getCompletionForTopLevel(t):!n||this.offset<=n.offset?this.getCompletionsForSelector(e,e.isNested(),t):this.getCompletionsForDeclarations(e.getDeclarations(),t)},e.prototype.getCompletionsForSelector=function(e,t,n){var i=this,r=this.findInNodePath(us.PseudoSelector,us.IdentifierSelector,us.ClassSelector,us.ElementNameSelector);if(!r&&this.hasCharacterAtPosition(this.offset-this.currentWord.length-1,":")&&(this.currentWord=":"+this.currentWord,this.hasCharacterAtPosition(this.offset-this.currentWord.length-1,":")&&(this.currentWord=":"+this.currentWord),this.defaultReplaceRange=$a.create(Ba.create(this.position.line,this.position.character-this.currentWord.length),this.position)),this.cssDataManager.getPseudoClasses().forEach((function(e){var t=fd(e.name),o={label:e.name,textEdit:yl.replace(i.getCompletionRange(r),t),documentation:Yc(e,i.doesSupportMarkdown()),tags:ud(e)?[Zl.Deprecated]:[],kind:Jl.Function,insertTextFormat:e.name!==t?hd:void 0};cs(e.name,":-")&&(o.sortText=od.VendorPrefixed),n.items.push(o)})),this.cssDataManager.getPseudoElements().forEach((function(e){var t=fd(e.name),o={label:e.name,textEdit:yl.replace(i.getCompletionRange(r),t),documentation:Yc(e,i.doesSupportMarkdown()),tags:ud(e)?[Zl.Deprecated]:[],kind:Jl.Function,insertTextFormat:e.name!==t?hd:void 0};cs(e.name,"::-")&&(o.sortText=od.VendorPrefixed),n.items.push(o)})),!t){for(var o=0,s=Dh;o0){var t=d.substr(e.offset,e.length);return"."!==t.charAt(0)||h[t]||(h[t]=!0,n.items.push({label:t,textEdit:yl.replace(i.getCompletionRange(r),t),kind:Jl.Keyword})),!1}return!0})),e&&e.isNested()){var 
p=e.getSelectors().findFirstChildBeforeOffset(this.offset);p&&0===e.getSelectors().getChildren().indexOf(p)&&this.getPropertyProposals(null,n)}return n},e.prototype.getCompletionsForDeclarations=function(e,t){if(!e||this.offset===e.offset)return t;var n=e.findFirstChildBeforeOffset(this.offset);if(!n)return this.getCompletionsForDeclarationProperty(null,t);if(n instanceof As){var i=n;if(!zh(i.colonPosition)||this.offset<=i.colonPosition)return this.getCompletionsForDeclarationProperty(i,t);if(zh(i.semicolonPosition)&&i.semicolonPositione.colonPosition&&this.getVariableProposals(e.getValue(),t),t},e.prototype.getCompletionsForExpression=function(e,t){var n=e.getParent();if(n instanceof Ws)return this.getCompletionsForFunctionArgument(n,n.getParent(),t),t;var i=e.findParent(us.Declaration);if(!i)return this.getTermProposals(void 0,null,t),t;var r=e.findChildAtOffset(this.offset,!0);return r?r instanceof Sa||r instanceof ks?this.getCompletionsForDeclarationValue(i,t):t:this.getCompletionsForDeclarationValue(i,t)},e.prototype.getCompletionsForFunctionArgument=function(e,t,n){var i=t.getIdentifier();return i&&i.matches("var")&&(t.getArguments().hasChildren()&&t.getArguments().getChild(0)!==e||this.getVariableProposalsForCSSVarFunction(n)),n},e.prototype.getCompletionsForFunctionDeclaration=function(e,t){var n=e.getDeclarations();return n&&this.offset>n.offset&&this.offsete.lParent&&(!zh(e.rParent)||this.offset<=e.rParent)?this.getCompletionsForDeclarationProperty(null,t):t},e.prototype.getCompletionsForSupports=function(e,t){var n=e.getDeclarations();if(!n||this.offset<=n.offset){var i=e.findFirstChildBeforeOffset(this.offset);return i instanceof ha?this.getCompletionsForSupportsCondition(i,t):t}return this.getCompletionForTopLevel(t)},e.prototype.getCompletionsForExtendsReference=function(e,t,n){return n},e.prototype.getCompletionForUriLiteralValue=function(e,t){var n,i,r;if(e.hasChildren()){var 
o=e.getChild(0);n=o.getText(),i=this.position,r=this.getCompletionRange(o)}else{n="",i=this.position;var s=this.textDocument.positionAt(e.offset+"url(".length);r=$a.create(s,s)}return this.completionParticipants.forEach((function(e){e.onCssURILiteralValue&&e.onCssURILiteralValue({uriValue:n,position:i,range:r})})),t},e.prototype.getCompletionForImportPath=function(e,t){var n=this;return this.completionParticipants.forEach((function(t){t.onCssImportPath&&t.onCssImportPath({pathValue:e.getText(),position:n.position,range:n.getCompletionRange(e)})})),t},e.prototype.hasCharacterAtPosition=function(e,t){var n=this.textDocument.getText();return e>=0&&e"),this.writeLine(t,i.join(""))}},e}();!function(e){function t(e){var t=e.match(/^['"](.*)["']$/);return t?t[1]:e}e.ensure=function(e,n){return n+t(e)+n},e.remove=t}(yd||(yd={}));var Ed=function(){return function(){this.id=0,this.attr=0,this.tag=0}}();function Rd(e,t){for(var n=new Sd,i=0,r=e.getChildren();i1){var l=t.cloneWithParent();n.addChild(l.findRoot()),n=l}n.append(s[a])}}break;case us.SelectorPlaceholder:if(o.matches("@at-root"))return n;case us.ElementNameSelector:var c=o.getText();n.addAttr("name","*"===c?"element":Nd(c));break;case us.ClassSelector:n.addAttr("class",Nd(o.getText().substring(1)));break;case us.IdentifierSelector:n.addAttr("id",Nd(o.getText().substring(1)));break;case us.MixinDeclaration:n.addAttr("class",o.getName());break;case us.PseudoSelector:n.addAttr(Nd(o.getText()),"");break;case us.AttributeSelector:var h=o,d=h.getIdentifier();if(d){var p=h.getValue(),u=h.getOperator(),m=void 0;if(p&&u)switch(Nd(u.getText())){case"|=":m="".concat(yd.remove(Nd(p.getText())),"-\u2026");break;case"^=":m="".concat(yd.remove(Nd(p.getText())),"\u2026");break;case"$=":m="\u2026".concat(yd.remove(Nd(p.getText())));break;case"~=":m=" \u2026 ".concat(yd.remove(Nd(p.getText()))," \u2026 
");break;case"*=":m="\u2026".concat(yd.remove(Nd(p.getText())),"\u2026");break;default:m=yd.remove(Nd(p.getText()))}n.addAttr(Nd(d.getText()),m)}}}return n}function Nd(e){var t=new ls;t.setSource(e);var n=t.scanUnquotedString();return n?n.text:e}var Fd=function(){function e(e){this.cssDataManager=e}return e.prototype.selectorToMarkedString=function(e){var t=function(e){if(e.matches("@at-root"))return null;var t=new Cd,n=[],i=e.getParent();if(i instanceof Fs)for(var r=i.getParent();r&&!Td(r);){if(r instanceof Fs){if(r.getSelectors().matches("@at-root"))break;n.push(r)}r=r.getParent()}for(var o=new Dd(t),s=n.length-1;s>=0;s--){var a=n[s].getSelectors().getChild(0);a&&o.processSelector(a)}return o.processSelector(e),t}(e);if(t){var n=new kd('"').print(t);return n.push(this.selectorToSpecificityMarkedString(e)),n}return[]},e.prototype.simpleSelectorToMarkedString=function(e){var t=Rd(e),n=new kd('"').print(t);return n.push(this.selectorToSpecificityMarkedString(e)),n},e.prototype.isPseudoElementIdentifier=function(e){var t=e.match(/^::?([\w-]+)/);return!!t&&!!this.cssDataManager.getPseudoElement("::"+t[1])},e.prototype.selectorToSpecificityMarkedString=function(e){var t=this,n=function(e){var i=new Ed;e:for(var r=0,o=e.getChildren();r0){for(var l=new Ed,c=0,h=s.getChildren();cl.id?l=f:f.idl.attr?l=f:f.attrl.tag&&(l=f))}}i.id+=l.id,i.attr+=l.attr,i.tag+=l.tag;continue e}i.attr++}if(s.getChildren().length>0){var f=n(s);i.id+=f.id,i.attr+=f.attr,i.tag+=f.tag}}return i},i=n(e);return xd("specificity","[Selector Specificity](https://developer.mozilla.org/en-US/docs/Web/CSS/Specificity): ({0}, {1}, {2})",i.id,i.attr,i.tag)},e}(),Dd=function(){function e(e){this.prev=null,this.element=e}return e.prototype.processSelector=function(e){var t=null;if(!(this.element instanceof Cd)&&e.getChildren().some((function(e){return e.hasChildren()&&e.getChild(0).type===us.SelectorCombinator}))){var n=this.element.findRoot();n.parent instanceof 
Cd&&(t=this.element,this.element=n.parent,this.element.removeChild(n),this.prev=null)}for(var i=0,r=e.getChildren();i0&&r[r.length-1])&&(6===o[0]||2===o[0])){s=0;continue}if(3===o[0]&&(!r||o[1]>r[0]&&o[1]n)return 0;var r,o,s=[],a=[];for(r=0;r=r.length/2&&o.push({property:e.name,score:t})})),o.sort((function(e,t){return t.score-e.score||e.property.localeCompare(t.property)}));for(var s=3,a=0,l=o;a=0;a--){var l=s[a];if(l instanceof zs){var c=l.getProperty();if(c&&c.offset===r&&c.end===o)return void this.getFixesForUnknownProperty(e,c,n,i)}}},e}(),Zd=function(){return function(e){this.fullPropertyName=e.getFullPropertyName().toLowerCase(),this.node=e}}();function ep(e,t,n,i){var r=e[t];r.value=n,n&&(Ph(r.properties,i)||r.properties.push(i))}function tp(e,t,n,i){"top"===t||"right"===t||"bottom"===t||"left"===t?ep(e,t,n,i):function(e,t,n){ep(e,"top",t,n),ep(e,"right",t,n),ep(e,"bottom",t,n),ep(e,"left",t,n)}(e,n,i)}function np(e,t,n){switch(t.length){case 1:tp(e,void 0,t[0],n);break;case 2:tp(e,"top",t[0],n),tp(e,"bottom",t[0],n),tp(e,"right",t[1],n),tp(e,"left",t[1],n);break;case 3:tp(e,"top",t[0],n),tp(e,"right",t[1],n),tp(e,"left",t[1],n),tp(e,"bottom",t[2],n);break;case 4:tp(e,"top",t[0],n),tp(e,"right",t[1],n),tp(e,"bottom",t[2],n),tp(e,"left",t[3],n)}}function ip(e,t){for(var n=0,i=t;n0)for(var m=this.fetch(i,"float"),f=0;f0)for(m=this.fetch(i,"vertical-align"),f=0;f1)for(var S=0;S")||this.peekDelim("<")||this.peekIdent("and")||this.peekIdent("or")||this.peekDelim("%")){var t=this.createNode(us.Operator);return this.consumeToken(),this.finish(t)}return e.prototype._parseOperator.call(this)},t.prototype._parseUnaryOperator=function(){if(this.peekIdent("not")){var t=this.create(Ss);return this.consumeToken(),this.finish(t)}return e.prototype._parseUnaryOperator.call(this)},t.prototype._parseRuleSetDeclaration=function(){return 
this.peek(mo.AtKeyword)?this._parseKeyframe()||this._parseImport()||this._parseMedia(!0)||this._parseFontFace()||this._parseWarnAndDebug()||this._parseControlStatement()||this._parseFunctionDeclaration()||this._parseExtends()||this._parseMixinReference()||this._parseMixinContent()||this._parseMixinDeclaration()||this._parseRuleset(!0)||this._parseSupports(!0)||e.prototype._parseRuleSetDeclarationAtStatement.call(this):this._parseVariableDeclaration()||this._tryParseRuleset(!0)||e.prototype._parseRuleSetDeclaration.call(this)},t.prototype._parseDeclaration=function(e){var t=this._tryParseCustomPropertyDeclaration(e);if(t)return t;var n=this.create(zs);if(!n.setProperty(this._parseProperty()))return null;if(!this.accept(mo.Colon))return this.finish(n,Pl.ColonExpected,[mo.Colon],e||[mo.SemiColon]);this.prevToken&&(n.colonPosition=this.prevToken.offset);var i=!1;if(n.setValue(this._parseExpr())&&(i=!0,n.addChild(this._parsePrio())),this.peek(mo.CurlyL))n.setNestedProperties(this._parseNestedProperties());else if(!i)return this.finish(n,Pl.PropertyValueExpected);return this.peek(mo.SemiColon)&&(n.semicolonPosition=this.token.offset),this.finish(n)},t.prototype._parseNestedProperties=function(){var e=this.create(Gs);return this._parseBody(e,this._parseDeclaration.bind(this))},t.prototype._parseExtends=function(){if(this.peekKeyword("@extend")){var e=this.create(Ea);if(this.consumeToken(),!e.getSelectors().addChild(this._parseSimpleSelector()))return this.finish(e,Pl.SelectorExpected);for(;this.accept(mo.Comma);)e.getSelectors().addChild(this._parseSimpleSelector());return this.accept(mo.Exclamation)&&!this.acceptIdent("optional")?this.finish(e,Pl.UnknownKeyword):this.finish(e)}return null},t.prototype._parseSimpleSelectorBody=function(){return this._parseSelectorCombinator()||this._parseSelectorPlaceholder()||e.prototype._parseSimpleSelectorBody.call(this)},t.prototype._parseSelectorCombinator=function(){if(this.peekDelim("&")){var 
e=this.createNode(us.SelectorCombinator);for(this.consumeToken();!this.hasWhitespace()&&(this.acceptDelim("-")||this.accept(mo.Num)||this.accept(mo.Dimension)||e.addChild(this._parseIdent())||this.acceptDelim("&")););return this.finish(e)}return null},t.prototype._parseSelectorPlaceholder=function(){if(this.peekDelim("%")){var e=this.createNode(us.SelectorPlaceholder);return this.consumeToken(),this._parseIdent(),this.finish(e)}if(this.peekKeyword("@at-root")){e=this.createNode(us.SelectorPlaceholder);return this.consumeToken(),this.finish(e)}return null},t.prototype._parseElementName=function(){var t=this.mark(),n=e.prototype._parseElementName.call(this);return n&&!this.hasWhitespace()&&this.peek(mo.ParenthesisL)?(this.restoreAtMark(t),null):n},t.prototype._tryParsePseudoIdentifier=function(){return this._parseInterpolation()||e.prototype._tryParsePseudoIdentifier.call(this)},t.prototype._parseWarnAndDebug=function(){if(!this.peekKeyword("@debug")&&!this.peekKeyword("@warn")&&!this.peekKeyword("@error"))return null;var e=this.createNode(us.Debug);return this.consumeToken(),e.addChild(this._parseExpr()),this.finish(e)},t.prototype._parseControlStatement=function(e){return void 0===e&&(e=this._parseRuleSetDeclaration.bind(this)),this.peek(mo.AtKeyword)?this._parseIfStatement(e)||this._parseForStatement(e)||this._parseEachStatement(e)||this._parseWhileStatement(e):null},t.prototype._parseIfStatement=function(e){return this.peekKeyword("@if")?this._internalParseIfStatement(e):null},t.prototype._internalParseIfStatement=function(e){var t=this.create(Vs);if(this.consumeToken(),!t.setExpression(this._parseExpr(!0)))return this.finish(t,Pl.ExpressionExpected);if(this._parseBody(t,e),this.acceptKeyword("@else"))if(this.peekIdent("if"))t.setElseClause(this._internalParseIfStatement(e));else if(this.peek(mo.CurlyL)){var n=this.create(Bs);this._parseBody(n,e),t.setElseClause(n)}return 
this.finish(t)},t.prototype._parseForStatement=function(e){if(!this.peekKeyword("@for"))return null;var t=this.create(Us);return this.consumeToken(),t.setVariable(this._parseVariable())?this.acceptIdent("from")?t.addChild(this._parseBinaryExpr())?this.acceptIdent("to")||this.acceptIdent("through")?t.addChild(this._parseBinaryExpr())?this._parseBody(t,e):this.finish(t,Pl.ExpressionExpected,[mo.CurlyR]):this.finish(t,Pp.ThroughOrToExpected,[mo.CurlyR]):this.finish(t,Pl.ExpressionExpected,[mo.CurlyR]):this.finish(t,Pp.FromExpected,[mo.CurlyR]):this.finish(t,Pl.VariableNameExpected,[mo.CurlyR])},t.prototype._parseEachStatement=function(e){if(!this.peekKeyword("@each"))return null;var t=this.create(Ks);this.consumeToken();var n=t.getVariables();if(!n.addChild(this._parseVariable()))return this.finish(t,Pl.VariableNameExpected,[mo.CurlyR]);for(;this.accept(mo.Comma);)if(!n.addChild(this._parseVariable()))return this.finish(t,Pl.VariableNameExpected,[mo.CurlyR]);return this.finish(n),this.acceptIdent("in")?t.addChild(this._parseExpr())?this._parseBody(t,e):this.finish(t,Pl.ExpressionExpected,[mo.CurlyR]):this.finish(t,Pp.InExpected,[mo.CurlyR])},t.prototype._parseWhileStatement=function(e){if(!this.peekKeyword("@while"))return null;var t=this.create(qs);return this.consumeToken(),t.addChild(this._parseBinaryExpr())?this._parseBody(t,e):this.finish(t,Pl.ExpressionExpected,[mo.CurlyR])},t.prototype._parseFunctionBodyDeclaration=function(){return this._parseVariableDeclaration()||this._parseReturnStatement()||this._parseWarnAndDebug()||this._parseControlStatement(this._parseFunctionBodyDeclaration.bind(this))},t.prototype._parseFunctionDeclaration=function(){if(!this.peekKeyword("@function"))return null;var e=this.create(js);if(this.consumeToken(),!e.setIdentifier(this._parseIdent([fs.Function])))return this.finish(e,Pl.IdentifierExpected,[mo.CurlyR]);if(!this.accept(mo.ParenthesisL))return 
this.finish(e,Pl.LeftParenthesisExpected,[mo.CurlyR]);if(e.getParameters().addChild(this._parseParameterDeclaration()))for(;this.accept(mo.Comma)&&!this.peek(mo.ParenthesisR);)if(!e.getParameters().addChild(this._parseParameterDeclaration()))return this.finish(e,Pl.VariableNameExpected);return this.accept(mo.ParenthesisR)?this._parseBody(e,this._parseFunctionBodyDeclaration.bind(this)):this.finish(e,Pl.RightParenthesisExpected,[mo.CurlyR])},t.prototype._parseReturnStatement=function(){if(!this.peekKeyword("@return"))return null;var e=this.createNode(us.ReturnStatement);return this.consumeToken(),e.addChild(this._parseExpr())?this.finish(e):this.finish(e,Pl.ExpressionExpected)},t.prototype._parseMixinDeclaration=function(){if(!this.peekKeyword("@mixin"))return null;var e=this.create(Da);if(this.consumeToken(),!e.setIdentifier(this._parseIdent([fs.Mixin])))return this.finish(e,Pl.IdentifierExpected,[mo.CurlyR]);if(this.accept(mo.ParenthesisL)){if(e.getParameters().addChild(this._parseParameterDeclaration()))for(;this.accept(mo.Comma)&&!this.peek(mo.ParenthesisR);)if(!e.getParameters().addChild(this._parseParameterDeclaration()))return this.finish(e,Pl.VariableNameExpected);if(!this.accept(mo.ParenthesisR))return this.finish(e,Pl.RightParenthesisExpected,[mo.CurlyR])}return this._parseBody(e,this._parseRuleSetDeclaration.bind(this))},t.prototype._parseParameterDeclaration=function(){var e=this.create(Os);return e.setIdentifier(this._parseVariable())?(this.accept(Mp),this.accept(mo.Colon)&&!e.setDefaultValue(this._parseExpr(!0))?this.finish(e,Pl.VariableValueExpected,[],[mo.Comma,mo.ParenthesisR]):this.finish(e)):null},t.prototype._parseMixinContent=function(){if(!this.peekKeyword("@content"))return null;var e=this.create(Ra);if(this.consumeToken(),this.accept(mo.ParenthesisL)){if(e.getArguments().addChild(this._parseFunctionArgument()))for(;this.accept(mo.Comma)&&!this.peek(mo.ParenthesisR);)if(!e.getArguments().addChild(this._parseFunctionArgument()))return 
this.finish(e,Pl.ExpressionExpected);if(!this.accept(mo.ParenthesisR))return this.finish(e,Pl.RightParenthesisExpected)}return this.finish(e)},t.prototype._parseMixinReference=function(){if(!this.peekKeyword("@include"))return null;var e=this.create(Fa);this.consumeToken();var t=this._parseIdent([fs.Mixin]);if(!e.setIdentifier(t))return this.finish(e,Pl.IdentifierExpected,[mo.CurlyR]);if(!this.hasWhitespace()&&this.acceptDelim(".")&&!this.hasWhitespace()){var n=this._parseIdent([fs.Mixin]);if(!n)return this.finish(e,Pl.IdentifierExpected,[mo.CurlyR]);var i=this.create(Ia);t.referenceTypes=[fs.Module],i.setIdentifier(t),e.setIdentifier(n),e.addChild(i)}if(this.accept(mo.ParenthesisL)){if(e.getArguments().addChild(this._parseFunctionArgument()))for(;this.accept(mo.Comma)&&!this.peek(mo.ParenthesisR);)if(!e.getArguments().addChild(this._parseFunctionArgument()))return this.finish(e,Pl.ExpressionExpected);if(!this.accept(mo.ParenthesisR))return this.finish(e,Pl.RightParenthesisExpected)}return(this.peekIdent("using")||this.peek(mo.CurlyL))&&e.setContent(this._parseMixinContentDeclaration()),this.finish(e)},t.prototype._parseMixinContentDeclaration=function(){var e=this.create(Na);if(this.acceptIdent("using")){if(!this.accept(mo.ParenthesisL))return this.finish(e,Pl.LeftParenthesisExpected,[mo.CurlyL]);if(e.getParameters().addChild(this._parseParameterDeclaration()))for(;this.accept(mo.Comma)&&!this.peek(mo.ParenthesisR);)if(!e.getParameters().addChild(this._parseParameterDeclaration()))return this.finish(e,Pl.VariableNameExpected);if(!this.accept(mo.ParenthesisR))return this.finish(e,Pl.RightParenthesisExpected,[mo.CurlyL])}return this.peek(mo.CurlyL)&&this._parseBody(e,this._parseMixinReferenceBodyStatement.bind(this)),this.finish(e)},t.prototype._parseMixinReferenceBodyStatement=function(){return this._tryParseKeyframeSelector()||this._parseRuleSetDeclaration()},t.prototype._parseFunctionArgument=function(){var 
e=this.create(Ws),t=this.mark(),n=this._parseVariable();if(n)if(this.accept(mo.Colon))e.setIdentifier(n);else{if(this.accept(Mp))return e.setValue(n),this.finish(e);this.restoreAtMark(t)}return e.setValue(this._parseExpr(!0))?(this.accept(Mp),e.addChild(this._parsePrio()),this.finish(e)):e.setValue(this._tryParsePrio())?this.finish(e):null},t.prototype._parseURLArgument=function(){var t=this.mark(),n=e.prototype._parseURLArgument.call(this);if(!n||!this.peek(mo.ParenthesisR)){this.restoreAtMark(t);var i=this.create(Ss);return i.addChild(this._parseBinaryExpr()),this.finish(i)}return n},t.prototype._parseOperation=function(){if(!this.peek(mo.ParenthesisL))return null;var e=this.create(Ss);for(this.consumeToken();e.addChild(this._parseListElement());)this.accept(mo.Comma);return this.accept(mo.ParenthesisR)?this.finish(e):this.finish(e,Pl.RightParenthesisExpected)},t.prototype._parseListElement=function(){var e=this.create(Aa),t=this._parseBinaryExpr();if(!t)return null;if(this.accept(mo.Colon)){if(e.setKey(t),!e.setValue(this._parseBinaryExpr()))return this.finish(e,Pl.ExpressionExpected)}else e.setValue(t);return this.finish(e)},t.prototype._parseUse=function(){if(!this.peekKeyword("@use"))return null;var e=this.create(Qs);if(this.consumeToken(),!e.addChild(this._parseStringLiteral()))return this.finish(e,Pl.StringLiteralExpected);if(!this.peek(mo.SemiColon)&&!this.peek(mo.EOF)){if(!this.peekRegExp(mo.Ident,/as|with/))return this.finish(e,Pl.UnknownKeyword);if(this.acceptIdent("as")&&!e.setIdentifier(this._parseIdent([fs.Module]))&&!this.acceptDelim("*"))return this.finish(e,Pl.IdentifierOrWildcardExpected);if(this.acceptIdent("with")){if(!this.accept(mo.ParenthesisL))return this.finish(e,Pl.LeftParenthesisExpected,[mo.ParenthesisR]);if(!e.getParameters().addChild(this._parseModuleConfigDeclaration()))return 
this.finish(e,Pl.VariableNameExpected);for(;this.accept(mo.Comma)&&!this.peek(mo.ParenthesisR);)if(!e.getParameters().addChild(this._parseModuleConfigDeclaration()))return this.finish(e,Pl.VariableNameExpected);if(!this.accept(mo.ParenthesisR))return this.finish(e,Pl.RightParenthesisExpected)}}return this.accept(mo.SemiColon)||this.accept(mo.EOF)?this.finish(e):this.finish(e,Pl.SemiColonExpected)},t.prototype._parseModuleConfigDeclaration=function(){var e=this.create(Zs);return e.setIdentifier(this._parseVariable())?this.accept(mo.Colon)&&e.setValue(this._parseExpr(!0))?!this.accept(mo.Exclamation)||!this.hasWhitespace()&&this.acceptIdent("default")?this.finish(e):this.finish(e,Pl.UnknownKeyword):this.finish(e,Pl.VariableValueExpected,[],[mo.Comma,mo.ParenthesisR]):null},t.prototype._parseForward=function(){if(!this.peekKeyword("@forward"))return null;var e=this.create(ea);if(this.consumeToken(),!e.addChild(this._parseStringLiteral()))return this.finish(e,Pl.StringLiteralExpected);if(this.acceptIdent("with")){if(!this.accept(mo.ParenthesisL))return this.finish(e,Pl.LeftParenthesisExpected,[mo.ParenthesisR]);if(!e.getParameters().addChild(this._parseModuleConfigDeclaration()))return this.finish(e,Pl.VariableNameExpected);for(;this.accept(mo.Comma)&&!this.peek(mo.ParenthesisR);)if(!e.getParameters().addChild(this._parseModuleConfigDeclaration()))return this.finish(e,Pl.VariableNameExpected);if(!this.accept(mo.ParenthesisR))return this.finish(e,Pl.RightParenthesisExpected)}if(!this.peek(mo.SemiColon)&&!this.peek(mo.EOF)){if(!this.peekRegExp(mo.Ident,/as|hide|show/))return this.finish(e,Pl.UnknownKeyword);if(this.acceptIdent("as")){var t=this._parseIdent([fs.Forward]);if(!e.setIdentifier(t))return this.finish(e,Pl.IdentifierExpected);if(this.hasWhitespace()||!this.acceptDelim("*"))return this.finish(e,Pl.WildcardExpected)}if((this.peekIdent("hide")||this.peekIdent("show"))&&!e.addChild(this._parseForwardVisibility()))return 
this.finish(e,Pl.IdentifierOrVariableExpected)}return this.accept(mo.SemiColon)||this.accept(mo.EOF)?this.finish(e):this.finish(e,Pl.SemiColonExpected)},t.prototype._parseForwardVisibility=function(){var e=this.create(ta);for(e.setIdentifier(this._parseIdent());e.addChild(this._parseVariable()||this._parseIdent());)this.accept(mo.Comma);return e.getChildren().length>1?e:null},t.prototype._parseSupportsCondition=function(){return this._parseInterpolation()||e.prototype._parseSupportsCondition.call(this)},t}(Lh),Vp=function(){var e=function(t,n){return(e=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(e,t){e.__proto__=t}||function(e,t){for(var n in t)Object.prototype.hasOwnProperty.call(t,n)&&(e[n]=t[n])})(t,n)};return function(t,n){if("function"!==typeof n&&null!==n)throw new TypeError("Class extends value "+String(n)+" is not a constructor or null");function i(){this.constructor=t}e(t,n),t.prototype=null===n?Object.create(n):(i.prototype=n.prototype,new i)}}(),Up=Wa(),Kp=function(e){function t(n,i){var r=e.call(this,"$",n,i)||this;return qp(t.scssModuleLoaders),qp(t.scssModuleBuiltIns),r}return Vp(t,e),t.prototype.isImportPathParent=function(t){return t===us.Forward||t===us.Use||e.prototype.isImportPathParent.call(this,t)},t.prototype.getCompletionForImportPath=function(n,i){var r=n.getParent().type;if(r===us.Forward||r===us.Use)for(var o=0,s=t.scssModuleBuiltIns;o0){var t="string"===typeof e.documentation?{kind:"markdown",value:e.documentation}:{kind:"markdown",value:e.documentation.value};t.value+="\n\n",t.value+=e.references.map((function(e){return"[".concat(e.name,"](").concat(e.url,")")})).join(" | "),e.documentation=t}}))}var Bp,jp=function(){var e=function(t,n){return(e=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(e,t){e.__proto__=t}||function(e,t){for(var n in t)Object.prototype.hasOwnProperty.call(t,n)&&(e[n]=t[n])})(t,n)};return function(t,n){if("function"!==typeof n&&null!==n)throw new TypeError("Class extends value 
"+String(n)+" is not a constructor or null");function i(){this.constructor=t}e(t,n),t.prototype=null===n?Object.create(n):(i.prototype=n.prototype,new i)}}(),$p="/".charCodeAt(0),Hp="\n".charCodeAt(0),Gp="\r".charCodeAt(0),Jp="\f".charCodeAt(0),Xp="`".charCodeAt(0),Yp=".".charCodeAt(0),Qp=mo.CustomToken,Zp=Qp++,eu=function(e){function t(){return null!==e&&e.apply(this,arguments)||this}return jp(t,e),t.prototype.scanNext=function(t){var n=this.escapedJavaScript();return null!==n?this.finishToken(t,n):this.stream.advanceIfChars([Yp,Yp,Yp])?this.finishToken(t,Zp):e.prototype.scanNext.call(this,t)},t.prototype.comment=function(){return!!e.prototype.comment.call(this)||!(this.inURL||!this.stream.advanceIfChars([$p,$p]))&&(this.stream.advanceWhileChar((function(e){switch(e){case Hp:case Gp:case Jp:return!1;default:return!0}})),!0)},t.prototype.escapedJavaScript=function(){return this.stream.peekChar()===Xp?(this.stream.advance(1),this.stream.advanceWhileChar((function(e){return e!==Xp})),this.stream.advanceIfChar(Xp)?mo.EscapedJavaScript:mo.BadEscapedJavaScript):null},t}(ls),tu=function(){var e=function(t,n){return(e=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(e,t){e.__proto__=t}||function(e,t){for(var n in t)Object.prototype.hasOwnProperty.call(t,n)&&(e[n]=t[n])})(t,n)};return function(t,n){if("function"!==typeof n&&null!==n)throw new TypeError("Class extends value "+String(n)+" is not a constructor or null");function i(){this.constructor=t}e(t,n),t.prototype=null===n?Object.create(n):(i.prototype=n.prototype,new i)}}(),nu=function(e){function t(){return e.call(this,new eu)||this}return tu(t,e),t.prototype._parseStylesheetStatement=function(t){return void 
/* Tail of _parseStylesheetStatement (its header is on the previous line): at-keyword tokens go to the variable-declaration parser, then the plugin parser, then the inherited at-statement parser; any other token tries mixin declaration, mixin reference, function, then ruleset. */0===t&&(t=!1),this.peek(mo.AtKeyword)?this._parseVariableDeclaration()||this._parsePlugin()||e.prototype._parseStylesheetAtStatement.call(this,t):this._tryParseMixinDeclaration()||this._tryParseMixinReference()||this._parseFunction()||this._parseRuleset(!0)},/* _parseImport: handles the @import and @import-once keywords and returns null when neither is at the cursor. Accepts an optional parenthesised, comma-separated identifier list, then requires a URI or string literal; a media query list is parsed unless a semicolon or EOF follows. */t.prototype._parseImport=function(){if(!this.peekKeyword("@import")&&!this.peekKeyword("@import-once"))return null;var e=this.create(Ys);if(this.consumeToken(),this.accept(mo.ParenthesisL)){if(!this.accept(mo.Ident))return this.finish(e,Pl.IdentifierExpected,[mo.SemiColon]);do{if(!this.accept(mo.Comma))break}while(this.accept(mo.Ident));if(!this.accept(mo.ParenthesisR))return this.finish(e,Pl.RightParenthesisExpected,[mo.SemiColon])}return e.addChild(this._parseURILiteral())||e.addChild(this._parseStringLiteral())?(this.peek(mo.SemiColon)||this.peek(mo.EOF)||e.setMedialist(this._parseMediaQueryList()),this.finish(e)):this.finish(e,Pl.URIOrStringExpected,[mo.SemiColon])},/* _parsePlugin: @plugin followed by a string literal and a semicolon; a missing piece finishes the node with StringLiteralExpected or SemiColonExpected. Returns null if the keyword is absent. */t.prototype._parsePlugin=function(){if(!this.peekKeyword("@plugin"))return null;var e=this.createNode(us.Plugin);return this.consumeToken(),e.addChild(this._parseStringLiteral())?this.accept(mo.SemiColon)?this.finish(e):this.finish(e,Pl.SemiColonExpected):this.finish(e,Pl.StringLiteralExpected)},/* _parseMediaQuery: defers to the inherited parser; if that yields nothing, accepts a lone variable wrapped in a new node, otherwise returns null. */t.prototype._parseMediaQuery=function(){var t=e.prototype._parseMediaQuery.call(this);if(!t){var n=this.create(aa);return n.addChild(this._parseVariable())?this.finish(n):null}return t},/* _parseMediaDeclaration(e): e defaults to false. Tries, in order: ruleset, declaration, mixin declaration, mixin reference, detached-ruleset mixin call, stylesheet statement. */t.prototype._parseMediaDeclaration=function(e){return void 0===e&&(e=!1),this._tryParseRuleset(e)||this._tryToParseDeclaration()||this._tryParseMixinDeclaration()||this._tryParseMixinReference()||this._parseDetachedRuleSetMixin()||this._parseStylesheetStatement(e)},/* _parseMediaFeatureName: an identifier or, failing that, a variable. */t.prototype._parseMediaFeatureName=function(){return this._parseIdent()||this._parseVariable()},/* _parseVariableDeclaration(e): e defaults to an empty array. Returns null when no variable can be parsed; the body continues past the end of this line. */t.prototype._parseVariableDeclaration=function(e){void 0===e&&(e=[]);var t=this.create(Ca),n=this.mark();if(!t.setVariable(this._parseVariable(!0)))return null;if(!this.accept(mo.Colon))return 
/* Tail of _parseVariableDeclaration: restores the mark and yields null when no colon followed the variable. The value is a detached ruleset (needsSemicolon is then set to false) or an expression, else VariableValueExpected is reported. The colon offset and, if present, the semicolon offset are stored on the node. */this.restoreAtMark(n),null;if(this.prevToken&&(t.colonPosition=this.prevToken.offset),t.setValue(this._parseDetachedRuleSet()))t.needsSemicolon=!1;else if(!t.setValue(this._parseExpr()))return this.finish(t,Pl.VariableValueExpected,[],e);return t.addChild(this._parsePrio()),this.peek(mo.SemiColon)&&(t.semicolonPosition=this.token.offset),this.finish(t)},/* _parseDetachedRuleSet: optionally consumes a # or . delimiter that must be followed, without whitespace, by a parenthesised mixin parameter list (parameters separated by comma or semicolon); a malformed prefix restores the position and returns null. Afterwards an opening curly brace is required (null if absent) and the body is parsed into a new node. */t.prototype._parseDetachedRuleSet=function(){var e=this.mark();if(this.peekDelim("#")||this.peekDelim(".")){if(this.consumeToken(),this.hasWhitespace()||!this.accept(mo.ParenthesisL))return this.restoreAtMark(e),null;var t=this.create(Da);if(t.getParameters().addChild(this._parseMixinParameter()))for(;(this.accept(mo.Comma)||this.accept(mo.SemiColon))&&!this.peek(mo.ParenthesisR);)t.getParameters().addChild(this._parseMixinParameter())||this.markError(t,Pl.IdentifierExpected,[],[mo.ParenthesisR]);if(!this.accept(mo.ParenthesisR))return this.restoreAtMark(e),null}if(!this.peek(mo.CurlyL))return null;var n=this.create(Ns);return this._parseBody(n,this._parseDetachedRuleSetBody.bind(this)),this.finish(n)},/* _parseDetachedRuleSetBody: keyframe selector, else ruleset declaration. */t.prototype._parseDetachedRuleSetBody=function(){return this._tryParseKeyframeSelector()||this._parseRuleSetDeclaration()},/* _addLookupChildren(e): appends consecutive lookup values to e. Returns false when the first one is missing, or when the loop stopped at an opening bracket that did not parse as a lookup value; true otherwise. */t.prototype._addLookupChildren=function(e){if(!e.addChild(this._parseLookupValue()))return!1;for(var t=!1;this.peek(mo.BracketL)&&(t=!0),e.addChild(this._parseLookupValue());)t=!1;return!t},/* _parseLookupValue: opening bracket, an optional variable or property identifier, closing bracket. Any other shape restores the position and returns null. */t.prototype._parseLookupValue=function(){var e=this.create(Ss),t=this.mark();return this.accept(mo.BracketL)&&((e.addChild(this._parseVariable(!1,!0))||e.addChild(this._parsePropertyIdentifier()))&&this.accept(mo.BracketR)||this.accept(mo.BracketR))?e:(this.restoreAtMark(t),null)},/* _parseVariable(e, t): both flags default to false. A leading $ is only considered when e is false. Returns null unless the cursor is at an @ delimiter, an allowed $ delimiter, or an at-keyword. The body continues past the end of this line. */t.prototype._parseVariable=function(e,t){void 0===e&&(e=!1),void 0===t&&(t=!1);var n=!e&&this.peekDelim("$");if(!this.peekDelim("@")&&!n&&!this.peek(mo.AtKeyword))return null;for(var i=this.create(ka),r=this.mark();this.acceptDelim("@")||!e&&this.acceptDelim("$");)if(this.hasWhitespace())return 
/* Tail of _parseVariable: whitespace after an @ or $ prefix aborts with a rewind. Then an at-keyword or identifier is required; if an opening bracket follows and t is false, lookup children must parse as well, otherwise the position is restored and null returned. */this.restoreAtMark(r),null;return(this.accept(mo.AtKeyword)||this.accept(mo.Ident))&&(t||!this.peek(mo.BracketL)||this._addLookupChildren(i))?i:(this.restoreAtMark(r),null)},/* _parseTermExpression: variable, escaped value, inherited term parser, then a mixin reference parsed with its flag set to false. */t.prototype._parseTermExpression=function(){return this._parseVariable()||this._parseEscaped()||e.prototype._parseTermExpression.call(this)||this._tryParseMixinReference(!1)},/* _parseEscaped: an EscapedJavaScript or BadEscapedJavaScript token becomes an EscapedValue node; a tilde delimiter must be followed by a String or EscapedJavaScript token, else TermExpected is reported. Returns null for anything else. */t.prototype._parseEscaped=function(){if(this.peek(mo.EscapedJavaScript)||this.peek(mo.BadEscapedJavaScript)){var e=this.createNode(us.EscapedValue);return this.consumeToken(),this.finish(e)}if(this.peekDelim("~")){e=this.createNode(us.EscapedValue);return this.consumeToken(),this.accept(mo.String)||this.accept(mo.EscapedJavaScript)?this.finish(e):this.finish(e,Pl.TermExpected)}return null},/* _parseOperator: guard operator first, inherited operator parser as fallback. */t.prototype._parseOperator=function(){var t=this._parseGuardOperator();return t||e.prototype._parseOperator.call(this)},/* _parseGuardOperator: consumes a greater-than, equals or less-than delimiter plus an optional second character (equals after greater-than or less-than, less-than after equals). NOTE(review): the Operator node is returned without a finish() call, unlike the other parsers here - confirm this is intended. */t.prototype._parseGuardOperator=function(){if(this.peekDelim(">")){var e=this.createNode(us.Operator);return this.consumeToken(),this.acceptDelim("="),e}if(this.peekDelim("=")){e=this.createNode(us.Operator);return this.consumeToken(),this.acceptDelim("<"),e}if(this.peekDelim("<")){e=this.createNode(us.Operator);return this.consumeToken(),this.acceptDelim("="),e}return null},/* _parseRuleSetDeclaration: at-keyword tokens try keyframe, media, import, supports, detached-ruleset mixin call, variable declaration, then the inherited at-statement parser; other tokens try mixin declaration, ruleset, mixin reference, function, extend, then the inherited declaration parser. */t.prototype._parseRuleSetDeclaration=function(){return this.peek(mo.AtKeyword)?this._parseKeyframe()||this._parseMedia(!0)||this._parseImport()||this._parseSupports(!0)||this._parseDetachedRuleSetMixin()||this._parseVariableDeclaration()||e.prototype._parseRuleSetDeclarationAtStatement.call(this):this._tryParseMixinDeclaration()||this._tryParseRuleset(!0)||this._tryParseMixinReference()||this._parseFunction()||this._parseExtend()||e.prototype._parseRuleSetDeclaration.call(this)},/* _parseKeyframeIdent: an identifier tagged with fs.Keyframe, else a variable. */t.prototype._parseKeyframeIdent=function(){return this._parseIdent([fs.Keyframe])||this._parseVariable()},/* _parseKeyframeSelector: detached-ruleset mixin call, else the inherited parser (the expression continues past the end of this line). */t.prototype._parseKeyframeSelector=function(){return 
/* Tail of _parseKeyframeSelector (header is on the previous line). */this._parseDetachedRuleSetMixin()||e.prototype._parseKeyframeSelector.call(this)},/* _parseSimpleSelectorBody: selector combinator, else the inherited parser. */t.prototype._parseSimpleSelectorBody=function(){return this._parseSelectorCombinator()||e.prototype._parseSimpleSelectorBody.call(this)},/* _parseSelector(e): when e is truthy a leading combinator is tried first. Collects simple selectors; after each one, a guard followed by an opening curly brace ends the loop, otherwise the position is restored and a combinator is tried. Returns null when neither a leading combinator nor any simple selector was added. */t.prototype._parseSelector=function(e){var t=this.create(Ds),n=!1;for(e&&(n=t.addChild(this._parseCombinator()));t.addChild(this._parseSimpleSelector());){n=!0;var i=this.mark();if(t.addChild(this._parseGuard())&&this.peek(mo.CurlyL))break;this.restoreAtMark(i),t.addChild(this._parseCombinator())}return n?this.finish(t):null},/* _parseSelectorCombinator: an ampersand followed, while no whitespace intervenes, by any run of hyphen delimiters, Num or Dimension tokens, identifiers, or further ampersands. Returns null when the cursor is not at an ampersand. */t.prototype._parseSelectorCombinator=function(){if(this.peekDelim("&")){var e=this.createNode(us.SelectorCombinator);for(this.consumeToken();!this.hasWhitespace()&&(this.acceptDelim("-")||this.accept(mo.Num)||this.accept(mo.Dimension)||e.addChild(this._parseIdent())||this.acceptDelim("&")););return this.finish(e)}return null},/* _parseSelectorIdent: an interpolated identifier wrapped in a SelectorInterpolation node, or null. */t.prototype._parseSelectorIdent=function(){if(!this.peekInterpolatedIdent())return null;var e=this.createNode(us.SelectorInterpolation);return this._acceptInterpolatedIdent(e)?this.finish(e):null},/* _parsePropertyIdentifier(e): e defaults to false. Two consecutive hyphen delimiters set isCustomProperty. With e set, the name is a plain identifier (custom property) or a match of the local regexp; without it an interpolated identifier is accepted and, when no whitespace follows, an optional plus delimiter and then an optional underscore identifier are consumed. Restores the position and returns null on failure. */t.prototype._parsePropertyIdentifier=function(e){void 0===e&&(e=!1);var t=/^[\w-]+/;if(!this.peekInterpolatedIdent()&&!this.peekRegExp(this.token.type,t))return null;var n=this.mark(),i=this.create(ks);i.isCustomProperty=this.acceptDelim("-")&&this.acceptDelim("-");return(e?i.isCustomProperty?i.addChild(this._parseIdent()):i.addChild(this._parseRegexp(t)):i.isCustomProperty?this._acceptInterpolatedIdent(i):this._acceptInterpolatedIdent(i,t))?(e||this.hasWhitespace()||(this.acceptDelim("+"),this.hasWhitespace()||this.acceptIdent("_")),this.finish(i)):(this.restoreAtMark(n),null)},/* peekInterpolatedIdent: true at an Ident token or at an @, $ or hyphen delimiter. */t.prototype.peekInterpolatedIdent=function(){return this.peek(mo.Ident)||this.peekDelim("@")||this.peekDelim("$")||this.peekDelim("-")},/* _acceptInterpolatedIdent(e, t): header only; the loop that consumes identifier pieces and interpolations continues past the end of this line. */t.prototype._acceptInterpolatedIdent=function(e,t){for(var n=this,i=!1,r=function(){var e=n.mark();return 
/* Tail of _acceptInterpolatedIdent: helper r accepts up to two hyphen delimiters and then an interpolation, rewinding when whitespace follows the hyphens; helper o accepts a match of regexp t when given, else an Ident token. The loop repeats while a piece matches (interpolations are added to e) and no whitespace follows; the result says whether anything matched. */n.acceptDelim("-")&&(n.hasWhitespace()||n.acceptDelim("-"),n.hasWhitespace())?(n.restoreAtMark(e),null):n._parseInterpolation()},o=t?function(){return n.acceptRegexp(t)}:function(){return n.accept(mo.Ident)};(o()||e.addChild(this._parseInterpolation()||this.try(r)))&&(i=!0,!this.hasWhitespace()););return i},/* _parseInterpolation: an @ or $ delimiter immediately followed by an opening curly brace, an identifier and a closing curly brace. Restores the position and returns null if the brace is missing or separated by whitespace; reports IdentifierExpected or RightCurlyExpected for the later parts. */t.prototype._parseInterpolation=function(){var e=this.mark();if(this.peekDelim("@")||this.peekDelim("$")){var t=this.createNode(us.Interpolation);return this.consumeToken(),this.hasWhitespace()||!this.accept(mo.CurlyL)?(this.restoreAtMark(e),null):t.addChild(this._parseIdent())?this.accept(mo.CurlyR)?this.finish(t):this.finish(t,Pl.RightCurlyExpected):this.finish(t,Pl.IdentifierExpected)}return null},/* _tryParseMixinDeclaration: mixin identifier, parenthesised parameters separated by comma or semicolon, optional guard, then a body that must start with an opening curly brace. A missing identifier, parenthesis or body restores the position and returns null; a bad parameter after a separator is only marked as an error. */t.prototype._tryParseMixinDeclaration=function(){var e=this.mark(),t=this.create(Da);if(!t.setIdentifier(this._parseMixinDeclarationIdentifier())||!this.accept(mo.ParenthesisL))return this.restoreAtMark(e),null;if(t.getParameters().addChild(this._parseMixinParameter()))for(;(this.accept(mo.Comma)||this.accept(mo.SemiColon))&&!this.peek(mo.ParenthesisR);)t.getParameters().addChild(this._parseMixinParameter())||this.markError(t,Pl.IdentifierExpected,[],[mo.ParenthesisR]);return this.accept(mo.ParenthesisR)?(t.setGuard(this._parseGuard()),this.peek(mo.CurlyL)?this._parseBody(t,this._parseMixInBodyDeclaration.bind(this)):(this.restoreAtMark(e),null)):(this.restoreAtMark(e),null)},/* _parseMixInBodyDeclaration: font-face, else ruleset declaration. */t.prototype._parseMixInBodyDeclaration=function(){return this._parseFontFace()||this._parseRuleSetDeclaration()},/* _parseMixinDeclarationIdentifier: a # or . delimiter directly followed by an identifier, or a single Hash token; the resulting node gets referenceTypes set to [fs.Mixin]. Returns null otherwise (without restoring the position). */t.prototype._parseMixinDeclarationIdentifier=function(){var e;if(this.peekDelim("#")||this.peekDelim(".")){if(e=this.create(ks),this.consumeToken(),this.hasWhitespace()||!e.addChild(this._parseIdent()))return null}else{if(!this.peek(mo.Hash))return null;e=this.create(ks),this.consumeToken()}return e.referenceTypes=[fs.Mixin],this.finish(e)},/* _parsePseudo: returns null unless the cursor is at a colon; the rest of the body continues past the end of this line. */t.prototype._parsePseudo=function(){if(!this.peek(mo.Colon))return null;var t=this.mark(),n=this.create(Ea);return 
/* Tail of _parsePseudo: a colon followed by the identifier extend is completed as an extend node; otherwise the position is restored and the inherited parser runs. */this.consumeToken(),this.acceptIdent("extend")?this._completeExtends(n):(this.restoreAtMark(t),e.prototype._parsePseudo.call(this))},/* _parseExtend: an ampersand, then with no whitespace a colon and the identifier extend; anything else restores the position and returns null. */t.prototype._parseExtend=function(){if(!this.peekDelim("&"))return null;var e=this.mark(),t=this.create(Ea);return this.consumeToken(),!this.hasWhitespace()&&this.accept(mo.Colon)&&this.acceptIdent("extend")?this._completeExtends(t):(this.restoreAtMark(e),null)},/* _completeExtends(e): parses a parenthesised, comma-separated selector list into e.getSelectors(), reporting LeftParenthesisExpected, SelectorExpected or RightParenthesisExpected as appropriate. */t.prototype._completeExtends=function(e){if(!this.accept(mo.ParenthesisL))return this.finish(e,Pl.LeftParenthesisExpected);var t=e.getSelectors();if(!t.addChild(this._parseSelector(!0)))return this.finish(e,Pl.SelectorExpected);for(;this.accept(mo.Comma);)if(!t.addChild(this._parseSelector(!0)))return this.finish(e,Pl.SelectorExpected);return this.accept(mo.ParenthesisR)?this.finish(e):this.finish(e,Pl.RightParenthesisExpected)},/* _parseDetachedRuleSetMixin: at an at-keyword, a variable followed without whitespace by an opening and then a closing parenthesis. When a variable was added but the opening parenthesis is missing or preceded by whitespace, the position is restored and null returned. NOTE(review): when no variable is added the closing-parenthesis check still runs - confirm this path is reachable. */t.prototype._parseDetachedRuleSetMixin=function(){if(!this.peek(mo.AtKeyword))return null;var e=this.mark(),t=this.create(Fa);return!t.addChild(this._parseVariable(!0))||!this.hasWhitespace()&&this.accept(mo.ParenthesisL)?this.accept(mo.ParenthesisR)?this.finish(t):this.finish(t,Pl.RightParenthesisExpected):(this.restoreAtMark(e),null)},/* _tryParseMixinReference(e): e defaults to true. Reads a chain of mixin identifiers optionally separated by a greater-than delimiter; all but the last are added as namespaces. A parenthesised argument list (comma or semicolon separated) is optional and decides the referenceTypes of the final identifier. The return expression continues past the end of this line. */t.prototype._tryParseMixinReference=function(e){void 0===e&&(e=!0);for(var t=this.mark(),n=this.create(Fa),i=this._parseMixinDeclarationIdentifier();i;){this.acceptDelim(">");var r=this._parseMixinDeclarationIdentifier();if(!r)break;n.getNamespaces().addChild(i),i=r}if(!n.setIdentifier(i))return this.restoreAtMark(t),null;var o=!1;if(this.accept(mo.ParenthesisL)){if(o=!0,n.getArguments().addChild(this._parseMixinArgument()))for(;(this.accept(mo.Comma)||this.accept(mo.SemiColon))&&!this.peek(mo.ParenthesisR);)if(!n.getArguments().addChild(this._parseMixinArgument()))return this.finish(n,Pl.ExpressionExpected);if(!this.accept(mo.ParenthesisR))return this.finish(n,Pl.RightParenthesisExpected);i.referenceTypes=[fs.Mixin]}else i.referenceTypes=[fs.Mixin,fs.Rule];return 
/* Tail of _tryParseMixinReference: at an opening bracket, lookup children are parsed only when e is false; otherwise a priority node is added. Without an argument list the reference must be followed by a semicolon, closing curly brace or EOF, else the position is restored and null returned. */this.peek(mo.BracketL)?e||this._addLookupChildren(n):n.addChild(this._parsePrio()),o||this.peek(mo.SemiColon)||this.peek(mo.CurlyR)||this.peek(mo.EOF)?this.finish(n):(this.restoreAtMark(t),null)},/* _parseMixinArgument: an optional variable-plus-colon name (the variable is rewound when no colon follows), then a detached ruleset or expression as the value; restores the position and returns null if no value parses. */t.prototype._parseMixinArgument=function(){var e=this.create(Ws),t=this.mark(),n=this._parseVariable();return n&&(this.accept(mo.Colon)?e.setIdentifier(n):this.restoreAtMark(t)),e.setValue(this._parseDetachedRuleSet()||this._parseExpr(!0))?this.finish(e):(this.restoreAtMark(t),null)},/* _parseMixinParameter: accepts @rest followed by the custom Zp token (DotExpected otherwise), a bare Zp token, or an optional variable with optional colon plus a default value (detached ruleset or expression). Returns null when neither a variable nor a default value was found. */t.prototype._parseMixinParameter=function(){var e=this.create(Os);if(this.peekKeyword("@rest")){var t=this.create(Ss);return this.consumeToken(),this.accept(Zp)?(e.setIdentifier(this.finish(t)),this.finish(e)):this.finish(e,Pl.DotExpected,[],[mo.Comma,mo.ParenthesisR])}if(this.peek(Zp)){var n=this.create(Ss);return this.consumeToken(),e.setIdentifier(this.finish(n)),this.finish(e)}var i=!1;return e.setIdentifier(this._parseVariable())&&(this.accept(mo.Colon),i=!0),e.setDefaultValue(this._parseDetachedRuleSet()||this._parseExpr(!0))||i?this.finish(e):null},/* _parseGuard: the identifier when, an optional identifier not (stored in isNegated), then one or more guard conditions joined by the identifier and or by commas; a missing condition reports ConditionExpected. */t.prototype._parseGuard=function(){if(!this.peekIdent("when"))return null;var e=this.create(Ma);if(this.consumeToken(),e.isNegated=this.acceptIdent("not"),!e.getConditions().addChild(this._parseGuardCondition()))return this.finish(e,Pl.ConditionExpected);for(;this.acceptIdent("and")||this.accept(mo.Comma);)if(!e.getConditions().addChild(this._parseGuardCondition()))return this.finish(e,Pl.ConditionExpected);return this.finish(e)},/* _parseGuardCondition: a parenthesised expression; returns null when there is no opening parenthesis and reports RightParenthesisExpected when the closing one is missing. */t.prototype._parseGuardCondition=function(){if(!this.peek(mo.ParenthesisL))return null;var e=this.create(za);return this.consumeToken(),e.addChild(this._parseExpr()),this.accept(mo.ParenthesisR)?this.finish(e):this.finish(e,Pl.RightParenthesisExpected)},/* _parseFunction: a function identifier that must be directly followed by an opening parenthesis; the body continues past the end of this line. */t.prototype._parseFunction=function(){var e=this.mark(),t=this.create(Ps);if(!t.setIdentifier(this._parseFunctionIdentifier()))return null;if(this.hasWhitespace()||!this.accept(mo.ParenthesisL))return 
this.restoreAtMark(e),null;if(t.getArguments().addChild(this._parseMixinArgument()))for(;(this.accept(mo.Comma)||this.accept(mo.SemiColon))&&!this.peek(mo.ParenthesisR);)if(!t.getArguments().addChild(this._parseMixinArgument()))return this.finish(t,Pl.ExpressionExpected);return this.accept(mo.ParenthesisR)?this.finish(t):this.finish(t,Pl.RightParenthesisExpected)},t.prototype._parseFunctionIdentifier=function(){if(this.peekDelim("%")){var t=this.create(ks);return t.referenceTypes=[fs.Function],this.consumeToken(),this.finish(t)}return e.prototype._parseFunctionIdentifier.call(this)},t.prototype._parseURLArgument=function(){var t=this.mark(),n=e.prototype._parseURLArgument.call(this);if(!n||!this.peek(mo.ParenthesisR)){this.restoreAtMark(t);var i=this.create(Ss);return i.addChild(this._parseBinaryExpr()),this.finish(i)}return n},t}(Lh),iu=function(){var e=function(t,n){return(e=Object.setPrototypeOf||{__proto__:[]}instanceof Array&&function(e,t){e.__proto__=t}||function(e,t){for(var n in t)Object.prototype.hasOwnProperty.call(t,n)&&(e[n]=t[n])})(t,n)};return function(t,n){if("function"!==typeof n&&null!==n)throw new TypeError("Class extends value "+String(n)+" is not a constructor or null");function i(){this.constructor=t}e(t,n),t.prototype=null===n?Object.create(n):(i.prototype=n.prototype,new i)}}(),ru=Wa(),ou=function(e){function t(t,n){return e.call(this,"@",t,n)||this}return iu(t,e),t.prototype.createFunctionProposals=function(e,t,n,i){for(var r=0,o=e;r 50%"),example:"percentage(@number);",type:"percentage"},{name:"round",description:ru("less.builtin.round","rounds a number to a number of places"),example:"round(number, [places: 0]);"},{name:"sqrt",description:ru("less.builtin.sqrt","calculates square root of a number"),example:"sqrt(number);"},{name:"sin",description:ru("less.builtin.sin","sine function"),example:"sin(number);"},{name:"tan",description:ru("less.builtin.tan","tangent 
function"),example:"tan(number);"},{name:"atan",description:ru("less.builtin.atan","arctangent - inverse of tangent function"),example:"atan(number);"},{name:"pi",description:ru("less.builtin.pi","returns pi"),example:"pi();"},{name:"pow",description:ru("less.builtin.pow","first argument raised to the power of the second argument"),example:"pow(@base, @exponent);"},{name:"mod",description:ru("less.builtin.mod","first argument modulus second argument"),example:"mod(number, number);"},{name:"min",description:ru("less.builtin.min","returns the lowest of one or more values"),example:"min(@x, @y);"},{name:"max",description:ru("less.builtin.max","returns the lowest of one or more values"),example:"max(@x, @y);"}],t.colorProposals=[{name:"argb",example:"argb(@color);",description:ru("less.builtin.argb","creates a #AARRGGBB")},{name:"hsl",example:"hsl(@hue, @saturation, @lightness);",description:ru("less.builtin.hsl","creates a color")},{name:"hsla",example:"hsla(@hue, @saturation, @lightness, @alpha);",description:ru("less.builtin.hsla","creates a color")},{name:"hsv",example:"hsv(@hue, @saturation, @value);",description:ru("less.builtin.hsv","creates a color")},{name:"hsva",example:"hsva(@hue, @saturation, @value, @alpha);",description:ru("less.builtin.hsva","creates a color")},{name:"hue",example:"hue(@color);",description:ru("less.builtin.hue","returns the `hue` channel of `@color` in the HSL space")},{name:"saturation",example:"saturation(@color);",description:ru("less.builtin.saturation","returns the `saturation` channel of `@color` in the HSL space")},{name:"lightness",example:"lightness(@color);",description:ru("less.builtin.lightness","returns the `lightness` channel of `@color` in the HSL space")},{name:"hsvhue",example:"hsvhue(@color);",description:ru("less.builtin.hsvhue","returns the `hue` channel of `@color` in the HSV space")},{name:"hsvsaturation",example:"hsvsaturation(@color);",description:ru("less.builtin.hsvsaturation","returns the `saturation` channel 
of `@color` in the HSV space")},{name:"hsvvalue",example:"hsvvalue(@color);",description:ru("less.builtin.hsvvalue","returns the `value` channel of `@color` in the HSV space")},{name:"red",example:"red(@color);",description:ru("less.builtin.red","returns the `red` channel of `@color`")},{name:"green",example:"green(@color);",description:ru("less.builtin.green","returns the `green` channel of `@color`")},{name:"blue",example:"blue(@color);",description:ru("less.builtin.blue","returns the `blue` channel of `@color`")},{name:"alpha",example:"alpha(@color);",description:ru("less.builtin.alpha","returns the `alpha` channel of `@color`")},{name:"luma",example:"luma(@color);",description:ru("less.builtin.luma","returns the `luma` value (perceptual brightness) of `@color`")},{name:"saturate",example:"saturate(@color, 10%);",description:ru("less.builtin.saturate","return `@color` 10% points more saturated")},{name:"desaturate",example:"desaturate(@color, 10%);",description:ru("less.builtin.desaturate","return `@color` 10% points less saturated")},{name:"lighten",example:"lighten(@color, 10%);",description:ru("less.builtin.lighten","return `@color` 10% points lighter")},{name:"darken",example:"darken(@color, 10%);",description:ru("less.builtin.darken","return `@color` 10% points darker")},{name:"fadein",example:"fadein(@color, 10%);",description:ru("less.builtin.fadein","return `@color` 10% points less transparent")},{name:"fadeout",example:"fadeout(@color, 10%);",description:ru("less.builtin.fadeout","return `@color` 10% points more transparent")},{name:"fade",example:"fade(@color, 50%);",description:ru("less.builtin.fade","return `@color` with 50% transparency")},{name:"spin",example:"spin(@color, 10);",description:ru("less.builtin.spin","return `@color` with a 10 degree larger in hue")},{name:"mix",example:"mix(@color1, @color2, [@weight: 50%]);",description:ru("less.builtin.mix","return a mix of `@color1` and 
`@color2`")},{name:"greyscale",example:"greyscale(@color);",description:ru("less.builtin.greyscale","returns a grey, 100% desaturated color")},{name:"contrast",example:"contrast(@color1, [@darkcolor: black], [@lightcolor: white], [@threshold: 43%]);",description:ru("less.builtin.contrast","return `@darkcolor` if `@color1 is> 43% luma` otherwise return `@lightcolor`, see notes")},{name:"multiply",example:"multiply(@color1, @color2);"},{name:"screen",example:"screen(@color1, @color2);"},{name:"overlay",example:"overlay(@color1, @color2);"},{name:"softlight",example:"softlight(@color1, @color2);"},{name:"hardlight",example:"hardlight(@color1, @color2);"},{name:"difference",example:"difference(@color1, @color2);"},{name:"exclusion",example:"exclusion(@color1, @color2);"},{name:"average",example:"average(@color1, @color2);"},{name:"negation",example:"negation(@color1, @color2);"}],t}(pd);function su(e,t){var n=function(e){function t(t){return e.positionAt(t.offset).line}function n(t){return e.positionAt(t.offset+t.len).line}function i(){switch(e.languageId){case"scss":return new zp;case"less":return new eu;default:return new ls}}function r(e,i){var r=t(e),o=n(e);return r!==o?{startLine:r,endLine:o,kind:i}:null}var o=[],s=[],a=i();a.ignoreComment=!1,a.setSource(e.getText());var l=a.scan(),c=null,h=function(){switch(l.type){case mo.CurlyL:case Np:s.push({line:t(l),type:"brace",isStart:!0});break;case mo.CurlyR:if(0!==s.length){if(!(p=au(s,"brace")))break;var i=n(l);"brace"===p.type&&(c&&n(c)!==i&&i--,p.line!==i&&o.push({startLine:p.line,endLine:i,kind:void 0}))}break;case mo.Comment:var h=function(e){return"#region"===e?{line:t(l),type:"comment",isStart:!0}:{line:n(l),type:"comment",isStart:!1}},d=function(t){var n=t.text.match(/^\s*\/\*\s*(#region|#endregion)\b\s*(.*?)\s*\*\//);if(n)return h(n[1]);if("scss"===e.languageId||"less"===e.languageId){var i=t.text.match(/^\s*\/\/\s*(#region|#endregion)\b\s*(.*?)\s*/);if(i)return h(i[1])}return 
null}(l);if(d)if(d.isStart)s.push(d);else{var p;if(!(p=au(s,"comment")))break;"comment"===p.type&&p.line!==d.line&&o.push({startLine:p.line,endLine:d.line,kind:"region"})}else{var u=r(l,"comment");u&&o.push(u)}}c=l,l=a.scan()};for(;l.type!==mo.EOF;)h();return o}(e);return function(e,t){var n=t&&t.rangeLimit||Number.MAX_VALUE,i=e.sort((function(e,t){var n=e.startLine-t.startLine;return 0===n&&(n=e.endLine-t.endLine),n})),r=[],o=-1;return i.forEach((function(e){e.startLine=0;n--)if(e[n].type===t&&e[n].isStart)return e.splice(n,1)[0];return null}!function(){var e=[,,function(e){function t(e){this.__parent=e,this.__character_count=0,this.__indent_count=-1,this.__alignment_count=0,this.__wrap_point_index=0,this.__wrap_point_character_count=0,this.__wrap_point_indent_count=-1,this.__wrap_point_alignment_count=0,this.__items=[]}function n(e,t){this.__cache=[""],this.__indent_size=e.indent_size,this.__indent_string=e.indent_char,e.indent_with_tabs||(this.__indent_string=new Array(e.indent_size+1).join(e.indent_char)),t=t||"",e.indent_level>0&&(t=new Array(e.indent_level+1).join(this.__indent_string)),this.__base_string=t,this.__base_string_length=t.length}function i(e,i){this.__indent_cache=new n(e,i),this.raw=!1,this._end_with_newline=e.end_with_newline,this.indent_size=e.indent_size,this.wrap_line_length=e.wrap_line_length,this.indent_empty_lines=e.indent_empty_lines,this.__lines=[],this.previous_line=null,this.current_line=null,this.next_line=new t(this),this.space_before_token=!1,this.non_breaking_space=!1,this.previous_token_wrapped=!1,this.__add_outputline()}t.prototype.clone_empty=function(){var e=new t(this.__parent);return e.set_indent(this.__indent_count,this.__alignment_count),e},t.prototype.item=function(e){return e<0?this.__items[this.__items.length+e]:this.__items[e]},t.prototype.has_match=function(e){for(var 
t=this.__items.length-1;t>=0;t--)if(this.__items[t].match(e))return!0;return!1},t.prototype.set_indent=function(e,t){this.is_empty()&&(this.__indent_count=e||0,this.__alignment_count=t||0,this.__character_count=this.__parent.get_indent_size(this.__indent_count,this.__alignment_count))},t.prototype._set_wrap_point=function(){this.__parent.wrap_line_length&&(this.__wrap_point_index=this.__items.length,this.__wrap_point_character_count=this.__character_count,this.__wrap_point_indent_count=this.__parent.next_line.__indent_count,this.__wrap_point_alignment_count=this.__parent.next_line.__alignment_count)},t.prototype._should_wrap=function(){return this.__wrap_point_index&&this.__character_count>this.__parent.wrap_line_length&&this.__wrap_point_character_count>this.__parent.next_line.__character_count},t.prototype._allow_wrap=function(){if(this._should_wrap()){this.__parent.add_new_line();var e=this.__parent.current_line;return e.set_indent(this.__wrap_point_indent_count,this.__wrap_point_alignment_count),e.__items=this.__items.slice(this.__wrap_point_index),this.__items=this.__items.slice(0,this.__wrap_point_index),e.__character_count+=this.__character_count-this.__wrap_point_character_count,this.__character_count=this.__wrap_point_character_count," "===e.__items[0]&&(e.__items.splice(0,1),e.__character_count-=1),!0}return!1},t.prototype.is_empty=function(){return 0===this.__items.length},t.prototype.last=function(){return this.is_empty()?null:this.__items[this.__items.length-1]},t.prototype.push=function(e){this.__items.push(e);var t=e.lastIndexOf("\n");-1!==t?this.__character_count=e.length-t:this.__character_count+=e.length},t.prototype.pop=function(){var e=null;return 
this.is_empty()||(e=this.__items.pop(),this.__character_count-=e.length),e},t.prototype._remove_indent=function(){this.__indent_count>0&&(this.__indent_count-=1,this.__character_count-=this.__parent.indent_size)},t.prototype._remove_wrap_indent=function(){this.__wrap_point_indent_count>0&&(this.__wrap_point_indent_count-=1)},t.prototype.trim=function(){for(;" "===this.last();)this.__items.pop(),this.__character_count-=1},t.prototype.toString=function(){var e="";return this.is_empty()?this.__parent.indent_empty_lines&&(e=this.__parent.get_indent_string(this.__indent_count)):(e=this.__parent.get_indent_string(this.__indent_count,this.__alignment_count),e+=this.__items.join("")),e},n.prototype.get_indent_size=function(e,t){var n=this.__base_string_length;return t=t||0,e<0&&(n=0),n+=e*this.__indent_size,n+=t},n.prototype.get_indent_string=function(e,t){var n=this.__base_string;return t=t||0,e<0&&(e=0,n=""),t+=e*this.__indent_size,this.__ensure_cache(t),n+=this.__cache[t]},n.prototype.__ensure_cache=function(e){for(;e>=this.__cache.length;)this.__add_column()},n.prototype.__add_column=function(){var e=this.__cache.length,t=0,n="";this.__indent_size&&e>=this.__indent_size&&(e-=(t=Math.floor(e/this.__indent_size))*this.__indent_size,n=new Array(t+1).join(this.__indent_string)),e&&(n+=new Array(e+1).join(" ")),this.__cache.push(n)},i.prototype.__add_outputline=function(){this.previous_line=this.current_line,this.current_line=this.next_line.clone_empty(),this.__lines.push(this.current_line)},i.prototype.get_line_number=function(){return this.__lines.length},i.prototype.get_indent_string=function(e,t){return this.__indent_cache.get_indent_string(e,t)},i.prototype.get_indent_size=function(e,t){return 
this.__indent_cache.get_indent_size(e,t)},i.prototype.is_empty=function(){return!this.previous_line&&this.current_line.is_empty()},i.prototype.add_new_line=function(e){return!(this.is_empty()||!e&&this.just_added_newline())&&(this.raw||this.__add_outputline(),!0)},i.prototype.get_code=function(e){this.trim(!0);var t=this.current_line.pop();t&&("\n"===t[t.length-1]&&(t=t.replace(/\n+$/g,"")),this.current_line.push(t)),this._end_with_newline&&this.__add_outputline();var n=this.__lines.join("\n");return"\n"!==e&&(n=n.replace(/[\n]/g,e)),n},i.prototype.set_wrap_point=function(){this.current_line._set_wrap_point()},i.prototype.set_indent=function(e,t){return e=e||0,t=t||0,this.next_line.set_indent(e,t),this.__lines.length>1?(this.current_line.set_indent(e,t),!0):(this.current_line.set_indent(),!1)},i.prototype.add_raw_token=function(e){for(var t=0;t1&&this.current_line.is_empty();)this.__lines.pop(),this.current_line=this.__lines[this.__lines.length-1],this.current_line.trim();this.previous_line=this.__lines.length>1?this.__lines[this.__lines.length-2]:null},i.prototype.just_added_newline=function(){return this.current_line.is_empty()},i.prototype.just_added_blankline=function(){return this.is_empty()||this.current_line.is_empty()&&this.previous_line.is_empty()},i.prototype.ensure_empty_line_above=function(e,n){for(var i=this.__lines.length-2;i>=0;){var r=this.__lines[i];if(r.is_empty())break;if(0!==r.item(0).indexOf(e)&&r.item(-1)!==n){this.__lines.splice(i+1,0,new t(this)),this.previous_line=this.__lines[this.__lines.length-2];break}i--}},e.exports.Output=i},,,,function(e){function t(e,t){this.raw_options=n(e,t),this.disabled=this._get_boolean("disabled"),this.eol=this._get_characters("eol","auto"),this.end_with_newline=this._get_boolean("end_with_newline"),this.indent_size=this._get_number("indent_size",4),this.indent_char=this._get_characters("indent_char"," 
"),this.indent_level=this._get_number("indent_level"),this.preserve_newlines=this._get_boolean("preserve_newlines",!0),this.max_preserve_newlines=this._get_number("max_preserve_newlines",32786),this.preserve_newlines||(this.max_preserve_newlines=0),this.indent_with_tabs=this._get_boolean("indent_with_tabs","\t"===this.indent_char),this.indent_with_tabs&&(this.indent_char="\t",1===this.indent_size&&(this.indent_size=4)),this.wrap_line_length=this._get_number("wrap_line_length",this._get_number("max_char")),this.indent_empty_lines=this._get_boolean("indent_empty_lines"),this.templating=this._get_selection_list("templating",["auto","none","django","erb","handlebars","php","smarty"],["auto"])}function n(e,t){var n,r={};for(n in e=i(e))n!==t&&(r[n]=e[n]);if(t&&e[t])for(n in e[t])r[n]=e[t][n];return r}function i(e){var t,n={};for(t in e){n[t.replace(/-/g,"_")]=e[t]}return n}t.prototype._get_array=function(e,t){var n=this.raw_options[e],i=t||[];return"object"===typeof n?null!==n&&"function"===typeof n.concat&&(i=n.concat()):"string"===typeof n&&(i=n.split(/[^a-zA-Z0-9_\/\-]+/)),i},t.prototype._get_boolean=function(e,t){var n=this.raw_options[e];return void 0===n?!!t:!!n},t.prototype._get_characters=function(e,t){var n=this.raw_options[e],i=t||"";return"string"===typeof n&&(i=n.replace(/\\r/,"\r").replace(/\\n/,"\n").replace(/\\t/,"\t")),i},t.prototype._get_number=function(e,t){var n=this.raw_options[e];t=parseInt(t,10),isNaN(t)&&(t=0);var i=parseInt(n,10);return isNaN(i)&&(i=t),i},t.prototype._get_selection=function(e,t,n){var i=this._get_selection_list(e,t,n);if(1!==i.length)throw new Error("Invalid Option Value: The option '"+e+"' can only be one of the following values:\n"+t+"\nYou passed in: '"+this.raw_options[e]+"'");return i[0]},t.prototype._get_selection_list=function(e,t,n){if(!t||0===t.length)throw new Error("Selection list cannot be empty.");if(n=n||[t[0]],!this._is_valid_selection(n,t))throw new Error("Invalid Default Value!");var 
i=this._get_array(e,n);if(!this._is_valid_selection(i,t))throw new Error("Invalid Option Value: The option '"+e+"' can contain only the following values:\n"+t+"\nYou passed in: '"+this.raw_options[e]+"'");return i},t.prototype._is_valid_selection=function(e,t){return e.length&&t.length&&!e.some((function(e){return-1===t.indexOf(e)}))},e.exports.Options=t,e.exports.normalizeOpts=i,e.exports.mergeOpts=n},,function(e){var t=RegExp.prototype.hasOwnProperty("sticky");function n(e){this.__input=e||"",this.__input_length=this.__input.length,this.__position=0}n.prototype.restart=function(){this.__position=0},n.prototype.back=function(){this.__position>0&&(this.__position-=1)},n.prototype.hasNext=function(){return this.__position=0&&e=0&&t=e.length&&this.__input.substring(t-e.length,t).toLowerCase()===e},e.exports.InputScanner=n},,,,,function(e){function t(e,t){e="string"===typeof e?e:e.source,t="string"===typeof t?t:t.source,this.__directives_block_pattern=new RegExp(e+/ beautify( \w+[:]\w+)+ /.source+t,"g"),this.__directive_pattern=/ (\w+)[:](\w+)/g,this.__directives_end_ignore_pattern=new RegExp(e+/\sbeautify\signore:end\s/.source+t,"g")}t.prototype.get_directives=function(e){if(!e.match(this.__directives_block_pattern))return null;var t={};this.__directive_pattern.lastIndex=0;for(var n=this.__directive_pattern.exec(e);n;)t[n[1]]=n[2],n=this.__directive_pattern.exec(e);return t},t.prototype.readIgnored=function(e){return e.readUntilAfter(this.__directives_end_ignore_pattern)},e.exports.Directives=t},,function(e,t,n){var i=n(16).Beautifier,r=n(17).Options;e.exports=function(e,t){return new i(e,t).beautify()},e.exports.defaultOptions=function(){return new r}},function(e,t,n){var i=n(17).Options,r=n(2).Output,o=n(8).InputScanner,s=new(0,n(13).Directives)(/\/\*/,/\*\//),a=/\r\n|[\r\n]/,l=/\r\n|[\r\n]/g,c=/\s/,h=/(?:\s|\n)+/g,d=/\/\*(?:[\s\S]*?)((?:\*\/)|$)/g,p=/\/\/(?:[^\n\r\u2028\u2029]*)/g;function u(e,t){this._source_text=e||"",this._options=new 
i(t),this._ch=null,this._input=null,this.NESTED_AT_RULE={"@page":!0,"@font-face":!0,"@keyframes":!0,"@media":!0,"@supports":!0,"@document":!0},this.CONDITIONAL_GROUP_RULE={"@media":!0,"@supports":!0,"@document":!0}}u.prototype.eatString=function(e){var t="";for(this._ch=this._input.next();this._ch;){if(t+=this._ch,"\\"===this._ch)t+=this._input.next();else if(-1!==e.indexOf(this._ch)||"\n"===this._ch)break;this._ch=this._input.next()}return t},u.prototype.eatWhitespace=function(e){for(var t=c.test(this._input.peek()),n=0;c.test(this._input.peek());)this._ch=this._input.next(),e&&"\n"===this._ch&&(0===n||n0&&this._indentLevel--},u.prototype.beautify=function(){if(this._options.disabled)return this._source_text;var e=this._source_text,t=this._options.eol;"auto"===t&&(t="\n",e&&a.test(e||"")&&(t=e.match(a)[0]));var n=(e=e.replace(l,"\n")).match(/^[\t ]*/)[0];this._output=new r(this._options,n),this._input=new o(e),this._indentLevel=0,this._nestedLevel=0,this._ch=null;for(var i,u,m=0,f=!1,g=!1,b=!1,v=!1,y=!1,w=this._ch;i=""!==this._input.read(h),u=w,this._ch=this._input.next(),"\\"===this._ch&&this._input.hasNext()&&(this._ch+=this._input.next()),w=this._ch,this._ch;)if("/"===this._ch&&"*"===this._input.peek()){this._output.add_new_line(),this._input.back();var x=this._input.read(d),S=s.get_directives(x);S&&"start"===S.ignore&&(x+=s.readIgnored(this._input)),this.print_string(x),this.eatWhitespace(!0),this._output.add_new_line()}else if("/"===this._ch&&"/"===this._input.peek())this._output.space_before_token=!0,this._input.back(),this.print_string(this._input.read(p)),this.eatWhitespace(!0);else if("@"===this._ch)if(this.preserveSingleSpace(i),"{"===this._input.peek())this.print_string(this._ch+this.eatString("}"));else{this.print_string(this._ch);var C=this._input.peekUntilAfter(/[: ,;{}()[\]\/='"]/g);C.match(/[ :]$/)&&(C=this.eatString(": 
").replace(/\s$/,""),this.print_string(C),this._output.space_before_token=!0),"extend"===(C=C.replace(/\s$/,""))?v=!0:"import"===C&&(y=!0),C in this.NESTED_AT_RULE?(this._nestedLevel+=1,C in this.CONDITIONAL_GROUP_RULE&&(b=!0)):f||0!==m||-1===C.indexOf(":")||(g=!0,this.indent())}else"#"===this._ch&&"{"===this._input.peek()?(this.preserveSingleSpace(i),this.print_string(this._ch+this.eatString("}"))):"{"===this._ch?(g&&(g=!1,this.outdent()),b?(b=!1,f=this._indentLevel>=this._nestedLevel):f=this._indentLevel>=this._nestedLevel-1,this._options.newline_between_rules&&f&&this._output.previous_line&&"{"!==this._output.previous_line.item(-1)&&this._output.ensure_empty_line_above("/",","),this._output.space_before_token=!0,"expand"===this._options.brace_style?(this._output.add_new_line(),this.print_string(this._ch),this.indent(),this._output.set_indent(this._indentLevel)):(this.indent(),this.print_string(this._ch)),this.eatWhitespace(!0),this._output.add_new_line()):"}"===this._ch?(this.outdent(),this._output.add_new_line(),"{"===u&&this._output.trim(!0),y=!1,v=!1,g&&(this.outdent(),g=!1),this.print_string(this._ch),f=!1,this._nestedLevel&&this._nestedLevel--,this.eatWhitespace(!0),this._output.add_new_line(),this._options.newline_between_rules&&!this._output.just_added_blankline()&&"}"!==this._input.peek()&&this._output.add_new_line(!0)):":"===this._ch?!f&&!b||this._input.lookBack("&")||this.foundNestedPseudoClass()||this._input.lookBack("(")||v||0!==m?(this._input.lookBack(" 
")&&(this._output.space_before_token=!0),":"===this._input.peek()?(this._ch=this._input.next(),this.print_string("::")):this.print_string(":")):(this.print_string(":"),g||(g=!0,this._output.space_before_token=!0,this.eatWhitespace(!0),this.indent())):'"'===this._ch||"'"===this._ch?(this.preserveSingleSpace(i),this.print_string(this._ch+this.eatString(this._ch)),this.eatWhitespace(!0)):";"===this._ch?0===m?(g&&(this.outdent(),g=!1),v=!1,y=!1,this.print_string(this._ch),this.eatWhitespace(!0),"/"!==this._input.peek()&&this._output.add_new_line()):(this.print_string(this._ch),this.eatWhitespace(!0),this._output.space_before_token=!0):"("===this._ch?this._input.lookBack("url")?(this.print_string(this._ch),this.eatWhitespace(),m++,this.indent(),this._ch=this._input.next(),")"===this._ch||'"'===this._ch||"'"===this._ch?this._input.back():this._ch&&(this.print_string(this._ch+this.eatString(")")),m&&(m--,this.outdent()))):(this.preserveSingleSpace(i),this.print_string(this._ch),this.eatWhitespace(),m++,this.indent()):")"===this._ch?(m&&(m--,this.outdent()),this.print_string(this._ch)):","===this._ch?(this.print_string(this._ch),this.eatWhitespace(!0),!this._options.selector_separator_newline||g||0!==m||y||v?this._output.space_before_token=!0:this._output.add_new_line()):">"!==this._ch&&"+"!==this._ch&&"~"!==this._ch||g||0!==m?"]"===this._ch?this.print_string(this._ch):"["===this._ch?(this.preserveSingleSpace(i),this.print_string(this._ch)):"="===this._ch?(this.eatWhitespace(),this.print_string("="),c.test(this._ch)&&(this._ch="")):"!"!==this._ch||this._input.lookBack("\\")?(this.preserveSingleSpace(i),this.print_string(this._ch)):(this.print_string(" "),this.print_string(this._ch)):this._options.space_around_combinator?(this._output.space_before_token=!0,this.print_string(this._ch),this._output.space_before_token=!0):(this.print_string(this._ch),this.eatWhitespace(),this._ch&&c.test(this._ch)&&(this._ch=""));return 
this._output.get_code(t)},e.exports.Beautifier=u},function(e,t,n){var i=n(6).Options;function r(e){i.call(this,e,"css"),this.selector_separator_newline=this._get_boolean("selector_separator_newline",!0),this.newline_between_rules=this._get_boolean("newline_between_rules",!0);var t=this._get_boolean("space_around_selector_separator");this.space_around_combinator=this._get_boolean("space_around_combinator")||t;var n=this._get_selection_list("brace_style",["collapse","expand","end-expand","none","preserve-inline"]);this.brace_style="collapse";for(var r=0;r0&&fu(i,c-1);)c--;0===c||mu(i,c-1)?l=c:c=0;){var n=e.charCodeAt(t);if(n===du)return!0;if(n===pu)return!1;t--}return!1}(i,l),r=h===i.length,i=i.substring(l,h),0!==l){var p=e.offsetAt(Ba.create(t.start.line,0));o=function(e,t,n){var i=t,r=0,o=n.tabSize||4;for(;i0){var f=n.insertSpaces?ps(" ",a*o):ps("\t",o);m=m.split("\n").join("\n"+f),0===t.start.character&&(m=f+m)}return[{range:t,newText:m}]}function hu(e){return e.replace(/^\s+/,"")}var du="{".charCodeAt(0),pu="}".charCodeAt(0);function uu(e,t,n){if(e&&e.hasOwnProperty(t)){var i=e[t];if(null!==i)return i}return n}function mu(e,t){return-1!=="\r\n".indexOf(e.charAt(t))}function fu(e,t){return-1!==" \t".indexOf(e.charAt(t))}var gu={version:1.1,properties:[{name:"additive-symbols",browsers:["FF33"],syntax:"[ && ]#",relevance:50,description:"@counter-style descriptor. Specifies the symbols used by the marker-construction algorithm specified by the system descriptor. 
Needs to be specified if the counter system is 'additive'.",restrictions:["integer","string","image","identifier"]},{name:"align-content",values:[{name:"center",description:"Lines are packed toward the center of the flex container."},{name:"flex-end",description:"Lines are packed toward the end of the flex container."},{name:"flex-start",description:"Lines are packed toward the start of the flex container."},{name:"space-around",description:"Lines are evenly distributed in the flex container, with half-size spaces on either end."},{name:"space-between",description:"Lines are evenly distributed in the flex container."},{name:"stretch",description:"Lines stretch to take up the remaining space."}],syntax:"normal | | | ? ",relevance:62,description:"Aligns a flex container\u2019s lines within the flex container when there is extra space in the cross-axis, similar to how 'justify-content' aligns individual items within the main-axis.",restrictions:["enum"]},{name:"align-items",values:[{name:"baseline",description:"If the flex item\u2019s inline axis is the same as the cross axis, this value is identical to 'flex-start'. Otherwise, it participates in baseline alignment."},{name:"center",description:"The flex item\u2019s margin box is centered in the cross axis within the line."},{name:"flex-end",description:"The cross-end margin edge of the flex item is placed flush with the cross-end edge of the line."},{name:"flex-start",description:"The cross-start margin edge of the flex item is placed flush with the cross-start edge of the line."},{name:"stretch",description:"If the cross size property of the flex item computes to auto, and neither of the cross-axis margins are auto, the flex item is stretched."}],syntax:"normal | stretch | | [ ? 
]",relevance:85,description:"Aligns flex items along the cross axis of the current line of the flex container.",restrictions:["enum"]},{name:"justify-items",values:[{name:"auto"},{name:"normal"},{name:"end"},{name:"start"},{name:"flex-end",description:'"Flex items are packed toward the end of the line."'},{name:"flex-start",description:'"Flex items are packed toward the start of the line."'},{name:"self-end",description:"The item is packed flush to the edge of the alignment container of the end side of the item, in the appropriate axis."},{name:"self-start",description:"The item is packed flush to the edge of the alignment container of the start side of the item, in the appropriate axis.."},{name:"center",description:"The items are packed flush to each other toward the center of the of the alignment container."},{name:"left"},{name:"right"},{name:"baseline"},{name:"first baseline"},{name:"last baseline"},{name:"stretch",description:"If the cross size property of the flex item computes to auto, and neither of the cross-axis margins are auto, the flex item is stretched."},{name:"save"},{name:"unsave"},{name:"legacy"}],syntax:"normal | stretch | | ? 
[ | left | right ] | legacy | legacy && [ left | right | center ]",relevance:53,description:"Defines the default justify-self for all items of the box, giving them the default way of justifying each box along the appropriate axis",restrictions:["enum"]},{name:"justify-self",values:[{name:"auto"},{name:"normal"},{name:"end"},{name:"start"},{name:"flex-end",description:'"Flex items are packed toward the end of the line."'},{name:"flex-start",description:'"Flex items are packed toward the start of the line."'},{name:"self-end",description:"The item is packed flush to the edge of the alignment container of the end side of the item, in the appropriate axis."},{name:"self-start",description:"The item is packed flush to the edge of the alignment container of the start side of the item, in the appropriate axis.."},{name:"center",description:"The items are packed flush to each other toward the center of the of the alignment container."},{name:"left"},{name:"right"},{name:"baseline"},{name:"first baseline"},{name:"last baseline"},{name:"stretch",description:"If the cross size property of the flex item computes to auto, and neither of the cross-axis margins are auto, the flex item is stretched."},{name:"save"},{name:"unsave"}],syntax:"auto | normal | stretch | | ? [ | left | right ]",relevance:53,description:"Defines the way of justifying a box inside its container along the appropriate axis.",restrictions:["enum"]},{name:"align-self",values:[{name:"auto",description:"Computes to the value of 'align-items' on the element\u2019s parent, or 'stretch' if the element has no parent. On absolutely positioned elements, it computes to itself."},{name:"baseline",description:"If the flex item\u2019s inline axis is the same as the cross axis, this value is identical to 'flex-start'. 
Otherwise, it participates in baseline alignment."},{name:"center",description:"The flex item\u2019s margin box is centered in the cross axis within the line."},{name:"flex-end",description:"The cross-end margin edge of the flex item is placed flush with the cross-end edge of the line."},{name:"flex-start",description:"The cross-start margin edge of the flex item is placed flush with the cross-start edge of the line."},{name:"stretch",description:"If the cross size property of the flex item computes to auto, and neither of the cross-axis margins are auto, the flex item is stretched."}],syntax:"auto | normal | stretch | | ? ",relevance:72,description:"Allows the default alignment along the cross axis to be overridden for individual flex items.",restrictions:["enum"]},{name:"all",browsers:["E79","FF27","S9.1","C37","O24"],values:[],syntax:"initial | inherit | unset | revert",relevance:53,references:[{name:"MDN Reference",url:"https://developer.mozilla.org/docs/Web/CSS/all"}],description:"Shorthand that resets all properties except 'direction' and 'unicode-bidi'.",restrictions:["enum"]},{name:"alt",browsers:["S9"],values:[],relevance:50,references:[{name:"MDN Reference",url:"https://developer.mozilla.org/docs/Web/CSS/alt"}],description:"Provides alternative text for assistive technology to replace the generated content of a ::before or ::after element.",restrictions:["string","enum"]},{name:"animation",values:[{name:"alternate",description:"The animation cycle iterations that are odd counts are played in the normal direction, and the animation cycle iterations that are even counts are played in a reverse direction."},{name:"alternate-reverse",description:"The animation cycle iterations that are odd counts are played in the reverse direction, and the animation cycle iterations that are even counts are played in a normal direction."},{name:"backwards",description:"The beginning property value (as defined in the first @keyframes at-rule) is applied before the animation 
is displayed, during the period defined by 'animation-delay'."},{name:"both",description:"Both forwards and backwards fill modes are applied."},{name:"forwards",description:"The final property value (as defined in the last @keyframes at-rule) is maintained after the animation completes."},{name:"infinite",description:"Causes the animation to repeat forever."},{name:"none",description:"No animation is performed"},{name:"normal",description:"Normal playback."},{name:"reverse",description:"All iterations of the animation are played in the reverse direction from the way they were specified."}],syntax:"#",relevance:82,references:[{name:"MDN Reference",url:"https://developer.mozilla.org/docs/Web/CSS/animation"}],description:"Shorthand property combines six of the animation properties into a single property.",restrictions:["time","timing-function","enum","identifier","number"]},{name:"animation-delay",syntax:"