Skip to content

Commit 80bf6ef

Browse files
authored
24-2: schemeshard: reject operations with too big local tx commit (#6760) (#6850)
merged ebee36a from main. Add a commit redo size check for successfully ignited operations as a precautionary measure to avoid an infinite loop of schemeshard hitting the local tx commit redo size limit, restarting, attempting to propose the persisted operation again, hitting the commit redo size limit again, restarting and so on. This could happen with inherently massive operations such as copy-tables used as a starting step of database export/backup. Copying a large number of tables with a huge number of partitions can result in such a large TTxOperationPropose local transaction that its size would hit the limit imposed by the tablet executor. A tablet violating that limit is considered broken and will be immediately stopped. See ydb/core/tablet_flat/flat_executor.cpp, NTabletFlatExecutor::TExecutor::ExecuteTransaction(). KIKIMR-21751
1 parent c934c0a commit 80bf6ef

File tree

5 files changed

+232
-16
lines changed

5 files changed

+232
-16
lines changed

ydb/core/tx/schemeshard/schemeshard__operation.cpp

Lines changed: 110 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -112,14 +112,12 @@ THolder<TProposeResponse> TSchemeShard::IgniteOperation(TProposeRequest& request
112112
}
113113

114114
TOperation::TPtr operation = new TOperation(txId);
115-
Operations[txId] = operation; //record is erased at ApplyOnExecute if all parts are done at propose
116115

117116
for (const auto& transaction : record.GetTransaction()) {
118117
auto quotaResult = operation->ConsumeQuota(transaction, context);
119118
if (quotaResult.Status != NKikimrScheme::StatusSuccess) {
120119
response.Reset(new TProposeResponse(quotaResult.Status, ui64(txId), ui64(selfId)));
121120
response->SetError(quotaResult.Status, quotaResult.Reason);
122-
Operations.erase(txId);
123121
return std::move(response);
124122
}
125123
}
@@ -139,7 +137,6 @@ THolder<TProposeResponse> TSchemeShard::IgniteOperation(TProposeRequest& request
139137
if (splitResult.Status != NKikimrScheme::StatusSuccess) {
140138
response.Reset(new TProposeResponse(splitResult.Status, ui64(txId), ui64(selfId)));
141139
response->SetError(splitResult.Status, splitResult.Reason);
142-
Operations.erase(txId);
143140
return std::move(response);
144141
}
145142

@@ -148,11 +145,15 @@ THolder<TProposeResponse> TSchemeShard::IgniteOperation(TProposeRequest& request
148145

149146
const TString owner = record.HasOwner() ? record.GetOwner() : BUILTIN_ACL_ROOT;
150147

148+
bool prevProposeUndoSafe = true;
149+
150+
Operations[txId] = operation; //record is erased at ApplyOnExecute if all parts are done at propose
151+
151152
for (const auto& transaction : transactions) {
152153
auto parts = operation->ConstructParts(transaction, context);
153154

154155
if (parts.size() > 1) {
155-
// les't allow altering impl index tables as part of consistent operation
156+
// allow altering impl index tables as part of consistent operation
156157
context.IsAllowedPrivateTables = true;
157158
}
158159

@@ -206,25 +207,77 @@ THolder<TProposeResponse> TSchemeShard::IgniteOperation(TProposeRequest& request
206207
<< ", with reason: " << response->Record.GetReason()
207208
<< ", tx message: " << PrintSecurely(record));
208209

209-
context.OnComplete = {}; // recreate
210-
context.DbChanges = {};
210+
AbortOperationPropose(txId, context);
211211

212-
for (auto& toAbort : operation->Parts) {
213-
toAbort->AbortPropose(context);
214-
}
212+
return std::move(response);
213+
}
215214

216-
context.MemChanges.UnDo(context.SS);
217-
context.OnComplete.ApplyOnExecute(context.SS, context.GetTxc(), context.Ctx);
218-
Operations.erase(txId);
215+
// Check suboperations for undo safety. Log first unsafe suboperation in the schema transaction.
216+
if (prevProposeUndoSafe && !context.IsUndoChangesSafe()) {
217+
prevProposeUndoSafe = false;
219218

220-
return std::move(response);
219+
LOG_WARN_S(context.Ctx, NKikimrServices::FLAT_TX_SCHEMESHARD,
220+
"Operation part proposed ok, but propose itself is undo unsafe"
221+
<< ", suboperation type: " << NKikimrSchemeOp::EOperationType_Name(part->GetTransaction().GetOperationType())
222+
<< ", opId: " << part->GetOperationId()
223+
<< ", at schemeshard: " << selfId
224+
);
221225
}
222226
}
223227
}
224228

225229
return std::move(response);
226230
}
227231

232+
// Reverts the propose of the operation registered under txId:
//  - resets the side effects (OnComplete) and storage changes (DbChanges) accumulated in the context,
//  - gives every part of the operation a chance to abort its propose,
//  - undoes the in-memory changes recorded in context.MemChanges,
//  - removes the operation from Operations.
// Requires txId to be present in Operations (enforced by Y_ABORT_UNLESS).
// Nothing here applies context.OnComplete; it is only reset to an empty state.
void TSchemeShard::AbortOperationPropose(const TTxId txId, TOperationContext& context) {
233+
Y_ABORT_UNLESS(Operations.contains(txId));
234+
TOperation::TPtr operation = Operations.at(txId);
235+
236+
// Drop operation side effects, undo memory changes
237+
// (Local db changes were already applied)
238+
context.OnComplete = {};
239+
context.DbChanges = {};
240+
241+
// Parts are aborted against the freshly reset OnComplete/DbChanges
for (auto& i : operation->Parts) {
242+
i->AbortPropose(context);
243+
}
244+
245+
context.MemChanges.UnDo(context.SS);
246+
247+
// And remove aborted operation from existence
248+
Operations.erase(txId);
249+
}
250+
251+
// Rejects an already ignited operation: logs an error that includes txId and reason,
// calls RollbackChanges() on the local tx database and then hands over to
// TSchemeShard::AbortOperationPropose() to revert the operation itself.
// NOTE(review): RollbackChanges() presumably discards the local db changes made so far
// in this local transaction -- confirm against the tablet_flat implementation.
void AbortOperation(TOperationContext& context, const TTxId txId, const TString& reason) {
252+
LOG_ERROR_S(context.Ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, "TTxOperationPropose Execute"
253+
<< ", txId: " << txId
254+
<< ", operation is rejected and all changes reverted"
255+
<< ", " << reason
256+
<< ", at schemeshard: " << context.SS->SelfTabletId()
257+
);
258+
259+
// Local db rollback goes first, then the operation-level abort
context.GetTxc().DB.RollbackChanges();
260+
context.SS->AbortOperationPropose(txId, context);
261+
}
262+
263+
// Compares the commit redo size reported by the local tx database with a limit
// derived from the MaxCommitRedoMB control.
// Returns true and stores a human-readable explanation into *reason when the size
// reaches or exceeds the limit; returns false and leaves *reason untouched otherwise.
// reason is dereferenced without a null check on the over-limit path.
// NOTE(review): assumes MaxCommitRedoMB >= 1; with 0 the expression (MaxCommitRedoMB - 1)
// would drop below zero before the shift -- confirm the control's lower bound.
bool IsCommitRedoSizeOverLimit(TString* reason, TOperationContext& context) {
264+
// MaxCommitRedoMB is the ICB control shared with NTabletFlatExecutor::TExecutor.
265+
// We subtract from MaxCommitRedoMB additional 1MB for anything extra
266+
// that executor/tablet may (or may not) add under the hood
267+
const ui64 limitBytes = (context.SS->MaxCommitRedoMB - 1) << 20; // MB to bytes
268+
const ui64 commitRedoBytes = context.GetTxc().DB.GetCommitRedoBytes();
269+
// '>=' rather than '>': reaching the limit exactly already counts as a violation.
// With MaxCommitRedoMB == 1 the limit is 0, so every size satisfies the condition.
if (commitRedoBytes >= limitBytes) {
270+
*reason = TStringBuilder()
271+
<< "local tx commit redo size generated by IgniteOperation() is more than allowed limit: "
272+
<< "commit redo size " << commitRedoBytes
273+
<< ", limit " << limitBytes
274+
<< ", excess " << (commitRedoBytes - limitBytes)
275+
;
276+
return true;
277+
}
278+
return false;
279+
}
280+
228281
struct TSchemeShard::TTxOperationPropose: public NTabletFlatExecutor::TTransactionBase<TSchemeShard> {
229282
using TBase = NTabletFlatExecutor::TTransactionBase<TSchemeShard>;
230283

@@ -244,6 +297,7 @@ struct TSchemeShard::TTxOperationPropose: public NTabletFlatExecutor::TTransacti
244297

245298
bool Execute(NTabletFlatExecutor::TTransactionContext& txc, const TActorContext& ctx) override {
246299
TTabletId selfId = Self->SelfTabletId();
300+
auto txId = TTxId(Request->Get()->Record.GetTxId());
247301

248302
LOG_DEBUG_S(ctx, NKikimrServices::FLAT_TX_SCHEMESHARD,
249303
"TTxOperationPropose Execute"
@@ -254,7 +308,6 @@ struct TSchemeShard::TTxOperationPropose: public NTabletFlatExecutor::TTransacti
254308

255309
auto [userToken, tokenParseError] = ParseUserToken(Request->Get()->Record.GetUserToken());
256310
if (tokenParseError) {
257-
auto txId = Request->Get()->Record.GetTxId();
258311
Response = MakeHolder<TProposeResponse>(NKikimrScheme::StatusInvalidParameter, ui64(txId), ui64(selfId), "Failed to parse user token");
259312
return true;
260313
}
@@ -266,10 +319,52 @@ struct TSchemeShard::TTxOperationPropose: public NTabletFlatExecutor::TTransacti
266319
TStorageChanges dbChanges;
267320
TOperationContext context{Self, txc, ctx, OnComplete, memChanges, dbChanges, std::move(userToken)};
268321

322+
//NOTE: Successful IgniteOperation will leave created operation in Self->Operations and accumulated changes in the context.
323+
// Unsuccessful IgniteOperation will leave no operation and context will also be clean.
269324
Response = Self->IgniteOperation(*Request->Get(), context);
270325

271-
OnComplete.ApplyOnExecute(Self, txc, ctx);
326+
//NOTE: Successfully created operation also must be checked for the size of this local tx.
327+
//
328+
// Limitation on a commit redo size of local transactions is imposed at the tablet executor level
329+
// (See ydb/core/tablet_flat/flat_executor.cpp, NTabletFlatExecutor::TExecutor::ExecuteTransaction()).
330+
// And a tablet violating that limit is considered broken and will be stopped unconditionally and immediately.
331+
//
332+
// So even if the operation was ignited successfully, its local tx size still must be checked
333+
// as a precautionary measure to avoid an infinite loop of schemeshard restarting, attempting to propose
334+
// persisted operation again, hitting commit redo size limit and restarting again.
335+
//
336+
// On unsuccessful check, local tx should be rolled back, operation should be rejected and
337+
// all accumulated changes dropped or reverted.
338+
//
339+
340+
// Actually build commit redo (dbChanges could be empty)
272341
dbChanges.Apply(Self, txc, ctx);
342+
343+
if (Self->Operations.contains(txId)) {
344+
Y_ABORT_UNLESS(Response->IsDone() || Response->IsAccepted() || Response->IsConditionalAccepted());
345+
346+
// Check local tx commit redo size
347+
TString reason;
348+
if (IsCommitRedoSizeOverLimit(&reason, context)) {
349+
Response = MakeHolder<TProposeResponse>(NKikimrScheme::StatusSchemeError, ui64(txId), ui64(selfId), reason);
350+
351+
AbortOperation(context, txId, reason);
352+
353+
if (!context.IsUndoChangesSafe()) {
354+
LOG_ERROR_S(context.Ctx, NKikimrServices::FLAT_TX_SCHEMESHARD, "TTxOperationPropose Execute"
355+
<< ", opId: " << txId
356+
<< ", operation should be rejected and all changes be reverted"
357+
<< ", but context.IsUndoChangesSafe is false, which means some direct writes have been done"
358+
<< ", message: " << PrintSecurely(Request->Get()->Record)
359+
<< ", at schemeshard: " << context.SS->SelfTabletId()
360+
);
361+
}
362+
}
363+
}
364+
365+
// Apply accumulated changes (changes could be empty)
366+
OnComplete.ApplyOnExecute(Self, txc, ctx);
367+
273368
return true;
274369
}
275370

ydb/core/tx/schemeshard/schemeshard_impl.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4358,6 +4358,8 @@ void TSchemeShard::OnActivateExecutor(const TActorContext &ctx) {
43584358
appData->Icb->RegisterSharedControl(DisablePublicationsOfDropping, "SchemeShard_DisablePublicationsOfDropping");
43594359
appData->Icb->RegisterSharedControl(FillAllocatePQ, "SchemeShard_FillAllocatePQ");
43604360

4361+
appData->Icb->RegisterSharedControl(MaxCommitRedoMB, "TabletControls.MaxCommitRedoMB");
4362+
43614363
AllowDataColumnForIndexTable = appData->FeatureFlags.GetEnableDataColumnForIndexTable();
43624364
appData->Icb->RegisterSharedControl(AllowDataColumnForIndexTable, "SchemeShard_AllowDataColumnForIndexTable");
43634365

ydb/core/tx/schemeshard/schemeshard_impl.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,9 @@ class TSchemeShard
181181
TControlWrapper DisablePublicationsOfDropping;
182182
TControlWrapper FillAllocatePQ;
183183

184+
// Shared with NTabletFlatExecutor::TExecutor
185+
TControlWrapper MaxCommitRedoMB;
186+
184187
TSplitSettings SplitSettings;
185188

186189
struct TTenantInitState {
@@ -350,6 +353,8 @@ class TSchemeShard
350353
NExternalSource::IExternalSourceFactory::TPtr ExternalSourceFactory{NExternalSource::CreateExternalSourceFactory({})};
351354

352355
THolder<TProposeResponse> IgniteOperation(TProposeRequest& request, TOperationContext& context);
356+
void AbortOperationPropose(const TTxId txId, TOperationContext& context);
357+
353358
THolder<TEvDataShard::TEvProposeTransaction> MakeDataShardProposal(const TPathId& pathId, const TOperationId& opId,
354359
const TString& body, const TActorContext& ctx) const;
355360

@@ -399,7 +404,7 @@ class TSchemeShard
399404
return MakeLocalId(NextLocalPathId);
400405
}
401406

402-
TPathId AllocatePathId () {
407+
TPathId AllocatePathId() {
403408
TPathId next = PeekNextPathId();
404409
++NextLocalPathId;
405410
return next;
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
#include <ydb/core/tx/schemeshard/ut_helpers/helpers.h>
2+
3+
using namespace NKikimr;
4+
using namespace NSchemeShard;
5+
using namespace NSchemeShardUT_Private;
6+
7+
Y_UNIT_TEST_SUITE(TSchemeShardCheckProposeSize) {
8+
9+
//TODO: can't check all operations as many of them do not implement
10+
// TSubOperation::AbortPropose() properly and will abort.
11+
12+
// Verifies that a single copy-table operation is rejected with StatusSchemeError when
// MaxCommitRedoMB is lowered to 1, and that the same copy is sent and waited for
// once the control is raised back to 200.
Y_UNIT_TEST(CopyTable) {
13+
TTestBasicRuntime runtime;
14+
TTestEnv env(runtime);
15+
16+
// Take control over MaxCommitRedoMB ICB setting.
17+
// Drop down its min-value limit to be able to set it as low as test needs.
18+
TControlWrapper MaxCommitRedoMB;
19+
{
20+
runtime.GetAppData().Icb->RegisterSharedControl(MaxCommitRedoMB, "TabletControls.MaxCommitRedoMB");
21+
// NOTE(review): arguments are presumably (default, min, max) -- confirm against TControlWrapper::Reset
MaxCommitRedoMB.Reset(200, 1, 4096);
22+
}
23+
24+
ui64 txId = 100;
25+
26+
// Source table for the copy; created while the control still has the high value (200)
TestCreateTable(runtime, ++txId, "/MyRoot", R"(
27+
Name: "table"
28+
Columns { Name: "key" Type: "Uint64"}
29+
Columns { Name: "value" Type: "Utf8"}
30+
KeyColumnNames: ["key"]
31+
)");
32+
env.TestWaitNotification(runtime, txId);
33+
34+
// 1. Set MaxCommitRedoMB to 1 and try to copy the table.
35+
//
36+
// (The check at the operation's Propose tests the commit redo size against (MaxCommitRedoMB - 1)
37+
// to give 1MB of leeway for the executor/tablet internals to maybe do "something extra".
38+
// So MaxCommitRedoMB = 1 means an effective limit of 0 for the size of the operation's commit.)
39+
{
40+
MaxCommitRedoMB = 1;
41+
AsyncCopyTable(runtime, ++txId, "/MyRoot", "table-copy", "/MyRoot/table");
42+
TestModificationResults(runtime, txId,
43+
{{NKikimrScheme::StatusSchemeError, "local tx commit redo size generated by IgniteOperation() is more than allowed limit"}}
44+
);
45+
env.TestWaitNotification(runtime, txId);
46+
}
47+
48+
// 2. Set MaxCommitRedoMB back to high value and try again.
49+
{
50+
MaxCommitRedoMB = 200;
51+
// Same destination name as in step 1 is reused for the second attempt
AsyncCopyTable(runtime, ++txId, "/MyRoot", "table-copy", "/MyRoot/table");
52+
env.TestWaitNotification(runtime, txId);
53+
}
54+
}
55+
56+
// Verifies the commit redo size check for a combined operation: 100 copy-table
// transactions are merged into one scheme transaction, which must be rejected with
// StatusSchemeError when MaxCommitRedoMB is 1 and accepted when it is raised back to 200.
Y_UNIT_TEST(CopyTables) {
57+
TTestBasicRuntime runtime;
58+
TTestEnv env(runtime);
59+
60+
// Take control over MaxCommitRedoMB ICB setting.
61+
// Drop down its min-value limit to be able to set it as low as test needs.
62+
TControlWrapper MaxCommitRedoMB;
63+
{
64+
runtime.GetAppData().Icb->RegisterSharedControl(MaxCommitRedoMB, "TabletControls.MaxCommitRedoMB");
65+
// NOTE(review): arguments are presumably (default, min, max) -- confirm against TControlWrapper::Reset
MaxCommitRedoMB.Reset(200, 1, 4096);
66+
}
67+
68+
const ui64 tables = 100;
69+
const ui64 shardsPerTable = 1;
70+
71+
ui64 txId = 100;
72+
73+
// Create the source tables one by one, waiting for each creation to complete
// NOTE(review): "%lu" is used with ui64 arguments -- confirm this matches ui64 on all target platforms
for (ui64 i : xrange(tables)) {
74+
TestCreateTable(runtime, ++txId, "/MyRoot", Sprintf(
75+
R"(
76+
Name: "table-%lu"
77+
Columns { Name: "key" Type: "Uint64"}
78+
Columns { Name: "value" Type: "Utf8"}
79+
KeyColumnNames: ["key"]
80+
UniformPartitionsCount: %lu
81+
)",
82+
i,
83+
shardsPerTable
84+
));
85+
env.TestWaitNotification(runtime, txId);
86+
}
87+
88+
// Builds one copy-table request per source table, all sharing the same txId,
// combines them into a single message and sends it to schemeshard asynchronously
auto testCopyTables = [](auto& runtime, ui64 txId, ui64 tables) {
89+
TVector<TEvTx*> schemeTxs;
90+
for (ui64 i : xrange(tables)) {
91+
schemeTxs.push_back(CopyTableRequest(txId, "/MyRoot", Sprintf("table-%lu-copy", i), Sprintf("/MyRoot/table-%lu", i)));
92+
}
93+
AsyncSend(runtime, TTestTxConfig::SchemeShard, CombineSchemeTransactions(schemeTxs));
94+
};
95+
96+
// 1. Set MaxCommitRedoMB to 1 and try to copy tables.
97+
{
98+
MaxCommitRedoMB = 1;
99+
testCopyTables(runtime, ++txId, tables);
100+
TestModificationResults(runtime, txId,
101+
{{NKikimrScheme::StatusSchemeError, "local tx commit redo size generated by IgniteOperation() is more than allowed limit"}}
102+
);
103+
}
104+
105+
// 2. Set MaxCommitRedoMB back to high value and try again.
106+
{
107+
MaxCommitRedoMB = 200;
108+
testCopyTables(runtime, ++txId, tables);
109+
// Only the propose result is checked here; the test does not wait for completion
TestModificationResults(runtime, txId, {{NKikimrScheme::StatusAccepted}});
110+
}
111+
}
112+
113+
}

ydb/core/tx/schemeshard/ut_base/ya.make

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ SRCS(
2828
ut_info_types.cpp
2929
ut_allocate_pq.cpp
3030
ut_table_pg_types.cpp
31+
ut_commit_redo_limit.cpp
3132
)
3233

3334
END()

0 commit comments

Comments
 (0)