Skip to content

Commit 2edc9b5

Browse files
authored
Add export checksums (#12728)
1 parent c19b139 commit 2edc9b5

27 files changed

+398
-56
lines changed

ydb/core/protos/feature_flags.proto

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,4 +184,5 @@ message TFeatureFlags {
184184
optional bool EnableDataShardInMemoryStateMigration = 159 [default = true];
185185
optional bool EnableDataShardInMemoryStateMigrationAcrossGenerations = 160 [default = false];
186186
optional bool DisableLocalDBEraseCache = 161 [default = false];
187+
optional bool EnableExportChecksums = 162 [default = false];
187188
}

ydb/core/protos/flat_scheme_op.proto

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1246,6 +1246,7 @@ message TBackupTask {
12461246

12471247
optional uint64 SnapshotStep = 14;
12481248
optional uint64 SnapshotTxId = 15;
1249+
optional bool EnableChecksums = 16; // currently available for s3
12491250
}
12501251

12511252
message TRestoreTask {

ydb/core/testlib/basics/feature_flags.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ class TTestFeatureFlagsHolder {
7171
FEATURE_FLAG_SETTER(EnableParameterizedDecimal)
7272
FEATURE_FLAG_SETTER(EnableTopicAutopartitioningForCDC)
7373
FEATURE_FLAG_SETTER(EnableFollowerStats)
74+
FEATURE_FLAG_SETTER(EnableExportChecksums)
7475

7576
#undef FEATURE_FLAG_SETTER
7677
};

ydb/core/tx/datashard/backup_restore_common.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,13 @@ void TMetadata::AddFullBackup(TFullBackupMetadata::TPtr fb) {
66
FullBackups.emplace(fb->SnapshotVts, fb);
77
}
88

9+
// Stores the metadata format version; Serialize() writes this value into the
// "version" field of the resulting JSON.
void TMetadata::SetVersion(ui64 version) {
    this->Version = version;
}
12+
913
TString TMetadata::Serialize() const {
1014
NJson::TJsonMap m;
11-
m["version"] = 0;
15+
m["version"] = Version;
1216
NJson::TJsonArray fullBackups;
1317
for (auto &[tp, _] : FullBackups) {
1418
NJson::TJsonMap backupMap;

ydb/core/tx/datashard/backup_restore_common.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,13 +175,15 @@ class TMetadata {
175175
void AddFullBackup(TFullBackupMetadata::TPtr fullBackup);
176176
void AddLog(TLogMetadata::TPtr log);
177177
void SetConsistencyKey(const TString& key);
178+
void SetVersion(ui64 version);
178179

179180
TString Serialize() const;
180181
static TMetadata Deserialize(const TString& metadata);
181182
private:
182183
TString ConsistencyKey;
183184
TMap<TVirtualTimestamp, TFullBackupMetadata::TPtr> FullBackups;
184185
TMap<TVirtualTimestamp, TLogMetadata::TPtr> Logs;
186+
ui64 Version = 0;
185187
};
186188

187189
} // NBackupRestore

ydb/core/tx/datashard/backup_restore_traits.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,27 @@ TString DataFileExtension(EDataFormat format, ECompressionCodec codec) {
7272
return Sprintf("%s%s", fit->second.c_str(), cit->second.c_str());
7373
}
7474

75+
// Key suffix (file name) of the permissions object.
TString PermissionsKeySuffix() {
    return TString("permissions.pb");
}
78+
79+
// Key suffix (file name) of the scheme object.
TString SchemeKeySuffix() {
    return TString("scheme.pb");
}
82+
83+
// Key suffix (file name) of the metadata object.
TString MetadataKeySuffix() {
    return TString("metadata.json");
}
86+
87+
// Builds the key suffix of the n-th data file: "data_" followed by n
// (zero-padded to at least two digits) and the extension that
// DataFileExtension() returns for the given format and codec.
TString DataKeySuffix(ui32 n, EDataFormat format, ECompressionCodec codec) {
    const auto ext = DataFileExtension(format, codec);
    // n is unsigned (ui32), so it is formatted with %u; %d would misprint
    // values above INT_MAX as negative numbers.
    return Sprintf("data_%02u%s", n, ext.c_str());
}
91+
92+
// Returns the key of the checksum object that accompanies objKey:
// objKey with ".sha256" appended.
TString ChecksumKey(const TString& objKey) {
    TString checksumKey = objKey;
    checksumKey += ".sha256";
    return checksumKey;
}
95+
7596
} // NBackupRestoreTraits
7697
} // NDataShard
7798
} // NKikimr

ydb/core/tx/datashard/backup_restore_traits.h

Lines changed: 5 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -30,22 +30,12 @@ ECompressionCodec NextCompressionCodec(ECompressionCodec cur);
3030

3131
TString DataFileExtension(EDataFormat format, ECompressionCodec codec);
3232

33-
inline TString SchemeKey(const TString& objKeyPattern) {
34-
return Sprintf("%s/scheme.pb", objKeyPattern.c_str());
35-
}
36-
37-
inline TString PermissionsKey(const TString& objKeyPattern) {
38-
return Sprintf("%s/permissions.pb", objKeyPattern.c_str());
39-
}
33+
// Returns the key suffix of the permissions file ("permissions.pb"), without a leading '/'.
TString PermissionsKeySuffix();
34+
// Returns the key suffix of the scheme file ("scheme.pb"), without a leading '/'.
TString SchemeKeySuffix();
35+
// Returns the key suffix of the metadata file ("metadata.json"), without a leading '/'.
TString MetadataKeySuffix();
36+
// Returns the key suffix of the n-th data file: "data_NN" (n zero-padded to at least
// two digits) followed by DataFileExtension(format, codec), without a leading '/'.
TString DataKeySuffix(ui32 n, EDataFormat format, ECompressionCodec codec);
4037

41-
inline TString MetadataKey(const TString& objKeyPattern) {
42-
return Sprintf("%s/metadata.json", objKeyPattern.c_str());
43-
}
44-
45-
inline TString DataKey(const TString& objKeyPattern, ui32 n, EDataFormat format, ECompressionCodec codec) {
46-
const auto ext = DataFileExtension(format, codec);
47-
return Sprintf("%s/data_%02d%s", objKeyPattern.c_str(), n, ext.c_str());
48-
}
38+
// Returns the key of the checksum object for objKey: objKey with ".sha256" appended.
TString ChecksumKey(const TString& objKey);
4939

5040
} // NBackupRestoreTraits
5141
} // NDataShard
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
#include "export_checksum.h"
2+
3+
#include <openssl/sha.h>
4+
5+
#include <util/string/hex.h>
6+
7+
namespace NKikimr::NDataShard {
8+
9+
class TSHA256 : public IExportChecksum {
10+
public:
11+
TSHA256() {
12+
SHA256_Init(&Context);
13+
}
14+
15+
void AddData(TStringBuf data) override {
16+
SHA256_Update(&Context, data.data(), data.size());
17+
}
18+
19+
TString Serialize() override {
20+
unsigned char hash[SHA256_DIGEST_LENGTH];
21+
SHA256_Final(hash, &Context);
22+
return to_lower(HexEncode(hash, SHA256_DIGEST_LENGTH));
23+
}
24+
25+
private:
26+
SHA256_CTX Context;
27+
};
28+
29+
// One-shot helper: creates a checksum calculator, feeds it the whole buffer
// and returns the serialized result.
TString ComputeExportChecksum(TStringBuf data) {
    const IExportChecksum::TPtr hasher{CreateExportChecksum()};
    hasher->AddData(data);
    return hasher->Serialize();
}
34+
35+
// Factory for the export checksum calculator (SHA-256).
// The object is allocated with new; ownership passes to the caller.
IExportChecksum* CreateExportChecksum() {
    IExportChecksum* const checksum = new TSHA256();
    return checksum;
}
38+
39+
} // NKikimr::NDataShard
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#pragma once
2+
3+
#include <util/generic/string.h>
4+
5+
namespace NKikimr::NDataShard {
6+
7+
class IExportChecksum {
8+
public:
9+
using TPtr = std::unique_ptr<IExportChecksum>;
10+
11+
virtual ~IExportChecksum() = default;
12+
13+
virtual void AddData(TStringBuf data) = 0;
14+
virtual TString Serialize() = 0;
15+
};
16+
17+
// Creates a new checksum calculator (currently a SHA-256 implementation).
// The object is allocated with new and the caller owns it; wrap the result
// in IExportChecksum::TPtr.
IExportChecksum* CreateExportChecksum();
18+
// Convenience helper: returns the serialized checksum of a single buffer.
TString ComputeExportChecksum(TStringBuf data);
19+
20+
} // NKikimr::NDataShard

ydb/core/tx/datashard/export_s3.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,10 @@ class TS3Export: public IExport {
3131

3232
switch (CodecFromTask(Task)) {
3333
case ECompressionCodec::None:
34-
return CreateS3ExportBufferRaw(Columns, maxRows, maxBytes);
34+
return CreateS3ExportBufferRaw(Columns, maxRows, maxBytes, Task.GetEnableChecksums());
3535
case ECompressionCodec::Zstd:
36-
return CreateS3ExportBufferZstd(Task.GetCompression().GetLevel(), Columns, maxRows, maxBytes, minBytes);
36+
return CreateS3ExportBufferZstd(Task.GetCompression().GetLevel(), Columns, maxRows,
37+
maxBytes, minBytes, Task.GetEnableChecksums());
3738
case ECompressionCodec::Invalid:
3839
Y_ABORT("unreachable");
3940
}

0 commit comments

Comments
 (0)