Skip to content

Commit 6a230d5

Browse files
authored
support folders in S3 uri in CS tiers (#13337)
1 parent 9829467 commit 6a230d5

File tree

5 files changed

+229
-44
lines changed

5 files changed

+229
-44
lines changed

ydb/core/tx/tiering/tier/object.cpp

Lines changed: 5 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include "object.h"
2+
#include "s3_uri.h"
23

34
#include <library/cpp/json/writer/json_value.h>
45
#include <library/cpp/protobuf/json/proto2json.h>
@@ -46,50 +47,11 @@ TConclusionStatus TTierConfig::DeserializeFromProto(const NKikimrSchemeOp::TExte
4647
}
4748
}
4849

49-
NUri::TUri url;
50-
if (url.Parse(proto.GetLocation(), NUri::TFeature::FeaturesAll) != NUri::TState::EParsed::ParsedOK) {
51-
return TConclusionStatus::Fail("Cannot parse url: " + proto.GetLocation());
52-
}
53-
54-
switch (url.GetScheme()) {
55-
case NUri::TScheme::SchemeEmpty:
56-
break;
57-
case NUri::TScheme::SchemeHTTP:
58-
ProtoConfig.SetScheme(::NKikimrSchemeOp::TS3Settings_EScheme_HTTP);
59-
break;
60-
case NUri::TScheme::SchemeHTTPS:
61-
ProtoConfig.SetScheme(::NKikimrSchemeOp::TS3Settings_EScheme_HTTPS);
62-
break;
63-
default:
64-
return TConclusionStatus::Fail("Unknown schema in url");
65-
}
66-
67-
{
68-
TStringBuf endpoint;
69-
TStringBuf bucket;
70-
71-
TStringBuf host = url.GetHost();
72-
TStringBuf path = url.GetField(NUri::TField::FieldPath);
73-
if (!path.Empty()) {
74-
endpoint = host;
75-
bucket = path;
76-
bucket.SkipPrefix("/");
77-
if (bucket.Contains("/")) {
78-
return TConclusionStatus::Fail(TStringBuilder() << "Not a bucket (contains directories): " << bucket);
79-
}
80-
} else {
81-
if (!path.TrySplit('.', endpoint, bucket)) {
82-
return TConclusionStatus::Fail(TStringBuilder() << "Bucket is not specified in URL: " << path);
83-
}
84-
}
85-
86-
if (url.GetField(NUri::TField::FieldPort)) {
87-
ProtoConfig.SetEndpoint(TStringBuilder() << endpoint << ":" << url.GetPort());
88-
} else {
89-
ProtoConfig.SetEndpoint(TString(endpoint));
90-
}
91-
ProtoConfig.SetBucket(TString(bucket));
50+
auto parsedUri = TS3Uri::ParseUri(proto.GetLocation());
51+
if (parsedUri.IsFail()) {
52+
return parsedUri;
9253
}
54+
parsedUri->FillSettings(ProtoConfig);
9355

9456
return TConclusionStatus::Success();
9557
}

ydb/core/tx/tiering/tier/s3_uri.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
#include "s3_uri.h"
2+
3+
namespace NKikimr::NColumnShard::NTiers {
4+
}

ydb/core/tx/tiering/tier/s3_uri.h

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
#pragma once

#include <ydb/core/protos/flat_scheme_op.pb.h>

#include <ydb/library/accessor/accessor.h>
#include <ydb/library/conclusion/result.h>

#include <library/cpp/uri/uri.h>
#include <util/string/builder.h>

#include <algorithm>
#include <optional>
#include <vector>
8+
9+
namespace NKikimr::NColumnShard::NTiers {
10+
11+
class TS3Uri {
12+
private:
13+
YDB_READONLY_DEF(std::optional<NKikimrSchemeOp::TS3Settings_EScheme>, Scheme);
14+
YDB_READONLY_DEF(TString, Bucket);
15+
YDB_READONLY_DEF(TString, Host);
16+
YDB_READONLY_DEF(std::optional<ui16>, Port);
17+
YDB_READONLY_DEF(std::optional<TString>, Folder);
18+
19+
enum TUriStyle {
20+
PATH_STYLE = 1,
21+
VIRTUAL_HOSTED_STYLE = 2,
22+
};
23+
24+
inline static const std::vector<TString> BucketHostSeparators = { ".s3.", ".s3-" };
25+
26+
private:
27+
static TStringBuf StripPath(const TStringBuf& path) {
28+
TStringBuf stripped = path;
29+
while (stripped.SkipPrefix("/")) {
30+
}
31+
while (stripped.ChopSuffix("/")) {
32+
}
33+
return stripped;
34+
}
35+
36+
static std::optional<TUriStyle> DeduceUriStyle(const NUri::TUri& uri) {
37+
const bool hasSubdomain = std::count(uri.GetHost().begin(), uri.GetHost().end(), '.') >= 2;
38+
const bool hasPath = !StripPath(uri.GetField(NUri::TField::FieldPath)).Empty();
39+
if (hasSubdomain && !hasPath) {
40+
return VIRTUAL_HOSTED_STYLE;
41+
}
42+
if (!hasSubdomain && hasPath) {
43+
return PATH_STYLE;
44+
}
45+
46+
// URI style deduction copied from AWS SDK for Java
47+
for (const TString& sep : BucketHostSeparators) {
48+
if (uri.GetHost().StartsWith(sep.substr(1))) {
49+
return PATH_STYLE;
50+
}
51+
if (uri.GetHost().Contains(sep)) {
52+
return VIRTUAL_HOSTED_STYLE;
53+
}
54+
}
55+
56+
return std::nullopt;
57+
}
58+
59+
static TConclusion<TS3Uri> ParsePathStyleUri(const NUri::TUri& input) {
60+
TS3Uri result;
61+
62+
TStringBuf path = StripPath(input.GetField(NUri::TField::FieldPath));
63+
64+
if (path.Empty()) {
65+
return TConclusionStatus::Fail(TStringBuilder() << "Missing bucket in path-style S3 uri: " << input.Serialize());
66+
}
67+
68+
TStringBuf folder;
69+
TStringBuf bucket;
70+
if (path.TryRSplit('/', folder, bucket)) {
71+
result.Folder = folder;
72+
result.Bucket = bucket;
73+
} else {
74+
result.Bucket = path;
75+
}
76+
77+
result.Host = input.GetHost();
78+
79+
if (auto status = result.FillStyleAgnosticFields(input); status.IsFail()) {
80+
return status;
81+
}
82+
return result;
83+
}
84+
85+
static TConclusion<TS3Uri> ParseVirtualHostedStyleUri(const NUri::TUri& input) {
86+
TS3Uri result;
87+
88+
for (const TString& sep : BucketHostSeparators) {
89+
if (const ui64 findSep = input.GetHost().find(sep); findSep != TStringBuf::npos) {
90+
result.Bucket = input.GetHost().SubStr(0, findSep);
91+
result.Host = input.GetHost().SubStr(findSep + 1);
92+
break;
93+
}
94+
}
95+
if (result.Host.empty()) {
96+
TStringBuf host;
97+
TStringBuf bucket;
98+
if (input.GetHost().TrySplit('.', bucket, host)) {
99+
result.Host = host;
100+
result.Bucket = bucket;
101+
} else {
102+
return TConclusionStatus::Fail(TStringBuilder() << "Missing bucket in virtual-hosted style S3 uri: " << input.Serialize());
103+
}
104+
}
105+
106+
if (TStringBuf path = StripPath(input.GetField(NUri::TField::FieldPath))) {
107+
result.Folder = path;
108+
}
109+
110+
if (auto status = result.FillStyleAgnosticFields(input); status.IsFail()) {
111+
return status;
112+
}
113+
return result;
114+
}
115+
116+
TConclusionStatus FillStyleAgnosticFields(const NUri::TUri& from) {
117+
if (from.GetField(NUri::TField::FieldPort)) {
118+
Port = from.GetPort();
119+
}
120+
121+
switch (from.GetScheme()) {
122+
case NUri::TScheme::SchemeEmpty:
123+
break;
124+
case NUri::TScheme::SchemeHTTP:
125+
Scheme = NKikimrSchemeOp::TS3Settings_EScheme_HTTP;
126+
break;
127+
case NUri::TScheme::SchemeHTTPS:
128+
Scheme = NKikimrSchemeOp::TS3Settings_EScheme_HTTPS;
129+
break;
130+
default:
131+
return TConclusionStatus::Fail(TStringBuilder() << "Unexpected scheme in url: " << from.Serialize());
132+
}
133+
134+
return TConclusionStatus::Success();
135+
}
136+
137+
public:
138+
static TConclusion<TS3Uri> ParseUri(const TString& input) {
139+
NUri::TUri uri;
140+
if (uri.Parse(input, NUri::TFeature::NewFeaturesRecommended) != NUri::TState::EParsed::ParsedOK) {
141+
return TConclusionStatus::Fail("Cannot parse URI: " + input);
142+
}
143+
144+
TUriStyle uriStyle;
145+
if (const auto deducedStyle = DeduceUriStyle(uri)) {
146+
uriStyle = *deducedStyle;
147+
} else {
148+
uriStyle = PATH_STYLE;
149+
}
150+
151+
switch (uriStyle) {
152+
case PATH_STYLE:
153+
return ParsePathStyleUri(uri);
154+
case VIRTUAL_HOSTED_STYLE:
155+
return ParseVirtualHostedStyleUri(uri);
156+
}
157+
}
158+
159+
TString GetEndpoint() const {
160+
TString endpoint = Host;
161+
if (Port) {
162+
endpoint += TStringBuilder() << ':' << *Port;
163+
}
164+
if (Folder) {
165+
endpoint += TStringBuilder() << '/' << *Folder;
166+
}
167+
return endpoint;
168+
}
169+
170+
void FillSettings(NKikimrSchemeOp::TS3Settings& settings) const {
171+
settings.SetEndpoint(GetEndpoint());
172+
settings.SetBucket(Bucket);
173+
if (Scheme) {
174+
settings.SetScheme(*Scheme);
175+
}
176+
}
177+
};
178+
179+
} // namespace NKikimr::NColumnShard::NTiers

ydb/core/tx/tiering/tier/ya.make

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,13 @@ LIBRARY()
22

33
SRCS(
44
object.cpp
5+
s3_uri.cpp
56
)
67

78
PEERDIR(
89
ydb/library/conclusion
910
ydb/services/metadata/secret/accessor
11+
contrib/restricted/aws/aws-crt-cpp
1012
)
1113

1214
YQL_LAST_ABI_VERSION()

ydb/core/tx/tiering/ut/ut_object.cpp

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
#include <ydb/core/tx/tiering/tier/object.h>
2+
#include <ydb/core/tx/tiering/tier/s3_uri.h>
23

34
#include <library/cpp/testing/unittest/registar.h>
45

56
namespace NKikimr {
67

78
using namespace NColumnShard;
89

9-
Y_UNIT_TEST_SUITE(S3SettingsConvertion) {
10+
Y_UNIT_TEST_SUITE(S3SettingsConversion) {
1011
void ValidateConversion(
1112
const NKikimrSchemeOp::TExternalDataSourceDescription& input, TConclusion<const NKikimrSchemeOp::TS3Settings> expectedResult) {
1213
NTiers::TTierConfig config;
@@ -69,6 +70,43 @@ Y_UNIT_TEST_SUITE(S3SettingsConvertion) {
6970
)", &output));
7071
ValidateConversion(input, output);
7172
}
73+
74+
// Path-style and virtual-hosted-style URIs that name the same bucket and folder
// must produce the same endpoint and bucket.
Y_UNIT_TEST(FoldersStrictStyle) {
    const std::vector<TString> uris = {
        "http://s3.yandexcloud.net:8080/my-folder/subfolder/bucket",
        "http://bucket.s3.yandexcloud.net:8080/my-folder/subfolder",
    };
    for (const auto& input : uris) {
        auto parsed = NTiers::TS3Uri::ParseUri(input);
        // Check the parse result first so a failure reports the reason instead of aborting in DetachResult().
        UNIT_ASSERT_C(!parsed.IsFail(), TStringBuilder() << input << ": " << parsed.GetErrorMessage());
        const NTiers::TS3Uri uri = parsed.DetachResult();
        UNIT_ASSERT_STRINGS_EQUAL_C(uri.GetEndpoint(), "s3.yandexcloud.net:8080/my-folder/subfolder", input);
        UNIT_ASSERT_STRINGS_EQUAL_C(uri.GetBucket(), "bucket", input);
    }
}
85+
86+
// A host without an S3 marker plus a multi-segment path is parsed as path-style;
// redundant leading and trailing slashes must not affect the result.
Y_UNIT_TEST(FoldersStyleDeduction) {
    const std::vector<TString> uris = {
        "http://storage.yandexcloud.net:8080/my-folder/subfolder/bucket",
        "http://storage.yandexcloud.net:8080///my-folder/subfolder/bucket//",
    };
    for (const auto& input : uris) {
        auto parsed = NTiers::TS3Uri::ParseUri(input);
        // Check the parse result first so a failure reports the reason instead of aborting in DetachResult().
        UNIT_ASSERT_C(!parsed.IsFail(), TStringBuilder() << input << ": " << parsed.GetErrorMessage());
        const NTiers::TS3Uri uri = parsed.DetachResult();
        UNIT_ASSERT_STRINGS_EQUAL_C(uri.GetEndpoint(), "storage.yandexcloud.net:8080/my-folder/subfolder", input);
        UNIT_ASSERT_STRINGS_EQUAL_C(uri.GetBucket(), "bucket", input);
    }
}
97+
98+
// The bucket must be found both in the path (path-style) and in the host prefix
// (virtual-hosted-style) when the layout is not stated explicitly.
Y_UNIT_TEST(StyleDeduction) {
    const std::vector<TString> uris = {
        "http://storage.yandexcloud.net/bucket",
        "http://my-s3.net/bucket",
        "http://bucket.my-s3.net",
        "http://bucket.my-s3.net/",
    };
    for (const auto& input : uris) {
        auto parsed = NTiers::TS3Uri::ParseUri(input);
        // Check the parse result first so a failure reports the reason instead of aborting in DetachResult().
        UNIT_ASSERT_C(!parsed.IsFail(), TStringBuilder() << input << ": " << parsed.GetErrorMessage());
        const NTiers::TS3Uri uri = parsed.DetachResult();
        UNIT_ASSERT_STRINGS_EQUAL_C(uri.GetBucket(), "bucket", input);
    }
}
72110
}
73111

74112
} // namespace NKikimr

0 commit comments

Comments
 (0)