Skip to content

Commit c0d6909

Browse files
authored
merge to ydb stable YQ-4052 fixed url escaping for s3 insert (#13996)
1 parent ffb9e1c commit c0d6909

File tree

12 files changed

+197
-23
lines changed

12 files changed

+197
-23
lines changed

ydb/core/external_sources/s3/ut/s3_aws_credentials_ut.cpp

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,88 @@ Y_UNIT_TEST_SUITE(S3AwsCredentials) {
186186
}
187187

188188
}
189+
190+
Y_UNIT_TEST(TestInsertEscaping) {
191+
const TString externalDataSourceName = "/Root/external_data_source";
192+
auto s3ActorsFactory = NYql::NDq::CreateS3ActorsFactory();
193+
auto kikimr = MakeKikimrRunner(true, nullptr, nullptr, std::nullopt, s3ActorsFactory);
194+
195+
auto tc = kikimr->GetTableClient();
196+
auto session = tc.CreateSession().GetValueSync().GetSession();
197+
const TString query = fmt::format(R"(
198+
CREATE OBJECT id (TYPE SECRET) WITH (value=`minio`);
199+
CREATE OBJECT key (TYPE SECRET) WITH (value=`minio123`);
200+
CREATE EXTERNAL DATA SOURCE `{external_source}` WITH (
201+
SOURCE_TYPE="ObjectStorage",
202+
LOCATION="{location}",
203+
AUTH_METHOD="AWS",
204+
AWS_ACCESS_KEY_ID_SECRET_NAME="id",
205+
AWS_SECRET_ACCESS_KEY_SECRET_NAME="key",
206+
AWS_REGION="ru-central-1"
207+
);
208+
)",
209+
"external_source"_a = externalDataSourceName,
210+
"location"_a = "localhost:" + GetExternalPort("minio", "9000") + "/datalake/"
211+
);
212+
213+
auto result = session.ExecuteSchemeQuery(query).GetValueSync();
214+
UNIT_ASSERT_C(result.GetStatus() == NYdb::EStatus::SUCCESS, result.GetIssues().ToString());
215+
216+
WaitBucket(kikimr, externalDataSourceName);
217+
218+
auto db = kikimr->GetQueryClient();
219+
220+
TString path = TStringBuilder() << "exp_folder/some_" << EscapeC(GetSymbolsString(' ', '~', "*?{}`")) << "\\`";
221+
222+
{
223+
// NB: AtomicUploadCommit = "false" because in minio ListMultipartUploads by prefix is not supported
224+
auto scriptExecutionOperation = db.ExecuteScript(fmt::format(R"(
225+
PRAGMA s3.AtomicUploadCommit = "false";
226+
INSERT INTO `{external_source}`.`{path}/` WITH (FORMAT = "csv_with_names")
227+
SELECT * FROM `{external_source}`.`/a/` WITH (
228+
format="json_each_row",
229+
schema(
230+
key Utf8 NOT NULL,
231+
value Utf8 NOT NULL
232+
)
233+
)
234+
)", "external_source"_a = externalDataSourceName, "path"_a = path)).ExtractValueSync();
235+
UNIT_ASSERT_VALUES_EQUAL_C(scriptExecutionOperation.Status().GetStatus(), EStatus::SUCCESS, scriptExecutionOperation.Status().GetIssues().ToString());
236+
UNIT_ASSERT(!scriptExecutionOperation.Metadata().ExecutionId.empty());
237+
238+
NYdb::NQuery::TScriptExecutionOperation readyOp = WaitScriptExecutionOperation(scriptExecutionOperation.Id(), kikimr->GetDriver());
239+
UNIT_ASSERT_EQUAL_C(readyOp.Metadata().ExecStatus, EExecStatus::Completed, readyOp.Status().GetIssues().ToString());
240+
}
241+
242+
{
243+
auto scriptExecutionOperation = db.ExecuteScript(fmt::format(R"(
244+
SELECT * FROM `{external_source}`.`{path}/` WITH (
245+
format="csv_with_names",
246+
schema(
247+
key Utf8 NOT NULL,
248+
value Utf8 NOT NULL
249+
)
250+
)
251+
)", "external_source"_a = externalDataSourceName, "path"_a = path)).ExtractValueSync();
252+
UNIT_ASSERT_VALUES_EQUAL_C(scriptExecutionOperation.Status().GetStatus(), EStatus::SUCCESS, scriptExecutionOperation.Status().GetIssues().ToString());
253+
UNIT_ASSERT(!scriptExecutionOperation.Metadata().ExecutionId.empty());
254+
255+
NYdb::NQuery::TScriptExecutionOperation readyOp = WaitScriptExecutionOperation(scriptExecutionOperation.Id(), kikimr->GetDriver());
256+
UNIT_ASSERT_EQUAL_C(readyOp.Metadata().ExecStatus, EExecStatus::Completed, readyOp.Status().GetIssues().ToString());
257+
TFetchScriptResultsResult results = db.FetchScriptResults(scriptExecutionOperation.Id(), 0).ExtractValueSync();
258+
UNIT_ASSERT_C(results.IsSuccess(), results.GetIssues().ToString());
259+
260+
TResultSetParser resultSet(results.ExtractResultSet());
261+
UNIT_ASSERT_VALUES_EQUAL(resultSet.ColumnsCount(), 2);
262+
UNIT_ASSERT_VALUES_EQUAL(resultSet.RowsCount(), 2);
263+
UNIT_ASSERT(resultSet.TryNextRow());
264+
UNIT_ASSERT_VALUES_EQUAL(resultSet.ColumnParser(0).GetUtf8(), "1");
265+
UNIT_ASSERT_VALUES_EQUAL(resultSet.ColumnParser(1).GetUtf8(), "trololo");
266+
UNIT_ASSERT(resultSet.TryNextRow());
267+
UNIT_ASSERT_VALUES_EQUAL(resultSet.ColumnParser(0).GetUtf8(), "2");
268+
UNIT_ASSERT_VALUES_EQUAL(resultSet.ColumnParser(1).GetUtf8(), "hello world");
269+
}
270+
}
189271
}
190272

191273
} // namespace NKikimr::NKqp

ydb/core/kqp/ut/federated_query/common/common.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,16 @@
33
#include <library/cpp/testing/unittest/registar.h>
44

55
namespace NKikimr::NKqp::NFederatedQueryTest {
6+
TString GetSymbolsString(char start, char end, const TString& skip) {
7+
TStringBuilder result;
8+
for (char symbol = start; symbol <= end; ++symbol) {
9+
if (skip.Contains(symbol)) {
10+
continue;
11+
}
12+
result << symbol;
13+
}
14+
return result;
15+
}
616

717
NYdb::NQuery::TScriptExecutionOperation WaitScriptExecutionOperation(const NYdb::TOperation::TOperationId& operationId, const NYdb::TDriver& ydbDriver) {
818
NYdb::NOperation::TOperationClient client(ydbDriver);

ydb/core/kqp/ut/federated_query/common/common.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
namespace NKikimr::NKqp::NFederatedQueryTest {
99
using namespace NKikimr::NKqp;
1010

11+
TString GetSymbolsString(char start, char end, const TString& skip = "");
12+
1113
NYdb::NQuery::TScriptExecutionOperation WaitScriptExecutionOperation(
1214
const NYdb::TOperation::TOperationId& operationId,
1315
const NYdb::TDriver& ydbDriver);

ydb/core/kqp/ut/federated_query/s3/kqp_federated_query_ut.cpp

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,17 +20,6 @@ using namespace NTestUtils;
2020
using namespace fmt::literals;
2121

2222
Y_UNIT_TEST_SUITE(KqpFederatedQuery) {
23-
TString GetSymbolsString(char start, char end, const TString& skip = "") {
24-
TStringBuilder result;
25-
for (char symbol = start; symbol <= end; ++symbol) {
26-
if (skip.Contains(symbol)) {
27-
continue;
28-
}
29-
result << symbol;
30-
}
31-
return result;
32-
}
33-
3423
Y_UNIT_TEST(ExecuteScriptWithExternalTableResolve) {
3524
const TString externalDataSourceName = "/Root/external_data_source";
3625
const TString externalTableName = "/Root/test_binding_resolve";

ydb/library/yql/providers/common/http_gateway/yql_aws_signature.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -138,11 +138,11 @@ TString TAwsSignature::HashSHA256(TStringBuf data) {
138138
return to_lower(HexEncode(hash, SHA256_DIGEST_LENGTH));
139139
}
140140

141-
TString TAwsSignature::UriEncode(const TStringBuf input, bool encodeSlash) {
141+
TString TAwsSignature::UriEncode(const TStringBuf input, bool encodeSlash, bool encodePercent) {
142142
TStringStream result;
143143
for (const char ch : input) {
144144
if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || (ch >= '0' && ch <= '9') || ch == '_' ||
145-
ch == '-' || ch == '~' || ch == '.') {
145+
ch == '-' || ch == '~' || ch == '.' || (ch == '%' && !encodePercent)) {
146146
result << ch;
147147
} else if (ch == '/') {
148148
if (encodeSlash) {
@@ -174,11 +174,10 @@ void TAwsSignature::PrepareCgiParameters() {
174174

175175
auto printSingleParam = [&canonicalCgi](const TString& key, const TVector<TString>& values) {
176176
auto it = values.begin();
177-
canonicalCgi << UriEncode(key, true) << "=" << UriEncode(*it, true);
177+
canonicalCgi << UriEncode(key, true, true) << "=" << UriEncode(*it, true, true);
178178
while (++it != values.end()) {
179-
canonicalCgi << "&" << UriEncode(key, true) << "=" << UriEncode(*it, true);
179+
canonicalCgi << "&" << UriEncode(key, true, true) << "=" << UriEncode(*it, true, true);
180180
}
181-
182181
};
183182

184183
auto it = sortedCgi.begin();

ydb/library/yql/providers/common/http_gateway/yql_aws_signature.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ struct TAwsSignature {
3232

3333
static TString HashSHA256(TStringBuf data);
3434

35-
static TString UriEncode(const TStringBuf input, bool encodeSlash = false);
35+
static TString UriEncode(const TStringBuf input, bool encodeSlash = false, bool encodePercent = false);
3636

3737
void PrepareCgiParameters();
3838

ydb/library/yql/providers/common/http_gateway/yql_aws_signature_ut.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
#include <util/network/address.h>
44
#include <library/cpp/testing/unittest/registar.h>
5+
#include <library/cpp/string_utils/quote/quote.h>
56

67
namespace NYql {
78

@@ -68,5 +69,15 @@ Y_UNIT_TEST_SUITE(TAwsSignature) {
6869
UNIT_ASSERT_VALUES_EQUAL(signature1.GetAmzDate(), signature2.GetAmzDate());
6970
UNIT_ASSERT_VALUES_EQUAL(signature1.GetAuthorization(), signature2.GetAuthorization());
7071
}
72+
73+
Y_UNIT_TEST(SignWithEscaping) {
74+
auto time = TInstant::FromValue(30);
75+
NYql::TAwsSignature signature("GET", UrlEscapeRet("http://os.com/my-bucket/ !\"#$%&'()+,-./0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz|~/", true), "application/json", {}, "key", "pwd", time);
76+
UNIT_ASSERT_VALUES_EQUAL(signature.GetContentType(), "application/json");
77+
UNIT_ASSERT_VALUES_EQUAL(signature.GetXAmzContentSha256(), "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855");
78+
UNIT_ASSERT_VALUES_EQUAL(signature.GetAuthorization(), "AWS4-HMAC-SHA256 Credential=/19700101///aws4_request, SignedHeaders=content-type;host;x-amz-content-sha256;x-amz-date, Signature=21470c8f999941fdc785c508f0c55afa1a12735eddd868aa7276e532d687c436");
79+
UNIT_ASSERT_VALUES_UNEQUAL(signature.GetAmzDate(), "");
80+
}
7181
} // Y_UNIT_TEST_SUITE(TAwsSignature)
82+
7283
} // namespace NYql

ydb/library/yql/providers/s3/actors/yql_s3_applicator_actor.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ struct TCompleteMultipartUpload {
4949
}
5050

5151
TString BuildUrl() const {
52-
TUrlBuilder urlBuilder(Url);
52+
NS3Util::TUrlBuilder urlBuilder(NS3Util::UrlEscapeRet(Url));
5353
urlBuilder.AddUrlParam("uploadId", UploadId);
5454
return urlBuilder.Build();
5555
}
@@ -87,7 +87,7 @@ struct TListMultipartUploads {
8787
// This requirement will be fixed in the curl library
8888
// https://github.com/curl/curl/commit/fc76a24c53b08cdf6eec8ba787d8eac64651d56e
8989
// https://github.com/curl/curl/commit/c87920353883ef9d5aa952e724a8e2589d76add5
90-
TUrlBuilder urlBuilder(Url);
90+
NS3Util::TUrlBuilder urlBuilder(NS3Util::UrlEscapeRet(Url));
9191
if (KeyMarker) {
9292
urlBuilder.AddUrlParam("key-marker", KeyMarker);
9393
}
@@ -114,7 +114,7 @@ struct TAbortMultipartUpload {
114114
}
115115

116116
TString BuildUrl() const {
117-
TUrlBuilder urlBuilder(Url);
117+
NS3Util::TUrlBuilder urlBuilder(NS3Util::UrlEscapeRet(Url));
118118
urlBuilder.AddUrlParam("uploadId", UploadId);
119119
return urlBuilder.Build();
120120
}
@@ -141,7 +141,7 @@ struct TListParts {
141141
// This requirement will be fixed in the curl library
142142
// https://github.com/curl/curl/commit/fc76a24c53b08cdf6eec8ba787d8eac64651d56e
143143
// https://github.com/curl/curl/commit/c87920353883ef9d5aa952e724a8e2589d76add5
144-
TUrlBuilder urlBuilder(Url);
144+
NS3Util::TUrlBuilder urlBuilder(NS3Util::UrlEscapeRet(Url));
145145
if (PartNumberMarker) {
146146
urlBuilder.AddUrlParam("part-number-marker", PartNumberMarker);
147147
}
@@ -682,4 +682,4 @@ THolder<NActors::IActor> MakeS3ApplicatorActor(
682682
);
683683
}
684684

685-
} // namespace NYql::NDq
685+
} // namespace NYql::NDq

ydb/library/yql/providers/s3/common/util.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,4 +48,34 @@ TString UrlEscapeRet(const TStringBuf from) {
4848
return to;
4949
}
5050

51+
TUrlBuilder::TUrlBuilder(const TString& uri)
52+
: MainUri(uri)
53+
{}
54+
55+
TUrlBuilder& TUrlBuilder::AddUrlParam(const TString& name, const TString& value) {
56+
Params.emplace_back(TParam{name, value});
57+
return *this;
58+
}
59+
60+
TString TUrlBuilder::Build() const {
61+
if (Params.empty()) {
62+
return MainUri;
63+
}
64+
65+
TStringBuilder result;
66+
result << MainUri << "?";
67+
68+
TStringBuf separator = ""sv;
69+
for (const auto& p : Params) {
70+
result << separator << p.Name;
71+
if (auto value = p.Value) {
72+
Quote(value, "");
73+
result << "=" << value;
74+
}
75+
separator = "&"sv;
76+
}
77+
78+
return std::move(result);
79+
}
80+
5181
}

ydb/library/yql/providers/s3/common/util.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,22 @@ TIssues AddParentIssue(const TStringBuilder& prefix, TIssues&& issues);
1212
// '#', '?'
1313
TString UrlEscapeRet(const TStringBuf from);
1414

15+
class TUrlBuilder {
16+
struct TParam {
17+
TString Name;
18+
TString Value;
19+
};
20+
21+
public:
22+
explicit TUrlBuilder(const TString& uri);
23+
24+
TUrlBuilder& AddUrlParam(const TString& name, const TString& value = "");
25+
26+
TString Build() const;
27+
28+
private:
29+
std::vector<TParam> Params;
30+
TString MainUri;
31+
};
32+
1533
}

0 commit comments

Comments
 (0)