Skip to content

Commit 6b6bf81

Browse files
GrigoriyPAazevaykin
authored andcommitted
YQ-4052 fixed url escaping for s3 insert (ydb-platform#13891)
1 parent f60860b commit 6b6bf81

File tree

12 files changed

+197
-23
lines changed

12 files changed

+197
-23
lines changed

ydb/core/external_sources/s3/ut/s3_aws_credentials_ut.cpp

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,88 @@ Y_UNIT_TEST_SUITE(S3AwsCredentials) {
252252
}
253253

254254
}
255+
256+
Y_UNIT_TEST(TestInsertEscaping) {
257+
const TString externalDataSourceName = "/Root/external_data_source";
258+
auto s3ActorsFactory = NYql::NDq::CreateS3ActorsFactory();
259+
auto kikimr = MakeKikimrRunner(true, nullptr, nullptr, std::nullopt, s3ActorsFactory);
260+
261+
auto tc = kikimr->GetTableClient();
262+
auto session = tc.CreateSession().GetValueSync().GetSession();
263+
const TString query = fmt::format(R"(
264+
CREATE OBJECT id (TYPE SECRET) WITH (value=`minio`);
265+
CREATE OBJECT key (TYPE SECRET) WITH (value=`minio123`);
266+
CREATE EXTERNAL DATA SOURCE `{external_source}` WITH (
267+
SOURCE_TYPE="ObjectStorage",
268+
LOCATION="{location}",
269+
AUTH_METHOD="AWS",
270+
AWS_ACCESS_KEY_ID_SECRET_NAME="id",
271+
AWS_SECRET_ACCESS_KEY_SECRET_NAME="key",
272+
AWS_REGION="ru-central-1"
273+
);
274+
)",
275+
"external_source"_a = externalDataSourceName,
276+
"location"_a = "localhost:" + GetExternalPort("minio", "9000") + "/datalake/"
277+
);
278+
279+
auto result = session.ExecuteSchemeQuery(query).GetValueSync();
280+
UNIT_ASSERT_C(result.GetStatus() == NYdb::EStatus::SUCCESS, result.GetIssues().ToString());
281+
282+
WaitBucket(kikimr, externalDataSourceName);
283+
284+
auto db = kikimr->GetQueryClient();
285+
286+
TString path = TStringBuilder() << "exp_folder/some_" << EscapeC(GetSymbolsString(' ', '~', "*?{}`")) << "\\`";
287+
288+
{
289+
// NB: AtomicUploadCommit = "false" because in minio ListMultipartUploads by prefix is not supported
290+
auto scriptExecutionOperation = db.ExecuteScript(fmt::format(R"(
291+
PRAGMA s3.AtomicUploadCommit = "false";
292+
INSERT INTO `{external_source}`.`{path}/` WITH (FORMAT = "csv_with_names")
293+
SELECT * FROM `{external_source}`.`/a/` WITH (
294+
format="json_each_row",
295+
schema(
296+
key Utf8 NOT NULL,
297+
value Utf8 NOT NULL
298+
)
299+
)
300+
)", "external_source"_a = externalDataSourceName, "path"_a = path)).ExtractValueSync();
301+
UNIT_ASSERT_VALUES_EQUAL_C(scriptExecutionOperation.Status().GetStatus(), EStatus::SUCCESS, scriptExecutionOperation.Status().GetIssues().ToString());
302+
UNIT_ASSERT(!scriptExecutionOperation.Metadata().ExecutionId.empty());
303+
304+
NYdb::NQuery::TScriptExecutionOperation readyOp = WaitScriptExecutionOperation(scriptExecutionOperation.Id(), kikimr->GetDriver());
305+
UNIT_ASSERT_EQUAL_C(readyOp.Metadata().ExecStatus, EExecStatus::Completed, readyOp.Status().GetIssues().ToString());
306+
}
307+
308+
{
309+
auto scriptExecutionOperation = db.ExecuteScript(fmt::format(R"(
310+
SELECT * FROM `{external_source}`.`{path}/` WITH (
311+
format="csv_with_names",
312+
schema(
313+
key Utf8 NOT NULL,
314+
value Utf8 NOT NULL
315+
)
316+
)
317+
)", "external_source"_a = externalDataSourceName, "path"_a = path)).ExtractValueSync();
318+
UNIT_ASSERT_VALUES_EQUAL_C(scriptExecutionOperation.Status().GetStatus(), EStatus::SUCCESS, scriptExecutionOperation.Status().GetIssues().ToString());
319+
UNIT_ASSERT(!scriptExecutionOperation.Metadata().ExecutionId.empty());
320+
321+
NYdb::NQuery::TScriptExecutionOperation readyOp = WaitScriptExecutionOperation(scriptExecutionOperation.Id(), kikimr->GetDriver());
322+
UNIT_ASSERT_EQUAL_C(readyOp.Metadata().ExecStatus, EExecStatus::Completed, readyOp.Status().GetIssues().ToString());
323+
TFetchScriptResultsResult results = db.FetchScriptResults(scriptExecutionOperation.Id(), 0).ExtractValueSync();
324+
UNIT_ASSERT_C(results.IsSuccess(), results.GetIssues().ToString());
325+
326+
TResultSetParser resultSet(results.ExtractResultSet());
327+
UNIT_ASSERT_VALUES_EQUAL(resultSet.ColumnsCount(), 2);
328+
UNIT_ASSERT_VALUES_EQUAL(resultSet.RowsCount(), 2);
329+
UNIT_ASSERT(resultSet.TryNextRow());
330+
UNIT_ASSERT_VALUES_EQUAL(resultSet.ColumnParser(0).GetUtf8(), "1");
331+
UNIT_ASSERT_VALUES_EQUAL(resultSet.ColumnParser(1).GetUtf8(), "trololo");
332+
UNIT_ASSERT(resultSet.TryNextRow());
333+
UNIT_ASSERT_VALUES_EQUAL(resultSet.ColumnParser(0).GetUtf8(), "2");
334+
UNIT_ASSERT_VALUES_EQUAL(resultSet.ColumnParser(1).GetUtf8(), "hello world");
335+
}
336+
}
255337
}
256338

257339
} // namespace NKikimr::NKqp

ydb/core/kqp/ut/federated_query/common/common.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,16 @@
33
#include <library/cpp/testing/unittest/registar.h>
44

55
namespace NKikimr::NKqp::NFederatedQueryTest {
6+
TString GetSymbolsString(char start, char end, const TString& skip) {
7+
TStringBuilder result;
8+
for (char symbol = start; symbol <= end; ++symbol) {
9+
if (skip.Contains(symbol)) {
10+
continue;
11+
}
12+
result << symbol;
13+
}
14+
return result;
15+
}
616

717
NYdb::NQuery::TScriptExecutionOperation WaitScriptExecutionOperation(const NYdb::TOperation::TOperationId& operationId, const NYdb::TDriver& ydbDriver) {
818
NYdb::NOperation::TOperationClient client(ydbDriver);

ydb/core/kqp/ut/federated_query/common/common.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
namespace NKikimr::NKqp::NFederatedQueryTest {
99
using namespace NKikimr::NKqp;
1010

11+
TString GetSymbolsString(char start, char end, const TString& skip = "");
12+
1113
NYdb::NQuery::TScriptExecutionOperation WaitScriptExecutionOperation(
1214
const NYdb::TOperation::TOperationId& operationId,
1315
const NYdb::TDriver& ydbDriver);

ydb/core/kqp/ut/federated_query/s3/kqp_federated_query_ut.cpp

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -20,17 +20,6 @@ using namespace NTestUtils;
2020
using namespace fmt::literals;
2121

2222
Y_UNIT_TEST_SUITE(KqpFederatedQuery) {
23-
TString GetSymbolsString(char start, char end, const TString& skip = "") {
24-
TStringBuilder result;
25-
for (char symbol = start; symbol <= end; ++symbol) {
26-
if (skip.Contains(symbol)) {
27-
continue;
28-
}
29-
result << symbol;
30-
}
31-
return result;
32-
}
33-
3423
Y_UNIT_TEST(ExecuteScriptWithExternalTableResolve) {
3524
const TString externalDataSourceName = "/Root/external_data_source";
3625
const TString externalTableName = "/Root/test_binding_resolve";

ydb/library/yql/providers/common/http_gateway/yql_aws_signature.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -138,11 +138,11 @@ TString TAwsSignature::HashSHA256(TStringBuf data) {
138138
return to_lower(HexEncode(hash, SHA256_DIGEST_LENGTH));
139139
}
140140

141-
TString TAwsSignature::UriEncode(const TStringBuf input, bool encodeSlash) {
141+
TString TAwsSignature::UriEncode(const TStringBuf input, bool encodeSlash, bool encodePercent) {
142142
TStringStream result;
143143
for (const char ch : input) {
144144
if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || (ch >= '0' && ch <= '9') || ch == '_' ||
145-
ch == '-' || ch == '~' || ch == '.') {
145+
ch == '-' || ch == '~' || ch == '.' || (ch == '%' && !encodePercent)) {
146146
result << ch;
147147
} else if (ch == '/') {
148148
if (encodeSlash) {
@@ -175,11 +175,10 @@ void TAwsSignature::PrepareCgiParameters() {
175175

176176
auto printSingleParam = [&canonicalCgi](const TString& key, const TVector<TString>& values) {
177177
auto it = values.begin();
178-
canonicalCgi << UriEncode(key, true) << "=" << UriEncode(*it, true);
178+
canonicalCgi << UriEncode(key, true, true) << "=" << UriEncode(*it, true, true);
179179
while (++it != values.end()) {
180-
canonicalCgi << "&" << UriEncode(key, true) << "=" << UriEncode(*it, true);
180+
canonicalCgi << "&" << UriEncode(key, true, true) << "=" << UriEncode(*it, true, true);
181181
}
182-
183182
};
184183

185184
auto it = sortedCgi.begin();

ydb/library/yql/providers/common/http_gateway/yql_aws_signature.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ struct TAwsSignature {
3232

3333
static TString HashSHA256(TStringBuf data);
3434

35-
static TString UriEncode(const TStringBuf input, bool encodeSlash = false);
35+
static TString UriEncode(const TStringBuf input, bool encodeSlash = false, bool encodePercent = false);
3636

3737
void PrepareCgiParameters();
3838

ydb/library/yql/providers/common/http_gateway/yql_aws_signature_ut.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
#include <util/network/address.h>
44
#include <library/cpp/testing/unittest/registar.h>
5+
#include <library/cpp/string_utils/quote/quote.h>
56

67
namespace NYql {
78

@@ -68,5 +69,15 @@ Y_UNIT_TEST_SUITE(TAwsSignature) {
6869
UNIT_ASSERT_VALUES_EQUAL(signature1.GetAmzDate(), signature2.GetAmzDate());
6970
UNIT_ASSERT_VALUES_EQUAL(signature1.GetAuthorization(), signature2.GetAuthorization());
7071
}
72+
73+
Y_UNIT_TEST(SignWithEscaping) {
74+
auto time = TInstant::FromValue(30);
75+
NYql::TAwsSignature signature("GET", UrlEscapeRet("http://os.com/my-bucket/ !\"#$%&'()+,-./0123456789:;<=>@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz|~/", true), "application/json", {}, "key", "pwd", time);
76+
UNIT_ASSERT_VALUES_EQUAL(signature.GetContentType(), "application/json");
77+
UNIT_ASSERT_VALUES_EQUAL(signature.GetXAmzContentSha256(), "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855");
78+
UNIT_ASSERT_VALUES_EQUAL(signature.GetAuthorization(), "AWS4-HMAC-SHA256 Credential=/19700101///aws4_request, SignedHeaders=content-type;host;x-amz-content-sha256;x-amz-date, Signature=21470c8f999941fdc785c508f0c55afa1a12735eddd868aa7276e532d687c436");
79+
UNIT_ASSERT_VALUES_UNEQUAL(signature.GetAmzDate(), "");
80+
}
7181
} // Y_UNIT_TEST_SUITE(TAwsSignature)
82+
7283
} // namespace NYql

ydb/library/yql/providers/s3/actors/yql_s3_applicator_actor.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ struct TCompleteMultipartUpload {
4949
}
5050

5151
TString BuildUrl() const {
52-
TUrlBuilder urlBuilder(Url);
52+
NS3Util::TUrlBuilder urlBuilder(NS3Util::UrlEscapeRet(Url));
5353
urlBuilder.AddUrlParam("uploadId", UploadId);
5454
return urlBuilder.Build();
5555
}
@@ -87,7 +87,7 @@ struct TListMultipartUploads {
8787
// This requirement will be fixed in the curl library
8888
// https://github.com/curl/curl/commit/fc76a24c53b08cdf6eec8ba787d8eac64651d56e
8989
// https://github.com/curl/curl/commit/c87920353883ef9d5aa952e724a8e2589d76add5
90-
TUrlBuilder urlBuilder(Url);
90+
NS3Util::TUrlBuilder urlBuilder(NS3Util::UrlEscapeRet(Url));
9191
if (KeyMarker) {
9292
urlBuilder.AddUrlParam("key-marker", KeyMarker);
9393
}
@@ -114,7 +114,7 @@ struct TAbortMultipartUpload {
114114
}
115115

116116
TString BuildUrl() const {
117-
TUrlBuilder urlBuilder(Url);
117+
NS3Util::TUrlBuilder urlBuilder(NS3Util::UrlEscapeRet(Url));
118118
urlBuilder.AddUrlParam("uploadId", UploadId);
119119
return urlBuilder.Build();
120120
}
@@ -141,7 +141,7 @@ struct TListParts {
141141
// This requirement will be fixed in the curl library
142142
// https://github.com/curl/curl/commit/fc76a24c53b08cdf6eec8ba787d8eac64651d56e
143143
// https://github.com/curl/curl/commit/c87920353883ef9d5aa952e724a8e2589d76add5
144-
TUrlBuilder urlBuilder(Url);
144+
NS3Util::TUrlBuilder urlBuilder(NS3Util::UrlEscapeRet(Url));
145145
if (PartNumberMarker) {
146146
urlBuilder.AddUrlParam("part-number-marker", PartNumberMarker);
147147
}
@@ -682,4 +682,4 @@ THolder<NActors::IActor> MakeS3ApplicatorActor(
682682
);
683683
}
684684

685-
} // namespace NYql::NDq
685+
} // namespace NYql::NDq

ydb/library/yql/providers/s3/common/util.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,4 +64,34 @@ bool ValidateS3ReadWriteSchema(const TStructExprType* schemaStructRowType, TExpr
6464
return true;
6565
}
6666

67+
TUrlBuilder::TUrlBuilder(const TString& uri)
68+
: MainUri(uri)
69+
{}
70+
71+
TUrlBuilder& TUrlBuilder::AddUrlParam(const TString& name, const TString& value) {
72+
Params.emplace_back(name, value);
73+
return *this;
74+
}
75+
76+
TString TUrlBuilder::Build() const {
77+
if (Params.empty()) {
78+
return MainUri;
79+
}
80+
81+
TStringBuilder result;
82+
result << MainUri << "?";
83+
84+
TStringBuf separator = ""sv;
85+
for (const auto& p : Params) {
86+
result << separator << p.Name;
87+
if (auto value = p.Value) {
88+
Quote(value, "");
89+
result << "=" << value;
90+
}
91+
separator = "&"sv;
92+
}
93+
94+
return std::move(result);
95+
}
96+
6797
}

ydb/library/yql/providers/s3/common/util.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,22 @@ TString UrlEscapeRet(const TStringBuf from);
1515

1616
bool ValidateS3ReadWriteSchema(const TStructExprType* schemaStructRowType, TExprContext& ctx);
1717

18+
class TUrlBuilder {
19+
struct TParam {
20+
TString Name;
21+
TString Value;
22+
};
23+
24+
public:
25+
explicit TUrlBuilder(const TString& uri);
26+
27+
TUrlBuilder& AddUrlParam(const TString& name, const TString& value = "");
28+
29+
TString Build() const;
30+
31+
private:
32+
std::vector<TParam> Params;
33+
TString MainUri;
34+
};
35+
1836
}

ydb/library/yql/providers/s3/common/util_ut.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,37 @@ Y_UNIT_TEST_SUITE(TestS3UrlEscape) {
3030
}
3131
}
3232

33+
Y_UNIT_TEST_SUITE(TestUrlBuilder) {
34+
Y_UNIT_TEST(UriOnly) {
35+
TUrlBuilder builder("https://localhost/abc");
36+
UNIT_ASSERT_VALUES_EQUAL(builder.Build(), "https://localhost/abc");
37+
}
38+
39+
Y_UNIT_TEST(Basic) {
40+
TUrlBuilder builder("https://localhost/abc");
41+
builder.AddUrlParam("param1", "val1");
42+
builder.AddUrlParam("param2", "val2");
43+
44+
UNIT_ASSERT_VALUES_EQUAL(builder.Build(), "https://localhost/abc?param1=val1&param2=val2");
45+
}
46+
47+
Y_UNIT_TEST(BasicWithEncoding) {
48+
auto url = TUrlBuilder("https://localhost/abc")
49+
.AddUrlParam("param1", "=!@#$%^&*(){}[]\" ")
50+
.AddUrlParam("param2", "val2")
51+
.Build();
52+
53+
UNIT_ASSERT_VALUES_EQUAL(url, "https://localhost/abc?param1=%3D%21%40%23%24%25%5E%26%2A%28%29%7B%7D%5B%5D%22+&param2=val2");
54+
}
55+
56+
Y_UNIT_TEST(BasicWithAdditionalEncoding) {
57+
auto url = TUrlBuilder("https://localhost/abc")
58+
.AddUrlParam("param1", ":/?#[]@!$&\'()*+,;=")
59+
.AddUrlParam("param2", "val2")
60+
.Build();
61+
62+
UNIT_ASSERT_VALUES_EQUAL(url, "https://localhost/abc?param1=%3A%2F%3F%23%5B%5D%40%21%24%26%27%28%29%2A%2B%2C%3B%3D&param2=val2");
63+
}
64+
}
65+
3366
} // namespace NYql::NS3Util

ydb/library/yql/providers/s3/object_listers/yql_s3_list.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -304,7 +304,7 @@ class TS3Lister : public IS3Lister {
304304
// This requirement will be fixed in the curl library
305305
// https://github.com/curl/curl/commit/fc76a24c53b08cdf6eec8ba787d8eac64651d56e
306306
// https://github.com/curl/curl/commit/c87920353883ef9d5aa952e724a8e2589d76add5
307-
TUrlBuilder urlBuilder(ctx.ListingRequest.Url);
307+
NS3Util::TUrlBuilder urlBuilder(ctx.ListingRequest.Url);
308308
if (ctx.ContinuationToken.Defined()) {
309309
urlBuilder.AddUrlParam("continuation-token", *ctx.ContinuationToken);
310310
}

0 commit comments

Comments
 (0)