Skip to content

Commit 8e85a8d

Browse files
Merge d6575ed into a02f6d5
2 parents a02f6d5 + d6575ed commit 8e85a8d

File tree

4 files changed

+75
-0
lines changed

4 files changed

+75
-0
lines changed

ydb/core/external_sources/object_storage.cpp

+17
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ struct TObjectStorageExternalSource : public IExternalSource {
150150
}
151151
const bool hasPartitioning = objectStorage.projection_size() || objectStorage.partitioned_by_size();
152152
issues.AddIssues(ValidateFormatSetting(objectStorage.format(), objectStorage.format_setting(), location, hasPartitioning));
153+
issues.AddIssues(ValidateSchema(schema));
153154
issues.AddIssues(ValidateJsonListFormat(objectStorage.format(), schema, objectStorage.partitioned_by()));
154155
issues.AddIssues(ValidateRawFormat(objectStorage.format(), schema, objectStorage.partitioned_by()));
155156
if (hasPartitioning) {
@@ -268,6 +269,22 @@ struct TObjectStorageExternalSource : public IExternalSource {
268269
return issues;
269270
}
270271

272+
template<typename TScheme>
273+
static NYql::TIssues ValidateSchema(const TScheme& schema) {
274+
NYql::TIssues issues;
275+
for (const auto& column: schema.column()) {
276+
const auto type = column.type();
277+
if (type.has_optional_type() && type.optional_type().item().has_optional_type()) {
278+
issues.AddIssue(MakeErrorIssue(
279+
Ydb::StatusIds::BAD_REQUEST,
280+
TStringBuilder{} << "Double optional types are not supported for bindings (you have '"
281+
<< column.name() << " " << NYdb::TType(column.type()).ToString() << "' field)"));
282+
}
283+
}
284+
285+
return issues;
286+
}
287+
271288
template<typename TScheme>
272289
static NYql::TIssues ValidateJsonListFormat(const TString& format, const TScheme& schema, const google::protobuf::RepeatedPtrField<TString>& partitionedBy) {
273290
NYql::TIssues issues;

ydb/core/external_sources/object_storage_ut.cpp

+9
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,15 @@ Y_UNIT_TEST_SUITE(ObjectStorageTest) {
5555
UNIT_ASSERT_EXCEPTION_CONTAINS(source->Pack(schema, general), NExternalSource::TExternalSourceException, "Date, Timestamp and Interval types are not allowed in json_list format");
5656
}
5757

58+
Y_UNIT_TEST(FailedOptionalTypeValidation) {
59+
auto source = NExternalSource::CreateObjectStorageExternalSource({}, nullptr, 1000, nullptr, false, false);
60+
NKikimrExternalSources::TSchema schema;
61+
NKikimrExternalSources::TGeneral general;
62+
auto newColumn = schema.add_column();
63+
newColumn->mutable_type()->mutable_optional_type()->mutable_item()->mutable_optional_type()->mutable_item()->set_type_id(Ydb::Type::INT32);
64+
UNIT_ASSERT_EXCEPTION_CONTAINS(source->Pack(schema, general), NExternalSource::TExternalSourceException, "Double optional types are not supported for bindings");
65+
}
66+
5867
Y_UNIT_TEST(WildcardsValidation) {
5968
auto source = NExternalSource::CreateObjectStorageExternalSource({}, nullptr, 1000, nullptr, false, false);
6069
NKikimrExternalSources::TSchema schema;

ydb/library/yql/providers/s3/provider/yql_s3_datasink_type_ann.cpp

+21
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,23 @@ TExprNode::TListType GetPartitionKeys(const TExprNode::TPtr& partBy) {
2626

2727
return {};
2828
}
29+
30+
bool ValidateSchemaForOutput(const TStructExprType* schemaStructRowType, TExprContext& ctx) {
31+
for (const TItemExprType* item : schemaStructRowType->GetItems()) {
32+
const TTypeAnnotationNode* rowType = item->GetItemType();
33+
if (rowType->GetKind() == ETypeAnnotationKind::Optional) {
34+
rowType = rowType->Cast<TOptionalExprType>()->GetItemType();
35+
}
36+
37+
if (rowType->GetKind() == ETypeAnnotationKind::Optional) {
38+
ctx.AddError(TIssue(TStringBuilder() << "Double optional types are not supported for output (you have '"
39+
<< item->GetName() << " " << FormatType(item->GetItemType()) << "' field)"));
40+
return false;
41+
}
42+
}
43+
return true;
44+
}
45+
2946
}
3047

3148
namespace {
@@ -74,6 +91,10 @@ class TS3DataSinkTypeAnnotationTransformer : public TVisitorTransformerBase {
7491
return TStatus::Error;
7592
}
7693

94+
if (!ValidateSchemaForOutput(sourceType->Cast<TStructExprType>(), ctx)) {
95+
return TStatus::Error;
96+
}
97+
7798
auto target = input->Child(TS3WriteObject::idx_Target);
7899
if (!TS3Target::Match(target)) {
79100
ctx.AddError(TIssue(ctx.GetPosition(target->Pos()), "Expected S3 target."));

ydb/tests/fq/s3/test_insert.py

+28
Original file line numberDiff line numberDiff line change
@@ -535,3 +535,31 @@ def test_insert_without_format_error(self, kikimr, s3, client, unique_prefix):
535535
issues = str(client.describe_query(query_id).result.query.issue)
536536

537537
assert "Missing format - please use WITH FORMAT when writing into S3" in issues, "Incorrect Issues: " + issues
538+
539+
@yq_all
540+
@pytest.mark.parametrize("client", [{"folder_id": "my_folder"}], indirect=True)
541+
def test_insert_type_validation(self, kikimr, s3, client, unique_prefix):
542+
resource = boto3.resource(
543+
"s3", endpoint_url=s3.s3_url, aws_access_key_id="key", aws_secret_access_key="secret_key"
544+
)
545+
546+
bucket = resource.Bucket("insert_bucket")
547+
bucket.create(ACL='public-read')
548+
bucket.objects.all().delete()
549+
550+
storage_connection_name = unique_prefix + "ibucket"
551+
client.create_storage_connection(storage_connection_name, "insert_bucket")
552+
553+
sql = f'''
554+
INSERT INTO `{storage_connection_name}`.`insert/`
555+
WITH
556+
(
557+
FORMAT="csv_with_names"
558+
)
559+
SELECT CAST(42 AS Int32??) as Weight;'''
560+
561+
query_id = client.create_query("simple", sql, type=fq.QueryContent.QueryType.ANALYTICS).result.query_id
562+
client.wait_query_status(query_id, fq.QueryMeta.FAILED)
563+
issues = str(client.describe_query(query_id).result.query.issue)
564+
565+
assert "Double optional types are not supported for output" in issues, "Incorrect issues: " + issues

0 commit comments

Comments
 (0)