Skip to content

Commit 085a2e5

Browse files
committed
support StartsWith predicates for pg types
1 parent fdcdd6f commit 085a2e5

File tree

10 files changed

+220
-7
lines changed

10 files changed

+220
-7
lines changed

ydb/library/yql/core/common_opt/yql_co_simple1.cpp

+10
Original file line numberDiff line numberDiff line change
@@ -4665,6 +4665,16 @@ void RegisterCoSimpleCallables1(TCallableOptimizerMap& map) {
46654665
map["IsDistinctFrom"] = std::bind(&OptimizeDistinctFrom<false>, _1, _2);
46664666

46674667
map["StartsWith"] = map["EndsWith"] = map["StringContains"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& /*optCtx*/) {
4668+
if (node->Head().GetTypeAnn()->GetKind() == ETypeAnnotationKind::Pg || node->Tail().GetTypeAnn()->GetKind() == ETypeAnnotationKind::Pg) {
4669+
TExprNodeList converted;
4670+
for (auto& child : node->ChildrenList()) {
4671+
const bool isPg = child->GetTypeAnn()->GetKind() == ETypeAnnotationKind::Pg;
4672+
converted.emplace_back(ctx.WrapByCallableIf(isPg, "FromPg", std::move(child)));
4673+
}
4674+
YQL_CLOG(DEBUG, Core) << "Converting Pg strings to YQL strings in " << node->Content();
4675+
return ctx.ChangeChildren(*node, std::move(converted));
4676+
}
4677+
46684678
if (node->Tail().IsCallable("String") && node->Tail().Head().Content().empty()) {
46694679
YQL_CLOG(DEBUG, Core) << node->Content() << " with empty string in second argument";
46704680
if (node->GetTypeAnn()->GetKind() == ETypeAnnotationKind::Optional) {

ydb/library/yql/core/extract_predicate/extract_predicate_impl.cpp

+23-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "extract_predicate_impl.h"
22

3+
#include <ydb/library/yql/core/type_ann/type_ann_pg.h>
34
#include <ydb/library/yql/core/yql_expr_type_annotation.h>
45
#include <ydb/library/yql/core/yql_opt_utils.h>
56
#include <ydb/library/yql/core/yql_expr_constraint.h>
@@ -781,6 +782,17 @@ TExprNode::TPtr OptimizeNodeForRangeExtraction(const TExprNode::TPtr& node, cons
781782
}
782783
}
783784

785+
if (node->IsCallable("StartsWith")) {
786+
if (node->Head().IsCallable("FromPg")) {
787+
YQL_CLOG(DEBUG, Core) << "Get rid of FromPg() in " << node->Content() << " first argument";
788+
return ctx.ChangeChild(*node, 0, node->Head().HeadPtr());
789+
}
790+
if (node->Tail().GetTypeAnn()->GetKind() == ETypeAnnotationKind::Pg) {
791+
YQL_CLOG(DEBUG, Core) << "Convert second argument of " << node->Content() << " from PG type";
792+
return ctx.ChangeChild(*node, 1, ctx.NewCallable(node->Tail().Pos(), "FromPg", {node->TailPtr()}));
793+
}
794+
}
795+
784796
return node;
785797
}
786798

@@ -911,13 +923,22 @@ TExprNode::TPtr BuildSingleComputeRange(const TStructExprType& rowType,
911923

912924
if (opNode->IsCallable("StartsWith")) {
913925
YQL_ENSURE(keys.size() == 1);
914-
return ctx.Builder(pos)
926+
const bool keyIsPg = firstKeyType->GetKind() == ETypeAnnotationKind::Pg;
927+
const TTypeAnnotationNode* rangeForType = firstKeyType;
928+
if (keyIsPg) {
929+
const TTypeAnnotationNode* yqlType = NTypeAnnImpl::FromPgImpl(pos, firstKeyType, ctx);
930+
YQL_ENSURE(yqlType);
931+
rangeForType = yqlType;
932+
YQL_ENSURE(opNode->Tail().GetTypeAnn()->GetKind() != ETypeAnnotationKind::Pg);
933+
}
934+
auto rangeForNode = ctx.Builder(pos)
915935
.Callable("RangeFor")
916936
.Atom(0, hasNot ? "NotStartsWith" : "StartsWith", TNodeFlags::Default)
917937
.Add(1, opNode->TailPtr())
918-
.Add(2, ExpandType(pos, *firstKeyType, ctx))
938+
.Add(2, ExpandType(pos, *rangeForType, ctx))
919939
.Seal()
920940
.Build();
941+
return ctx.WrapByCallableIf(keyIsPg, "RangeToPg", std::move(rangeForNode));
921942
}
922943

923944
if (opNode->IsCallable("SqlIn")) {

ydb/library/yql/core/extract_predicate/ya.make

+1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ SRCS(
1010

1111
PEERDIR(
1212
ydb/library/yql/core/services
13+
ydb/library/yql/core/type_ann
1314
)
1415

1516
YQL_LAST_ABI_VERSION()

ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp

+1
Original file line numberDiff line numberDiff line change
@@ -7602,6 +7602,7 @@ struct TPeepHoleRules {
76027602
{"RangeEmpty", &ExpandRangeEmpty},
76037603
{"AsRange", &ExpandAsRange},
76047604
{"RangeFor", &ExpandRangeFor},
7605+
{"RangeToPg", &ExpandRangeToPg},
76057606
{"ToFlow", &DropToFlowDeps},
76067607
{"CheckedAdd", &ExpandCheckedAdd},
76077608
{"CheckedSub", &ExpandCheckedSub},

ydb/library/yql/core/type_ann/type_ann_core.cpp

+66-5
Original file line numberDiff line numberDiff line change
@@ -3225,14 +3225,32 @@ namespace NTypeAnnImpl {
32253225
return IGraphTransformer::TStatus::Repeat;
32263226
}
32273227

3228-
bool isOptional1, isOptional2;
3229-
if (const TDataExprType *dataTypeOne, *dataTypeTwo;
3230-
!(EnsureDataOrOptionalOfData(input->Head(), isOptional1, dataTypeOne, ctx.Expr) && EnsureDataOrOptionalOfData(input->Tail(), isOptional2, dataTypeTwo, ctx.Expr)
3231-
&& EnsureStringOrUtf8Type(input->Head().Pos(), *dataTypeOne, ctx.Expr) && EnsureStringOrUtf8Type(input->Tail().Pos(), *dataTypeTwo, ctx.Expr))) {
3228+
if (!EnsureComputable(input->Head(), ctx.Expr) || !EnsureComputable(input->Tail(), ctx.Expr)) {
32323229
return IGraphTransformer::TStatus::Error;
32333230
}
32343231

3235-
if (isOptional1 || isOptional2)
3232+
bool hasOptionals = false;
3233+
for (auto& child : input->ChildrenList()) {
3234+
const TTypeAnnotationNode* type = child->GetTypeAnn();
3235+
if (type->GetKind() == ETypeAnnotationKind::Pg) {
3236+
type = FromPgImpl(child->Pos(), type, ctx.Expr);
3237+
if (!type) {
3238+
return IGraphTransformer::TStatus::Error;
3239+
}
3240+
}
3241+
bool isOptional = false;
3242+
const TDataExprType* dataType = nullptr;
3243+
if (!IsDataOrOptionalOfData(type, isOptional, dataType) ||
3244+
!(dataType->GetSlot() == EDataSlot::String || dataType->GetSlot() == EDataSlot::Utf8))
3245+
{
3246+
ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(child->Pos()), TStringBuilder()
3247+
<< "Expected (optional) string/utf8 or corresponding Pg type, but got: " << *child->GetTypeAnn()));
3248+
return IGraphTransformer::TStatus::Error;
3249+
}
3250+
hasOptionals = hasOptionals || isOptional;
3251+
}
3252+
3253+
if (hasOptionals)
32363254
input->SetTypeAnn(ctx.Expr.MakeType<TOptionalExprType>(ctx.Expr.MakeType<TDataExprType>(EDataSlot::Bool)));
32373255
else
32383256
input->SetTypeAnn(ctx.Expr.MakeType<TDataExprType>(EDataSlot::Bool));
@@ -11107,6 +11125,48 @@ template <NKikimr::NUdf::EDataSlot DataSlot>
1110711125
return IGraphTransformer::TStatus::Ok;
1110811126
}
1110911127

11128+
// Type annotation for a callable with exactly one argument (registered elsewhere in this
// commit under the name "RangeToPg").
//
// The argument must be a list whose item type passes EnsureValidRange. The result type is a
// list of two-element tuples (boundary, boundary), where the boundary tuple is derived from the
// first boundary of the input range type: items at even positions are kept as is, items at odd
// positions are Optional<T> and become Optional<ToPgImpl(T)>.
//
// Returns TStatus::Error if any of the checks fails or ToPgImpl() yields a null type,
// TStatus::Ok otherwise. `output` is not used.
IGraphTransformer::TStatus RangeToPgWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) {
11129+
Y_UNUSED(output);
11130+
11131+
if (!EnsureArgsCount(*input, 1, ctx.Expr)) {
11132+
return IGraphTransformer::TStatus::Error;
11133+
}
11134+
11135+
if (!EnsureListType(input->Head(), ctx.Expr)) {
11136+
return IGraphTransformer::TStatus::Error;
11137+
}
11138+
11139+
auto argType = input->Head().GetTypeAnn();
11140+
auto rangeType = argType->Cast<TListExprType>()->GetItemType();
11141+
if (!EnsureValidRange(input->Head().Pos(), rangeType, ctx.Expr)) {
11142+
return IGraphTransformer::TStatus::Error;
11143+
}
11144+
11145+
// Only the first element of the range tuple is inspected; the result below reuses the
// converted boundary type for both elements of the range tuple.
auto boundaryType = rangeType->Cast<TTupleExprType>()->GetItems().front();
11146+
const auto& boundaryItems = boundaryType->Cast<TTupleExprType>()->GetItems();
11147+
11148+
TTypeAnnotationNode::TListType resultBoundaryItems;
11149+
resultBoundaryItems.reserve(boundaryItems.size());
11150+
for (size_t i = 0; i < boundaryItems.size(); ++i) {
11151+
if (i % 2 == 0) {
11152+
// Even positions are copied to the result unchanged.
resultBoundaryItems.push_back(boundaryItems[i]);
11153+
} else {
11154+
// Odd positions: unwrap the optional, convert the item type with ToPgImpl and wrap it
// back into an optional.
auto keyType = boundaryItems[i]->Cast<TOptionalExprType>()->GetItemType();
11155+
auto pgKeyType = ToPgImpl(input->Head().Pos(), keyType, ctx.Expr);
11156+
if (!pgKeyType) {
11157+
return IGraphTransformer::TStatus::Error;
11158+
}
11159+
resultBoundaryItems.push_back(ctx.Expr.MakeType<TOptionalExprType>(pgKeyType));
11160+
}
11161+
}
11162+
11163+
const TTypeAnnotationNode* resultBoundaryType = ctx.Expr.MakeType<TTupleExprType>(resultBoundaryItems);
11164+
const TTypeAnnotationNode* resultRangeType =
11165+
ctx.Expr.MakeType<TTupleExprType>(TTypeAnnotationNode::TListType{resultBoundaryType, resultBoundaryType});
11166+
input->SetTypeAnn(ctx.Expr.MakeType<TListExprType>(resultRangeType));
11167+
return IGraphTransformer::TStatus::Ok;
11168+
}
11169+
1111011170
IGraphTransformer::TStatus RangeCreateWrapper(const TExprNode::TPtr& input, TExprNode::TPtr& output, TContext& ctx) {
1111111171
Y_UNUSED(output);
1111211172

@@ -12164,6 +12224,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot>
1216412224
ExtFunctions["OrderedSqlRename"] = &SqlRenameWrapper;
1216512225

1216612226
Functions["AsRange"] = &AsRangeWrapper;
12227+
Functions["RangeToPg"] = &RangeToPgWrapper;
1216712228
Functions["RangeCreate"] = &RangeCreateWrapper;
1216812229
Functions["RangeEmpty"] = &RangeEmptyWrapper;
1216912230
Functions["RangeFor"] = &RangeForWrapper;

ydb/library/yql/core/yql_opt_range.cpp

+47
Original file line numberDiff line numberDiff line change
@@ -519,4 +519,51 @@ TExprNode::TPtr ExpandRangeFor(const TExprNode::TPtr& node, TExprContext& ctx) {
519519
return result;
520520
}
521521

522+
// Rewrites a "RangeToPg" callable into an expression built from OrderedMap, StaticMap, Nth,
// Map and ToPg callables.
//
// The produced expression maps over the list in the node's first child; for every range it
// applies StaticMap with a lambda over each boundary, and rebuilds the boundary item by item:
// items at even positions are taken as is via Nth, items at odd positions are passed through
// Map with a lambda that applies ToPg to the unwrapped value.
//
// Requires (YQL_ENSURE) that `node` is a "RangeToPg" callable. The first child must already
// carry a type annotation of the shape List<Tuple<Tuple<...>, ...>>, since the number of
// boundary items is read from it.
TExprNode::TPtr ExpandRangeToPg(const TExprNode::TPtr& node, TExprContext& ctx) {
523+
YQL_ENSURE(node->IsCallable("RangeToPg"));
524+
// Number of items in the first boundary tuple of the annotated range type.
const size_t numComponents = node->Head().GetTypeAnn()->Cast<TListExprType>()->GetItemType()->
525+
Cast<TTupleExprType>()->GetItems().front()->Cast<TTupleExprType>()->GetSize();
526+
return ctx.Builder(node->Pos())
527+
.Callable("OrderedMap")
528+
.Add(0, node->HeadPtr())
529+
.Lambda(1)
530+
.Param("range")
531+
.Callable("StaticMap")
532+
.Arg(0, "range")
533+
.Lambda(1)
534+
.Param("boundary")
535+
.List()
536+
.Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& {
537+
for (size_t i = 0; i < numComponents; ++i) {
538+
if (i % 2 == 0) {
539+
// Even position: the item is forwarded unchanged.
parent
540+
.Callable(i, "Nth")
541+
.Arg(0, "boundary")
542+
.Atom(1, i)
543+
.Seal();
544+
} else {
545+
// Odd position: apply ToPg to the value inside the item via Map.
parent
546+
.Callable(i, "Map")
547+
.Callable(0, "Nth")
548+
.Arg(0, "boundary")
549+
.Atom(1, i)
550+
.Seal()
551+
.Lambda(1)
552+
.Param("unwrapped")
553+
.Callable("ToPg")
554+
.Arg(0, "unwrapped")
555+
.Seal()
556+
.Seal()
557+
.Seal();
558+
}
559+
}
560+
return parent;
561+
})
562+
.Seal()
563+
.Seal()
564+
.Seal()
565+
.Seal()
566+
.Seal()
567+
.Build();
568+
}
522569
}

ydb/library/yql/core/yql_opt_range.h

+1
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ namespace NYql {
66
TExprNode::TPtr ExpandRangeEmpty(const TExprNode::TPtr& node, TExprContext& ctx);
77
TExprNode::TPtr ExpandAsRange(const TExprNode::TPtr& node, TExprContext& ctx);
88
TExprNode::TPtr ExpandRangeFor(const TExprNode::TPtr& node, TExprContext& ctx);
9+
TExprNode::TPtr ExpandRangeToPg(const TExprNode::TPtr& node, TExprContext& ctx);
910

1011
}
1112

ydb/library/yql/tests/sql/sql2yql/canondata/result.json

+14
Original file line numberDiff line numberDiff line change
@@ -3940,6 +3940,13 @@
39403940
"uri": "https://{canondata_backend}/1773845/fe2146df711e0729e3c3cc1bc9b2c5b1fdfcfea1/resource.tar.gz#test_sql2yql.test_compute_range-pg_sqlin_/sql.yql"
39413941
}
39423942
],
3943+
"test_sql2yql.test[compute_range-pg_startswith]": [
3944+
{
3945+
"checksum": "f2e42e95b7b84fd210244e0c61c3f614",
3946+
"size": 4450,
3947+
"uri": "https://{canondata_backend}/1031349/96841816c51116681477e138bb81b6493013c777/resource.tar.gz#test_sql2yql.test_compute_range-pg_startswith_/sql.yql"
3948+
}
3949+
],
39433950
"test_sql2yql.test[compute_range-preserve_rest_predicates_order]": [
39443951
{
39453952
"checksum": "4915841ad83886d7f63fe939e0848687",
@@ -21230,6 +21237,13 @@
2123021237
"uri": "https://{canondata_backend}/1773845/fe2146df711e0729e3c3cc1bc9b2c5b1fdfcfea1/resource.tar.gz#test_sql_format.test_compute_range-pg_sqlin_/formatted.sql"
2123121238
}
2123221239
],
21240+
"test_sql_format.test[compute_range-pg_startswith]": [
21241+
{
21242+
"checksum": "b06b88f1965f643fea24cb7e5d8d0459",
21243+
"size": 955,
21244+
"uri": "https://{canondata_backend}/1031349/96841816c51116681477e138bb81b6493013c777/resource.tar.gz#test_sql_format.test_compute_range-pg_startswith_/formatted.sql"
21245+
}
21246+
],
2123321247
"test_sql_format.test[compute_range-preserve_rest_predicates_order]": [
2123421248
{
2123521249
"checksum": "77cd36176a336f2a79ee10f5697b124f",
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
/* syntax version 1 */
2+
/* postgres can not */
3+
/* dq can not */
4+
/* dqfile can not */
5+
/* yt can not */
6+
pragma warning("disable", "4510");
7+
pragma warning("disable", "1108");
8+
9+
-- Every query below calls YQL::RangeComputeFor with the row type Struct<a:PgInt4,b:PgText>,
-- a predicate built from StartsWith(FromPg($row.b), <prefix>) (plain or negated), and a third
-- argument that lists the single column "b".
-- like 'aaaa'
10+
select YQL::RangeComputeFor(
11+
Struct<a:PgInt4,b:PgText>,
12+
($row) -> (StartsWith(FromPg($row.b), 'aaaa') ?? false),
13+
AsTuple(AsAtom("b"))
14+
);
15+
16+
-- not like 'aaaa'
17+
select YQL::RangeComputeFor(
18+
Struct<a:PgInt4,b:PgText>,
19+
($row) -> (not (StartsWith(FromPg($row.b), 'aaaa') ?? true)),
20+
AsTuple(AsAtom("b"))
21+
);
22+
23+
24+
-- like <invalid utf8>
25+
select YQL::RangeComputeFor(
26+
Struct<a:PgInt4,b:PgText>,
27+
($row) -> (StartsWith(FromPg($row.b), 'a\xf5') ?? false),
28+
AsTuple(AsAtom("b"))
29+
);
30+
31+
-- not like <invalid utf8>
32+
select YQL::RangeComputeFor(
33+
Struct<a:PgInt4,b:PgText>,
34+
($row) -> (not (StartsWith(FromPg($row.b), 'a\xf5') ?? true)),
35+
AsTuple(AsAtom("b"))
36+
);

ydb/library/yql/tests/sql/yt_native_file/part5/canondata/result.json

+21
Original file line numberDiff line numberDiff line change
@@ -742,6 +742,27 @@
742742
"uri": "https://{canondata_backend}/1942671/812d348532a02502eb8901f04707aeea3f495e62/resource.tar.gz#test.test_compute_range-multiply_limit_with_dups-default.txt-Results_/results.txt"
743743
}
744744
],
745+
"test.test[compute_range-pg_startswith-default.txt-Debug]": [
746+
{
747+
"checksum": "fb20b05a49ae3533e4b581ad09bc01f4",
748+
"size": 1242,
749+
"uri": "https://{canondata_backend}/1781765/f97b29106f835508c9465d1d8ba8cc89cdfb0bdc/resource.tar.gz#test.test_compute_range-pg_startswith-default.txt-Debug_/opt.yql"
750+
}
751+
],
752+
"test.test[compute_range-pg_startswith-default.txt-Plan]": [
753+
{
754+
"checksum": "55515ae638f317612d048052be489bfd",
755+
"size": 1740,
756+
"uri": "https://{canondata_backend}/1781765/f97b29106f835508c9465d1d8ba8cc89cdfb0bdc/resource.tar.gz#test.test_compute_range-pg_startswith-default.txt-Plan_/plan.txt"
757+
}
758+
],
759+
"test.test[compute_range-pg_startswith-default.txt-Results]": [
760+
{
761+
"checksum": "cc3057a2f21b5e8e4ef004621d352021",
762+
"size": 19014,
763+
"uri": "https://{canondata_backend}/1781765/f97b29106f835508c9465d1d8ba8cc89cdfb0bdc/resource.tar.gz#test.test_compute_range-pg_startswith-default.txt-Results_/results.txt"
764+
}
765+
],
745766
"test.test[count-count_all-default.txt-Debug]": [
746767
{
747768
"checksum": "4545bbb3b7c7d6ac6fbcccdae8916f50",

0 commit comments

Comments
 (0)