Skip to content

Commit a8fb87e

Browse files
authored
Support of table functions in pg syntax (#9040)
1 parent f9cd248 commit a8fb87e

File tree

7 files changed

+224
-16
lines changed

7 files changed

+224
-16
lines changed

ydb/library/yql/core/type_ann/type_ann_core.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12715,7 +12715,7 @@ template <NKikimr::NUdf::EDataSlot DataSlot>
1271512715

1271612716
ColumnOrderFunctions["Merge"] = ColumnOrderFunctions["Extend"] = &OrderForMergeExtend;
1271712717
ColumnOrderFunctions[RightName] = &OrderFromFirst;
12718-
ColumnOrderFunctions["UnionAll"] = &OrderForUnionAll;
12718+
ColumnOrderFunctions["UnionMerge"] = ColumnOrderFunctions["UnionAll"] = &OrderForUnionAll;
1271912719
ColumnOrderFunctions["Union"] = &OrderForUnionAll;
1272012720
ColumnOrderFunctions["EquiJoin"] = &OrderForEquiJoin;
1272112721
ColumnOrderFunctions["CalcOverWindow"] = &OrderForCalcOverWindow;

ydb/library/yql/sql/pg/pg_sql.cpp

Lines changed: 144 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1520,12 +1520,9 @@ class TConverter : public IPGParseEvents {
15201520
}
15211521
} else if (NodeTag(r->val) == T_FuncCall) {
15221522
auto func = CAST_NODE(FuncCall, r->val);
1523-
TVector<TString> names;
1524-
if (!ExtractFuncName(func, names)) {
1523+
if (!ExtractFuncName(func, name, nullptr)) {
15251524
return nullptr;
15261525
}
1527-
1528-
name = names.back();
15291526
}
15301527
}
15311528

@@ -3427,12 +3424,13 @@ class TConverter : public IPGParseEvents {
34273424
return {};
34283425
}
34293426

3430-
auto func = ParseFuncCall(CAST_NODE(FuncCall, node), settings, true);
3427+
bool injectRead = false;
3428+
auto func = ParseFuncCall(CAST_NODE(FuncCall, node), settings, true, injectRead);
34313429
if (!func) {
34323430
return {};
34333431
}
34343432

3435-
return TFromDesc{ func, alias, colnames, false };
3433+
return TFromDesc{ func, alias, colnames, injectRead };
34363434
}
34373435

34383436
TMaybe<TFromDesc> ParseRangeSubselect(const RangeSubselect* value) {
@@ -3723,7 +3721,8 @@ class TConverter : public IPGParseEvents {
37233721
return ParseNullTestExpr(CAST_NODE(NullTest, node), settings);
37243722
}
37253723
case T_FuncCall: {
3726-
return ParseFuncCall(CAST_NODE(FuncCall, node), settings, false);
3724+
bool injectRead;
3725+
return ParseFuncCall(CAST_NODE(FuncCall, node), settings, false, injectRead);
37273726
}
37283727
case T_A_ArrayExpr: {
37293728
return ParseAArrayExpr(CAST_NODE(A_ArrayExpr, node), settings);
@@ -4009,7 +4008,124 @@ class TConverter : public IPGParseEvents {
40094008
return L(A("PgSubLink"), QA(linkType), L(A("Void")), L(A("Void")), rowTest, L(A("lambda"), QL(), select));
40104009
}
40114010

4012-
TAstNode* ParseFuncCall(const FuncCall* value, const TExprSettings& settings, bool rangeFunction) {
4011+
// Translates a schema-qualified table function used in FROM (e.g. plato.concat('a','b'))
// into a `Read!` AST node over the source built for `schema`.
//
// All arguments must be string constants and at least one is required.
// Function names are matched case-insensitively:
//   concat(t1, t2, ...)               - MrTableConcat over one Key per table;
//   concat_view(t1, v1, t2, v2, ...)  - same, each Key carries a (table, view) pair;
//   range(a0 [, lo [, hi [, a3 [, view]]]])
//                                     - MrTableRange whose lambda keeps items with
//                                       item >= lo and item <= hi (always true when
//                                       no bounds are given);
//   regexp(a0, pattern [, a2 [, view]]) - MrTableRange filtered through Re2.Grep;
//   like(a0, pattern [, a2 [, view]])   - MrTableRange filtered through Re2.Match on
//                                         the pattern produced by Re2.PatternFromLike.
// NOTE(review): a0 and a3/a2 are forwarded verbatim as the first and third
// MrTableRange arguments - presumably a path prefix and suffix; confirm against
// the MrTableRange contract.
//
// Returns nullptr (after reporting an error where applicable) when the source
// cannot be built, an argument is not a string constant, the argument count is
// wrong for the function, or the function name is unknown.
TAstNode* ParseTableRangeFunction(const TString& name, const TString& schema, List* args) {
    auto source = BuildClusterSinkOrSourceExpression(false, schema);
    if (!source) {
        return nullptr;
    }

    // Collect the literal text of every argument, rejecting anything that is not a string constant.
    TVector<TString> literals;
    for (int idx = 0; idx < ListLength(args); ++idx) {
        auto node = ListNodeNth(args, idx);
        const bool isStringConst = NodeTag(node) == T_A_Const
            && NodeTag(CAST_NODE(A_Const, node)->val) == T_String;
        if (!isStringConst) {
            AddError("Expected String argument for table function");
            return nullptr;
        }

        TString text = StrVal(CAST_NODE(A_Const, node)->val);
        literals.push_back(text);
    }

    if (literals.empty()) {
        AddError("Expected at least one argument for table function");
        return nullptr;
    }

    // (String '<text>)
    auto stringNode = [this](const TString& text) -> TAstNode* {
        return L(A("String"), QAX(text));
    };

    // (MrTableRange '<first arg> (lambda '(item) <predicate>) '<extra>), where <extra> is the
    // argument at extraIdx or an empty string when that argument was not supplied.
    auto makeRange = [&](TAstNode* predicate, size_t extraIdx) -> TAstNode* {
        auto filter = L(A("lambda"), QL(A("item")), predicate);
        const TString extra = literals.size() <= extraIdx ? TString("") : literals[extraIdx];
        return L(A("MrTableRange"), QAX(literals[0]), filter, QAX(extra));
    };

    // (Key '('table <range>)) with an additional '('view ...) entry when the argument at viewIdx exists.
    auto makeRangeKey = [&](TAstNode* range, size_t viewIdx) -> TAstNode* {
        if (literals.size() <= viewIdx) {
            return L(A("Key"), QL(QA("table"), range));
        }

        return L(A("Key"), QL(QA("table"), range), QL(QA("view"), stringNode(literals[viewIdx])));
    };

    TAstNode* key = nullptr;
    auto lowerName = to_lower(name);
    auto options = QL();
    if (lowerName == "concat") {
        TVector<TAstNode*> items;
        items.push_back(A("MrTableConcat"));
        for (const auto& table : literals) {
            items.push_back(L(A("Key"), QL(QA("table"), stringNode(table))));
        }

        key = VL(items);
    } else if (lowerName == "concat_view") {
        if (literals.size() % 2 != 0) {
            AddError("Expected sequence of pairs of table and view for concat_view");
            return nullptr;
        }

        TVector<TAstNode*> items;
        items.push_back(A("MrTableConcat"));
        for (size_t pos = 0; pos < literals.size(); pos += 2) {
            auto tableEntry = QL(QA("table"), stringNode(literals[pos]));
            auto viewEntry = QL(QA("view"), stringNode(literals[pos + 1]));
            items.push_back(L(A("Key"), tableEntry, viewEntry));
        }

        key = VL(items);
    } else if (lowerName == "range") {
        if (literals.size() > 5) {
            AddError("Too many arguments");
            return nullptr;
        }

        options = QL(QL(QA("ignorenonexisting")));

        // (<op> item (String '<bound>))
        auto boundCheck = [&](const char* op, const TString& bound) -> TAstNode* {
            return L(A(op), A("item"), stringNode(bound));
        };

        TAstNode* predicate = nullptr;
        switch (literals.size()) {
            case 1:
                predicate = L(A("Bool"), QA("true"));
                break;
            case 2:
                predicate = boundCheck(">=", literals[1]);
                break;
            default: {
                auto lower = boundCheck(">=", literals[1]);
                auto upper = boundCheck("<=", literals[2]);
                predicate = L(A("And"), lower, upper);
                break;
            }
        }

        key = makeRangeKey(makeRange(predicate, 3), 4);
    } else if (lowerName == "regexp" || lowerName == "like") {
        if (literals.size() < 2 || literals.size() > 4) {
            AddError("Expected from 2 to 4 arguments");
            return nullptr;
        }

        options = QL(QL(QA("ignorenonexisting")));

        const bool isRegexp = (lowerName == "regexp");
        TAstNode* pattern = stringNode(literals[1]);
        if (!isRegexp) {
            // LIKE patterns are converted to a regular expression first.
            pattern = L(A("Apply"), L(A("Udf"), QA("Re2.PatternFromLike")), pattern);
        }

        auto matcher = L(A("Udf"), QA(isRegexp ? "Re2.Grep" : "Re2.Match"), QL(pattern, L(A("Null"))));
        auto predicate = L(A("Apply"), matcher, A("item"));

        key = makeRangeKey(makeRange(predicate, 2), 3);
    } else {
        AddError(TStringBuilder() << "Unknown table function: " << name);
        return nullptr;
    }

    return L(
        A("Read!"),
        A("world"),
        source,
        key,
        L(A("Void")),
        options
    );
}
4127+
4128+
TAstNode* ParseFuncCall(const FuncCall* value, const TExprSettings& settings, bool rangeFunction, bool& injectRead) {
40134129
AT_LOCATION(value);
40144130
if (ListLength(value->agg_order) > 0) {
40154131
AddError("FuncCall: unsupported agg_order");
@@ -4052,12 +4168,17 @@ class TConverter : public IPGParseEvents {
40524168
}
40534169
}
40544170

4055-
TVector<TString> names;
4056-
if (!ExtractFuncName(value, names)) {
4171+
TString name;
4172+
TString schema;
4173+
if (!ExtractFuncName(value, name, rangeFunction ? &schema : nullptr)) {
40574174
return nullptr;
40584175
}
40594176

4060-
auto name = names.back();
4177+
if (rangeFunction && !schema.empty() && schema != "pg_catalog") {
4178+
injectRead = true;
4179+
return ParseTableRangeFunction(name, schema, value->args);
4180+
}
4181+
40614182
if (name == "shobj_description" || name == "obj_description") {
40624183
AddWarning(TIssuesIds::PG_COMPAT, name + " function forced to NULL");
40634184
return L(A("Null"));
@@ -4159,7 +4280,8 @@ class TConverter : public IPGParseEvents {
41594280
return VL(args.data(), args.size());
41604281
}
41614282

4162-
bool ExtractFuncName(const FuncCall* value, TVector<TString>& names) {
4283+
bool ExtractFuncName(const FuncCall* value, TString& name, TString* schemaName) {
4284+
TVector<TString> names;
41634285
for (int i = 0; i < ListLength(value->funcname); ++i) {
41644286
auto x = ListNodeNth(value->funcname, i);
41654287
if (NodeTag(x) != T_String) {
@@ -4180,11 +4302,18 @@ class TConverter : public IPGParseEvents {
41804302
return false;
41814303
}
41824304

4183-
if (names.size() == 2 && names[0] != "pg_catalog") {
4184-
AddError(TStringBuilder() << "FuncCall: expected pg_catalog, but got: " << names[0]);
4185-
return false;
4305+
if (names.size() == 2) {
4306+
if (!schemaName && names[0] != "pg_catalog") {
4307+
AddError(TStringBuilder() << "FuncCall: expected pg_catalog, but got: " << names[0]);
4308+
return false;
4309+
}
4310+
4311+
if (schemaName) {
4312+
*schemaName = names[0];
4313+
}
41864314
}
41874315

4316+
name = names.back();
41884317
return true;
41894318
}
41904319

ydb/library/yql/tests/sql/dq_file/part17/canondata/result.json

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2288,6 +2288,28 @@
22882288
}
22892289
],
22902290
"test.test[pg-sublink_having_any-default.txt-Results]": [],
2291+
"test.test[pg-table_func-default.txt-Analyze]": [
2292+
{
2293+
"checksum": "90f90cb0bb8d60304471e5cf9a37436a",
2294+
"size": 22788,
2295+
"uri": "https://{canondata_backend}/1917492/73fd38e9ffcd658585f52c248a634ae9046b0ff5/resource.tar.gz#test.test_pg-table_func-default.txt-Analyze_/plan.txt"
2296+
}
2297+
],
2298+
"test.test[pg-table_func-default.txt-Debug]": [
2299+
{
2300+
"checksum": "009e570dc4b46891c5263130b7e90036",
2301+
"size": 6644,
2302+
"uri": "https://{canondata_backend}/1917492/73fd38e9ffcd658585f52c248a634ae9046b0ff5/resource.tar.gz#test.test_pg-table_func-default.txt-Debug_/opt.yql_patched"
2303+
}
2304+
],
2305+
"test.test[pg-table_func-default.txt-Plan]": [
2306+
{
2307+
"checksum": "90f90cb0bb8d60304471e5cf9a37436a",
2308+
"size": 22788,
2309+
"uri": "https://{canondata_backend}/1917492/73fd38e9ffcd658585f52c248a634ae9046b0ff5/resource.tar.gz#test.test_pg-table_func-default.txt-Plan_/plan.txt"
2310+
}
2311+
],
2312+
"test.test[pg-table_func-default.txt-Results]": [],
22912313
"test.test[pg-tpcds-q20-default.txt-Analyze]": [
22922314
{
22932315
"checksum": "212be881133a20b5b73ef1250dbeda51",

ydb/library/yql/tests/sql/hybrid_file/part10/canondata/result.json

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2295,6 +2295,20 @@
22952295
"uri": "https://{canondata_backend}/1775319/3515b86fb929979a6751f93bd43a0291eaa01262/resource.tar.gz#test.test_pg-sublink_projection_exists_corr-default.txt-Plan_/plan.txt"
22962296
}
22972297
],
2298+
"test.test[pg-table_func-default.txt-Debug]": [
2299+
{
2300+
"checksum": "f58d79752c5632a904d7c675fd2cd887",
2301+
"size": 6681,
2302+
"uri": "https://{canondata_backend}/1871182/03581f8f43b6630387f93dcffb64efda102a5104/resource.tar.gz#test.test_pg-table_func-default.txt-Debug_/opt.yql_patched"
2303+
}
2304+
],
2305+
"test.test[pg-table_func-default.txt-Plan]": [
2306+
{
2307+
"checksum": "95e2fb9330b8431fa9d166b01b6a47b0",
2308+
"size": 19319,
2309+
"uri": "https://{canondata_backend}/1871182/03581f8f43b6630387f93dcffb64efda102a5104/resource.tar.gz#test.test_pg-table_func-default.txt-Plan_/plan.txt"
2310+
}
2311+
],
22982312
"test.test[pg-tpcds-q07-default.txt-Debug]": [
22992313
{
23002314
"checksum": "f61d3822f18e6a66d0991534554f20fb",

ydb/library/yql/tests/sql/sql2yql/canondata/result.json

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14076,6 +14076,13 @@
1407614076
"uri": "https://{canondata_backend}/1881367/79a71c1478c556da1931a7565c12bdd14cc63567/resource.tar.gz#test_sql2yql.test_pg-sublink_where_in_corr_/sql.yql"
1407714077
}
1407814078
],
14079+
"test_sql2yql.test[pg-table_func]": [
14080+
{
14081+
"checksum": "52fc030d0a5ec71d08efd26d9f101c65",
14082+
"size": 8198,
14083+
"uri": "https://{canondata_backend}/1784826/4a52e4f284dee1aa5ddb5ef05566fbf6d624ec38/resource.tar.gz#test_sql2yql.test_pg-table_func_/sql.yql"
14084+
}
14085+
],
1407914086
"test_sql2yql.test[pg-tpcds-q01]": [
1408014087
{
1408114088
"checksum": "d7a119a877ea0e8b9211601d372e99b9",
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
--!syntax_pg
2+
-- concat: every argument is a table name.
select count(*) from plato.concat('Input','Input');
3+
-- concat_view: arguments are (table, view) pairs.
select count(*) from plato.concat_view('Input','raw','Input','raw');
4+
-- range: from 1 to 5 string arguments; the 2nd and 3rd, when present, become
-- the >= and <= bounds of the range filter, and the 5th selects a view.
select count(*) from plato.range('');
5+
select count(*) from plato.range('','A');
6+
select count(*) from plato.range('','A','Z');
7+
select count(*) from plato.range('','A','Z','');
8+
select count(*) from plato.range('','A','Z','','raw');
9+
-- regexp: from 2 to 4 string arguments; the 2nd is the regular expression,
-- the 4th selects a view.
select count(*) from plato.regexp('','Inpu.?');
10+
select count(*) from plato.regexp('','Inpu.?','');
11+
select count(*) from plato.regexp('','Inpu.?','','raw');
12+
-- like: same argument layout as regexp, with a LIKE pattern as the 2nd argument.
select count(*) from plato.like('','Inpu%');
13+
select count(*) from plato.like('','Inpu%','');
14+
select count(*) from plato.like('','Inpu%','','raw');
15+

ydb/library/yql/tests/sql/yt_native_file/part17/canondata/result.json

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2042,6 +2042,27 @@
20422042
"uri": "https://{canondata_backend}/1942415/9dc26178536314feaac77333a6a0e27c8703d1e2/resource.tar.gz#test.test_pg-sublink_having_any-default.txt-Results_/results.txt"
20432043
}
20442044
],
2045+
"test.test[pg-table_func-default.txt-Debug]": [
2046+
{
2047+
"checksum": "afed4824bc574f8c4d4470e01e377627",
2048+
"size": 4991,
2049+
"uri": "https://{canondata_backend}/1784826/f75802903846b7b25514887495c6eaa1859db07a/resource.tar.gz#test.test_pg-table_func-default.txt-Debug_/opt.yql"
2050+
}
2051+
],
2052+
"test.test[pg-table_func-default.txt-Plan]": [
2053+
{
2054+
"checksum": "bf2b0c772eaf69c15399605d7fbd7b0e",
2055+
"size": 14773,
2056+
"uri": "https://{canondata_backend}/1784826/f75802903846b7b25514887495c6eaa1859db07a/resource.tar.gz#test.test_pg-table_func-default.txt-Plan_/plan.txt"
2057+
}
2058+
],
2059+
"test.test[pg-table_func-default.txt-Results]": [
2060+
{
2061+
"checksum": "db24edd3094d41f02121a7b1f3629af3",
2062+
"size": 9490,
2063+
"uri": "https://{canondata_backend}/1784826/f75802903846b7b25514887495c6eaa1859db07a/resource.tar.gz#test.test_pg-table_func-default.txt-Results_/results.txt"
2064+
}
2065+
],
20452066
"test.test[pg-tpcds-q20-default.txt-Debug]": [
20462067
{
20472068
"checksum": "3d98e10d734329d04d97423b4026d52d",

0 commit comments

Comments
 (0)