Skip to content

Commit 8637bd3

Browse files
authored
Fixed count(*) inside correlated queries w/o group by (#2916)
1 parent a78ceaf commit 8637bd3

File tree

6 files changed: 144 additions (+144) and 13 deletions (-13).

ydb/library/yql/core/common_opt/yql_co_pgselect.cpp

+73-13
Original file line number | Diff line number | Diff line change
@@ -84,7 +84,7 @@ std::pair<TExprNode::TPtr, TExprNode::TPtr> SplitByPredicate(TPositionHandle pos
8484
}
8585

8686
TExprNode::TPtr JoinColumns(TPositionHandle pos, const TExprNode::TPtr& list1, const TExprNode::TPtr& list2,
87-
TExprNode::TPtr leftJoinColumns, ui32 subLinkId, TExprContext& ctx, const TString& leftPrefix = {}) {
87+
TExprNode::TPtr leftJoinColumns, TMaybe<ui32> subLinkId, TExprContext& ctx, const TString& leftPrefix = {}) {
8888
auto join = ctx.Builder(pos)
8989
.Callable("EquiJoin")
9090
.List(0)
@@ -116,8 +116,8 @@ TExprNode::TPtr JoinColumns(TPositionHandle pos, const TExprNode::TPtr& list1, c
116116
if (leftJoinColumns) {
117117
for (ui32 i = 0; i < leftJoinColumns->ChildrenSize(); ++i) {
118118
parent.Atom(2 * i, "b");
119-
parent.Atom(2 * i + 1, TString("_yql_join_sublink_") + ToString(subLinkId) +
120-
"_" + leftJoinColumns->Child(i)->Content() );
119+
parent.Atom(2 * i + 1, subLinkId ? TString("_yql_join_sublink_") + ToString(*subLinkId) +
120+
"_" + leftJoinColumns->Child(i)->Content() : leftJoinColumns->ChildPtr(i)->Content());
121121
}
122122
}
123123

@@ -133,8 +133,8 @@ TExprNode::TPtr JoinColumns(TPositionHandle pos, const TExprNode::TPtr& list1, c
133133
for (ui32 i = 0; i < leftJoinColumns->ChildrenSize(); ++i) {
134134
parent.List(i)
135135
.Atom(0, "rename")
136-
.Atom(1, TString("b._yql_join_sublink_") + ToString(subLinkId) +
137-
"_" + leftJoinColumns->Child(i)->Content())
136+
.Atom(1, TString("b.") + (subLinkId ? (TString("_yql_join_sublink_") + ToString(*subLinkId) +
137+
"_") : "") + leftJoinColumns->Child(i)->Content())
138138
.Atom(2, "")
139139
.Seal();
140140
}
@@ -1335,7 +1335,7 @@ TExprNode::TPtr BuildSingleInputPredicateJoin(TPositionHandle pos, TStringBuf jo
13351335
.Seal()
13361336
.Build();
13371337

1338-
auto main = JoinColumns(pos, filteredLeft, right, nullptr, 0, ctx);
1338+
auto main = JoinColumns(pos, filteredLeft, right, nullptr, {}, ctx);
13391339

13401340
auto extraLeft = [&]() {
13411341
return ctx.Builder(pos)
@@ -1514,7 +1514,7 @@ std::tuple<TVector<ui32>, TExprNode::TListType> BuildJoinGroups(TPositionHandle
15141514
// current = join current & with
15151515
auto join = groupTuple->Child(i);
15161516
auto joinType = join->Child(0)->Content();
1517-
auto cartesian = JoinColumns(pos, current, with, nullptr, 0, ctx);
1517+
auto cartesian = JoinColumns(pos, current, with, nullptr, {}, ctx);
15181518
if (joinType == "cross") {
15191519
current = cartesian;
15201520
continue;
@@ -1944,7 +1944,8 @@ TExprNode::TPtr BuildAggregationTraits(TPositionHandle pos, bool onWindow, const
19441944

19451945
TExprNode::TPtr BuildGroup(TPositionHandle pos, TExprNode::TPtr list,
19461946
const TAggs& aggs, const TExprNode::TPtr& groupExprs, const TExprNode::TPtr& groupSets,
1947-
const TExprNode::TPtr& finalExtTypes, TExprContext& ctx, TOptimizeContext& optCtx) {
1947+
const TExprNode::TPtr& finalExtTypes, const TExprNode::TPtr& joinedUniqueExt,
1948+
TExprContext& ctx, TOptimizeContext& optCtx) {
19481949

19491950
bool needRemapForDistinct = false;
19501951
for (ui32 i = 0; i < aggs.size(); ++i) {
@@ -2008,7 +2009,12 @@ TExprNode::TPtr BuildGroup(TPositionHandle pos, TExprNode::TPtr list,
20082009
.Build();
20092010

20102011
TExprNode::TListType payloadItems;
2012+
TVector<ui32> nonNullDefAggs;
20112013
for (ui32 i = 0; i < aggs.size(); ++i) {
2014+
if (aggs[i].first->Head().Content() == "count" && aggs[i].first->ChildrenSize() == 2) {
2015+
nonNullDefAggs.push_back(i);
2016+
}
2017+
20122018
const bool distinct = GetSetting(*aggs[i].first->Child(1), "distinct") != nullptr;
20132019
auto traits = BuildAggregationTraits(pos, false, distinct ? "_yql_distinct_" + ToString(i) : "", aggs[i], listTypeNode, nullptr, ctx, optCtx);
20142020
if (distinct) {
@@ -2129,6 +2135,50 @@ TExprNode::TPtr BuildGroup(TPositionHandle pos, TExprNode::TPtr list,
21292135
.Seal()
21302136
.Build();
21312137

2138+
if (!extKeysItems.empty() && !nonNullDefAggs.empty()) {
2139+
// restore aggregation keys
2140+
auto joinColumns = ctx.NewList(pos, TExprNode::TListType(extKeysItems));
2141+
auto joined = JoinColumns(pos, joinedUniqueExt, aggregate, joinColumns, {}, ctx);
2142+
2143+
auto pgZero = ctx.Builder(pos)
2144+
.Callable("PgConst")
2145+
.Atom(0, "0")
2146+
.Callable(1, "PgType")
2147+
.Atom(0, "int8")
2148+
.Seal()
2149+
.Seal()
2150+
.Build();
2151+
2152+
auto arg = ctx.NewArgument(pos, "row");
2153+
auto root = arg;
2154+
for (ui32 i = 0; i < nonNullDefAggs.size(); ++i) {
2155+
auto column = ToString("_yql_agg_") + ToString(nonNullDefAggs[i]);
2156+
root = ctx.Builder(pos)
2157+
.Callable("ReplaceMember")
2158+
.Add(0, root)
2159+
.Atom(1, column)
2160+
.Callable(2, "Coalesce")
2161+
.Callable(0, "Member")
2162+
.Add(0, root)
2163+
.Atom(1, column)
2164+
.Seal()
2165+
.Add(1, pgZero)
2166+
.Seal()
2167+
.Seal()
2168+
.Build();
2169+
}
2170+
2171+
auto coalesceLambda = ctx.NewLambda(pos, ctx.NewArguments(pos, { arg }), std::move(root));
2172+
2173+
// replace nulls with def values
2174+
aggregate = ctx.Builder(pos)
2175+
.Callable("OrderedMap")
2176+
.Add(0, joined)
2177+
.Add(1, coalesceLambda)
2178+
.Seal()
2179+
.Build();
2180+
}
2181+
21322182
if (currentKeys.size() < groupExprs->Tail().ChildrenSize()) {
21332183
// mark missing columns
21342184
aggregate = ctx.Builder(pos)
@@ -2971,12 +3021,13 @@ TExprNode::TPtr RemoveExtraSortColumns(const TExprNode::TPtr& list, const TExprN
29713021
.Build();
29723022
}
29733023

2974-
TExprNode::TPtr JoinOuter(TPositionHandle pos, TExprNode::TPtr list,
3024+
std::pair<TExprNode::TPtr, TExprNode::TPtr> JoinOuter(TPositionHandle pos, TExprNode::TPtr list,
29753025
const TExprNode::TPtr& finalExtTypes, const TExprNode::TListType& outerInputs,
29763026
const TVector<TString>& outerInputAliases,
29773027
TExprNode::TListType& cleanedInputs, TVector<TString>& inputAliases, TExprContext& ctx) {
29783028
YQL_ENSURE(finalExtTypes);
29793029
YQL_ENSURE(outerInputs.size() == finalExtTypes->Tail().ChildrenSize());
3030+
TExprNode::TPtr joinedUniqueExt;
29803031
for (ui32 index = 0; index < finalExtTypes->Tail().ChildrenSize(); ++index) {
29813032
const auto& input = finalExtTypes->Tail().Child(index);
29823033
const auto& inputAlias = input->Head().Content();
@@ -3018,10 +3069,18 @@ TExprNode::TPtr JoinOuter(TPositionHandle pos, TExprNode::TPtr list,
30183069
.Seal()
30193070
.Build();
30203071

3021-
list = JoinColumns(pos, list, uniqueOuterInput, nullptr, 0, ctx);
3072+
if (!joinedUniqueExt) {
3073+
joinedUniqueExt = uniqueOuterInput;
3074+
} else {
3075+
joinedUniqueExt = JoinColumns(pos, joinedUniqueExt, uniqueOuterInput, nullptr, {}, ctx);
3076+
}
3077+
}
3078+
3079+
if (joinedUniqueExt) {
3080+
list = JoinColumns(pos, list, joinedUniqueExt, nullptr, {}, ctx);
30223081
}
30233082

3024-
return list;
3083+
return { list, joinedUniqueExt };
30253084
}
30263085

30273086
TExprNode::TPtr CombineSetItems(TPositionHandle pos, const TExprNode::TPtr& left, const TExprNode::TPtr& right, const TStringBuf& op, TExprContext& ctx) {
@@ -3328,8 +3387,9 @@ TExprNode::TPtr ExpandPgSelectImpl(const TExprNode::TPtr& node, TExprContext& ct
33283387
}
33293388
}
33303389

3390+
TExprNode::TPtr joinedUniqueExt;
33313391
if (!outerInputs.empty() && finalExtTypes && 0 < finalExtTypes->Tail().ChildrenSize()) {
3332-
list = JoinOuter(node->Pos(), list, finalExtTypes, outerInputs, outerInputAliases, cleanedInputs, inputAliases, ctx);
3392+
std::tie(list, joinedUniqueExt) = JoinOuter(node->Pos(), list, finalExtTypes, outerInputs, outerInputAliases, cleanedInputs, inputAliases, ctx);
33333393
}
33343394

33353395
if (filter) {
@@ -3368,7 +3428,7 @@ TExprNode::TPtr ExpandPgSelectImpl(const TExprNode::TPtr& node, TExprContext& ct
33683428
}
33693429

33703430
if (groupExprs) {
3371-
list = BuildGroup(node->Pos(), list, aggs, groupExprs, groupSets, finalExtTypes, ctx, optCtx);
3431+
list = BuildGroup(node->Pos(), list, aggs, groupExprs, groupSets, finalExtTypes, joinedUniqueExt, ctx, optCtx);
33723432
}
33733433

33743434
if (having) {

ydb/library/yql/tests/sql/dq_file/part15/canondata/result.json

+22
Original file line number | Diff line number | Diff line change
@@ -1839,6 +1839,28 @@
18391839
"uri": "file://test.test_params-primitives--Results_/extracted"
18401840
}
18411841
],
1842+
"test.test[pg-pg_corr_count-default.txt-Analyze]": [
1843+
{
1844+
"checksum": "b4dd508a329723c74293d80f0278c705",
1845+
"size": 505,
1846+
"uri": "https://{canondata_backend}/1814674/8032c8c75c4a0135917efb7e8a36a553203d3792/resource.tar.gz#test.test_pg-pg_corr_count-default.txt-Analyze_/plan.txt"
1847+
}
1848+
],
1849+
"test.test[pg-pg_corr_count-default.txt-Debug]": [
1850+
{
1851+
"checksum": "7c2fd26a8c3f66ec85afda5c68a365e9",
1852+
"size": 2975,
1853+
"uri": "https://{canondata_backend}/1814674/8032c8c75c4a0135917efb7e8a36a553203d3792/resource.tar.gz#test.test_pg-pg_corr_count-default.txt-Debug_/opt.yql_patched"
1854+
}
1855+
],
1856+
"test.test[pg-pg_corr_count-default.txt-Plan]": [
1857+
{
1858+
"checksum": "b4dd508a329723c74293d80f0278c705",
1859+
"size": 505,
1860+
"uri": "https://{canondata_backend}/1814674/8032c8c75c4a0135917efb7e8a36a553203d3792/resource.tar.gz#test.test_pg-pg_corr_count-default.txt-Plan_/plan.txt"
1861+
}
1862+
],
1863+
"test.test[pg-pg_corr_count-default.txt-Results]": [],
18421864
"test.test[pg-pg_types_array_literal-default.txt-Analyze]": [
18431865
{
18441866
"checksum": "b4dd508a329723c74293d80f0278c705",

ydb/library/yql/tests/sql/hybrid_file/part3/canondata/result.json

+14
Original file line number | Diff line number | Diff line change
@@ -1917,6 +1917,20 @@
19171917
"uri": "https://{canondata_backend}/1936842/11d23d4a39031af80d6dc470ce99f9427771e7d4/resource.tar.gz#test.test_pg-pg_array_compare-default.txt-Plan_/plan.txt"
19181918
}
19191919
],
1920+
"test.test[pg-pg_corr_count-default.txt-Debug]": [
1921+
{
1922+
"checksum": "35937f42dd5cfc0c8f99b7af7031bdf7",
1923+
"size": 2974,
1924+
"uri": "https://{canondata_backend}/1931696/fca86c589326e9bc05817a71a47f8b9d16219dcc/resource.tar.gz#test.test_pg-pg_corr_count-default.txt-Debug_/opt.yql_patched"
1925+
}
1926+
],
1927+
"test.test[pg-pg_corr_count-default.txt-Plan]": [
1928+
{
1929+
"checksum": "b4dd508a329723c74293d80f0278c705",
1930+
"size": 505,
1931+
"uri": "https://{canondata_backend}/1931696/fca86c589326e9bc05817a71a47f8b9d16219dcc/resource.tar.gz#test.test_pg-pg_corr_count-default.txt-Plan_/plan.txt"
1932+
}
1933+
],
19201934
"test.test[pg-pg_types_window1-default.txt-Debug]": [
19211935
{
19221936
"checksum": "49054099e0442c8e0e079eb2db22f624",

ydb/library/yql/tests/sql/sql2yql/canondata/result.json

+7
Original file line number | Diff line number | Diff line change
@@ -11255,6 +11255,13 @@
1125511255
"uri": "https://{canondata_backend}/1817427/5ebfc414781230f90b896d4ed143ea00cf4c7aaa/resource.tar.gz#test_sql2yql.test_pg-pg_column_case_/sql.yql"
1125611256
}
1125711257
],
11258+
"test_sql2yql.test[pg-pg_corr_count]": [
11259+
{
11260+
"checksum": "f00ab698b55ffaf7954edda1a6b26de6",
11261+
"size": 1193,
11262+
"uri": "https://{canondata_backend}/1942173/ecd084687ae5760c5e06e044ba0475f6a397ad72/resource.tar.gz#test_sql2yql.test_pg-pg_corr_count_/sql.yql"
11263+
}
11264+
],
1125811265
"test_sql2yql.test[pg-pg_in_dict_key_with_stable_pickle]": [
1125911266
{
1126011267
"checksum": "6a6eee65e3a6d24460e366d794c5ae6a",
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,7 @@
1+
--!syntax_pg
2+
SELECT y,
3+
(select count(*) from (values (1),(2),(3)) a(x) where a.x=y
4+
)
5+
FROM
6+
(values (4)) b(y)
7+

ydb/library/yql/tests/sql/yt_native_file/part15/canondata/result.json

+21
Original file line number | Diff line number | Diff line change
@@ -1641,6 +1641,27 @@
16411641
"uri": "file://test.test_params-primitives--Results_/extracted"
16421642
}
16431643
],
1644+
"test.test[pg-pg_corr_count-default.txt-Debug]": [
1645+
{
1646+
"checksum": "a4b15e574922faca68b49f8374cc5536",
1647+
"size": 2914,
1648+
"uri": "https://{canondata_backend}/1814674/325fbee1dd0f68af7c932136e87a3e4238a54039/resource.tar.gz#test.test_pg-pg_corr_count-default.txt-Debug_/opt.yql"
1649+
}
1650+
],
1651+
"test.test[pg-pg_corr_count-default.txt-Plan]": [
1652+
{
1653+
"checksum": "b4dd508a329723c74293d80f0278c705",
1654+
"size": 505,
1655+
"uri": "https://{canondata_backend}/1814674/325fbee1dd0f68af7c932136e87a3e4238a54039/resource.tar.gz#test.test_pg-pg_corr_count-default.txt-Plan_/plan.txt"
1656+
}
1657+
],
1658+
"test.test[pg-pg_corr_count-default.txt-Results]": [
1659+
{
1660+
"checksum": "b8d526ff0918161871c513efedd04afb",
1661+
"size": 946,
1662+
"uri": "https://{canondata_backend}/1814674/325fbee1dd0f68af7c932136e87a3e4238a54039/resource.tar.gz#test.test_pg-pg_corr_count-default.txt-Results_/results.txt"
1663+
}
1664+
],
16441665
"test.test[pg-pg_types_array_literal-default.txt-Debug]": [
16451666
{
16461667
"checksum": "cf667cff95b19d7112b1c0792044f364",

0 commit comments

Comments
 (0)