From 881c60a481f8df3947d5c04f904601f6163ab45a Mon Sep 17 00:00:00 2001 From: Andrey Neporada Date: Sun, 7 Jan 2024 20:40:05 +0300 Subject: [PATCH] allow fieldsubset optimizer with multiusage input --- .../yql/core/common_opt/yql_co_flow2.cpp | 267 +++++++++--------- ydb/library/yql/core/yql_type_annotation.h | 1 + .../providers/config/yql_config_provider.cpp | 9 + .../sql/dq_file/part14/canondata/result.json | 22 ++ .../sql/dq_file/part3/canondata/result.json | 22 ++ .../tests/sql/sql2yql/canondata/result.json | 28 ++ .../group_by_ru_join_simple_fs_multiusage.cfg | 1 + .../group_by_ru_join_simple_fs_multiusage.sql | 6 + .../length_over_merge_fs_multiusage.cfg | 2 + .../length_over_merge_fs_multiusage.sql | 14 + .../part14/canondata/result.json | 21 ++ .../part3/canondata/result.json | 21 ++ 12 files changed, 280 insertions(+), 134 deletions(-) create mode 100644 ydb/library/yql/tests/sql/suites/aggregate/group_by_ru_join_simple_fs_multiusage.cfg create mode 100644 ydb/library/yql/tests/sql/suites/aggregate/group_by_ru_join_simple_fs_multiusage.sql create mode 100644 ydb/library/yql/tests/sql/suites/optimizers/length_over_merge_fs_multiusage.cfg create mode 100644 ydb/library/yql/tests/sql/suites/optimizers/length_over_merge_fs_multiusage.sql diff --git a/ydb/library/yql/core/common_opt/yql_co_flow2.cpp b/ydb/library/yql/core/common_opt/yql_co_flow2.cpp index 4faa6e35734c..32e19059bb6c 100644 --- a/ydb/library/yql/core/common_opt/yql_co_flow2.cpp +++ b/ydb/library/yql/core/common_opt/yql_co_flow2.cpp @@ -16,6 +16,12 @@ namespace { using namespace NNodes; +bool AllowSubsetFieldsForNode(const TExprNode& node, const TOptimizeContext& optCtx) { + YQL_ENSURE(optCtx.Types); + static const TString multiUsageFlags = to_lower(TString("FieldSubsetEnableMultiusage")); + return optCtx.IsSingleUsage(node) || optCtx.Types->OptimizerFlags.contains(multiUsageFlags); +} + TExprNode::TPtr AggregateSubsetFieldsAnalyzer(const TCoAggregate& node, TExprContext& ctx, const TParentsMap& parentsMap) { auto inputType = node.Input().Ref().GetTypeAnn(); auto structType = inputType->GetKind() == ETypeAnnotationKind::List @@ -135,15 +141,10 @@ TExprNode::TPtr AggregateSubsetFieldsAnalyzer(const TCoAggregate& node, TExprCon return ret; } -TExprNode::TPtr FlatMapSubsetFields(const TCoFlatMapBase& node, TExprContext& ctx, const TParentsMap& parentsMap) { - auto it = parentsMap.find(node.Input().Raw()); - YQL_ENSURE(it != parentsMap.cend()); - auto inputParentsCount = it->second.size(); - - if (inputParentsCount > 1) { +TExprNode::TPtr FlatMapSubsetFields(const TCoFlatMapBase& node, TExprContext& ctx, TOptimizeContext& optCtx) { + if (!AllowSubsetFieldsForNode(node.Input().Ref(), optCtx)) { return node.Ptr(); } - auto itemArg = node.Lambda().Args().Arg(0); auto itemType = itemArg.Ref().GetTypeAnn(); if (itemType->GetKind() != ETypeAnnotationKind::Struct) { @@ -156,7 +157,7 @@ TExprNode::TPtr FlatMapSubsetFields(const TCoFlatMapBase& node, TExprContext& ct } TSet usedFields; - if (!HaveFieldsSubset(node.Lambda().Body().Ptr(), itemArg.Ref(), usedFields, parentsMap)) { + if (!HaveFieldsSubset(node.Lambda().Body().Ptr(), itemArg.Ref(), usedFields, *optCtx.ParentsMap)) { return node.Ptr(); } @@ -1108,147 +1109,145 @@ void RegisterCoFlowCallables2(TCallableOptimizerMap& map) { map["FlatMap"] = map["OrderedFlatMap"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) -> TExprNode::TPtr { TCoFlatMapBase self(node); - if (!optCtx.IsSingleUsage(self.Input().Ref())) { - return node; - } - - if (self.Input().Ref().IsCallable("EquiJoin")) { - auto ret = FlatMapOverEquiJoin(self, ctx, *optCtx.ParentsMap, false); - if (!ret.Raw()) { - return nullptr; - } + if (optCtx.IsSingleUsage(self.Input().Ref())) { + if (self.Input().Ref().IsCallable("EquiJoin")) { + auto ret = FlatMapOverEquiJoin(self, ctx, *optCtx.ParentsMap, false); + if (!ret.Raw()) { + return nullptr; + } - if (ret.Raw() != self.Raw()) { - YQL_CLOG(DEBUG, Core) << node->Content() << "OverEquiJoin"; - return ret.Ptr(); + if (ret.Raw() != self.Raw()) { + YQL_CLOG(DEBUG, Core) << node->Content() << "OverEquiJoin"; + return ret.Ptr(); + } } - } - if (self.Input().Ref().IsCallable("Aggregate")) { - auto ret = FilterOverAggregate(self, ctx, *optCtx.ParentsMap); - if (!ret.Raw()) { - return nullptr; - } + if (self.Input().Ref().IsCallable("Aggregate")) { + auto ret = FilterOverAggregate(self, ctx, *optCtx.ParentsMap); + if (!ret.Raw()) { + return nullptr; + } - if (ret.Raw() != self.Raw()) { - YQL_CLOG(DEBUG, Core) << "Filter over Aggregate"; - return ret.Ptr(); + if (ret.Raw() != self.Raw()) { + YQL_CLOG(DEBUG, Core) << "Filter over Aggregate"; + return ret.Ptr(); + } } - } - if (self.Input().Ref().IsCallable(TCoGroupingCore::CallableName())) { - auto groupingCore = self.Input().Cast(); - const TExprNode* extract = nullptr; - // Find pattern: (FlatMap (GroupingCore ...) (lambda (x) ( ... (ExtractMembers (Nth x '1) ...)))) - const auto arg = self.Lambda().Args().Arg(0).Raw(); - if (const auto parents = optCtx.ParentsMap->find(arg); parents != optCtx.ParentsMap->cend()) { - for (const auto& parent : parents->second) { - if (parent->IsCallable(TCoNth::CallableName()) && &parent->Head() == arg && parent->Tail().Content() == "1") { - if (const auto nthParents = optCtx.ParentsMap->find(parent); nthParents != optCtx.ParentsMap->cend()) { - if (nthParents->second.size() == 1 && (*nthParents->second.begin())->IsCallable(TCoExtractMembers::CallableName())) { - extract = *nthParents->second.begin(); - break; + if (self.Input().Ref().IsCallable(TCoGroupingCore::CallableName())) { + auto groupingCore = self.Input().Cast(); + const TExprNode* extract = nullptr; + // Find pattern: (FlatMap (GroupingCore ...) (lambda (x) ( ... (ExtractMembers (Nth x '1) ...)))) + const auto arg = self.Lambda().Args().Arg(0).Raw(); + if (const auto parents = optCtx.ParentsMap->find(arg); parents != optCtx.ParentsMap->cend()) { + for (const auto& parent : parents->second) { + if (parent->IsCallable(TCoNth::CallableName()) && &parent->Head() == arg && parent->Tail().Content() == "1") { + if (const auto nthParents = optCtx.ParentsMap->find(parent); nthParents != optCtx.ParentsMap->cend()) { + if (nthParents->second.size() == 1 && (*nthParents->second.begin())->IsCallable(TCoExtractMembers::CallableName())) { + extract = *nthParents->second.begin(); + break; + } } } } } - } - if (extract) { - if (const auto handler = groupingCore.ConvertHandler()) { - auto newBody = Build(ctx, handler.Cast().Body().Pos()) - .Struct(handler.Cast().Body()) - .Type(ExpandType(handler.Cast().Body().Pos(), GetSeqItemType(*extract->GetTypeAnn()), ctx)) - .Done(); - - groupingCore = Build(ctx, groupingCore.Pos()) - .InitFrom(groupingCore) - .ConvertHandler() - .Args({"item"}) - .Body() - .Apply(newBody) - .With(handler.Cast().Args().Arg(0), "item") + if (extract) { + if (const auto handler = groupingCore.ConvertHandler()) { + auto newBody = Build(ctx, handler.Cast().Body().Pos()) + .Struct(handler.Cast().Body()) + .Type(ExpandType(handler.Cast().Body().Pos(), GetSeqItemType(*extract->GetTypeAnn()), ctx)) + .Done(); + + groupingCore = Build(ctx, groupingCore.Pos()) + .InitFrom(groupingCore) + .ConvertHandler() + .Args({"item"}) + .Body() + .Apply(newBody) + .With(handler.Cast().Args().Arg(0), "item") + .Build() .Build() - .Build() - .Done(); - - YQL_CLOG(DEBUG, Core) << "Pull out " << extract->Content() << " from " << node->Content() << " to " << groupingCore.Ref().Content() << " handler"; - return Build(ctx, node->Pos()) - .CallableName(node->Content()) - .Input(groupingCore) - .Lambda(ctx.DeepCopyLambda(self.Lambda().Ref())) - .Done().Ptr(); - } - - std::map usedFields; - auto fields = extract->Tail().ChildrenList(); - std::for_each(fields.cbegin(), fields.cend(), [&](const TExprNode::TPtr& field) { usedFields.emplace(field->Content(), field); }); - - if (HaveFieldsSubset(groupingCore.KeyExtractor().Body().Ptr(), groupingCore.KeyExtractor().Args().Arg(0).Ref(), usedFields, *optCtx.ParentsMap, false) - && !usedFields.empty() - && HaveFieldsSubset(groupingCore.GroupSwitch().Body().Ptr(), groupingCore.GroupSwitch().Args().Arg(1).Ref(), usedFields, *optCtx.ParentsMap, false) - && !usedFields.empty() - && usedFields.size() < GetSeqItemType(*groupingCore.Input().Ref().GetTypeAnn()).Cast()->GetSize()) { - if (usedFields.size() != fields.size()) { - fields.reserve(usedFields.size()); - fields.clear(); - std::transform(usedFields.begin(), usedFields.end(), std::back_inserter(fields), - [](std::pair& item){ return std::move(item.second); }); + .Done(); + + YQL_CLOG(DEBUG, Core) << "Pull out " << extract->Content() << " from " << node->Content() << " to " << groupingCore.Ref().Content() << " handler"; + return Build(ctx, node->Pos()) + .CallableName(node->Content()) + .Input(groupingCore) + .Lambda(ctx.DeepCopyLambda(self.Lambda().Ref())) + .Done().Ptr(); } - YQL_CLOG(DEBUG, Core) << "Pull out " << extract->Content() << " from " << node->Content() << " to " << groupingCore.Ref().Content() << " input"; - return Build(ctx, node->Pos()) - .CallableName(node->Content()) - .Input() - .Input() - .Input(groupingCore.Input()) - .Members() - .Add(std::move(fields)) + std::map usedFields; + auto fields = extract->Tail().ChildrenList(); + std::for_each(fields.cbegin(), fields.cend(), [&](const TExprNode::TPtr& field) { usedFields.emplace(field->Content(), field); }); + + if (HaveFieldsSubset(groupingCore.KeyExtractor().Body().Ptr(), groupingCore.KeyExtractor().Args().Arg(0).Ref(), usedFields, *optCtx.ParentsMap, false) + && !usedFields.empty() + && HaveFieldsSubset(groupingCore.GroupSwitch().Body().Ptr(), groupingCore.GroupSwitch().Args().Arg(1).Ref(), usedFields, *optCtx.ParentsMap, false) + && !usedFields.empty() + && usedFields.size() < GetSeqItemType(*groupingCore.Input().Ref().GetTypeAnn()).Cast()->GetSize()) { + if (usedFields.size() != fields.size()) { + fields.reserve(usedFields.size()); + fields.clear(); + std::transform(usedFields.begin(), usedFields.end(), std::back_inserter(fields), + [](std::pair& item){ return std::move(item.second); }); + } + + YQL_CLOG(DEBUG, Core) << "Pull out " << extract->Content() << " from " << node->Content() << " to " << groupingCore.Ref().Content() << " input"; + return Build(ctx, node->Pos()) + .CallableName(node->Content()) + .Input() + .Input() + .Input(groupingCore.Input()) + .Members() + .Add(std::move(fields)) + .Build() .Build() + .GroupSwitch(ctx.DeepCopyLambda(groupingCore.GroupSwitch().Ref())) + .KeyExtractor(ctx.DeepCopyLambda(groupingCore.KeyExtractor().Ref())) .Build() - .GroupSwitch(ctx.DeepCopyLambda(groupingCore.GroupSwitch().Ref())) - .KeyExtractor(ctx.DeepCopyLambda(groupingCore.KeyExtractor().Ref())) - .Build() - .Lambda(ctx.DeepCopyLambda(self.Lambda().Ref())) - .Done().Ptr(); + .Lambda(ctx.DeepCopyLambda(self.Lambda().Ref())) + .Done().Ptr(); + } } } - } - if (self.Input().Ref().IsCallable("Take") || self.Input().Ref().IsCallable("Skip") - || self.Input().Maybe()) { - - auto& arg = self.Lambda().Args().Arg(0).Ref(); - auto body = self.Lambda().Body().Ptr(); - TSet usedFields; - if (HaveFieldsSubset(body, arg, usedFields, *optCtx.ParentsMap)) { - YQL_CLOG(DEBUG, Core) << "FieldsSubset in " << node->Content() << " over " << self.Input().Ref().Content(); + if (self.Input().Ref().IsCallable("Take") || self.Input().Ref().IsCallable("Skip") + || self.Input().Maybe()) { + + auto& arg = self.Lambda().Args().Arg(0).Ref(); + auto body = self.Lambda().Body().Ptr(); + TSet usedFields; + if (HaveFieldsSubset(body, arg, usedFields, *optCtx.ParentsMap)) { + YQL_CLOG(DEBUG, Core) << "FieldsSubset in " << node->Content() << " over " << self.Input().Ref().Content(); + + TExprNode::TListType filteredInputs; + filteredInputs.reserve(self.Input().Ref().ChildrenSize()); + for (ui32 index = 0; index < self.Input().Ref().ChildrenSize(); ++index) { + auto x = self.Input().Ref().ChildPtr(index); + if (!self.Input().Maybe() && index > 0) { + filteredInputs.push_back(x); + continue; + } - TExprNode::TListType filteredInputs; - filteredInputs.reserve(self.Input().Ref().ChildrenSize()); - for (ui32 index = 0; index < self.Input().Ref().ChildrenSize(); ++index) { - auto x = self.Input().Ref().ChildPtr(index); - if (!self.Input().Maybe() && index > 0) { - filteredInputs.push_back(x); - continue; + filteredInputs.push_back(FilterByFields(node->Pos(), x, usedFields, ctx, false)); } - filteredInputs.push_back(FilterByFields(node->Pos(), x, usedFields, ctx, false)); - } - - auto newInput = ctx.ChangeChildren(self.Input().Ref(), std::move(filteredInputs)); - return ctx.Builder(node->Pos()) - .Callable(node->Content()) - .Add(0, newInput) - .Lambda(1) - .Param("item") - .Apply(self.Lambda().Ptr()).With(0, "item").Seal() + auto newInput = ctx.ChangeChildren(self.Input().Ref(), std::move(filteredInputs)); + return ctx.Builder(node->Pos()) + .Callable(node->Content()) + .Add(0, newInput) + .Lambda(1) + .Param("item") + .Apply(self.Lambda().Ptr()).With(0, "item").Seal() + .Seal() .Seal() - .Seal() - .Build(); + .Build(); + } } } - auto ret = FlatMapSubsetFields(self, ctx, *optCtx.ParentsMap); + auto ret = FlatMapSubsetFields(self, ctx, optCtx); if (ret != node) { YQL_CLOG(DEBUG, Core) << node->Content() << "SubsetFields"; return ret; @@ -1259,7 +1258,7 @@ void RegisterCoFlowCallables2(TCallableOptimizerMap& map) { map[TCoGroupingCore::CallableName()] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) { TCoGroupingCore self(node); - if (!optCtx.IsSingleUsage(self.Input().Ref())) { + if (!AllowSubsetFieldsForNode(self.Input().Ref(), optCtx)) { return node; } @@ -1299,7 +1298,7 @@ void RegisterCoFlowCallables2(TCallableOptimizerMap& map) { map["CombineByKey"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) { TCoCombineByKey self(node); - if (!optCtx.IsSingleUsage(self.Input().Ref())) { + if (!AllowSubsetFieldsForNode(self.Input().Ref(), optCtx)) { return node; } @@ -1855,7 +1854,7 @@ void RegisterCoFlowCallables2(TCallableOptimizerMap& map) { map["Aggregate"] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) { TCoAggregate self(node); - if (!optCtx.IsSingleUsage(self.Input()) && !optCtx.IsPersistentNode(self.Input())) { + if (!AllowSubsetFieldsForNode(self.Input().Ref(), optCtx) && !optCtx.IsPersistentNode(self.Input())) { return node; } @@ -1890,7 +1889,7 @@ void RegisterCoFlowCallables2(TCallableOptimizerMap& map) { map[TCoCondense::CallableName()] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) { const TCoCondense self(node); - if (!optCtx.IsSingleUsage(self.Input().Ref())) { + if (!AllowSubsetFieldsForNode(self.Input().Ref(), optCtx)) { return node; } @@ -1924,7 +1923,7 @@ void RegisterCoFlowCallables2(TCallableOptimizerMap& map) { map[TCoCondense1::CallableName()] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) { const TCoCondense1 self(node); - if (!optCtx.IsSingleUsage(self.Input().Ref())) { + if (!AllowSubsetFieldsForNode(self.Input().Ref(), optCtx)) { return node; } @@ -1960,7 +1959,7 @@ void RegisterCoFlowCallables2(TCallableOptimizerMap& map) { map[TCoChain1Map::CallableName()] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) { const TCoChain1Map self(node); - if (!optCtx.IsSingleUsage(self.Input().Ref())) { + if (!AllowSubsetFieldsForNode(self.Input().Ref(), optCtx)) { return node; } @@ -1993,7 +1992,7 @@ void RegisterCoFlowCallables2(TCallableOptimizerMap& map) { map[TCoMapNext::CallableName()] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) { TCoMapNext self(node); - if (!optCtx.IsSingleUsage(self.Input().Ref())) { + if (!AllowSubsetFieldsForNode(self.Input().Ref(), optCtx)) { return node; } @@ -2024,7 +2023,7 @@ void RegisterCoFlowCallables2(TCallableOptimizerMap& map) { map[TCoSqueezeToDict::CallableName()] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) { const TCoSqueezeToDict self(node); - if (!optCtx.IsSingleUsage(self.Stream().Ref())) { + if (!AllowSubsetFieldsForNode(self.Stream().Ref(), optCtx)) { return node; } @@ -2058,7 +2057,7 @@ void RegisterCoFlowCallables2(TCallableOptimizerMap& map) { map[TCoCombineCore::CallableName()] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) { const TCoCombineCore self(node); - if (!optCtx.IsSingleUsage(self.Input().Ref())) { + if (!AllowSubsetFieldsForNode(self.Input().Ref(), optCtx)) { return node; } @@ -2096,7 +2095,7 @@ void RegisterCoFlowCallables2(TCallableOptimizerMap& map) { map[TCoMapJoinCore::CallableName()] = [](const TExprNode::TPtr& node, TExprContext& ctx, TOptimizeContext& optCtx) { const TCoMapJoinCore self(node); - if (!optCtx.IsSingleUsage(self.LeftInput().Ref())) { + if (!AllowSubsetFieldsForNode(self.LeftInput().Ref(), optCtx)) { return node; } diff --git a/ydb/library/yql/core/yql_type_annotation.h b/ydb/library/yql/core/yql_type_annotation.h index 4b70a01540a7..ce3c2e0ee54a 100644 --- a/ydb/library/yql/core/yql_type_annotation.h +++ b/ydb/library/yql/core/yql_type_annotation.h @@ -269,6 +269,7 @@ struct TTypeAnnotationContext: public TThrRefBase { // compatibility with v0 or raw s-expression code bool OrderedColumns = false; TColumnOrderStorage::TPtr ColumnOrderStorage = new TColumnOrderStorage; + THashSet OptimizerFlags; TMaybe LookupColumnOrder(const TExprNode& node) const; IGraphTransformer::TStatus SetColumnOrder(const TExprNode& node, const TColumnOrder& columnOrder, TExprContext& ctx); diff --git a/ydb/library/yql/providers/config/yql_config_provider.cpp b/ydb/library/yql/providers/config/yql_config_provider.cpp index 19dd4313ed25..03c7a7f4a4d5 100644 --- a/ydb/library/yql/providers/config/yql_config_provider.cpp +++ b/ydb/library/yql/providers/config/yql_config_provider.cpp @@ -900,6 +900,15 @@ namespace { return false; } } + else if (name == "OptimizerFlags") { + for (auto& arg : args) { + if (arg.empty()) { + ctx.AddError(TIssue(pos, "Empty flags are not supported")); + return false; + } + Types.OptimizerFlags.insert(to_lower(ToString(arg))); + } + } else { ctx.AddError(TIssue(pos, TStringBuilder() << "Unsupported command: " << name)); return false; diff --git a/ydb/library/yql/tests/sql/dq_file/part14/canondata/result.json b/ydb/library/yql/tests/sql/dq_file/part14/canondata/result.json index e84ac2575544..bee87e8e8515 100644 --- a/ydb/library/yql/tests/sql/dq_file/part14/canondata/result.json +++ b/ydb/library/yql/tests/sql/dq_file/part14/canondata/result.json @@ -326,6 +326,28 @@ } ], "test.test[aggregate-group_by_rollup_rename-default.txt-Results]": [], + "test.test[aggregate-group_by_ru_join_simple_fs_multiusage--Analyze]": [ + { + "checksum": "24740252670115e3bbd1772580e48adf", + "size": 9776, + "uri": "https://{canondata_backend}/1781765/75774e90f574004e23fc9aacf32e1f561a8c66ec/resource.tar.gz#test.test_aggregate-group_by_ru_join_simple_fs_multiusage--Analyze_/plan.txt" + } + ], + "test.test[aggregate-group_by_ru_join_simple_fs_multiusage--Debug]": [ + { + "checksum": "94020f8a1e57024f031cdeecd89e11a9", + "size": 5934, + "uri": "https://{canondata_backend}/1781765/75774e90f574004e23fc9aacf32e1f561a8c66ec/resource.tar.gz#test.test_aggregate-group_by_ru_join_simple_fs_multiusage--Debug_/opt.yql_patched" + } + ], + "test.test[aggregate-group_by_ru_join_simple_fs_multiusage--Plan]": [ + { + "checksum": "24740252670115e3bbd1772580e48adf", + "size": 9776, + "uri": "https://{canondata_backend}/1781765/75774e90f574004e23fc9aacf32e1f561a8c66ec/resource.tar.gz#test.test_aggregate-group_by_ru_join_simple_fs_multiusage--Plan_/plan.txt" + } + ], + "test.test[aggregate-group_by_ru_join_simple_fs_multiusage--Results]": [], "test.test[ansi_idents-escaped_udf_name-default.txt-Analyze]": [ { "checksum": "0f0658d4cb6fcef484dd34676f7939e7", diff --git a/ydb/library/yql/tests/sql/dq_file/part3/canondata/result.json b/ydb/library/yql/tests/sql/dq_file/part3/canondata/result.json index 153cf221a492..ae3c217c85cc 100644 --- a/ydb/library/yql/tests/sql/dq_file/part3/canondata/result.json +++ b/ydb/library/yql/tests/sql/dq_file/part3/canondata/result.json @@ -1500,6 +1500,28 @@ } ], "test.test[optimizers-direct_row_after_merge--Results]": [], + "test.test[optimizers-length_over_merge_fs_multiusage--Analyze]": [ + { + "checksum": "212be881133a20b5b73ef1250dbeda51", + "size": 960, + "uri": "https://{canondata_backend}/1599023/892497444bbacbe92ad2c557c09c697b859ad48d/resource.tar.gz#test.test_optimizers-length_over_merge_fs_multiusage--Analyze_/plan.txt" + } + ], + "test.test[optimizers-length_over_merge_fs_multiusage--Debug]": [ + { + "checksum": "880889c8fc79fc17434df304ebe066fb", + "size": 2791, + "uri": "https://{canondata_backend}/1599023/892497444bbacbe92ad2c557c09c697b859ad48d/resource.tar.gz#test.test_optimizers-length_over_merge_fs_multiusage--Debug_/opt.yql_patched" + } + ], + "test.test[optimizers-length_over_merge_fs_multiusage--Plan]": [ + { + "checksum": "212be881133a20b5b73ef1250dbeda51", + "size": 960, + "uri": "https://{canondata_backend}/1599023/892497444bbacbe92ad2c557c09c697b859ad48d/resource.tar.gz#test.test_optimizers-length_over_merge_fs_multiusage--Plan_/plan.txt" + } + ], + "test.test[optimizers-length_over_merge_fs_multiusage--Results]": [], "test.test[optimizers-yql-7324_duplicate_arg--Analyze]": [ { "checksum": "b913ead12af51bc046e1f3344ff5134c", diff --git a/ydb/library/yql/tests/sql/sql2yql/canondata/result.json b/ydb/library/yql/tests/sql/sql2yql/canondata/result.json index 5be9cbf81736..523223bcf09e 100644 --- a/ydb/library/yql/tests/sql/sql2yql/canondata/result.json +++ b/ydb/library/yql/tests/sql/sql2yql/canondata/result.json @@ -2267,6 +2267,13 @@ "uri": "https://{canondata_backend}/1936997/d01e529b02a008637591c42bdd9c20f7eeda6588/resource.tar.gz#test_sql2yql.test_aggregate-group_by_ru_join_simple_/sql.yql" } ], + "test_sql2yql.test[aggregate-group_by_ru_join_simple_fs_multiusage]": [ + { + "checksum": "6e0fe40b211146a9bdab0d2e61d26168", + "size": 6000, + "uri": "https://{canondata_backend}/1599023/c4917fc01ae646b5e708e20b50ea7ae2325f5bb7/resource.tar.gz#test_sql2yql.test_aggregate-group_by_ru_join_simple_fs_multiusage_/sql.yql" + } + ], "test_sql2yql.test[aggregate-group_by_ru_join_star]": [ { "checksum": "1d817501fac482312d72ad2e4a7881ba", @@ -9785,6 +9792,13 @@ "uri": "https://{canondata_backend}/1937027/973c239492ba32946806ddc66cf0af4b38c06ae8/resource.tar.gz#test_sql2yql.test_optimizers-length_over_merge_/sql.yql" } ], + "test_sql2yql.test[optimizers-length_over_merge_fs_multiusage]": [ + { + "checksum": "a923d44d0dc694741762e7272690257f", + "size": 4487, + "uri": "https://{canondata_backend}/1599023/c4917fc01ae646b5e708e20b50ea7ae2325f5bb7/resource.tar.gz#test_sql2yql.test_optimizers-length_over_merge_fs_multiusage_/sql.yql" + } + ], "test_sql2yql.test[optimizers-multi_to_empty_constraint]": [ { "checksum": "4288eb5377c92428681954e99403cbf0", @@ -19557,6 +19571,13 @@ "uri": "https://{canondata_backend}/1880306/64654158d6bfb1289c66c626a8162239289559d0/resource.tar.gz#test_sql_format.test_aggregate-group_by_ru_join_simple_/formatted.sql" } ], + "test_sql_format.test[aggregate-group_by_ru_join_simple_fs_multiusage]": [ + { + "checksum": "0369dc43a2062b648a486f7efb2873d0", + "size": 334, + "uri": "https://{canondata_backend}/1599023/c4917fc01ae646b5e708e20b50ea7ae2325f5bb7/resource.tar.gz#test_sql_format.test_aggregate-group_by_ru_join_simple_fs_multiusage_/formatted.sql" + } + ], "test_sql_format.test[aggregate-group_by_ru_join_star]": [ { "checksum": "e4a4a97de8afd6e188163c71da45788d", @@ -27075,6 +27096,13 @@ "uri": "https://{canondata_backend}/1880306/64654158d6bfb1289c66c626a8162239289559d0/resource.tar.gz#test_sql_format.test_optimizers-length_over_merge_/formatted.sql" } ], + "test_sql_format.test[optimizers-length_over_merge_fs_multiusage]": [ + { + "checksum": "81f76674682498288b92904264c7a0cd", + "size": 498, + "uri": "https://{canondata_backend}/1599023/c4917fc01ae646b5e708e20b50ea7ae2325f5bb7/resource.tar.gz#test_sql_format.test_optimizers-length_over_merge_fs_multiusage_/formatted.sql" + } + ], "test_sql_format.test[optimizers-multi_to_empty_constraint]": [ { "checksum": "417fc8b1ee413a204123ed56af1aa32f", diff --git a/ydb/library/yql/tests/sql/suites/aggregate/group_by_ru_join_simple_fs_multiusage.cfg b/ydb/library/yql/tests/sql/suites/aggregate/group_by_ru_join_simple_fs_multiusage.cfg new file mode 100644 index 000000000000..3df78137907c --- /dev/null +++ b/ydb/library/yql/tests/sql/suites/aggregate/group_by_ru_join_simple_fs_multiusage.cfg @@ -0,0 +1 @@ +in Input input_intersect.txt diff --git a/ydb/library/yql/tests/sql/suites/aggregate/group_by_ru_join_simple_fs_multiusage.sql b/ydb/library/yql/tests/sql/suites/aggregate/group_by_ru_join_simple_fs_multiusage.sql new file mode 100644 index 000000000000..1932c5825274 --- /dev/null +++ b/ydb/library/yql/tests/sql/suites/aggregate/group_by_ru_join_simple_fs_multiusage.sql @@ -0,0 +1,6 @@ +/* syntax version 1 */ +/* postgres can not */ + +pragma sampleselect; +pragma config.flags("OptimizerFlags", "FieldSubsetEnableMultiusage"); +select kk, sk, max(t2.subkey) as ss FROM plato.Input AS t1 INNER JOIN plato.Input AS t2 USING (key) GROUP BY ROLLUP(t1.key as kk, t1.subkey as sk) ORDER BY kk, sk; diff --git a/ydb/library/yql/tests/sql/suites/optimizers/length_over_merge_fs_multiusage.cfg b/ydb/library/yql/tests/sql/suites/optimizers/length_over_merge_fs_multiusage.cfg new file mode 100644 index 000000000000..0474ee88ca89 --- /dev/null +++ b/ydb/library/yql/tests/sql/suites/optimizers/length_over_merge_fs_multiusage.cfg @@ -0,0 +1,2 @@ +in Input input3.txt +res result.txt diff --git a/ydb/library/yql/tests/sql/suites/optimizers/length_over_merge_fs_multiusage.sql b/ydb/library/yql/tests/sql/suites/optimizers/length_over_merge_fs_multiusage.sql new file mode 100644 index 000000000000..ff060f1ab3d1 --- /dev/null +++ b/ydb/library/yql/tests/sql/suites/optimizers/length_over_merge_fs_multiusage.sql @@ -0,0 +1,14 @@ +/* postgres can not */ +pragma config.flags("OptimizerFlags", "FieldSubsetEnableMultiusage"); + +use plato; + +$input = (select key, key || subkey as subkey, value from Input); + +$total_count = (select count(1) from $input); + +$filtered = (select * from $input where key in ("023", "037", "075")); + +$filtered_cnt = (select count(1) from $filtered); + +select $filtered_cnt / cast($total_count as Double) as cnt; diff --git a/ydb/library/yql/tests/sql/yt_native_file/part14/canondata/result.json b/ydb/library/yql/tests/sql/yt_native_file/part14/canondata/result.json index acbe947fcd12..db31f6e348a5 100644 --- a/ydb/library/yql/tests/sql/yt_native_file/part14/canondata/result.json +++ b/ydb/library/yql/tests/sql/yt_native_file/part14/canondata/result.json @@ -296,6 +296,27 @@ "uri": "https://{canondata_backend}/1923547/5154c8bd8ef9ead4f609771f831f20c15e795571/resource.tar.gz#test.test_aggregate-group_by_rollup_rename-default.txt-Results_/results.txt" } ], + "test.test[aggregate-group_by_ru_join_simple_fs_multiusage--Debug]": [ + { + "checksum": "ceeb8af83f2b01a10deebe2f805a3d01", + "size": 9322, + "uri": "https://{canondata_backend}/1936997/61b460f95ad76ee0a1465a60da61733fdb91dab4/resource.tar.gz#test.test_aggregate-group_by_ru_join_simple_fs_multiusage--Debug_/opt.yql" + } + ], + "test.test[aggregate-group_by_ru_join_simple_fs_multiusage--Plan]": [ + { + "checksum": "760c79b2808d478d74cbc5d7682ae91d", + "size": 12501, + "uri": "https://{canondata_backend}/1936997/61b460f95ad76ee0a1465a60da61733fdb91dab4/resource.tar.gz#test.test_aggregate-group_by_ru_join_simple_fs_multiusage--Plan_/plan.txt" + } + ], + "test.test[aggregate-group_by_ru_join_simple_fs_multiusage--Results]": [ + { + "checksum": "2469d61da89000051d881cc1a1aceb8a", + "size": 7925, + "uri": "https://{canondata_backend}/1936997/61b460f95ad76ee0a1465a60da61733fdb91dab4/resource.tar.gz#test.test_aggregate-group_by_ru_join_simple_fs_multiusage--Results_/results.txt" + } + ], "test.test[ansi_idents-escaped_udf_name-default.txt-Debug]": [ { "checksum": "4988877c69725bebc3eb77a48625f5cd", diff --git a/ydb/library/yql/tests/sql/yt_native_file/part3/canondata/result.json b/ydb/library/yql/tests/sql/yt_native_file/part3/canondata/result.json index 6a2b074791e1..516c18f6e3fd 100644 --- a/ydb/library/yql/tests/sql/yt_native_file/part3/canondata/result.json +++ b/ydb/library/yql/tests/sql/yt_native_file/part3/canondata/result.json @@ -1214,6 +1214,27 @@ "uri": "https://{canondata_backend}/1881367/66c7cb3390a7d019f5c9ce886d24fe86e37e82d0/resource.tar.gz#test.test_optimizers-direct_row_after_merge--Results_/results.txt" } ], + "test.test[optimizers-length_over_merge_fs_multiusage--Debug]": [ + { + "checksum": "e27c0e4ca190c9095c7cdfc6031ab1ea", + "size": 1740, + "uri": "https://{canondata_backend}/1889210/f5cbb7beff327fcfa5cbcdb1e04fd02c24f4e33f/resource.tar.gz#test.test_optimizers-length_over_merge_fs_multiusage--Debug_/opt.yql" + } + ], + "test.test[optimizers-length_over_merge_fs_multiusage--Plan]": [ + { + "checksum": "17aa2b0bb49eb66fecaf1b350cccaabc", + "size": 5190, + "uri": "https://{canondata_backend}/1889210/f5cbb7beff327fcfa5cbcdb1e04fd02c24f4e33f/resource.tar.gz#test.test_optimizers-length_over_merge_fs_multiusage--Plan_/plan.txt" + } + ], + "test.test[optimizers-length_over_merge_fs_multiusage--Results]": [ + { + "checksum": "5d1098186fdb44867314966f03aded2a", + "size": 857, + "uri": "https://{canondata_backend}/1889210/f5cbb7beff327fcfa5cbcdb1e04fd02c24f4e33f/resource.tar.gz#test.test_optimizers-length_over_merge_fs_multiusage--Results_/results.txt" + } + ], "test.test[optimizers-yql-7324_duplicate_arg--Debug]": [ { "checksum": "3bd44f4f74db4b23c13174edccd97e7d",