@@ -24,6 +24,18 @@ static const std::unordered_set<std::string> SecondLevelFilters = {
24
24
" ends_with"
25
25
};
26
26
27
+ static TMaybeNode<TExprBase> CombinePrdicateWithOlapAnd (const TVector<TExprBase>& preds, TExprContext& ctx, TPositionHandle pos) {
28
+ if (preds.empty ()) {
29
+ return {};
30
+ } else if (preds.size () == 1 ) {
31
+ return preds[0 ];
32
+ } else {
33
+ return Build<TKqpOlapAnd>(ctx, pos)
34
+ .Add (preds)
35
+ .Done ();
36
+ }
37
+ }
38
+
27
39
struct TFilterOpsLevels {
28
40
TFilterOpsLevels (const TMaybeNode<TExprBase>& firstLevel, const TMaybeNode<TExprBase>& secondLevel)
29
41
: FirstLevelOps(firstLevel)
@@ -69,6 +81,23 @@ struct TFilterOpsLevels {
69
81
}
70
82
71
83
84
+ static TFilterOpsLevels Merge (TVector<TFilterOpsLevels> predicates, TExprContext& ctx, TPositionHandle pos) {
85
+ TVector<TExprBase> predicatesFirstLevel;
86
+ TVector<TExprBase> predicatesSecondLevel;
87
+ for (const auto & p: predicates) {
88
+ if (p.FirstLevelOps .IsValid ()) {
89
+ predicatesFirstLevel.emplace_back (p.FirstLevelOps .Cast ());
90
+ }
91
+ if (p.SecondLevelOps .IsValid ()) {
92
+ predicatesSecondLevel.emplace_back (p.SecondLevelOps .Cast ());
93
+ }
94
+ }
95
+ return {
96
+ CombinePrdicateWithOlapAnd (predicatesFirstLevel, ctx, pos),
97
+ CombinePrdicateWithOlapAnd (predicatesSecondLevel, ctx, pos),
98
+ };
99
+ }
100
+
72
101
TMaybeNode<TExprBase> FirstLevelOps;
73
102
TMaybeNode<TExprBase> SecondLevelOps;
74
103
};
@@ -674,49 +703,39 @@ TFilterOpsLevels PredicatePushdown(const TExprBase& predicate, const TExprNode&
674
703
}
675
704
676
705
// Folds `predicates` into a single TOLAPPredicateNode.
//   - ExprNode: literal `true` for an empty list, the predicate's own expression
//     for a single element, otherwise a TCoAnd over all expressions.
//   - CanBePushed: true only if every input predicate can be pushed
//     (true for an empty list).
// Only ExprNode and CanBePushed are populated on the result.
//
// The conjunction is built with TCoAnd rather than TKqpOlapAnd: callers pass the
// result's ExprNode as the predicate of a TCoOptionalIf, i.e. it is used for the
// predicates that were NOT pushed down.
TOLAPPredicateNode WrapPredicates(const std::vector<TOLAPPredicateNode>& predicates, TExprContext& ctx, TPositionHandle pos) {
    TOLAPPredicateNode result;
    result.CanBePushed = true;

    if (predicates.empty()) {
        // An empty conjunction is represented by a literal `true`.
        result.ExprNode = MakeBool<true>(pos, ctx);
        return result;
    }

    TVector<NNodes::TExprBase> exprNodes;
    exprNodes.reserve(predicates.size());
    for (const auto& pred : predicates) {
        exprNodes.emplace_back(pred.ExprNode);
        result.CanBePushed &= pred.CanBePushed;
    }

    if (exprNodes.size() == 1) {
        // A single predicate is used as is, without an AND wrapper.
        result.ExprNode = exprNodes.front().Ptr();
    } else {
        result.ExprNode = NNodes::Build<NNodes::TCoAnd>(ctx, pos)
            .Add(exprNodes)
            .Done().Ptr();
    }
    return result;
}
700
722
701
- void SplitForPartialPushdown (const TOLAPPredicateNode& predicateTree, TOLAPPredicateNode& predicatesToPush, TOLAPPredicateNode& remainingPredicates,
702
- TExprContext& ctx, TPositionHandle pos)
723
+ std::pair<std::vector<TOLAPPredicateNode>, std::vector<TOLAPPredicateNode>> SplitForPartialPushdown (const TOLAPPredicateNode& predicateTree)
703
724
{
704
725
if (predicateTree.CanBePushed ) {
705
- predicatesToPush = predicateTree;
706
- remainingPredicates.ExprNode = MakeBool<true >(pos, ctx);
707
- return ;
726
+ return {{predicateTree}, {}};
708
727
}
709
728
710
729
if (!TCoAnd::Match (predicateTree.ExprNode .Get ())) {
711
730
// We can partially pushdown predicates from AND operator only.
712
731
// For OR operator we would need to have several read operators which is not acceptable.
713
732
// TODO: Add support for NOT(op1 OR op2), because it expands to (!op1 AND !op2).
714
- remainingPredicates = predicateTree;
715
- return ;
733
+ return {{}, {predicateTree}};
716
734
}
717
735
718
736
bool isFoundNotStrictOp = false ;
719
- std::vector<TOLAPPredicateNode> pushable, remaining;
737
+ std::vector<TOLAPPredicateNode> pushable;
738
+ std::vector<TOLAPPredicateNode> remaining;
720
739
for (const auto & predicate : predicateTree.Children ) {
721
740
if (predicate.CanBePushed && !isFoundNotStrictOp) {
722
741
pushable.emplace_back (predicate);
@@ -727,8 +746,7 @@ void SplitForPartialPushdown(const TOLAPPredicateNode& predicateTree, TOLAPPredi
727
746
remaining.emplace_back (predicate);
728
747
}
729
748
}
730
- predicatesToPush = WrapPredicates (pushable, ctx, pos);
731
- remainingPredicates = WrapPredicates (remaining, ctx, pos);
749
+ return {pushable, remaining};
732
750
}
733
751
734
752
} // anonymous namespace end
@@ -752,6 +770,7 @@ TExprBase KqpPushOlapFilter(TExprBase node, TExprContext& ctx, const TKqpOptimiz
752
770
}
753
771
754
772
const auto & lambda = flatmap.Lambda ();
773
+ const auto & lambdaArg = lambda.Args ().Arg (0 ).Ref ();
755
774
756
775
YQL_CLOG (TRACE, ProviderKqp) << " Initial OLAP lambda: " << KqpExprToPrettyString (lambda, ctx);
757
776
@@ -764,42 +783,53 @@ TExprBase KqpPushOlapFilter(TExprBase node, TExprContext& ctx, const TKqpOptimiz
764
783
auto predicate = optionaIf.Predicate ();
765
784
auto value = optionaIf.Value ();
766
785
767
- if constexpr (NSsa::RuntimeVersion >= 5U ) {
768
- TExprNode::TPtr afterPeephole;
769
- bool hasNonDeterministicFunctions;
770
- if (const auto status = PeepHoleOptimizeNode (optionaIf.Ptr (), afterPeephole, ctx, typesCtx, nullptr , hasNonDeterministicFunctions);
771
- status != IGraphTransformer::TStatus::Ok) {
772
- YQL_CLOG (ERROR, ProviderKqp) << " Peephole OLAP failed." << Endl << ctx.IssueManager .GetIssues ().ToString ();
773
- return node;
774
- }
775
-
776
- const TCoIf simplified (std::move (afterPeephole));
777
- predicate = simplified.Predicate ();
778
- value = simplified.ThenValue ().Cast <TCoJust>().Input ();
779
- }
780
-
781
786
TOLAPPredicateNode predicateTree;
782
787
predicateTree.ExprNode = predicate.Ptr ();
783
- const auto & lambdaArg = lambda.Args ().Arg (0 ).Ref ();
784
- CollectPredicates (predicate, predicateTree, &lambdaArg, read.Process ().Body ());
788
+ CollectPredicates (predicate, predicateTree, &lambdaArg, read.Process ().Body (), false );
785
789
YQL_ENSURE (predicateTree.IsValid (), " Collected OLAP predicates are invalid" );
786
790
787
- TOLAPPredicateNode predicatesToPush, remainingPredicates ;
788
- SplitForPartialPushdown (predicateTree, predicatesToPush, remainingPredicates, ctx, node. Pos ()) ;
789
- if (!predicatesToPush. IsValid () ) {
790
- return node;
791
+ auto [pushable, remaining] = SplitForPartialPushdown (predicateTree) ;
792
+ TVector<TFilterOpsLevels> pushedPredicates ;
793
+ for ( const auto & p: pushable ) {
794
+ pushedPredicates. emplace_back ( PredicatePushdown ( TExprBase (p. ExprNode ), lambdaArg, ctx, node. Pos ())) ;
791
795
}
792
796
793
- YQL_ENSURE (predicatesToPush.IsValid (), " Predicates to push is invalid" );
794
- YQL_ENSURE (remainingPredicates.IsValid (), " Remaining predicates is invalid" );
797
+ if constexpr (NSsa::RuntimeVersion >= 5U ) {
798
+ if (!remaining.empty ()) {
799
+ const auto remainingPredicates = WrapPredicates (remaining, ctx, node.Pos ());
800
+ const auto recoveredOptinalIfForNonPushedDownPredicates = Build<TCoOptionalIf>(ctx, node.Pos ())
801
+ .Predicate (remainingPredicates.ExprNode )
802
+ .Value (value)
803
+ .Build ();
804
+ TExprNode::TPtr afterPeephole;
805
+ bool hasNonDeterministicFunctions;
806
+ if (const auto status = PeepHoleOptimizeNode (recoveredOptinalIfForNonPushedDownPredicates.Value ().Ptr (), afterPeephole, ctx, typesCtx, nullptr , hasNonDeterministicFunctions);
807
+ status != IGraphTransformer::TStatus::Ok) {
808
+ YQL_CLOG (ERROR, ProviderKqp) << " Peephole OLAP failed." << Endl << ctx.IssueManager .GetIssues ().ToString ();
809
+ return node;
810
+ }
811
+ const TCoIf simplified (std::move (afterPeephole));
812
+ predicate = simplified.Predicate ();
813
+ value = simplified.ThenValue ().Cast <TCoJust>().Input ();
814
+
815
+ TOLAPPredicateNode predicateTree;
816
+ predicateTree.ExprNode = predicate.Ptr ();
817
+ CollectPredicates (predicate, predicateTree, &lambdaArg, read.Process ().Body (), true );
818
+ YQL_ENSURE (predicateTree.IsValid (), " Collected OLAP predicates are invalid" );
819
+ auto [pushableWithApply, remaining2] = SplitForPartialPushdown (predicateTree);
820
+ for (const auto & p: pushableWithApply) {
821
+ pushedPredicates.emplace_back (PredicatePushdown (TExprBase (p.ExprNode ), lambdaArg, ctx, node.Pos ()));
822
+ }
823
+ remaining = std::move (remaining2);
824
+ }
825
+ }
826
+
827
+ const auto & pushedFilters = TFilterOpsLevels::Merge (pushedPredicates, ctx, node.Pos ());
795
828
796
- const auto pushedFilters = PredicatePushdown ( TExprBase (predicatesToPush. ExprNode ), lambdaArg , ctx, node.Pos ());
829
+ const auto remainingPredicates = WrapPredicates (remaining , ctx, node.Pos ());
797
830
// Temporary fix for https://st.yandex-team.ru/KIKIMR-22560
798
831
// YQL_ENSURE(pushedFilters.IsValid(), "Pushed predicate should be always valid!");
799
832
800
- if (!pushedFilters.IsValid ()) {
801
- return node;
802
- }
803
833
804
834
TMaybeNode<TExprBase> olapFilter;
805
835
if (pushedFilters.FirstLevelOps .IsValid ()) {
@@ -824,7 +854,7 @@ TExprBase KqpPushOlapFilter(TExprBase node, TExprContext& ctx, const TKqpOptimiz
824
854
.Build ()
825
855
.Done ();
826
856
827
- YQL_CLOG (TRACE , ProviderKqp) << " Pushed OLAP lambda: " << KqpExprToPrettyString (newProcessLambda, ctx);
857
+ YQL_CLOG (ERROR , ProviderKqp) << " Pushed OLAP lambda: " << KqpExprToPrettyString (newProcessLambda, ctx);
828
858
829
859
#ifdef ENABLE_COLUMNS_PRUNING
830
860
TMaybeNode<TCoAtomList> readColumns = BuildColumnsFromLambda (lambda, ctx, node.Pos ());
0 commit comments