Skip to content

Commit 517f14c

Browse files
clean yql operation id usage in CS scan (#16706)
1 parent 67065b8 commit 517f14c

File tree

8 files changed

+148
-128
lines changed

ydb/core/formats/arrow/program/assign_internal.cpp

+3 -20
Original file line number | Diff line number | Diff line change
@@ -40,33 +40,16 @@ TConclusion<std::shared_ptr<TCalculationProcessor>> TCalculationProcessor::Build
4040

4141
NJson::TJsonValue TCalculationProcessor::DoDebugJson() const {
4242
NJson::TJsonValue result = NJson::JSON_MAP;
43-
if (!!YqlOperationId) {
44-
result.InsertValue("yql_op", ::ToString((NYql::TKernelRequestBuilder::EBinaryOp)*YqlOperationId));
45-
}
46-
if (!!KernelLogic) {
47-
result.InsertValue("kernel", KernelLogic->GetClassName());
48-
}
43+
result.InsertValue("kernel", KernelLogic->GetClassName());
4944
return result;
5045
}
5146

5247
ui64 TCalculationProcessor::DoGetWeight() const {
53-
if (KernelLogic) {
54-
return (ui64)KernelLogic->GetWeight();
55-
}
56-
if (!YqlOperationId) {
57-
return (ui64)ECalculationHardness::Unknown;
58-
}
59-
return (ui64)ECalculationHardness::NotSpecified;
48+
return (ui64)KernelLogic->GetWeight();
6049
}
6150

6251
TString TCalculationProcessor::DoGetSignalCategoryName() const {
63-
if (KernelLogic) {
64-
return ::ToString(GetProcessorType()) + "::" + KernelLogic->GetClassName();
65-
} else if (YqlOperationId) {
66-
return ::ToString(GetProcessorType()) + "::" + ::ToString((NYql::TKernelRequestBuilder::EBinaryOp)*YqlOperationId);
67-
} else {
68-
return ::ToString(GetProcessorType());
69-
}
52+
return ::ToString(GetProcessorType()) + "::" + KernelLogic->SignalDescription();
7053
}
7154

7255
} // namespace NKikimr::NArrow::NSSA

ydb/core/formats/arrow/program/assign_internal.h

+2 -2
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,6 @@ class TCalculationProcessor: public IResourceProcessor {
1111
private:
1212
using TBase = IResourceProcessor;
1313

14-
YDB_ACCESSOR_DEF(std::optional<ui32>, YqlOperationId);
1514
YDB_ACCESSOR_DEF(std::shared_ptr<IKernelLogic>, KernelLogic);
1615

1716
std::shared_ptr<IStepFunction> Function;
@@ -27,6 +26,7 @@ class TCalculationProcessor: public IResourceProcessor {
2726
: TBase(std::move(input), std::move(output), EProcessorType::Calculation)
2827
, KernelLogic(kernelLogic)
2928
, Function(function) {
29+
AFL_VERIFY(KernelLogic);
3030
}
3131

3232
virtual bool IsAggregation() const override {
@@ -37,7 +37,7 @@ class TCalculationProcessor: public IResourceProcessor {
3737

3838
public:
3939
static TConclusion<std::shared_ptr<TCalculationProcessor>> Build(std::vector<TColumnChainInfo>&& input, const TColumnChainInfo& output,
40-
const std::shared_ptr<IStepFunction>& function, const std::shared_ptr<IKernelLogic>& kernelLogic = nullptr);
40+
const std::shared_ptr<IStepFunction>& function, const std::shared_ptr<IKernelLogic>& kernelLogic);
4141
};
4242

4343
} // namespace NKikimr::NArrow::NSSA

ydb/core/formats/arrow/program/graph_optimization.cpp

+9 -62
Original file line number | Diff line number | Diff line change
@@ -426,34 +426,6 @@ TConclusion<bool> TGraph::OptimizeConditionsForIndexes(TGraphNode* condNode) {
426426
return true;
427427
}
428428

429-
bool TGraph::IsBoolResultYqlOperator(const NYql::TKernelRequestBuilder::EBinaryOp op) const {
430-
switch (op) {
431-
case NYql::TKernelRequestBuilder::EBinaryOp::And:
432-
case NYql::TKernelRequestBuilder::EBinaryOp::Or:
433-
case NYql::TKernelRequestBuilder::EBinaryOp::Xor:
434-
return true;
435-
case NYql::TKernelRequestBuilder::EBinaryOp::Add:
436-
case NYql::TKernelRequestBuilder::EBinaryOp::Sub:
437-
case NYql::TKernelRequestBuilder::EBinaryOp::Mul:
438-
case NYql::TKernelRequestBuilder::EBinaryOp::Div:
439-
case NYql::TKernelRequestBuilder::EBinaryOp::Mod:
440-
case NYql::TKernelRequestBuilder::EBinaryOp::Coalesce:
441-
return false;
442-
443-
case NYql::TKernelRequestBuilder::EBinaryOp::StartsWith:
444-
case NYql::TKernelRequestBuilder::EBinaryOp::EndsWith:
445-
case NYql::TKernelRequestBuilder::EBinaryOp::StringContains:
446-
447-
case NYql::TKernelRequestBuilder::EBinaryOp::Equals:
448-
case NYql::TKernelRequestBuilder::EBinaryOp::NotEquals:
449-
case NYql::TKernelRequestBuilder::EBinaryOp::Less:
450-
case NYql::TKernelRequestBuilder::EBinaryOp::LessOrEqual:
451-
case NYql::TKernelRequestBuilder::EBinaryOp::Greater:
452-
case NYql::TKernelRequestBuilder::EBinaryOp::GreaterOrEqual:
453-
return true;
454-
}
455-
}
456-
457429
TConclusion<bool> TGraph::OptimizeConditionsForHeadersCheck(TGraphNode* condNode) {
458430
if (condNode->GetProcessor()->GetProcessorType() != EProcessorType::Calculation) {
459431
return false;
@@ -467,17 +439,7 @@ TConclusion<bool> TGraph::OptimizeConditionsForHeadersCheck(TGraphNode* condNode
467439
}
468440
auto* dest = condNode->GetOutputEdges().begin()->second;
469441
const ui32 destResourceId = condNode->GetOutputEdges().begin()->first.GetResourceId();
470-
if (!!calc->GetKernelLogic()) {
471-
if (!calc->GetKernelLogic()->IsBoolInResult()) {
472-
return false;
473-
}
474-
}
475-
if (calc->GetYqlOperationId()) {
476-
if (!IsBoolResultYqlOperator((NYql::TKernelRequestBuilder::EBinaryOp)*calc->GetYqlOperationId())) {
477-
return false;
478-
}
479-
}
480-
if (!calc->GetYqlOperationId() && !calc->GetKernelLogic()) {
442+
if (!calc->GetKernelLogic() || !calc->GetKernelLogic()->IsBoolInResult()) {
481443
return false;
482444
}
483445
auto* node = GetProducerVerified(condNode->GetProcessor()->GetInput()[0].GetColumnId());
@@ -527,10 +489,11 @@ TConclusion<bool> TGraph::OptimizeFilterWithCoalesce(TGraphNode* cNode) {
527489
return false;
528490
}
529491
const auto calc = cNode->GetProcessorAs<TCalculationProcessor>();
530-
if (!calc->GetYqlOperationId()) {
492+
if (!calc->GetKernelLogic()->GetYqlOperationId()) {
531493
return false;
532494
}
533-
if ((NYql::TKernelRequestBuilder::EBinaryOp)*calc->GetYqlOperationId() != NYql::TKernelRequestBuilder::EBinaryOp::Coalesce) {
495+
if ((NYql::TKernelRequestBuilder::EBinaryOp)*calc->GetKernelLogic()->GetYqlOperationId() !=
496+
NYql::TKernelRequestBuilder::EBinaryOp::Coalesce) {
534497
return false;
535498
}
536499
if (cNode->GetOutputEdges().size() != 1) {
@@ -551,30 +514,14 @@ TConclusion<bool> TGraph::OptimizeFilterWithCoalesce(TGraphNode* cNode) {
551514

552515
auto* nextNode = cNode->GetOutputEdges().begin()->second;
553516
if (nextNode->GetProcessor()->GetProcessorType() != EProcessorType::Filter) {
554-
if (nextNode->GetProcessor()->GetProcessorType() == EProcessorType::Calculation) {
555-
const auto outputCalc = nextNode->GetProcessorAs<TCalculationProcessor>();
556-
if (!outputCalc->GetYqlOperationId()) {
557-
return false;
558-
}
559-
if ((NYql::TKernelRequestBuilder::EBinaryOp)*outputCalc->GetYqlOperationId() != NYql::TKernelRequestBuilder::EBinaryOp::And) {
560-
return false;
561-
}
562-
} else if (nextNode->GetProcessor()->GetProcessorType() == EProcessorType::StreamLogic) {
563-
const auto outputCalc = nextNode->GetProcessorAs<TStreamLogicProcessor>();
564-
if (outputCalc->GetOperation() != NKernels::EOperation::And) {
565-
return false;
566-
}
517+
if (nextNode->GetProcessor()->GetProcessorType() != EProcessorType::StreamLogic) {
518+
return false;
567519
}
568-
if (nextNode->GetOutputEdges().size() != 1) {
520+
const auto outputCalc = nextNode->GetProcessorAs<TStreamLogicProcessor>();
521+
if (outputCalc->GetOperation() != NKernels::EOperation::And) {
569522
return false;
570523
}
571-
if (nextNode->GetOutputEdges().begin()->second->GetProcessor()->GetProcessorType() == EProcessorType::StreamLogic) {
572-
const auto outputCalc = nextNode->GetOutputEdges().begin()->second->GetProcessorAs<TStreamLogicProcessor>();
573-
if (outputCalc->GetOperation() != NKernels::EOperation::And) {
574-
return false;
575-
}
576-
} else if (nextNode->GetOutputEdges().begin()->second->GetProcessor()->GetProcessorType() == EProcessorType::Filter) {
577-
} else {
524+
if (nextNode->GetOutputEdges().size() != 1) {
578525
return false;
579526
}
580527
}

ydb/core/formats/arrow/program/graph_optimization.h

-1
Original file line number | Diff line number | Diff line change
@@ -147,7 +147,6 @@ class TGraph {
147147
std::optional<TResourceAddress> GetOriginalAddress(TGraphNode* condNode) const;
148148
TConclusion<bool> OptimizeForFetchSubColumns(TGraphNode* condNode);
149149
TConclusion<bool> OptimizeConditionsForHeadersCheck(TGraphNode* condNode);
150-
bool IsBoolResultYqlOperator(const NYql::TKernelRequestBuilder::EBinaryOp op) const;
151150

152151
TConclusion<bool> OptimizeConditionsForStream(TGraphNode* condNode);
153152
TConclusion<bool> OptimizeConditionsForIndexes(TGraphNode* condNode);

ydb/core/formats/arrow/program/kernel_logic.cpp

+42
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,8 @@
55
#include <ydb/core/formats/arrow/accessor/sub_columns/accessor.h>
66
#include <ydb/core/formats/arrow/accessor/sub_columns/partial.h>
77

8+
#include <yql/essentials/core/arrow_kernels/request/request.h>
9+
810
namespace NKikimr::NArrow::NSSA {
911

1012
TConclusion<bool> TGetJsonPath::DoExecute(const std::vector<TColumnChainInfo>& input, const std::vector<TColumnChainInfo>& output,
@@ -68,4 +70,44 @@ NAccessor::TCompositeChunkedArray::TBuilder TExistsJsonPath::MakeCompositeBuilde
6870
return NAccessor::TCompositeChunkedArray::TBuilder(arrow::uint8());
6971
}
7072

73+
TString TSimpleKernelLogic::SignalDescription() const {
74+
if (YqlOperationId) {
75+
return ::ToString((NYql::TKernelRequestBuilder::EBinaryOp)*YqlOperationId);
76+
} else {
77+
return "UNKNOWN";
78+
}
79+
}
80+
81+
bool TSimpleKernelLogic::IsBoolInResult() const {
82+
if (YqlOperationId) {
83+
switch ((NYql::TKernelRequestBuilder::EBinaryOp)*YqlOperationId) {
84+
case NYql::TKernelRequestBuilder::EBinaryOp::And:
85+
case NYql::TKernelRequestBuilder::EBinaryOp::Or:
86+
case NYql::TKernelRequestBuilder::EBinaryOp::Xor:
87+
return true;
88+
case NYql::TKernelRequestBuilder::EBinaryOp::Add:
89+
case NYql::TKernelRequestBuilder::EBinaryOp::Sub:
90+
case NYql::TKernelRequestBuilder::EBinaryOp::Mul:
91+
case NYql::TKernelRequestBuilder::EBinaryOp::Div:
92+
case NYql::TKernelRequestBuilder::EBinaryOp::Mod:
93+
case NYql::TKernelRequestBuilder::EBinaryOp::Coalesce:
94+
return false;
95+
96+
case NYql::TKernelRequestBuilder::EBinaryOp::StartsWith:
97+
case NYql::TKernelRequestBuilder::EBinaryOp::EndsWith:
98+
case NYql::TKernelRequestBuilder::EBinaryOp::StringContains:
99+
100+
case NYql::TKernelRequestBuilder::EBinaryOp::Equals:
101+
case NYql::TKernelRequestBuilder::EBinaryOp::NotEquals:
102+
case NYql::TKernelRequestBuilder::EBinaryOp::Less:
103+
case NYql::TKernelRequestBuilder::EBinaryOp::LessOrEqual:
104+
case NYql::TKernelRequestBuilder::EBinaryOp::Greater:
105+
case NYql::TKernelRequestBuilder::EBinaryOp::GreaterOrEqual:
106+
return true;
107+
}
108+
} else {
109+
return false;
110+
}
111+
}
112+
71113
} // namespace NKikimr::NArrow::NSSA

ydb/core/formats/arrow/program/kernel_logic.h

+56 -5
Original file line number | Diff line number | Diff line change
@@ -22,10 +22,20 @@ class IKernelLogic {
2222
const std::shared_ptr<TAccessorsCollection>& resources) const = 0;
2323

2424
virtual std::optional<TIndexCheckOperation> DoGetIndexCheckerOperation() const = 0;
25+
YDB_ACCESSOR_DEF(std::optional<ui32>, YqlOperationId);
2526

2627
public:
28+
IKernelLogic() = default;
29+
30+
IKernelLogic(const ui32 yqlOperationId)
31+
: YqlOperationId(yqlOperationId) {
32+
}
33+
2734
virtual ~IKernelLogic() = default;
2835

36+
virtual TString SignalDescription() const {
37+
return GetClassName();
38+
}
2939
virtual ECalculationHardness GetWeight() const = 0;
3040

3141
using TFactory = NObjectFactory::TObjectFactory<IKernelLogic, TString>;
@@ -46,8 +56,46 @@ class IKernelLogic {
4656
}
4757
};
4858

59+
class TSimpleKernelLogic: public IKernelLogic {
60+
private:
61+
using TBase = IKernelLogic;
62+
YDB_READONLY_DEF(std::optional<ui32>, YqlOperationId);
63+
64+
virtual TConclusion<bool> DoExecute(const std::vector<TColumnChainInfo>& /*input*/, const std::vector<TColumnChainInfo>& /*output*/,
65+
const std::shared_ptr<TAccessorsCollection>& /*resources*/) const override {
66+
return false;
67+
}
68+
69+
virtual std::optional<TIndexCheckOperation> DoGetIndexCheckerOperation() const override {
70+
return std::nullopt;
71+
}
72+
73+
public:
74+
TSimpleKernelLogic() = default;
75+
TSimpleKernelLogic(const ui32 yqlOperationId)
76+
: TBase(yqlOperationId)
77+
, YqlOperationId(yqlOperationId) {
78+
}
79+
80+
virtual TString SignalDescription() const override;
81+
82+
virtual ECalculationHardness GetWeight() const override {
83+
if (!YqlOperationId) {
84+
return ECalculationHardness::Unknown;
85+
}
86+
return ECalculationHardness::NotSpecified;
87+
}
88+
89+
virtual TString GetClassName() const override {
90+
return "SIMPLE";
91+
}
92+
93+
virtual bool IsBoolInResult() const override;
94+
};
95+
4996
class TLogicMatchString: public IKernelLogic {
5097
private:
98+
using TBase = IKernelLogic;
5199
virtual TConclusion<bool> DoExecute(const std::vector<TColumnChainInfo>& /*input*/, const std::vector<TColumnChainInfo>& /*output*/,
52100
const std::shared_ptr<TAccessorsCollection>& /*resources*/) const override {
53101
return false;
@@ -67,21 +115,25 @@ class TLogicMatchString: public IKernelLogic {
67115
TLogicMatchString(const TIndexCheckOperation::EOperation operation, const bool caseSensitive, const bool isSimpleFunction)
68116
: Operation(operation)
69117
, CaseSensitive(caseSensitive)
70-
, IsSimpleFunction(isSimpleFunction)
71-
{
118+
, IsSimpleFunction(isSimpleFunction) {
72119
}
73120

74-
virtual TString GetClassName() const override {
121+
virtual TString SignalDescription() const override {
75122
return "MATCH_STRING::" + ::ToString(Operation) + "::" + ::ToString(CaseSensitive);
76123
}
77124

125+
virtual TString GetClassName() const override {
126+
return "MATCH_STRING";
127+
}
128+
78129
virtual bool IsBoolInResult() const override {
79130
return !IsSimpleFunction;
80131
}
81132
};
82133

83134
class TLogicEquals: public IKernelLogic {
84135
private:
136+
using TBase = IKernelLogic;
85137
virtual TConclusion<bool> DoExecute(const std::vector<TColumnChainInfo>& /*input*/, const std::vector<TColumnChainInfo>& /*output*/,
86138
const std::shared_ptr<TAccessorsCollection>& /*resources*/) const override {
87139
return false;
@@ -97,8 +149,7 @@ class TLogicEquals: public IKernelLogic {
97149

98150
public:
99151
TLogicEquals(const bool isSimpleFunction)
100-
: IsSimpleFunction(isSimpleFunction)
101-
{
152+
: IsSimpleFunction(isSimpleFunction) {
102153
}
103154

104155
virtual TString GetClassName() const override {

0 commit comments

Comments
 (0)