Skip to content

Commit 1345ce6

Browse files
authored
Collect statistic about unsuccessful block rewrites for callables and types (#7642)
1 parent 976e2c7 commit 1345ce6

File tree

31 files changed

+422
-47
lines changed

31 files changed

+422
-47
lines changed

ydb/library/yql/core/facade/yql_facade.cpp

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030

3131
#include <util/stream/file.h>
3232
#include <util/stream/null.h>
33+
#include <util/string/join.h>
3334
#include <util/string/split.h>
3435
#include <util/generic/guid.h>
3536
#include <util/system/rusage.h>
@@ -1629,6 +1630,48 @@ NThreading::TFuture<void> TProgram::Abort()
16291630
return CloseLastSession();
16301631
}
16311632

1633+
TIssues TProgram::Issues() const {
1634+
TIssues result;
1635+
if (ExprCtx_) {
1636+
result.AddIssues(ExprCtx_->IssueManager.GetIssues());
1637+
}
1638+
result.AddIssues(FinalIssues_);
1639+
return result;
1640+
}
1641+
1642+
TIssues TProgram::CompletedIssues() const {
1643+
TIssues result;
1644+
if (ExprCtx_) {
1645+
result.AddIssues(ExprCtx_->IssueManager.GetCompletedIssues());
1646+
}
1647+
result.AddIssues(FinalIssues_);
1648+
return result;
1649+
}
1650+
1651+
TIssue MakeNoBlocksInfoIssue(const TVector<TString>& names, bool isTypes) {
1652+
TIssue result;
1653+
TString msg = TStringBuilder() << "Most frequent " << (isTypes ? "types " : "callables ")
1654+
<< "which do not support block mode: " << JoinRange(", ", names.begin(), names.end());
1655+
result.SetMessage(msg);
1656+
result.SetCode(isTypes ? TIssuesIds::CORE_TOP_UNSUPPORTED_BLOCK_TYPES : TIssuesIds::CORE_TOP_UNSUPPORTED_BLOCK_CALLABLES, TSeverityIds::S_INFO);
1657+
return result;
1658+
}
1659+
1660+
void TProgram::FinalizeIssues() {
1661+
FinalIssues_.Clear();
1662+
if (TypeCtx_) {
1663+
static const size_t topCount = 10;
1664+
auto noBlockTypes = TypeCtx_->GetTopNoBlocksTypes(topCount);
1665+
if (!noBlockTypes.empty()) {
1666+
FinalIssues_.AddIssue(MakeNoBlocksInfoIssue(noBlockTypes, true));
1667+
}
1668+
auto noBlockCallables = TypeCtx_->GetTopNoBlocksCallables(topCount);
1669+
if (!noBlockCallables.empty()) {
1670+
FinalIssues_.AddIssue(MakeNoBlocksInfoIssue(noBlockCallables, false));
1671+
}
1672+
}
1673+
}
1674+
16321675
NThreading::TFuture<void> TProgram::CleanupLastSession() {
16331676
YQL_LOG_CTX_ROOT_SESSION_SCOPE(GetSessionId());
16341677

ydb/library/yql/core/facade/yql_facade.h

Lines changed: 5 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -192,28 +192,14 @@ class TProgram: public TThrRefBase, private TNonCopyable
192192
[[nodiscard]]
193193
NThreading::TFuture<void> Abort();
194194

195-
inline TIssues Issues() {
196-
if (ExprCtx_) {
197-
return ExprCtx_->IssueManager.GetIssues();
198-
} else {
199-
return {};
200-
}
201-
}
202-
203-
inline TIssues CompletedIssues() const {
204-
if (ExprCtx_) {
205-
return ExprCtx_->IssueManager.GetCompletedIssues();
206-
} else {
207-
return {};
208-
}
209-
}
195+
TIssues Issues() const;
196+
TIssues CompletedIssues() const;
197+
void FinalizeIssues();
210198

211199
void Print(IOutputStream* exprOut, IOutputStream* planOut, bool cleanPlan = false);
212200

213201
inline void PrintErrorsTo(IOutputStream& out) const {
214-
if (ExprCtx_) {
215-
ExprCtx_->IssueManager.GetIssues().PrintWithProgramTo(out, Filename_, SourceCode_);
216-
}
202+
Issues().PrintWithProgramTo(out, Filename_, SourceCode_);
217203
}
218204

219205
inline const TAstNode* AstRoot() const {
@@ -455,6 +441,7 @@ class TProgram: public TThrRefBase, private TNonCopyable
455441
TMaybe<TString> LineageStr_;
456442

457443
TQContext QContext_;
444+
TIssues FinalIssues_;
458445
};
459446

460447
} // namspace NYql

ydb/library/yql/core/issue/protos/issue_id.proto

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@ message TIssuesIds {
3939
CORE_ALIAS_SHADOWS_COLUMN = 1111;
4040
CORE_LINEAGE_INTERNAL_ERROR = 1112;
4141

42+
// core informational
43+
CORE_TOP_UNSUPPORTED_BLOCK_TYPES = 1200;
44+
CORE_TOP_UNSUPPORTED_BLOCK_CALLABLES = 1201;
45+
4246
// core errors
4347
CORE_GC_NODES_LIMIT_EXCEEDED = 1500;
4448
CORE_GC_STRINGS_LIMIT_EXCEEDED = 1501;

ydb/library/yql/core/issue/yql_issue.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -663,3 +663,11 @@ ids {
663663
code: YT_SECURE_DATA_IN_COMMON_TMP
664664
severity: S_WARNING
665665
}
666+
ids {
667+
code: CORE_TOP_UNSUPPORTED_BLOCK_TYPES
668+
severity: S_INFO
669+
}
670+
ids {
671+
code: CORE_TOP_UNSUPPORTED_BLOCK_CALLABLES
672+
severity: S_INFO
673+
}

ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp

Lines changed: 18 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2291,10 +2291,11 @@ IGraphTransformer::TStatus PeepHoleFinalStage(const TExprNode::TPtr& input, TExp
22912291
}
22922292

22932293
IGraphTransformer::TStatus PeepHoleBlockStage(const TExprNode::TPtr& input, TExprNode::TPtr& output,
2294-
TExprContext& ctx, TTypeAnnotationContext& types, const TExtPeepHoleOptimizerMap& extOptimizers)
2294+
TExprContext& ctx, TTypeAnnotationContext& types, const TExtPeepHoleOptimizerMap& extOptimizers, TProcessedNodesSet& cache)
22952295
{
22962296
TOptimizeExprSettings settings(&types);
22972297
settings.CustomInstantTypeTransformer = types.CustomInstantTypeTransformer.Get();
2298+
settings.ProcessedNodes = &cache;
22982299

22992300
return OptimizeExpr(input, output, [&types, &extOptimizers](
23002301
const TExprNode::TPtr& node, TExprContext& ctx) -> TExprNode::TPtr {
@@ -5693,7 +5694,11 @@ bool CollectBlockRewrites(const TMultiExprType* multiInputType, bool keepInputCo
56935694
allInputTypes.push_back(i);
56945695
}
56955696

5696-
auto resolveStatus = types.ArrowResolver->AreTypesSupported(ctx.GetPosition(lambda->Pos()), allInputTypes, ctx);
5697+
const IArrowResolver::TUnsupportedTypeCallback onUnsupportedType = [&types](const auto& typeKindOrSlot) {
5698+
std::visit([&types](const auto& value) { types.IncNoBlockType(value); }, typeKindOrSlot);
5699+
};
5700+
5701+
auto resolveStatus = types.ArrowResolver->AreTypesSupported(ctx.GetPosition(lambda->Pos()), allInputTypes, ctx, onUnsupportedType);
56975702
YQL_ENSURE(resolveStatus != IArrowResolver::ERROR);
56985703
if (resolveStatus != IArrowResolver::OK) {
56995704
return false;
@@ -5747,7 +5752,7 @@ bool CollectBlockRewrites(const TMultiExprType* multiInputType, bool keepInputCo
57475752
if (node->IsList() || rewriteAsIs ||
57485753
node->IsCallable({"And", "Or", "Xor", "Not", "Coalesce", "Exists", "If", "Just", "AsStruct", "Member", "Nth", "ToPg", "FromPg", "PgResolvedCall", "PgResolvedOp"}))
57495754
{
5750-
if (node->IsCallable() && !IsSupportedAsBlockType(node->Pos(), *node->GetTypeAnn(), ctx, types)) {
5755+
if (node->IsCallable() && !IsSupportedAsBlockType(node->Pos(), *node->GetTypeAnn(), ctx, types, true)) {
57515756
return true;
57525757
}
57535758

@@ -5775,7 +5780,7 @@ bool CollectBlockRewrites(const TMultiExprType* multiInputType, bool keepInputCo
57755780
auto child = node->ChildPtr(index);
57765781
if (!child->GetTypeAnn()->IsComputable()) {
57775782
funcArgs.push_back(child);
5778-
} else if (child->IsComplete() && IsSupportedAsBlockType(child->Pos(), *child->GetTypeAnn(), ctx, types)) {
5783+
} else if (child->IsComplete() && IsSupportedAsBlockType(child->Pos(), *child->GetTypeAnn(), ctx, types, true)) {
57795784
funcArgs.push_back(ctx.NewCallable(node->Pos(), "AsScalar", { child }));
57805785
} else if (auto rit = rewrites.find(child.Get()); rit != rewrites.end()) {
57815786
funcArgs.push_back(rit->second);
@@ -5796,7 +5801,7 @@ bool CollectBlockRewrites(const TMultiExprType* multiInputType, bool keepInputCo
57965801
auto member = funcArgs[index];
57975802
auto child = member->TailPtr();
57985803
TExprNodePtr rewrite;
5799-
if (child->IsComplete() && IsSupportedAsBlockType(child->Pos(), *child->GetTypeAnn(), ctx, types)) {
5804+
if (child->IsComplete() && IsSupportedAsBlockType(child->Pos(), *child->GetTypeAnn(), ctx, types, true)) {
58005805
rewrite = ctx.NewCallable(child->Pos(), "AsScalar", { child });
58015806
} else if (auto rit = rewrites.find(child.Get()); rit != rewrites.end()) {
58025807
rewrite = rit->second;
@@ -5821,6 +5826,7 @@ bool CollectBlockRewrites(const TMultiExprType* multiInputType, bool keepInputCo
58215826
const bool isUdf = node->IsCallable("Apply") && node->Head().IsCallable("Udf");
58225827
if (isUdf) {
58235828
if (!GetSetting(*node->Head().Child(7), "blocks")) {
5829+
types.IncNoBlockCallable(node->Head().Head().Content());
58245830
return true;
58255831
}
58265832
}
@@ -5832,7 +5838,7 @@ bool CollectBlockRewrites(const TMultiExprType* multiInputType, bool keepInputCo
58325838
allTypes.push_back(node->Child(i)->GetTypeAnn());
58335839
}
58345840

5835-
auto resolveStatus = types.ArrowResolver->AreTypesSupported(ctx.GetPosition(node->Pos()), allTypes, ctx);
5841+
auto resolveStatus = types.ArrowResolver->AreTypesSupported(ctx.GetPosition(node->Pos()), allTypes, ctx, onUnsupportedType);
58365842
YQL_ENSURE(resolveStatus != IArrowResolver::ERROR);
58375843
if (resolveStatus != IArrowResolver::OK) {
58385844
return true;
@@ -5898,6 +5904,7 @@ bool CollectBlockRewrites(const TMultiExprType* multiInputType, bool keepInputCo
58985904
} else {
58995905
auto fit = funcs.find(node->Content());
59005906
if (fit == funcs.end()) {
5907+
types.IncNoBlockCallable(node->Content());
59015908
return true;
59025909
}
59035910

@@ -5907,6 +5914,7 @@ bool CollectBlockRewrites(const TMultiExprType* multiInputType, bool keepInputCo
59075914
auto resolveStatus = types.ArrowResolver->LoadFunctionMetadata(ctx.GetPosition(node->Pos()), arrowFunctionName, argTypes, outType, ctx);
59085915
YQL_ENSURE(resolveStatus != IArrowResolver::ERROR);
59095916
if (resolveStatus != IArrowResolver::OK) {
5917+
types.IncNoBlockCallable(node->Content());
59105918
return true;
59115919
}
59125920
funcArgs.push_back(ExpandType(node->Pos(), *outType, ctx));
@@ -8555,10 +8563,10 @@ THolder<IGraphTransformer> CreatePeepHoleFinalStageTransformer(TTypeAnnotationCo
85558563

85568564
pipeline.Add(
85578565
CreateFunctorTransformer(
8558-
[&types](const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) -> IGraphTransformer::TStatus {
8566+
[&types, cache = TProcessedNodesSet()](const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) mutable -> IGraphTransformer::TStatus {
85598567
if (types.IsBlockEngineEnabled()) {
85608568
const auto& extStageRules = TPeepHoleRules::Instance().BlockStageExtRules;
8561-
return PeepHoleBlockStage(input, output, ctx, types, extStageRules);
8569+
return PeepHoleBlockStage(input, output, ctx, types, extStageRules, cache);
85628570
} else {
85638571
output = input;
85648572
return IGraphTransformer::TStatus::Ok;
@@ -8574,10 +8582,10 @@ THolder<IGraphTransformer> CreatePeepHoleFinalStageTransformer(TTypeAnnotationCo
85748582

85758583
pipeline.Add(
85768584
CreateFunctorTransformer(
8577-
[&types](const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) -> IGraphTransformer::TStatus {
8585+
[&types, cache = TProcessedNodesSet()](const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) mutable -> IGraphTransformer::TStatus {
85788586
if (types.IsBlockEngineEnabled()) {
85798587
const auto& extStageRules = TPeepHoleRules::Instance().BlockStageExtFinalRules;
8580-
return PeepHoleBlockStage(input, output, ctx, types, extStageRules);
8588+
return PeepHoleBlockStage(input, output, ctx, types, extStageRules, cache);
85818589
} else {
85828590
output = input;
85838591
return IGraphTransformer::TStatus::Ok;

ydb/library/yql/core/yql_arrow_resolver.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,15 @@
22

33
#include <ydb/library/yql/ast/yql_expr.h>
44

5+
#include <functional>
6+
#include <variant>
7+
58
namespace NYql {
69

710
class IArrowResolver : public TThrRefBase {
811
public:
912
using TPtr = TIntrusiveConstPtr<IArrowResolver>;
13+
using TUnsupportedTypeCallback = std::function<void(std::variant<ETypeAnnotationKind, NUdf::EDataSlot>)>;
1014

1115
enum EStatus {
1216
OK,
@@ -21,7 +25,8 @@ class IArrowResolver : public TThrRefBase {
2125

2226
virtual EStatus HasCast(const TPosition& pos, const TTypeAnnotationNode* from, const TTypeAnnotationNode* to, TExprContext& ctx) const = 0;
2327

24-
virtual EStatus AreTypesSupported(const TPosition& pos, const TVector<const TTypeAnnotationNode*>& types, TExprContext& ctx) const = 0;
28+
virtual EStatus AreTypesSupported(const TPosition& pos, const TVector<const TTypeAnnotationNode*>& types, TExprContext& ctx,
29+
const TUnsupportedTypeCallback& onUnsupported = {}) const = 0;
2530
};
2631

2732
}

ydb/library/yql/core/yql_expr_type_annotation.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3126,12 +3126,20 @@ bool IsWideSequenceBlockType(const TTypeAnnotationNode& type) {
31263126
return IsWideBlockType(*itemType);
31273127
}
31283128

3129-
bool IsSupportedAsBlockType(TPositionHandle pos, const TTypeAnnotationNode& type, TExprContext& ctx, TTypeAnnotationContext& types) {
3129+
bool IsSupportedAsBlockType(TPositionHandle pos, const TTypeAnnotationNode& type, TExprContext& ctx, TTypeAnnotationContext& types,
3130+
bool reportUnspported)
3131+
{
31303132
if (!types.ArrowResolver) {
31313133
return false;
31323134
}
31333135

3134-
auto resolveStatus = types.ArrowResolver->AreTypesSupported(ctx.GetPosition(pos), { &type }, ctx);
3136+
IArrowResolver::TUnsupportedTypeCallback onUnsupportedType;
3137+
if (reportUnspported) {
3138+
onUnsupportedType = [&types](const auto& typeKindOrSlot) {
3139+
std::visit([&types](const auto& value) { types.IncNoBlockType(value); }, typeKindOrSlot);
3140+
};
3141+
}
3142+
auto resolveStatus = types.ArrowResolver->AreTypesSupported(ctx.GetPosition(pos), { &type }, ctx, onUnsupportedType);
31353143
YQL_ENSURE(resolveStatus != IArrowResolver::ERROR);
31363144
return resolveStatus == IArrowResolver::OK;
31373145
}

ydb/library/yql/core/yql_expr_type_annotation.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ bool EnsureWideStreamType(const TExprNode& node, TExprContext& ctx);
130130
bool EnsureWideStreamType(TPositionHandle position, const TTypeAnnotationNode& type, TExprContext& ctx);
131131
bool IsWideBlockType(const TTypeAnnotationNode& type);
132132
bool IsWideSequenceBlockType(const TTypeAnnotationNode& type);
133-
bool IsSupportedAsBlockType(TPositionHandle pos, const TTypeAnnotationNode& type, TExprContext& ctx, TTypeAnnotationContext& types);
133+
bool IsSupportedAsBlockType(TPositionHandle pos, const TTypeAnnotationNode& type, TExprContext& ctx, TTypeAnnotationContext& types, bool reportUnspported = false);
134134
bool EnsureSupportedAsBlockType(TPositionHandle pos, const TTypeAnnotationNode& type, TExprContext& ctx, TTypeAnnotationContext& types);
135135
bool EnsureWideBlockType(TPositionHandle position, const TTypeAnnotationNode& type, TTypeAnnotationNode::TListType& blockItemTypes, TExprContext& ctx, bool allowScalar = true);
136136
bool EnsureWideFlowBlockType(const TExprNode& node, TTypeAnnotationNode::TListType& blockItemTypes, TExprContext& ctx, bool allowScalar = true);

ydb/library/yql/core/yql_type_annotation.cpp

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,61 @@ void TTypeAnnotationContext::Reset() {
5656
ExpectedConstraints.clear();
5757
ExpectedColumnOrders.clear();
5858
StatisticsMap.clear();
59+
NoBlockRewriteCallableStats.clear();
60+
NoBlockRewriteTypeStats.clear();
61+
}
62+
63+
void TTypeAnnotationContext::IncNoBlockCallable(TStringBuf callableName) {
64+
++NoBlockRewriteCallableStats[callableName];
65+
}
66+
67+
void TTypeAnnotationContext::IncNoBlockType(const TTypeAnnotationNode& type) {
68+
if (type.GetKind() == ETypeAnnotationKind::Data) {
69+
IncNoBlockType(type.Cast<TDataExprType>()->GetSlot());
70+
} else {
71+
IncNoBlockType(type.GetKind());
72+
}
73+
}
74+
75+
void TTypeAnnotationContext::IncNoBlockType(ETypeAnnotationKind kind) {
76+
++NoBlockRewriteTypeStats[ToString(kind)];
77+
}
78+
79+
void TTypeAnnotationContext::IncNoBlockType(NUdf::EDataSlot slot) {
80+
++NoBlockRewriteTypeStats[ToString(slot)];
81+
}
82+
83+
namespace {
84+
85+
template<typename T>
86+
TVector<T> GetMaxByCount(const THashMap<T, size_t>& stats, size_t maxCount) {
87+
TVector<T> result;
88+
result.reserve(stats.size());
89+
for (auto& [key, _] : stats) {
90+
result.push_back(key);
91+
}
92+
size_t n = std::min(maxCount, stats.size());
93+
std::partial_sort(result.begin(), result.begin() + n, result.end(),
94+
[&stats](const T& l, const T& r) {
95+
const auto& cntLeft = stats.find(l)->second;
96+
const auto& cntRight = stats.find(r)->second;
97+
if (cntLeft != cntRight) {
98+
return cntLeft < cntRight;
99+
}
100+
return l < r;
101+
});
102+
result.resize(n);
103+
return result;
104+
}
105+
106+
}
107+
108+
TVector<TString> TTypeAnnotationContext::GetTopNoBlocksCallables(size_t maxCount) const {
109+
return GetMaxByCount(NoBlockRewriteCallableStats, maxCount);
110+
}
111+
112+
TVector<TString> TTypeAnnotationContext::GetTopNoBlocksTypes(size_t maxCount) const {
113+
return GetMaxByCount(NoBlockRewriteTypeStats, maxCount);
59114
}
60115

61116
TString TColumnOrder::Find(const TString& name) const {

ydb/library/yql/core/yql_type_annotation.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,8 @@ struct TTypeAnnotationContext: public TThrRefBase {
334334
ui32 FolderSubDirsLimit = 1000;
335335
bool UseBlocks = false;
336336
EBlockEngineMode BlockEngineMode = EBlockEngineMode::Disable;
337+
THashMap<TString, size_t> NoBlockRewriteCallableStats;
338+
THashMap<TString, size_t> NoBlockRewriteTypeStats;
337339
TMaybe<bool> PgEmitAggApply;
338340
IArrowResolver::TPtr ArrowResolver;
339341
TFileStoragePtr FileStorage;
@@ -441,6 +443,14 @@ struct TTypeAnnotationContext: public TThrRefBase {
441443
bool IsBlockEngineEnabled() const {
442444
return BlockEngineMode != EBlockEngineMode::Disable || UseBlocks;
443445
}
446+
447+
void IncNoBlockCallable(TStringBuf callableName);
448+
void IncNoBlockType(const TTypeAnnotationNode& type);
449+
void IncNoBlockType(ETypeAnnotationKind kind);
450+
void IncNoBlockType(NUdf::EDataSlot slot);
451+
452+
TVector<TString> GetTopNoBlocksCallables(size_t maxCount) const;
453+
TVector<TString> GetTopNoBlocksTypes(size_t maxCount) const;
444454
};
445455

446456
template <> inline

0 commit comments

Comments
 (0)