Skip to content

Commit 3cd44d8

Browse files
authored
[KQP] fix plan range scan bug with empty string in bounds (#10190)
1 parent e66ee0c commit 3cd44d8

File tree

5 files changed

+148
-207
lines changed

5 files changed

+148
-207
lines changed

ydb/core/kqp/opt/kqp_query_plan.cpp

Lines changed: 97 additions & 207 deletions
Original file line number | Diff line number | Diff line change
@@ -599,206 +599,94 @@ class TxPlanSerializer {
599599
if (sourceSettings.RangesExpr().Maybe<TKqlKeyRange>()) {
600600
auto table = TString(sourceSettings.Table().Path());
601601
auto range = sourceSettings.RangesExpr().Cast<TKqlKeyRange>();
602+
Visit(table, range, sourceSettings, planNode);
603+
return;
604+
}
605+
606+
const auto table = TString(sourceSettings.Table().Path());
607+
const auto explainPrompt = TKqpReadTableExplainPrompt::Parse(sourceSettings.ExplainPrompt().Cast());
602608

603-
TOperator op;
604-
TTableRead readInfo;
605-
606-
auto describeBoundary = [this](const TExprBase& key) {
607-
if (auto param = key.Maybe<TCoParameter>()) {
608-
return param.Cast().Name().StringValue();
609-
}
610-
611-
if (auto param = key.Maybe<TCoNth>().Tuple().Maybe<TCoParameter>()) {
612-
if (auto maybeResultBinding = ContainResultBinding(param.Cast().Name().StringValue())) {
613-
auto [txId, resId] = *maybeResultBinding;
614-
if (auto result = GetResult(txId, resId)) {
615-
auto index = FromString<ui32>(key.Cast<TCoNth>().Index());
616-
Y_ENSURE(index < result->Size());
617-
return DescribeValue((*result)[index]);
618-
}
619-
}
620-
}
621-
622-
if (auto literal = key.Maybe<TCoUuid>()) {
623-
return NUuid::UuidBytesToString(literal.Cast().Literal().StringValue());
624-
}
625-
626-
if (auto literal = key.Maybe<TCoDataCtor>()) {
627-
return literal.Cast().Literal().StringValue();
628-
}
629-
630-
if (auto literal = key.Maybe<TCoNothing>()) {
631-
return TString("null");
632-
}
633-
634-
return TString("n/a");
635-
};
636-
637-
/* Collect info about scan range */
638-
struct TKeyPartRange {
639-
TString From;
640-
TString To;
641-
TString ColumnName;
642-
};
643-
auto& tableData = SerializerCtx.TablesData->GetTable(SerializerCtx.Cluster, table);
644-
op.Properties["Table"] = tableData.RelativePath ? *tableData.RelativePath : table;
645-
planNode.NodeInfo["Tables"].AppendValue(op.Properties["Table"]);
646-
TVector<TKeyPartRange> scanRangeDescr(tableData.Metadata->KeyColumnNames.size());
647-
648-
auto maybeFromKey = range.From().Maybe<TKqlKeyTuple>();
649-
auto maybeToKey = range.To().Maybe<TKqlKeyTuple>();
650-
if (maybeFromKey && maybeToKey) {
651-
auto fromKey = maybeFromKey.Cast();
652-
auto toKey = maybeToKey.Cast();
653-
654-
for (ui32 i = 0; i < fromKey.ArgCount(); ++i) {
655-
scanRangeDescr[i].From = describeBoundary(fromKey.Arg(i));
656-
}
657-
for (ui32 i = 0; i < toKey.ArgCount(); ++i) {
658-
scanRangeDescr[i].To = describeBoundary(toKey.Arg(i));
659-
}
660-
for (ui32 i = 0; i < scanRangeDescr.size(); ++i) {
661-
scanRangeDescr[i].ColumnName = tableData.Metadata->KeyColumnNames[i];
662-
}
663-
664-
TString leftParen = range.From().Maybe<TKqlKeyInc>().IsValid() ? "[" : "(";
665-
TString rightParen = range.To().Maybe<TKqlKeyInc>().IsValid() ? "]" : ")";
666-
bool hasRangeScans = false;
667-
auto& ranges = op.Properties["ReadRange"];
668-
for (const auto& keyPartRange : scanRangeDescr) {
669-
TStringBuilder rangeDescr;
670-
671-
if (keyPartRange.From == keyPartRange.To) {
672-
if (keyPartRange.From.Empty()) {
673-
rangeDescr << keyPartRange.ColumnName << " (-∞, +∞)";
674-
readInfo.ScanBy.push_back(rangeDescr);
675-
} else {
676-
rangeDescr << keyPartRange.ColumnName
677-
<< " (" << RemoveForbiddenChars(keyPartRange.From) << ")";
678-
readInfo.LookupBy.push_back(rangeDescr);
679-
}
680-
} else {
681-
rangeDescr << keyPartRange.ColumnName << " "
682-
<< (keyPartRange.From.Empty() ? "(" : leftParen)
683-
<< (keyPartRange.From.Empty() ? "-∞" : RemoveForbiddenChars(keyPartRange.From)) << ", "
684-
<< (keyPartRange.To.Empty() ? "+∞" : RemoveForbiddenChars(keyPartRange.To))
685-
<< (keyPartRange.To.Empty() ? ")" : rightParen);
686-
readInfo.ScanBy.push_back(rangeDescr);
687-
hasRangeScans = true;
688-
}
609+
TTableRead readInfo;
610+
TOperator op;
689611

690-
ranges.AppendValue(rangeDescr);
691-
}
612+
auto& tableData = SerializerCtx.TablesData->GetTable(SerializerCtx.Cluster, table);
613+
op.Properties["Table"] = tableData.RelativePath ? *tableData.RelativePath : table;
614+
planNode.NodeInfo["Tables"].AppendValue(op.Properties["Table"]);
692615

693-
if (readInfo.LookupBy.size() > 0) {
694-
bool isFullPk = readInfo.LookupBy.size() == tableData.Metadata->KeyColumnNames.size();
695-
readInfo.Type = isFullPk ? EPlanTableReadType::Lookup : EPlanTableReadType::Scan;
696-
} else {
697-
readInfo.Type = hasRangeScans ? EPlanTableReadType::Scan : EPlanTableReadType::FullScan;
698-
}
699-
}
616+
auto rangesDesc = NPlanUtils::PrettyExprStr(sourceSettings.RangesExpr());
617+
if (rangesDesc == "Void" || explainPrompt.UsedKeyColumns.empty()) {
618+
readInfo.Type = EPlanTableReadType::FullScan;
700619

701-
auto& columns = op.Properties["ReadColumns"];
702-
for (auto const& col : sourceSettings.Columns()) {
703-
readInfo.Columns.emplace_back(TString(col.Value()));
704-
columns.AppendValue(col.Value());
620+
auto& ranges = op.Properties["ReadRanges"];
621+
for (const auto& col : tableData.Metadata->KeyColumnNames) {
622+
TStringBuilder rangeDesc;
623+
rangeDesc << col << " (-∞, +∞)";
624+
readInfo.ScanBy.push_back(rangeDesc);
625+
ranges.AppendValue(rangeDesc);
705626
}
627+
} else if (auto maybeResultBinding = ContainResultBinding(rangesDesc)) {
628+
readInfo.Type = EPlanTableReadType::Scan;
706629

707-
AddReadTableSettings(op, sourceSettings.Settings(), readInfo);
708-
709-
AddOptimizerEstimates(op, sourceSettings);
710-
711-
SerializerCtx.Tables[table].Reads.push_back(readInfo);
712-
713-
auto readName = GetNameByReadType(readInfo.Type);
714-
op.Properties["Name"] = readName;
715-
AddOperator(planNode, readName, std::move(op));
716-
} else {
717-
const auto table = TString(sourceSettings.Table().Path());
718-
const auto explainPrompt = TKqpReadTableExplainPrompt::Parse(sourceSettings.ExplainPrompt().Cast());
719-
720-
TTableRead readInfo;
721-
TOperator op;
722-
723-
auto& tableData = SerializerCtx.TablesData->GetTable(SerializerCtx.Cluster, table);
724-
op.Properties["Table"] = tableData.RelativePath ? *tableData.RelativePath : table;
725-
planNode.NodeInfo["Tables"].AppendValue(op.Properties["Table"]);
726-
727-
auto rangesDesc = NPlanUtils::PrettyExprStr(sourceSettings.RangesExpr());
728-
if (rangesDesc == "Void" || explainPrompt.UsedKeyColumns.empty()) {
729-
readInfo.Type = EPlanTableReadType::FullScan;
730-
731-
auto& ranges = op.Properties["ReadRanges"];
732-
for (const auto& col : tableData.Metadata->KeyColumnNames) {
733-
TStringBuilder rangeDesc;
734-
rangeDesc << col << " (-∞, +∞)";
735-
readInfo.ScanBy.push_back(rangeDesc);
736-
ranges.AppendValue(rangeDesc);
737-
}
738-
} else if (auto maybeResultBinding = ContainResultBinding(rangesDesc)) {
739-
readInfo.Type = EPlanTableReadType::Scan;
630+
auto [txId, resId] = *maybeResultBinding;
631+
if (auto result = GetResult(txId, resId)) {
632+
auto ranges = (*result)[0];
633+
const auto& keyColumns = tableData.Metadata->KeyColumnNames;
634+
for (size_t rangeId = 0; rangeId < ranges.Size(); ++rangeId) {
635+
Y_ENSURE(ranges[rangeId].HaveValue() && ranges[rangeId].Size() == 2);
636+
auto from = ranges[rangeId][0];
637+
auto to = ranges[rangeId][1];
740638

741-
auto [txId, resId] = *maybeResultBinding;
742-
if (auto result = GetResult(txId, resId)) {
743-
auto ranges = (*result)[0];
744-
const auto& keyColumns = tableData.Metadata->KeyColumnNames;
745-
for (size_t rangeId = 0; rangeId < ranges.Size(); ++rangeId) {
746-
Y_ENSURE(ranges[rangeId].HaveValue() && ranges[rangeId].Size() == 2);
747-
auto from = ranges[rangeId][0];
748-
auto to = ranges[rangeId][1];
749-
750-
for (size_t colId = 0; colId < keyColumns.size(); ++colId) {
751-
if (!from[colId].HaveValue() && !to[colId].HaveValue()) {
752-
continue;
753-
}
639+
for (size_t colId = 0; colId < keyColumns.size(); ++colId) {
640+
if (!from[colId].HaveValue() && !to[colId].HaveValue()) {
641+
continue;
642+
}
754643

755-
TStringBuilder rangeDesc;
756-
rangeDesc << keyColumns[colId] << " "
757-
<< (from[keyColumns.size()].GetDataText() == "1" ? "[" : "(")
758-
<< (from[colId].HaveValue() ? RemoveForbiddenChars(from[colId].GetSimpleValueText()) : "-∞") << ", "
759-
<< (to[colId].HaveValue() ? RemoveForbiddenChars(to[colId].GetSimpleValueText()) : "+∞")
760-
<< (to[keyColumns.size()].GetDataText() == "1" ? "]" : ")");
644+
TStringBuilder rangeDesc;
645+
rangeDesc << keyColumns[colId] << " "
646+
<< (from[keyColumns.size()].GetDataText() == "1" ? "[" : "(")
647+
<< (from[colId].HaveValue() ? RemoveForbiddenChars(from[colId].GetSimpleValueText()) : "-∞") << ", "
648+
<< (to[colId].HaveValue() ? RemoveForbiddenChars(to[colId].GetSimpleValueText()) : "+∞")
649+
<< (to[keyColumns.size()].GetDataText() == "1" ? "]" : ")");
761650

762-
readInfo.ScanBy.push_back(rangeDesc);
763-
op.Properties["ReadRanges"].AppendValue(rangeDesc);
764-
}
651+
readInfo.ScanBy.push_back(rangeDesc);
652+
op.Properties["ReadRanges"].AppendValue(rangeDesc);
765653
}
766-
} else {
767-
op.Properties["ReadRanges"] = rangesDesc;
768654
}
769655
} else {
770-
Y_ENSURE(false, rangesDesc);
656+
op.Properties["ReadRanges"] = rangesDesc;
771657
}
658+
} else {
659+
Y_ENSURE(false, rangesDesc);
660+
}
772661

773-
if (!explainPrompt.UsedKeyColumns.empty()) {
774-
auto& usedColumns = op.Properties["ReadRangesKeys"];
775-
for (const auto& col : explainPrompt.UsedKeyColumns) {
776-
usedColumns.AppendValue(col);
777-
}
662+
if (!explainPrompt.UsedKeyColumns.empty()) {
663+
auto& usedColumns = op.Properties["ReadRangesKeys"];
664+
for (const auto& col : explainPrompt.UsedKeyColumns) {
665+
usedColumns.AppendValue(col);
778666
}
667+
}
779668

780-
if (explainPrompt.ExpectedMaxRanges) {
781-
op.Properties["ReadRangesExpectedSize"] = ToString(*explainPrompt.ExpectedMaxRanges);
782-
}
669+
if (explainPrompt.ExpectedMaxRanges) {
670+
op.Properties["ReadRangesExpectedSize"] = ToString(*explainPrompt.ExpectedMaxRanges);
671+
}
783672

784-
op.Properties["ReadRangesPointPrefixLen"] = ToString(explainPrompt.PointPrefixLen);
673+
op.Properties["ReadRangesPointPrefixLen"] = ToString(explainPrompt.PointPrefixLen);
785674

786-
auto& columns = op.Properties["ReadColumns"];
787-
for (const auto& col : sourceSettings.Columns()) {
788-
readInfo.Columns.emplace_back(TString(col.Value()));
789-
columns.AppendValue(col.Value());
790-
}
675+
auto& columns = op.Properties["ReadColumns"];
676+
for (const auto& col : sourceSettings.Columns()) {
677+
readInfo.Columns.emplace_back(TString(col.Value()));
678+
columns.AppendValue(col.Value());
679+
}
791680

792-
AddReadTableSettings(op, sourceSettings.Settings(), readInfo);
681+
AddReadTableSettings(op, sourceSettings.Settings(), readInfo);
793682

794-
AddOptimizerEstimates(op, sourceSettings);
683+
AddOptimizerEstimates(op, sourceSettings);
795684

796-
auto readName = GetNameByReadType(readInfo.Type);
797-
op.Properties["Name"] = readName;
798-
AddOperator(planNode, readName, std::move(op));
685+
auto readName = GetNameByReadType(readInfo.Type);
686+
op.Properties["Name"] = readName;
687+
AddOperator(planNode, readName, std::move(op));
799688

800-
SerializerCtx.Tables[table].Reads.push_back(std::move(readInfo));
801-
}
689+
SerializerCtx.Tables[table].Reads.push_back(std::move(readInfo));
802690
}
803691

804692
// Try to get cluster from data source or data sink node
@@ -1015,7 +903,9 @@ class TxPlanSerializer {
1015903
TMaybe<std::variant<ui32, TArgContext>> operatorId;
1016904

1017905
if (auto maybeRead = TMaybeNode<TKqlReadTableBase>(node)) {
1018-
operatorId = Visit(maybeRead.Cast(), planNode);
906+
auto read = maybeRead.Cast();
907+
TString table = TString(read.Table().Path()); TKqlKeyRange range = read.Range();
908+
operatorId = Visit(table, range, read, planNode);
1019909
} else if (TMaybeNode<TKqlReadTableRangesBase>(node) && !TMaybeNode<TKqpReadOlapTableRangesBase>(node)) {
1020910
auto maybeReadRanges = TMaybeNode<TKqlReadTableRangesBase>(node);
1021911
operatorId = Visit(maybeReadRanges.Cast(), planNode);
@@ -1742,45 +1632,41 @@ class TxPlanSerializer {
17421632
return operatorId;
17431633
}
17441634

1745-
std::variant<ui32, TArgContext> Visit(const TKqlReadTableBase& read, TQueryPlanNode& planNode) {
1746-
auto table = TString(read.Table().Path());
1747-
auto range = read.Range();
1748-
1635+
template <typename TReadTableNode>
1636+
std::variant<ui32, TArgContext> Visit(const TString& table, const TKqlKeyRange& range, const TReadTableNode& read, TQueryPlanNode& planNode) {
17491637
TOperator op;
17501638
TTableRead readInfo;
17511639

17521640
auto describeBoundary = [this](const TExprBase& key) {
1753-
if (auto param = key.Maybe<TCoParameter>()) {
1754-
return param.Cast().Name().StringValue();
1755-
}
1641+
TString res("n/a");
17561642

1757-
if (auto param = key.Maybe<TCoNth>().Tuple().Maybe<TCoParameter>()) {
1643+
if (auto param = key.Maybe<TCoParameter>()) {
1644+
res = param.Cast().Name().StringValue();
1645+
} else if (auto param = key.Maybe<TCoNth>().Tuple().Maybe<TCoParameter>()) {
17581646
if (auto maybeResultBinding = ContainResultBinding(param.Cast().Name().StringValue())) {
17591647
auto [txId, resId] = *maybeResultBinding;
17601648
if (auto result = GetResult(txId, resId)) {
17611649
auto index = FromString<ui32>(key.Cast<TCoNth>().Index());
17621650
Y_ENSURE(index < result->Size());
1763-
return DescribeValue((*result)[index]);
1651+
res = DescribeValue((*result)[index]);
17641652
}
17651653
}
1654+
} else if (auto literal = key.Maybe<TCoUuid>()) {
1655+
res = NUuid::UuidBytesToString(literal.Cast().Literal().StringValue());
1656+
} else if (auto literal = key.Maybe<TCoDataCtor>()) {
1657+
res = literal.Cast().Literal().StringValue();
1658+
} else if (auto literal = key.Maybe<TCoNothing>()) {
1659+
res = TString("null");
17661660
}
17671661

1768-
if (auto literal = key.Maybe<TCoDataCtor>()) {
1769-
return literal.Cast().Literal().StringValue();
1770-
}
1771-
1772-
if (auto literal = key.Maybe<TCoNothing>()) {
1773-
return TString("null");
1774-
}
1775-
1776-
return TString("n/a");
1662+
return res.empty()? "«»" : res;
17771663
};
17781664

17791665
/* Collect info about scan range */
17801666
struct TKeyPartRange {
1781-
TString From;
1782-
TString To;
1783-
TString ColumnName;
1667+
std::optional<std::string> From{};
1668+
std::optional<std::string> To{};
1669+
std::string ColumnName{};
17841670
};
17851671
auto& tableData = SerializerCtx.TablesData->GetTable(SerializerCtx.Cluster, table);
17861672
op.Properties["Table"] = tableData.RelativePath ? *tableData.RelativePath : table;
@@ -1811,20 +1697,20 @@ class TxPlanSerializer {
18111697
TStringBuilder rangeDescr;
18121698

18131699
if (keyPartRange.From == keyPartRange.To) {
1814-
if (keyPartRange.From.Empty()) {
1700+
if (!keyPartRange.From.has_value()) {
18151701
rangeDescr << keyPartRange.ColumnName << " (-∞, +∞)";
18161702
readInfo.ScanBy.push_back(rangeDescr);
18171703
} else {
18181704
rangeDescr << keyPartRange.ColumnName
1819-
<< " (" << RemoveForbiddenChars(keyPartRange.From) << ")";
1705+
<< " (" << RemoveForbiddenChars(*keyPartRange.From) << ")";
18201706
readInfo.LookupBy.push_back(rangeDescr);
18211707
}
18221708
} else {
18231709
rangeDescr << keyPartRange.ColumnName << " "
1824-
<< (keyPartRange.From.Empty() ? "(" : leftParen)
1825-
<< (keyPartRange.From.Empty() ? "-∞" : RemoveForbiddenChars(keyPartRange.From)) << ", "
1826-
<< (keyPartRange.To.Empty() ? "+∞" : RemoveForbiddenChars(keyPartRange.To))
1827-
<< (keyPartRange.To.Empty() ? ")" : rightParen);
1710+
<< (!keyPartRange.From.has_value() ? "(" : leftParen)
1711+
<< (!keyPartRange.From.has_value() ? "-∞" : RemoveForbiddenChars(*keyPartRange.From)) << ", "
1712+
<< (!keyPartRange.To.has_value() ? "+∞" : RemoveForbiddenChars(*keyPartRange.To))
1713+
<< (!keyPartRange.To.has_value() ? ")" : rightParen);
18281714
readInfo.ScanBy.push_back(rangeDescr);
18291715
hasRangeScans = true;
18301716
}
@@ -1846,7 +1732,11 @@ class TxPlanSerializer {
18461732
columns.AppendValue(col.Value());
18471733
}
18481734

1849-
AddReadTableSettings(op, read, readInfo);
1735+
if constexpr (std::is_same_v<TKqpReadRangesSourceSettings, TReadTableNode>) {
1736+
AddReadTableSettings(op, read.Settings(), readInfo);
1737+
} else {
1738+
AddReadTableSettings(op, read, readInfo);
1739+
}
18501740

18511741
SerializerCtx.Tables[table].Reads.push_back(readInfo);
18521742

0 commit comments

Comments
 (0)