Skip to content

Commit b9ac376

Browse files
authored
Split view_utils (#14492)
1 parent b481980 commit b9ac376

File tree

8 files changed

+321
-269
lines changed

8 files changed

+321
-269
lines changed

ydb/public/lib/ydb_cli/dump/restore_impl.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,11 @@
99
#include <ydb/public/lib/ydb_cli/common/retry_func.h>
1010
#include <ydb/public/lib/ydb_cli/dump/files/files.h>
1111
#include <ydb/public/lib/ydb_cli/dump/util/log.h>
12-
#include <ydb/public/lib/ydb_cli/dump/util/rewrite_query.h>
12+
#include <ydb/public/lib/ydb_cli/dump/util/query_utils.h>
1313
#include <ydb/public/lib/ydb_cli/dump/util/util.h>
1414
#include <ydb/public/lib/ydb_cli/dump/util/view_utils.h>
1515
#include <ydb-cpp-sdk/client/proto/accessor.h>
16+
#include <yql/essentials/public/issue/yql_issue.h>
1617

1718
#include <library/cpp/threading/future/core/future.h>
1819

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,273 @@
1+
#include "query_utils.h"
2+
3+
#include <yql/essentials/parser/proto_ast/gen/v1/SQLv1Lexer.h>
4+
#include <yql/essentials/parser/proto_ast/gen/v1_proto_split/SQLv1Parser.pb.main.h>
5+
#include <yql/essentials/sql/settings/translation_settings.h>
6+
#include <yql/essentials/sql/v1/format/sql_format.h>
7+
#include <yql/essentials/sql/v1/proto_parser/proto_parser.h>
8+
#include <yql/essentials/public/issue/yql_issue.h>
9+
10+
#include <library/cpp/protobuf/util/simple_reflection.h>
11+
12+
#include <util/folder/pathsplit.h>
13+
#include <util/string/builder.h>
14+
#include <util/string/strip.h>
15+
16+
#include <re2/re2.h>
17+
18+
#include <format>
19+
20+
namespace NYdb::NDump {
21+
22+
using namespace NSQLv1Generated;
23+
24+
// Re-bases `path` from `backupRoot` onto `restoreRoot`.
//
// When both roots are equal the path is returned unchanged. Otherwise the
// leading components that `path` shares with `backupRoot` are dropped, one
// ".." component is appended to `restoreRoot` for every `backupRoot`
// component that `path` did not match, and the remaining components of
// `path` are appended after that.
TString RewriteAbsolutePath(TStringBuf path, TStringBuf backupRoot, TStringBuf restoreRoot) {
    if (backupRoot == restoreRoot) {
        return TString(path);
    }

    TPathSplitUnix pathParts(path);
    TPathSplitUnix backupParts(backupRoot);

    // Length of the common leading run of components.
    size_t common = 0;
    for (; common < pathParts.size() && common < backupParts.size(); ++common) {
        if (!(pathParts[common] == backupParts[common])) {
            break;
        }
    }

    TPathSplitUnix result(restoreRoot);

    // One step up for each backup root component the path does not share.
    for (size_t i = common; i < backupParts.size(); ++i) {
        result.AppendComponent("..");
    }

    result.AppendMany(pathParts.begin() + common, pathParts.end());
    return result.Reconstruct();
}
46+
47+
namespace {
48+
49+
struct TAbsolutePathRewriter {
50+
const TStringBuf BackupRoot;
51+
const TStringBuf RestoreRoot;
52+
53+
static bool IsAbsolutePath(TStringBuf path) {
54+
return path.StartsWith("`/") && path.EndsWith('`');
55+
}
56+
57+
TString RewriteAbsolutePath(TStringBuf path) const {
58+
if (BackupRoot == RestoreRoot) {
59+
return TString(path);
60+
}
61+
62+
return TStringBuilder() << '`' << NDump::RewriteAbsolutePath(path.Skip(1).Chop(1), BackupRoot, RestoreRoot) << '`';
63+
}
64+
65+
public:
66+
explicit TAbsolutePathRewriter(TStringBuf backupRoot, TStringBuf restoreRoot)
67+
: BackupRoot(backupRoot)
68+
, RestoreRoot(restoreRoot)
69+
{
70+
}
71+
72+
TString operator()(const TString& path) const {
73+
if (IsAbsolutePath(path)) {
74+
return RewriteAbsolutePath(path);
75+
}
76+
77+
return path;
78+
}
79+
};
80+
81+
struct TTokenCollector {
82+
explicit TTokenCollector(std::function<TString(const TString&)>&& pathRewriter = {})
83+
: PathRewriter(std::move(pathRewriter))
84+
{
85+
}
86+
87+
void operator()(const NProtoBuf::Message& message) {
88+
if (const auto* token = dynamic_cast<const TToken*>(&message)) {
89+
const auto& value = token->GetValue();
90+
if (token->GetId() != NALPDefault::SQLv1LexerTokens::TOKEN_EOF) {
91+
if (!Tokens.empty()) {
92+
Tokens << ' ';
93+
}
94+
Tokens << (IsRefDescendent && PathRewriter ? PathRewriter(value) : value);
95+
}
96+
}
97+
}
98+
99+
TStringBuilder Tokens;
100+
bool IsRefDescendent = false;
101+
std::function<TString(const TString&)> PathRewriter;
102+
};
103+
104+
// Depth-first walk over every message-typed field of `msg` (repeated fields
// included). `callback` is invoked for each nested message; the walk descends
// into that message only when the callback returns true.
void VisitAllFields(const NProtoBuf::Message& msg, const std::function<bool(const NProtoBuf::Message&)>& callback) {
    const auto* descriptor = msg.GetDescriptor();
    const int fieldCount = descriptor->field_count();

    for (int fieldIndex = 0; fieldIndex < fieldCount; ++fieldIndex) {
        NProtoBuf::TConstField field(msg, descriptor->field(fieldIndex));
        if (!field.IsMessage()) {
            continue;
        }

        for (size_t item = 0; item < field.Size(); ++item) {
            const auto& child = *field.Get<NProtoBuf::Message>(item);
            if (callback(child)) {
                VisitAllFields(child, callback);
            }
        }
    }
}
119+
120+
template <typename TRef>
121+
void VisitAllFields(const NProtoBuf::Message& msg, TTokenCollector& callback) {
122+
const auto* md = msg.GetDescriptor();
123+
for (int i = 0; i < md->field_count(); ++i) {
124+
const auto* fd = md->field(i);
125+
NProtoBuf::TConstField field(msg, fd);
126+
if (field.IsMessage()) {
127+
for (size_t j = 0; j < field.Size(); ++j) {
128+
const auto& message = *field.Get<NProtoBuf::Message>(j);
129+
const auto* ref = dynamic_cast<const TRef*>(&message);
130+
if (ref) {
131+
callback.IsRefDescendent = true;
132+
}
133+
134+
callback(message);
135+
VisitAllFields<TRef>(message, callback);
136+
137+
if (ref) {
138+
callback.IsRefDescendent = false;
139+
}
140+
}
141+
}
142+
}
143+
}
144+
145+
struct TTableRefValidator {
146+
// returns true if the message is not a table ref and we need to dive deeper to find it
147+
bool operator()(const NProtoBuf::Message& message) {
148+
const auto* ref = dynamic_cast<const TRule_table_ref*>(&message);
149+
if (!ref) {
150+
return true;
151+
}
152+
153+
// implementation note: a better idea might be to create a custom grammar for validation
154+
if (ref->HasBlock3() && ref->GetBlock3().HasAlt1() && ref->GetBlock3().GetAlt1().HasRule_table_key1()) {
155+
// Table keys are considered save for view backups.
156+
return false;
157+
}
158+
159+
// The only kind of table references in views that we really cannot restore are evaluated absolute paths:
160+
// $path = "/old_db" || "/t"; select * from $path;
161+
// If the view is being restored to a different database (like "/new_db"),
162+
// then the saved create view statement will need manual patching to succeed.
163+
TTokenCollector tokenCollector;
164+
VisitAllFields<TRule_table_ref>(*ref, tokenCollector);
165+
const TString refString = tokenCollector.Tokens;
166+
167+
Issues.AddIssue(TStringBuilder() << "Please check that the reference: " << refString.Quote()
168+
<< " contains no evaluated expressions."
169+
);
170+
Issues.back().Severity = NYql::TSeverityIds::S_WARNING;
171+
172+
return false;
173+
}
174+
175+
NYql::TIssues& Issues;
176+
};
177+
178+
} // anonymous
179+
180+
bool SqlToProtoAst(const TString& queryStr, TRule_sql_query& queryProto, NYql::TIssues& issues) {
181+
NSQLTranslation::TTranslationSettings settings;
182+
if (!NSQLTranslation::ParseTranslationSettings(queryStr, settings, issues)) {
183+
return false;
184+
}
185+
if (settings.SyntaxVersion == 0) {
186+
issues.AddIssue("cannot handle YQL syntax version 0");
187+
return false;
188+
}
189+
190+
google::protobuf::Arena arena;
191+
const auto* parserProto = NSQLTranslationV1::SqlAST(
192+
queryStr, "query", issues, 0, settings.AnsiLexer, settings.Antlr4Parser, settings.TestAntlr4, &arena
193+
);
194+
if (!parserProto) {
195+
return false;
196+
}
197+
198+
queryProto = static_cast<const TSQLv1ParserAST&>(*parserProto).GetRule_sql_query();
199+
return true;
200+
}
201+
202+
bool Format(const TString& query, TString& formattedQuery, NYql::TIssues& issues) {
203+
google::protobuf::Arena arena;
204+
NSQLTranslation::TTranslationSettings settings;
205+
settings.Arena = &arena;
206+
207+
auto formatter = NSQLFormat::MakeSqlFormatter(settings);
208+
return formatter->Format(query, formattedQuery, issues);
209+
}
210+
211+
bool ValidateTableRefs(const TRule_sql_query& query, NYql::TIssues& issues) {
212+
TTableRefValidator tableRefValidator(issues);
213+
VisitAllFields(query, tableRefValidator);
214+
return tableRefValidator.Issues.Empty();
215+
}
216+
217+
template <typename TRef>
218+
TString RewriteRefs(const TRule_sql_query& query, TStringBuf backupRoot, TStringBuf restoreRoot) {
219+
TTokenCollector tokenCollector(TAbsolutePathRewriter(backupRoot, restoreRoot));
220+
VisitAllFields<TRef>(query, tokenCollector);
221+
return tokenCollector.Tokens;
222+
}
223+
224+
template <typename TRef>
225+
bool RewriteRefs(TString& queryStr, TStringBuf backupRoot, TStringBuf restoreRoot, NYql::TIssues& issues) {
226+
TRule_sql_query queryProto;
227+
if (!SqlToProtoAst(queryStr, queryProto, issues)) {
228+
return false;
229+
}
230+
231+
const auto rewrittenQuery = RewriteRefs<TRef>(queryProto, backupRoot, restoreRoot);
232+
// formatting here is necessary for the view to have pretty text inside it after the creation
233+
if (!Format(rewrittenQuery, queryStr, issues)) {
234+
return false;
235+
}
236+
237+
return true;
238+
}
239+
240+
// Rewrites absolute paths inside table references (TRule_table_ref) of `query`
// from `backupRoot` to `restoreRoot`. The query is updated in place; the
// return value reports whether the rewrite succeeded.
bool RewriteTableRefs(TString& query, TStringBuf backupRoot, TStringBuf restoreRoot, NYql::TIssues& issues) {
    const bool rewritten = RewriteRefs<TRule_table_ref>(query, backupRoot, restoreRoot, issues);
    return rewritten;
}
243+
244+
// Rewrites absolute paths inside object references (TRule_object_ref) of
// `query` from `backupRoot` to `restoreRoot`. The query is updated in place;
// the return value reports whether the rewrite succeeded.
bool RewriteObjectRefs(TString& query, TStringBuf backupRoot, TStringBuf restoreRoot, NYql::TIssues& issues) {
    const bool rewritten = RewriteRefs<TRule_object_ref>(query, backupRoot, restoreRoot, issues);
    return rewritten;
}
247+
248+
// Replaces the first occurrence of `pattern` in `query`, substituting `dbPath`
// for the path that is currently there.
//
// `pattern` is a std::format string with a single "{}" placeholder. It is used
// twice: formatted with "\S+" it becomes the regular expression to search for,
// and formatted with `dbPath` it becomes the replacement text.
//
// Returns true when a replacement was made. Otherwise an issue describing the
// missing pattern is added to `issues` and false is returned.
bool RewriteCreateQuery(TString& query, std::string_view pattern, const std::string& dbPath, NYql::TIssues& issues) {
    const auto searchPattern = std::vformat(pattern, std::make_format_args("\\S+"));

    // RE2 treats backslashes in the rewrite string as escapes (\0..\9, \\).
    // Double them so that the database path is always inserted literally.
    std::string escapedDbPath;
    escapedDbPath.reserve(dbPath.size());
    for (const char c : dbPath) {
        if (c == '\\') {
            escapedDbPath += '\\';
        }
        escapedDbPath += c;
    }

    const auto rewrite = std::vformat(pattern, std::make_format_args(escapedDbPath));
    if (re2::RE2::Replace(&query, searchPattern, rewrite)) {
        return true;
    }

    issues.AddIssue(TStringBuilder() << "Pattern: \"" << pattern << "\" was not found: " << query.Quote());
    return false;
}
257+
258+
// Scans `query` line by line for a comment of the form
//     -- backup root: "<path>"
// and returns <path>. Each line is stripped of surrounding whitespace before
// the check. The last character of a matching line is dropped as the closing
// quote. Returns an empty string when no such line exists.
TString GetBackupRoot(TStringInput query) {
    constexpr TStringBuf marker = "-- backup root: \"";
    constexpr TStringBuf closingQuote = "\"";

    for (TString line; query.ReadLine(line);) {
        StripInPlace(line);
        if (!line.StartsWith(marker)) {
            continue;
        }

        TStringBuf root(line);
        root.Skip(marker.size());
        root.Chop(closingQuote.size());
        return TString(root);
    }

    return TString();
}
272+
273+
} // NYdb::NDump
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#pragma once
2+
3+
#include <util/generic/string.h>
4+
#include <util/stream/str.h>
5+
6+
#include <string>
7+
#include <string_view>
8+
9+
namespace NYql {
10+
class TIssues;
11+
}
12+
13+
namespace NSQLv1Generated {
14+
class TRule_sql_query;
15+
}
16+
17+
namespace NYdb::NDump {
18+
19+
bool SqlToProtoAst(const TString& queryStr, NSQLv1Generated::TRule_sql_query& queryProto, NYql::TIssues& issues);
20+
bool Format(const TString& query, TString& formattedQuery, NYql::TIssues& issues);
21+
22+
bool ValidateTableRefs(const NSQLv1Generated::TRule_sql_query& query, NYql::TIssues& issues);
23+
24+
TString RewriteAbsolutePath(TStringBuf path, TStringBuf backupRoot, TStringBuf restoreRoot);
25+
bool RewriteTableRefs(TString& query, TStringBuf backupRoot, TStringBuf restoreRoot, NYql::TIssues& issues);
26+
bool RewriteObjectRefs(TString& query, TStringBuf backupRoot, TStringBuf restoreRoot, NYql::TIssues& issues);
27+
bool RewriteCreateQuery(TString& query, std::string_view pattern, const std::string& dbPath, NYql::TIssues& issues);
28+
29+
TString GetBackupRoot(TStringInput query);
30+
31+
}

ydb/public/lib/ydb_cli/dump/util/rewrite_query.cpp

-21
This file was deleted.

ydb/public/lib/ydb_cli/dump/util/rewrite_query.h

-7
This file was deleted.

0 commit comments

Comments
 (0)