Skip to content

Commit 7a02b59

Browse files
authored
Validate protos from antlr4 (#8832)
1 parent 1643e7a commit 7a02b59

File tree

10 files changed

+100
-14
lines changed

10 files changed

+100
-14
lines changed

ydb/library/yql/sql/settings/translation_settings.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ namespace NSQLTranslation {
6060
, V0ForceDisable(InTestEnvironment())
6161
, PGDisable(false)
6262
, WarnOnV0(true)
63+
, TestAntlr4(false)
6364
, V0WarnAsError(ISqlFeaturePolicy::MakeAlwaysDisallow())
6465
, DqDefaultAuto(ISqlFeaturePolicy::MakeAlwaysDisallow())
6566
, BlockDefaultAuto(ISqlFeaturePolicy::MakeAlwaysDisallow())

ydb/library/yql/sql/settings/translation_settings.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ namespace NSQLTranslation {
108108
bool V0ForceDisable;
109109
bool PGDisable;
110110
bool WarnOnV0;
111+
bool TestAntlr4;
111112
ISqlFeaturePolicy::TPtr V0WarnAsError;
112113
ISqlFeaturePolicy::TPtr DqDefaultAuto;
113114
ISqlFeaturePolicy::TPtr BlockDefaultAuto;

ydb/library/yql/sql/sql.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ namespace NSQLTranslation {
9696

9797
return NSQLTranslationV0::SqlAST(query, queryName, issues, maxErrors, settings.Arena);
9898
case 1:
99-
return NSQLTranslationV1::SqlAST(query, queryName, issues, maxErrors, parsedSettings.AnsiLexer, settings.Arena);
99+
return NSQLTranslationV1::SqlAST(query, queryName, issues, maxErrors, parsedSettings.AnsiLexer, parsedSettings.Antlr4Parser, parsedSettings.TestAntlr4, settings.Arena);
100100
default:
101101
issues.AddIssue(NYql::YqlIssue(NYql::TPosition(), NYql::TIssuesIds::DEFAULT_ERROR,
102102
TStringBuilder() << "Unknown SQL syntax version: " << parsedSettings.SyntaxVersion));

ydb/library/yql/sql/v1/format/sql_format.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2872,7 +2872,7 @@ class TSqlFormatter : public NSQLFormat::ISqlFormatter {
28722872
}
28732873

28742874
if (mode == EFormatMode::Obfuscate) {
2875-
auto message = NSQLTranslationV1::SqlAST(query, "Query", issues, NSQLTranslation::SQL_MAX_PARSER_ERRORS, parsedSettings.AnsiLexer, parsedSettings.Antlr4Parser, parsedSettings.Arena);
2875+
auto message = NSQLTranslationV1::SqlAST(query, "Query", issues, NSQLTranslation::SQL_MAX_PARSER_ERRORS, parsedSettings.AnsiLexer, parsedSettings.Antlr4Parser, parsedSettings.TestAntlr4, parsedSettings.Arena);
28762876
if (!message) {
28772877
return false;
28782878
}
@@ -2935,7 +2935,7 @@ class TSqlFormatter : public NSQLFormat::ISqlFormatter {
29352935
}
29362936

29372937
NYql::TIssues parserIssues;
2938-
auto message = NSQLTranslationV1::SqlAST(currentQuery, "Query", parserIssues, NSQLTranslation::SQL_MAX_PARSER_ERRORS, parsedSettings.AnsiLexer, parsedSettings.Antlr4Parser, parsedSettings.Arena);
2938+
auto message = NSQLTranslationV1::SqlAST(currentQuery, "Query", parserIssues, NSQLTranslation::SQL_MAX_PARSER_ERRORS, parsedSettings.AnsiLexer, parsedSettings.Antlr4Parser, parsedSettings.TestAntlr4, parsedSettings.Arena);
29392939
if (!message) {
29402940
finalFormattedQuery << currentQuery;
29412941
if (!currentQuery.EndsWith("\n")) {

ydb/library/yql/sql/v1/proto_parser/proto_parser.cpp

Lines changed: 83 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@
1414

1515
#include <ydb/library/yql/parser/proto_ast/gen/v1_proto_split/SQLv1Parser.pb.main.h>
1616

17+
#include <library/cpp/protobuf/util/simple_reflection.h>
18+
#include <util/generic/algorithm.h>
19+
1720
#if defined(_tsan_enabled_)
1821
#include <util/system/mutex.h>
1922
#endif
@@ -29,18 +32,75 @@ namespace NSQLTranslationV1 {
2932

3033
using namespace NSQLv1Generated;
3134

32-
google::protobuf::Message* SqlAST(const TString& query, const TString& queryName, TIssues& err, size_t maxErrors, bool ansiLexer, bool anlr4Parser, google::protobuf::Arena* arena) {
35+
// Recursively walks two protobuf messages in lockstep and asserts (via YQL_ENSURE)
// that they have the same structure and carry equivalent tokens.
//
// msg1, msg2   - messages to compare; both must be null or both non-null.
// hasNonAscii  - in/out flag shared across the whole recursive walk. It is set as soon
//                as any visited token value contains a non-ASCII character and is never
//                reset here; while it is set, token column numbers are not compared.
//
// NOTE(review): only message-typed fields are descended into; scalar fields of
// non-token messages are not compared by this function.
void ValidateMessages(const google::protobuf::Message* msg1, const google::protobuf::Message* msg2, bool& hasNonAscii) {
36+
// Nullness must agree; two null messages are trivially equal.
YQL_ENSURE(!msg1 == !msg2);
37+
if (!msg1) {
38+
return;
39+
}
40+
41+
// Both messages must be of the same protobuf type.
YQL_ENSURE(msg1->GetDescriptor() == msg2->GetDescriptor());
42+
const auto descr = msg1->GetDescriptor();
43+
// Leaf case: the message is a TToken, so compare the token payload instead of recursing.
if (descr == NSQLv1Generated::TToken::GetDescriptor()) {
44+
const auto& token1 = dynamic_cast<const NSQLv1Generated::TToken&>(*msg1);
45+
const auto& token2 = dynamic_cast<const NSQLv1Generated::TToken&>(*msg2);
46+
// An id equal to Max<ui32>() is treated as the end-of-input token; concrete ids are
// otherwise not compared, only whether both sides are (or are not) EOF.
const bool isEof1 = token1.GetId() == Max<ui32>();
47+
const bool isEof2 = token2.GetId() == Max<ui32>();
48+
YQL_ENSURE(isEof1 == isEof2);
49+
YQL_ENSURE(token1.GetValue() == token2.GetValue());
50+
// Sticky: once a non-ASCII byte has been seen in any token, the flag stays set.
hasNonAscii = hasNonAscii || AnyOf(token1.GetValue(), [](char c) { return !isascii(c);});
51+
// Positions are only checked for real (non-EOF) tokens.
if (!isEof1) {
52+
YQL_ENSURE(token1.GetLine() == token2.GetLine());
53+
// NOTE(review): presumably the two parsers count columns differently (bytes vs.
// characters) after multi-byte text, hence the column check is skipped - confirm.
if (!hasNonAscii) {
54+
YQL_ENSURE(token1.GetColumn() == token2.GetColumn());
55+
}
56+
}
57+
58+
return;
59+
}
60+
61+
// Non-token message: visit every declared field and recurse into message-typed ones.
for (int i = 0; i < descr->field_count(); ++i) {
62+
const NProtoBuf::FieldDescriptor* fd = descr->field(i);
63+
NProtoBuf::TConstField field1(*msg1, fd);
64+
NProtoBuf::TConstField field2(*msg2, fd);
65+
YQL_ENSURE(field1.IsMessage() == field2.IsMessage());
66+
if (field1.IsMessage()) {
67+
// Element counts must match before comparing pairwise.
// NOTE(review): Size() presumably covers both repeated and singular fields - confirm
// against simple_reflection.h.
YQL_ENSURE(field1.Size() == field2.Size());
68+
for (size_t j = 0; j < field1.Size(); ++j) {
69+
ValidateMessages(field1.template Get<NProtoBuf::Message>(j), field2.template Get<NProtoBuf::Message>(j), hasNonAscii);
70+
}
71+
}
72+
}
73+
}
74+
75+
google::protobuf::Message* SqlAST(const TString& query, const TString& queryName, TIssues& err,
76+
size_t maxErrors, bool ansiLexer, bool anlr4Parser, bool testAntlr4, google::protobuf::Arena* arena) {
3377
YQL_ENSURE(arena);
3478
#if defined(_tsan_enabled_)
3579
TGuard<TMutex> grd(SanitizerSQLTranslationMutex);
3680
#endif
3781
NSQLTranslation::TErrorCollectorOverIssues collector(err, maxErrors, "");
3882
if (ansiLexer && !anlr4Parser) {
3983
NProtoAST::TProtoASTBuilder<NALPAnsi::SQLv1Parser, NALPAnsi::SQLv1Lexer> builder(query, queryName, arena);
40-
return builder.BuildAST(collector);
84+
auto res = builder.BuildAST(collector);
85+
if (testAntlr4) {
86+
NProtoAST::TProtoASTBuilder<NALPAnsiAntlr4::SQLv1Antlr4Parser, NALPAnsiAntlr4::SQLv1Antlr4Lexer> builder(query, queryName, arena);
87+
auto res2 = builder.BuildAST(collector);
88+
bool hasNonAscii = false;
89+
ValidateMessages(res, res2, hasNonAscii);
90+
}
91+
92+
return res;
4193
} else if (!ansiLexer && !anlr4Parser) {
4294
NProtoAST::TProtoASTBuilder<NALPDefault::SQLv1Parser, NALPDefault::SQLv1Lexer> builder(query, queryName, arena);
43-
return builder.BuildAST(collector);
95+
auto res = builder.BuildAST(collector);
96+
if (testAntlr4) {
97+
NProtoAST::TProtoASTBuilder<NALPDefaultAntlr4::SQLv1Antlr4Parser, NALPDefaultAntlr4::SQLv1Antlr4Lexer> builder(query, queryName, arena);
98+
auto res2 = builder.BuildAST(collector);
99+
bool hasNonAscii = false;
100+
ValidateMessages(res, res2, hasNonAscii);
101+
}
102+
103+
return res;
44104
} else if (ansiLexer && anlr4Parser) {
45105
NProtoAST::TProtoASTBuilder<NALPAnsiAntlr4::SQLv1Antlr4Parser, NALPAnsiAntlr4::SQLv1Antlr4Lexer> builder(query, queryName, arena);
46106
return builder.BuildAST(collector);
@@ -50,17 +110,34 @@ google::protobuf::Message* SqlAST(const TString& query, const TString& queryName
50110
}
51111
}
52112

53-
google::protobuf::Message* SqlAST(const TString& query, const TString& queryName, NProtoAST::IErrorCollector& err, bool ansiLexer, bool anlr4Parser, google::protobuf::Arena* arena) {
113+
google::protobuf::Message* SqlAST(const TString& query, const TString& queryName, NProtoAST::IErrorCollector& err,
114+
bool ansiLexer, bool anlr4Parser, bool testAntlr4, google::protobuf::Arena* arena) {
54115
YQL_ENSURE(arena);
55116
#if defined(_tsan_enabled_)
56117
TGuard<TMutex> grd(SanitizerSQLTranslationMutex);
57118
#endif
58119
if (ansiLexer && !anlr4Parser) {
59120
NProtoAST::TProtoASTBuilder<NALPAnsi::SQLv1Parser, NALPAnsi::SQLv1Lexer> builder(query, queryName, arena);
60-
return builder.BuildAST(err);
121+
auto res = builder.BuildAST(err);
122+
if (testAntlr4) {
123+
NProtoAST::TProtoASTBuilder<NALPAnsiAntlr4::SQLv1Antlr4Parser, NALPAnsiAntlr4::SQLv1Antlr4Lexer> builder(query, queryName, arena);
124+
auto res2 = builder.BuildAST(err);
125+
bool hasNonAscii = false;
126+
ValidateMessages(res, res2, hasNonAscii);
127+
}
128+
129+
return res;
61130
} else if (!ansiLexer && !anlr4Parser) {
62131
NProtoAST::TProtoASTBuilder<NALPDefault::SQLv1Parser, NALPDefault::SQLv1Lexer> builder(query, queryName, arena);
63-
return builder.BuildAST(err);
132+
auto res = builder.BuildAST(err);
133+
if (testAntlr4) {
134+
NProtoAST::TProtoASTBuilder<NALPDefaultAntlr4::SQLv1Antlr4Parser, NALPDefaultAntlr4::SQLv1Antlr4Lexer> builder(query, queryName, arena);
135+
auto res2 = builder.BuildAST(err);
136+
bool hasNonAscii = false;
137+
ValidateMessages(res, res2, hasNonAscii);
138+
}
139+
140+
return res;
64141
} else if (ansiLexer && anlr4Parser) {
65142
NProtoAST::TProtoASTBuilder<NALPAnsiAntlr4::SQLv1Antlr4Parser, NALPAnsiAntlr4::SQLv1Antlr4Lexer> builder(query, queryName, arena);
66143
return builder.BuildAST(err);

ydb/library/yql/sql/v1/proto_parser/proto_parser.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@ namespace NSQLTranslation {
1414

1515
namespace NSQLTranslationV1 {
1616

17-
google::protobuf::Message* SqlAST(const TString& query, const TString& queryName, NYql::TIssues& err, size_t maxErrors, bool ansiLexer, bool antlr4Parser, google::protobuf::Arena* arena = nullptr);
18-
google::protobuf::Message* SqlAST(const TString& query, const TString& queryName, NProtoAST::IErrorCollector& err, bool ansiLexer, bool antlr4Parser, google::protobuf::Arena* arena);
17+
google::protobuf::Message* SqlAST(const TString& query, const TString& queryName,
18+
NYql::TIssues& err, size_t maxErrors, bool ansiLexer, bool antlr4Parser, bool testAntlr4, google::protobuf::Arena* arena);
19+
google::protobuf::Message* SqlAST(const TString& query, const TString& queryName,
20+
NProtoAST::IErrorCollector& err, bool ansiLexer, bool antlr4Parser, bool testAntlr4, google::protobuf::Arena* arena);
1921

2022
} // namespace NSQLTranslationV1

ydb/library/yql/sql/v1/sql.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ NYql::TAstParseResult SqlToYql(const TString& query, const NSQLTranslation::TTra
102102
TContext ctx(settings, hints, res.Issues);
103103
NSQLTranslation::TErrorCollectorOverIssues collector(res.Issues, settings.MaxErrors, settings.File);
104104

105-
google::protobuf::Message* ast(SqlAST(query, queryName, collector, settings.AnsiLexer, settings.Antlr4Parser, settings.Arena));
105+
google::protobuf::Message* ast(SqlAST(query, queryName, collector, settings.AnsiLexer, settings.Antlr4Parser, settings.TestAntlr4, settings.Arena));
106106
if (ast) {
107107
SqlASTToYqlImpl(res, *ast, ctx);
108108
} else {
@@ -195,7 +195,7 @@ TVector<NYql::TAstParseResult> SqlToAstStatements(const TString& query, const NS
195195
TContext ctx(settings, hints, issues);
196196
NSQLTranslation::TErrorCollectorOverIssues collector(issues, settings.MaxErrors, settings.File);
197197

198-
google::protobuf::Message* astProto(SqlAST(query, queryName, collector, settings.AnsiLexer, settings.Antlr4Parser, settings.Arena));
198+
google::protobuf::Message* astProto(SqlAST(query, queryName, collector, settings.AnsiLexer, settings.Antlr4Parser, settings.TestAntlr4, settings.Arena));
199199
if (astProto) {
200200
auto ast = static_cast<const TSQLv1ParserAST&>(*astProto);
201201
const auto& query = ast.GetRule_sql_query();

ydb/library/yql/tests/sql/sql2yql/test_sql2yql.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,8 @@ def get_sql2yql_cmd(suite, case, case_file, out_dir, ansi_lexer, test_format, te
1313
cmd = [
1414
SQLRUN_PATH,
1515
case_file,
16-
'--syntax-version=1'
16+
'--syntax-version=1',
17+
'--test-antlr4',
1718
]
1819

1920
if ansi_lexer:

ydb/library/yql/tools/sql2yql/sql2yql.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@ int BuildAST(int argc, char* argv[]) {
198198
opts.AddLongOption("assume-ydb-on-slash", "Assume YDB provider if cluster name starts with '/'").NoArgument();
199199
opts.AddLongOption("test-format", "compare formatted query's AST with the original query's AST (only syntaxVersion=1 is supported).").NoArgument();
200200
opts.AddLongOption("test-double-format", "check if formatting already formatted query produces the same result").NoArgument();
201+
opts.AddLongOption("test-antlr4", "check antlr4 parser").NoArgument();
201202
opts.AddLongOption("format-output", "Saves formatted query to it").RequiredArgument("format-output").StoreResult(&outFileNameFormat);
202203
opts.SetFreeArgDefaultTitle("query file");
203204
opts.AddHelpOption();
@@ -270,6 +271,7 @@ int BuildAST(int argc, char* argv[]) {
270271
settings.WarnOnV0 = false;
271272
settings.V0ForceDisable = false;
272273
settings.AssumeYdbOnClusterWithSlash = res.Has("assume-ydb-on-slash");
274+
settings.TestAntlr4 = res.Has("test-antlr4");
273275

274276
if (res.Has("lexer")) {
275277
NYql::TIssues issues;

ydb/library/yql/tools/yqlrun/yqlrun.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -486,6 +486,7 @@ int Main(int argc, const char *argv[])
486486
opts.AddLongOption("pg-ext", "pg extensions config file").StoreResult(&pgExtConfig);
487487
opts.AddLongOption("with-final-issues", "Include some final messages (like statistic) in issues").NoArgument();
488488
opts.AddLongOption("validate-result-format", "Check that result-format can parse Result").NoArgument();
489+
opts.AddLongOption("test-antlr4", "check antlr4 parser").NoArgument();
489490

490491
opts.SetFreeArgsMax(0);
491492
TOptsParseResult res(&opts, argc, argv);
@@ -739,6 +740,7 @@ int Main(int argc, const char *argv[])
739740
settings.Flags = sqlFlags;
740741
settings.SyntaxVersion = syntaxVersion;
741742
settings.AnsiLexer = res.Has("ansi-lexer");
743+
settings.TestAntlr4 = res.Has("test-antlr4");
742744
settings.V0Behavior = NSQLTranslation::EV0Behavior::Report;
743745
settings.AssumeYdbOnClusterWithSlash = res.Has("assume-ydb-on-slash");
744746
if (res.Has("discover")) {

0 commit comments

Comments
 (0)