Skip to content

Commit f93ea93

Browse files
authored
Extracted yson result formatting from DQ service node (#7563)
1 parent 7cf1acd commit f93ea93

File tree

15 files changed

+146
-90
lines changed

15 files changed

+146
-90
lines changed

ydb/core/fq/libs/actors/result_writer.cpp

+4-1
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,10 @@ class TResultWriter : public NActors::TActorBootstrapped<TResultWriter> {
111111
Finished = true;
112112
NYql::NDqProto::TQueryResponse queryResult(ev->Get()->Record);
113113

114-
*queryResult.MutableYson() = ResultBuilder->BuildYson(std::move(Head));
114+
for (const auto& x : Head) {
115+
queryResult.AddSample()->CopyFrom(x.Proto);
116+
}
117+
115118
Head.clear();
116119
if (!Issues.Empty()) {
117120
IssuesToMessage(Issues, queryResult.MutableIssues());

ydb/core/fq/libs/actors/run_actor.cpp

+36-3
Original file line numberDiff line numberDiff line change
@@ -1264,7 +1264,21 @@ class TRunActor : public NActors::TActorBootstrapped<TRunActor> {
12641264
<< ". " << it->second.Index << " response. Issues count: " << result.IssuesSize()
12651265
<< ". Rows count: " << result.GetRowsCount());
12661266

1267-
queryResult.Data = result.yson();
1267+
TVector<NDq::TDqSerializedBatch> rows;
1268+
for (const auto& s : result.GetSample()) {
1269+
NDq::TDqSerializedBatch batch;
1270+
batch.Proto = s;
1271+
rows.emplace_back(std::move(batch));
1272+
}
1273+
1274+
TProtoBuilder protoBuilder(ResultFormatSettings->ResultType, ResultFormatSettings->Columns);
1275+
1276+
bool ysonTruncated = false;
1277+
queryResult.Data = protoBuilder.BuildYson(std::move(rows), ResultFormatSettings->SizeLimit.GetOrElse(Max<ui64>()),
1278+
ResultFormatSettings->RowsLimit.GetOrElse(Max<ui64>()), &ysonTruncated);
1279+
1280+
queryResult.RowsCount = result.GetRowsCount();
1281+
queryResult.Truncated = result.GetTruncated() || ysonTruncated;
12681282

12691283
TIssues issues;
12701284
IssuesFromMessage(result.GetIssues(), issues);
@@ -1294,8 +1308,6 @@ class TRunActor : public NActors::TActorBootstrapped<TRunActor> {
12941308
}
12951309

12961310
queryResult.AddIssues(issues);
1297-
queryResult.Truncated = result.GetTruncated();
1298-
queryResult.RowsCount = result.GetRowsCount();
12991311
it->second.Result.SetValue(queryResult);
13001312
EvalInfos.erase(it);
13011313
}
@@ -1515,6 +1527,7 @@ class TRunActor : public NActors::TActorBootstrapped<TRunActor> {
15151527
*request.MutableSettings() = dqGraphParams.GetSettings();
15161528
*request.MutableSecureParams() = dqGraphParams.GetSecureParams();
15171529
*request.MutableColumns() = dqGraphParams.GetColumns();
1530+
PrepareResultFormatSettings(dqGraphParams, *dqConfiguration);
15181531
NTasksPacker::UnPack(*request.MutableTask(), dqGraphParams.GetTasks(), dqGraphParams.GetStageProgram());
15191532
Send(info.ExecuterId, new NYql::NDqs::TEvGraphRequest(request, info.ControlId, info.ResultId));
15201533
LOG_D("Evaluation Executer: " << info.ExecuterId << ", Controller: " << info.ControlId << ", ResultActor: " << info.ResultId);
@@ -1552,9 +1565,12 @@ class TRunActor : public NActors::TActorBootstrapped<TRunActor> {
15521565
CreateResultWriter(
15531566
ExecuterId, dqGraphParams.GetResultType(),
15541567
writerResultId, columns, dqGraphParams.GetSession(), Params.Deadline, Params.ResultBytesLimit));
1568+
1569+
PrepareResultFormatSettings(dqGraphParams, *dqConfiguration);
15551570
} else {
15561571
LOG_D("ResultWriter was NOT CREATED since ResultType is empty");
15571572
resultId = ExecuterId;
1573+
ClearResultFormatSettings();
15581574
}
15591575

15601576
if (enableCheckpointCoordinator) {
@@ -1604,6 +1620,21 @@ class TRunActor : public NActors::TActorBootstrapped<TRunActor> {
16041620
LOG_D("Executer: " << ExecuterId << ", Controller: " << ControlId << ", ResultIdActor: " << resultId);
16051621
}
16061622

1623+
void PrepareResultFormatSettings(NFq::NProto::TGraphParams& dqGraphParams, const TDqConfiguration& dqConfiguration) {
1624+
ResultFormatSettings.ConstructInPlace();
1625+
for (const auto& c : dqGraphParams.GetColumns()) {
1626+
ResultFormatSettings->Columns.push_back(c);
1627+
}
1628+
1629+
ResultFormatSettings->ResultType = dqGraphParams.GetResultType();
1630+
ResultFormatSettings->SizeLimit = dqConfiguration._AllResultsBytesLimit.Get();
1631+
ResultFormatSettings->RowsLimit = dqConfiguration._RowsLimitPerWrite.Get();
1632+
}
1633+
1634+
void ClearResultFormatSettings() {
1635+
ResultFormatSettings.Clear();
1636+
}
1637+
16071638
void SetupYqlCore(NYql::TYqlCoreConfig& yqlCore) const {
16081639
auto flags = yqlCore.MutableFlags();
16091640
*flags = Params.Config.GetGateways().GetYqlCore().GetFlags();
@@ -2256,6 +2287,8 @@ class TRunActor : public NActors::TActorBootstrapped<TRunActor> {
22562287
NYql::NDqProto::EDqStatsMode StatsMode = NYql::NDqProto::EDqStatsMode::DQ_STATS_MODE_NONE;
22572288
TMap<TString, TString> Statistics;
22582289

2290+
TMaybe<NCommon::TResultFormatSettings> ResultFormatSettings;
2291+
22592292
// Consumers creation
22602293
NActors::TActorId ReadRulesCreatorId;
22612294

ydb/library/yql/dq/common/dq_serialized_batch.cpp

+21-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,27 @@ void TDqSerializedBatch::SetPayload(TRope&& payload) {
3939
payload.Erase(it, it + it.ContiguousSize());
4040
}
4141
}
42-
}
42+
}
43+
44+
void TDqSerializedBatch::ConvertToNoOOB() {
45+
if (!IsOOB()) {
46+
return;
47+
}
48+
49+
YQL_ENSURE(Proto.GetRaw().empty());
50+
Proto.SetRaw(Payload.ConvertToString());
51+
Payload.clear();
52+
switch ((NDqProto::EDataTransportVersion)Proto.GetTransportVersion()) {
53+
case NDqProto::EDataTransportVersion::DATA_TRANSPORT_OOB_FAST_PICKLE_1_0:
54+
Proto.SetTransportVersion(NDqProto::EDataTransportVersion::DATA_TRANSPORT_UV_FAST_PICKLE_1_0);
55+
break;
56+
case NDqProto::EDataTransportVersion::DATA_TRANSPORT_OOB_PICKLE_1_0:
57+
Proto.SetTransportVersion(NDqProto::EDataTransportVersion::DATA_TRANSPORT_UV_PICKLE_1_0);
58+
break;
59+
default:
60+
YQL_ENSURE(false, "Unexpected transport version" << Proto.GetTransportVersion());
61+
}
62+
}
4363

4464
TRope SaveForSpilling(TDqSerializedBatch&& batch) {
4565
TRope result;

ydb/library/yql/dq/common/dq_serialized_batch.h

+2
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ struct TDqSerializedBatch {
5050
Clear();
5151
return result;
5252
}
53+
54+
void ConvertToNoOOB();
5355
};
5456

5557
TRope SaveForSpilling(TDqSerializedBatch&& batch);

ydb/library/yql/providers/common/schema/mkql/yql_mkql_schema.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ struct TRuntimeTypeLoader {
221221
}
222222

223223
TMaybe<TType> LoadPgType(const TString& pgType, ui32 /*level*/) {
224-
auto typeId = NYql::NPg::LookupType(pgType).TypeId;
224+
auto typeId = NYql::NPg::HasType(pgType) ? NYql::NPg::LookupType(pgType).TypeId : Max<ui32>();
225225
return Builder.NewPgType(typeId);
226226
}
227227

ydb/library/yql/providers/dq/actors/proto_builder.cpp

+13-6
Original file line numberDiff line numberDiff line change
@@ -74,12 +74,19 @@ TString TProtoBuilder::BuildYson(TVector<NYql::NDq::TDqSerializedBatch>&& rows,
7474
writer.OnBeginList();
7575

7676
auto full = WriteData(std::move(rows), [&](const NYql::NUdf::TUnboxedValuePod& value) {
77-
auto rowYson = NCommon::WriteYsonValue(value, ResultType, ColumnOrder.empty() ? nullptr : &ColumnOrder);
78-
writer.OnListItem();
79-
writer.OnRaw(rowYson);
80-
size += rowYson.size();
81-
++count;
82-
return size <= maxBytesLimit && count <= maxRowsLimit;
77+
bool ret = (size <= maxBytesLimit && count <= maxRowsLimit);
78+
if (ret) {
79+
auto rowYson = NCommon::WriteYsonValue(value, ResultType, ColumnOrder.empty() ? nullptr : &ColumnOrder);
80+
size += rowYson.size();
81+
++count;
82+
ret = (size <= maxBytesLimit && count <= maxRowsLimit);
83+
if (ret) {
84+
writer.OnListItem();
85+
writer.OnRaw(rowYson);
86+
}
87+
}
88+
89+
return ret;
8390
});
8491

8592
if (!full) {

ydb/library/yql/providers/dq/actors/result_actor_base.h

+22-26
Original file line numberDiff line numberDiff line change
@@ -106,14 +106,12 @@ struct TWriteQueue {
106106
, Truncated(false)
107107
, FullResultWriterID()
108108
, ResultBuilder(resultType ? MakeHolder<TProtoBuilder>(resultType, columns) : nullptr)
109-
, ResultYson()
110-
, ResultYsonOut(new THoldingStream<TCountingOutput>(MakeHolder<TStringOutput>(ResultYson)))
111-
, ResultYsonWriter(MakeHolder<NYson::TYsonWriter>(ResultYsonOut.Get(), NYson::EYsonFormat::Binary, ::NYson::EYsonType::Node, true))
109+
, ResultSampleDataSize(0)
110+
, ResultSampleData()
112111
, Issues()
113112
, BlockingActors()
114113
, QueryResponse()
115114
, WaitingAckFromFRW(false) {
116-
ResultYsonWriter->OnBeginList();
117115
YQL_CLOG(DEBUG, ProviderDq) << "_AllResultsBytesLimit = " << SizeLimit;
118116
YQL_CLOG(DEBUG, ProviderDq) << "_RowsLimitPerWrite = " << (RowsLimit.Defined() ? ToString(RowsLimit.GetRef()) : "nothing");
119117
}
@@ -141,19 +139,21 @@ struct TWriteQueue {
141139
bool exceedRows = false;
142140
try {
143141
TFailureInjector::Reach("result_actor_base_fail_on_response_write", [] { throw yexception() << "result_actor_base_fail_on_response_write"; });
144-
NDq::TDqSerializedBatch dataCopy = WriteQueue.back().Data;
145-
full = ResultBuilder->WriteYsonData(std::move(dataCopy), [this, &exceedRows](const TString& rawYson) {
146-
if (RowsLimit && Rows + 1 > *RowsLimit) {
142+
if (!Truncated) {
143+
NDq::TDqSerializedBatch dataCopy = WriteQueue.back().Data;
144+
dataCopy.ConvertToNoOOB();
145+
Rows += dataCopy.RowCount();
146+
ResultSampleDataSize += dataCopy.Size();
147+
148+
if (RowsLimit && Rows > *RowsLimit) {
147149
exceedRows = true;
148-
return false;
149-
} else if (ResultYsonOut->Counter() + rawYson.size() > SizeLimit) {
150-
return false;
150+
full = false;
151+
} else if (ResultSampleDataSize > SizeLimit) {
152+
full = false;
151153
}
152-
ResultYsonWriter->OnListItem();
153-
ResultYsonWriter->OnRaw(rawYson);
154-
++Rows;
155-
return true;
156-
});
154+
155+
ResultSampleData.emplace_back(std::move(dataCopy.Proto));
156+
}
157157
} catch (...) {
158158
OnError(NYql::NDqProto::StatusIds::UNSUPPORTED, CurrentExceptionMessage());
159159
return;
@@ -245,7 +245,7 @@ struct TWriteQueue {
245245
private:
246246
void OnQueryResult(TEvQueryResponse::TPtr& ev, const NActors::TActorContext&) {
247247
YQL_LOG_CTX_ROOT_SESSION_SCOPE(TraceId);
248-
YQL_ENSURE(!ev->Get()->Record.HasResultSet() && ev->Get()->Record.GetYson().empty());
248+
YQL_ENSURE(!ev->Get()->Record.SampleSize());
249249
YQL_CLOG(DEBUG, ProviderDq) << "Shutting down TResultAggregator";
250250

251251
BlockingActors.clear();
@@ -361,17 +361,14 @@ struct TWriteQueue {
361361
YQL_CLOG(DEBUG, ProviderDq) << __FUNCTION__;
362362
NDqProto::TQueryResponse result = QueryResponse->Record;
363363

364-
YQL_ENSURE(!result.HasResultSet() && result.GetYson().empty());
364+
YQL_ENSURE(!result.SampleSize());
365365
FlushCounters(result);
366366

367-
if (ResultYsonWriter) {
368-
ResultYsonWriter->OnEndList();
369-
ResultYsonWriter.Destroy();
367+
for (const auto& x : ResultSampleData) {
368+
result.AddSample()->CopyFrom(x);
370369
}
371-
ResultYsonOut.Destroy();
372-
373-
*result.MutableYson() = ResultYson;
374370

371+
ResultSampleData.clear();
375372
if (!Issues.Empty()) {
376373
NYql::IssuesToMessage(Issues, result.MutableIssues());
377374
}
@@ -429,9 +426,8 @@ struct TWriteQueue {
429426
bool Truncated;
430427
NActors::TActorId FullResultWriterID;
431428
THolder<TProtoBuilder> ResultBuilder;
432-
TString ResultYson;
433-
THolder<TCountingOutput> ResultYsonOut;
434-
THolder<NYson::TYsonWriter> ResultYsonWriter;
429+
ui64 ResultSampleDataSize;
430+
TVector<NDqProto::TData> ResultSampleData;
435431
TIssues Issues;
436432
THashSet<NActors::TActorId> BlockingActors;
437433
THolder<TEvQueryResponse> QueryResponse;

ydb/library/yql/providers/dq/actors/result_aggregator.cpp

-39
Original file line numberDiff line numberDiff line change
@@ -204,45 +204,6 @@ class TResultAggregator: public TResultActorBase<TResultAggregator> {
204204
bool Continue;
205205
};
206206

207-
class TResultPrinter: public TActor<TResultPrinter> {
208-
public:
209-
static constexpr char ActorName[] = "YQL_DQ_RESULT_PRINTER";
210-
211-
TResultPrinter(IOutputStream& output, NThreading::TPromise<void>& promise)
212-
: TActor<TResultPrinter>(&TResultPrinter::Handler)
213-
, Output(output)
214-
, Promise(promise)
215-
{
216-
}
217-
218-
private:
219-
STRICT_STFUNC(Handler, { HFunc(TEvQueryResponse, OnQueryResult); })
220-
221-
void OnQueryResult(TEvQueryResponse::TPtr& ev, const TActorContext&) {
222-
if (!ev->Get()->Record.HasResultSet()&&ev->Get()->Record.GetYson().empty()) {
223-
NYql::TIssues issues;
224-
NYql::IssuesFromMessage(ev->Get()->Record.GetIssues(), issues);
225-
Cerr << issues.ToString() << Endl;
226-
} else {
227-
auto ysonString = !ev->Get()->Record.GetYson().empty()
228-
? ev->Get()->Record.GetYson()
229-
: NYdb::FormatResultSetYson(ev->Get()->Record.GetResultSet(), NYson::EYsonFormat::Binary);
230-
auto ysonNode = NYT::NodeFromYsonString(ysonString, NYson::EYsonType::Node);
231-
YQL_ENSURE(ysonNode.GetType() == NYT::TNode::EType::List);
232-
for (const auto& row : ysonNode.AsList()) {
233-
Output << NYT::NodeToYsonString(row) << "\n";
234-
}
235-
}
236-
237-
Promise.SetValue();
238-
PassAway();
239-
}
240-
241-
private:
242-
IOutputStream& Output;
243-
NThreading::TPromise<void>& Promise;
244-
};
245-
246207
} // unnamed
247208

248209
THolder<NActors::IActor> MakeResultAggregator(

ydb/library/yql/providers/dq/actors/task_controller_impl.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -600,7 +600,7 @@ class TTaskControllerImpl: public NActors::TActor<TDerived> {
600600

601601
public:
602602
void OnQueryResult(TEvQueryResponse::TPtr& ev) {
603-
YQL_ENSURE(!ev->Get()->Record.HasResultSet() && ev->Get()->Record.GetYson().empty());
603+
YQL_ENSURE(!ev->Get()->Record.SampleSize());
604604
FinalStat().FlushCounters(ev->Get()->Record);
605605
if (!Issues.Empty()) {
606606
IssuesToMessage(Issues.ToIssues(), ev->Get()->Record.MutableIssues());

ydb/library/yql/providers/dq/api/protos/dqs.proto

+3-2
Original file line numberDiff line numberDiff line change
@@ -175,13 +175,14 @@ message TPullResponse {
175175

176176
message TQueryResponse {
177177
reserved 4, 6;
178-
Ydb.ResultSet ResultSet = 1;
178+
reserved 1;
179179
repeated Ydb.Issue.IssueMessage Issues = 2;
180-
bytes Yson = 3;
180+
reserved 3;
181181
repeated TMetric Metric = 5;
182182
bool Truncated = 7;
183183
uint64 RowsCount = 8;
184184
NYql.NDqProto.StatusIds.StatusCode StatusCode = 9;
185+
repeated NDqProto.TData Sample = 10;
185186
}
186187

187188
message TDqFailure {

ydb/library/yql/providers/dq/api/protos/service.proto

+2
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import "ydb/public/api/protos/ydb_operation.proto";
55
import "ydb/public/api/protos/ydb_value.proto";
66
import "ydb/library/yql/dq/actors/protos/dq_stats.proto";
77
import "ydb/library/yql/dq/proto/dq_tasks.proto";
8+
import "ydb/library/yql/dq/proto/dq_transport.proto";
89

910
package Yql.DqsProto;
1011

@@ -25,6 +26,7 @@ message ResponseMetric {
2526
message ExecuteQueryResult {
2627
Ydb.ResultSet result = 1;
2728
bytes yson = 2;
29+
repeated NYql.NDqProto.TData sample = 3;
2830
}
2931

3032
message TFile {

ydb/library/yql/providers/dq/common/yql_dq_common.h

+7
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,13 @@
1010
namespace NYql {
1111
namespace NCommon {
1212

13+
// Parameters needed to format raw DQ result samples (e.g. into YSON).
// In run_actor.cpp these are filled from the graph params and the DQ
// configuration and then handed to TProtoBuilder / BuildYson.
struct TResultFormatSettings {
14+
// Result type as provided by the graph params (GetResultType()); passed as
// the first constructor argument of TProtoBuilder.
TString ResultType;
15+
// Column names copied from the graph params (GetColumns()); passed as the
// second constructor argument of TProtoBuilder.
TVector<TString> Columns;
16+
// Byte limit for the formatted result (from _AllResultsBytesLimit).
// When empty, the consumer falls back to Max<ui64>().
TMaybe<ui64> SizeLimit;
17+
// Row limit for the formatted result (from _RowsLimitPerWrite).
// When empty, the consumer falls back to Max<ui64>().
TMaybe<ui64> RowsLimit;
18+
};
19+
1320
TMaybe<TString> SqlToSExpr(const TString& query);
1421

1522
TString GetSerializedTypeAnnotation(const NYql::TTypeAnnotationNode* typeAnn);

ydb/library/yql/providers/dq/provider/ya.make

+1
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ PEERDIR(
5757
ydb/library/yql/providers/dq/expr_nodes
5858
ydb/library/yql/providers/dq/opt
5959
ydb/library/yql/providers/dq/planner
60+
ydb/library/yql/providers/dq/actors
6061
ydb/library/yql/providers/result/expr_nodes
6162
ydb/library/yql/minikql
6263
)

0 commit comments

Comments
 (0)