Skip to content

Commit fda767c

Browse files
get rid of wide fields in wide combiner. Not used in llvm (#6536)
1 parent 02f9150 commit fda767c

File tree

1 file changed

+55
-40
lines changed

1 file changed

+55
-40
lines changed

ydb/library/yql/minikql/comp_nodes/mkql_wide_combine.cpp

+55-40
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,24 @@ struct TCombinerNodes {
132132
}
133133
}
134134

135+
// Moves values out of the slots referenced by the pointer array `from` into the flat array `to`.
// For every index i in [0, ItemNodes.size()): a null from[i] is skipped and to[i] is left untouched;
// otherwise *from[i] is moved into to[i] at the same index i.
// `ctx` is not used in this body.
void ExtractValues(TComputationContext& ctx, NUdf::TUnboxedValue** from, NUdf::TUnboxedValue* to) const {
136+
for (ui32 i = 0U; i < ItemNodes.size(); ++i) {
137+
if (from[i]) {
138+
to[i] = std::move(*(from[i]));
139+
}
140+
}
141+
}
142+
143+
// Distributes values from the flat array `from` into the slots referenced by the pointer array `to`.
// `to` is indexed by item position i, while `from` is read densely: j advances only for items
// for which IsInputItemNodeUsed(i) is true. For all other items to[i] is reset to nullptr.
// NOTE(review): for used items to[i] is dereferenced without a null check — presumably the caller
// guarantees it already points to valid storage; confirm.
// NOTE(review): the other ExtractValues overload writes to[i] by item position, whereas this one
// reads from[j] densely — confirm the buffer layouts agree between the two.
// `ctx` is not used in this body.
void ExtractValues(TComputationContext& ctx, NUdf::TUnboxedValue* from, NUdf::TUnboxedValue** to) const {
144+
for (size_t i = 0, j = 0; i != ItemNodes.size(); ++i) {
145+
if (IsInputItemNodeUsed(i)) {
146+
*to[i] = std::move(from[j++]);
147+
} else {
148+
to[i] = nullptr;
149+
}
150+
}
151+
}
152+
135153
void ProcessItem(TComputationContext& ctx, NUdf::TUnboxedValue* keys, NUdf::TUnboxedValue* state) const {
136154
if (keys) {
137155
std::fill_n(keys, KeyResultNodes.size(), NUdf::TUnboxedValuePod());
@@ -346,16 +364,16 @@ class TSpillingSupportState : public TComputationValue<TSpillingSupportState> {
346364
enum class ETasteResult: i8 {
347365
Init = -1,
348366
Update,
349-
Skip
367+
ConsumeRawData,
368+
ExtractRawData
350369
};
351370
TSpillingSupportState(
352-
TMemoryUsageInfo* memInfo, size_t wideFieldsIndex,
371+
TMemoryUsageInfo* memInfo,
353372
const TMultiType* usedInputItemType, const TMultiType* keyAndStateType, ui32 keyWidth, size_t itemNodesSize,
354373
const THashFunc& hash, const TEqualsFunc& equal, bool allowSpilling, TComputationContext& ctx
355374
)
356375
: TBase(memInfo)
357376
, InMemoryProcessingState(memInfo, keyWidth, keyAndStateType->GetElementsCount() - keyWidth, hash, equal)
358-
, WideFieldsIndex(wideFieldsIndex)
359377
, UsedInputItemType(usedInputItemType)
360378
, KeyAndStateType(keyAndStateType)
361379
, KeyWidth(keyWidth)
@@ -380,7 +398,7 @@ class TSpillingSupportState : public TComputationValue<TSpillingSupportState> {
380398
bool IsProcessingRequired() const {
381399
if (InputStatus != EFetchResult::Finish) return true;
382400

383-
return HasDataForProcessing;
401+
return HasRawDataToExtract || HasDataForProcessing;
384402
}
385403

386404
bool UpdateAndWait() {
@@ -424,10 +442,19 @@ class TSpillingSupportState : public TComputationValue<TSpillingSupportState> {
424442
return isNew ? ETasteResult::Init : ETasteResult::Update;
425443
}
426444
if (GetMode() == EOperatingMode::ProcessSpilled) {
445+
if (HasRawDataToExtract) {
446+
// Tongue not used here.
447+
Throat = BufferForUsedInputItems.data();
448+
HasRawDataToExtract = false;
449+
HasDataForProcessing = true;
450+
return ETasteResult::ExtractRawData;
451+
}
452+
HasDataForProcessing = false;
427453
// During restoration we process buckets one by one, starting from the first in the queue
428454
bool isNew = SpilledBuckets.front().InMemoryProcessingState->TasteIt();
429455
Throat = SpilledBuckets.front().InMemoryProcessingState->Throat;
430456
Tongue = SpilledBuckets.front().InMemoryProcessingState->Tongue;
457+
BufferForUsedInputItems.resize(0);
431458
return isNew ? ETasteResult::Init : ETasteResult::Update;
432459
}
433460

@@ -445,9 +472,13 @@ class TSpillingSupportState : public TComputationValue<TSpillingSupportState> {
445472

446473
// The corresponding bucket is spilled, so we don't need the key anymore; the full input will be spilled
447474
BufferForKeyAndState.resize(0);
448-
TryToSpillRawData(bucket, bucketId);
475+
// Prepare space for raw data
476+
MKQL_ENSURE(BufferForUsedInputItems.size() == 0, "Internal logic error");
477+
BufferForUsedInputItems.resize(ItemNodesSize);
478+
BufferForUsedInputItemsBucketId = bucketId;
479+
Throat = BufferForUsedInputItems.data();
449480

450-
return ETasteResult::Skip;
481+
return ETasteResult::ConsumeRawData;
451482
}
452483

453484
NUdf::TUnboxedValuePod* Extract() {
@@ -472,25 +503,6 @@ class TSpillingSupportState : public TComputationValue<TSpillingSupportState> {
472503
BufferForKeyAndState.resize(0);
473504
}
474505

475-
// Copies data from WideFields to local and tries to spill it using suitable bucket.
476-
// if the bucket is already busy, then the buffer will wait for the next iteration.
477-
void TryToSpillRawData(TSpilledBucket& bucket, size_t bucketId) {
478-
auto **fields = Ctx.WideFields.data() + WideFieldsIndex;
479-
MKQL_ENSURE(BufferForUsedInputItems.empty(), "Internal logic error");
480-
481-
for (size_t i = 0; i < ItemNodesSize; ++i) {
482-
if (fields[i]) {
483-
BufferForUsedInputItems.push_back(*fields[i]);
484-
}
485-
}
486-
if (bucket.AsyncWriteOperation.has_value()) {
487-
BufferForUsedInputItemsBucketId = bucketId;
488-
return;
489-
}
490-
bucket.AsyncWriteOperation = bucket.SpilledData->WriteWideItem(BufferForUsedInputItems);
491-
BufferForUsedInputItems.resize(0);
492-
}
493-
494506
bool FlushSpillingBuffersAndWait() {
495507
UpdateSpillingBuckets();
496508

@@ -620,8 +632,14 @@ class TSpillingSupportState : public TComputationValue<TSpillingSupportState> {
620632
}
621633
AsyncReadOperation = std::nullopt;
622634
}
635+
623636
auto& bucket = SpilledBuckets.front();
624637
if (bucket.BucketState == TSpilledBucket::EBucketState::InMemory) return false;
638+
if (HasDataForProcessing) {
639+
Tongue = bucket.InMemoryProcessingState->Tongue;
640+
Throat = bucket.InMemoryProcessingState->Throat;
641+
return false;
642+
}
625643
//recover spilled state
626644
while(!bucket.SpilledState->Empty()) {
627645
RecoverState = true;
@@ -651,17 +669,11 @@ class TSpillingSupportState : public TComputationValue<TSpillingSupportState> {
651669
if (AsyncReadOperation) {
652670
return true;
653671
}
654-
auto **fields = Ctx.WideFields.data() + WideFieldsIndex;
655-
for (size_t i = 0, j = 0; i < ItemNodesSize; ++i) {
656-
if (fields[i]) {
657-
fields[i] = &(BufferForUsedInputItems[j++]);
658-
}
659-
}
660672

661673
Tongue = bucket.InMemoryProcessingState->Tongue;
662674
Throat = bucket.InMemoryProcessingState->Throat;
663675

664-
HasDataForProcessing = true;
676+
HasRawDataToExtract = true;
665677
return false;
666678
}
667679
bucket.BucketState = TSpilledBucket::EBucketState::InMemory;
@@ -725,8 +737,9 @@ class TSpillingSupportState : public TComputationValue<TSpillingSupportState> {
725737

726738
bool HasDataForProcessing = false;
727739

740+
bool HasRawDataToExtract = false;
741+
728742
TState InMemoryProcessingState;
729-
const size_t WideFieldsIndex;
730743
const TMultiType* const UsedInputItemType;
731744
const TMultiType* const KeyAndStateType;
732745
const size_t KeyWidth;
@@ -1237,6 +1250,7 @@ using TBaseComputation = TStatefulWideFlowCodegeneratorNode<TWideLastCombinerWra
12371250
, AllowSpilling(allowSpilling)
12381251
{}
12391252

1253+
// MARK: DoCalculate
12401254
EFetchResult DoCalculate(NUdf::TUnboxedValue& state, TComputationContext& ctx, NUdf::TUnboxedValue*const* output) const {
12411255
if (!state.HasValue()) {
12421256
MakeState(ctx, state);
@@ -1246,14 +1260,12 @@ using TBaseComputation = TStatefulWideFlowCodegeneratorNode<TWideLastCombinerWra
12461260
auto **fields = ctx.WideFields.data() + WideFieldsIndex;
12471261

12481262
while (true) {
1249-
for (auto i = 0U; i < Nodes.ItemNodes.size(); ++i)
1250-
fields[i] = Nodes.GetUsedInputItemNodePtrOrNull(ctx, i);
1251-
12521263
if (ptr->UpdateAndWait()) {
12531264
return EFetchResult::Yield;
12541265
}
1255-
12561266
if (ptr->InputStatus != EFetchResult::Finish) {
1267+
for (auto i = 0U; i < Nodes.ItemNodes.size(); ++i)
1268+
fields[i] = Nodes.GetUsedInputItemNodePtrOrNull(ctx, i);
12571269
switch (ptr->InputStatus = Flow->FetchValues(ctx, fields)) {
12581270
case EFetchResult::One:
12591271
break;
@@ -1274,7 +1286,11 @@ using TBaseComputation = TStatefulWideFlowCodegeneratorNode<TWideLastCombinerWra
12741286
case TSpillingSupportState::ETasteResult::Update:
12751287
Nodes.ProcessItem(ctx, static_cast<NUdf::TUnboxedValue*>(ptr->Tongue), static_cast<NUdf::TUnboxedValue*>(ptr->Throat));
12761288
break;
1277-
case TSpillingSupportState::ETasteResult::Skip:
1289+
case TSpillingSupportState::ETasteResult::ConsumeRawData:
1290+
Nodes.ExtractValues(ctx, fields, static_cast<NUdf::TUnboxedValue*>(ptr->Throat));
1291+
break;
1292+
case TSpillingSupportState::ETasteResult::ExtractRawData:
1293+
Nodes.ExtractValues(ctx, static_cast<NUdf::TUnboxedValue*>(ptr->Throat), fields);
12781294
break;
12791295
}
12801296
continue;
@@ -1553,8 +1569,7 @@ using TBaseComputation = TStatefulWideFlowCodegeneratorNode<TWideLastCombinerWra
15531569
#endif
15541570
private:
15551571
void MakeState(TComputationContext& ctx, NUdf::TUnboxedValue& state) const {
1556-
state = ctx.HolderFactory.Create<TSpillingSupportState>(WideFieldsIndex,
1557-
UsedInputItemType, KeyAndStateType,
1572+
state = ctx.HolderFactory.Create<TSpillingSupportState>(UsedInputItemType, KeyAndStateType,
15581573
Nodes.KeyNodes.size(),
15591574
Nodes.ItemNodes.size(),
15601575
#ifdef MKQL_DISABLE_CODEGEN

0 commit comments

Comments
 (0)