@@ -30,14 +30,23 @@ void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByFullBatches(
30
30
resultSchema->GetIndexInfo ().GetReplaceKey (), resultDataSchema, false , IIndexInfo::GetSnapshotColumnNames ());
31
31
32
32
THashSet<ui64> portionsInUsage;
33
+ std::set<ui32> columnIds;
33
34
for (auto && i : portions) {
35
+ if (columnIds.size () != resultSchema->GetColumnsCount ()) {
36
+ for (auto id : i.GetPortionInfo ().GetColumnIds ()) {
37
+ if (resultSchema->GetFieldIndex (id) > 0 ) {
38
+ columnIds.emplace (id);
39
+ }
40
+ }
41
+ }
34
42
AFL_VERIFY (portionsInUsage.emplace (i.GetPortionInfo ().GetPortionId ()).second );
35
43
}
44
+ AFL_VERIFY (columnIds.size () <= resultSchema->GetColumnsCount ());
36
45
37
46
for (auto && i : portions) {
38
47
auto dataSchema = i.GetPortionInfo ().GetSchema (context.SchemaVersions );
39
48
auto batch = i.RestoreBatch (dataSchema, *resultSchema);
40
- batch = resultSchema->NormalizeBatch (*dataSchema, batch).DetachResult ();
49
+ batch = resultSchema->NormalizeBatch (*dataSchema, batch, columnIds ).DetachResult ();
41
50
IIndexInfo::NormalizeDeletionColumn (*batch);
42
51
auto filter = BuildPortionFilter (shardingActual, batch, i.GetPortionInfo (), portionsInUsage, resultSchema);
43
52
mergeStream.AddSource (batch, filter);
@@ -175,13 +184,32 @@ void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByChunks(
175
184
}
176
185
177
186
std::shared_ptr<TSerializationStats> stats = std::make_shared<TSerializationStats>();
178
- for (auto && i : SwitchedPortions) {
179
- stats->Merge (i.GetSerializationStat (*resultSchema));
187
+ std::set<ui32> columnIds;
188
+ {
189
+ {
190
+ THashMap<ui64, ISnapshotSchema::TPtr> schemas;
191
+ for (auto & portion : SwitchedPortions) {
192
+ auto dataSchema = portion.GetSchema (context.SchemaVersions );
193
+ schemas.emplace (dataSchema->GetVersion (), dataSchema);
194
+ }
195
+ columnIds = ISnapshotSchema::GetColumnsWithDifferentDefaults (schemas, resultSchema);
196
+ }
197
+ for (auto && i : SwitchedPortions) {
198
+ stats->Merge (i.GetSerializationStat (*resultSchema));
199
+ if (columnIds.size () != resultSchema->GetColumnsCount ()) {
200
+ for (auto id : i.GetColumnIds ()) {
201
+ if (resultSchema->HasColumnId (id)) {
202
+ columnIds.emplace (id);
203
+ }
204
+ }
205
+ }
206
+ }
207
+ AFL_VERIFY (columnIds.size () <= resultSchema->GetColumnsCount ());
180
208
}
181
209
182
210
std::vector<std::map<ui32, std::vector<TColumnPortionResult>>> chunkGroups;
183
211
chunkGroups.resize (batchResults.size ());
184
- for (auto && columnId : resultSchema-> GetIndexInfo (). GetColumnIds () ) {
212
+ for (auto && columnId : columnIds ) {
185
213
NActors::TLogContextGuard logGuard (
186
214
NActors::TLogContextBuilder::Build ()(" field_name" , resultSchema->GetIndexInfo ().GetColumnName (columnId)));
187
215
auto columnInfo = stats->GetColumnInfo (columnId);
@@ -196,11 +224,10 @@ void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByChunks(
196
224
if (!p.ExtractColumnChunks (columnId, records, chunks)) {
197
225
if (!loader) {
198
226
loader = resultSchema->GetColumnLoaderVerified (columnId);
199
- } else {
200
- AFL_VERIFY (dataSchema->IsSpecialColumnId (columnId));
201
227
}
228
+ auto f = resultSchema->GetFieldByColumnIdVerified (columnId);
202
229
chunks.emplace_back (std::make_shared<NChunks::TDefaultChunkPreparation>(columnId, p.GetPortionInfo ().GetRecordsCount (),
203
- p. GetPortionInfo (). GetColumnRawBytes ({ columnId }), resultField, resultSchema->GetDefaultValueVerified (columnId),
230
+ resultField, resultSchema->GetExternalDefaultValueVerified (columnId),
204
231
resultSchema->GetColumnSaver (columnId)));
205
232
records = { nullptr };
206
233
}
0 commit comments