Skip to content

Commit a3c59a2

Browse files
hollingum authored and MongoDB Bot committed
SERVER-96691: Reduce memory consumption of 2nd-phase validation (#28973)
GitOrigin-RevId: 33c1b1f88f04061dfb0cea4e83a283488a135b46
1 parent 539f9c3 commit a3c59a2

File tree

3 files changed

+92
-157
lines changed

3 files changed

+92
-157
lines changed

src/mongo/db/catalog/validate/index_consistency.cpp

+67-114
Original file line number | Diff line number | Diff line change
@@ -98,22 +98,6 @@ MONGO_FAIL_POINT_DEFINE(failIndexKeyOrdering);
9898

9999
StringSet::hasher hash;
100100

101-
/**
102-
* Returns a key for the '_extraIndexEntries' and '_missingIndexEntries' maps. The key is a pair
103-
* of index name and the index key represented in KeyString form.
104-
* Using the index name is required as the index keys are passed in as KeyStrings which do not
105-
* contain field names.
106-
*
107-
* If we had the following document: { a: 1, b: 1 } with two indexes on keys "a" and "b", then
108-
* the KeyStrings for the index keys of the document would be identical as the field name in the
109-
* KeyString is not present. The BSON representation of this would look like: { : 1 } for both.
110-
* To distinguish these as different index keys, return a pair of index name and index key.
111-
*/
112-
std::pair<std::string, std::string> _generateKeyForMap(const IndexInfo& indexInfo,
113-
const key_string::Value& ks) {
114-
return std::make_pair(indexInfo.indexName, std::string(ks.getBuffer(), ks.getSize()));
115-
}
116-
117101
BSONObj _rehydrateKey(const BSONObj& keyPattern, const BSONObj& indexKey) {
118102
// We need to rehydrate the indexKey for improved readability.
119103
// {"": ObjectId(...)} -> {"_id": ObjectId(...)}
@@ -132,27 +116,15 @@ BSONObj _rehydrateKey(const BSONObj& keyPattern, const BSONObj& indexKey) {
132116
return b.obj();
133117
}
134118

135-
136119
} // namespace
137120

138-
IndexInfo::IndexInfo(const IndexDescriptor* descriptor)
139-
: indexName(descriptor->indexName()),
140-
keyPattern(descriptor->keyPattern()),
141-
indexNameHash(hash(descriptor->indexName())),
142-
ord(Ordering::make(descriptor->keyPattern())),
143-
unique(descriptor->unique()),
144-
accessMethod(descriptor->getEntry()->accessMethod()) {}
145-
146-
IndexEntryInfo::IndexEntryInfo(const IndexInfo& indexInfo,
147-
RecordId entryRecordId,
148-
BSONObj entryIdKey,
149-
key_string::Value entryKeyString)
150-
: indexName(indexInfo.indexName),
151-
keyPattern(indexInfo.keyPattern),
152-
ord(indexInfo.ord),
153-
recordId(std::move(entryRecordId)),
154-
idKey(entryIdKey.getOwned()),
155-
keyString(entryKeyString) {}
121+
IndexInfo::IndexInfo(const IndexDescriptor& descriptor)
122+
: indexName(descriptor.indexName()),
123+
keyPattern(descriptor.keyPattern()),
124+
indexNameHash(hash(descriptor.indexName())),
125+
ord(Ordering::make(descriptor.keyPattern())),
126+
unique(descriptor.unique()),
127+
accessMethod(descriptor.getEntry()->accessMethod()) {}
156128

157129
IndexConsistency::IndexConsistency(OperationContext* opCtx,
158130
CollectionValidation::ValidateState* validateState,
@@ -174,7 +146,7 @@ KeyStringIndexConsistency::KeyStringIndexConsistency(
174146
for (const auto& indexIdent : _validateState->getIndexIdents()) {
175147
const IndexDescriptor* descriptor =
176148
validateState->getCollection()->getIndexCatalog()->findIndexByIdent(opCtx, indexIdent);
177-
_indexesInfo.emplace(descriptor->indexName(), IndexInfo(descriptor));
149+
_indexesInfo.emplace(descriptor->indexName(), IndexInfo(*descriptor));
178150
}
179151
}
180152

@@ -233,10 +205,10 @@ void KeyStringIndexConsistency::repairIndexEntries(OperationContext* opCtx,
233205
ValidateResults* results) {
234206
invariant(_validateState->getIndexIdents().size() > 0);
235207
for (auto it = _missingIndexEntries.begin(); it != _missingIndexEntries.end();) {
236-
const key_string::Value& ks = it->second.keyString;
208+
const key_string::Value& ks = it->first.second;
237209
const KeyFormat keyFormat = _validateState->getCollection()->getRecordStore()->keyFormat();
238210

239-
const std::string& indexName = it->first.first;
211+
const std::string& indexName = it->first.first->indexName;
240212
const IndexDescriptor* descriptor =
241213
_validateState->getCollection()->getIndexCatalog()->findIndexByName(opCtx, indexName);
242214
const IndexCatalogEntry* entry = descriptor->getEntry();
@@ -276,25 +248,17 @@ void KeyStringIndexConsistency::addIndexEntryErrors(OperationContext* opCtx,
276248
// Inform which indexes have inconsistencies and add the BSON objects of the inconsistent index
277249
// entries to the results vector.
278250
int numMissingIndexEntryErrors = _missingIndexEntries.size();
279-
for (const auto& missingIndexEntry : _missingIndexEntries) {
280-
const IndexEntryInfo& entryInfo = missingIndexEntry.second;
281-
key_string::Value ks = entryInfo.keyString;
282-
auto indexKey =
283-
key_string::toBsonSafe(ks.getBuffer(), ks.getSize(), entryInfo.ord, ks.getTypeBits());
284-
const BSONObj entry = _generateInfo(entryInfo.indexName,
285-
entryInfo.keyPattern,
286-
entryInfo.recordId,
287-
indexKey,
288-
entryInfo.idKey);
289-
results->addMissingIndexEntry(entry);
290-
291-
_printMetadata(opCtx, results, entryInfo);
292-
293-
std::string indexName = entry["indexName"].String();
251+
for (const auto& [missingIndexKey, missingRecordId] : _missingIndexEntries) {
252+
_foundInconsistency(opCtx,
253+
missingIndexKey,
254+
missingRecordId,
255+
*results,
256+
/*isMissing=*/true);
257+
258+
const std::string& indexName = missingIndexKey.first->indexName;
294259
if (!results->getIndexResultsMap().at(indexName).isValid()) {
295260
continue;
296261
}
297-
298262
StringBuilder ss;
299263
ss << "Index with name '" << indexName << "' has inconsistencies.";
300264
results->getIndexResultsMap().at(indexName).addError(ss.str());
@@ -304,9 +268,13 @@ void KeyStringIndexConsistency::addIndexEntryErrors(OperationContext* opCtx,
304268
for (const auto& item : _extraIndexEntries) {
305269
numExtraIndexEntryErrors += item.second.size();
306270
for (const auto& entry : item.second) {
307-
results->addExtraIndexEntry(entry);
271+
_foundInconsistency(opCtx,
272+
item.first,
273+
entry,
274+
*results,
275+
/*isMissing=*/false);
308276

309-
std::string indexName = entry["indexName"].String();
277+
const std::string& indexName = item.first.first->indexName;
310278
if (!results->getIndexResultsMap().at(indexName).isValid()) {
311279
continue;
312280
}
@@ -375,23 +343,8 @@ void KeyStringIndexConsistency::addDocKey(OperationContext* opCtx,
375343
}
376344
} else if (lower.indexKeyCount || upper.indexKeyCount) {
377345
// Found a document key for a hash bucket that had mismatches.
378-
379-
// Get the document's _id index key.
380-
auto record = _validateState->getSeekRecordStoreCursor()->seekExact(opCtx, recordId);
381-
invariant(record);
382-
383-
BSONObj data = record->data.toBson();
384-
385-
BSONObjBuilder idKeyBuilder;
386-
if (data.hasField("_id")) {
387-
idKeyBuilder.append(data["_id"]);
388-
}
389-
390346
// Cannot have duplicate KeyStrings during the document scan phase for the same index.
391-
IndexKey key = _generateKeyForMap(*indexInfo, ks);
392-
invariant(_missingIndexEntries.count(key) == 0);
393-
_missingIndexEntries.insert(
394-
std::make_pair(key, IndexEntryInfo(*indexInfo, recordId, idKeyBuilder.obj(), ks)));
347+
invariant(_missingIndexEntries.insert({{indexInfo, ks}, recordId}).second);
395348
}
396349
}
397350

@@ -432,12 +385,7 @@ void KeyStringIndexConsistency::addIndexKey(OperationContext* opCtx,
432385
// If there is a corresponding document key for the index entry key, we remove the key from
433386
// the '_missingIndexEntries' map. However if there was no document key for the index entry
434387
// key, we add the key to the '_extraIndexEntries' map.
435-
auto indexKey =
436-
key_string::toBsonSafe(ks.getBuffer(), ks.getSize(), indexInfo->ord, ks.getTypeBits());
437-
BSONObj info = _generateInfo(
438-
indexInfo->indexName, indexInfo->keyPattern, recordId, indexKey, BSONObj());
439-
440-
IndexKey key = _generateKeyForMap(*indexInfo, ks);
388+
IndexKey key{indexInfo, ks};
441389
if (_missingIndexEntries.count(key) == 0) {
442390
if (_validateState->fixErrors()) {
443391
// Removing extra index entries.
@@ -460,20 +408,7 @@ void KeyStringIndexConsistency::addIndexKey(OperationContext* opCtx,
460408
}
461409

462410
// We may have multiple extra index entries for a given KeyString.
463-
auto search = _extraIndexEntries.find(key);
464-
if (search == _extraIndexEntries.end()) {
465-
SimpleBSONObjSet infoSet = {info};
466-
_extraIndexEntries.insert(std::make_pair(key, infoSet));
467-
468-
// Prints the collection document's and index entry's metadata.
469-
_validateState->getCollection()->getRecordStore()->printRecordMetadata(
470-
opCtx, recordId, results->getRecordTimestampsPtr());
471-
indexInfo->accessMethod->asSortedData()
472-
->getSortedDataInterface()
473-
->printIndexEntryMetadata(opCtx, ks);
474-
return;
475-
}
476-
search->second.insert(info);
411+
_extraIndexEntries[key].emplace_back(recordId);
477412
} else {
478413
_missingIndexEntries.erase(key);
479414
}
@@ -992,40 +927,58 @@ void KeyStringIndexConsistency::traverseRecord(OperationContext* opCtx,
992927
}
993928
}
994929

995-
BSONObj KeyStringIndexConsistency::_generateInfo(const std::string& indexName,
996-
const BSONObj& keyPattern,
997-
const RecordId& recordId,
998-
const BSONObj& indexKey,
999-
const BSONObj& idKey) {
1000-
BSONObj rehydratedKey = _rehydrateKey(keyPattern, indexKey);
930+
void KeyStringIndexConsistency::_foundInconsistency(OperationContext* opCtx,
931+
const IndexKey& key,
932+
const RecordId& recordId,
933+
ValidateResults& results,
934+
bool isMissing) {
935+
const IndexInfo& info = *key.first;
936+
const key_string::Value& ks = key.second;
1001937

1002-
BSONObjBuilder infoBuilder;
1003-
infoBuilder.append("indexName", indexName);
1004-
recordId.serializeToken("recordId", &infoBuilder);
938+
// Print the metadata associated with the inconsistency.
939+
_validateState->getCollection()->getRecordStore()->printRecordMetadata(
940+
opCtx, recordId, results.getRecordTimestampsPtr());
941+
info.accessMethod->asSortedData()->getSortedDataInterface()->printIndexEntryMetadata(opCtx, ks);
1005942

1006-
if (!idKey.isEmpty()) {
1007-
infoBuilder.append("idKey", idKey);
1008-
}
943+
const BSONObj& indexKey =
944+
key_string::toBsonSafe(ks.getBuffer(), ks.getSize(), info.ord, ks.getTypeBits());
945+
BSONObj rehydratedKey = _rehydrateKey(info.keyPattern, indexKey);
1009946

947+
BSONObjBuilder infoBuilder;
948+
infoBuilder.append("indexName", info.indexName);
949+
recordId.serializeToken("recordId", &infoBuilder);
1010950
infoBuilder.append("indexKey", rehydratedKey);
1011951

1012-
return infoBuilder.obj();
952+
if (isMissing) {
953+
// Get the document's _id index key.
954+
auto record = _validateState->getSeekRecordStoreCursor()->seekExact(opCtx, recordId);
955+
invariant(record);
956+
BSONObj data = record->data.toBson();
957+
BSONObjBuilder idKeyBuilder;
958+
if (data.hasField("_id")) {
959+
idKeyBuilder.append(data.getField("_id"));
960+
infoBuilder.append("idKey", idKeyBuilder.obj());
961+
}
962+
963+
results.addMissingIndexEntry(infoBuilder.obj());
964+
} else {
965+
results.addExtraIndexEntry(infoBuilder.obj());
966+
}
1013967
}
1014968

1015969
uint32_t KeyStringIndexConsistency::_hashKeyString(const key_string::Value& ks,
1016970
const uint32_t indexNameHash) const {
1017971
return ks.hash(indexNameHash);
1018972
}
1019973

1020-
void KeyStringIndexConsistency::_printMetadata(OperationContext* opCtx,
1021-
ValidateResults* results,
1022-
const IndexEntryInfo& entryInfo) {
1023-
_validateState->getCollection()->getRecordStore()->printRecordMetadata(
1024-
opCtx, entryInfo.recordId, results->getRecordTimestampsPtr());
1025-
getIndexInfo(entryInfo.indexName)
1026-
.accessMethod->asSortedData()
1027-
->getSortedDataInterface()
1028-
->printIndexEntryMetadata(opCtx, entryInfo.keyString);
974+
bool KeyStringIndexConsistency::AlphabeticalByIndexNameComparator::operator()(
975+
const IndexKey& lhs, const IndexKey& rhs) const {
976+
if (lhs.first->indexName < rhs.first->indexName) {
977+
return true;
978+
} else if (rhs.first->indexName < lhs.first->indexName) {
979+
return false;
980+
}
981+
return lhs.second < rhs.second;
1029982
}
1030983

1031984
} // namespace mongo

0 commit comments

Comments
 (0)