Skip to content

Commit f3b5bd9

Browse files
authored
Move the terms index of _id off-heap. (#52405)
In #42838 we moved the terms index of all fields off-heap except the `_id` field because we were worried it might make indexing slower. In general, the indexing rate is only affected if explicit IDs are used, as otherwise Elasticsearch almost never performs lookups in the terms dictionary for the purpose of indexing. So it's quite wasteful to require the terms index of `_id` to be loaded on-heap for users who have append-only workloads.

Furthermore, I've been conducting benchmarks when indexing with explicit ids on the http_logs dataset that suggest that the slowdown is low enough that it's probably not worth forcing the terms index to be kept on-heap.

Here are some numbers for the median indexing rate in docs/s:

| Run | Master  | Patch   |
| --- | ------- | ------- |
| 1   | 45851.2 | 46401.4 |
| 2   | 45192.6 | 44561.0 |
| 3   | 45635.2 | 44137.0 |
| 4   | 46435.0 | 44692.8 |
| 5   | 45829.0 | 44949.0 |

And now heap usage in MB for segments:

| Run | Master  | Patch    |
| --- | ------- | -------- |
| 1   | 41.1720 | 0.352083 |
| 2   | 45.1545 | 0.382534 |
| 3   | 41.7746 | 0.381285 |
| 4   | 45.3673 | 0.412737 |
| 5   | 45.4616 | 0.375063 |

Indexing rate decreased by 1.8% on average, while memory usage decreased by more than 100x.

The `http_logs` dataset contains small documents and has a simple indexing chain. More complex indexing chains, e.g. with more fields, ingest pipelines, etc. would see an even lower decrease of indexing rate.
1 parent 67f6840 commit f3b5bd9

File tree

6 files changed

+57
-26
lines changed

6 files changed

+57
-26
lines changed

server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java

+1
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings {
164164
IndexSettings.DEFAULT_PIPELINE,
165165
IndexSettings.FINAL_PIPELINE,
166166
MetaDataIndexStateService.VERIFIED_BEFORE_CLOSE_SETTING,
167+
IndexSettings.ON_HEAP_ID_TERMS_INDEX,
167168

168169
// validate that built-in similarities don't get redefined
169170
Setting.groupSetting(

server/src/main/java/org/elasticsearch/index/IndexSettings.java

+3
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,9 @@ public final class IndexSettings {
8484
"[true, false, checksum] but was: " + s);
8585
}
8686
}, Property.IndexScope);
87+
// This setting is intentionally undocumented: it is considered an escape hatch.
88+
public static final Setting<Boolean> ON_HEAP_ID_TERMS_INDEX =
89+
Setting.boolSetting("index.force_memory_id_terms_dictionary", false, Property.IndexScope);
8790

8891
/**
8992
* Index setting describing the maximum value of from + size on a query.

server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java

+11-7
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@
102102
import java.io.Closeable;
103103
import java.io.IOException;
104104
import java.util.Arrays;
105+
import java.util.Collections;
105106
import java.util.HashMap;
106107
import java.util.List;
107108
import java.util.Locale;
@@ -2074,21 +2075,24 @@ IndexWriter createWriter(Directory directory, IndexWriterConfig iwc) throws IOEx
20742075
}
20752076
}
20762077

2077-
static Map<String, String> getReaderAttributes(Directory directory) {
2078+
static Map<String, String> getReaderAttributes(Directory directory, IndexSettings indexSettings) {
20782079
Directory unwrap = FilterDirectory.unwrap(directory);
20792080
boolean defaultOffHeap = FsDirectoryFactory.isHybridFs(unwrap) || unwrap instanceof MMapDirectory;
2080-
return Map.of(
2081-
BlockTreeTermsReader.FST_MODE_KEY, // if we are using MMAP for term dics we force all off heap unless it's the ID field
2082-
defaultOffHeap ? FSTLoadMode.OFF_HEAP.name() : FSTLoadMode.ON_HEAP.name()
2083-
, BlockTreeTermsReader.FST_MODE_KEY + "." + IdFieldMapper.NAME, // always force ID field on-heap for fast updates
2084-
FSTLoadMode.ON_HEAP.name());
2081+
Map<String, String> attributes = new HashMap<>();
2082+
attributes.put(BlockTreeTermsReader.FST_MODE_KEY, defaultOffHeap ? FSTLoadMode.OFF_HEAP.name() : FSTLoadMode.ON_HEAP.name());
2083+
if (IndexSettings.ON_HEAP_ID_TERMS_INDEX.exists(indexSettings.getSettings())) {
2084+
final boolean idOffHeap = IndexSettings.ON_HEAP_ID_TERMS_INDEX.get(indexSettings.getSettings()) == false;
2085+
attributes.put(BlockTreeTermsReader.FST_MODE_KEY + "." + IdFieldMapper.NAME,
2086+
idOffHeap ? FSTLoadMode.OFF_HEAP.name() : FSTLoadMode.ON_HEAP.name());
2087+
}
2088+
return Collections.unmodifiableMap(attributes);
20852089
}
20862090

20872091
private IndexWriterConfig getIndexWriterConfig() {
20882092
final IndexWriterConfig iwc = new IndexWriterConfig(engineConfig.getAnalyzer());
20892093
iwc.setCommitOnClose(false); // we by default don't commit on close
20902094
iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
2091-
iwc.setReaderAttributes(getReaderAttributes(store.directory()));
2095+
iwc.setReaderAttributes(getReaderAttributes(store.directory(), engineConfig.getIndexSettings()));
20922096
iwc.setIndexDeletionPolicy(combinedDeletionPolicy);
20932097
// with tests.verbose, lucene sets this up: plumb to align with filesystem stream
20942098
boolean verbose = false;

server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java

+37-19
Original file line numberDiff line numberDiff line change
@@ -5530,53 +5530,71 @@ public void testRefreshAndCloseEngineConcurrently() throws Exception {
55305530
}
55315531

55325532
public void testGetReaderAttributes() throws IOException {
5533+
Settings.Builder settingsBuilder = Settings.builder()
5534+
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT);
5535+
Settings settings = settingsBuilder.build();
5536+
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("foo", settings);
5537+
55335538
try(BaseDirectoryWrapper dir = newFSDirectory(createTempDir())) {
55345539
Directory unwrap = FilterDirectory.unwrap(dir);
55355540
boolean isMMap = unwrap instanceof MMapDirectory;
5536-
Map<String, String> readerAttributes = InternalEngine.getReaderAttributes(dir);
5537-
assertEquals(2, readerAttributes.size());
5538-
assertEquals("ON_HEAP", readerAttributes.get("blocktree.terms.fst._id"));
5539-
if (isMMap) {
5540-
assertEquals("OFF_HEAP", readerAttributes.get("blocktree.terms.fst"));
5541-
} else {
5542-
assertEquals("ON_HEAP", readerAttributes.get("blocktree.terms.fst"));
5543-
}
5541+
Map<String, String> readerAttributes = InternalEngine.getReaderAttributes(dir, indexSettings);
5542+
assertEquals(Collections.singletonMap("blocktree.terms.fst", isMMap ? "OFF_HEAP" : "ON_HEAP"), readerAttributes);
55445543
}
55455544

55465545
try(MMapDirectory dir = new MMapDirectory(createTempDir())) {
55475546
Map<String, String> readerAttributes =
55485547
InternalEngine.getReaderAttributes(randomBoolean() ? dir :
5549-
new MockDirectoryWrapper(random(), dir));
5550-
assertEquals(2, readerAttributes.size());
5551-
assertEquals("ON_HEAP", readerAttributes.get("blocktree.terms.fst._id"));
5552-
assertEquals("OFF_HEAP", readerAttributes.get("blocktree.terms.fst"));
5548+
new MockDirectoryWrapper(random(), dir), indexSettings);
5549+
assertEquals(Collections.singletonMap("blocktree.terms.fst", "OFF_HEAP"), readerAttributes);
55535550
}
55545551

5555-
Settings.Builder settingsBuilder = Settings.builder()
5556-
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT);
5557-
Settings settings = settingsBuilder.build();
5558-
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("foo", settings);
55595552
FsDirectoryFactory service = new FsDirectoryFactory();
55605553
Path tempDir = createTempDir().resolve(indexSettings.getUUID()).resolve("0");
55615554
ShardPath path = new ShardPath(false, tempDir, tempDir, new ShardId(indexSettings.getIndex(), 0));
55625555
try (Directory directory = service.newDirectory(indexSettings, path)) {
55635556

55645557
Map<String, String> readerAttributes =
55655558
InternalEngine.getReaderAttributes(randomBoolean() ? directory :
5566-
new MockDirectoryWrapper(random(), directory));
5567-
assertEquals(2, readerAttributes.size());
5559+
new MockDirectoryWrapper(random(), directory), indexSettings);
5560+
assertEquals(1, readerAttributes.size());
55685561

55695562
switch (IndexModule.defaultStoreType(true)) {
55705563
case HYBRIDFS:
55715564
case MMAPFS:
5572-
assertEquals("ON_HEAP", readerAttributes.get("blocktree.terms.fst._id"));
55735565
assertEquals("OFF_HEAP", readerAttributes.get("blocktree.terms.fst"));
55745566
break;
55755567
case NIOFS:
55765568
case SIMPLEFS:
55775569
case FS:
5570+
assertEquals("ON_HEAP", readerAttributes.get("blocktree.terms.fst"));
5571+
break;
5572+
default:
5573+
fail("unknownw type");
5574+
}
5575+
}
5576+
5577+
settingsBuilder.put(IndexSettings.ON_HEAP_ID_TERMS_INDEX.getKey(), true);
5578+
settings = settingsBuilder.build();
5579+
indexSettings = IndexSettingsModule.newIndexSettings("foo", settings);
5580+
try (Directory directory = service.newDirectory(indexSettings, path)) {
5581+
5582+
Map<String, String> readerAttributes =
5583+
InternalEngine.getReaderAttributes(randomBoolean() ? directory :
5584+
new MockDirectoryWrapper(random(), directory), indexSettings);
5585+
assertEquals(2, readerAttributes.size());
5586+
5587+
switch (IndexModule.defaultStoreType(true)) {
5588+
case HYBRIDFS:
5589+
case MMAPFS:
5590+
assertEquals("OFF_HEAP", readerAttributes.get("blocktree.terms.fst"));
55785591
assertEquals("ON_HEAP", readerAttributes.get("blocktree.terms.fst._id"));
5592+
break;
5593+
case NIOFS:
5594+
case SIMPLEFS:
5595+
case FS:
55795596
assertEquals("ON_HEAP", readerAttributes.get("blocktree.terms.fst"));
5597+
assertEquals("ON_HEAP", readerAttributes.get("blocktree.terms.fst._id"));
55805598
break;
55815599
default:
55825600
fail("unknownw type");

test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java

+4
Original file line numberDiff line numberDiff line change
@@ -436,6 +436,10 @@ protected Settings.Builder setRandomIndexSettings(Random random, Settings.Builde
436436
RandomNumbers.randomIntBetween(random, 1, 15) + "ms");
437437
}
438438

439+
if (random.nextBoolean()) {
440+
builder.put(IndexSettings.ON_HEAP_ID_TERMS_INDEX.getKey(), random.nextBoolean());
441+
}
442+
439443
return builder;
440444
}
441445

x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/TransportResumeFollowAction.java

+1
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,7 @@ static String[] extractLeaderShardHistoryUUIDs(Map<String, String> ccrIndexMetaD
401401
IndexSettings.INDEX_FLUSH_AFTER_MERGE_THRESHOLD_SIZE_SETTING,
402402
IndexSettings.INDEX_GC_DELETES_SETTING,
403403
IndexSettings.MAX_REFRESH_LISTENERS_PER_SHARD,
404+
IndexSettings.ON_HEAP_ID_TERMS_INDEX,
404405
IndicesRequestCache.INDEX_CACHE_REQUEST_ENABLED_SETTING,
405406
BitsetFilterCache.INDEX_LOAD_RANDOM_ACCESS_FILTERS_EAGERLY_SETTING,
406407
SearchSlowLog.INDEX_SEARCH_SLOWLOG_THRESHOLD_FETCH_DEBUG_SETTING,

0 commit comments

Comments
 (0)