Skip to content

Commit f993ef8

Browse files
authored
Move the terms index of _id off-heap. (#52518)
In #42838 we moved the terms index of all fields off-heap except the `_id` field because we were worried it might make indexing slower. In general, the indexing rate is only affected if explicit IDs are used, as otherwise Elasticsearch almost never performs lookups in the terms dictionary for the purpose of indexing. So it's quite wasteful to require the terms index of `_id` to be loaded on-heap for users who have append-only workloads.

Furthermore, I've been conducting benchmarks when indexing with explicit IDs on the `http_logs` dataset that suggest that the slowdown is low enough that it's probably not worth forcing the terms index to be kept on-heap.

Here are some numbers for the median indexing rate in docs/s:

| Run | Master  | Patch   |
| --- | ------- | ------- |
| 1   | 45851.2 | 46401.4 |
| 2   | 45192.6 | 44561.0 |
| 3   | 45635.2 | 44137.0 |
| 4   | 46435.0 | 44692.8 |
| 5   | 45829.0 | 44949.0 |

And now heap usage in MB for segments:

| Run | Master  | Patch    |
| --- | ------- | -------- |
| 1   | 41.1720 | 0.352083 |
| 2   | 45.1545 | 0.382534 |
| 3   | 41.7746 | 0.381285 |
| 4   | 45.3673 | 0.412737 |
| 5   | 45.4616 | 0.375063 |

Indexing rate decreased by 1.8% on average, while memory usage decreased by more than 100x. The `http_logs` dataset contains small documents and has a simple indexing chain. More complex indexing chains, e.g. with more fields, ingest pipelines, etc. would see an even lower decrease of indexing rate.
1 parent de3d674 commit f993ef8

File tree

6 files changed

+57
-28
lines changed

6 files changed

+57
-28
lines changed

server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings {
170170
IndexSettings.DEFAULT_PIPELINE,
171171
IndexSettings.FINAL_PIPELINE,
172172
MetaDataIndexStateService.VERIFIED_BEFORE_CLOSE_SETTING,
173+
IndexSettings.ON_HEAP_ID_TERMS_INDEX,
173174

174175
// validate that built-in similarities don't get redefined
175176
Setting.groupSetting("index.similarity.", (s) -> {

server/src/main/java/org/elasticsearch/index/IndexSettings.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,9 @@ public final class IndexSettings {
8484
"[true, false, checksum] but was: " + s);
8585
}
8686
}, Property.IndexScope);
87+
// This setting is undocumented as it is considered as an escape hatch.
88+
public static final Setting<Boolean> ON_HEAP_ID_TERMS_INDEX =
89+
Setting.boolSetting("index.force_memory_id_terms_dictionary", false, Property.IndexScope);
8790

8891
/**
8992
* Index setting describing the maximum value of from + size on a query.

server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2211,22 +2211,24 @@ IndexWriter createWriter(Directory directory, IndexWriterConfig iwc) throws IOEx
22112211
}
22122212
}
22132213

2214-
static Map<String, String> getReaderAttributes(Directory directory) {
2214+
static Map<String, String> getReaderAttributes(Directory directory, IndexSettings indexSettings) {
22152215
Directory unwrap = FilterDirectory.unwrap(directory);
22162216
boolean defaultOffHeap = FsDirectoryFactory.isHybridFs(unwrap) || unwrap instanceof MMapDirectory;
2217-
HashMap<String, String> map = new HashMap(2);
2218-
map.put(BlockTreeTermsReader.FST_MODE_KEY, // if we are using MMAP for term dics we force all off heap unless it's the ID field
2219-
defaultOffHeap ? FSTLoadMode.OFF_HEAP.name() : FSTLoadMode.ON_HEAP.name());
2220-
map.put(BlockTreeTermsReader.FST_MODE_KEY + "." + IdFieldMapper.NAME, // always force ID field on-heap for fast updates
2221-
FSTLoadMode.ON_HEAP.name());
2222-
return Collections.unmodifiableMap(map);
2217+
Map<String, String> attributes = new HashMap<>();
2218+
attributes.put(BlockTreeTermsReader.FST_MODE_KEY, defaultOffHeap ? FSTLoadMode.OFF_HEAP.name() : FSTLoadMode.ON_HEAP.name());
2219+
if (IndexSettings.ON_HEAP_ID_TERMS_INDEX.exists(indexSettings.getSettings())) {
2220+
final boolean idOffHeap = IndexSettings.ON_HEAP_ID_TERMS_INDEX.get(indexSettings.getSettings()) == false;
2221+
attributes.put(BlockTreeTermsReader.FST_MODE_KEY + "." + IdFieldMapper.NAME,
2222+
idOffHeap ? FSTLoadMode.OFF_HEAP.name() : FSTLoadMode.ON_HEAP.name());
2223+
}
2224+
return Collections.unmodifiableMap(attributes);
22232225
}
22242226

22252227
private IndexWriterConfig getIndexWriterConfig() {
22262228
final IndexWriterConfig iwc = new IndexWriterConfig(engineConfig.getAnalyzer());
22272229
iwc.setCommitOnClose(false); // we by default don't commit on close
22282230
iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
2229-
iwc.setReaderAttributes(getReaderAttributes(store.directory()));
2231+
iwc.setReaderAttributes(getReaderAttributes(store.directory(), engineConfig.getIndexSettings()));
22302232
iwc.setIndexDeletionPolicy(combinedDeletionPolicy);
22312233
// with tests.verbose, lucene sets this up: plumb to align with filesystem stream
22322234
boolean verbose = false;

server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java

Lines changed: 37 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -5886,53 +5886,71 @@ public void testRefreshAndCloseEngineConcurrently() throws Exception {
58865886
}
58875887

58885888
public void testGetReaderAttributes() throws IOException {
5889+
Settings.Builder settingsBuilder = Settings.builder()
5890+
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT);
5891+
Settings settings = settingsBuilder.build();
5892+
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("foo", settings);
5893+
58895894
try(BaseDirectoryWrapper dir = newFSDirectory(createTempDir())) {
58905895
Directory unwrap = FilterDirectory.unwrap(dir);
58915896
boolean isMMap = unwrap instanceof MMapDirectory;
5892-
Map<String, String> readerAttributes = InternalEngine.getReaderAttributes(dir);
5893-
assertEquals(2, readerAttributes.size());
5894-
assertEquals("ON_HEAP", readerAttributes.get("blocktree.terms.fst._id"));
5895-
if (isMMap) {
5896-
assertEquals("OFF_HEAP", readerAttributes.get("blocktree.terms.fst"));
5897-
} else {
5898-
assertEquals("ON_HEAP", readerAttributes.get("blocktree.terms.fst"));
5899-
}
5897+
Map<String, String> readerAttributes = InternalEngine.getReaderAttributes(dir, indexSettings);
5898+
assertEquals(Collections.singletonMap("blocktree.terms.fst", isMMap ? "OFF_HEAP" : "ON_HEAP"), readerAttributes);
59005899
}
59015900

59025901
try(MMapDirectory dir = new MMapDirectory(createTempDir())) {
59035902
Map<String, String> readerAttributes =
59045903
InternalEngine.getReaderAttributes(randomBoolean() ? dir :
5905-
new MockDirectoryWrapper(random(), dir));
5906-
assertEquals(2, readerAttributes.size());
5907-
assertEquals("ON_HEAP", readerAttributes.get("blocktree.terms.fst._id"));
5908-
assertEquals("OFF_HEAP", readerAttributes.get("blocktree.terms.fst"));
5904+
new MockDirectoryWrapper(random(), dir), indexSettings);
5905+
assertEquals(Collections.singletonMap("blocktree.terms.fst", "OFF_HEAP"), readerAttributes);
59095906
}
59105907

5911-
Settings.Builder settingsBuilder = Settings.builder()
5912-
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT);
5913-
Settings settings = settingsBuilder.build();
5914-
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("foo", settings);
59155908
FsDirectoryFactory service = new FsDirectoryFactory();
59165909
Path tempDir = createTempDir().resolve(indexSettings.getUUID()).resolve("0");
59175910
ShardPath path = new ShardPath(false, tempDir, tempDir, new ShardId(indexSettings.getIndex(), 0));
59185911
try (Directory directory = service.newDirectory(indexSettings, path)) {
59195912

59205913
Map<String, String> readerAttributes =
59215914
InternalEngine.getReaderAttributes(randomBoolean() ? directory :
5922-
new MockDirectoryWrapper(random(), directory));
5923-
assertEquals(2, readerAttributes.size());
5915+
new MockDirectoryWrapper(random(), directory), indexSettings);
5916+
assertEquals(1, readerAttributes.size());
59245917

59255918
switch (IndexModule.defaultStoreType(true)) {
59265919
case HYBRIDFS:
59275920
case MMAPFS:
5928-
assertEquals("ON_HEAP", readerAttributes.get("blocktree.terms.fst._id"));
59295921
assertEquals("OFF_HEAP", readerAttributes.get("blocktree.terms.fst"));
59305922
break;
59315923
case NIOFS:
59325924
case SIMPLEFS:
59335925
case FS:
5926+
assertEquals("ON_HEAP", readerAttributes.get("blocktree.terms.fst"));
5927+
break;
5928+
default:
5929+
fail("unknownw type");
5930+
}
5931+
}
5932+
5933+
settingsBuilder.put(IndexSettings.ON_HEAP_ID_TERMS_INDEX.getKey(), true);
5934+
settings = settingsBuilder.build();
5935+
indexSettings = IndexSettingsModule.newIndexSettings("foo", settings);
5936+
try (Directory directory = service.newDirectory(indexSettings, path)) {
5937+
5938+
Map<String, String> readerAttributes =
5939+
InternalEngine.getReaderAttributes(randomBoolean() ? directory :
5940+
new MockDirectoryWrapper(random(), directory), indexSettings);
5941+
assertEquals(2, readerAttributes.size());
5942+
5943+
switch (IndexModule.defaultStoreType(true)) {
5944+
case HYBRIDFS:
5945+
case MMAPFS:
5946+
assertEquals("OFF_HEAP", readerAttributes.get("blocktree.terms.fst"));
59345947
assertEquals("ON_HEAP", readerAttributes.get("blocktree.terms.fst._id"));
5948+
break;
5949+
case NIOFS:
5950+
case SIMPLEFS:
5951+
case FS:
59355952
assertEquals("ON_HEAP", readerAttributes.get("blocktree.terms.fst"));
5953+
assertEquals("ON_HEAP", readerAttributes.get("blocktree.terms.fst._id"));
59365954
break;
59375955
default:
59385956
fail("unknownw type");

test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -457,10 +457,14 @@ protected Settings.Builder setRandomIndexSettings(Random random, Settings.Builde
457457
RandomNumbers.randomIntBetween(random, 1, 15) + "ms");
458458
}
459459

460-
if (randomBoolean()) {
460+
if (random.nextBoolean()) {
461461
builder.put(Store.FORCE_RAM_TERM_DICT.getKey(), true);
462462
}
463463

464+
if (random.nextBoolean()) {
465+
builder.put(IndexSettings.ON_HEAP_ID_TERMS_INDEX.getKey(), random.nextBoolean());
466+
}
467+
464468
return builder;
465469
}
466470

x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/TransportResumeFollowAction.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -408,6 +408,7 @@ static String[] extractLeaderShardHistoryUUIDs(Map<String, String> ccrIndexMetaD
408408
nonReplicatedSettings.add(IndexSettings.INDEX_TRANSLOG_SYNC_INTERVAL_SETTING);
409409
nonReplicatedSettings.add(IndexSettings.INDEX_GC_DELETES_SETTING);
410410
nonReplicatedSettings.add(IndexSettings.MAX_REFRESH_LISTENERS_PER_SHARD);
411+
nonReplicatedSettings.add(IndexSettings.ON_HEAP_ID_TERMS_INDEX);
411412

412413
nonReplicatedSettings.add(IndicesRequestCache.INDEX_CACHE_REQUEST_ENABLED_SETTING);
413414
nonReplicatedSettings.add(BitsetFilterCache.INDEX_LOAD_RANDOM_ACCESS_FILTERS_EAGERLY_SETTING);

0 commit comments

Comments
 (0)