From 5ddb043ddac765d5907fc1711f1fbddea8ac64e4 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Mon, 17 Feb 2020 11:13:36 +0100 Subject: [PATCH 1/3] Move the terms index of `_id` off-heap. In #42838 we moved the terms index of all fields off-heap except the `_id` field because we were worried it might make indexing slower. In general, the indexing rate is only affected if explicit IDs are used, as otherwise Elasticsearch almost never performs lookups in the terms dictionary for the purpose of indexing. So it's quite wasteful to require the terms index of `_id` to be loaded on-heap for users who have append-only workloads. Furthermore I've been conducting benchmarks when indexing with explicit ids on the http_logs dataset that suggest that the slowdown is low enough that it's probably not worth forcing the terms index to be kept on-heap. Here are some numbers for the median indexing rate in docs/s: | Run | Master | Patch | | --- | ------- | ------- | | 1 | 45851.2 | 46401.4 | | 2 | 45192.6 | 44561.0 | | 3 | 45635.2 | 44137.0 | | 4 | 46435.0 | 44692.8 | | 5 | 45829.0 | 44949.0 | And now heap usage in MB for segments: | Run | Master | Patch | | --- | ------- | -------- | | 1 | 41.1720 | 0.352083 | | 2 | 45.1545 | 0.382534 | | 3 | 41.7746 | 0.381285 | | 4 | 45.3673 | 0.412737 | | 5 | 45.4616 | 0.375063 | Indexing rate decreased by 1.8% on average, while memory usage decreased by more than 100x. The `http_logs` dataset contains small documents and has a simple indexing chain. More complex indexing chains, e.g. with more fields, ingest pipelines, etc. would see an even lower decrease of indexing rate. --- .../elasticsearch/index/IndexSettings.java | 3 + .../index/engine/InternalEngine.java | 18 +++--- .../index/engine/InternalEngineTests.java | 56 ++++++++++++------- .../action/TransportResumeFollowAction.java | 1 + 4 files changed, 52 insertions(+), 26 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/IndexSettings.java b/server/src/main/java/org/elasticsearch/index/IndexSettings.java index 78eeec56e75df..48d22fb9c19bc 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexSettings.java +++ b/server/src/main/java/org/elasticsearch/index/IndexSettings.java @@ -84,6 +84,9 @@ public final class IndexSettings { "[true, false, checksum] but was: " + s); } }, Property.IndexScope); + // This setting is undocumented as it is considered as an escape hatch. + public static final Setting ON_HEAP_ID_TERMS_INDEX = + Setting.boolSetting("index.force_memory_id_terms_dictinary", false, Property.IndexScope); /** * Index setting describing the maximum value of from + size on a query. diff --git a/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java b/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java index 9656b091c2c6f..a3f53b069e7df 100644 --- a/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java +++ b/server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java @@ -102,6 +102,7 @@ import java.io.Closeable; import java.io.IOException; import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Locale; @@ -2074,21 +2075,24 @@ IndexWriter createWriter(Directory directory, IndexWriterConfig iwc) throws IOEx } } - static Map getReaderAttributes(Directory directory) { + static Map getReaderAttributes(Directory directory, IndexSettings indexSettings) { Directory unwrap = FilterDirectory.unwrap(directory); boolean defaultOffHeap = FsDirectoryFactory.isHybridFs(unwrap) || unwrap instanceof MMapDirectory; - return Map.of( - BlockTreeTermsReader.FST_MODE_KEY, // if we are using MMAP for term dics we force all off heap unless it's the ID field - defaultOffHeap ? FSTLoadMode.OFF_HEAP.name() : FSTLoadMode.ON_HEAP.name() - , BlockTreeTermsReader.FST_MODE_KEY + "." + IdFieldMapper.NAME, // always force ID field on-heap for fast updates - FSTLoadMode.ON_HEAP.name()); + Map attributes = new HashMap<>(); + attributes.put(BlockTreeTermsReader.FST_MODE_KEY, defaultOffHeap ? FSTLoadMode.OFF_HEAP.name() : FSTLoadMode.ON_HEAP.name()); + if (IndexSettings.ON_HEAP_ID_TERMS_INDEX.exists(indexSettings.getSettings())) { + final boolean idOffHeap = IndexSettings.ON_HEAP_ID_TERMS_INDEX.get(indexSettings.getSettings()) == false; + attributes.put(BlockTreeTermsReader.FST_MODE_KEY + "." + IdFieldMapper.NAME, + idOffHeap ? FSTLoadMode.OFF_HEAP.name() : FSTLoadMode.ON_HEAP.name()); + } + return Collections.unmodifiableMap(attributes); } private IndexWriterConfig getIndexWriterConfig() { final IndexWriterConfig iwc = new IndexWriterConfig(engineConfig.getAnalyzer()); iwc.setCommitOnClose(false); // we by default don't commit on close iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND); - iwc.setReaderAttributes(getReaderAttributes(store.directory())); + iwc.setReaderAttributes(getReaderAttributes(store.directory(), engineConfig.getIndexSettings())); iwc.setIndexDeletionPolicy(combinedDeletionPolicy); // with tests.verbose, lucene sets this up: plumb to align with filesystem stream boolean verbose = false; diff --git a/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java b/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java index f0749df406daf..bb853ce21f17e 100644 --- a/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java +++ b/server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java @@ -5538,32 +5538,25 @@ public void testRefreshAndCloseEngineConcurrently() throws Exception { } public void testGetReaderAttributes() throws IOException { + Settings.Builder settingsBuilder = Settings.builder() + .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT); + Settings settings = settingsBuilder.build(); + IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("foo", settings); + try(BaseDirectoryWrapper dir = newFSDirectory(createTempDir())) { Directory unwrap = FilterDirectory.unwrap(dir); boolean isMMap = unwrap instanceof MMapDirectory; - Map readerAttributes = InternalEngine.getReaderAttributes(dir); - assertEquals(2, readerAttributes.size()); - assertEquals("ON_HEAP", readerAttributes.get("blocktree.terms.fst._id")); - if (isMMap) { - assertEquals("OFF_HEAP", readerAttributes.get("blocktree.terms.fst")); - } else { - assertEquals("ON_HEAP", readerAttributes.get("blocktree.terms.fst")); - } + Map readerAttributes = InternalEngine.getReaderAttributes(dir, indexSettings); + assertEquals(Collections.singletonMap("blocktree.terms.fst", isMMap ? "OFF_HEAP" : "ON_HEAP"), readerAttributes); } try(MMapDirectory dir = new MMapDirectory(createTempDir())) { Map readerAttributes = InternalEngine.getReaderAttributes(randomBoolean() ? dir : - new MockDirectoryWrapper(random(), dir)); - assertEquals(2, readerAttributes.size()); - assertEquals("ON_HEAP", readerAttributes.get("blocktree.terms.fst._id")); - assertEquals("OFF_HEAP", readerAttributes.get("blocktree.terms.fst")); + new MockDirectoryWrapper(random(), dir), indexSettings); + assertEquals(Collections.singletonMap("blocktree.terms.fst", "OFF_HEAP"), readerAttributes); } - Settings.Builder settingsBuilder = Settings.builder() - .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT); - Settings settings = settingsBuilder.build(); - IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("foo", settings); FsDirectoryFactory service = new FsDirectoryFactory(); Path tempDir = createTempDir().resolve(indexSettings.getUUID()).resolve("0"); ShardPath path = new ShardPath(false, tempDir, tempDir, new ShardId(indexSettings.getIndex(), 0)); @@ -5571,20 +5564,45 @@ public void testGetReaderAttributes() throws IOException { Map readerAttributes = InternalEngine.getReaderAttributes(randomBoolean() ? directory : - new MockDirectoryWrapper(random(), directory)); - assertEquals(2, readerAttributes.size()); + new MockDirectoryWrapper(random(), directory), indexSettings); + assertEquals(1, readerAttributes.size()); switch (IndexModule.defaultStoreType(true)) { case HYBRIDFS: case MMAPFS: - assertEquals("ON_HEAP", readerAttributes.get("blocktree.terms.fst._id")); assertEquals("OFF_HEAP", readerAttributes.get("blocktree.terms.fst")); break; case NIOFS: case SIMPLEFS: case FS: + assertEquals("ON_HEAP", readerAttributes.get("blocktree.terms.fst")); + break; + default: + fail("unknownw type"); + } + } + + settingsBuilder.put(IndexSettings.ON_HEAP_ID_TERMS_INDEX.getKey(), true); + settings = settingsBuilder.build(); + indexSettings = IndexSettingsModule.newIndexSettings("foo", settings); + try (Directory directory = service.newDirectory(indexSettings, path)) { + + Map readerAttributes = + InternalEngine.getReaderAttributes(randomBoolean() ? directory : + new MockDirectoryWrapper(random(), directory), indexSettings); + assertEquals(2, readerAttributes.size()); + + switch (IndexModule.defaultStoreType(true)) { + case HYBRIDFS: + case MMAPFS: + assertEquals("OFF_HEAP", readerAttributes.get("blocktree.terms.fst")); assertEquals("ON_HEAP", readerAttributes.get("blocktree.terms.fst._id")); + break; + case NIOFS: + case SIMPLEFS: + case FS: assertEquals("ON_HEAP", readerAttributes.get("blocktree.terms.fst")); + assertEquals("ON_HEAP", readerAttributes.get("blocktree.terms.fst._id")); break; default: fail("unknownw type"); diff --git a/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/TransportResumeFollowAction.java b/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/TransportResumeFollowAction.java index 61845545ae098..75f8ff59d1fa5 100644 --- a/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/TransportResumeFollowAction.java +++ b/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/TransportResumeFollowAction.java @@ -401,6 +401,7 @@ static String[] extractLeaderShardHistoryUUIDs(Map ccrIndexMetaD IndexSettings.INDEX_FLUSH_AFTER_MERGE_THRESHOLD_SIZE_SETTING, IndexSettings.INDEX_GC_DELETES_SETTING, IndexSettings.MAX_REFRESH_LISTENERS_PER_SHARD, + IndexSettings.ON_HEAP_ID_TERMS_INDEX, IndicesRequestCache.INDEX_CACHE_REQUEST_ENABLED_SETTING, BitsetFilterCache.INDEX_LOAD_RANDOM_ACCESS_FILTERS_EAGERLY_SETTING, SearchSlowLog.INDEX_SEARCH_SLOWLOG_THRESHOLD_FETCH_DEBUG_SETTING, From 10c42d3e5c069c34537a425fe8848fdcf5d5ddd8 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Mon, 17 Feb 2020 19:25:36 +0100 Subject: [PATCH 2/3] Register the setting. --- .../elasticsearch/common/settings/IndexScopedSettings.java | 1 + .../src/main/java/org/elasticsearch/test/ESIntegTestCase.java | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java index 31a8d9b89414f..bb73691f283bc 100644 --- a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java +++ b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java @@ -164,6 +164,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings { IndexSettings.DEFAULT_PIPELINE, IndexSettings.FINAL_PIPELINE, MetaDataIndexStateService.VERIFIED_BEFORE_CLOSE_SETTING, + IndexSettings.ON_HEAP_ID_TERMS_INDEX, // validate that built-in similarities don't get redefined Setting.groupSetting( diff --git a/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java index 240190d5f6d9c..9939f6c8755ff 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java @@ -436,6 +436,10 @@ protected Settings.Builder setRandomIndexSettings(Random random, Settings.Builde RandomNumbers.randomIntBetween(random, 1, 15) + "ms"); } + if (random.nextBoolean()) { + builder.put(IndexSettings.ON_HEAP_ID_TERMS_INDEX.getKey(), random.nextBoolean()); + } + return builder; } From 9da21a406878d08c93a1936172edb2f51869c8d0 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Wed, 19 Feb 2020 08:40:27 +0100 Subject: [PATCH 3/3] typo --- server/src/main/java/org/elasticsearch/index/IndexSettings.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/index/IndexSettings.java b/server/src/main/java/org/elasticsearch/index/IndexSettings.java index 48d22fb9c19bc..83cbd0a2edd2a 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexSettings.java +++ b/server/src/main/java/org/elasticsearch/index/IndexSettings.java @@ -86,7 +86,7 @@ public final class IndexSettings { }, Property.IndexScope); // This setting is undocumented as it is considered as an escape hatch. public static final Setting ON_HEAP_ID_TERMS_INDEX = - Setting.boolSetting("index.force_memory_id_terms_dictinary", false, Property.IndexScope); + Setting.boolSetting("index.force_memory_id_terms_dictionary", false, Property.IndexScope); /** * Index setting describing the maximum value of from + size on a query.