Skip to content

Commit 5ddb043

Browse files
committed
Move the terms index of _id off-heap.
In elastic#42838 we moved the terms index of all fields off-heap except for the `_id` field, because we were worried that doing so might make indexing slower. In general, the indexing rate is only affected if explicit IDs are used, as otherwise Elasticsearch almost never performs lookups in the terms dictionary for the purpose of indexing. So it's quite wasteful to require the terms index of `_id` to be loaded on-heap for users who have append-only workloads.

Furthermore, I've been running benchmarks that index the `http_logs` dataset with explicit IDs, and they suggest that the slowdown is low enough that it's probably not worth forcing the terms index to be kept on-heap.

Here are some numbers for the median indexing rate in docs/s:

| Run | Master  | Patch   |
| --- | ------- | ------- |
| 1   | 45851.2 | 46401.4 |
| 2   | 45192.6 | 44561.0 |
| 3   | 45635.2 | 44137.0 |
| 4   | 46435.0 | 44692.8 |
| 5   | 45829.0 | 44949.0 |

And now heap usage in MB for segments:

| Run | Master  | Patch    |
| --- | ------- | -------- |
| 1   | 41.1720 | 0.352083 |
| 2   | 45.1545 | 0.382534 |
| 3   | 41.7746 | 0.381285 |
| 4   | 45.3673 | 0.412737 |
| 5   | 45.4616 | 0.375063 |

The indexing rate decreased by 1.8% on average, while memory usage decreased by more than 100x.

The `http_logs` dataset contains small documents and has a simple indexing chain. More complex indexing chains, e.g. with more fields, ingest pipelines, etc., would see an even smaller decrease in indexing rate.
1 parent 36436ff commit 5ddb043

File tree

4 files changed

+52
-26
lines changed

4 files changed

+52
-26
lines changed

server/src/main/java/org/elasticsearch/index/IndexSettings.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,9 @@ public final class IndexSettings {
8484
"[true, false, checksum] but was: " + s);
8585
}
8686
}, Property.IndexScope);
87+
// This setting is undocumented because it is considered an escape hatch.
88+
public static final Setting<Boolean> ON_HEAP_ID_TERMS_INDEX =
89+
Setting.boolSetting("index.force_memory_id_terms_dictinary", false, Property.IndexScope);
8790

8891
/**
8992
* Index setting describing the maximum value of from + size on a query.

server/src/main/java/org/elasticsearch/index/engine/InternalEngine.java

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@
102102
import java.io.Closeable;
103103
import java.io.IOException;
104104
import java.util.Arrays;
105+
import java.util.Collections;
105106
import java.util.HashMap;
106107
import java.util.List;
107108
import java.util.Locale;
@@ -2074,21 +2075,24 @@ IndexWriter createWriter(Directory directory, IndexWriterConfig iwc) throws IOEx
20742075
}
20752076
}
20762077

2077-
static Map<String, String> getReaderAttributes(Directory directory) {
2078+
static Map<String, String> getReaderAttributes(Directory directory, IndexSettings indexSettings) {
20782079
Directory unwrap = FilterDirectory.unwrap(directory);
20792080
boolean defaultOffHeap = FsDirectoryFactory.isHybridFs(unwrap) || unwrap instanceof MMapDirectory;
2080-
return Map.of(
2081-
BlockTreeTermsReader.FST_MODE_KEY, // if we are using MMAP for term dics we force all off heap unless it's the ID field
2082-
defaultOffHeap ? FSTLoadMode.OFF_HEAP.name() : FSTLoadMode.ON_HEAP.name()
2083-
, BlockTreeTermsReader.FST_MODE_KEY + "." + IdFieldMapper.NAME, // always force ID field on-heap for fast updates
2084-
FSTLoadMode.ON_HEAP.name());
2081+
Map<String, String> attributes = new HashMap<>();
2082+
attributes.put(BlockTreeTermsReader.FST_MODE_KEY, defaultOffHeap ? FSTLoadMode.OFF_HEAP.name() : FSTLoadMode.ON_HEAP.name());
2083+
if (IndexSettings.ON_HEAP_ID_TERMS_INDEX.exists(indexSettings.getSettings())) {
2084+
final boolean idOffHeap = IndexSettings.ON_HEAP_ID_TERMS_INDEX.get(indexSettings.getSettings()) == false;
2085+
attributes.put(BlockTreeTermsReader.FST_MODE_KEY + "." + IdFieldMapper.NAME,
2086+
idOffHeap ? FSTLoadMode.OFF_HEAP.name() : FSTLoadMode.ON_HEAP.name());
2087+
}
2088+
return Collections.unmodifiableMap(attributes);
20852089
}
20862090

20872091
private IndexWriterConfig getIndexWriterConfig() {
20882092
final IndexWriterConfig iwc = new IndexWriterConfig(engineConfig.getAnalyzer());
20892093
iwc.setCommitOnClose(false); // we by default don't commit on close
20902094
iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
2091-
iwc.setReaderAttributes(getReaderAttributes(store.directory()));
2095+
iwc.setReaderAttributes(getReaderAttributes(store.directory(), engineConfig.getIndexSettings()));
20922096
iwc.setIndexDeletionPolicy(combinedDeletionPolicy);
20932097
// with tests.verbose, lucene sets this up: plumb to align with filesystem stream
20942098
boolean verbose = false;

server/src/test/java/org/elasticsearch/index/engine/InternalEngineTests.java

Lines changed: 37 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -5538,53 +5538,71 @@ public void testRefreshAndCloseEngineConcurrently() throws Exception {
55385538
}
55395539

55405540
public void testGetReaderAttributes() throws IOException {
5541+
Settings.Builder settingsBuilder = Settings.builder()
5542+
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT);
5543+
Settings settings = settingsBuilder.build();
5544+
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("foo", settings);
5545+
55415546
try(BaseDirectoryWrapper dir = newFSDirectory(createTempDir())) {
55425547
Directory unwrap = FilterDirectory.unwrap(dir);
55435548
boolean isMMap = unwrap instanceof MMapDirectory;
5544-
Map<String, String> readerAttributes = InternalEngine.getReaderAttributes(dir);
5545-
assertEquals(2, readerAttributes.size());
5546-
assertEquals("ON_HEAP", readerAttributes.get("blocktree.terms.fst._id"));
5547-
if (isMMap) {
5548-
assertEquals("OFF_HEAP", readerAttributes.get("blocktree.terms.fst"));
5549-
} else {
5550-
assertEquals("ON_HEAP", readerAttributes.get("blocktree.terms.fst"));
5551-
}
5549+
Map<String, String> readerAttributes = InternalEngine.getReaderAttributes(dir, indexSettings);
5550+
assertEquals(Collections.singletonMap("blocktree.terms.fst", isMMap ? "OFF_HEAP" : "ON_HEAP"), readerAttributes);
55525551
}
55535552

55545553
try(MMapDirectory dir = new MMapDirectory(createTempDir())) {
55555554
Map<String, String> readerAttributes =
55565555
InternalEngine.getReaderAttributes(randomBoolean() ? dir :
5557-
new MockDirectoryWrapper(random(), dir));
5558-
assertEquals(2, readerAttributes.size());
5559-
assertEquals("ON_HEAP", readerAttributes.get("blocktree.terms.fst._id"));
5560-
assertEquals("OFF_HEAP", readerAttributes.get("blocktree.terms.fst"));
5556+
new MockDirectoryWrapper(random(), dir), indexSettings);
5557+
assertEquals(Collections.singletonMap("blocktree.terms.fst", "OFF_HEAP"), readerAttributes);
55615558
}
55625559

5563-
Settings.Builder settingsBuilder = Settings.builder()
5564-
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT);
5565-
Settings settings = settingsBuilder.build();
5566-
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("foo", settings);
55675560
FsDirectoryFactory service = new FsDirectoryFactory();
55685561
Path tempDir = createTempDir().resolve(indexSettings.getUUID()).resolve("0");
55695562
ShardPath path = new ShardPath(false, tempDir, tempDir, new ShardId(indexSettings.getIndex(), 0));
55705563
try (Directory directory = service.newDirectory(indexSettings, path)) {
55715564

55725565
Map<String, String> readerAttributes =
55735566
InternalEngine.getReaderAttributes(randomBoolean() ? directory :
5574-
new MockDirectoryWrapper(random(), directory));
5575-
assertEquals(2, readerAttributes.size());
5567+
new MockDirectoryWrapper(random(), directory), indexSettings);
5568+
assertEquals(1, readerAttributes.size());
55765569

55775570
switch (IndexModule.defaultStoreType(true)) {
55785571
case HYBRIDFS:
55795572
case MMAPFS:
5580-
assertEquals("ON_HEAP", readerAttributes.get("blocktree.terms.fst._id"));
55815573
assertEquals("OFF_HEAP", readerAttributes.get("blocktree.terms.fst"));
55825574
break;
55835575
case NIOFS:
55845576
case SIMPLEFS:
55855577
case FS:
5578+
assertEquals("ON_HEAP", readerAttributes.get("blocktree.terms.fst"));
5579+
break;
5580+
default:
5581+
fail("unknownw type");
5582+
}
5583+
}
5584+
5585+
settingsBuilder.put(IndexSettings.ON_HEAP_ID_TERMS_INDEX.getKey(), true);
5586+
settings = settingsBuilder.build();
5587+
indexSettings = IndexSettingsModule.newIndexSettings("foo", settings);
5588+
try (Directory directory = service.newDirectory(indexSettings, path)) {
5589+
5590+
Map<String, String> readerAttributes =
5591+
InternalEngine.getReaderAttributes(randomBoolean() ? directory :
5592+
new MockDirectoryWrapper(random(), directory), indexSettings);
5593+
assertEquals(2, readerAttributes.size());
5594+
5595+
switch (IndexModule.defaultStoreType(true)) {
5596+
case HYBRIDFS:
5597+
case MMAPFS:
5598+
assertEquals("OFF_HEAP", readerAttributes.get("blocktree.terms.fst"));
55865599
assertEquals("ON_HEAP", readerAttributes.get("blocktree.terms.fst._id"));
5600+
break;
5601+
case NIOFS:
5602+
case SIMPLEFS:
5603+
case FS:
55875604
assertEquals("ON_HEAP", readerAttributes.get("blocktree.terms.fst"));
5605+
assertEquals("ON_HEAP", readerAttributes.get("blocktree.terms.fst._id"));
55885606
break;
55895607
default:
55905608
fail("unknownw type");

x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/TransportResumeFollowAction.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,7 @@ static String[] extractLeaderShardHistoryUUIDs(Map<String, String> ccrIndexMetaD
401401
IndexSettings.INDEX_FLUSH_AFTER_MERGE_THRESHOLD_SIZE_SETTING,
402402
IndexSettings.INDEX_GC_DELETES_SETTING,
403403
IndexSettings.MAX_REFRESH_LISTENERS_PER_SHARD,
404+
IndexSettings.ON_HEAP_ID_TERMS_INDEX,
404405
IndicesRequestCache.INDEX_CACHE_REQUEST_ENABLED_SETTING,
405406
BitsetFilterCache.INDEX_LOAD_RANDOM_ACCESS_FILTERS_EAGERLY_SETTING,
406407
SearchSlowLog.INDEX_SEARCH_SLOWLOG_THRESHOLD_FETCH_DEBUG_SETTING,

0 commit comments

Comments
 (0)