diff --git a/server/src/main/java/org/elasticsearch/index/engine/SegmentsStats.java b/server/src/main/java/org/elasticsearch/index/engine/SegmentsStats.java index f58e2ca947e51..0e5cad2daf5b1 100644 --- a/server/src/main/java/org/elasticsearch/index/engine/SegmentsStats.java +++ b/server/src/main/java/org/elasticsearch/index/engine/SegmentsStats.java @@ -16,6 +16,7 @@ import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.xcontent.ToXContentFragment; import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.index.store.LuceneFilesExtensions; import java.io.IOException; @@ -35,40 +36,6 @@ public class SegmentsStats implements Writeable, ToXContentFragment { private long bitsetMemoryInBytes; private ImmutableOpenMap fileSizes = ImmutableOpenMap.of(); - /* - * A map to provide a best-effort approach describing Lucene index files. - * - * Ideally this should be in sync to what the current version of Lucene is using, but it's harmless to leave extensions out, - * they'll just miss a proper description in the stats - */ - static final ImmutableOpenMap FILE_DESCRIPTIONS = ImmutableOpenMap.builder() - .fPut("si", "Segment Info") - .fPut("fnm", "Fields") - .fPut("fdm", "Field Metadata") - .fPut("fdx", "Field Index") - .fPut("fdt", "Field Data") - .fPut("tmd", "Term Dictionary Metadata") - .fPut("tim", "Term Dictionary") - .fPut("tip", "Term Index") - .fPut("doc", "Frequencies") - .fPut("pos", "Positions") - .fPut("pay", "Payloads") - .fPut("nvd", "Norms") - .fPut("nvm", "Norms metadata") - .fPut("kdm", "Points Metadata") - .fPut("kdi", "Points Index") - .fPut("kdm", "Points Metadata") - .fPut("kdi", "Points Index") // old extension - .fPut("kdd", "Points") // old extension - .fPut("dvd", "DocValues") - .fPut("dvm", "DocValues Metadata") - .fPut("tvm", "Term Vector Metadata") - .fPut("tvx", "Term Vector Index") - .fPut("tvd", "Term Vector Documents") - .fPut("tvf", "Term Vector Fields") - .fPut("liv", "Live Documents") - .build(); - public SegmentsStats() {} public SegmentsStats(StreamInput in) throws IOException { @@ -321,7 +288,8 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws for (ObjectObjectCursor entry : fileSizes) { builder.startObject(entry.key); builder.humanReadableField(Fields.SIZE_IN_BYTES, Fields.SIZE, new ByteSizeValue(entry.value)); - builder.field(Fields.DESCRIPTION, FILE_DESCRIPTIONS.getOrDefault(entry.key, "Others")); + LuceneFilesExtensions extension = LuceneFilesExtensions.fromExtension(entry.key); + builder.field(Fields.DESCRIPTION, extension != null ? extension.getDescription() : "Others"); builder.endObject(); } builder.endObject(); diff --git a/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java b/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java index 7e0fc49fed97a..88251f0872765 100644 --- a/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java +++ b/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java @@ -58,7 +58,7 @@ public Directory newDirectory(IndexSettings indexSettings, ShardPath path) throw protected Directory newFSDirectory(Path location, LockFactory lockFactory, IndexSettings indexSettings) throws IOException { final String storeType = - indexSettings.getSettings().get(IndexModule.INDEX_STORE_TYPE_SETTING.getKey(), IndexModule.Type.FS.getSettingsKey()); + indexSettings.getSettings().get(IndexModule.INDEX_STORE_TYPE_SETTING.getKey(), IndexModule.Type.FS.getSettingsKey()); IndexModule.Type type; if (IndexModule.Type.FS.match(storeType)) { type = IndexModule.defaultStoreType(IndexModule.NODE_STORE_ALLOW_MMAP.get(indexSettings.getNodeSettings())); @@ -89,7 +89,7 @@ protected Directory newFSDirectory(Path location, LockFactory lockFactory, Index } public static MMapDirectory setPreload(MMapDirectory mMapDirectory, LockFactory lockFactory, - Set preLoadExtensions) throws IOException { + Set preLoadExtensions) throws IOException { assert mMapDirectory.getPreload() == false; if (preLoadExtensions.isEmpty() == false) { if (preLoadExtensions.contains("*")) { @@ -145,35 +145,14 @@ boolean useDelegate(String name, IOContext ioContext) { return false; } - String extension = FileSwitchDirectory.getExtension(name); - switch(extension) { - // Norms, doc values and term dictionaries are typically performance-sensitive and hot in the page - // cache, so we use mmap, which provides better performance. - case "nvd": - case "dvd": - case "tim": - // We want to open the terms index and KD-tree index off-heap to save memory, but this only performs - // well if using mmap. - case "tip": - // dim files only apply up to lucene 8.x indices. It can be removed once we are in lucene 10 - case "dim": - case "kdd": - case "kdi": - // Compound files are tricky because they store all the information for the segment. Benchmarks - // suggested that not mapping them hurts performance. - case "cfs": - // MMapDirectory has special logic to read long[] arrays in little-endian order that helps speed - // up the decoding of postings. The same logic applies to positions (.pos) of offsets (.pay) but we - // are not mmaping them as queries that leverage positions are more costly and the decoding of postings - // tends to be less a bottleneck. - case "doc": - return true; + final LuceneFilesExtensions extension = LuceneFilesExtensions.fromExtension(FileSwitchDirectory.getExtension(name)); + if (extension == null || extension.shouldMmap() == false) { // Other files are either less performance-sensitive (e.g. stored field index, norms metadata) // or are large and have a random access pattern and mmap leads to page cache trashing // (e.g. stored fields and term vectors). - default: - return false; + return false; } + return true; } MMapDirectory getDelegate() { diff --git a/server/src/main/java/org/elasticsearch/index/store/LuceneFilesExtensions.java b/server/src/main/java/org/elasticsearch/index/store/LuceneFilesExtensions.java new file mode 100644 index 0000000000000..7203cdea3cfff --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/store/LuceneFilesExtensions.java @@ -0,0 +1,136 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.index.store; + +import org.elasticsearch.common.Nullable; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; + +public enum LuceneFilesExtensions { + + CFE("cfe", "Compound Files Entries", true, false), + // Compound files are tricky because they store all the information for the segment. Benchmarks + // suggested that not mapping them hurts performance. + CFS("cfs", "Compound Files", false, true), + CMP("cmp", "Completion Index", true, false), + DII("dii", "Points Index", false, false), + // dim files only apply up to lucene 8.x indices. It can be removed once we are in lucene 10 + DIM("dim", "Points", false, true), + // MMapDirectory has special logic to read long[] arrays in little-endian order that helps speed + // up the decoding of postings. The same logic applies to positions (.pos) of offsets (.pay) but we + // are not mmaping them as queries that leverage positions are more costly and the decoding of postings + // tends to be less a bottleneck. + DOC("doc", "Frequencies", false, true), + // Doc values are typically performance-sensitive and hot in the page + // cache, so we use mmap, which provides better performance. + DVD("dvd", "DocValues", false, true), + DVM("dvm", "DocValues Metadata", true, false), + FDM("fdm", "Field Metadata", true, false), + FDT("fdt", "Field Data", false, false), + FDX("fdx", "Field Index", false, false), + FNM("fnm", "Fields", true, false), + // old extension + KDD("kdd", "Points", false, true), + // old extension + KDI("kdi", "Points Index", false, true), + // Lucene 8.6 point format metadata file + KDM("kdm", "Points Metadata", true, false), + LIV("liv", "Live Documents", false, false), + LKP("lkp", "Completion Dictionary", false, false), + // Norms are typically performance-sensitive and hot in the page + // cache, so we use mmap, which provides better performance. + NVD("nvd", "Norms", false, true), + NVM("nvm", "Norms Metadata", true, false), + PAY("pay", "Payloads", false, false), + POS("pos", "Positions", false, false), + SI("si", "Segment Info", true, false), + // Term dictionaries are typically performance-sensitive and hot in the page + // cache, so we use mmap, which provides better performance. + TIM("tim", "Term Dictionary", false, true), + // We want to open the terms index and KD-tree index off-heap to save memory, but this only performs + // well if using mmap. + TIP("tip", "Term Index", false, true), + // Lucene 8.6 terms metadata file + TMD("tmd", "Term Dictionary Metadata", true, false), + // Temporary Lucene file + TMP("tmp", "Temporary File", false, false), + TVD("tvd", "Term Vector Documents", false, false), + TVF("tvf", "Term Vector Fields", false, false), + TVM("tvm", "Term Vector Metadata", true, false), + TVX("tvx", "Term Vector Index", false, false), + VEC("vec", "Vector Data", false, false), + // Lucene 9.0 indexed vectors metadata + VEM("vem","Vector Metadata", true, false); + + /** + * Lucene file's extension. + */ + private final String extension; + + /** + * Short description of the Lucene file + */ + private final String description; + + /** + * Some Lucene files should be memory-mapped when applicable. + */ + private final boolean mmap; + + /** + * Some Lucene files are considered as "metadata" files and should therefore be fully cached when applicable. Those files are usually + * fully read by Lucene when a Directory is opened. For non-metadata files Lucene usually only reads the header and footer checksums. + */ + private final boolean metadata; + + LuceneFilesExtensions(String extension, String description, boolean metadata, boolean mmap) { + this.description = Objects.requireNonNull(description); + this.extension = Objects.requireNonNull(extension); + this.metadata = metadata; + this.mmap = mmap; + } + + public String getDescription() { + return description; + } + + public String getExtension() { + return extension; + } + + public boolean isMetadata() { + return metadata; + } + + public boolean shouldMmap() { + return mmap; + } + + private static final Map extensions; + static { + final Map map = new HashMap<>(values().length); + for (LuceneFilesExtensions extension : values()) { + map.put(extension.extension, extension); + } + extensions = Collections.unmodifiableMap(map); + } + + @Nullable + public static LuceneFilesExtensions fromExtension(String ext) { + if (ext != null && ext.isEmpty() == false) { + final LuceneFilesExtensions extension = extensions.get(ext); + assert extension != null: "unknown Lucene file extension [" + ext + ']'; + return extension; + } + return null; + } +} diff --git a/server/src/test/java/org/elasticsearch/index/engine/SegmentsStatsTests.java b/server/src/test/java/org/elasticsearch/index/engine/SegmentsStatsTests.java index f548a521a3ebc..bbf1af0aca3bb 100644 --- a/server/src/test/java/org/elasticsearch/index/engine/SegmentsStatsTests.java +++ b/server/src/test/java/org/elasticsearch/index/engine/SegmentsStatsTests.java @@ -22,6 +22,7 @@ import org.apache.lucene.index.NoMergePolicy; import org.apache.lucene.index.Term; import org.apache.lucene.store.Directory; +import org.elasticsearch.index.store.LuceneFilesExtensions; import org.elasticsearch.test.ESTestCase; public class SegmentsStatsTests extends ESTestCase { @@ -58,7 +59,7 @@ public void testFileExtensionDescriptions() throws Exception { } if (extension != null) { assertNotNull("extension [" + extension + "] was not contained in the known segment stats files", - SegmentsStats.FILE_DESCRIPTIONS.get(extension)); + LuceneFilesExtensions.fromExtension(extension)); } } } diff --git a/server/src/test/java/org/elasticsearch/index/store/FsDirectoryFactoryTests.java b/server/src/test/java/org/elasticsearch/index/store/FsDirectoryFactoryTests.java index 7239e127c49e8..30da3b1f443a2 100644 --- a/server/src/test/java/org/elasticsearch/index/store/FsDirectoryFactoryTests.java +++ b/server/src/test/java/org/elasticsearch/index/store/FsDirectoryFactoryTests.java @@ -42,7 +42,7 @@ public void testPreload() throws IOException { doTestPreload("*"); Settings build = Settings.builder() .put(IndexModule.INDEX_STORE_TYPE_SETTING.getKey(), IndexModule.Type.HYBRIDFS.name().toLowerCase(Locale.ROOT)) - .putList(IndexModule.INDEX_STORE_PRE_LOAD_SETTING.getKey(), "dvd", "bar") + .putList(IndexModule.INDEX_STORE_PRE_LOAD_SETTING.getKey(), "dvd", "tmp") .build(); try (Directory directory = newDirectory(build)) { assertTrue(FsDirectoryFactory.isHybridFs(directory)); @@ -56,12 +56,12 @@ public void testPreload() throws IOException { assertTrue(hybridDirectory.useDelegate("foo.kdd", newIOContext(random()))); assertTrue(hybridDirectory.useDelegate("foo.kdi", newIOContext(random()))); assertFalse(hybridDirectory.useDelegate("foo.kdi", Store.READONCE_CHECKSUM)); - assertFalse(hybridDirectory.useDelegate("foo.bar", newIOContext(random()))); + assertFalse(hybridDirectory.useDelegate("foo.tmp", newIOContext(random()))); MMapDirectory delegate = hybridDirectory.getDelegate(); assertThat(delegate, Matchers.instanceOf(FsDirectoryFactory.PreLoadMMapDirectory.class)); FsDirectoryFactory.PreLoadMMapDirectory preLoadMMapDirectory = (FsDirectoryFactory.PreLoadMMapDirectory) delegate; assertTrue(preLoadMMapDirectory.useDelegate("foo.dvd")); - assertTrue(preLoadMMapDirectory.useDelegate("foo.bar")); + assertTrue(preLoadMMapDirectory.useDelegate("foo.tmp")); } } @@ -98,12 +98,12 @@ private void doTestPreload(String...preload) throws IOException { assertFalse(preLoadMMapDirectory.useDelegate("XXX")); assertFalse(preLoadMMapDirectory.getPreload()); preLoadMMapDirectory.close(); - expectThrows(AlreadyClosedException.class, () -> preLoadMMapDirectory.getDelegate().openInput("foo.bar", + expectThrows(AlreadyClosedException.class, () -> preLoadMMapDirectory.getDelegate().openInput("foo.tmp", IOContext.DEFAULT)); } } expectThrows(AlreadyClosedException.class, () -> directory.openInput(randomBoolean() && preload.length != 0 ? - "foo." + preload[0] : "foo.bar", IOContext.DEFAULT)); + "foo." + preload[0] : "foo.tmp", IOContext.DEFAULT)); } public void testStoreDirectory() throws IOException { diff --git a/test/framework/src/main/java/org/elasticsearch/common/lucene/store/ESIndexInputTestCase.java b/test/framework/src/main/java/org/elasticsearch/common/lucene/store/ESIndexInputTestCase.java index 74afa1fbe963c..1117a953e7c08 100644 --- a/test/framework/src/main/java/org/elasticsearch/common/lucene/store/ESIndexInputTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/common/lucene/store/ESIndexInputTestCase.java @@ -15,6 +15,7 @@ import org.elasticsearch.common.util.concurrent.EsExecutors; import org.elasticsearch.common.util.concurrent.EsThreadPoolExecutor; import org.elasticsearch.common.util.concurrent.ThreadContext; +import org.elasticsearch.index.store.LuceneFilesExtensions; import org.elasticsearch.test.ESTestCase; import org.junit.AfterClass; import org.junit.BeforeClass; @@ -196,33 +197,6 @@ private String randomUniqueSliceName() { } protected static String randomFileExtension() { - return randomFrom( - ".cfe", - ".cfs", - ".dii", - ".dim", - ".doc", - ".dvd", - ".dvm", - ".fdt", - ".fdx", - ".fdm", - ".fnm", - ".kdd", - ".kdi", - ".kdm", - ".liv", - ".nvd", - ".nvm", - ".pay", - ".pos", - ".tim", - ".tip", - ".tmd", - ".tvd", - ".tvx", - ".vec", - ".vem" - ); + return '.' + randomFrom(LuceneFilesExtensions.values()).getExtension(); } } diff --git a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/cache/blob/BlobStoreCacheService.java b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/cache/blob/BlobStoreCacheService.java index 5c2ed1e17d0f8..fb39fc82201ee 100644 --- a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/cache/blob/BlobStoreCacheService.java +++ b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/cache/blob/BlobStoreCacheService.java @@ -33,16 +33,15 @@ import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.util.concurrent.RunOnce; -import org.elasticsearch.common.util.set.Sets; import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.index.store.LuceneFilesExtensions; import org.elasticsearch.node.NodeClosedException; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.ConnectTransportException; import org.elasticsearch.xpack.searchablesnapshots.cache.common.ByteRange; import java.time.Instant; -import java.util.Set; import java.util.concurrent.ExecutionException; import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; @@ -247,52 +246,11 @@ public void onFailure(Exception e) { } } - private static final Set METADATA_FILES_EXTENSIONS; - private static final Set OTHER_FILES_EXTENSIONS; - static { - // List of Lucene file extensions that are considered as "metadata" and should therefore be fully cached in the blob store cache. - // Those files are usually fully read by Lucene when it opens a Directory. - METADATA_FILES_EXTENSIONS = Set.of( - "cfe", // compound file's entry table - "dvm", // doc values metadata file - "fdm", // stored fields metadata file - "fnm", // field names metadata file - "kdm", // Lucene 8.6 point format metadata file - "nvm", // norms metadata file - "tmd", // Lucene 8.6 terms metadata file - "tvm", // terms vectors metadata file - "vem" // Lucene 9.0 indexed vectors metadata - ); - - // List of extensions for which Lucene usually only reads the first 1024 byte and checks a header checksum when opening a Directory. - OTHER_FILES_EXTENSIONS = Set.of( - "cfs", - "dii", - "dim", - "doc", - "dvd", - "fdt", - "fdx", - "kdd", - "kdi", - "liv", - "nvd", - "pay", - "pos", - "tim", - "tip", - "tvd", - "tvx", - "vec" - ); - assert Sets.intersection(METADATA_FILES_EXTENSIONS, OTHER_FILES_EXTENSIONS).isEmpty(); - } - /** * Computes the {@link ByteRange} corresponding to the header of a Lucene file. This range can vary depending of the type of the file * which is indicated by the file's extension. The returned byte range can never be larger than the file's length but it can be smaller. * - * For files that are declared as metadata files in {@link #METADATA_FILES_EXTENSIONS}, the header can be as large as the specified + * For files that are declared as metadata files in {@link LuceneFilesExtensions}, the header can be as large as the specified * maximum metadata length parameter {@code maxMetadataLength}. Non-metadata files have a fixed length header of maximum 1KB. * * @param fileName the name of the file @@ -302,9 +260,8 @@ public void onFailure(Exception e) { * @return the header {@link ByteRange} */ public ByteRange computeBlobCacheByteRange(String fileName, long fileLength, ByteSizeValue maxMetadataLength) { - final String fileExtension = IndexFileNames.getExtension(fileName); - assert fileExtension == null || METADATA_FILES_EXTENSIONS.contains(fileExtension) || OTHER_FILES_EXTENSIONS.contains(fileExtension) - : "unknown Lucene file extension [" + fileExtension + "] - should it be considered a metadata file?"; + final LuceneFilesExtensions fileExtension = LuceneFilesExtensions.fromExtension(IndexFileNames.getExtension(fileName)); + assert fileExtension != null : "unknown Lucene file extension [" + fileName + "] - should it be considered a metadata file?"; if (useLegacyCachedBlobSizes()) { if (fileLength <= ByteSizeUnit.KB.toBytes(8L)) { @@ -314,7 +271,7 @@ public ByteRange computeBlobCacheByteRange(String fileName, long fileLength, Byt } } - if (METADATA_FILES_EXTENSIONS.contains(fileExtension)) { + if (fileExtension != null && fileExtension.isMetadata()) { final long maxAllowedLengthInBytes = maxMetadataLength.getBytes(); if (fileLength > maxAllowedLengthInBytes) { logExceedingFile(fileExtension, fileLength, maxMetadataLength); @@ -329,11 +286,11 @@ protected boolean useLegacyCachedBlobSizes() { return minNodeVersion.before(OLD_CACHED_BLOB_SIZE_VERSION); } - private static void logExceedingFile(String extension, long length, ByteSizeValue maxAllowedLength) { + private static void logExceedingFile(LuceneFilesExtensions extension, long length, ByteSizeValue maxAllowedLength) { if (logger.isWarnEnabled()) { try { // Use of a cache to prevent too many log traces per hour - LOG_EXCEEDING_FILES_CACHE.computeIfAbsent(extension, key -> { + LOG_EXCEEDING_FILES_CACHE.computeIfAbsent(extension.getExtension(), key -> { logger.warn( "file with extension [{}] is larger ([{}]) than the max. length allowed [{}] to cache metadata files in blob cache", extension,