From be21bf5a0e87a50aa7a832008bc20a4810881279 Mon Sep 17 00:00:00 2001 From: Tanguy Leroux Date: Wed, 7 Apr 2021 17:05:30 +0200 Subject: [PATCH 1/7] Centralize Lucene files extensions --- .../index/engine/SegmentsStats.java | 38 +---- .../index/store/FsDirectoryFactory.java | 33 +---- .../index/store/LuceneFilesExtensions.java | 132 ++++++++++++++++++ .../index/engine/SegmentsStatsTests.java | 3 +- .../cache/blob/BlobStoreCacheService.java | 57 +------- 5 files changed, 150 insertions(+), 113 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/index/store/LuceneFilesExtensions.java diff --git a/server/src/main/java/org/elasticsearch/index/engine/SegmentsStats.java b/server/src/main/java/org/elasticsearch/index/engine/SegmentsStats.java index f58e2ca947e51..0e5cad2daf5b1 100644 --- a/server/src/main/java/org/elasticsearch/index/engine/SegmentsStats.java +++ b/server/src/main/java/org/elasticsearch/index/engine/SegmentsStats.java @@ -16,6 +16,7 @@ import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.xcontent.ToXContentFragment; import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.index.store.LuceneFilesExtensions; import java.io.IOException; @@ -35,40 +36,6 @@ public class SegmentsStats implements Writeable, ToXContentFragment { private long bitsetMemoryInBytes; private ImmutableOpenMap fileSizes = ImmutableOpenMap.of(); - /* - * A map to provide a best-effort approach describing Lucene index files. - * - * Ideally this should be in sync to what the current version of Lucene is using, but it's harmless to leave extensions out, - * they'll just miss a proper description in the stats - */ - static final ImmutableOpenMap FILE_DESCRIPTIONS = ImmutableOpenMap.builder() - .fPut("si", "Segment Info") - .fPut("fnm", "Fields") - .fPut("fdm", "Field Metadata") - .fPut("fdx", "Field Index") - .fPut("fdt", "Field Data") - .fPut("tmd", "Term Dictionary Metadata") - .fPut("tim", "Term Dictionary") - .fPut("tip", "Term Index") - .fPut("doc", "Frequencies") - .fPut("pos", "Positions") - .fPut("pay", "Payloads") - .fPut("nvd", "Norms") - .fPut("nvm", "Norms metadata") - .fPut("kdm", "Points Metadata") - .fPut("kdi", "Points Index") - .fPut("kdm", "Points Metadata") - .fPut("kdi", "Points Index") // old extension - .fPut("kdd", "Points") // old extension - .fPut("dvd", "DocValues") - .fPut("dvm", "DocValues Metadata") - .fPut("tvm", "Term Vector Metadata") - .fPut("tvx", "Term Vector Index") - .fPut("tvd", "Term Vector Documents") - .fPut("tvf", "Term Vector Fields") - .fPut("liv", "Live Documents") - .build(); - public SegmentsStats() {} public SegmentsStats(StreamInput in) throws IOException { @@ -321,7 +288,8 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws for (ObjectObjectCursor entry : fileSizes) { builder.startObject(entry.key); builder.humanReadableField(Fields.SIZE_IN_BYTES, Fields.SIZE, new ByteSizeValue(entry.value)); - builder.field(Fields.DESCRIPTION, FILE_DESCRIPTIONS.getOrDefault(entry.key, "Others")); + LuceneFilesExtensions extension = LuceneFilesExtensions.fromExtension(entry.key); + builder.field(Fields.DESCRIPTION, extension != null ? extension.getDescription() : "Others"); builder.endObject(); } builder.endObject(); diff --git a/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java b/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java index 7e0fc49fed97a..a8738663b5ab2 100644 --- a/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java +++ b/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java @@ -58,7 +58,7 @@ public Directory newDirectory(IndexSettings indexSettings, ShardPath path) throw protected Directory newFSDirectory(Path location, LockFactory lockFactory, IndexSettings indexSettings) throws IOException { final String storeType = - indexSettings.getSettings().get(IndexModule.INDEX_STORE_TYPE_SETTING.getKey(), IndexModule.Type.FS.getSettingsKey()); + indexSettings.getSettings().get(IndexModule.INDEX_STORE_TYPE_SETTING.getKey(), IndexModule.Type.FS.getSettingsKey()); IndexModule.Type type; if (IndexModule.Type.FS.match(storeType)) { type = IndexModule.defaultStoreType(IndexModule.NODE_STORE_ALLOW_MMAP.get(indexSettings.getNodeSettings())); @@ -89,7 +89,7 @@ protected Directory newFSDirectory(Path location, LockFactory lockFactory, Index } public static MMapDirectory setPreload(MMapDirectory mMapDirectory, LockFactory lockFactory, - Set preLoadExtensions) throws IOException { + Set preLoadExtensions) throws IOException { assert mMapDirectory.getPreload() == false; if (preLoadExtensions.isEmpty() == false) { if (preLoadExtensions.contains("*")) { @@ -145,35 +145,14 @@ boolean useDelegate(String name, IOContext ioContext) { return false; } - String extension = FileSwitchDirectory.getExtension(name); - switch(extension) { - // Norms, doc values and term dictionaries are typically performance-sensitive and hot in the page - // cache, so we use mmap, which provides better performance. - case "nvd": - case "dvd": - case "tim": - // We want to open the terms index and KD-tree index off-heap to save memory, but this only performs - // well if using mmap. - case "tip": - // dim files only apply up to lucene 8.x indices. It can be removed once we are in lucene 10 - case "dim": - case "kdd": - case "kdi": - // Compound files are tricky because they store all the information for the segment. Benchmarks - // suggested that not mapping them hurts performance. - case "cfs": - // MMapDirectory has special logic to read long[] arrays in little-endian order that helps speed - // up the decoding of postings. The same logic applies to positions (.pos) of offsets (.pay) but we - // are not mmaping them as queries that leverage positions are more costly and the decoding of postings - // tends to be less a bottleneck. - case "doc": - return true; + final LuceneFilesExtensions extension = LuceneFilesExtensions.fromExtension(FileSwitchDirectory.getExtension(name)); + if (extension == null || extension.isMmap() == false) { // Other files are either less performance-sensitive (e.g. stored field index, norms metadata) // or are large and have a random access pattern and mmap leads to page cache trashing // (e.g. stored fields and term vectors). - default: - return false; + return false; } + return true; } MMapDirectory getDelegate() { diff --git a/server/src/main/java/org/elasticsearch/index/store/LuceneFilesExtensions.java b/server/src/main/java/org/elasticsearch/index/store/LuceneFilesExtensions.java new file mode 100644 index 0000000000000..85847ee58ea45 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/store/LuceneFilesExtensions.java @@ -0,0 +1,132 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.index.store; + +import org.elasticsearch.common.Nullable; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; + +public enum LuceneFilesExtensions { + + CFE("cfe", "Compound Files Entries", true, false), + // Compound files are tricky because they store all the information for the segment. Benchmarks + // suggested that not mapping them hurts performance. + CFS("cfs", "Compound Files", false, true), + DII("dii", "Points Index", false, false), + // dim files only apply up to lucene 8.x indices. It can be removed once we are in lucene 10 + DIM("dim", "Points", false, true), + DVM("dvm", "DocValues Metadata", true, false), + // MMapDirectory has special logic to read long[] arrays in little-endian order that helps speed + // up the decoding of postings. The same logic applies to positions (.pos) of offsets (.pay) but we + // are not mmaping them as queries that leverage positions are more costly and the decoding of postings + // tends to be less a bottleneck. + DOC("doc", "Frequencies", false, true), + // Doc values are typically performance-sensitive and hot in the page + // cache, so we use mmap, which provides better performance. + DVD("dvd", "DocValues", false, true), + FDM("fdm", "Field Metadata", true, false), + FDT("fdt", "Field Data", false, false), + FDX("fdx", "Field Index", false, false), + FNM("fnm", "Fields", true, false), + // old extension + KDD("kdd", "Points", false, true), + // old extension + KDI("kdi", "Points Index", false, true), + // Lucene 8.6 point format metadata file + KDM("kdm", "Points Metadata", true, false), + LIV("liv", "Live Documents", false, false), + // Norms are typically performance-sensitive and hot in the page + // cache, so we use mmap, which provides better performance. + NVD("nvd", "Norms", false, true), + NVM("nvm", "Norms Metadata", true, false), + PAY("pay", "Payloads", false, false), + POS("pos", "Positions", false, false), + SI("si", "Segment Info", true, false), + // Term dictionaries are typically performance-sensitive and hot in the page + // cache, so we use mmap, which provides better performance. + TIM("tim", "Term Dictionary", false, true), + // We want to open the terms index and KD-tree index off-heap to save memory, but this only performs + // well if using mmap. + TIP("tip", "Term Index", false, true), + // Lucene 8.6 terms metadata file + TMD("tmd", "Term Dictionary Metadata", true, false), + TVD("tvd", "Term Vector Documents", false, false), + TVF("tvf", "Term Vector Fields", false, false), + TVM("tvm", "Term Vector Metadata", true, false), + TVX("tvx", "Term Vector Index", false, false), + VEC("vec", "Vector Data", false, false), + // Lucene 9.0 indexed vectors metadata + VEM("vem","Vector Metadata", true, false); + + /** + * Short description of the Lucene file + */ + private final String description; + + /** + * Lucene file's extension. + */ + private final String extension; + + /** + * Some Lucene files should be memory-mapped when applicable. + */ + private final boolean mmap; + + /** + * Some Lucene files are considered as "metadata" files and should therefore be fully cached when applicable. Those files are usually + * fully read by Lucene when a Directory is opened. For non-metadata files Lucene usually only reads the header and footer checksums. + */ + private final boolean metadata; + + LuceneFilesExtensions(String extension, String description, boolean metadata, boolean mmap) { + this.description = Objects.requireNonNull(description); + this.extension = Objects.requireNonNull(extension); + this.metadata = metadata; + this.mmap = mmap; + } + + public String getDescription() { + return description; + } + + public String getExtension() { + return extension; + } + + public boolean isMetadata() { + return metadata; + } + + public boolean isMmap() { + return mmap; + } + + private static final Map extensions; + static { + final Map list = new HashMap<>(values().length); + for (LuceneFilesExtensions extension : values()) { + list.put(extension.extension, extension); + } + extensions = Collections.unmodifiableMap(list); + } + + @Nullable + public static LuceneFilesExtensions fromExtension(String ext) { + if (ext != null) { + final LuceneFilesExtensions extension = extensions.get(ext); + assert extension != null: "unknown Lucene file extension [" + ext + ']'; + return extension; + } + return null; + } +} diff --git a/server/src/test/java/org/elasticsearch/index/engine/SegmentsStatsTests.java b/server/src/test/java/org/elasticsearch/index/engine/SegmentsStatsTests.java index f548a521a3ebc..bbf1af0aca3bb 100644 --- a/server/src/test/java/org/elasticsearch/index/engine/SegmentsStatsTests.java +++ b/server/src/test/java/org/elasticsearch/index/engine/SegmentsStatsTests.java @@ -22,6 +22,7 @@ import org.apache.lucene.index.NoMergePolicy; import org.apache.lucene.index.Term; import org.apache.lucene.store.Directory; +import org.elasticsearch.index.store.LuceneFilesExtensions; import org.elasticsearch.test.ESTestCase; public class SegmentsStatsTests extends ESTestCase { @@ -58,7 +59,7 @@ public void testFileExtensionDescriptions() throws Exception { } if (extension != null) { assertNotNull("extension [" + extension + "] was not contained in the known segment stats files", - SegmentsStats.FILE_DESCRIPTIONS.get(extension)); + LuceneFilesExtensions.fromExtension(extension)); } } } diff --git a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/cache/blob/BlobStoreCacheService.java b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/cache/blob/BlobStoreCacheService.java index 5c2ed1e17d0f8..a5aba9e51dfda 100644 --- a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/cache/blob/BlobStoreCacheService.java +++ b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/cache/blob/BlobStoreCacheService.java @@ -33,16 +33,15 @@ import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.util.concurrent.RunOnce; -import org.elasticsearch.common.util.set.Sets; import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.index.store.LuceneFilesExtensions; import org.elasticsearch.node.NodeClosedException; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.ConnectTransportException; import org.elasticsearch.xpack.searchablesnapshots.cache.common.ByteRange; import java.time.Instant; -import java.util.Set; import java.util.concurrent.ExecutionException; import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; @@ -247,52 +246,11 @@ public void onFailure(Exception e) { } } - private static final Set METADATA_FILES_EXTENSIONS; - private static final Set OTHER_FILES_EXTENSIONS; - static { - // List of Lucene file extensions that are considered as "metadata" and should therefore be fully cached in the blob store cache. - // Those files are usually fully read by Lucene when it opens a Directory. - METADATA_FILES_EXTENSIONS = Set.of( - "cfe", // compound file's entry table - "dvm", // doc values metadata file - "fdm", // stored fields metadata file - "fnm", // field names metadata file - "kdm", // Lucene 8.6 point format metadata file - "nvm", // norms metadata file - "tmd", // Lucene 8.6 terms metadata file - "tvm", // terms vectors metadata file - "vem" // Lucene 9.0 indexed vectors metadata - ); - - // List of extensions for which Lucene usually only reads the first 1024 byte and checks a header checksum when opening a Directory. - OTHER_FILES_EXTENSIONS = Set.of( - "cfs", - "dii", - "dim", - "doc", - "dvd", - "fdt", - "fdx", - "kdd", - "kdi", - "liv", - "nvd", - "pay", - "pos", - "tim", - "tip", - "tvd", - "tvx", - "vec" - ); - assert Sets.intersection(METADATA_FILES_EXTENSIONS, OTHER_FILES_EXTENSIONS).isEmpty(); - } - /** * Computes the {@link ByteRange} corresponding to the header of a Lucene file. This range can vary depending of the type of the file * which is indicated by the file's extension. The returned byte range can never be larger than the file's length but it can be smaller. * - * For files that are declared as metadata files in {@link #METADATA_FILES_EXTENSIONS}, the header can be as large as the specified + * For files that are declared as metadata files in {@link LuceneFilesExtensions}, the header can be as large as the specified * maximum metadata length parameter {@code maxMetadataLength}. Non-metadata files have a fixed length header of maximum 1KB. * * @param fileName the name of the file @@ -302,9 +260,8 @@ public void onFailure(Exception e) { * @return the header {@link ByteRange} */ public ByteRange computeBlobCacheByteRange(String fileName, long fileLength, ByteSizeValue maxMetadataLength) { - final String fileExtension = IndexFileNames.getExtension(fileName); - assert fileExtension == null || METADATA_FILES_EXTENSIONS.contains(fileExtension) || OTHER_FILES_EXTENSIONS.contains(fileExtension) - : "unknown Lucene file extension [" + fileExtension + "] - should it be considered a metadata file?"; + final LuceneFilesExtensions fileExtension = LuceneFilesExtensions.fromExtension(IndexFileNames.getExtension(fileName)); + assert fileExtension != null : "unknown Lucene file extension [" + fileName + "] - should it be considered a metadata file?"; if (useLegacyCachedBlobSizes()) { if (fileLength <= ByteSizeUnit.KB.toBytes(8L)) { @@ -314,7 +271,7 @@ public ByteRange computeBlobCacheByteRange(String fileName, long fileLength, Byt } } - if (METADATA_FILES_EXTENSIONS.contains(fileExtension)) { + if (fileExtension.isMetadata()) { final long maxAllowedLengthInBytes = maxMetadataLength.getBytes(); if (fileLength > maxAllowedLengthInBytes) { logExceedingFile(fileExtension, fileLength, maxMetadataLength); @@ -329,11 +286,11 @@ protected boolean useLegacyCachedBlobSizes() { return minNodeVersion.before(OLD_CACHED_BLOB_SIZE_VERSION); } - private static void logExceedingFile(String extension, long length, ByteSizeValue maxAllowedLength) { + private static void logExceedingFile(LuceneFilesExtensions extension, long length, ByteSizeValue maxAllowedLength) { if (logger.isWarnEnabled()) { try { // Use of a cache to prevent too many log traces per hour - LOG_EXCEEDING_FILES_CACHE.computeIfAbsent(extension, key -> { + LOG_EXCEEDING_FILES_CACHE.computeIfAbsent(extension.getExtension(), key -> { logger.warn( "file with extension [{}] is larger ([{}]) than the max. length allowed [{}] to cache metadata files in blob cache", extension, From a991bf9f71e66c0cef704ecfbd5b94aa4f5fc14e Mon Sep 17 00:00:00 2001 From: Tanguy Leroux Date: Thu, 8 Apr 2021 10:50:27 +0200 Subject: [PATCH 2/7] add tmp --- .../org/elasticsearch/index/store/LuceneFilesExtensions.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/index/store/LuceneFilesExtensions.java b/server/src/main/java/org/elasticsearch/index/store/LuceneFilesExtensions.java index 85847ee58ea45..fe010681bd140 100644 --- a/server/src/main/java/org/elasticsearch/index/store/LuceneFilesExtensions.java +++ b/server/src/main/java/org/elasticsearch/index/store/LuceneFilesExtensions.java @@ -59,6 +59,8 @@ public enum LuceneFilesExtensions { TIP("tip", "Term Index", false, true), // Lucene 8.6 terms metadata file TMD("tmd", "Term Dictionary Metadata", true, false), + // Temporary Lucene file + TMP("tmp", "Temporary File", false, false), TVD("tvd", "Term Vector Documents", false, false), TVF("tvf", "Term Vector Fields", false, false), TVM("tvm", "Term Vector Metadata", true, false), @@ -122,7 +124,7 @@ public boolean isMmap() { @Nullable public static LuceneFilesExtensions fromExtension(String ext) { - if (ext != null) { + if (ext != null && ext.isEmpty() == false) { final LuceneFilesExtensions extension = extensions.get(ext); assert extension != null: "unknown Lucene file extension [" + ext + ']'; return extension; From 66dec29c077ac265ada611055de75bc0d0ea324f Mon Sep 17 00:00:00 2001 From: Tanguy Leroux Date: Thu, 8 Apr 2021 11:32:58 +0200 Subject: [PATCH 3/7] add cmp/lkp --- .../org/elasticsearch/index/store/LuceneFilesExtensions.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/index/store/LuceneFilesExtensions.java b/server/src/main/java/org/elasticsearch/index/store/LuceneFilesExtensions.java index fe010681bd140..ba78d7fc664f1 100644 --- a/server/src/main/java/org/elasticsearch/index/store/LuceneFilesExtensions.java +++ b/server/src/main/java/org/elasticsearch/index/store/LuceneFilesExtensions.java @@ -21,6 +21,7 @@ public enum LuceneFilesExtensions { // Compound files are tricky because they store all the information for the segment. Benchmarks // suggested that not mapping them hurts performance. CFS("cfs", "Compound Files", false, true), + CMP("cmp", "Completion Index", false, false), DII("dii", "Points Index", false, false), // dim files only apply up to lucene 8.x indices. It can be removed once we are in lucene 10 DIM("dim", "Points", false, true), @@ -44,6 +45,7 @@ public enum LuceneFilesExtensions { // Lucene 8.6 point format metadata file KDM("kdm", "Points Metadata", true, false), LIV("liv", "Live Documents", false, false), + LKP("lkp", "Completion Dictionary", false, false), // Norms are typically performance-sensitive and hot in the page // cache, so we use mmap, which provides better performance. NVD("nvd", "Norms", false, true), From 9bdcae97f6e94fab73f0ba311ac2ee1d509fe7b9 Mon Sep 17 00:00:00 2001 From: Tanguy Leroux Date: Thu, 8 Apr 2021 13:19:25 +0200 Subject: [PATCH 4/7] more fixes --- .../index/store/LuceneFilesExtensions.java | 2 +- .../index/store/FsDirectoryFactoryTests.java | 10 +++---- .../lucene/store/ESIndexInputTestCase.java | 30 ++----------------- 3 files changed, 8 insertions(+), 34 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/store/LuceneFilesExtensions.java b/server/src/main/java/org/elasticsearch/index/store/LuceneFilesExtensions.java index ba78d7fc664f1..bfdd1ed3a54a4 100644 --- a/server/src/main/java/org/elasticsearch/index/store/LuceneFilesExtensions.java +++ b/server/src/main/java/org/elasticsearch/index/store/LuceneFilesExtensions.java @@ -25,7 +25,6 @@ public enum LuceneFilesExtensions { DII("dii", "Points Index", false, false), // dim files only apply up to lucene 8.x indices. It can be removed once we are in lucene 10 DIM("dim", "Points", false, true), - DVM("dvm", "DocValues Metadata", true, false), // MMapDirectory has special logic to read long[] arrays in little-endian order that helps speed // up the decoding of postings. The same logic applies to positions (.pos) of offsets (.pay) but we // are not mmaping them as queries that leverage positions are more costly and the decoding of postings @@ -34,6 +33,7 @@ public enum LuceneFilesExtensions { // Doc values are typically performance-sensitive and hot in the page // cache, so we use mmap, which provides better performance. DVD("dvd", "DocValues", false, true), + DVM("dvm", "DocValues Metadata", true, false), FDM("fdm", "Field Metadata", true, false), FDT("fdt", "Field Data", false, false), FDX("fdx", "Field Index", false, false), diff --git a/server/src/test/java/org/elasticsearch/index/store/FsDirectoryFactoryTests.java b/server/src/test/java/org/elasticsearch/index/store/FsDirectoryFactoryTests.java index 7239e127c49e8..30da3b1f443a2 100644 --- a/server/src/test/java/org/elasticsearch/index/store/FsDirectoryFactoryTests.java +++ b/server/src/test/java/org/elasticsearch/index/store/FsDirectoryFactoryTests.java @@ -42,7 +42,7 @@ public void testPreload() throws IOException { doTestPreload("*"); Settings build = Settings.builder() .put(IndexModule.INDEX_STORE_TYPE_SETTING.getKey(), IndexModule.Type.HYBRIDFS.name().toLowerCase(Locale.ROOT)) - .putList(IndexModule.INDEX_STORE_PRE_LOAD_SETTING.getKey(), "dvd", "bar") + .putList(IndexModule.INDEX_STORE_PRE_LOAD_SETTING.getKey(), "dvd", "tmp") .build(); try (Directory directory = newDirectory(build)) { assertTrue(FsDirectoryFactory.isHybridFs(directory)); @@ -56,12 +56,12 @@ public void testPreload() throws IOException { assertTrue(hybridDirectory.useDelegate("foo.kdd", newIOContext(random()))); assertTrue(hybridDirectory.useDelegate("foo.kdi", newIOContext(random()))); assertFalse(hybridDirectory.useDelegate("foo.kdi", Store.READONCE_CHECKSUM)); - assertFalse(hybridDirectory.useDelegate("foo.bar", newIOContext(random()))); + assertFalse(hybridDirectory.useDelegate("foo.tmp", newIOContext(random()))); MMapDirectory delegate = hybridDirectory.getDelegate(); assertThat(delegate, Matchers.instanceOf(FsDirectoryFactory.PreLoadMMapDirectory.class)); FsDirectoryFactory.PreLoadMMapDirectory preLoadMMapDirectory = (FsDirectoryFactory.PreLoadMMapDirectory) delegate; assertTrue(preLoadMMapDirectory.useDelegate("foo.dvd")); - assertTrue(preLoadMMapDirectory.useDelegate("foo.bar")); + assertTrue(preLoadMMapDirectory.useDelegate("foo.tmp")); } } @@ -98,12 +98,12 @@ private void doTestPreload(String...preload) throws IOException { assertFalse(preLoadMMapDirectory.useDelegate("XXX")); assertFalse(preLoadMMapDirectory.getPreload()); preLoadMMapDirectory.close(); - expectThrows(AlreadyClosedException.class, () -> preLoadMMapDirectory.getDelegate().openInput("foo.bar", + expectThrows(AlreadyClosedException.class, () -> preLoadMMapDirectory.getDelegate().openInput("foo.tmp", IOContext.DEFAULT)); } } expectThrows(AlreadyClosedException.class, () -> directory.openInput(randomBoolean() && preload.length != 0 ? - "foo." + preload[0] : "foo.bar", IOContext.DEFAULT)); + "foo." + preload[0] : "foo.tmp", IOContext.DEFAULT)); } public void testStoreDirectory() throws IOException { diff --git a/test/framework/src/main/java/org/elasticsearch/common/lucene/store/ESIndexInputTestCase.java b/test/framework/src/main/java/org/elasticsearch/common/lucene/store/ESIndexInputTestCase.java index 74afa1fbe963c..84616fad858ff 100644 --- a/test/framework/src/main/java/org/elasticsearch/common/lucene/store/ESIndexInputTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/common/lucene/store/ESIndexInputTestCase.java @@ -15,6 +15,7 @@ import org.elasticsearch.common.util.concurrent.EsExecutors; import org.elasticsearch.common.util.concurrent.EsThreadPoolExecutor; import org.elasticsearch.common.util.concurrent.ThreadContext; +import org.elasticsearch.index.store.LuceneFilesExtensions; import org.elasticsearch.test.ESTestCase; import org.junit.AfterClass; import org.junit.BeforeClass; @@ -196,33 +197,6 @@ private String randomUniqueSliceName() { } protected static String randomFileExtension() { - return randomFrom( - ".cfe", - ".cfs", - ".dii", - ".dim", - ".doc", - ".dvd", - ".dvm", - ".fdt", - ".fdx", - ".fdm", - ".fnm", - ".kdd", - ".kdi", - ".kdm", - ".liv", - ".nvd", - ".nvm", - ".pay", - ".pos", - ".tim", - ".tip", - ".tmd", - ".tvd", - ".tvx", - ".vec", - ".vem" - ); + return randomFrom(LuceneFilesExtensions.values()).getExtension(); } } From 7ff0eeae85ea176e822d82e1cd3f3ce99b398fb8 Mon Sep 17 00:00:00 2001 From: Tanguy Leroux Date: Thu, 8 Apr 2021 13:48:25 +0200 Subject: [PATCH 5/7] more fixes --- .../elasticsearch/common/lucene/store/ESIndexInputTestCase.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/framework/src/main/java/org/elasticsearch/common/lucene/store/ESIndexInputTestCase.java b/test/framework/src/main/java/org/elasticsearch/common/lucene/store/ESIndexInputTestCase.java index 84616fad858ff..1117a953e7c08 100644 --- a/test/framework/src/main/java/org/elasticsearch/common/lucene/store/ESIndexInputTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/common/lucene/store/ESIndexInputTestCase.java @@ -197,6 +197,6 @@ private String randomUniqueSliceName() { } protected static String randomFileExtension() { - return randomFrom(LuceneFilesExtensions.values()).getExtension(); + return '.' + randomFrom(LuceneFilesExtensions.values()).getExtension(); } } From b4eb5b3b44eb8b2c6dfaefb666dcc80cd099725b Mon Sep 17 00:00:00 2001 From: Tanguy Leroux Date: Thu, 8 Apr 2021 13:49:52 +0200 Subject: [PATCH 6/7] cmp as metadata --- .../org/elasticsearch/index/store/LuceneFilesExtensions.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/index/store/LuceneFilesExtensions.java b/server/src/main/java/org/elasticsearch/index/store/LuceneFilesExtensions.java index bfdd1ed3a54a4..f263a4d946b41 100644 --- a/server/src/main/java/org/elasticsearch/index/store/LuceneFilesExtensions.java +++ b/server/src/main/java/org/elasticsearch/index/store/LuceneFilesExtensions.java @@ -21,7 +21,7 @@ public enum LuceneFilesExtensions { // Compound files are tricky because they store all the information for the segment. Benchmarks // suggested that not mapping them hurts performance. CFS("cfs", "Compound Files", false, true), - CMP("cmp", "Completion Index", false, false), + CMP("cmp", "Completion Index", true, false), DII("dii", "Points Index", false, false), // dim files only apply up to lucene 8.x indices. It can be removed once we are in lucene 10 DIM("dim", "Points", false, true), From 11d448351a8257607d482a23141835ea270febe2 Mon Sep 17 00:00:00 2001 From: Tanguy Leroux Date: Mon, 12 Apr 2021 10:33:21 +0200 Subject: [PATCH 7/7] feedback --- .../index/store/FsDirectoryFactory.java | 2 +- .../index/store/LuceneFilesExtensions.java | 16 ++++++++-------- .../cache/blob/BlobStoreCacheService.java | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java b/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java index a8738663b5ab2..88251f0872765 100644 --- a/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java +++ b/server/src/main/java/org/elasticsearch/index/store/FsDirectoryFactory.java @@ -146,7 +146,7 @@ boolean useDelegate(String name, IOContext ioContext) { } final LuceneFilesExtensions extension = LuceneFilesExtensions.fromExtension(FileSwitchDirectory.getExtension(name)); - if (extension == null || extension.isMmap() == false) { + if (extension == null || extension.shouldMmap() == false) { // Other files are either less performance-sensitive (e.g. stored field index, norms metadata) // or are large and have a random access pattern and mmap leads to page cache trashing // (e.g. stored fields and term vectors). diff --git a/server/src/main/java/org/elasticsearch/index/store/LuceneFilesExtensions.java b/server/src/main/java/org/elasticsearch/index/store/LuceneFilesExtensions.java index f263a4d946b41..7203cdea3cfff 100644 --- a/server/src/main/java/org/elasticsearch/index/store/LuceneFilesExtensions.java +++ b/server/src/main/java/org/elasticsearch/index/store/LuceneFilesExtensions.java @@ -72,14 +72,14 @@ public enum LuceneFilesExtensions { VEM("vem","Vector Metadata", true, false); /** - * Short description of the Lucene file + * Lucene file's extension. */ - private final String description; + private final String extension; /** - * Lucene file's extension. + * Short description of the Lucene file */ - private final String extension; + private final String description; /** * Some Lucene files should be memory-mapped when applicable. @@ -111,17 +111,17 @@ public boolean isMetadata() { return metadata; } - public boolean isMmap() { + public boolean shouldMmap() { return mmap; } private static final Map extensions; static { - final Map list = new HashMap<>(values().length); + final Map map = new HashMap<>(values().length); for (LuceneFilesExtensions extension : values()) { - list.put(extension.extension, extension); + map.put(extension.extension, extension); } - extensions = Collections.unmodifiableMap(list); + extensions = Collections.unmodifiableMap(map); } @Nullable diff --git a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/cache/blob/BlobStoreCacheService.java b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/cache/blob/BlobStoreCacheService.java index a5aba9e51dfda..fb39fc82201ee 100644 --- a/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/cache/blob/BlobStoreCacheService.java +++ b/x-pack/plugin/searchable-snapshots/src/main/java/org/elasticsearch/xpack/searchablesnapshots/cache/blob/BlobStoreCacheService.java @@ -271,7 +271,7 @@ public ByteRange computeBlobCacheByteRange(String fileName, long fileLength, Byt } } - if (fileExtension.isMetadata()) { + if (fileExtension != null && fileExtension.isMetadata()) { final long maxAllowedLengthInBytes = maxMetadataLength.getBytes(); if (fileLength > maxAllowedLengthInBytes) { logExceedingFile(fileExtension, fileLength, maxMetadataLength);