Skip to content

Commit 73d474e

Browse files
committed
Add option to force load term dict into memory (#39741)
Lucene added an optimization that leaves the term dictionary on disk for non-ID-like fields. This change happened very late in the release process, so it is better to have an escape hatch in case certain use cases are hurt by this optimization. This setting might be removed in the future if it turns out to be unnecessary.
1 parent bb7f276 commit 73d474e

File tree

4 files changed

+200
-2
lines changed

4 files changed

+200
-2
lines changed

server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings {
158158
IndexModule.INDEX_STORE_PRE_LOAD_SETTING,
159159
IndexModule.INDEX_QUERY_CACHE_ENABLED_SETTING,
160160
FsDirectoryService.INDEX_LOCK_FACTOR_SETTING,
161+
Store.FORCE_RAM_TERM_DICT,
161162
EngineConfig.INDEX_CODEC_SETTING,
162163
EngineConfig.INDEX_OPTIMIZE_AUTO_GENERATED_IDS,
163164
IndexMetaData.SETTING_WAIT_FOR_ACTIVE_SHARDS,

server/src/main/java/org/elasticsearch/index/store/Store.java

Lines changed: 150 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,15 @@
3838
import org.apache.lucene.store.AlreadyClosedException;
3939
import org.apache.lucene.store.BufferedChecksum;
4040
import org.apache.lucene.store.ByteArrayDataInput;
41+
import org.apache.lucene.store.ByteBufferIndexInput;
4142
import org.apache.lucene.store.ChecksumIndexInput;
4243
import org.apache.lucene.store.Directory;
4344
import org.apache.lucene.store.FilterDirectory;
4445
import org.apache.lucene.store.IOContext;
4546
import org.apache.lucene.store.IndexInput;
4647
import org.apache.lucene.store.IndexOutput;
4748
import org.apache.lucene.store.Lock;
49+
import org.apache.lucene.store.RandomAccessInput;
4850
import org.apache.lucene.store.SimpleFSDirectory;
4951
import org.apache.lucene.util.ArrayUtil;
5052
import org.apache.lucene.util.BytesRef;
@@ -95,6 +97,7 @@
9597
import java.util.Iterator;
9698
import java.util.List;
9799
import java.util.Map;
100+
import java.util.Set;
98101
import java.util.concurrent.TimeUnit;
99102
import java.util.concurrent.atomic.AtomicBoolean;
100103
import java.util.concurrent.locks.ReentrantReadWriteLock;
@@ -126,6 +129,14 @@
126129
* </pre>
127130
*/
128131
public class Store extends AbstractIndexShardComponent implements Closeable, RefCounted {
132+
/**
133+
* This is an escape hatch for Lucene's internal optimization that checks if the IndexInput is an instance of ByteBufferIndexInput
134+
* and, if that's the case, doesn't load the term dictionary into RAM but reads it off disk iff the field is not an ID-like field.
135+
* Since this optimization was added very late in the release process, we add this setting to allow users to opt out of
136+
* this by exploiting Lucene internals and wrapping the IndexInput in a simple delegate.
137+
*/
138+
public static final Setting<Boolean> FORCE_RAM_TERM_DICT = Setting.boolSetting("index.force_memory_term_dictionary", false,
139+
Property.IndexScope);
129140
static final String CODEC = "store";
130141
static final int VERSION_WRITE_THROWABLE= 2; // we write throwable since 2.0
131142
static final int VERSION_STACK_TRACE = 1; // we write the stack trace too since 1.4.0
@@ -160,7 +171,8 @@ public Store(ShardId shardId, IndexSettings indexSettings, Directory directory,
160171
final TimeValue refreshInterval = indexSettings.getValue(INDEX_STORE_STATS_REFRESH_INTERVAL_SETTING);
161172
logger.debug("store stats are refreshed with refresh_interval [{}]", refreshInterval);
162173
ByteSizeCachingDirectory sizeCachingDir = new ByteSizeCachingDirectory(directory, refreshInterval);
163-
this.directory = new StoreDirectory(sizeCachingDir, Loggers.getLogger("index.store.deletes", shardId));
174+
this.directory = new StoreDirectory(sizeCachingDir, Loggers.getLogger("index.store.deletes", shardId),
175+
indexSettings.getValue(FORCE_RAM_TERM_DICT));
164176
this.shardLock = shardLock;
165177
this.onClose = onClose;
166178

@@ -700,10 +712,12 @@ public int refCount() {
700712
static final class StoreDirectory extends FilterDirectory {
701713

702714
private final Logger deletesLogger;
715+
private final boolean forceRamTermDict;
703716

704-
StoreDirectory(ByteSizeCachingDirectory delegateDirectory, Logger deletesLogger) {
717+
StoreDirectory(ByteSizeCachingDirectory delegateDirectory, Logger deletesLogger, boolean forceRamTermDict) {
705718
super(delegateDirectory);
706719
this.deletesLogger = deletesLogger;
720+
this.forceRamTermDict = forceRamTermDict;
707721
}
708722

709723
/** Estimate the cumulative size of all files in this directory in bytes. */
@@ -730,6 +744,18 @@ private void innerClose() throws IOException {
730744
super.close();
731745
}
732746

747+
@Override
748+
public IndexInput openInput(String name, IOContext context) throws IOException {
749+
IndexInput input = super.openInput(name, context);
750+
if (name.endsWith(".tip") || name.endsWith(".cfs")) {
751+
// only do this if we are reading cfs or tip file - all other files don't need this.
752+
if (forceRamTermDict && input instanceof ByteBufferIndexInput) {
753+
return new DeoptimizingIndexInput(input.toString(), input);
754+
}
755+
}
756+
return input;
757+
}
758+
733759
@Override
734760
public String toString() {
735761
return "store(" + in.toString() + ")";
@@ -1604,4 +1630,126 @@ private static IndexWriterConfig newIndexWriterConfig() {
16041630
.setMergePolicy(NoMergePolicy.INSTANCE);
16051631
}
16061632

1633+
/**
1634+
* see {@link #FORCE_RAM_TERM_DICT} for details
1635+
*/
1636+
private static final class DeoptimizingIndexInput extends IndexInput {
1637+
1638+
private final IndexInput in;
1639+
1640+
private DeoptimizingIndexInput(String resourceDescription, IndexInput in) {
1641+
super(resourceDescription);
1642+
this.in = in;
1643+
}
1644+
1645+
@Override
1646+
public IndexInput clone() {
1647+
return new DeoptimizingIndexInput(toString(), in.clone());
1648+
}
1649+
1650+
@Override
1651+
public void close() throws IOException {
1652+
in.close();
1653+
}
1654+
1655+
@Override
1656+
public long getFilePointer() {
1657+
return in.getFilePointer();
1658+
}
1659+
1660+
@Override
1661+
public void seek(long pos) throws IOException {
1662+
in.seek(pos);
1663+
}
1664+
1665+
@Override
1666+
public long length() {
1667+
return in.length();
1668+
}
1669+
1670+
@Override
1671+
public String toString() {
1672+
return in.toString();
1673+
}
1674+
1675+
@Override
1676+
public IndexInput slice(String sliceDescription, long offset, long length) throws IOException {
1677+
return new DeoptimizingIndexInput(sliceDescription, in.slice(sliceDescription, offset, length));
1678+
}
1679+
1680+
@Override
1681+
public RandomAccessInput randomAccessSlice(long offset, long length) throws IOException {
1682+
return in.randomAccessSlice(offset, length);
1683+
}
1684+
1685+
@Override
1686+
public byte readByte() throws IOException {
1687+
return in.readByte();
1688+
}
1689+
1690+
@Override
1691+
public void readBytes(byte[] b, int offset, int len) throws IOException {
1692+
in.readBytes(b, offset, len);
1693+
}
1694+
1695+
@Override
1696+
public void readBytes(byte[] b, int offset, int len, boolean useBuffer) throws IOException {
1697+
in.readBytes(b, offset, len, useBuffer);
1698+
}
1699+
1700+
@Override
1701+
public short readShort() throws IOException {
1702+
return in.readShort();
1703+
}
1704+
1705+
@Override
1706+
public int readInt() throws IOException {
1707+
return in.readInt();
1708+
}
1709+
1710+
@Override
1711+
public int readVInt() throws IOException {
1712+
return in.readVInt();
1713+
}
1714+
1715+
@Override
1716+
public int readZInt() throws IOException {
1717+
return in.readZInt();
1718+
}
1719+
1720+
@Override
1721+
public long readLong() throws IOException {
1722+
return in.readLong();
1723+
}
1724+
1725+
@Override
1726+
public long readVLong() throws IOException {
1727+
return in.readVLong();
1728+
}
1729+
1730+
@Override
1731+
public long readZLong() throws IOException {
1732+
return in.readZLong();
1733+
}
1734+
1735+
@Override
1736+
public String readString() throws IOException {
1737+
return in.readString();
1738+
}
1739+
1740+
@Override
1741+
public Map<String, String> readMapOfStrings() throws IOException {
1742+
return in.readMapOfStrings();
1743+
}
1744+
1745+
@Override
1746+
public Set<String> readSetOfStrings() throws IOException {
1747+
return in.readSetOfStrings();
1748+
}
1749+
1750+
@Override
1751+
public void skipBytes(long numBytes) throws IOException {
1752+
in.skipBytes(numBytes);
1753+
}
1754+
}
16071755
}

server/src/test/java/org/elasticsearch/index/store/StoreTests.java

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,13 @@
4040
import org.apache.lucene.index.SnapshotDeletionPolicy;
4141
import org.apache.lucene.index.Term;
4242
import org.apache.lucene.store.BaseDirectoryWrapper;
43+
import org.apache.lucene.store.ByteBufferIndexInput;
4344
import org.apache.lucene.store.ChecksumIndexInput;
4445
import org.apache.lucene.store.Directory;
4546
import org.apache.lucene.store.IOContext;
4647
import org.apache.lucene.store.IndexInput;
4748
import org.apache.lucene.store.IndexOutput;
49+
import org.apache.lucene.store.MMapDirectory;
4850
import org.apache.lucene.store.RAMDirectory;
4951
import org.apache.lucene.util.BytesRef;
5052
import org.apache.lucene.util.TestUtil;
@@ -1080,4 +1082,46 @@ public void testHistoryUUIDCanBeForced() throws IOException {
10801082
assertThat(segmentInfos.getUserData().get(Engine.HISTORY_UUID_KEY), not(equalTo(oldHistoryUUID)));
10811083
}
10821084
}
1085+
1086+
public void testDeoptimizeMMap() throws IOException {
1087+
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("index",
1088+
Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, org.elasticsearch.Version.CURRENT)
1089+
.put(Store.FORCE_RAM_TERM_DICT.getKey(), true).build());
1090+
final ShardId shardId = new ShardId("index", "_na_", 1);
1091+
String file = "test." + (randomBoolean() ? "tip" : "cfs");
1092+
try (Store store = new Store(shardId, indexSettings, new MMapDirectory(createTempDir()), new DummyShardLock(shardId))) {
1093+
try (IndexOutput output = store.directory().createOutput(file, IOContext.DEFAULT)) {
1094+
output.writeInt(0);
1095+
}
1096+
try (IndexOutput output = store.directory().createOutput("someOtherFile.txt", IOContext.DEFAULT)) {
1097+
output.writeInt(0);
1098+
}
1099+
try (IndexInput input = store.directory().openInput(file, IOContext.DEFAULT)) {
1100+
assertFalse(input instanceof ByteBufferIndexInput);
1101+
assertFalse(input.clone() instanceof ByteBufferIndexInput);
1102+
assertFalse(input.slice("foo", 1, 1) instanceof ByteBufferIndexInput);
1103+
}
1104+
1105+
try (IndexInput input = store.directory().openInput("someOtherFile.txt", IOContext.DEFAULT)) {
1106+
assertTrue(input instanceof ByteBufferIndexInput);
1107+
assertTrue(input.clone() instanceof ByteBufferIndexInput);
1108+
assertTrue(input.slice("foo", 1, 1) instanceof ByteBufferIndexInput);
1109+
}
1110+
}
1111+
1112+
indexSettings = IndexSettingsModule.newIndexSettings("index",
1113+
Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, org.elasticsearch.Version.CURRENT)
1114+
.put(Store.FORCE_RAM_TERM_DICT.getKey(), false).build());
1115+
1116+
try (Store store = new Store(shardId, indexSettings, new MMapDirectory(createTempDir()), new DummyShardLock(shardId))) {
1117+
try (IndexOutput output = store.directory().createOutput(file, IOContext.DEFAULT)) {
1118+
output.writeInt(0);
1119+
}
1120+
try (IndexInput input = store.directory().openInput(file, IOContext.DEFAULT)) {
1121+
assertTrue(input instanceof ByteBufferIndexInput);
1122+
assertTrue(input.clone() instanceof ByteBufferIndexInput);
1123+
assertTrue(input.slice("foo", 1, 1) instanceof ByteBufferIndexInput);
1124+
}
1125+
}
1126+
}
10831127
}

test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@
124124
import org.elasticsearch.index.mapper.MappedFieldType;
125125
import org.elasticsearch.index.mapper.MapperService;
126126
import org.elasticsearch.index.mapper.MockFieldFilterPlugin;
127+
import org.elasticsearch.index.store.Store;
127128
import org.elasticsearch.index.translog.Translog;
128129
import org.elasticsearch.indices.IndicesQueryCache;
129130
import org.elasticsearch.indices.IndicesRequestCache;
@@ -466,6 +467,10 @@ protected Settings.Builder setRandomIndexSettings(Random random, Settings.Builde
466467
RandomNumbers.randomIntBetween(random, 1, 15) + "ms");
467468
}
468469

470+
if (randomBoolean()) {
471+
builder.put(Store.FORCE_RAM_TERM_DICT.getKey(), true);
472+
}
473+
469474
return builder;
470475
}
471476

0 commit comments

Comments
 (0)