Skip to content

Commit 6781a29

Browse files
authored
Move ingest-geoip default databases out of config (elastic#36949)
Today the default databases bundled with ingest-geoip are treated as configuration files that we unbundle into the Elasticsearch configuration directory. This can cause problems for users using our Docker images if they bind mount over the configuration directory. Additionally, it creates complexity when trying to convert ingest-geoip to a module. This commit moves these databases out of the configuration directory and instead loads from the plugin directory. Further, custom databases can still be put into the configuration directory.
1 parent 25625d2 commit 6781a29

File tree

4 files changed

+177
-57
lines changed

4 files changed

+177
-57
lines changed

plugins/ingest-geoip/build.gradle

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ compileTestJava.options.compilerArgs << "-Xlint:-rawtypes,-unchecked"
4646

4747
bundlePlugin {
4848
from("${project.buildDir}/ingest-geoip") {
49-
into 'config/'
49+
into '/'
5050
}
5151
}
5252

plugins/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IngestGeoIpPlugin.java

Lines changed: 71 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import org.elasticsearch.common.SuppressForbidden;
2929
import org.elasticsearch.common.cache.Cache;
3030
import org.elasticsearch.common.cache.CacheBuilder;
31+
import org.elasticsearch.common.io.PathUtils;
3132
import org.elasticsearch.common.settings.Setting;
3233
import org.elasticsearch.core.internal.io.IOUtils;
3334
import org.elasticsearch.ingest.Processor;
@@ -54,6 +55,8 @@ public class IngestGeoIpPlugin extends Plugin implements IngestPlugin, Closeable
5455
public static final Setting<Long> CACHE_SIZE =
5556
Setting.longSetting("ingest.geoip.cache_size", 1000, 0, Setting.Property.NodeScope);
5657

58+
static String[] DEFAULT_DATABASE_FILENAMES = new String[]{"GeoLite2-ASN.mmdb", "GeoLite2-City.mmdb", "GeoLite2-Country.mmdb"};
59+
5760
private Map<String, DatabaseReaderLazyLoader> databaseReaders;
5861

5962
@Override
@@ -66,48 +69,89 @@ public Map<String, Processor.Factory> getProcessors(Processor.Parameters paramet
6669
if (databaseReaders != null) {
6770
throw new IllegalStateException("getProcessors called twice for geoip plugin!!");
6871
}
69-
Path geoIpConfigDirectory = parameters.env.configFile().resolve("ingest-geoip");
72+
final Path geoIpDirectory = getGeoIpDirectory(parameters);
73+
final Path geoIpConfigDirectory = parameters.env.configFile().resolve("ingest-geoip");
7074
long cacheSize = CACHE_SIZE.get(parameters.env.settings());
7175
try {
72-
databaseReaders = loadDatabaseReaders(geoIpConfigDirectory);
76+
databaseReaders = loadDatabaseReaders(geoIpDirectory, geoIpConfigDirectory);
7377
} catch (IOException e) {
7478
throw new RuntimeException(e);
7579
}
7680
return Collections.singletonMap(GeoIpProcessor.TYPE, new GeoIpProcessor.Factory(databaseReaders, new GeoIpCache(cacheSize)));
7781
}
7882

79-
static Map<String, DatabaseReaderLazyLoader> loadDatabaseReaders(Path geoIpConfigDirectory) throws IOException {
80-
if (Files.exists(geoIpConfigDirectory) == false && Files.isDirectory(geoIpConfigDirectory)) {
81-
throw new IllegalStateException("the geoip directory [" + geoIpConfigDirectory + "] containing databases doesn't exist");
83+
/*
84+
* In GeoIpProcessorNonIngestNodeTests, ingest-geoip is loaded on the classpath. This means that the plugin is never unbundled into a
85+
* directory where the database files would live. Therefore, we have to copy these database files ourselves. To do this, we need the
86+
* ability to specify where those database files would go. We do this by adding a plugin that registers ingest.geoip.database_path as
87+
* an actual setting. Otherwise, in production code, this setting is not registered and the database path is not configurable.
88+
*/
89+
@SuppressForbidden(reason = "PathUtils#get")
90+
private Path getGeoIpDirectory(Processor.Parameters parameters) {
91+
final Path geoIpDirectory;
92+
if (parameters.env.settings().get("ingest.geoip.database_path") == null) {
93+
geoIpDirectory = parameters.env.pluginsFile().resolve("ingest-geoip");
94+
} else {
95+
geoIpDirectory = PathUtils.get(parameters.env.settings().get("ingest.geoip.database_path"));
96+
}
97+
return geoIpDirectory;
98+
}
99+
100+
static Map<String, DatabaseReaderLazyLoader> loadDatabaseReaders(Path geoIpDirectory, Path geoIpConfigDirectory) throws IOException {
101+
assertDatabaseExistence(geoIpDirectory, true);
102+
assertDatabaseExistence(geoIpConfigDirectory, false);
103+
final boolean loadDatabaseOnHeap = Booleans.parseBoolean(System.getProperty("es.geoip.load_db_on_heap", "false"));
104+
final Map<String, DatabaseReaderLazyLoader> databaseReaders = new HashMap<>();
105+
106+
// load the default databases
107+
for (final String databaseFilename : DEFAULT_DATABASE_FILENAMES) {
108+
final Path databasePath = geoIpDirectory.resolve(databaseFilename);
109+
final DatabaseReaderLazyLoader loader = createLoader(databasePath, loadDatabaseOnHeap);
110+
databaseReaders.put(databaseFilename, loader);
82111
}
83-
boolean loadDatabaseOnHeap = Booleans.parseBoolean(System.getProperty("es.geoip.load_db_on_heap", "false"));
84-
Map<String, DatabaseReaderLazyLoader> databaseReaders = new HashMap<>();
85-
try (Stream<Path> databaseFiles = Files.list(geoIpConfigDirectory)) {
86-
PathMatcher pathMatcher = geoIpConfigDirectory.getFileSystem().getPathMatcher("glob:**.mmdb");
87-
// Use iterator instead of forEach otherwise IOException needs to be caught twice...
88-
Iterator<Path> iterator = databaseFiles.iterator();
89-
while (iterator.hasNext()) {
90-
Path databasePath = iterator.next();
91-
if (Files.isRegularFile(databasePath) && pathMatcher.matches(databasePath)) {
92-
String databaseFileName = databasePath.getFileName().toString();
93-
DatabaseReaderLazyLoader holder = new DatabaseReaderLazyLoader(
94-
databasePath,
95-
() -> {
96-
DatabaseReader.Builder builder = createDatabaseBuilder(databasePath).withCache(NoCache.getInstance());
97-
if (loadDatabaseOnHeap) {
98-
builder.fileMode(Reader.FileMode.MEMORY);
99-
} else {
100-
builder.fileMode(Reader.FileMode.MEMORY_MAPPED);
101-
}
102-
return builder.build();
103-
});
104-
databaseReaders.put(databaseFileName, holder);
112+
113+
// load any custom databases
114+
if (Files.exists(geoIpConfigDirectory)) {
115+
try (Stream<Path> databaseFiles = Files.list(geoIpConfigDirectory)) {
116+
PathMatcher pathMatcher = geoIpConfigDirectory.getFileSystem().getPathMatcher("glob:**.mmdb");
117+
// Use iterator instead of forEach otherwise IOException needs to be caught twice...
118+
Iterator<Path> iterator = databaseFiles.iterator();
119+
while (iterator.hasNext()) {
120+
Path databasePath = iterator.next();
121+
if (Files.isRegularFile(databasePath) && pathMatcher.matches(databasePath)) {
122+
String databaseFileName = databasePath.getFileName().toString();
123+
final DatabaseReaderLazyLoader loader = createLoader(databasePath, loadDatabaseOnHeap);
124+
databaseReaders.put(databaseFileName, loader);
125+
}
105126
}
106127
}
107128
}
108129
return Collections.unmodifiableMap(databaseReaders);
109130
}
110131

132+
private static DatabaseReaderLazyLoader createLoader(Path databasePath, boolean loadDatabaseOnHeap) {
133+
return new DatabaseReaderLazyLoader(
134+
databasePath,
135+
() -> {
136+
DatabaseReader.Builder builder = createDatabaseBuilder(databasePath).withCache(NoCache.getInstance());
137+
if (loadDatabaseOnHeap) {
138+
builder.fileMode(Reader.FileMode.MEMORY);
139+
} else {
140+
builder.fileMode(Reader.FileMode.MEMORY_MAPPED);
141+
}
142+
return builder.build();
143+
});
144+
}
145+
146+
private static void assertDatabaseExistence(final Path path, final boolean exists) throws IOException {
147+
for (final String database : DEFAULT_DATABASE_FILENAMES) {
148+
if (Files.exists(path.resolve(database)) != exists) {
149+
final String message = "expected database [" + database + "] to " + (exists ? "" : "not ") + "exist in [" + path + "]";
150+
throw new IOException(message);
151+
}
152+
}
153+
}
154+
111155
@SuppressForbidden(reason = "Maxmind API requires java.io.File")
112156
private static DatabaseReader.Builder createDatabaseBuilder(Path databasePath) {
113157
return new DatabaseReader.Builder(databasePath.toFile());

plugins/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpProcessorFactoryTests.java

Lines changed: 88 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,9 @@
4444
import java.util.Map;
4545
import java.util.Set;
4646

47+
import static org.hamcrest.Matchers.containsString;
4748
import static org.hamcrest.Matchers.equalTo;
49+
import static org.hamcrest.Matchers.hasToString;
4850
import static org.hamcrest.Matchers.sameInstance;
4951

5052
public class GeoIpProcessorFactoryTests extends ESTestCase {
@@ -60,17 +62,13 @@ public static void loadDatabaseReaders() throws IOException {
6062
return;
6163
}
6264

63-
Path configDir = createTempDir();
64-
Path geoIpConfigDir = configDir.resolve("ingest-geoip");
65+
final Path geoIpDir = createTempDir();
66+
final Path configDir = createTempDir();
67+
final Path geoIpConfigDir = configDir.resolve("ingest-geoip");
6568
Files.createDirectories(geoIpConfigDir);
66-
Files.copy(new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/GeoLite2-City.mmdb")),
67-
geoIpConfigDir.resolve("GeoLite2-City.mmdb"));
68-
Files.copy(new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/GeoLite2-Country.mmdb")),
69-
geoIpConfigDir.resolve("GeoLite2-Country.mmdb"));
70-
Files.copy(new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/GeoLite2-ASN.mmdb")),
71-
geoIpConfigDir.resolve("GeoLite2-ASN.mmdb"));
72-
73-
databaseReaders = IngestGeoIpPlugin.loadDatabaseReaders(geoIpConfigDir);
69+
copyDatabaseFiles(geoIpDir);
70+
71+
databaseReaders = IngestGeoIpPlugin.loadDatabaseReaders(geoIpDir, geoIpConfigDir);
7472
}
7573

7674
@AfterClass
@@ -297,21 +295,16 @@ public void testLazyLoading() throws Exception {
297295
// This test uses a MappedByteBuffer which will keep the file mappings active until it is garbage-collected.
298296
// As a consequence, the corresponding file appears to be still in use and Windows cannot delete it.
299297
assumeFalse("windows deletion behavior is asinine", Constants.WINDOWS);
300-
Path configDir = createTempDir();
301-
Path geoIpConfigDir = configDir.resolve("ingest-geoip");
298+
final Path geoIpDir = createTempDir();
299+
final Path configDir = createTempDir();
300+
final Path geoIpConfigDir = configDir.resolve("ingest-geoip");
302301
Files.createDirectories(geoIpConfigDir);
303-
Files.copy(new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/GeoLite2-City.mmdb")),
304-
geoIpConfigDir.resolve("GeoLite2-City.mmdb"));
305-
Files.copy(new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/GeoLite2-Country.mmdb")),
306-
geoIpConfigDir.resolve("GeoLite2-Country.mmdb"));
307-
Files.copy(new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/GeoLite2-ASN.mmdb")),
308-
geoIpConfigDir.resolve("GeoLite2-ASN.mmdb"));
302+
copyDatabaseFiles(geoIpDir);
309303

310304
// Loading another database reader instances, because otherwise we can't test lazy loading as the
311305
// database readers used at class level are reused between tests. (we want to keep that otherwise running this
312306
// test will take roughly 4 times more time)
313-
Map<String, DatabaseReaderLazyLoader> databaseReaders =
314-
IngestGeoIpPlugin.loadDatabaseReaders(geoIpConfigDir);
307+
Map<String, DatabaseReaderLazyLoader> databaseReaders = IngestGeoIpPlugin.loadDatabaseReaders(geoIpDir, geoIpConfigDir);
315308
GeoIpProcessor.Factory factory = new GeoIpProcessor.Factory(databaseReaders, new GeoIpCache(1000));
316309
for (DatabaseReaderLazyLoader lazyLoader : databaseReaders.values()) {
317310
assertNull(lazyLoader.databaseReader.get());
@@ -354,4 +347,79 @@ public void testLazyLoading() throws Exception {
354347
assertNotNull(databaseReaders.get("GeoLite2-ASN.mmdb").databaseReader.get());
355348
}
356349

350+
public void testLoadingCustomDatabase() throws IOException {
351+
final Path geoIpDir = createTempDir();
352+
final Path configDir = createTempDir();
353+
final Path geoIpConfigDir = configDir.resolve("ingest-geoip");
354+
Files.createDirectories(geoIpConfigDir);
355+
copyDatabaseFiles(geoIpDir);
356+
// fake the GeoIP2-City database
357+
copyDatabaseFile(geoIpConfigDir, "GeoLite2-City.mmdb");
358+
Files.move(geoIpConfigDir.resolve("GeoLite2-City.mmdb"), geoIpConfigDir.resolve("GeoIP2-City.mmdb"));
359+
360+
/*
361+
* Loading another database reader instances, because otherwise we can't test lazy loading as the database readers used at class
362+
* level are reused between tests. (we want to keep that otherwise running this test will take roughly 4 times more time).
363+
*/
364+
final Map<String, DatabaseReaderLazyLoader> databaseReaders = IngestGeoIpPlugin.loadDatabaseReaders(geoIpDir, geoIpConfigDir);
365+
final GeoIpProcessor.Factory factory = new GeoIpProcessor.Factory(databaseReaders, new GeoIpCache(1000));
366+
for (DatabaseReaderLazyLoader lazyLoader : databaseReaders.values()) {
367+
assertNull(lazyLoader.databaseReader.get());
368+
}
369+
370+
final Map<String, Object> field = Collections.singletonMap("_field", "1.1.1.1");
371+
final IngestDocument document = new IngestDocument("index", "type", "id", "routing", 1L, VersionType.EXTERNAL, field);
372+
373+
Map<String, Object> config = new HashMap<>();
374+
config.put("field", "_field");
375+
config.put("database_file", "GeoIP2-City.mmdb");
376+
final GeoIpProcessor city = factory.create(null, "_tag", config);
377+
378+
// these are lazy loaded until first use so we expect null here
379+
assertNull(databaseReaders.get("GeoIP2-City.mmdb").databaseReader.get());
380+
city.execute(document);
381+
// the first ingest should trigger a database load
382+
assertNotNull(databaseReaders.get("GeoIP2-City.mmdb").databaseReader.get());
383+
}
384+
385+
public void testDatabaseNotExistsInDir() throws IOException {
386+
final Path geoIpDir = createTempDir();
387+
final Path configDir = createTempDir();
388+
final Path geoIpConfigDir = configDir.resolve("ingest-geoip");
389+
if (randomBoolean()) {
390+
Files.createDirectories(geoIpConfigDir);
391+
}
392+
copyDatabaseFiles(geoIpDir);
393+
final String databaseFilename = randomFrom(IngestGeoIpPlugin.DEFAULT_DATABASE_FILENAMES);
394+
Files.delete(geoIpDir.resolve(databaseFilename));
395+
final IOException e =
396+
expectThrows(IOException.class, () -> IngestGeoIpPlugin.loadDatabaseReaders(geoIpDir, geoIpConfigDir));
397+
assertThat(e, hasToString(containsString("expected database [" + databaseFilename + "] to exist in [" + geoIpDir + "]")));
398+
}
399+
400+
public void testDatabaseExistsInConfigDir() throws IOException {
401+
final Path geoIpDir = createTempDir();
402+
final Path configDir = createTempDir();
403+
final Path geoIpConfigDir = configDir.resolve("ingest-geoip");
404+
Files.createDirectories(geoIpConfigDir);
405+
copyDatabaseFiles(geoIpDir);
406+
final String databaseFilename = randomFrom(IngestGeoIpPlugin.DEFAULT_DATABASE_FILENAMES);
407+
copyDatabaseFile(geoIpConfigDir, databaseFilename);
408+
final IOException e =
409+
expectThrows(IOException.class, () -> IngestGeoIpPlugin.loadDatabaseReaders(geoIpDir, geoIpConfigDir));
410+
assertThat(e, hasToString(containsString("expected database [" + databaseFilename + "] to not exist in [" + geoIpConfigDir + "]")));
411+
}
412+
413+
private static void copyDatabaseFile(final Path path, final String databaseFilename) throws IOException {
414+
Files.copy(
415+
new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/" + databaseFilename)),
416+
path.resolve(databaseFilename));
417+
}
418+
419+
private static void copyDatabaseFiles(final Path path) throws IOException {
420+
for (final String databaseFilename : IngestGeoIpPlugin.DEFAULT_DATABASE_FILENAMES) {
421+
copyDatabaseFile(path, databaseFilename);
422+
}
423+
}
424+
357425
}

plugins/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpProcessorNonIngestNodeTests.java

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
import org.elasticsearch.action.index.IndexResponse;
2424
import org.elasticsearch.action.ingest.PutPipelineRequest;
2525
import org.elasticsearch.common.bytes.BytesReference;
26+
import org.elasticsearch.common.settings.Setting;
2627
import org.elasticsearch.common.settings.Settings;
2728
import org.elasticsearch.common.xcontent.XContentBuilder;
2829
import org.elasticsearch.common.xcontent.XContentType;
@@ -41,27 +42,30 @@
4142
import java.util.Arrays;
4243
import java.util.Collection;
4344
import java.util.Collections;
45+
import java.util.List;
4446

4547
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
4648
import static org.hamcrest.Matchers.equalTo;
4749

4850
public class GeoIpProcessorNonIngestNodeTests extends ESIntegTestCase {
4951

50-
@Override
51-
protected Collection<Class<? extends Plugin>> nodePlugins() {
52-
return Collections.singleton(IngestGeoIpPlugin.class);
52+
public static class IngestGeoIpSettingsPlugin extends Plugin {
53+
54+
@Override
55+
public List<Setting<?>> getSettings() {
56+
return Collections.singletonList(Setting.simpleString("ingest.geoip.database_path", Setting.Property.NodeScope));
57+
}
5358
}
5459

5560
@Override
56-
protected Settings nodeSettings(final int nodeOrdinal) {
57-
return Settings.builder().put("node.ingest", false).put(super.nodeSettings(nodeOrdinal)).build();
61+
protected Collection<Class<? extends Plugin>> nodePlugins() {
62+
return Arrays.asList(IngestGeoIpPlugin.class, IngestGeoIpSettingsPlugin.class);
5863
}
5964

6065
@Override
61-
protected Path nodeConfigPath(final int nodeOrdinal) {
62-
final Path configPath = createTempDir();
66+
protected Settings nodeSettings(final int nodeOrdinal) {
67+
final Path databasePath = createTempDir();
6368
try {
64-
final Path databasePath = configPath.resolve("ingest-geoip");
6569
Files.createDirectories(databasePath);
6670
Files.copy(
6771
new ByteArrayInputStream(StreamsUtils.copyToBytesFromClasspath("/GeoLite2-City.mmdb")),
@@ -75,7 +79,11 @@ protected Path nodeConfigPath(final int nodeOrdinal) {
7579
} catch (final IOException e) {
7680
throw new UncheckedIOException(e);
7781
}
78-
return configPath;
82+
return Settings.builder()
83+
.put("ingest.geoip.database_path", databasePath)
84+
.put("node.ingest", false)
85+
.put(super.nodeSettings(nodeOrdinal))
86+
.build();
7987
}
8088

8189
/**

0 commit comments

Comments
 (0)