Skip to content

Commit 0f95629

Browse files
jdconrad authored and colings86 committed
Ingest Attachment: Upgrade Tika to 1.18 (#31252)
Fixes ES from hanging when a bad zip file is loaded through Tika.
1 parent 43c2e2f commit 0f95629

19 files changed

+27
-14
lines changed

plugins/ingest-attachment/build.gradle

+11-7
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ esplugin {
2323
}
2424

2525
versions << [
26-
'tika': '1.17',
27-
'pdfbox': '2.0.8',
26+
'tika': '1.18',
27+
'pdfbox': '2.0.9',
2828
'bouncycastle': '1.55',
2929
'poi': '3.17',
3030
'mime4j': '0.8.1'
@@ -33,9 +33,10 @@ versions << [
3333
dependencies {
3434
// mandatory for tika
3535
compile "org.apache.tika:tika-core:${versions.tika}"
36+
// build against Jackson 2.9.5, but still works on our current version
3637
compile "org.apache.tika:tika-parsers:${versions.tika}"
37-
compile 'org.tukaani:xz:1.6'
38-
compile 'commons-io:commons-io:2.5'
38+
compile 'org.tukaani:xz:1.8'
39+
compile 'commons-io:commons-io:2.6'
3940
compile "org.slf4j:slf4j-api:${versions.slf4j}"
4041

4142
// character set detection
@@ -62,7 +63,7 @@ dependencies {
6263
// MS Office
6364
compile "org.apache.poi:poi-scratchpad:${versions.poi}"
6465
// Apple iWork
65-
compile 'org.apache.commons:commons-compress:1.14'
66+
compile 'org.apache.commons:commons-compress:1.16.1'
6667
// Outlook documents
6768
compile "org.apache.james:apache-mime4j-core:${versions.mime4j}"
6869
compile "org.apache.james:apache-mime4j-dom:${versions.mime4j}"
@@ -118,6 +119,10 @@ thirdPartyAudit.excludes = [
118119
'com.drew.metadata.jpeg.JpegDirectory',
119120
'com.github.junrar.Archive',
120121
'com.github.junrar.rarfile.FileHeader',
122+
'com.github.luben.zstd.ZstdInputStream',
123+
'com.github.luben.zstd.ZstdOutputStream',
124+
'com.github.openjson.JSONArray',
125+
'com.github.openjson.JSONObject',
121126
'com.google.common.reflect.TypeToken',
122127
'com.google.gson.Gson',
123128
'com.googlecode.mp4parser.DataSource',
@@ -531,6 +536,7 @@ thirdPartyAudit.excludes = [
531536
'org.apache.commons.exec.PumpStreamHandler',
532537
'org.apache.commons.exec.environment.EnvironmentUtils',
533538
'org.apache.commons.lang.StringUtils',
539+
'org.apache.commons.lang.SystemUtils',
534540
'org.apache.ctakes.typesystem.type.refsem.UmlsConcept',
535541
'org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation',
536542
'org.apache.cxf.jaxrs.client.WebClient',
@@ -635,8 +641,6 @@ thirdPartyAudit.excludes = [
635641
'org.etsi.uri.x01903.v13.impl.UnsignedSignaturePropertiesTypeImpl$1SignatureTimeStampList',
636642
'org.etsi.uri.x01903.v14.ValidationDataType$Factory',
637643
'org.etsi.uri.x01903.v14.ValidationDataType',
638-
'org.json.JSONArray',
639-
'org.json.JSONObject',
640644
'org.json.simple.JSONArray',
641645
'org.json.simple.JSONObject',
642646
'org.json.simple.parser.JSONParser',

plugins/ingest-attachment/licenses/commons-compress-1.14.jar.sha1

-1
This file was deleted.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
7b5cdabadb4cf12f5ee0f801399e70635583193f

plugins/ingest-attachment/licenses/commons-io-2.5.jar.sha1

-1
This file was deleted.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
815893df5f31da2ece4040fe0a12fd44b577afaf

plugins/ingest-attachment/licenses/fontbox-2.0.8.jar.sha1

-1
This file was deleted.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
f961f17ebdbc307e9055e3cf7c0e207f0895ae55

plugins/ingest-attachment/licenses/pdfbox-2.0.8.jar.sha1

-1
This file was deleted.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
d0425578218624388f2ec84a0b3a11efd55df0f5

plugins/ingest-attachment/licenses/tika-core-1.17.jar.sha1

-1
This file was deleted.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
69556697de96cf0b22df846e970dafd29866eee0

plugins/ingest-attachment/licenses/tika-parsers-1.17.jar.sha1

-1
This file was deleted.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
7d9b6dea91d783165f3313d320d3aaaa9a4dfc13

plugins/ingest-attachment/licenses/xz-1.6.jar.sha1

-1
This file was deleted.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
c4f7d054303948eb6a4066194253886c8af07128

plugins/ingest-attachment/src/main/java/org/elasticsearch/ingest/attachment/TikaImpl.java

+1
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,7 @@ static PermissionCollection getRestrictedPermissions() {
159159
perms.add(new SecurityPermission("putProviderProperty.BC"));
160160
perms.add(new SecurityPermission("insertProvider"));
161161
perms.add(new ReflectPermission("suppressAccessChecks"));
162+
perms.add(new RuntimePermission("accessClassInPackage.sun.java2d.cmm.kcms"));
162163
// xmlbeans, use by POI, needs to get the context classloader
163164
perms.add(new RuntimePermission("getClassLoader"));
164165
// ZipFile needs accessDeclaredMembers on JDK 10; cf. https://bugs.openjdk.java.net/browse/JDK-8187485

plugins/ingest-attachment/src/main/plugin-metadata/plugin-security.policy

+2
Original file line numberDiff line numberDiff line change
@@ -31,4 +31,6 @@ grant {
3131
permission java.lang.RuntimePermission "getClassLoader";
3232
// ZipFile needs accessDeclaredMembers on Java 10
3333
permission java.lang.RuntimePermission "accessDeclaredMembers";
34+
// PDFBox checks for the existence of this class
35+
permission java.lang.RuntimePermission "accessClassInPackage.sun.java2d.cmm.kcms";
3436
};

plugins/ingest-attachment/src/test/java/org/elasticsearch/ingest/attachment/AttachmentProcessorTests.java

+6
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,12 @@ public void testAsciidocDocument() throws Exception {
214214
assertThat(attachmentData.get("content_type").toString(), containsString("text/plain"));
215215
}
216216

217+
// See (https://issues.apache.org/jira/browse/COMPRESS-432) for information
218+
// about the issue that causes a zip file to hang in Tika versions prior to 1.18.
219+
public void testZipFileDoesNotHang() {
220+
expectThrows(Exception.class, () -> parseDocument("bad_tika.zip", processor));
221+
}
222+
217223
public void testParseAsBytesArray() throws Exception {
218224
String path = "/org/elasticsearch/ingest/attachment/test/sample-files/text-in-english.txt";
219225
byte[] bytes;

0 commit comments

Comments
 (0)