Skip to content

Commit 394fb08

Browse files
jdconrad authored and rjernst committed
Ingest Attachment: Upgrade Tika to 1.18 (#31252)
Fixes a hang in ES that occurs when a bad zip file is loaded through Tika.
1 parent 86cc9ae commit 394fb08

19 files changed

+29
-14
lines changed

plugins/ingest-attachment/build.gradle

+11-7
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ esplugin {
2323
}
2424

2525
versions << [
26-
'tika': '1.17',
27-
'pdfbox': '2.0.8',
26+
'tika': '1.18',
27+
'pdfbox': '2.0.9',
2828
'bouncycastle': '1.55',
2929
'poi': '3.17',
3030
'mime4j': '0.8.1'
@@ -33,9 +33,10 @@ versions << [
3333
dependencies {
3434
// mandatory for tika
3535
compile "org.apache.tika:tika-core:${versions.tika}"
36+
// built against Jackson 2.9.5, but still works on our current version
3637
compile "org.apache.tika:tika-parsers:${versions.tika}"
37-
compile 'org.tukaani:xz:1.6'
38-
compile 'commons-io:commons-io:2.5'
38+
compile 'org.tukaani:xz:1.8'
39+
compile 'commons-io:commons-io:2.6'
3940
compile "org.slf4j:slf4j-api:${versions.slf4j}"
4041

4142
// character set detection
@@ -62,7 +63,7 @@ dependencies {
6263
// MS Office
6364
compile "org.apache.poi:poi-scratchpad:${versions.poi}"
6465
// Apple iWork
65-
compile 'org.apache.commons:commons-compress:1.14'
66+
compile 'org.apache.commons:commons-compress:1.16.1'
6667
// Outlook documents
6768
compile "org.apache.james:apache-mime4j-core:${versions.mime4j}"
6869
compile "org.apache.james:apache-mime4j-dom:${versions.mime4j}"
@@ -118,6 +119,10 @@ thirdPartyAudit.excludes = [
118119
'com.drew.metadata.jpeg.JpegDirectory',
119120
'com.github.junrar.Archive',
120121
'com.github.junrar.rarfile.FileHeader',
122+
'com.github.luben.zstd.ZstdInputStream',
123+
'com.github.luben.zstd.ZstdOutputStream',
124+
'com.github.openjson.JSONArray',
125+
'com.github.openjson.JSONObject',
121126
'com.google.common.reflect.TypeToken',
122127
'com.google.gson.Gson',
123128
'com.googlecode.mp4parser.DataSource',
@@ -531,6 +536,7 @@ thirdPartyAudit.excludes = [
531536
'org.apache.commons.exec.PumpStreamHandler',
532537
'org.apache.commons.exec.environment.EnvironmentUtils',
533538
'org.apache.commons.lang.StringUtils',
539+
'org.apache.commons.lang.SystemUtils',
534540
'org.apache.ctakes.typesystem.type.refsem.UmlsConcept',
535541
'org.apache.ctakes.typesystem.type.textsem.IdentifiedAnnotation',
536542
'org.apache.cxf.jaxrs.client.WebClient',
@@ -635,8 +641,6 @@ thirdPartyAudit.excludes = [
635641
'org.etsi.uri.x01903.v13.impl.UnsignedSignaturePropertiesTypeImpl$1SignatureTimeStampList',
636642
'org.etsi.uri.x01903.v14.ValidationDataType$Factory',
637643
'org.etsi.uri.x01903.v14.ValidationDataType',
638-
'org.json.JSONArray',
639-
'org.json.JSONObject',
640644
'org.json.simple.JSONArray',
641645
'org.json.simple.JSONObject',
642646
'org.json.simple.parser.JSONParser',

plugins/ingest-attachment/licenses/commons-compress-1.14.jar.sha1

-1
This file was deleted.

plugins/ingest-attachment/licenses/commons-compress-1.16.1.jar.sha1

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
7b5cdabadb4cf12f5ee0f801399e70635583193f

plugins/ingest-attachment/licenses/commons-io-2.5.jar.sha1

-1
This file was deleted.

plugins/ingest-attachment/licenses/commons-io-2.6.jar.sha1

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
815893df5f31da2ece4040fe0a12fd44b577afaf

plugins/ingest-attachment/licenses/fontbox-2.0.8.jar.sha1

-1
This file was deleted.

plugins/ingest-attachment/licenses/fontbox-2.0.9.jar.sha1

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
f961f17ebdbc307e9055e3cf7c0e207f0895ae55

plugins/ingest-attachment/licenses/pdfbox-2.0.8.jar.sha1

-1
This file was deleted.

plugins/ingest-attachment/licenses/pdfbox-2.0.9.jar.sha1

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
d0425578218624388f2ec84a0b3a11efd55df0f5

plugins/ingest-attachment/licenses/tika-core-1.17.jar.sha1

-1
This file was deleted.

plugins/ingest-attachment/licenses/tika-core-1.18.jar.sha1

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
69556697de96cf0b22df846e970dafd29866eee0

plugins/ingest-attachment/licenses/tika-parsers-1.17.jar.sha1

-1
This file was deleted.

plugins/ingest-attachment/licenses/tika-parsers-1.18.jar.sha1

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
7d9b6dea91d783165f3313d320d3aaaa9a4dfc13

plugins/ingest-attachment/licenses/xz-1.6.jar.sha1

-1
This file was deleted.

plugins/ingest-attachment/licenses/xz-1.8.jar.sha1

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
c4f7d054303948eb6a4066194253886c8af07128

plugins/ingest-attachment/src/main/java/org/elasticsearch/ingest/attachment/TikaImpl.java

+1
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,7 @@ static PermissionCollection getRestrictedPermissions() {
159159
perms.add(new SecurityPermission("putProviderProperty.BC"));
160160
perms.add(new SecurityPermission("insertProvider"));
161161
perms.add(new ReflectPermission("suppressAccessChecks"));
162+
perms.add(new RuntimePermission("accessClassInPackage.sun.java2d.cmm.kcms"));
162163
// xmlbeans, used by POI, needs to get the context classloader
163164
perms.add(new RuntimePermission("getClassLoader"));
164165
perms.setReadOnly();

plugins/ingest-attachment/src/main/plugin-metadata/plugin-security.policy

+4
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,8 @@ grant {
2929
permission java.lang.reflect.ReflectPermission "suppressAccessChecks";
3030
// needed by xmlbeans, as part of POI for MS xml docs
3131
permission java.lang.RuntimePermission "getClassLoader";
32+
// ZipFile needs accessDeclaredMembers on Java 10
33+
permission java.lang.RuntimePermission "accessDeclaredMembers";
34+
// PDFBox checks for the existence of this class
35+
permission java.lang.RuntimePermission "accessClassInPackage.sun.java2d.cmm.kcms";
3236
};

plugins/ingest-attachment/src/test/java/org/elasticsearch/ingest/attachment/AttachmentProcessorTests.java

+6
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,12 @@ public void testAsciidocDocument() throws Exception {
215215
assertThat(attachmentData.get("content_type").toString(), containsString("text/plain"));
216216
}
217217

218+
// See (https://issues.apache.org/jira/browse/COMPRESS-432) for information
219+
// about the issue that causes a zip file to hang in Tika versions prior to 1.18.
220+
public void testZipFileDoesNotHang() {
221+
expectThrows(Exception.class, () -> parseDocument("bad_tika.zip", processor));
222+
}
223+
218224
public void testParseAsBytesArray() throws Exception {
219225
String path = "/org/elasticsearch/ingest/attachment/test/sample-files/text-in-english.txt";
220226
byte[] bytes;

0 commit comments

Comments
 (0)