Skip to content

Commit 1c030f6

Browse files
committed
Update to Tika 1.8
Tika 1.8 has been released. See https://dist.apache.org/repos/dist/release/tika/CHANGES-1.8.txt

We can replace:

```java
public static boolean isLocaleCompatible() {
    String language = Locale.getDefault().getLanguage();
    boolean acceptedLocale = true;

    if (
            // We can have issues with JDK7 Patch < 80
            (JVM_MAJOR_VERSION == 1 && JVM_MINOR_VERSION == 7 && JVM_PATCH_MAJOR_VERSION == 0 && JVM_PATCH_MINOR_VERSION < 80) ||
            // We can have issues with JDK8 Patch < 40
            (JVM_MAJOR_VERSION == 1 && JVM_MINOR_VERSION == 8 && JVM_PATCH_MAJOR_VERSION == 0 && JVM_PATCH_MINOR_VERSION < 40)
    ) {
        if (language.equalsIgnoreCase("tr") || language.equalsIgnoreCase("az")) {
            acceptedLocale = false;
        }
    }

    return acceptedLocale;
}
```

by

```java
public static boolean isLocaleCompatible() {
    return true;
}
```

Related to https://issues.apache.org/jira/browse/TIKA-1526 and elastic#105

Note that Content-type has changed a bit and now returns something like `application/xhtml+xml; charset=ISO-8859-1` instead of `application/xhtml+xml`.

Closes elastic#112.

(cherry picked from commit bf4af47971ed07bfa126409413c435f121444c3c)
1 parent 2f45711 commit 1c030f6

File tree

3 files changed

+6
-23
lines changed

3 files changed

+6
-23
lines changed

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@
5757
<dependency>
5858
<groupId>org.apache.tika</groupId>
5959
<artifactId>tika-parsers</artifactId>
60-
<version>1.7</version>
60+
<version>1.8</version>
6161
<exclusions>
6262
<!-- Not Apache2 License compatible -->
6363
<exclusion>

src/main/java/org/elasticsearch/plugin/mapper/attachments/tika/LocaleChecker.java

Lines changed: 2 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121

2222
import org.apache.lucene.util.Constants;
2323

24-
import java.util.Locale;
2524
import java.util.StringTokenizer;
2625

2726
import static java.lang.Integer.parseInt;
@@ -46,24 +45,9 @@ public class LocaleChecker {
4645
}
4746

4847
/**
49-
* We can have issues with some JVMs and Locale
50-
* See https://github.com/elasticsearch/elasticsearch-mapper-attachments/issues/105
48+
* Tika 1.8 fixed currently known Locale issues with some JVMs
5149
*/
5250
public static boolean isLocaleCompatible() {
53-
String language = Locale.getDefault().getLanguage();
54-
boolean acceptedLocale = true;
55-
56-
if (
57-
// We can have issues with JDK7 Patch < 80
58-
(JVM_MAJOR_VERSION == 1 && JVM_MINOR_VERSION == 7 && JVM_PATCH_MAJOR_VERSION == 0 && JVM_PATCH_MINOR_VERSION < 80) ||
59-
// We can have issues with JDK8 Patch < 40
60-
(JVM_MAJOR_VERSION == 1 && JVM_MINOR_VERSION == 8 && JVM_PATCH_MAJOR_VERSION == 0 && JVM_PATCH_MINOR_VERSION < 40)
61-
) {
62-
if (language.equalsIgnoreCase("tr") || language.equalsIgnoreCase("az")) {
63-
acceptedLocale = false;
64-
}
65-
}
66-
67-
return acceptedLocale;
51+
return true;
6852
}
6953
}

src/test/java/org/elasticsearch/index/mapper/attachment/test/unit/SimpleAttachmentMapperTests.java

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,7 @@
3131
import static org.elasticsearch.common.io.Streams.copyToBytesFromClasspath;
3232
import static org.elasticsearch.common.io.Streams.copyToStringFromClasspath;
3333
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
34-
import static org.hamcrest.Matchers.containsString;
35-
import static org.hamcrest.Matchers.equalTo;
34+
import static org.hamcrest.Matchers.*;
3635

3736
/**
3837
*
@@ -57,7 +56,7 @@ public void testSimpleMappings() throws Exception {
5756

5857
ParseContext.Document doc = docMapper.parse(json).rootDoc();
5958

60-
assertThat(doc.get(docMapper.mappers().getMapper("file.content_type").names().indexName()), equalTo("application/xhtml+xml"));
59+
assertThat(doc.get(docMapper.mappers().getMapper("file.content_type").names().indexName()), startsWith("application/xhtml+xml"));
6160
assertThat(doc.get(docMapper.mappers().getMapper("file.title").names().indexName()), equalTo("XHTML test document"));
6261
assertThat(doc.get(docMapper.mappers().getMapper("file").names().indexName()), containsString("This document tests the ability of Apache Tika to extract content"));
6362

@@ -69,7 +68,7 @@ public void testSimpleMappings() throws Exception {
6968

7069
doc = docMapper.parse(json).rootDoc();
7170

72-
assertThat(doc.get(docMapper.mappers().getMapper("file.content_type").names().indexName()), equalTo("application/xhtml+xml"));
71+
assertThat(doc.get(docMapper.mappers().getMapper("file.content_type").names().indexName()), startsWith("application/xhtml+xml"));
7372
assertThat(doc.get(docMapper.mappers().getMapper("file.title").names().indexName()), equalTo("XHTML test document"));
7473
assertThat(doc.get(docMapper.mappers().getMapper("file").names().indexName()), containsString("This document tests the ability of Apache Tika to extract content"));
7574
}

0 commit comments

Comments
 (0)