Skip to content

Commit 45f928e

Browse files
committed
GEO: More robust handling of ignore_malformed in geoshape parsing
Adds a method to XContent parser to skip all children of a current element in case of the parsing failure and applies this method to be able to ignore the rest of the GeoJson shape if the parsing fails and we need to ignore the geoshape due to the ignore malformed flag. Supersedes elastic#34498 Closes elastic#34047
1 parent c7a2c6d commit 45f928e

File tree

6 files changed

+268
-6
lines changed

6 files changed

+268
-6
lines changed

libs/x-content/src/main/java/org/elasticsearch/common/xcontent/XContentParser.java

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,4 +253,30 @@ enum NumberType {
253253
* The callback to notify when parsing encounters a deprecated field.
254254
*/
255255
DeprecationHandler getDeprecationHandler();
256+
257+
/**
258+
* Marks the current token to be used with skipMarkedChildren.
259+
*
260+
* Can be used to gracefully handle parsing errors within large nested structures. Usage:
261+
*
262+
* Mark mark = parser.mark();
263+
* try {
264+
* while (parser.nextToken() != Token.END_OBJECT) {
265+
* // parse
266+
* if (isSomethingWrong) {
267+
* throw ElasticSearchParsingException("some thing is wrong here");
268+
* }
269+
* }
270+
* } catch(ElasticSearchParsingException ex) {
271+
* mark.skipChildren(); // skip children and continue parsing
272+
* }
273+
*
274+
* which would be equvivalent to parser.skipChildren() in case of an error
275+
*
276+
*/
277+
Mark markParent();
278+
279+
interface Mark {
280+
void skipChildren() throws IOException;
281+
}
256282
}

libs/x-content/src/main/java/org/elasticsearch/common/xcontent/json/JsonXContentParser.java

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,14 @@
2121

2222
import com.fasterxml.jackson.core.JsonLocation;
2323
import com.fasterxml.jackson.core.JsonParser;
24+
import com.fasterxml.jackson.core.JsonStreamContext;
2425
import com.fasterxml.jackson.core.JsonToken;
25-
26-
import org.elasticsearch.core.internal.io.IOUtils;
2726
import org.elasticsearch.common.xcontent.DeprecationHandler;
2827
import org.elasticsearch.common.xcontent.NamedXContentRegistry;
2928
import org.elasticsearch.common.xcontent.XContentLocation;
3029
import org.elasticsearch.common.xcontent.XContentType;
3130
import org.elasticsearch.common.xcontent.support.AbstractXContentParser;
31+
import org.elasticsearch.core.internal.io.IOUtils;
3232

3333
import java.io.IOException;
3434
import java.nio.CharBuffer;
@@ -245,4 +245,48 @@ private Token convertToken(JsonToken token) {
245245
public boolean isClosed() {
246246
return parser.isClosed();
247247
}
248+
249+
@Override
250+
public Mark markParent() {
251+
if (currentToken() != Token.START_OBJECT && currentToken() != Token.START_ARRAY) {
252+
throw new IllegalStateException("markParent() has to be called on the start of an object or an array");
253+
}
254+
JsonStreamContext context = parser.getParsingContext();
255+
JsonStreamContext parentContext = context.getParent();
256+
if (parentContext == null) {
257+
// we cannot be here because we already checked for the first token, but just in case we double check
258+
throw new IllegalStateException("Cannot mark root");
259+
}
260+
return () -> {
261+
JsonStreamContext currentContext;
262+
Token token = currentToken();
263+
// First we skip all tokens until we are back to the parsing context that we started with
264+
for (currentContext = parser.getParsingContext();
265+
currentContext != null && parentContext.equals(parser.getParsingContext()) == false;
266+
currentContext = parser.getParsingContext()
267+
) {
268+
token = nextToken();
269+
}
270+
if (currentContext == null) {
271+
throw new IllegalStateException("Didn't find parent context");
272+
}
273+
// Now we are going to skip all children of the current structure
274+
int level = 1;
275+
while (true) {
276+
if (token == null) {
277+
throw new IllegalStateException("Unexpected end of the stream");
278+
}
279+
if (token == Token.START_OBJECT || token == Token.START_ARRAY) {
280+
level++;
281+
} else if (token == Token.END_OBJECT || token == Token.END_ARRAY) {
282+
if (--level == 0) {
283+
return;
284+
}
285+
}
286+
token = nextToken();
287+
}
288+
289+
};
290+
}
291+
248292
}

libs/x-content/src/test/java/org/elasticsearch/common/xcontent/XContentParserTests.java

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
package org.elasticsearch.common.xcontent;
2121

2222
import com.fasterxml.jackson.core.JsonParseException;
23+
import org.elasticsearch.common.CheckedSupplier;
2324
import org.elasticsearch.common.Strings;
2425
import org.elasticsearch.common.bytes.BytesReference;
2526
import org.elasticsearch.common.xcontent.json.JsonXContent;
@@ -327,4 +328,165 @@ public void testNestedMapInList() throws IOException {
327328
parser.list());
328329
}
329330
}
331+
332+
public void testMarkParent() throws IOException {
333+
XContentBuilder builder = XContentFactory.jsonBuilder();
334+
boolean testObject = randomBoolean();
335+
int numberOfTokens;
336+
if (testObject) {
337+
numberOfTokens = generateRandomObjectForMarking(builder);
338+
} else {
339+
numberOfTokens = generateRandomArrayForMarking(builder);
340+
}
341+
String content = Strings.toString(builder);
342+
343+
try (XContentParser parser = createParser(JsonXContent.jsonXContent, content)) {
344+
assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken());
345+
assertEquals(XContentParser.Token.FIELD_NAME, parser.nextToken()); // first field
346+
assertEquals("first_field", parser.currentName());
347+
assertEquals(XContentParser.Token.VALUE_STRING, parser.nextToken()); // foo
348+
assertEquals(XContentParser.Token.FIELD_NAME, parser.nextToken()); // marked field
349+
assertEquals("marked_field", parser.currentName());
350+
if (testObject) {
351+
assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken()); // {
352+
} else {
353+
assertEquals(XContentParser.Token.START_ARRAY, parser.nextToken()); // [
354+
}
355+
XContentParser.Mark mark = parser.markParent();
356+
int tokensToSkip = randomInt(numberOfTokens - 1);
357+
for (int i = 0; i < tokensToSkip; i++) {
358+
// Simulate incomplete parsing
359+
assertNotNull(parser.nextToken());
360+
}
361+
// now skip to the end of the marked_field
362+
mark.skipChildren();
363+
assertEquals(XContentParser.Token.FIELD_NAME, parser.nextToken()); // last field
364+
assertEquals("last_field", parser.currentName());
365+
assertEquals(XContentParser.Token.VALUE_STRING, parser.nextToken());
366+
assertEquals(XContentParser.Token.END_OBJECT, parser.nextToken());
367+
assertNull(parser.nextToken());
368+
}
369+
}
370+
371+
public void testMarkAtWrongPlace() throws IOException {
372+
XContentBuilder builder = XContentFactory.jsonBuilder();
373+
generateRandomObjectForMarking(builder);
374+
String content = Strings.toString(builder);
375+
376+
try (XContentParser parser = createParser(JsonXContent.jsonXContent, content)) {
377+
assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken());
378+
assertEquals(XContentParser.Token.FIELD_NAME, parser.nextToken()); // first field
379+
assertEquals("first_field", parser.currentName());
380+
IllegalStateException exception = expectThrows(IllegalStateException.class, parser::markParent);
381+
assertEquals("markParent() has to be called on the start of an object or an array", exception.getMessage());
382+
}
383+
}
384+
385+
386+
public void testMarkRoot() throws IOException {
387+
XContentBuilder builder = XContentFactory.jsonBuilder();
388+
int numberOfTokens = generateRandomObjectForMarking(builder);
389+
String content = Strings.toString(builder);
390+
391+
try (XContentParser parser = createParser(JsonXContent.jsonXContent, content)) {
392+
assertEquals(XContentParser.Token.START_OBJECT, parser.nextToken());
393+
XContentParser.Mark mark = parser.markParent();
394+
int tokensToSkip = randomInt(numberOfTokens + 3);
395+
for (int i = 0; i < tokensToSkip; i++) {
396+
// Simulate incomplete parsing
397+
assertNotNull(parser.nextToken());
398+
}
399+
mark.skipChildren();
400+
assertNull(parser.nextToken());
401+
}
402+
403+
}
404+
405+
/**
406+
* Generates a random object {"first_field": "foo", "marked_field": {...random...}, "last_field": "bar}
407+
*
408+
* Returns the number of tokens in the marked field
409+
*/
410+
private int generateRandomObjectForMarking(XContentBuilder builder) throws IOException {
411+
builder.startObject()
412+
.field("first_field", "foo")
413+
.field("marked_field");
414+
int numberOfTokens = generateRandomObject(builder, 0);
415+
builder.field("last_field", "bar").endObject();
416+
return numberOfTokens;
417+
}
418+
419+
420+
/**
421+
* Generates a random object {"first_field": "foo", "marked_field": [...random...], "last_field": "bar}
422+
*
423+
* Returns the number of tokens in the marked field
424+
*/
425+
private int generateRandomArrayForMarking(XContentBuilder builder) throws IOException {
426+
builder.startObject()
427+
.field("first_field", "foo")
428+
.field("marked_field");
429+
int numberOfTokens = generateRandomArray(builder, 0);
430+
builder.field("last_field", "bar").endObject();
431+
return numberOfTokens;
432+
}
433+
434+
private int generateRandomObject(XContentBuilder builder, int level) throws IOException {
435+
int tokens = 2;
436+
builder.startObject();
437+
int numberOfElements = randomInt(5);
438+
for (int i = 0; i < numberOfElements; i++) {
439+
builder.field(randomAlphaOfLength(10) + "_" + i);
440+
tokens += generateRandomValue(builder, level + 1);
441+
}
442+
builder.endObject();
443+
return tokens;
444+
}
445+
446+
private int generateRandomValue(XContentBuilder builder, int level) throws IOException {
447+
@SuppressWarnings("unchecked") CheckedSupplier<Integer, IOException> fieldGenerator = randomFrom(
448+
() -> {
449+
builder.value(randomInt());
450+
return 1;
451+
},
452+
() -> {
453+
builder.value(randomAlphaOfLength(10));
454+
return 1;
455+
},
456+
() -> {
457+
builder.value(randomDouble());
458+
return 1;
459+
},
460+
() -> {
461+
if (level < 3) {
462+
// don't need to go too deep
463+
return generateRandomObject(builder, level + 1);
464+
} else {
465+
builder.value(0);
466+
return 1;
467+
}
468+
},
469+
() -> {
470+
if (level < 5) { // don't need to go too deep
471+
return generateRandomArray(builder, level);
472+
} else {
473+
builder.value(0);
474+
return 1;
475+
}
476+
}
477+
);
478+
return fieldGenerator.get();
479+
}
480+
481+
private int generateRandomArray(XContentBuilder builder, int level) throws IOException {
482+
int tokens = 2;
483+
int arraySize = randomInt(3);
484+
builder.startArray();
485+
for (int i = 0; i < arraySize; i++) {
486+
tokens += generateRandomValue(builder, level + 1);
487+
}
488+
builder.endArray();
489+
return tokens;
490+
}
491+
330492
}

server/src/main/java/org/elasticsearch/common/geo/parsers/GeoJsonParser.java

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ protected static ShapeBuilder parse(XContentParser parser, GeoShapeFieldMapper s
5555
String malformedException = null;
5656

5757
XContentParser.Token token;
58+
XContentParser.Mark mark = parser.markParent();
5859
try {
5960
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
6061
if (token == XContentParser.Token.FIELD_NAME) {
@@ -110,10 +111,7 @@ protected static ShapeBuilder parse(XContentParser parser, GeoShapeFieldMapper s
110111
}
111112
} catch (Exception ex) {
112113
// Skip all other fields until the end of the object
113-
while (parser.currentToken() != XContentParser.Token.END_OBJECT && parser.currentToken() != null) {
114-
parser.nextToken();
115-
parser.skipChildren();
116-
}
114+
mark.skipChildren();
117115
throw ex;
118116
}
119117

server/src/test/java/org/elasticsearch/common/geo/GeoJsonShapeParserTests.java

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1213,4 +1213,31 @@ public void testParseInvalidShapes() throws IOException {
12131213
assertNull(parser.nextToken());
12141214
}
12151215
}
1216+
1217+
public void testParseInvalidGeometryCollectionShapes() throws IOException {
1218+
// single dimensions point
1219+
XContentBuilder invalidPoints = XContentFactory.jsonBuilder()
1220+
.startObject()
1221+
.startObject("foo")
1222+
.field("type", "geometrycollection")
1223+
.startArray("geometries")
1224+
.startObject()
1225+
.field("type", "polygon")
1226+
.startArray("coordinates")
1227+
.startArray().value("46.6022226498514").value("24.7237442867977").endArray()
1228+
.startArray().value("46.6031857243798").value("24.722968774929").endArray()
1229+
.endArray() // coordinates
1230+
.endObject()
1231+
.endArray() // geometries
1232+
.endObject()
1233+
.endObject();
1234+
try (XContentParser parser = createParser(invalidPoints)) {
1235+
parser.nextToken(); // foo
1236+
parser.nextToken(); // start object
1237+
parser.nextToken(); // start object
1238+
ElasticsearchGeoAssertions.assertValidException(parser, ElasticsearchParseException.class);
1239+
assertEquals(XContentParser.Token.END_OBJECT, parser.nextToken()); // end of the document
1240+
assertNull(parser.nextToken()); // no more elements afterwards
1241+
}
1242+
}
12161243
}

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/watcher/support/xcontent/WatcherXContentParser.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,4 +287,9 @@ public void close() throws IOException {
287287
public DeprecationHandler getDeprecationHandler() {
288288
return parser.getDeprecationHandler();
289289
}
290+
291+
@Override
292+
public Mark markParent() {
293+
return parser.markParent();
294+
}
290295
}

0 commit comments

Comments
 (0)