Skip to content

Commit 5631815

Browse files
committed
Fix content type detection with leading whitespace (#32632)
Today, content type detection on an input stream works by peeking up to twenty bytes into the stream. If the stream begins with more than twenty bytes of whitespace, we might fail to detect the content type. We should ignore this whitespace before attempting to detect the content type. This commit does that by skipping all leading whitespace in an input stream before attempting to guess the content type.
1 parent c644789 commit 5631815

File tree

2 files changed

+46
-6
lines changed

2 files changed

+46
-6
lines changed

core/src/main/java/org/elasticsearch/common/xcontent/XContentFactory.java

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
*/
4040
public class XContentFactory {
4141

42-
private static final int GUESS_HEADER_LENGTH = 20;
42+
static final int GUESS_HEADER_LENGTH = 20;
4343

4444
/**
4545
* Returns a content builder using JSON format ({@link org.elasticsearch.common.xcontent.XContentType#JSON}).
@@ -213,17 +213,41 @@ public static XContentType xContentType(byte[] data) {
213213
* Guesses the content type based on the provided input stream without consuming it.
214214
*/
215215
public static XContentType xContentType(InputStream si) throws IOException {
216+
/*
217+
* We need to guess the content type. To do this, we look for the first non-whitespace character and then try to guess the content
218+
* type on the GUESS_HEADER_LENGTH bytes that follow. We do this in a way that does not modify the initial read position in the
219+
* underlying input stream. This is why the input stream must support mark/reset and why we repeatedly mark the read position and
220+
* reset.
221+
*/
216222
if (si.markSupported() == false) {
217223
throw new IllegalArgumentException("Cannot guess the xcontent type without mark/reset support on " + si.getClass());
218224
}
219-
si.mark(GUESS_HEADER_LENGTH);
225+
si.mark(Integer.MAX_VALUE);
220226
try {
227+
// scan until we find the first non-whitespace character or the end of the stream
228+
int current;
229+
do {
230+
current = si.read();
231+
if (current == -1) {
232+
return null;
233+
}
234+
} while (Character.isWhitespace((char) current));
235+
// now guess the content type off the next GUESS_HEADER_LENGTH bytes including the current byte
221236
final byte[] firstBytes = new byte[GUESS_HEADER_LENGTH];
222-
final int read = Streams.readFully(si, firstBytes);
223-
return xContentType(new BytesArray(firstBytes, 0, read));
237+
firstBytes[0] = (byte) current;
238+
int read = 1;
239+
while (read < GUESS_HEADER_LENGTH) {
240+
final int r = si.read(firstBytes, read, GUESS_HEADER_LENGTH - read);
241+
if (r == -1) {
242+
break;
243+
}
244+
read += r;
245+
}
246+
return xContentType(firstBytes, 0, read);
224247
} finally {
225248
si.reset();
226249
}
250+
227251
}
228252

229253
/**

core/src/test/java/org/elasticsearch/common/xcontent/XContentFactoryTests.java

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,15 @@
2121

2222
import com.fasterxml.jackson.dataformat.cbor.CBORConstants;
2323
import com.fasterxml.jackson.dataformat.smile.SmileConstants;
24+
import org.elasticsearch.common.Strings;
2425
import org.elasticsearch.common.bytes.BytesArray;
26+
import org.elasticsearch.common.bytes.BytesReference;
2527
import org.elasticsearch.common.io.stream.StreamInput;
2628
import org.elasticsearch.test.ESTestCase;
2729

2830
import java.io.ByteArrayInputStream;
2931
import java.io.IOException;
32+
import java.util.Arrays;
3033

3134
import static org.hamcrest.Matchers.equalTo;
3235

@@ -56,8 +59,21 @@ private void testGuessType(XContentType type) throws IOException {
5659
builder.field("field1", "value1");
5760
builder.endObject();
5861

59-
assertThat(XContentFactory.xContentType(builder.bytes()), equalTo(type));
60-
assertThat(XContentFactory.xContentType(builder.bytes().streamInput()), equalTo(type));
62+
final BytesReference bytes;
63+
if (type == XContentType.JSON && randomBoolean()) {
64+
final int length = randomIntBetween(0, 8 * XContentFactory.GUESS_HEADER_LENGTH);
65+
final String content = builder.string();
66+
final StringBuilder sb = new StringBuilder(length + content.length());
67+
final char[] chars = new char[length];
68+
Arrays.fill(chars, ' ');
69+
sb.append(new String(chars)).append(content);
70+
bytes = new BytesArray(sb.toString());
71+
} else {
72+
bytes = builder.bytes();
73+
}
74+
75+
assertThat(XContentFactory.xContentType(bytes), equalTo(type));
76+
assertThat(XContentFactory.xContentType(bytes.streamInput()), equalTo(type));
6177

6278
// CBOR is binary, cannot use String
6379
if (type != XContentType.CBOR && type != XContentType.SMILE) {

0 commit comments

Comments
 (0)