Skip to content

Commit 9874a65

Browse files
committed
Allow StaxEventItemReader to auto-detect the input file encoding
Before this commit, it was not possible to pass a null encoding to the StaxEventItemReader, which prevented the XML event reader from auto-detecting the file encoding. This commit makes the encoding setter more lenient by accepting a null value. Resolves #4101
1 parent 81e619b commit 9874a65

File tree

3 files changed

+83
-44
lines changed

3 files changed

+83
-44
lines changed

Diff for: spring-batch-infrastructure/src/main/java/org/springframework/batch/item/xml/StaxEventItemReader.java

+19-18
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2006-2020 the original author or authors.
2+
* Copyright 2006-2023 the original author or authors.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -49,13 +49,13 @@
4949

5050
/**
5151
* Item reader for reading XML input based on StAX.
52-
*
52+
*
5353
* It extracts fragments from the input XML document which correspond to records for processing. The fragments are
5454
* wrapped with StartDocument and EndDocument events so that the fragments can be further processed like standalone XML
5555
* documents.
56-
*
56+
*
5757
* The implementation is <b>not</b> thread-safe.
58-
*
58+
*
5959
* @author Robert Kasanicky
6060
* @author Mahmoud Ben Hassine
6161
*/
@@ -140,16 +140,16 @@ public void setXmlInputFactory(XMLInputFactory xmlInputFactory) {
140140
/**
141141
* Set encoding to be used for the input file. Defaults to {@link #DEFAULT_ENCODING}.
142142
*
143-
* @param encoding the encoding to be used
143+
* @param encoding the encoding to be used. Can be {@code null}, in which case, the
144+
* XML event reader will attempt to auto-detect the encoding from the input file.
144145
*/
145-
public void setEncoding(String encoding) {
146-
Assert.notNull(encoding, "The encoding must not be null");
146+
public void setEncoding(@Nullable String encoding) {
147147
this.encoding = encoding;
148148
}
149149

150150
/**
151151
* Ensure that all required dependencies for the ItemReader to run are provided after all properties have been set.
152-
*
152+
*
153153
* @see org.springframework.beans.factory.InitializingBean#afterPropertiesSet()
154154
* @throws IllegalArgumentException if the Resource, FragmentDeserializer or FragmentRootElementName is null, or if
155155
* the root element is empty.
@@ -161,19 +161,19 @@ public void afterPropertiesSet() throws Exception {
161161
Assert.notEmpty(fragmentRootElementNames, "The FragmentRootElementNames must not be empty");
162162
for (QName fragmentRootElementName : fragmentRootElementNames) {
163163
Assert.hasText(fragmentRootElementName.getLocalPart(), "The FragmentRootElementNames must not contain empty elements");
164-
}
164+
}
165165
}
166166

167167
/**
168168
* Responsible for moving the cursor before the StartElement of the fragment root.
169-
*
169+
*
170170
* This implementation simply looks for the next corresponding element, it does not care about element nesting. You
171171
* will need to override this method to correctly handle composite fragments.
172172
*
173173
* @param reader the {@link XMLEventReader} to be used to find next fragment.
174-
*
174+
*
175175
* @return <code>true</code> if next fragment was found, <code>false</code> otherwise.
176-
*
176+
*
177177
* @throws NonTransientResourceException if the cursor could not be moved. This will be treated as fatal and
178178
* subsequent calls to read will return null.
179179
*/
@@ -237,7 +237,8 @@ protected void doOpen() throws Exception {
237237
}
238238

239239
inputStream = resource.getInputStream();
240-
eventReader = xmlInputFactory.createXMLEventReader(inputStream, this.encoding);
240+
eventReader = this.encoding != null ? xmlInputFactory.createXMLEventReader(inputStream, this.encoding)
241+
: xmlInputFactory.createXMLEventReader(inputStream);
241242
fragmentReader = new DefaultFragmentEventReader(eventReader);
242243
noInput = false;
243244

@@ -332,19 +333,19 @@ private void readToEndFragment(QName fragmentRootElementName) throws XMLStreamEx
332333
}
333334
}
334335
}
335-
336+
336337
protected boolean isFragmentRootElementName(QName name) {
337338
for (QName fragmentRootElementName : fragmentRootElementNames) {
338339
if (fragmentRootElementName.getLocalPart().equals(name.getLocalPart())) {
339340
if (!StringUtils.hasText(fragmentRootElementName.getNamespaceURI())
340-
|| fragmentRootElementName.getNamespaceURI().equals(name.getNamespaceURI())) {
341+
|| fragmentRootElementName.getNamespaceURI().equals(name.getNamespaceURI())) {
341342
return true;
342343
}
343344
}
344345
}
345346
return false;
346-
}
347-
347+
}
348+
348349
private QName parseFragmentRootElementName(String fragmentRootElementName) {
349350
String name = fragmentRootElementName;
350351
String nameSpace = null;
@@ -354,5 +355,5 @@ private QName parseFragmentRootElementName(String fragmentRootElementName) {
354355
}
355356
return new QName(nameSpace, name, "");
356357
}
357-
358+
358359
}

Diff for: spring-batch-infrastructure/src/main/java/org/springframework/batch/item/xml/builder/StaxEventItemReaderBuilder.java

+6-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2017-2020 the original author or authors.
2+
* Copyright 2017-2023 the original author or authors.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -26,6 +26,7 @@
2626

2727
import org.springframework.batch.item.xml.StaxEventItemReader;
2828
import org.springframework.core.io.Resource;
29+
import org.springframework.lang.Nullable;
2930
import org.springframework.oxm.Unmarshaller;
3031
import org.springframework.util.Assert;
3132
import org.springframework.util.StringUtils;
@@ -203,13 +204,14 @@ public StaxEventItemReaderBuilder<T> xmlInputFactory(XMLInputFactory xmlInputFac
203204
}
204205

205206
/**
206-
* Encoding for the input file. Defaults to {@link StaxEventItemReader#DEFAULT_ENCODING}.
207-
*
207+
* Encoding for the input file. Defaults to
208+
* {@link StaxEventItemReader#DEFAULT_ENCODING}. Can be {@code null}, in which case
209+
* the XML event reader will attempt to auto-detect the encoding from the input file.
208210
* @param encoding the name of the character encoding of the input file
209211
* @return the current instance of the builder
210212
* @see StaxEventItemReader#setEncoding(String)
211213
*/
212-
public StaxEventItemReaderBuilder<T> encoding(String encoding) {
214+
public StaxEventItemReaderBuilder<T> encoding(@Nullable String encoding) {
213215
this.encoding = encoding;
214216

215217
return this;

Diff for: spring-batch-infrastructure/src/test/java/org/springframework/batch/item/xml/StaxEventItemReaderTests.java

+58-22
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2008-2020 the original author or authors.
2+
* Copyright 2008-2023 the original author or authors.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -36,8 +36,10 @@
3636
import javax.xml.namespace.QName;
3737
import javax.xml.stream.FactoryConfigurationError;
3838
import javax.xml.stream.XMLEventReader;
39+
import javax.xml.stream.XMLInputFactory;
3940
import javax.xml.stream.XMLStreamException;
4041
import javax.xml.stream.events.EndElement;
42+
import javax.xml.stream.events.StartDocument;
4143
import javax.xml.stream.events.StartElement;
4244
import javax.xml.stream.events.XMLEvent;
4345
import javax.xml.transform.Source;
@@ -57,10 +59,13 @@
5759
import static org.junit.Assert.assertNull;
5860
import static org.junit.Assert.assertTrue;
5961
import static org.junit.Assert.fail;
62+
import static org.mockito.Mockito.mock;
63+
import static org.mockito.Mockito.verify;
64+
import static org.mockito.Mockito.when;
6065

6166
/**
6267
* Tests for {@link StaxEventItemReader}.
63-
*
68+
*
6469
* @author Robert Kasanicky
6570
* @author Michael Minella
6671
* @author Mahmoud Ben Hassine
@@ -94,7 +99,7 @@ public class StaxEventItemReaderTests {
9499
private Unmarshaller unmarshaller = new MockFragmentUnmarshaller();
95100

96101
private static final String FRAGMENT_ROOT_ELEMENT = "fragment";
97-
102+
98103
private static final String[] MULTI_FRAGMENT_ROOT_ELEMENTS = {"fragmentA", "fragmentB"};
99104

100105
private ExecutionContext executionContext;
@@ -170,6 +175,37 @@ public void testCustomEncoding() throws Exception {
170175
source.close();
171176
}
172177

178+
@Test
179+
public void testNullEncoding() throws Exception {
180+
// given
181+
XMLEventReader eventReader = mock(XMLEventReader.class);
182+
when(eventReader.peek()).thenReturn(mock(StartDocument.class));
183+
184+
Resource resource = mock(Resource.class);
185+
InputStream inputStream = mock(InputStream.class);
186+
when(resource.getInputStream()).thenReturn(inputStream);
187+
when(resource.isReadable()).thenReturn(true);
188+
when(resource.exists()).thenReturn(true);
189+
XMLInputFactory xmlInputFactory = mock(XMLInputFactory.class);
190+
when(xmlInputFactory.createXMLEventReader(inputStream)).thenReturn(eventReader);
191+
192+
StaxEventItemReader<Object> reader = new StaxEventItemReader<>();
193+
reader.setUnmarshaller(new MockFragmentUnmarshaller());
194+
reader.setFragmentRootElementName(FRAGMENT_ROOT_ELEMENT);
195+
reader.setResource(resource);
196+
reader.setEncoding(null);
197+
reader.setStrict(false);
198+
reader.setXmlInputFactory(xmlInputFactory);
199+
reader.afterPropertiesSet();
200+
201+
// when
202+
reader.open(new ExecutionContext());
203+
204+
// then
205+
verify(xmlInputFactory).createXMLEventReader(inputStream);
206+
reader.close();
207+
}
208+
173209
@Test
174210
public void testItemCountAwareFragment() throws Exception {
175211
StaxEventItemReader<ItemCountAwareFragment> source = createNewItemCountAwareInputSource();
@@ -247,7 +283,7 @@ public void testFragmentInvalid() throws Exception {
247283

248284
source.close();
249285
}
250-
286+
251287
@Test
252288
public void testMultiFragment() throws Exception {
253289

@@ -262,7 +298,7 @@ public void testMultiFragment() throws Exception {
262298
assertNull(source.read()); // there are only three fragments
263299

264300
source.close();
265-
}
301+
}
266302

267303
@Test
268304
public void testMultiFragmentNameSpace() throws Exception {
@@ -277,7 +313,7 @@ public void testMultiFragmentNameSpace() throws Exception {
277313
assertNull(source.read()); // there are only two fragments (one has wrong namespace)
278314

279315
source.close();
280-
}
316+
}
281317

282318
@Test
283319
public void testMultiFragmentRestart() throws Exception {
@@ -289,23 +325,23 @@ public void testMultiFragmentRestart() throws Exception {
289325
// see asserts in the mock unmarshaller
290326
assertNotNull(source.read());
291327
assertNotNull(source.read());
292-
293-
source.update(executionContext);
328+
329+
source.update(executionContext);
294330
assertEquals(2, executionContext.getInt(ClassUtils.getShortName(StaxEventItemReader.class) + ".read.count"));
295-
331+
296332
source.close();
297-
333+
298334
source = createNewInputSource();
299335
source.setResource(new ByteArrayResource(xmlMultiFragment.getBytes()));
300336
source.setFragmentRootElementNames(MULTI_FRAGMENT_ROOT_ELEMENTS);
301337
source.afterPropertiesSet();
302338
source.open(executionContext);
303-
339+
304340
assertNotNull(source.read());
305341
assertNull(source.read()); // there are only three fragments
306342

307343
source.close();
308-
}
344+
}
309345

310346
@Test
311347
public void testMultiFragmentNested() throws Exception {
@@ -322,7 +358,7 @@ public void testMultiFragmentNested() throws Exception {
322358

323359
source.close();
324360
}
325-
361+
326362
@Test
327363
public void testMultiFragmentNestedRestart() throws Exception {
328364

@@ -333,24 +369,24 @@ public void testMultiFragmentNestedRestart() throws Exception {
333369
// see asserts in the mock unmarshaller
334370
assertNotNull(source.read());
335371
assertNotNull(source.read());
336-
337-
source.update(executionContext);
372+
373+
source.update(executionContext);
338374
assertEquals(2, executionContext.getInt(ClassUtils.getShortName(StaxEventItemReader.class) + ".read.count"));
339-
375+
340376
source.close();
341-
377+
342378
source = createNewInputSource();
343379
source.setResource(new ByteArrayResource(xmlMultiFragment.getBytes()));
344380
source.setFragmentRootElementNames(MULTI_FRAGMENT_ROOT_ELEMENTS);
345381
source.afterPropertiesSet();
346382
source.open(executionContext);
347-
383+
348384
assertNotNull(source.read());
349385
assertNull(source.read()); // there are only three fragments
350386

351387
source.close();
352-
}
353-
388+
}
389+
354390
/**
355391
* Cursor is moved before beginning of next fragment.
356392
*/
@@ -714,7 +750,7 @@ public boolean supports(Class<?> clazz) {
714750

715751
/**
716752
* A simple mapFragment implementation checking the basic read functionality of StaxEventItemReader.
717-
*
753+
*
718754
* @param source
719755
* @return list of the events from fragment body
720756
*/
@@ -753,7 +789,7 @@ public Object unmarshal(Source source) throws XmlMappingException, IOException {
753789
}
754790
return fragmentContent;
755791
}
756-
792+
757793
private boolean isFragmentRootElement(String name) {
758794
return FRAGMENT_ROOT_ELEMENT.equals(name) || Arrays.asList(MULTI_FRAGMENT_ROOT_ELEMENTS).contains(name);
759795
}

0 commit comments

Comments
 (0)