Skip to content

Commit f118190

Browse files
committed
Add option to split processor for preserving trailing empty fields (elastic#48664)
1 parent 356066c commit f118190

File tree

4 files changed

+68
-11
lines changed

4 files changed

+68
-11
lines changed

docs/reference/ingest/processors/split.asciidoc

+18
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ Splits a field into an array using a separator character. Only works on string f
1111
| `separator` | yes | - | A regex which matches the separator, e.g. `,` or `\s+`
1212
| `target_field` | no | `field` | The field to assign the split value to, by default `field` is updated in-place
1313
| `ignore_missing` | no | `false` | If `true` and `field` does not exist, the processor quietly exits without modifying the document
14+
| `preserve_trailing` | no | `false` | If `true`, empty trailing fields in the input are preserved; by default they are discarded
1415
include::common-options.asciidoc[]
1516
|======
1617

@@ -25,3 +26,20 @@ include::common-options.asciidoc[]
2526
--------------------------------------------------
2627
// NOTCONSOLE
2728
<1> Treat all consecutive whitespace characters as a single separator
29+
30+
If the `preserve_trailing` option is enabled, any trailing empty fields in the input will be preserved. For example,
31+
in the configuration below, a value of `A,,B,,` in the `my_field` property will be split into an array of five elements
32+
`["A", "", "B", "", ""]` with two empty trailing fields. If the `preserve_trailing` option were not enabled, the two
33+
empty trailing fields would be discarded resulting in the three-element array `["A", "", "B"]`.
34+
35+
[source,js]
36+
--------------------------------------------------
37+
{
38+
"split": {
39+
"field": "my_field",
40+
"separator": ",",
41+
"preserve_trailing": true
42+
}
43+
}
44+
--------------------------------------------------
45+
// NOTCONSOLE

modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/SplitProcessor.java

+9-4
Original file line numberDiff line numberDiff line change
@@ -41,13 +41,15 @@ public final class SplitProcessor extends AbstractProcessor {
4141
private final String field;
4242
private final String separator;
4343
private final boolean ignoreMissing;
44+
private final boolean preserveTrailing;
4445
private final String targetField;
4546

46-
SplitProcessor(String tag, String field, String separator, boolean ignoreMissing, String targetField) {
47+
/**
 * Creates a split processor.
 *
 * @param tag              processor tag, passed to the superclass constructor
 * @param field            name of the field whose string value is split
 * @param separator        regex handed to {@link String#split(String, int)} as the delimiter
 * @param ignoreMissing    flag reported by {@code isIgnoreMissing()}
 * @param preserveTrailing {@code true} to keep trailing empty strings in the split result
 * @param targetField      name of the field that receives the resulting list
 */
SplitProcessor(
    String tag,
    String field,
    String separator,
    boolean ignoreMissing,
    boolean preserveTrailing,
    String targetField
) {
    super(tag);
    // Where to read from and where to write to.
    this.field = field;
    this.targetField = targetField;
    // How to split.
    this.separator = separator;
    this.preserveTrailing = preserveTrailing;
    this.ignoreMissing = ignoreMissing;
}
5355

@@ -63,6 +65,8 @@ boolean isIgnoreMissing() {
6365
return ignoreMissing;
6466
}
6567

68+
/** Returns whether trailing empty strings are retained when the field value is split. */
boolean isPreserveTrailing() {
    return this.preserveTrailing;
}
69+
6670
String getTargetField() {
6771
return targetField;
6872
}
@@ -77,7 +81,7 @@ public IngestDocument execute(IngestDocument document) {
7781
throw new IllegalArgumentException("field [" + field + "] is null, cannot split.");
7882
}
7983

80-
String[] strings = oldVal.split(separator);
84+
String[] strings = oldVal.split(separator, preserveTrailing ? -1 : 0);
8185
List<String> splitList = new ArrayList<>(strings.length);
8286
Collections.addAll(splitList, strings);
8387
document.setFieldValue(targetField, splitList);
@@ -95,9 +99,10 @@ public SplitProcessor create(Map<String, Processor.Factory> registry, String pro
9599
Map<String, Object> config) throws Exception {
96100
String field = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "field");
97101
boolean ignoreMissing = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "ignore_missing", false);
102+
boolean preserveTrailing = ConfigurationUtils.readBooleanProperty(TYPE, processorTag, config, "preserve_trailing", false);
98103
String targetField = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "target_field", field);
99-
return new SplitProcessor(processorTag, field,
100-
ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "separator"), ignoreMissing, targetField);
104+
String separator = ConfigurationUtils.readStringProperty(TYPE, processorTag, config, "separator");
105+
return new SplitProcessor(processorTag, field, separator, ignoreMissing, preserveTrailing, targetField);
101106
}
102107
}
103108
}

modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/SplitProcessorFactoryTests.java

+18
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,24 @@ public void testCreateWithTargetField() throws Exception {
7979
assertThat(splitProcessor.getField(), equalTo("field1"));
8080
assertThat(splitProcessor.getSeparator(), equalTo("\\."));
8181
assertFalse(splitProcessor.isIgnoreMissing());
82+
assertFalse(splitProcessor.isPreserveTrailing());
8283
assertThat(splitProcessor.getTargetField(), equalTo("target"));
8384
}
85+
86+
public void testCreateWithPreserveTrailing() throws Exception {
87+
SplitProcessor.Factory factory = new SplitProcessor.Factory();
88+
Map<String, Object> config = new HashMap<>();
89+
config.put("field", "field1");
90+
config.put("separator", "\\.");
91+
config.put("target_field", "target");
92+
config.put("preserve_trailing", true);
93+
String processorTag = randomAlphaOfLength(10);
94+
SplitProcessor splitProcessor = factory.create(null, processorTag, config);
95+
assertThat(splitProcessor.getTag(), equalTo(processorTag));
96+
assertThat(splitProcessor.getField(), equalTo("field1"));
97+
assertThat(splitProcessor.getSeparator(), equalTo("\\."));
98+
assertFalse(splitProcessor.isIgnoreMissing());
99+
assertThat(splitProcessor.getTargetField(), equalTo("target"));
100+
}
101+
84102
}

modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/SplitProcessorTests.java

+23-7
Original file line numberDiff line numberDiff line change
@@ -39,15 +39,15 @@ public class SplitProcessorTests extends ESTestCase {
3939
public void testSplit() throws Exception {
4040
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random());
4141
String fieldName = RandomDocumentPicks.addRandomField(random(), ingestDocument, "127.0.0.1");
42-
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, fieldName);
42+
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, false, fieldName);
4343
processor.execute(ingestDocument);
4444
assertThat(ingestDocument.getFieldValue(fieldName, List.class), equalTo(Arrays.asList("127", "0", "0", "1")));
4545
}
4646

4747
public void testSplitFieldNotFound() throws Exception {
4848
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), new HashMap<>());
4949
String fieldName = RandomDocumentPicks.randomFieldName(random());
50-
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, fieldName);
50+
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, false, fieldName);
5151
try {
5252
processor.execute(ingestDocument);
5353
fail("split processor should have failed");
@@ -59,7 +59,7 @@ public void testSplitFieldNotFound() throws Exception {
5959
public void testSplitNullValue() throws Exception {
6060
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(),
6161
Collections.singletonMap("field", null));
62-
Processor processor = new SplitProcessor(randomAlphaOfLength(10), "field", "\\.", false, "field");
62+
Processor processor = new SplitProcessor(randomAlphaOfLength(10), "field", "\\.", false, false, "field");
6363
try {
6464
processor.execute(ingestDocument);
6565
fail("split processor should have failed");
@@ -73,15 +73,15 @@ public void testSplitNullValueWithIgnoreMissing() throws Exception {
7373
IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(random(),
7474
Collections.singletonMap(fieldName, null));
7575
IngestDocument ingestDocument = new IngestDocument(originalIngestDocument);
76-
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", true, fieldName);
76+
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", true, false, fieldName);
7777
processor.execute(ingestDocument);
7878
assertIngestDocument(originalIngestDocument, ingestDocument);
7979
}
8080

8181
public void testSplitNonExistentWithIgnoreMissing() throws Exception {
8282
IngestDocument originalIngestDocument = RandomDocumentPicks.randomIngestDocument(random(), Collections.emptyMap());
8383
IngestDocument ingestDocument = new IngestDocument(originalIngestDocument);
84-
Processor processor = new SplitProcessor(randomAlphaOfLength(10), "field", "\\.", true, "field");
84+
Processor processor = new SplitProcessor(randomAlphaOfLength(10), "field", "\\.", true, false, "field");
8585
processor.execute(ingestDocument);
8686
assertIngestDocument(originalIngestDocument, ingestDocument);
8787
}
@@ -90,7 +90,7 @@ public void testSplitNonStringValue() throws Exception {
9090
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random(), new HashMap<>());
9191
String fieldName = RandomDocumentPicks.randomFieldName(random());
9292
ingestDocument.setFieldValue(fieldName, randomInt());
93-
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, fieldName);
93+
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, false, fieldName);
9494
try {
9595
processor.execute(ingestDocument);
9696
fail("split processor should have failed");
@@ -121,8 +121,24 @@ public void testSplitWithTargetField() throws Exception {
121121
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random());
122122
String fieldName = RandomDocumentPicks.addRandomField(random(), ingestDocument, "127.0.0.1");
123123
String targetFieldName = fieldName + randomAlphaOfLength(5);
124-
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, targetFieldName);
124+
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\.", false, false, targetFieldName);
125125
processor.execute(ingestDocument);
126126
assertThat(ingestDocument.getFieldValue(targetFieldName, List.class), equalTo(Arrays.asList("127", "0", "0", "1")));
127127
}
128+
129+
public void testSplitWithPreserveTrailing() throws Exception {
130+
doTestSplitWithPreserveTrailing(true, "foo|bar|baz||", Arrays.asList("foo", "bar", "baz", "", ""));
131+
}
132+
133+
public void testSplitWithoutPreserveTrailing() throws Exception {
134+
doTestSplitWithPreserveTrailing(false, "foo|bar|baz||", Arrays.asList("foo", "bar", "baz"));
135+
}
136+
137+
private void doTestSplitWithPreserveTrailing(boolean preserveTrailing, String fieldValue, List<String> expected) throws Exception {
138+
IngestDocument ingestDocument = RandomDocumentPicks.randomIngestDocument(random());
139+
String fieldName = RandomDocumentPicks.addRandomField(random(), ingestDocument, fieldValue);
140+
Processor processor = new SplitProcessor(randomAlphaOfLength(10), fieldName, "\\|", false, preserveTrailing, fieldName);
141+
processor.execute(ingestDocument);
142+
assertThat(ingestDocument.getFieldValue(fieldName, List.class), equalTo(expected));
143+
}
128144
}

0 commit comments

Comments
 (0)