Skip to content

Commit 015ba43

Browse files
committed
Force UTF8 for empty string when flush is invoked without a previous extract call
1 parent b42ca05 commit 015ba43

File tree

2 files changed

+28
-3
lines changed

2 files changed

+28
-3
lines changed

logstash-core/src/main/java/org/logstash/common/BufferedTokenizerExt.java

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -160,9 +160,20 @@ public IRubyObject flush(final ThreadContext context) {
160160
headToken = new StringBuilder();
161161
inputSize = 0;
162162

163-
// create new RubyString with the last data specified encoding
164-
RubyString encodedHeadToken = RubyUtil.RUBY.newString(new ByteList(buffer.toString().getBytes(Charset.forName(encodingName))));
165-
encodedHeadToken.force_encoding(context, RubyUtil.RUBY.newString(encodingName));
163+
// create a new RubyString using the encoding of the last extracted data, if one was assigned
164+
RubyString encodedHeadToken;
165+
if (encodingName != null) {
166+
encodedHeadToken = RubyUtil.RUBY.newString(new ByteList(buffer.toString().getBytes(Charset.forName(encodingName))));
167+
encodedHeadToken.force_encoding(context, RubyUtil.RUBY.newString(encodingName));
168+
} else {
169+
// With the TCP input, the flush method may be invoked on socket connection
170+
// before any invocation of extract, which leaves the encoding name unassigned.
171+
// In that case the headToken must also be empty.
172+
if (!buffer.toString().isEmpty()) {
173+
throw new IllegalStateException("invoked flush with unassigned encoding but not empty head token, this shouldn't happen");
174+
}
175+
encodedHeadToken = (RubyString) buffer;
176+
}
166177

167178
return encodedHeadToken;
168179
}

logstash-core/src/test/java/org/logstash/common/BufferedTokenizerExtTest.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,4 +144,18 @@ public void shouldNotChangeEncodingOfTokensAfterPartitioningWhenRetrieveLastFlus
144144
RubyEncoding encoding = (RubyEncoding) lastToken.callMethod(context, "encoding");
145145
assertEquals("ISO-8859-1", encoding.toString());
146146
}
147+
148+
@Test
149+
public void givenDirectFlushInvocationUTF8EncodingIsApplied() {
150+
RubyString rubyString = RubyString.newString(RUBY, new byte[]{(byte) 0xA3, 0x41}); // £ character, A
151+
IRubyObject rubyInput = rubyString.force_encoding(context, RUBY.newString("ISO8859-1"));
152+
153+
// flush without any previous extract call and check that an empty token is returned
154+
IRubyObject lastToken = sut.flush(context);
155+
assertEquals("", lastToken.toString());
156+
157+
// verify the empty token is UTF-8 encoded, since no extract call ever assigned an encoding
158+
RubyEncoding encoding = (RubyEncoding) lastToken.callMethod(context, "encoding");
159+
assertEquals("UTF-8", encoding.toString());
160+
}
147161
}

0 commit comments

Comments
 (0)