Skip to content

Commit 8dfa1fe

Browse files
committed
Updates the points where a Java String is returned so that the returned string is encoded with the input data's encoding, leaving the encoding unchanged
1 parent 76139e9 commit 8dfa1fe

File tree

1 file changed

+18
-7
lines changed

1 file changed

+18
-7
lines changed

Diff for: logstash-core/src/main/java/org/logstash/common/BufferedTokenizerExt.java

+18-7
Original file line numberDiff line numberDiff line change
@@ -20,17 +20,17 @@
2020

2121
package org.logstash.common;
2222

23-
import org.jruby.Ruby;
24-
import org.jruby.RubyArray;
25-
import org.jruby.RubyClass;
26-
import org.jruby.RubyObject;
27-
import org.jruby.RubyString;
23+
import org.jruby.*;
2824
import org.jruby.anno.JRubyClass;
2925
import org.jruby.anno.JRubyMethod;
3026
import org.jruby.runtime.ThreadContext;
3127
import org.jruby.runtime.builtin.IRubyObject;
28+
import org.jruby.util.ByteList;
3229
import org.logstash.RubyUtil;
3330

31+
import java.nio.charset.Charset;
32+
import java.nio.charset.StandardCharsets;
33+
3434
@JRubyClass(name = "BufferedTokenizer")
3535
public class BufferedTokenizerExt extends RubyObject {
3636

@@ -46,6 +46,7 @@ public class BufferedTokenizerExt extends RubyObject {
4646
private boolean hasSizeLimit;
4747
private int inputSize;
4848
private boolean bufferFullErrorNotified = false;
49+
private String encodingName;
4950

5051
public BufferedTokenizerExt(final Ruby runtime, final RubyClass metaClass) {
5152
super(runtime, metaClass);
@@ -82,6 +83,8 @@ public IRubyObject init(final ThreadContext context, IRubyObject[] args) {
8283
@JRubyMethod
8384
@SuppressWarnings("rawtypes")
8485
public RubyArray extract(final ThreadContext context, IRubyObject data) {
86+
RubyEncoding encoding = (RubyEncoding) data.convertToString().encoding(context);
87+
encodingName = encoding.getEncoding().getCharsetName();
8588
final RubyArray entities = data.convertToString().split(delimiter, -1);
8689
if (!bufferFullErrorNotified) {
8790
input.clear();
@@ -134,7 +137,10 @@ public RubyArray extract(final ThreadContext context, IRubyObject data) {
134137
// if there is a pending token part, merge it with the first token segment present
135138
// in the accumulator, and clean the pending token part.
136139
headToken.append(input.shift(context)); // append buffer to first element and
137-
input.unshift(RubyUtil.toRubyObject(headToken.toString())); // reinsert it into the array
140+
// create a new RubyString using the encoding specified by the input data
141+
RubyString encodedHeadToken = RubyUtil.RUBY.newString(new ByteList(headToken.toString().getBytes(Charset.forName(encodingName))));
142+
encodedHeadToken.force_encoding(context, RubyUtil.RUBY.newString(encodingName));
143+
input.unshift(encodedHeadToken); // reinsert it into the array
138144
headToken = new StringBuilder();
139145
}
140146
headToken.append(input.pop(context)); // put the leftovers in headToken for later
@@ -154,7 +160,12 @@ public IRubyObject flush(final ThreadContext context) {
154160
final IRubyObject buffer = RubyUtil.toRubyObject(headToken.toString());
155161
headToken = new StringBuilder();
156162
inputSize = 0;
157-
return buffer;
163+
164+
// create a new RubyString using the encoding of the most recently extracted data
165+
RubyString encodedHeadToken = RubyUtil.RUBY.newString(new ByteList(buffer.toString().getBytes(Charset.forName(encodingName))));
166+
encodedHeadToken.force_encoding(context, RubyUtil.RUBY.newString(encodingName));
167+
168+
return encodedHeadToken;
158169
}
159170

160171
@JRubyMethod(name = "empty?")

0 commit comments

Comments
 (0)