20
20
21
21
package org .logstash .common ;
22
22
23
- import org .jruby .Ruby ;
24
- import org .jruby .RubyArray ;
25
- import org .jruby .RubyClass ;
26
- import org .jruby .RubyObject ;
27
- import org .jruby .RubyString ;
23
+ import org .jruby .*;
28
24
import org .jruby .anno .JRubyClass ;
29
25
import org .jruby .anno .JRubyMethod ;
30
26
import org .jruby .runtime .ThreadContext ;
31
27
import org .jruby .runtime .builtin .IRubyObject ;
28
+ import org .jruby .util .ByteList ;
32
29
import org .logstash .RubyUtil ;
33
30
31
+ import java .nio .charset .Charset ;
32
+ import java .nio .charset .StandardCharsets ;
33
+
34
34
@ JRubyClass (name = "BufferedTokenizer" )
35
35
public class BufferedTokenizerExt extends RubyObject {
36
36
@@ -46,6 +46,7 @@ public class BufferedTokenizerExt extends RubyObject {
46
46
private boolean hasSizeLimit ;
47
47
private int inputSize ;
48
48
private boolean bufferFullErrorNotified = false ;
49
+ private String encodingName ;
49
50
50
51
public BufferedTokenizerExt (final Ruby runtime , final RubyClass metaClass ) {
51
52
super (runtime , metaClass );
@@ -82,6 +83,8 @@ public IRubyObject init(final ThreadContext context, IRubyObject[] args) {
82
83
@ JRubyMethod
83
84
@ SuppressWarnings ("rawtypes" )
84
85
public RubyArray extract (final ThreadContext context , IRubyObject data ) {
86
+ RubyEncoding encoding = (RubyEncoding ) data .convertToString ().encoding (context );
87
+ encodingName = encoding .getEncoding ().getCharsetName ();
85
88
final RubyArray entities = data .convertToString ().split (delimiter , -1 );
86
89
if (!bufferFullErrorNotified ) {
87
90
input .clear ();
@@ -134,7 +137,10 @@ public RubyArray extract(final ThreadContext context, IRubyObject data) {
134
137
// if there is a pending token part, merge it with the first token segment present
135
138
// in the accumulator, and clean the pending token part.
136
139
headToken .append (input .shift (context )); // append buffer to first element and
137
- input .unshift (RubyUtil .toRubyObject (headToken .toString ())); // reinsert it into the array
140
// create a new RubyString that carries the same encoding as the input data
141
+ RubyString encodedHeadToken = RubyUtil .RUBY .newString (new ByteList (headToken .toString ().getBytes (Charset .forName (encodingName ))));
142
+ encodedHeadToken .force_encoding (context , RubyUtil .RUBY .newString (encodingName ));
143
+ input .unshift (encodedHeadToken ); // reinsert it into the array
138
144
headToken = new StringBuilder ();
139
145
}
140
146
headToken .append (input .pop (context )); // put the leftovers in headToken for later
@@ -154,7 +160,12 @@ public IRubyObject flush(final ThreadContext context) {
154
160
final IRubyObject buffer = RubyUtil .toRubyObject (headToken .toString ());
155
161
headToken = new StringBuilder ();
156
162
inputSize = 0 ;
157
- return buffer ;
163
+
164
// create a new RubyString that carries the encoding of the most recently extracted data
165
+ RubyString encodedHeadToken = RubyUtil .RUBY .newString (new ByteList (buffer .toString ().getBytes (Charset .forName (encodingName ))));
166
+ encodedHeadToken .force_encoding (context , RubyUtil .RUBY .newString (encodingName ));
167
+
168
+ return encodedHeadToken ;
158
169
}
159
170
160
171
@ JRubyMethod (name = "empty?" )
0 commit comments