2020
2121package org .logstash .common ;
2222
23- import org .jruby .Ruby ;
24- import org .jruby .RubyArray ;
25- import org .jruby .RubyClass ;
26- import org .jruby .RubyObject ;
27- import org .jruby .RubyString ;
23+ import org .jruby .*;
2824import org .jruby .anno .JRubyClass ;
2925import org .jruby .anno .JRubyMethod ;
3026import org .jruby .runtime .ThreadContext ;
3127import org .jruby .runtime .builtin .IRubyObject ;
28+ import org .jruby .util .ByteList ;
3229import org .logstash .RubyUtil ;
3330
31+ import java .nio .charset .Charset ;
32+ import java .nio .charset .StandardCharsets ;
33+
3434@ JRubyClass (name = "BufferedTokenizer" )
3535public class BufferedTokenizerExt extends RubyObject {
3636
@@ -46,6 +46,7 @@ public class BufferedTokenizerExt extends RubyObject {
4646 private boolean hasSizeLimit ;
4747 private int inputSize ;
4848 private boolean bufferFullErrorNotified = false ;
49+ private String encodingName ;
4950
5051 public BufferedTokenizerExt (final Ruby runtime , final RubyClass metaClass ) {
5152 super (runtime , metaClass );
@@ -82,6 +83,8 @@ public IRubyObject init(final ThreadContext context, IRubyObject[] args) {
8283 @ JRubyMethod
8384 @ SuppressWarnings ("rawtypes" )
8485 public RubyArray extract (final ThreadContext context , IRubyObject data ) {
86+ RubyEncoding encoding = (RubyEncoding ) data .convertToString ().encoding (context );
87+ encodingName = encoding .getEncoding ().getCharsetName ();
8588 final RubyArray entities = data .convertToString ().split (delimiter , -1 );
8689 if (!bufferFullErrorNotified ) {
8790 input .clear ();
@@ -134,7 +137,10 @@ public RubyArray extract(final ThreadContext context, IRubyObject data) {
134137 // if there is a pending token part, merge it with the first token segment present
135138 // in the accumulator, and clean the pending token part.
136139 headToken .append (input .shift (context )); // append buffer to first element and
137- input .unshift (RubyUtil .toRubyObject (headToken .toString ())); // reinsert it into the array
140+ // rebuild the merged token as a RubyString that carries the encoding of the incoming data
141+ RubyString encodedHeadToken = RubyUtil .RUBY .newString (new ByteList (headToken .toString ().getBytes (Charset .forName (encodingName ))));
142+ encodedHeadToken .force_encoding (context , RubyUtil .RUBY .newString (encodingName ));
143+ input .unshift (encodedHeadToken ); // reinsert it into the array
138144 headToken = new StringBuilder ();
139145 }
140146 headToken .append (input .pop (context )); // put the leftovers in headToken for later
@@ -154,7 +160,12 @@ public IRubyObject flush(final ThreadContext context) {
154160 final IRubyObject buffer = RubyUtil .toRubyObject (headToken .toString ());
155161 headToken = new StringBuilder ();
156162 inputSize = 0 ;
157- return buffer ;
163+
164+ // rebuild the buffered remainder as a RubyString that carries the encoding recorded by the most recent extract call
165+ RubyString encodedHeadToken = RubyUtil .RUBY .newString (new ByteList (buffer .toString ().getBytes (Charset .forName (encodingName ))));
166+ encodedHeadToken .force_encoding (context , RubyUtil .RUBY .newString (encodingName ));
167+
168+ return encodedHeadToken ;
158169 }
159170
160171 @ JRubyMethod (name = "empty?" )
0 commit comments