2020
2121package  org .logstash .common ;
2222
23- import  org .jruby .*;
23+ import  org .jruby .Ruby ;
24+ import  org .jruby .RubyArray ;
25+ import  org .jruby .RubyClass ;
26+ import  org .jruby .RubyEncoding ;
27+ import  org .jruby .RubyObject ;
28+ import  org .jruby .RubyString ;
2429import  org .jruby .anno .JRubyClass ;
2530import  org .jruby .anno .JRubyMethod ;
2631import  org .jruby .runtime .ThreadContext ;
@@ -118,9 +123,10 @@ public RubyArray extract(final ThreadContext context, IRubyObject data) {
118123            if  (inputSize  + entitiesSize  > sizeLimit ) {
119124                bufferFullErrorNotified  = true ;
120125                headToken  = new  StringBuilder ();
126+                 String  errorMessage  = String .format ("input buffer full, consumed token which exceeded the sizeLimit %d; inputSize: %d, entitiesSize %d" , sizeLimit , inputSize , entitiesSize );
121127                inputSize  = 0 ;
122128                input .shift (context ); // consume the token fragment that generates the buffer full 
123-                 throw  new  IllegalStateException ("input buffer full" );
129+                 throw  new  IllegalStateException (errorMessage );
124130            }
125131            this .inputSize  = inputSize  + entitiesSize ;
126132        }
@@ -137,8 +143,7 @@ public RubyArray extract(final ThreadContext context, IRubyObject data) {
137143            // in the accumulator, and clean the pending token part. 
138144            headToken .append (input .shift (context )); // append buffer to first element and 
139145            // create new RubyString with the data specified encoding 
140-             RubyString  encodedHeadToken  = RubyUtil .RUBY .newString (new  ByteList (headToken .toString ().getBytes (Charset .forName (encodingName ))));
141-             encodedHeadToken .force_encoding (context , RubyUtil .RUBY .newString (encodingName ));
146+             RubyString  encodedHeadToken  = toEncodedRubyString (context , headToken .toString ());
142147            input .unshift (encodedHeadToken ); // reinsert it into the array 
143148            headToken  = new  StringBuilder ();
144149        }
@@ -147,6 +152,13 @@ public RubyArray extract(final ThreadContext context, IRubyObject data) {
147152        return  input ;
148153    }
149154
155+     private  RubyString  toEncodedRubyString (ThreadContext  context , String  input ) {
156+         // Converts input to bytes in the charset named by encodingName and tags the resulting RubyString with that same encoding; encodingName must be non-null (the extract method is expected to assign it), otherwise Charset.forName throws IllegalArgumentException.
157+         RubyString  result  = RubyUtil .RUBY .newString (new  ByteList (input .getBytes (Charset .forName (encodingName ))));
158+         result .force_encoding (context , RubyUtil .RUBY .newString (encodingName ));
159+         return  result ;
160+     }
161+ 
150162    /** 
151163     * Flush the contents of the input buffer, i.e. return the input buffer even though 
152164     * a token has not yet been encountered 
@@ -163,8 +175,7 @@ public IRubyObject flush(final ThreadContext context) {
163175        // create new RubyString with the last data specified encoding, if exists 
164176        RubyString  encodedHeadToken ;
165177        if  (encodingName  != null ) {
166-             encodedHeadToken  = RubyUtil .RUBY .newString (new  ByteList (buffer .toString ().getBytes (Charset .forName (encodingName ))));
167-             encodedHeadToken .force_encoding (context , RubyUtil .RUBY .newString (encodingName ));
178+             encodedHeadToken  = toEncodedRubyString (context , buffer .toString ());
168179        } else  {
169180            // When used with TCP input it could be that on socket connection the flush method 
170181            // is invoked while no invocation of extract, leaving the encoding name unassigned. 
0 commit comments