Skip to content

Commit 727e3b1

Browse files
committed
Addressed some concerns raised in PR review
- extracted common code used in string encoding - avoid full package import - better exception message with details on limit exceeded
1 parent 015ba43 commit 727e3b1

File tree

1 file changed

+17
-6
lines changed

1 file changed

+17
-6
lines changed

logstash-core/src/main/java/org/logstash/common/BufferedTokenizerExt.java

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,12 @@
2020

2121
package org.logstash.common;
2222

23-
import org.jruby.*;
23+
import org.jruby.Ruby;
24+
import org.jruby.RubyArray;
25+
import org.jruby.RubyClass;
26+
import org.jruby.RubyEncoding;
27+
import org.jruby.RubyObject;
28+
import org.jruby.RubyString;
2429
import org.jruby.anno.JRubyClass;
2530
import org.jruby.anno.JRubyMethod;
2631
import org.jruby.runtime.ThreadContext;
@@ -118,9 +123,10 @@ public RubyArray extract(final ThreadContext context, IRubyObject data) {
118123
if (inputSize + entitiesSize > sizeLimit) {
119124
bufferFullErrorNotified = true;
120125
headToken = new StringBuilder();
126+
String errorMessage = String.format("input buffer full, consumed token which exceeded the sizeLimit %d; inputSize: %d, entitiesSize %d", sizeLimit, inputSize, entitiesSize);
121127
inputSize = 0;
122128
input.shift(context); // consume the token fragment that generates the buffer full
123-
throw new IllegalStateException("input buffer full");
129+
throw new IllegalStateException(errorMessage);
124130
}
125131
this.inputSize = inputSize + entitiesSize;
126132
}
@@ -137,8 +143,7 @@ public RubyArray extract(final ThreadContext context, IRubyObject data) {
137143
// in the accumulator, and clean the pending token part.
138144
headToken.append(input.shift(context)); // append buffer to first element and
139145
// create new RubyString with the data specified encoding
140-
RubyString encodedHeadToken = RubyUtil.RUBY.newString(new ByteList(headToken.toString().getBytes(Charset.forName(encodingName))));
141-
encodedHeadToken.force_encoding(context, RubyUtil.RUBY.newString(encodingName));
146+
RubyString encodedHeadToken = toEncodedRubyString(context, headToken.toString());
142147
input.unshift(encodedHeadToken); // reinsert it into the array
143148
headToken = new StringBuilder();
144149
}
@@ -147,6 +152,13 @@ public RubyArray extract(final ThreadContext context, IRubyObject data) {
147152
return input;
148153
}
149154

155+
/**
 * Converts a Java string into a RubyString carrying the encoding named by {@code encodingName}.
 * The string is turned into bytes with the charset of that name, wrapped in a ByteList, and the
 * resulting RubyString is then force-encoded to the same encoding name.
 *
 * @param context the thread context handed to {@code force_encoding}
 * @param input the Java string to convert
 * @return the newly created RubyString, force-encoded to {@code encodingName}
 */
private RubyString toEncodedRubyString(ThreadContext context, String input) {
156+
// Precondition: encodingName is expected to have been set by the extract method. If it is still
// null, Charset.forName rejects it with an IllegalArgumentException, so callers must check first.
157+
RubyString result = RubyUtil.RUBY.newString(new ByteList(input.getBytes(Charset.forName(encodingName))));
158+
result.force_encoding(context, RubyUtil.RUBY.newString(encodingName));
159+
return result;
160+
}
161+
150162
/**
151163
* Flush the contents of the input buffer, i.e. return the input buffer even though
152164
* a token has not yet been encountered
@@ -163,8 +175,7 @@ public IRubyObject flush(final ThreadContext context) {
163175
// create new RubyString with the last data specified encoding, if exists
164176
RubyString encodedHeadToken;
165177
if (encodingName != null) {
166-
encodedHeadToken = RubyUtil.RUBY.newString(new ByteList(buffer.toString().getBytes(Charset.forName(encodingName))));
167-
encodedHeadToken.force_encoding(context, RubyUtil.RUBY.newString(encodingName));
178+
encodedHeadToken = toEncodedRubyString(context, buffer.toString());
168179
} else {
169180
// When used with TCP input it could be that on socket connection the flush method
170181
// is invoked while no invocation of extract, leaving the encoding name unassigned.

0 commit comments

Comments
 (0)