Skip to content

Commit 939e666

Browse files
committed
Implement set_encoding_by_bom
Roughly ported from CRuby. See b249631
1 parent 8856ed3 commit 939e666

File tree

1 file changed

+75
-0
lines changed

1 file changed

+75
-0
lines changed

ext/java/org/jruby/ext/stringio/StringIO.java

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,11 @@
3333

3434
import org.jcodings.Encoding;
3535
import org.jcodings.specific.ASCIIEncoding;
36+
import org.jcodings.specific.UTF16BEEncoding;
37+
import org.jcodings.specific.UTF16LEEncoding;
38+
import org.jcodings.specific.UTF32BEEncoding;
39+
import org.jcodings.specific.UTF32LEEncoding;
40+
import org.jcodings.specific.UTF8Encoding;
3641
import org.jruby.*;
3742
import org.jruby.anno.FrameField;
3843
import org.jruby.anno.JRubyClass;
@@ -51,6 +56,7 @@
5156
import org.jruby.util.ByteList;
5257
import org.jruby.util.StringSupport;
5358
import org.jruby.util.TypeConverter;
59+
import org.jruby.util.func.ObjectObjectIntFunction;
5460
import org.jruby.util.io.EncodingUtils;
5561
import org.jruby.util.io.Getline;
5662
import org.jruby.util.io.IOEncodable;
@@ -63,6 +69,7 @@
6369
import java.util.Arrays;
6470
import java.util.concurrent.atomic.AtomicReferenceFieldUpdater;
6571

72+
import static java.lang.Byte.toUnsignedInt;
6673
import static org.jruby.RubyEnumerator.enumeratorize;
6774
import static org.jruby.runtime.Visibility.PRIVATE;
6875
import static org.jruby.util.RubyStringBuilder.str;
@@ -349,6 +356,7 @@ private void strioInit(ThreadContext context, int argc, IRubyObject arg0, IRubyO
349356
ptr.enc = encoding;
350357
ptr.pos = 0;
351358
ptr.lineno = 0;
359+
if ((ptr.flags & OpenFile.SETENC_BY_BOM) != 0) setEncodingByBOM(context);
352360
// funky way of shifting readwrite flags into object flags
353361
flags |= (ptr.flags & OpenFile.READWRITE) * (STRIO_READABLE / OpenFile.READABLE);
354362
} finally {
@@ -1641,6 +1649,73 @@ public IRubyObject set_encoding(ThreadContext context, IRubyObject enc, IRubyObj
16411649
return set_encoding(context, enc);
16421650
}
16431651

1652+
@JRubyMethod
1653+
public IRubyObject set_encoding_by_bom(ThreadContext context) {
1654+
if (setEncodingByBOM(context) == null) return context.nil;
1655+
1656+
return context.runtime.getEncodingService().convertEncodingToRubyEncoding(ptr.enc);
1657+
}
1658+
1659+
private Encoding setEncodingByBOM(ThreadContext context) {
1660+
Encoding enc = detectBOM(context, ptr.string, (ctx, enc2, bomlen) -> {
1661+
ptr.pos = bomlen;
1662+
if (writable()) {
1663+
ptr.string.setEncoding(enc2);
1664+
}
1665+
return enc2;
1666+
});
1667+
ptr.enc = enc;
1668+
return enc;
1669+
}
1670+
1671+
private static Encoding detectBOM(ThreadContext context, RubyString str, ObjectObjectIntFunction<ThreadContext, Encoding, Encoding> callback) {
1672+
int p;
1673+
int len;
1674+
1675+
ByteList byteList = str.getByteList();
1676+
byte[] bytes = byteList.unsafeBytes();
1677+
p = byteList.begin();
1678+
len = byteList.realSize();
1679+
1680+
if (len < 1) return null;
1681+
switch (toUnsignedInt(bytes[p])) {
1682+
case 0xEF:
1683+
if (len < 2) break;
1684+
if (toUnsignedInt(bytes[p + 1]) == 0xBB && len > 2) {
1685+
if (toUnsignedInt(bytes[p + 2]) == 0xBF) {
1686+
return callback.apply(context, UTF8Encoding.INSTANCE, 3);
1687+
}
1688+
}
1689+
break;
1690+
1691+
case 0xFE:
1692+
if (len < 2) break;
1693+
if (toUnsignedInt(bytes[p + 1]) == 0xFF) {
1694+
return callback.apply(context, UTF16BEEncoding.INSTANCE, 2);
1695+
}
1696+
break;
1697+
1698+
case 0xFF:
1699+
if (len < 2) break;
1700+
if (toUnsignedInt(bytes[p + 1]) == 0xFE) {
1701+
if (len >= 4 && toUnsignedInt(bytes[p + 2]) == 0 && toUnsignedInt(bytes[p + 3]) == 0) {
1702+
return callback.apply(context, UTF32LEEncoding.INSTANCE, 4);
1703+
}
1704+
return callback.apply(context, UTF16LEEncoding.INSTANCE, 2);
1705+
}
1706+
break;
1707+
1708+
case 0:
1709+
if (len < 4) break;
1710+
if (toUnsignedInt(bytes[p + 1]) == 0 && toUnsignedInt(bytes[p + 2]) == 0xFE && toUnsignedInt(bytes[p + 3]) == 0xFF) {
1711+
return callback.apply(context, UTF32BEEncoding.INSTANCE, 4);
1712+
}
1713+
break;
1714+
}
1715+
return callback.apply(context, null, 0);
1716+
}
1717+
1718+
16441719
@JRubyMethod
16451720
public IRubyObject external_encoding(ThreadContext context) {
16461721
return context.runtime.getEncodingService().convertEncodingToRubyEncoding(getEncoding());

0 commit comments

Comments
 (0)