|
33 | 33 |
|
34 | 34 | import org.jcodings.Encoding; |
35 | 35 | import org.jcodings.specific.ASCIIEncoding; |
| 36 | +import org.jcodings.specific.UTF16BEEncoding; |
| 37 | +import org.jcodings.specific.UTF16LEEncoding; |
| 38 | +import org.jcodings.specific.UTF32BEEncoding; |
| 39 | +import org.jcodings.specific.UTF32LEEncoding; |
| 40 | +import org.jcodings.specific.UTF8Encoding; |
36 | 41 | import org.jruby.*; |
37 | 42 | import org.jruby.anno.FrameField; |
38 | 43 | import org.jruby.anno.JRubyClass; |
|
51 | 56 | import org.jruby.util.ByteList; |
52 | 57 | import org.jruby.util.StringSupport; |
53 | 58 | import org.jruby.util.TypeConverter; |
| 59 | +import org.jruby.util.func.ObjectObjectIntFunction; |
54 | 60 | import org.jruby.util.io.EncodingUtils; |
55 | 61 | import org.jruby.util.io.Getline; |
56 | 62 | import org.jruby.util.io.IOEncodable; |
|
63 | 69 | import java.util.Arrays; |
64 | 70 | import java.util.concurrent.atomic.AtomicReferenceFieldUpdater; |
65 | 71 |
|
| 72 | +import static java.lang.Byte.toUnsignedInt; |
66 | 73 | import static org.jruby.RubyEnumerator.enumeratorize; |
67 | 74 | import static org.jruby.runtime.Visibility.PRIVATE; |
68 | 75 | import static org.jruby.util.RubyStringBuilder.str; |
@@ -349,6 +356,7 @@ private void strioInit(ThreadContext context, int argc, IRubyObject arg0, IRubyO |
349 | 356 | ptr.enc = encoding; |
350 | 357 | ptr.pos = 0; |
351 | 358 | ptr.lineno = 0; |
| 359 | + if ((ptr.flags & OpenFile.SETENC_BY_BOM) != 0) setEncodingByBOM(context); |
352 | 360 | // funky way of shifting readwrite flags into object flags |
353 | 361 | flags |= (ptr.flags & OpenFile.READWRITE) * (STRIO_READABLE / OpenFile.READABLE); |
354 | 362 | } finally { |
@@ -1641,6 +1649,73 @@ public IRubyObject set_encoding(ThreadContext context, IRubyObject enc, IRubyObj |
1641 | 1649 | return set_encoding(context, enc); |
1642 | 1650 | } |
1643 | 1651 |
|
| 1652 | + @JRubyMethod |
| 1653 | + public IRubyObject set_encoding_by_bom(ThreadContext context) { |
| 1654 | + if (setEncodingByBOM(context) == null) return context.nil; |
| 1655 | + |
| 1656 | + return context.runtime.getEncodingService().convertEncodingToRubyEncoding(ptr.enc); |
| 1657 | + } |
| 1658 | + |
| 1659 | + private Encoding setEncodingByBOM(ThreadContext context) { |
| 1660 | + Encoding enc = detectBOM(context, ptr.string, (ctx, enc2, bomlen) -> { |
| 1661 | + ptr.pos = bomlen; |
| 1662 | + if (writable()) { |
| 1663 | + ptr.string.setEncoding(enc2); |
| 1664 | + } |
| 1665 | + return enc2; |
| 1666 | + }); |
| 1667 | + ptr.enc = enc; |
| 1668 | + return enc; |
| 1669 | + } |
| 1670 | + |
| 1671 | + private static Encoding detectBOM(ThreadContext context, RubyString str, ObjectObjectIntFunction<ThreadContext, Encoding, Encoding> callback) { |
| 1672 | + int p; |
| 1673 | + int len; |
| 1674 | + |
| 1675 | + ByteList byteList = str.getByteList(); |
| 1676 | + byte[] bytes = byteList.unsafeBytes(); |
| 1677 | + p = byteList.begin(); |
| 1678 | + len = byteList.realSize(); |
| 1679 | + |
| 1680 | + if (len < 1) return null; |
| 1681 | + switch (toUnsignedInt(bytes[p])) { |
| 1682 | + case 0xEF: |
| 1683 | + if (len < 2) break; |
| 1684 | + if (toUnsignedInt(bytes[p + 1]) == 0xBB && len > 2) { |
| 1685 | + if (toUnsignedInt(bytes[p + 2]) == 0xBF) { |
| 1686 | + return callback.apply(context, UTF8Encoding.INSTANCE, 3); |
| 1687 | + } |
| 1688 | + } |
| 1689 | + break; |
| 1690 | + |
| 1691 | + case 0xFE: |
| 1692 | + if (len < 2) break; |
| 1693 | + if (toUnsignedInt(bytes[p + 1]) == 0xFF) { |
| 1694 | + return callback.apply(context, UTF16BEEncoding.INSTANCE, 2); |
| 1695 | + } |
| 1696 | + break; |
| 1697 | + |
| 1698 | + case 0xFF: |
| 1699 | + if (len < 2) break; |
| 1700 | + if (toUnsignedInt(bytes[p + 1]) == 0xFE) { |
| 1701 | + if (len >= 4 && toUnsignedInt(bytes[p + 2]) == 0 && toUnsignedInt(bytes[p + 3]) == 0) { |
| 1702 | + return callback.apply(context, UTF32LEEncoding.INSTANCE, 4); |
| 1703 | + } |
| 1704 | + return callback.apply(context, UTF16LEEncoding.INSTANCE, 2); |
| 1705 | + } |
| 1706 | + break; |
| 1707 | + |
| 1708 | + case 0: |
| 1709 | + if (len < 4) break; |
| 1710 | + if (toUnsignedInt(bytes[p + 1]) == 0 && toUnsignedInt(bytes[p + 2]) == 0xFE && toUnsignedInt(bytes[p + 3]) == 0xFF) { |
| 1711 | + return callback.apply(context, UTF32BEEncoding.INSTANCE, 4); |
| 1712 | + } |
| 1713 | + break; |
| 1714 | + } |
| 1715 | + return callback.apply(context, null, 0); |
| 1716 | + } |
| 1717 | + |
| 1718 | + |
1644 | 1719 | @JRubyMethod |
1645 | 1720 | public IRubyObject external_encoding(ThreadContext context) { |
1646 | 1721 | return context.runtime.getEncodingService().convertEncodingToRubyEncoding(getEncoding()); |
|
0 commit comments