11# This file is a part of Julia. License is MIT: https://julialang.org/license
22
3- convert (:: Type{Char} , x:: UInt32 ) = reinterpret (Char, x)
# Exception type that carries the offending `Char` value in its `char` field.
# It is constructed and thrown by `malformed_char` in this file.
3+ struct MalformedCharError <: Exception
4+ char:: Char
5+ end
# Exception type that carries a rejected integer value in its `code` field.
# It is constructed and thrown by `code_point_err` in this file.
6+ struct CodePointError <: Exception
7+ code:: Integer
8+ end
# Throwing helpers: each one only builds the matching exception and throws it,
# so neither ever returns normally. Both are marked `@noinline`.
# NOTE(review): presumably `@noinline` keeps the throw path out of the
# conversion functions that call these helpers -- confirm.
9+ @noinline malformed_char (c:: Char ) = throw (MalformedCharError (c))
10+ @noinline code_point_err (u:: UInt32 ) = throw (CodePointError (u))
11+
# Return `true` when the raw 32-bit pattern of `c` fails any of three checks.
# NOTE(review): this reads as a UTF-8 well-formedness test on the bytes stored
# in the Char (lead-byte length prefix vs. bytes present, continuation bytes of
# the form 10xxxxxx) -- confirm against the Char representation.
12+ function ismalformed (c:: Char )
# Raw bits of the character as an unsigned 32-bit integer.
13+ u = reinterpret (UInt32, c)
# Number of leading one bits, multiplied by 8.
14+ l1 = leading_ones (u) << 3
# Number of trailing zero bits with the low three bits cleared (56 = 0b111000),
# i.e. rounded down to a multiple of 8.
15+ t0 = trailing_zeros (u) & 56
# Check 1: exactly one leading one bit (l1 == 8).
# Check 2: l1 plus the trailing-zero span exceeds the 32 available bits.
16+ (l1 == 8 ) | (l1 + t0 > 32 ) |
# Check 3: the XOR is zero in each of the three low bytes whose top two bits
# are `10`; shifting right by t0 discards the bytes inside the trailing-zero
# span, so a nonzero result means a remaining byte lacks the `10` prefix.
17+ (((u & 0x00c0c0c0 ) ⊻ 0x00808080 ) >> t0 != 0 )
18+ end
19+
# Convert a `Char` to the `UInt32` value encoded in its raw bits.
#
# Raw patterns below 0x80000000 (top bit clear) return their most significant
# byte directly. Any other pattern is validated with the same three checks that
# `ismalformed` uses; a failing pattern calls `malformed_char(c)`, which throws
# `MalformedCharError`. For valid patterns the leading one bits and the
# trailing zero bytes are stripped and the remaining payload bits of each byte
# are packed together into one integer.
20+ function convert (:: Type{UInt32} , c:: Char )
21+ # TODO : use optimized inline LLVM
22+ u = reinterpret (UInt32, c)
# Top bit clear: the value is simply the most significant byte.
23+ u < 0x80000000 && return reinterpret (UInt32, u >> 24 )
# l1: count of leading one bits; t0: trailing zero bits rounded down to a
# multiple of 8 (56 = 0b111000).
24+ l1 = leading_ones (u)
25+ t0 = trailing_zeros (u) & 56
# Reject: a single leading one bit, a length that does not fit in 32 bits, or
# a lower byte (above the trailing-zero span) whose top two bits are not `10`.
26+ (l1 == 1 ) | (8 l1 + t0 > 32 ) |
27+ (((u & 0x00c0c0c0 ) ⊻ 0x00808080 ) >> t0 != 0 ) &&
28+ malformed_char (c):: Union{}
# Clear the leading one bits, then drop the trailing zero bytes.
29+ u &= 0xffffffff >> l1
30+ u >>= t0
# Pack the payload bits of each byte together. Every mask is parenthesised
# explicitly: bitshift operators bind tighter than `&` in Julia, so the
# unparenthesised form `u & 0x00007f00 >> 2` would shift the mask instead of
# the masked value (e.g. raw 0xc3a90000 would yield 0x3a9 rather than 0xe9).
31+ ((u & 0x0000007f ) >> 0 ) | ((u & 0x00007f00 ) >> 2 ) |
32+ ((u & 0x007f0000 ) >> 4 ) | ((u & 0x7f000000 ) >> 6 )
33+ end
34+
# Convert a `UInt32` value to a `Char` by building the Char's raw bit pattern.
# Values below 0x80 are placed directly in the most significant byte; values of
# 0x00200000 or more call `code_point_err(u)`, which throws `CodePointError`.
35+ function convert (:: Type{Char} , u:: UInt32 )
36+ u < 0x80 && return reinterpret (Char, u << 24 )
37+ u < 0x00200000 || code_point_err (u):: Union{}
# Spread `u` into 6-bit groups, one group in the low six bits of each byte.
38+ c = ((u << 0 ) & 0x0000003f ) | ((u << 2 ) & 0x00003f00 ) |
39+ ((u << 4 ) & 0x003f0000 ) | ((u << 6 ) & 0x3f000000 )
# Left-align the groups and OR in the prefix bits; the number of bytes used
# (2, 3 or 4) is selected by the magnitude of `u`.
40+ c = u < 0x00000800 ? (c << 16 ) | 0xc0800000 :
41+ u < 0x00010000 ? (c << 08 ) | 0xe0808000 :
42+ (c << 00 ) | 0xf0808080
43+ reinterpret (Char, c)
44+ end
45+
# Convert a `Char` to `Int8` or `UInt8`.
46+ function convert (:: Type{T} , c:: Char ) where T <: Union{Int8,UInt8}
# Raw bits viewed as a signed 32-bit integer; non-negative means the top bit
# of the raw pattern is clear.
47+ i = reinterpret (Int32, c)
# Top bit clear: take the most significant byte, reduced with `% T`.
# Otherwise go through the general `UInt32(c)` conversion and the `T` constructor.
48+ i ≥ 0 ? ((i >>> 24 ) % T) : T (UInt32 (c))
49+ end
50+
# Convert an `Int8` or `UInt8` to a `Char`.
51+ function convert (:: Type{Char} , b:: Union{Int8,UInt8} )
# Values in 0..0x7f are placed directly in the most significant byte of the
# raw pattern; all other values go through `Char(UInt32(b))`.
52+ 0 ≤ b ≤ 0x7f ? reinterpret (Char, (b % UInt32) << 24 ) : Char (UInt32 (b))
53+ end
54+
# Generic fallbacks: any `Number` becomes a `Char` via `UInt32(x)`, and a `Char`
# becomes any `Number` type `T` via `UInt32(x)` followed by `convert(T, ...)`.
455convert (:: Type{Char} , x:: Number ) = Char (UInt32 (x))
5- convert (:: Type{UInt32} , x:: Char ) = reinterpret (UInt32, x)
656convert (:: Type{T} , x:: Char ) where {T<: Number } = convert (T, UInt32 (x))
757
# `rem` of a Char with a Number type is `rem` applied to `UInt32(x)`.
858rem (x:: Char , :: Type{T} ) where {T<: Number } = rem (UInt32 (x), T)
@@ -29,19 +79,16 @@ done(c::Char, state) = state
# `isempty` always returns `false` for a Char; `in` between two Chars is
# plain equality.
2979isempty (c:: Char ) = false
3080in (x:: Char , y:: Char ) = x == y
3181
32- == (x:: Char , y:: Char ) = UInt32 (x) == UInt32 (y)
33- isless (x:: Char , y:: Char ) = UInt32 (x) < UInt32 (y)
34-
35- const hashchar_seed = 0xd4d64234
36- hash (x:: Char , h:: UInt ) = hash_uint64 (((UInt64 (x)+ hashchar_seed)<< 32 ) ⊻ UInt64 (h))
# Equality and ordering compare the raw `UInt32` bit patterns obtained with
# `reinterpret`, not the `UInt32(x)` conversion.
82+ == (x:: Char , y:: Char ) = reinterpret (UInt32, x) == reinterpret (UInt32, y)
83+ isless (x:: Char , y:: Char ) = reinterpret (UInt32, x) < reinterpret (UInt32, y)
# Hash: add the constant 0xd4d64234 to the raw bits, shift the sum left by 32,
# XOR with the incoming hash `h`, and pass the result to `hash_uint64`.
84+ hash (x:: Char , h:: UInt ) =
85+ hash_uint64 (((reinterpret (UInt32, x) + UInt64 (0xd4d64234 )) << 32 ) ⊻ UInt64 (h))
3786
# Char arithmetic. Char minus Char yields an `Int` difference of the `Int(x)`
# conversions; Char plus/minus an Integer is computed on `Int32` values and
# converted back to `Char`; Integer plus Char delegates to Char plus Integer.
3887- (x:: Char , y:: Char ) = Int (x) - Int (y)
3988- (x:: Char , y:: Integer ) = Char (Int32 (x) - Int32 (y))
4089+ (x:: Char , y:: Integer ) = Char (Int32 (x) + Int32 (y))
4190+ (x:: Integer , y:: Char ) = y + x
4291
43- bswap (x:: Char ) = Char (bswap (UInt32 (x)))
44-
# `print` writes the character to `io` and returns `nothing` rather than the
# result of `write`.
4592print (io:: IO , c:: Char ) = (write (io, c); nothing )
4693
# Byte table built from the ranges '0':'9' and 'a':'z'. The `show` method in
# this file indexes it with (4-bit value + 1) to emit hexadecimal digits.
4794const hex_chars = UInt8[' 0' :' 9' ;' a' :' z' ]
@@ -66,21 +113,37 @@ function show(io::IO, c::Char)
66113 end
67114 if Unicode. isprint (c)
68115 write (io, 0x27 , c, 0x27 )
69- else
116+ elseif ! ismalformed (c)
70117 u = UInt32 (c)
71118 write (io, 0x27 , 0x5c , c <= ' \x 7f' ? 0x78 : c <= ' \u ffff' ? 0x75 : 0x55 )
72119 d = max (2 , 8 - (leading_zeros (u) >> 2 ))
73120 while 0 < d
74121 write (io, hex_chars[((u >> ((d -= 1 ) << 2 )) & 0xf ) + 1 ])
75122 end
76123 write (io, 0x27 )
124+ else # malformed
125+ write (io, 0x27 )
126+ u = reinterpret (UInt32, c)
127+ while true
128+ a = hex_chars[((u >> 28 ) & 0xf ) + 1 ]
129+ b = hex_chars[((u >> 24 ) & 0xf ) + 1 ]
130+ write (io, 0x5c , ' x' , a, b)
131+ (u <<= 8 ) == 0 && break
132+ end
133+ write (io, 0x27 )
77134 end
78135 return
79136end
80137
# `text/plain` display of a Char: the plain `show` output followed by a
# description and the Unicode category.
81138function show (io:: IO , :: MIME"text/plain" , c:: Char )
82139 show (io, c)
83- u = UInt32 (c)
84- print (io, " : " , Unicode. isascii (c) ? " ASCII/" : " " , " Unicode U+" , hex (u, u > 0xffff ? 6 : 4 ))
85- print (io, " (category " , Unicode. category_abbrev (c), " : " , Unicode. category_string (c), " )" )
# `UInt32(c)` is only evaluated for characters that pass `ismalformed`; the
# code point is printed in hex, padded to 6 digits above 0xffff and 4 otherwise.
140+ if ! ismalformed (c)
141+ u = UInt32 (c)
142+ print (io, " : " , Unicode. isascii (c) ? " ASCII/" : " " , " Unicode U+" , hex (u, u > 0xffff ? 6 : 4 ))
143+ else
# Malformed characters get a fixed label instead of a code point.
144+ print (io, " : Malformed UTF-8" )
145+ end
# The category abbreviation and description are printed in both cases.
146+ abr = Unicode. category_abbrev (c)
147+ str = Unicode. category_string (c)
148+ print (io, " (category " , abr, " : " , str, " )" )
86149end
0 commit comments