diff --git a/base/essentials.jl b/base/essentials.jl index a5279bffbe1fd..6bba04dbbf2cb 100644 --- a/base/essentials.jl +++ b/base/essentials.jl @@ -93,8 +93,6 @@ unsafe_convert{T}(::Type{T}, x::T) = x # unsafe_convert (like convert) defaults unsafe_convert{P<:Ptr}(::Type{P}, x::Ptr) = convert(P, x) reinterpret{T}(::Type{T}, x) = box(T, x) -reinterpret(::Type{Unsigned}, x::Float16) = reinterpret(UInt16,x) -reinterpret(::Type{Signed}, x::Float16) = reinterpret(Int16,x) sizeof(x) = Core.sizeof(x) diff --git a/base/float.jl b/base/float.jl index 4eafa39353b21..db688a80458bb 100644 --- a/base/float.jl +++ b/base/float.jl @@ -43,198 +43,26 @@ A not-a-number value of type `Float64`. const NaN = NaN64 ## conversions to floating-point ## -convert(::Type{Float16}, x::Integer) = convert(Float16, convert(Float32,x)) -for t in (Int8,Int16,Int32,Int64,Int128,UInt8,UInt16,UInt32,UInt64,UInt128) - @eval promote_rule(::Type{Float16}, ::Type{$t}) = Float16 -end -promote_rule(::Type{Float16}, ::Type{Bool}) = Float16 - -for t1 in (Float32,Float64) - for st in (Int8,Int16,Int32,Int64) +for t1 in (Float16,Float32,Float64) + for st in (Int8,Int16,Int32,Int64,Int128) @eval begin convert(::Type{$t1},x::($st)) = box($t1,sitofp($t1,unbox($st,x))) promote_rule(::Type{$t1}, ::Type{$st} ) = $t1 end end - for ut in (Bool,UInt8,UInt16,UInt32,UInt64) + for ut in (Bool,UInt8,UInt16,UInt32,UInt64,UInt128) @eval begin convert(::Type{$t1},x::($ut)) = box($t1,uitofp($t1,unbox($ut,x))) promote_rule(::Type{$t1}, ::Type{$ut} ) = $t1 end end end -convert{T<:Integer}(::Type{T}, x::Float16) = convert(T, Float32(x)) - - -promote_rule(::Type{Float64}, ::Type{UInt128}) = Float64 -promote_rule(::Type{Float64}, ::Type{Int128}) = Float64 -promote_rule(::Type{Float32}, ::Type{UInt128}) = Float32 -promote_rule(::Type{Float32}, ::Type{Int128}) = Float32 - -function convert(::Type{Float64}, x::UInt128) - x == 0 && return 0.0 - n = 128-leading_zeros(x) # ndigits0z(x,2) - if n <= 53 - y = ((x % UInt64) << (53-n)) & 
0x000f_ffff_ffff_ffff - else - y = ((x >> (n-54)) % UInt64) & 0x001f_ffff_ffff_ffff # keep 1 extra bit - y = (y+1)>>1 # round, ties up (extra leading bit in case of next exponent) - y &= ~UInt64(trailing_zeros(x) == (n-54)) # fix last bit to round to even - end - d = ((n+1022) % UInt64) << 52 - reinterpret(Float64, d + y) -end - -function convert(::Type{Float64}, x::Int128) - x == 0 && return 0.0 - s = ((x >>> 64) % UInt64) & 0x8000_0000_0000_0000 # sign bit - x = abs(x) % UInt128 - n = 128-leading_zeros(x) # ndigits0z(x,2) - if n <= 53 - y = ((x % UInt64) << (53-n)) & 0x000f_ffff_ffff_ffff - else - y = ((x >> (n-54)) % UInt64) & 0x001f_ffff_ffff_ffff # keep 1 extra bit - y = (y+1)>>1 # round, ties up (extra leading bit in case of next exponent) - y &= ~UInt64(trailing_zeros(x) == (n-54)) # fix last bit to round to even - end - d = ((n+1022) % UInt64) << 52 - reinterpret(Float64, s | d + y) -end - -function convert(::Type{Float32}, x::UInt128) - x == 0 && return 0f0 - n = 128-leading_zeros(x) # ndigits0z(x,2) - if n <= 24 - y = ((x % UInt32) << (24-n)) & 0x007f_ffff - else - y = ((x >> (n-25)) % UInt32) & 0x00ff_ffff # keep 1 extra bit - y = (y+one(UInt32))>>1 # round, ties up (extra leading bit in case of next exponent) - y &= ~UInt32(trailing_zeros(x) == (n-25)) # fix last bit to round to even - end - d = ((n+126) % UInt32) << 23 - reinterpret(Float32, d + y) -end -function convert(::Type{Float32}, x::Int128) - x == 0 && return 0f0 - s = ((x >>> 96) % UInt32) & 0x8000_0000 # sign bit - x = abs(x) % UInt128 - n = 128-leading_zeros(x) # ndigits0z(x,2) - if n <= 24 - y = ((x % UInt32) << (24-n)) & 0x007f_ffff - else - y = ((x >> (n-25)) % UInt32) & 0x00ff_ffff # keep 1 extra bit - y = (y+one(UInt32))>>1 # round, ties up (extra leading bit in case of next exponent) - y &= ~UInt32(trailing_zeros(x) == (n-25)) # fix last bit to round to even - end - d = ((n+126) % UInt32) << 23 - reinterpret(Float32, s | d + y) -end +convert(::Type{Float16}, x::Union{Float32, Float64}) 
= box(Float16,fptrunc(Float16,x)) +convert(::Type{Float32}, x::Float64) = box(Float32,fptrunc(Float32,x)) -function convert(::Type{Float16}, val::Float32) - f = reinterpret(UInt32, val) - i = (f >> 23) & 0x1ff + 1 - sh = shifttable[i] - f &= 0x007fffff - h::UInt16 = basetable[i] + (f >> sh) - # round - # NOTE: we maybe should ignore NaNs here, but the payload is - # getting truncated anyway so "rounding" it might not matter - nextbit = (f >> (sh-1)) & 1 - if nextbit != 0 - # Round halfway to even or check lower bits - if h&1 == 1 || (f & ((1<<(sh-1))-1)) != 0 - h += 1 - end - end - reinterpret(Float16, h) -end - -function convert(::Type{Float32}, val::Float16) - local ival::UInt32 = reinterpret(UInt16, val), - sign::UInt32 = (ival & 0x8000) >> 15, - exp::UInt32 = (ival & 0x7c00) >> 10, - sig::UInt32 = (ival & 0x3ff) >> 0, - ret::UInt32 - - if exp == 0 - if sig == 0 - sign = sign << 31 - ret = sign | exp | sig - else - n_bit = 1 - bit = 0x0200 - while (bit & sig) == 0 - n_bit = n_bit + 1 - bit = bit >> 1 - end - sign = sign << 31 - exp = (-14 - n_bit + 127) << 23 - sig = ((sig & (~bit)) << n_bit) << (23 - 10) - ret = sign | exp | sig - end - elseif exp == 0x1f - if sig == 0 # Inf - if sign == 0 - ret = 0x7f800000 - else - ret = 0xff800000 - end - else # NaN - ret = 0x7fc00000 | (sign<<31) - end - else - sign = sign << 31 - exp = (exp - 15 + 127) << 23 - sig = sig << (23 - 10) - ret = sign | exp | sig - end - return reinterpret(Float32, ret) -end - -# Float32 -> Float16 algorithm from: -# "Fast Half Float Conversion" by Jeroen van der Zijp -# ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf - -const basetable = Array{UInt16}(512) -const shifttable = Array{UInt8}(512) - -for i = 0:255 - e = i - 127 - if e < -24 # Very small numbers map to zero - basetable[i|0x000+1] = 0x0000 - basetable[i|0x100+1] = 0x8000 - shifttable[i|0x000+1] = 24 - shifttable[i|0x100+1] = 24 - elseif e < -14 # Small numbers map to denorms - basetable[i|0x000+1] = (0x0400>>(-e-14)) - 
basetable[i|0x100+1] = (0x0400>>(-e-14)) | 0x8000 - shifttable[i|0x000+1] = -e-1 - shifttable[i|0x100+1] = -e-1 - elseif e <= 15 # Normal numbers just lose precision - basetable[i|0x000+1] = ((e+15)<<10) - basetable[i|0x100+1] = ((e+15)<<10) | 0x8000 - shifttable[i|0x000+1] = 13 - shifttable[i|0x100+1] = 13 - elseif e < 128 # Large numbers map to Infinity - basetable[i|0x000+1] = 0x7C00 - basetable[i|0x100+1] = 0xFC00 - shifttable[i|0x000+1] = 24 - shifttable[i|0x100+1] = 24 - else # Infinity and NaN's stay Infinity and NaN's - basetable[i|0x000+1] = 0x7C00 - basetable[i|0x100+1] = 0xFC00 - shifttable[i|0x000+1] = 13 - shifttable[i|0x100+1] = 13 - end -end -#convert(::Type{Float16}, x::Float32) = box(Float16,fptrunc(Float16,x)) -convert(::Type{Float32}, x::Float64) = box(Float32,fptrunc(Float32,unbox(Float64,x))) -convert(::Type{Float16}, x::Float64) = convert(Float16, convert(Float32,x)) - -#convert(::Type{Float32}, x::Float16) = box(Float32,fpext(Float32,x)) -convert(::Type{Float64}, x::Float32) = box(Float64,fpext(Float64,unbox(Float32,x))) -convert(::Type{Float64}, x::Float16) = convert(Float64, convert(Float32,x)) +convert(::Type{Float32}, x::Float16) = box(Float32,fpext(Float32,x)) +convert(::Type{Float64}, x::Union{Float16,Float32}) = box(Float64,fpext(Float64,x)) convert(::Type{AbstractFloat}, x::Bool) = convert(Float64, x) convert(::Type{AbstractFloat}, x::Int8) = convert(Float64, x) @@ -253,81 +81,33 @@ float(x) = convert(AbstractFloat, x) # for constructing arrays float{T<:Number}(::Type{T}) = typeof(float(zero(T))) -for Ti in (Int8, Int16, Int32, Int64) +typealias IntrinsicFloats Union{Float16,Float32,Float64} +for Ti in (Int8, Int16, Int32, Int64, Int128) @eval begin - unsafe_trunc(::Type{$Ti}, x::Float32) = box($Ti,fptosi($Ti,unbox(Float32,x))) - unsafe_trunc(::Type{$Ti}, x::Float64) = box($Ti,fptosi($Ti,unbox(Float64,x))) + unsafe_trunc(::Type{$Ti}, x::IntrinsicFloats) = box($Ti,fptosi($Ti,x)) end end -for Ti in (UInt8, UInt16, UInt32, UInt64) +for 
Ti in (UInt8, UInt16, UInt32, UInt64, UInt128) @eval begin - unsafe_trunc(::Type{$Ti}, x::Float32) = box($Ti,fptoui($Ti,unbox(Float32,x))) - unsafe_trunc(::Type{$Ti}, x::Float64) = box($Ti,fptoui($Ti,unbox(Float64,x))) + unsafe_trunc(::Type{$Ti}, x::IntrinsicFloats) = box($Ti,fptoui($Ti,x)) end end -function unsafe_trunc(::Type{UInt128}, x::Float64) - xu = reinterpret(UInt64,x) - k = Int(xu >> 52) & 0x07ff - 1075 - xu = (xu & 0x000f_ffff_ffff_ffff) | 0x0010_0000_0000_0000 - if k <= 0 - UInt128(xu >> -k) - else - UInt128(xu) << k - end -end -function unsafe_trunc(::Type{Int128}, x::Float64) - copysign(unsafe_trunc(UInt128,x) % Int128, x) -end - -function unsafe_trunc(::Type{UInt128}, x::Float32) - xu = reinterpret(UInt32,x) - k = Int(xu >> 23) & 0x00ff - 150 - xu = (xu & 0x007f_ffff) | 0x0080_0000 - if k <= 0 - UInt128(xu >> -k) - else - UInt128(xu) << k - end -end -function unsafe_trunc(::Type{Int128}, x::Float32) - copysign(unsafe_trunc(UInt128,x) % Int128, x) -end - - # matches convert methods # also determines floor, ceil, round -trunc(::Type{Signed}, x::Float32) = trunc(Int,x) -trunc(::Type{Signed}, x::Float64) = trunc(Int,x) -trunc(::Type{Unsigned}, x::Float32) = trunc(UInt,x) -trunc(::Type{Unsigned}, x::Float64) = trunc(UInt,x) -trunc(::Type{Integer}, x::Float32) = trunc(Int,x) -trunc(::Type{Integer}, x::Float64) = trunc(Int,x) -trunc{T<:Integer}(::Type{T}, x::Float16) = trunc(T, Float32(x)) +trunc(::Type{Signed}, x::IntrinsicFloats) = trunc(Int,x) +trunc(::Type{Unsigned}, x::IntrinsicFloats) = trunc(UInt,x) +trunc(::Type{Integer}, x::IntrinsicFloats) = trunc(Int,x) # fallbacks floor{T<:Integer}(::Type{T}, x::AbstractFloat) = trunc(T,floor(x)) -floor{T<:Integer}(::Type{T}, x::Float16) = floor(T, Float32(x)) ceil{ T<:Integer}(::Type{T}, x::AbstractFloat) = trunc(T,ceil(x)) -ceil{ T<:Integer}(::Type{T}, x::Float16) = ceil(T, Float32(x)) round{T<:Integer}(::Type{T}, x::AbstractFloat) = trunc(T,round(x)) -round{T<:Integer}(::Type{T}, x::Float16) = round(T, 
Float32(x)) - -trunc(x::Float64) = box(Float64,trunc_llvm(unbox(Float64,x))) -trunc(x::Float32) = box(Float32,trunc_llvm(unbox(Float32,x))) -trunc(x::Float16) = Float16(trunc(Float32(x))) - -floor(x::Float64) = box(Float64,floor_llvm(unbox(Float64,x))) -floor(x::Float32) = box(Float32,floor_llvm(unbox(Float32,x))) -floor(x::Float16) = Float16(floor(Float32(x))) -ceil(x::Float64) = box(Float64,ceil_llvm(unbox(Float64,x))) -ceil(x::Float32) = box(Float32,ceil_llvm(unbox(Float32,x))) -ceil(x::Float16) = Float16( ceil(Float32(x))) - -round(x::Float64) = box(Float64,rint_llvm(unbox(Float64,x))) -round(x::Float32) = box(Float32,rint_llvm(unbox(Float32,x))) -round(x::Float16) = Float16(round(Float32(x))) +trunc{T<:IntrinsicFloats}(x::T) = box(T,trunc_llvm(x)) +floor{T<:IntrinsicFloats}(x::T) = box(T,floor_llvm(x)) +ceil{T<:IntrinsicFloats}(x::T) = box(T,ceil_llvm(x)) +round{T<:IntrinsicFloats}(x::T) = box(T,rint_llvm(x)) ## floating point promotions ## promote_rule(::Type{Float32}, ::Type{Float16}) = Float32 @@ -340,40 +120,29 @@ widen(::Type{Float32}) = Float64 _default_type(T::Union{Type{Real},Type{AbstractFloat}}) = Float64 ## floating point arithmetic ## --(x::Float64) = box(Float64,neg_float(unbox(Float64,x))) --(x::Float32) = box(Float32,neg_float(unbox(Float32,x))) --(x::Float16) = reinterpret(Float16, reinterpret(UInt16,x) $ 0x8000) +-{T<:IntrinsicFloats}(x::T) = box(T,neg_float(x)) ++{T<:IntrinsicFloats}(x::T, y::T) = box(T,add_float(x,y)) +-{T<:IntrinsicFloats}(x::T, y::T) = box(T,sub_float(x,y)) +*{T<:IntrinsicFloats}(x::T, y::T) = box(T,mul_float(x,y)) +/{T<:IntrinsicFloats}(x::T, y::T) = box(T,div_float(x,y)) -for op in (:+,:-,:*,:/,:\,:^) +for op in (:\,:^) @eval ($op)(a::Float16, b::Float16) = Float16(($op)(Float32(a), Float32(b))) end -+(x::Float32, y::Float32) = box(Float32,add_float(unbox(Float32,x),unbox(Float32,y))) -+(x::Float64, y::Float64) = box(Float64,add_float(unbox(Float64,x),unbox(Float64,y))) --(x::Float32, y::Float32) = 
box(Float32,sub_float(unbox(Float32,x),unbox(Float32,y))) --(x::Float64, y::Float64) = box(Float64,sub_float(unbox(Float64,x),unbox(Float64,y))) -*(x::Float32, y::Float32) = box(Float32,mul_float(unbox(Float32,x),unbox(Float32,y))) -*(x::Float64, y::Float64) = box(Float64,mul_float(unbox(Float64,x),unbox(Float64,y))) -/(x::Float32, y::Float32) = box(Float32,div_float(unbox(Float32,x),unbox(Float32,y))) -/(x::Float64, y::Float64) = box(Float64,div_float(unbox(Float64,x),unbox(Float64,y))) - -muladd(x::Float32, y::Float32, z::Float32) = box(Float32,muladd_float(unbox(Float32,x),unbox(Float32,y),unbox(Float32,z))) -muladd(x::Float64, y::Float64, z::Float64) = box(Float64,muladd_float(unbox(Float64,x),unbox(Float64,y),unbox(Float64,z))) -function muladd(a::Float16, b::Float16, c::Float16) - Float16(muladd(Float32(a), Float32(b), Float32(c))) -end + +muladd{T<:IntrinsicFloats}(x::T, y::T, z::T) = box(T,muladd_float(x,y,z)) # TODO: faster floating point div? # TODO: faster floating point fld? # TODO: faster floating point mod? 
-for func in (:div,:fld,:cld,:rem,:mod) +for func in (:div,:fld) @eval begin $func(a::Float16,b::Float16) = Float16($func(Float32(a),Float32(b))) end end -rem(x::Float32, y::Float32) = box(Float32,rem_float(unbox(Float32,x),unbox(Float32,y))) -rem(x::Float64, y::Float64) = box(Float64,rem_float(unbox(Float64,x),unbox(Float64,y))) +rem{T<:IntrinsicFloats}(x::T, y::T) = box(T,rem_float(x,y)) cld{T<:AbstractFloat}(x::T, y::T) = -fld(-x,y) @@ -389,33 +158,13 @@ function mod{T<:AbstractFloat}(x::T, y::T) end ## floating point comparisons ## -function ==(x::Float16, y::Float16) - ix = reinterpret(UInt16,x) - iy = reinterpret(UInt16,y) - if (ix|iy)&0x7fff > 0x7c00 #isnan(x) || isnan(y) - return false - end - if (ix|iy)&0x7fff == 0x0000 - return true - end - return ix == iy -end -==(x::Float32, y::Float32) = eq_float(unbox(Float32,x),unbox(Float32,y)) -==(x::Float64, y::Float64) = eq_float(unbox(Float64,x),unbox(Float64,y)) -!=(x::Float32, y::Float32) = ne_float(unbox(Float32,x),unbox(Float32,y)) -!=(x::Float64, y::Float64) = ne_float(unbox(Float64,x),unbox(Float64,y)) -<( x::Float32, y::Float32) = lt_float(unbox(Float32,x),unbox(Float32,y)) -<( x::Float64, y::Float64) = lt_float(unbox(Float64,x),unbox(Float64,y)) -<=(x::Float32, y::Float32) = le_float(unbox(Float32,x),unbox(Float32,y)) -<=(x::Float64, y::Float64) = le_float(unbox(Float64,x),unbox(Float64,y)) - -isequal(x::Float32, y::Float32) = fpiseq(unbox(Float32,x),unbox(Float32,y)) -isequal(x::Float64, y::Float64) = fpiseq(unbox(Float64,x),unbox(Float64,y)) -isless( x::Float32, y::Float32) = fpislt(unbox(Float32,x),unbox(Float32,y)) -isless( x::Float64, y::Float64) = fpislt(unbox(Float64,x),unbox(Float64,y)) -for op in (:<,:<=,:isless) - @eval ($op)(a::Float16, b::Float16) = ($op)(Float32(a), Float32(b)) -end +=={T<:IntrinsicFloats}(x::T, y::T) = eq_float(x,y) +!={T<:IntrinsicFloats}(x::T, y::T) = ne_float(x,y) +<{ T<:IntrinsicFloats}(x::T, y::T) = lt_float(x,y) +<={T<:IntrinsicFloats}(x::T, y::T) = le_float(x,y) + 
+isequal{T<:IntrinsicFloats}(x::T, y::T) = fpiseq(x,y) +isless{ T<:IntrinsicFloats}(x::T, y::T) = fpislt(x,y) function cmp(x::AbstractFloat, y::AbstractFloat) (isnan(x) || isnan(y)) && throw(DomainError()) @@ -640,6 +389,10 @@ for Ti in (Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UIn end end end + @eval begin + convert(::Type{$Ti}, x::Float16) = convert($Ti, convert(Float32, x)) + trunc(::Type{$Ti}, x::Float16) = trunc($Ti, convert(Float32, x)) + end end @eval begin @@ -674,14 +427,15 @@ end end ## byte order swaps for arbitrary-endianness serialization/deserialization ## -bswap(x::Float32) = box(Float32,bswap_int(unbox(Float32,x))) -bswap(x::Float64) = box(Float64,bswap_int(unbox(Float64,x))) +bswap{T<:IntrinsicFloats}(x::T) = box(T,bswap_int(x)) # bit patterns reinterpret(::Type{Unsigned}, x::Float64) = reinterpret(UInt64,x) reinterpret(::Type{Unsigned}, x::Float32) = reinterpret(UInt32,x) +reinterpret(::Type{Unsigned}, x::Float16) = reinterpret(UInt16,x) reinterpret(::Type{Signed}, x::Float64) = reinterpret(Int64,x) reinterpret(::Type{Signed}, x::Float32) = reinterpret(Int32,x) +reinterpret(::Type{Signed}, x::Float16) = reinterpret(Int16,x) sign_mask(::Type{Float64}) = 0x8000_0000_0000_0000 exponent_mask(::Type{Float64}) = 0x7ff0_0000_0000_0000 diff --git a/base/int.jl b/base/int.jl index 33836165e1ca0..984a864040505 100644 --- a/base/int.jl +++ b/base/int.jl @@ -333,8 +333,8 @@ for (Ts, Tu) in ((Int8, UInt8), (Int16, UInt16), (Int32, UInt32), (Int64, UInt64 @eval convert(::Type{Unsigned}, x::$Ts) = convert($Tu, x) end -convert{T<:Union{Float32, Float64, Bool}}(::Type{Signed}, x::T) = convert(Int,x) -convert{T<:Union{Float32, Float64, Bool}}(::Type{Unsigned}, x::T) = convert(UInt,x) +convert{T<:Union{Float16, Float32, Float64, Bool}}(::Type{Signed}, x::T) = convert(Int,x) +convert{T<:Union{Float16, Float32, Float64, Bool}}(::Type{Unsigned}, x::T) = convert(UInt,x) convert(::Type{Integer}, x::Integer) = x convert(::Type{Integer}, x::Real) = 
convert(Signed,x) diff --git a/base/rtlib/RTLIB.jl b/base/rtlib/RTLIB.jl new file mode 100644 index 0000000000000..298742b3ab09c --- /dev/null +++ b/base/rtlib/RTLIB.jl @@ -0,0 +1,241 @@ +# This file is a part of Julia. License is MIT: http://julialang.org/license +""" + RTLIB + +Implements the runtime library for Julia. The implementations are based on +LLVM's compiler-rt. This implementation follows the compiler-rt naming convention +and registers the pure Julia implementations as `extern_c` so that LLVM can find them. + +As a secondary interface `RTLIB.convert(::Type{T}, x)` is provided. +""" +module RTLIB + +register(f::Function, rtype::ANY, argt::ANY, name::String) = + ccall(:jl_extern_c, Void, (Any, Any, Any, Cstring), + f, rtype, argt, name) + +# Check if relative include is available +if isdefined(Base, :INCLUDE_STATE) && Base.INCLUDE_STATE == 1 + include("rtlib/fp_util.jl") + include("rtlib/fp_extend.jl") + include("rtlib/fp_trunc.jl") + include("rtlib/fp_fixint.jl") +else + include("fp_util.jl") + include("fp_extend.jl") + include("fp_trunc.jl") + include("fp_fixint.jl") +end + +# All these function names are enumerated in lib/CodeGen/TargetLoweringBase.cpp +# right now we don't have a good way of getting at this information. 
+ +### +# Floating point extend and trunc functions +### + +# "convert Float64 to Float128" +# extenddftf2(x::Float64) = extendXfYf2(Float128, x) +# convert(::Type{Float128}, x::Float64) = extenddftf2(x) + +# "convert Float32 to Float128" +# extendsftf2(x::Float32) = extendXfYf2(Float128, x) +# convert(::Type{Float128}, x::Float32) = extendsftf2(x) + +"convert Float32 to Float64" +extendsfdf2(x::Float32) = extendXfYf2(Float64, x) +convert(::Type{Float64}, x::Float32) = extendsfdf2(x) + +"convert Float16 to Float32" +extendhfsf2(x::Float16) = extendXfYf2(Float32, x) +convert(::Type{Float32}, x::Float16) = extendhfsf2(x) + +"convert Float32 to Float16" +truncsfhf2(x::Float32) = truncXfYf2(Float16, x) +convert(::Type{Float16}, x::Float32) = truncsfhf2(x) + +"convert Float64 to Float16" +truncdfhf2(x::Float64) = truncXfYf2(Float16, x) +convert(::Type{Float16}, x::Float64) = truncdfhf2(x) + +# "convert Float128 to Float16" +# trunctfhf2(x :: Float128) = truncXfYf2(Float16, x) +# convert(::Type{Float16}, x::Float128) = trunctfhf2(x) + +"convert Float64 to Float32" +truncdfsf2(x::Float64) = truncXfYf2(Float32, x) +convert(::Type{Float32}, x::Float64) = truncdfsf2(x) + +# "convert Float128 to Float32" +# trunctfsf2(x :: Float128) = truncXfYf2(Float32, x) +# convert(::Type{Float32}, x::Float128) = trunctfsf2(x) + +# "convert Float128 to Float64" +# trunctfdf2(x :: Float128) = truncXfYf2(Float64, x) +# convert(::Type{Float64}, x::Float128) = trunctfdf2(x) + +### +# Conversion between integers and floats +### + +"convert Float32 to Int32" +fixsfsi(x::Float32) = fixint(Int32, x) +convert(::Type{Int32}, x::Float32) = fixsfsi(x) + +"convert Float32 to Int64" +fixsfdi(x::Float32) = fixint(Int64, x) +convert(::Type{Int64}, x::Float32) = fixsfdi(x) + +"convert Float32 to Int128" +fixsfti(x::Float32) = fixint(Int128, x) +convert(::Type{Int128}, x::Float32) = fixsfti(x) + +"convert Float64 to Int32" +fixdfsi(x::Float64) = fixint(Int32, x) +convert(::Type{Int32}, x::Float64) = 
fixdfsi(x) + +"convert Float64 to Int64" +fixdfdi(x::Float64) = fixint(Int64, x) +convert(::Type{Int64}, x::Float64) = fixdfdi(x) + +"convert Float64 to Int128" +fixdfti(x::Float64) = fixint(Int128, x) +convert(::Type{Int128}, x::Float64) = fixdfti(x) + +# "convert Float128 to Int32" +# fixtfsi(x::Float128) = fixint(Int32, x) +# convert(::Type{Int32}, x::Float128) = fixtfsi(x) + +# "convert Float128 to Int64" +# fixtfdi(x::Float128) = fixint(Int64, x) +# convert(::Type{Int64}, x::Float128) = fixtfdi(x) + +# "convert Float128 to Int128" +# fixtfti(x::Float128) = fixint(Int128, x) +# convert(::Type{Int128}, x::Float128) = fixtfti(x) + +# Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi"; +# Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi"; +# Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti"; +# Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi"; +# Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi"; +# Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti"; +# Names[RTLIB::FPTOUINT_F128_I32] = "__fixunstfsi"; +# Names[RTLIB::FPTOUINT_F128_I64] = "__fixunstfdi"; +# Names[RTLIB::FPTOUINT_F128_I128] = "__fixunstfti"; +# Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf"; +# Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf"; +# Names[RTLIB::SINTTOFP_I32_F128] = "__floatsitf"; +# Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf"; +# Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf"; +# Names[RTLIB::SINTTOFP_I64_F128] = "__floatditf"; + +"convert Int128 to Float32" +function floattisf(x::Int128) + x == 0 && return 0f0 + s = ((x >>> 96) % UInt32) & 0x8000_0000 # sign bit + x = abs(x) % UInt128 + n = 128-leading_zeros(x) # ndigits0z(x,2) + if n <= 24 + y = ((x % UInt32) << (24-n)) & 0x007f_ffff + else + y = ((x >> (n-25)) % UInt32) & 0x00ff_ffff # keep 1 extra bit + y = (y+one(UInt32))>>1 # round, ties up (extra leading bit in case of next exponent) + y &= ~UInt32(trailing_zeros(x) == (n-25)) # fix last bit to round to even + end + d = ((n+126) % UInt32) << 23 + reinterpret(Float32, s | d + y) +end 
+convert(::Type{Float32}, x::Int128) = floattisf(x) + +"convert Int128 to Float64" +function floattidf(x::Int128) + x == 0 && return 0.0 + s = ((x >>> 64) % UInt64) & 0x8000_0000_0000_0000 # sign bit + x = abs(x) % UInt128 + n = 128-leading_zeros(x) # ndigits0z(x,2) + if n <= 53 + y = ((x % UInt64) << (53-n)) & 0x000f_ffff_ffff_ffff + else + y = ((x >> (n-54)) % UInt64) & 0x001f_ffff_ffff_ffff # keep 1 extra bit + y = (y+1)>>1 # round, ties up (extra leading bit in case of next exponent) + y &= ~UInt64(trailing_zeros(x) == (n-54)) # fix last bit to round to even + end + d = ((n+1022) % UInt64) << 52 + reinterpret(Float64, s | d + y) +end +convert(::Type{Float64}, x::Int128) = floattidf(x) + +# Names[RTLIB::SINTTOFP_I128_F128] = "__floattitf"; +# Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf"; +# Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf"; +# Names[RTLIB::UINTTOFP_I32_F128] = "__floatunsitf"; +# Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf"; +# Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf"; +# Names[RTLIB::UINTTOFP_I64_F128] = "__floatunditf"; + +"convert UInt128 to Float32" +function floatuntisf(x::UInt128) + x == 0 && return 0f0 + n = 128-leading_zeros(x) # ndigits0z(x,2) + if n <= 24 + y = ((x % UInt32) << (24-n)) & 0x007f_ffff + else + y = ((x >> (n-25)) % UInt32) & 0x00ff_ffff # keep 1 extra bit + y = (y+one(UInt32))>>1 # round, ties up (extra leading bit in case of next exponent) + y &= ~UInt32(trailing_zeros(x) == (n-25)) # fix last bit to round to even + end + d = ((n+126) % UInt32) << 23 + reinterpret(Float32, d + y) +end +convert(::Type{Float32}, x::UInt128) = floatuntisf(x) + +"convert UInt128 to Float64" +function floatuntidf(x::UInt128) + x == 0 && return 0.0 + n = 128-leading_zeros(x) # ndigits0z(x,2) + if n <= 53 + y = ((x % UInt64) << (53-n)) & 0x000f_ffff_ffff_ffff + else + y = ((x >> (n-54)) % UInt64) & 0x001f_ffff_ffff_ffff # keep 1 extra bit + y = (y+1)>>1 # round, ties up (extra leading bit in case of next exponent) + y &= 
~UInt64(trailing_zeros(x) == (n-54)) # fix last bit to round to even + end + d = ((n+1022) % UInt64) << 52 + reinterpret(Float64, d + y) +end +convert(::Type{Float64}, x::UInt128) = floatuntidf(x) + +# Names[RTLIB::UINTTOFP_I128_F128] = "__floatuntitf"; +end + +# RTLIB.register(RTLIB.extenddftf2, Float128, Tuple{Float64}, "__extenddftf2") +# RTLIB.register(RTLIB.extendsftf2, Float128, Tuple{Float32}, "__extendsftf2") +RTLIB.register(RTLIB.extendsfdf2, Float64, Tuple{Float32}, "__extendsfdf2") +if is_apple() + RTLIB.register(RTLIB.extendhfsf2, Float32, Tuple{Float16}, "__extendhfsf2") + RTLIB.register(RTLIB.truncsfhf2, Float16, Tuple{Float32}, "__truncsfhf2") +else + RTLIB.register(RTLIB.extendhfsf2, Float32, Tuple{Float16}, "__gnu_h2f_ieee") + RTLIB.register(RTLIB.truncsfhf2, Float16, Tuple{Float32}, "__gnu_f2h_ieee") +end +RTLIB.register(RTLIB.truncdfhf2, Float16, Tuple{Float64}, "__truncdfhf2") +# RTLIB.register(RTLIB.trunctfhf2, Float16, Tuple{Float128}, "__trunctfhf2") +RTLIB.register(RTLIB.truncdfsf2, Float32, Tuple{Float64}, "__truncdfsf2") +# RTLIB.register(RTLIB.trunctfsf2, Float32, Tuple{Float128}, "__trunctfsf2") +# RTLIB.register(RTLIB.trunctfdf2, Float64, Tuple{Float128}, "__trunctfdf2") + +RTLIB.register(RTLIB.fixsfsi, Int32, Tuple{Float32}, "__fixsfsi") +RTLIB.register(RTLIB.fixsfdi, Int64, Tuple{Float32}, "__fixsfdi") +RTLIB.register(RTLIB.fixsfti, Int128, Tuple{Float32}, "__fixsfti") +RTLIB.register(RTLIB.fixdfsi, Int32, Tuple{Float64}, "__fixdfsi") +RTLIB.register(RTLIB.fixdfdi, Int64, Tuple{Float64}, "__fixdfdi") +RTLIB.register(RTLIB.fixdfti, Int128, Tuple{Float64}, "__fixdfti") +# RTLIB.register(RTLIB.fixtfsi, Int32, Tuple{Float128}, "__fixtfsi") +# RTLIB.register(RTLIB.fixtfdi, Int64, Tuple{Float128}, "__fixtfdi") +# RTLIB.register(RTLIB.fixtfti, Int128, Tuple{Float128}, "__fixtfti") + +RTLIB.register(RTLIB.floattisf, Float32, Tuple{Int128}, "__floattisf") +RTLIB.register(RTLIB.floattidf, Float64, Tuple{Int128}, "__floattidf") 
+RTLIB.register(RTLIB.floatuntisf, Float32, Tuple{UInt128}, "__floatuntisf")
+RTLIB.register(RTLIB.floatuntidf, Float64, Tuple{UInt128}, "__floatuntidf")
diff --git a/base/rtlib/fp_extend.jl b/base/rtlib/fp_extend.jl
new file mode 100644
index 0000000000000..66fd5feacdc52
--- /dev/null
+++ b/base/rtlib/fp_extend.jl
@@ -0,0 +1,101 @@
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is dual licensed under the MIT and the University of Illinois Open
+# Source Licenses. See LICENSE.TXT for details.
+#
+#
+# This file implements a fairly generic conversion from a narrower to a wider
+# IEEE-754 floating-point type. The constants and types defined following the
+# includes below parameterize the conversion.
+#
+# It does not support types that don't use the usual IEEE-754 interchange
+# formats; specifically, some work would be needed to adapt it to
+# (for example) the Intel 80-bit format or PowerPC double-double format.
+#
+# Note please, however, that this implementation is only intended to support
+# *widening* operations; if you need to convert to a *narrower* floating-point
+# type (e.g. double -> float), then this routine will not do what you want it
+# to.
+#
+# It also requires that integer types at least as large as both formats
+# are available on the target platform; this may pose a problem when trying
+# to add support for quad on some 32-bit systems, for example. You also may
+# run into trouble finding an appropriate CLZ function for wide source types;
+# you will likely need to roll your own on some platforms.
+#
+# Finally, the following assumptions are made:
+#
+# 1. floating-point types and integer types have the same endianness on the
+# target platform
+#
+# 2. quiet NaNs, if supported, are indicated by the leading bit of the
+# significand field being set
+
+@inline function extendXfYf2{dst_t<:RTLIB_FLOAT, src_t<:RTLIB_FLOAT}(::Type{dst_t}, a::src_t)
+    # Various constants whose values follow from the type parameters.
+    # Any reasonable optimizer will fold and propagate all of these.
+    const src_rep_t = fptoui(src_t)
+    const dst_rep_t = fptoui(dst_t)
+    const srcSigBits = significand_bits(src_t)
+    const dstSigBits = significand_bits(dst_t)
+
+    const srcBits = nbits(src_t)
+    const srcExpBits = exponent_bits(src_t)
+    const srcInfExp = exponent_inf(src_t)
+    const srcExpBias = exponent_bias(src_t)
+
+    const srcMinNormal = one(src_rep_t) << srcSigBits
+    const srcInfinity = srcInfExp << srcSigBits
+    const srcSignMask = one(src_rep_t) << (srcSigBits + srcExpBits)
+    const srcAbsMask = srcSignMask - one(src_rep_t)
+    const srcQNaN = one(src_rep_t) << (srcSigBits - 1)
+    const srcNaNCode = srcQNaN - one(src_rep_t)
+
+    const dstBits = nbits(dst_t)
+    const dstExpBits = exponent_bits(dst_t)
+    const dstInfExp = exponent_inf(dst_t)
+    const dstExpBias = exponent_bias(dst_t)
+
+    const dstMinNormal = one(dst_rep_t) << dstSigBits
+
+    # Break a into a sign and representation of the absolute value
+    const aRep = reinterpret(src_rep_t, a)
+    const aAbs = aRep & srcAbsMask
+    const sign = aRep & srcSignMask
+
+    local absResult :: dst_rep_t
+
+    if (aAbs - srcMinNormal) < (srcInfinity - srcMinNormal)
+        # a is a normal number.
+        # Extend to the destination type by shifting the significand and
+        # exponent into the proper position and rebiasing the exponent.
+        absResult = (aAbs % dst_rep_t) << (dstSigBits - srcSigBits)
+        absResult += (dstExpBias - srcExpBias) << dstSigBits
+    elseif aAbs >= srcInfinity
+        # a is NaN or infinity.
+        # Conjure the result by beginning with infinity, then setting the qNaN
+        # bit (if needed) and right-aligning the rest of the trailing NaN
+        # payload field.
+        # The `% dst_rep_t` conversions are parenthesized so they apply before the shift.
+        absResult = dstInfExp << dstSigBits
+        absResult |= ((aAbs & srcQNaN) % dst_rep_t) << (dstSigBits - srcSigBits)
+        absResult |= ((aAbs & srcNaNCode) % dst_rep_t) << (dstSigBits - srcSigBits)
+    elseif aAbs != zero(src_rep_t) # in c if (aAbs)
+        # a is denormal.
+        # renormalize the significand and clear the leading bit, then insert
+        # the correct adjusted exponent in the destination type.
+        const scale = leading_zeros(aAbs) - leading_zeros(srcMinNormal)
+        absResult = (aAbs % dst_rep_t) << (dstSigBits - srcSigBits + scale)
+        absResult $= dstMinNormal
+        const resultExponent = (dstExpBias - srcExpBias - scale + 1) % dst_rep_t
+        absResult |= resultExponent << dstSigBits
+    else
+        # a is zero.
+        absResult = zero(dst_rep_t)
+    end
+
+    # Apply the signbit to (dst_t)abs(a).
+    const result = absResult | ((sign % dst_rep_t) << (dstBits - srcBits))
+    return reinterpret(dst_t, result)
+end
diff --git a/base/rtlib/fp_fixint.jl b/base/rtlib/fp_fixint.jl
new file mode 100644
index 0000000000000..fe5a3dfc71533
--- /dev/null
+++ b/base/rtlib/fp_fixint.jl
@@ -0,0 +1,51 @@
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is dual licensed under the MIT and the University of Illinois Open
+# Source Licenses. See LICENSE.TXT for details.
+#
+#
+# This file implements float to integer conversion for the
+# compiler-rt library.
+#
+
+# Truncate the floating-point value `a` toward zero and return it as a `fixint_t`.
+# Magnitudes below one give zero; an unbiased exponent of at least the bit width of
+# `fixint_t` saturates to typemax(fixint_t) or typemin(fixint_t) according to sign.
+@inline function fixint{fixint_t, fp_t<:RTLIB_FLOAT}(::Type{fixint_t}, a::fp_t)
+    const rep_t = fptoui(fp_t)
+    const sigBits = Int(significand_bits(fp_t))
+
+    # Get masks
+    const implicitBit = one(rep_t) << sigBits
+    const significandMask = implicitBit - one(rep_t)
+    const signBit = one(rep_t) << (sigBits + Int(exponent_bits(fp_t)))
+    const absMask = signBit - one(rep_t)
+
+    # Break a into sign, exponent, significand
+    const aRep = reinterpret(rep_t, a)
+    const aAbs = aRep & absMask
+    const sign = ifelse((aRep & signBit) != 0, -one(fixint_t), one(fixint_t))
+    # The unbiased exponent is computed as a signed Int so that it can be negative.
+    const exponent = Int(aAbs >> sigBits) - Int(exponent_bias(fp_t))
+    const significand = (aAbs & significandMask) | implicitBit
+
+    # If exponent is negative, the result is zero.
+    if exponent < 0
+        return zero(fixint_t)
+    end
+
+    # If the value is too large for the integer type, saturate.
+    if exponent >= 8 * sizeof(fixint_t)
+        return ifelse(sign == 1, typemax(fixint_t), typemin(fixint_t))
+    end
+
+    # If 0 <= exponent < sigBits, right shift to get the result.
+    # Otherwise, shift left. The magnitude is converted to fixint_t before it is
+    # multiplied by sign so that both branches return a fixint_t.
+    if exponent < sigBits
+        return sign * ((significand >> (sigBits - exponent)) % fixint_t)
+    else
+        return sign * ((significand % fixint_t) << (exponent - sigBits))
+    end
+end
diff --git a/base/rtlib/fp_trunc.jl b/base/rtlib/fp_trunc.jl
new file mode 100644
index 0000000000000..ca56c11e6cca8
--- /dev/null
+++ b/base/rtlib/fp_trunc.jl
@@ -0,0 +1,135 @@
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is dual licensed under the MIT and the University of Illinois Open
+# Source Licenses. See LICENSE.TXT for details.
+#
+#
+# This file implements a fairly generic conversion from a wider to a narrower
+# IEEE-754 floating-point type in the default (round to nearest, ties to even)
+# rounding mode. The constants and types defined following the includes below
+# parameterize the conversion.
+#
+# This routine can be trivially adapted to support conversions to
+# half-precision or from quad-precision. It does not support types that don't
+# use the usual IEEE-754 interchange formats; specifically, some work would be
+# needed to adapt it to (for example) the Intel 80-bit format or PowerPC
+# double-double format.
+#
+# Note please, however, that this implementation is only intended to support
+# *narrowing* operations; if you need to convert to a *wider* floating-point
+# type (e.g. float -> double), then this routine will not do what you want it
+# to.
+#
+# It also requires that integer types at least as large as both formats
+# are available on the target platform; this may pose a problem when trying
+# to add support for quad on some 32-bit systems, for example.
+#
+# Finally, the following assumptions are made:
+#
+# 1. floating-point types and integer types have the same endianness on the
+# target platform
+#
+# 2.
quiet NaNs, if supported, are indicated by the leading bit of the
+# significand field being set
+
+@inline function truncXfYf2{dst_t<:RTLIB_FLOAT, src_t<:RTLIB_FLOAT}(::Type{dst_t}, a::src_t)
+    # Various constants whose values follow from the type parameters.
+    # Any reasonable optimizer will fold and propagate all of these.
+    const src_rep_t = fptoui(src_t)
+    const dst_rep_t = fptoui(dst_t)
+    const srcSigBits = significand_bits(src_t)
+    const dstSigBits = significand_bits(dst_t)
+
+    const srcBits = nbits(src_t)
+    const srcExpBits = exponent_bits(src_t)
+    const srcInfExp = exponent_inf(src_t)
+    const srcExpBias = exponent_bias(src_t)
+
+    const srcMinNormal = one(src_rep_t) << srcSigBits
+    const srcSignificandMask = srcMinNormal - one(src_rep_t)
+    const srcInfinity = srcInfExp << srcSigBits
+    const srcSignMask = one(src_rep_t) << (srcSigBits + srcExpBits)
+    const srcAbsMask = srcSignMask - one(src_rep_t)
+    const roundMask = (one(src_rep_t) << (srcSigBits - dstSigBits)) - one(src_rep_t)
+    const halfway = one(src_rep_t) << (srcSigBits - dstSigBits - one(src_rep_t))
+    const srcQNaN = one(src_rep_t) << (srcSigBits - one(src_rep_t))
+    const srcNaNCode = srcQNaN - one(src_rep_t)
+
+    const dstBits = nbits(dst_t)
+    const dstInfExp = exponent_inf(dst_t)
+    const dstExpBias = exponent_bias(dst_t)
+
+    const underflowExponent = srcExpBias - dstExpBias + 1
+    const overflowExponent = srcExpBias + dstInfExp - dstExpBias
+    const underflow::src_rep_t = underflowExponent << srcSigBits
+    const overflow::src_rep_t = overflowExponent << srcSigBits
+
+    const dstQNaN = one(dst_rep_t) << (dstSigBits - 1)
+    const dstNaNCode = dstQNaN - one(dst_rep_t)
+
+    # Break a into a sign and representation of the absolute value
+    const aRep = reinterpret(src_rep_t, a)
+    const aAbs = aRep & srcAbsMask
+    const sign = aRep & srcSignMask
+
+    local absResult :: dst_rep_t
+
+    if (aAbs - underflow) < (aAbs - overflow)
+        # The exponent of a is within the range of normal numbers in the
+        # destination format. We can convert by simply right-shifting with
+        # rounding and adjusting the exponent.
+        absResult = (aAbs >> (srcSigBits - dstSigBits)) % dst_rep_t
+        absResult -= ((srcExpBias - dstExpBias) % dst_rep_t) << dstSigBits
+
+        const roundBits = aAbs & roundMask
+        # Round to nearest
+        if roundBits > halfway
+            absResult += one(dst_rep_t)
+        # Ties to even
+        elseif roundBits == halfway
+            absResult += absResult & one(dst_rep_t)
+        end
+    elseif aAbs > srcInfinity
+        # a is NaN.
+        # Conjure the result by beginning with infinity, setting the qNaN
+        # bit and inserting the (truncated) trailing NaN field.
+        absResult = dstInfExp << dstSigBits
+        absResult |= dstQNaN
+        absResult |= ((aAbs & srcNaNCode) >> (srcSigBits - dstSigBits)) & dstNaNCode
+    elseif aAbs >= overflow
+        # a overflows to infinity.
+        absResult = dstInfExp << dstSigBits
+    else
+        # a underflows on conversion to the destination type or is an exact
+        # zero. The result may be a denormal or zero. Extract the exponent
+        # to get the shift amount for the denormalization.
+        const aExp = aAbs >> srcSigBits
+        const shift = srcExpBias - dstExpBias - aExp + 1
+
+        const significand = (aRep & srcSignificandMask) | srcMinNormal
+
+        # Right shift by the denormalization amount with sticky.
+        if shift > srcSigBits
+            absResult = zero(dst_rep_t)
+        else
+            # sticky is a single bit: 1 iff any bit shifted out below is set
+            const lostBits = significand << (srcBits - shift)
+            const sticky = ifelse(lostBits != 0, one(src_rep_t), zero(src_rep_t))
+            denormalizedSignificand = (significand >> shift) | sticky
+            absResult = (denormalizedSignificand >> (srcSigBits - dstSigBits)) % dst_rep_t
+            const roundBits = denormalizedSignificand & roundMask
+            # Round to nearest
+            if roundBits > halfway
+                absResult += one(dst_rep_t)
+            # Ties to even
+            elseif roundBits == halfway
+                absResult += absResult & one(dst_rep_t)
+            end
+        end
+    end
+
+    # Apply the signbit to (dst_t)abs(a).
+    const result = absResult | ((sign >> (srcBits - dstBits)) % dst_rep_t)
+    return reinterpret(dst_t, result)
+end
\ No newline at end of file
diff --git a/base/rtlib/fp_util.jl b/base/rtlib/fp_util.jl
new file mode 100644
index 0000000000000..4dee24fa14326
--- /dev/null
+++ b/base/rtlib/fp_util.jl
@@ -0,0 +1,41 @@
+# This file is a part of Julia. License is MIT: http://julialang.org/license
+import Base: @pure
+
+typealias RTLIB_FLOAT Union{Float16, Float32, Float64}
+const CHAR_BIT = 8
+
+fptoui(::Type{Float16}) = UInt16
+fptoui(::Type{Float32}) = UInt32
+fptoui(::Type{Float64}) = UInt64
+# fptoui(::Type{Float128}) = UInt128
+
+fptosi(::Type{Float16}) = Int16
+fptosi(::Type{Float32}) = Int32
+fptosi(::Type{Float64}) = Int64
+# fptosi(::Type{Float128}) = Int128
+
+signed(::Type{UInt8}) = Int8
+signed(::Type{UInt16}) = Int16
+signed(::Type{UInt32}) = Int32
+signed(::Type{UInt64}) = Int64
+signed(::Type{UInt128}) = Int128
+
+unsigned(::Type{Int8}) = UInt8
+unsigned(::Type{Int16}) = UInt16
+unsigned(::Type{Int32}) = UInt32
+unsigned(::Type{Int64}) = UInt64
+unsigned(::Type{Int128}) = UInt128
+
+nbits{T}(::Type{T}) = sizeof(T) * CHAR_BIT
+
+significand_bits(::Type{Float16}) = 10
+significand_bits(::Type{Float32}) = 23
+significand_bits(::Type{Float64}) = 52
+#significand_bits(::Type{Float128}) = 112
+
+@pure exponent_bits{T<:RTLIB_FLOAT}(::Type{T}) = nbits(T) - significand_bits(T) - 1
+@pure exponent_inf{T<:RTLIB_FLOAT}(::Type{T}) = (one(fptoui(T)) << exponent_bits(T)) - one(fptoui(T))
+@pure exponent_bias{T<:RTLIB_FLOAT}(::Type{T}) = exponent_inf(T) >> 1
+
+@pure sign_mask{T<:RTLIB_FLOAT}(::Type{T}) = one(fptoui(T)) << (significand_bits(T) + exponent_bits(T))
+
diff --git a/base/sysimg.jl b/base/sysimg.jl
index 6c739d9c8f36c..b51b5dfaefc90 100644
--- a/base/sysimg.jl
+++ b/base/sysimg.jl
@@ -103,11 +103,16 @@ Array{T}(::Type{T}, m::Integer) = Array{T,1}(Int(m))
 Array{T}(::Type{T}, m::Integer,n::Integer) = Array{T,2}(Int(m),Int(n))
 Array{T}(::Type{T},
m::Integer,n::Integer,o::Integer) = Array{T,3}(Int(m),Int(n),Int(o)) +# OS specific stuff part one +include("osutils.jl") +include("c.jl") + # numeric operations include("hashing.jl") include("rounding.jl") importall .Rounding -include("float.jl") +include("rtlib/RTLIB.jl") +include("float.jl") # depends on rtlib include("complex.jl") include("rational.jl") include("multinverses.jl") @@ -138,11 +143,9 @@ typealias StridedVector{T,A<:Union{DenseArray,StridedReshapedArray},I<:Tuple{Var typealias StridedMatrix{T,A<:Union{DenseArray,StridedReshapedArray},I<:Tuple{Vararg{Union{RangeIndex, AbstractCartesianIndex}}}} Union{DenseArray{T,2}, SubArray{T,2,A,I}, StridedReshapedArray{T,2}} typealias StridedVecOrMat{T} Union{StridedVector{T}, StridedMatrix{T}} -# For OS specific stuff +# For OS specific stuff part two include(String(vcat(length(Core.ARGS)>=2?Core.ARGS[2].data:"".data, "build_h.jl".data))) # include($BUILDROOT/base/build_h.jl) include(String(vcat(length(Core.ARGS)>=2?Core.ARGS[2].data:"".data, "version_git.jl".data))) # include($BUILDROOT/base/version_git.jl) -include("osutils.jl") -include("c.jl") include("sysinfo.jl") if !isdefined(Core, :Inference) diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 077d6934628d0..b7098679571f3 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -342,12 +342,9 @@ JL_DLLEXPORT Type *julia_type_to_llvm(jl_value_t *jt, bool *isboxed) return T_size; int nb = jl_datatype_size(jt); if (jl_is_floattype(jt)) { -#ifndef DISABLE_FLOAT16 if (nb == 2) return T_float16; - else -#endif - if (nb == 4) + else if (nb == 4) return T_float32; else if (nb == 8) return T_float64; diff --git a/src/codegen.cpp b/src/codegen.cpp index 5a1bd54d7131d..c5abc914448a0 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -174,8 +174,6 @@ extern void _chkstk(void); #define __alignof__ __alignof #endif -#define DISABLE_FLOAT16 - // llvm state #if JL_LLVM_VERSION >= 30900 JL_DLLEXPORT LLVMContext jl_LLVMContext; diff --git a/src/intrinsics.cpp 
b/src/intrinsics.cpp index 14bf8bc59b01a..2d459b19d1443 100644 --- a/src/intrinsics.cpp +++ b/src/intrinsics.cpp @@ -72,12 +72,9 @@ extern "C" JL_DLLEXPORT int8_t jl_is_memdebug() { static Type *FTnbits(size_t nb) { -#ifndef DISABLE_FLOAT16 if (nb == 16) return T_float16; - else -#endif - if (nb == 32) + else if (nb == 32) return T_float32; else if (nb == 64) return T_float64; @@ -109,6 +106,7 @@ static Type *JL_INTT(Type *t) return t; if (t->isPointerTy()) return T_size; + if (t == T_float16) return T_int16; if (t == T_float32) return T_int32; if (t == T_float64) return T_int64; assert(t == T_void); @@ -118,18 +116,18 @@ static Type *JL_INTT(Type *t) static jl_value_t *JL_JLUINTT(Type *t) { assert(!t->isIntegerTy()); + if (t == T_float16) return (jl_value_t*)jl_uint16_type; if (t == T_float32) return (jl_value_t*)jl_uint32_type; if (t == T_float64) return (jl_value_t*)jl_uint64_type; - if (t == T_float16) return (jl_value_t*)jl_uint16_type; assert(t == T_void); return jl_bottom_type; } static jl_value_t *JL_JLSINTT(Type *t) { assert(!t->isIntegerTy()); + if (t == T_float16) return (jl_value_t*)jl_int16_type; if (t == T_float32) return (jl_value_t*)jl_int32_type; if (t == T_float64) return (jl_value_t*)jl_int64_type; - if (t == T_float16) return (jl_value_t*)jl_int16_type; assert(t == T_void); return jl_bottom_type; } @@ -180,10 +178,8 @@ static Constant *julia_const_to_llvm(void *ptr, jl_value_t *bt) } case 2: { uint16_t data16 = *(uint16_t*)ptr; -#ifndef DISABLE_FLOAT16 if (jl_is_floattype(bt)) return ConstantFP::get(jl_LLVMContext, LLVM_FP(APFloat::IEEEhalf,APInt(16,data16))); -#endif return ConstantInt::get(T_int16, data16); } case 4: { diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 9fb098edf635f..9eeedaa730741 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -308,6 +308,28 @@ void NotifyDebugger(jit_code_entry *JITCodeEntry) } // ------------------------ END OF TEMPORARY COPY FROM LLVM ----------------- +// Resolve llvm libcalls to the 
implementations in rtlib.jl
+static uint64_t resolve_libcalls(const char *name)
+{
+    static void *sys_hdl = jl_load_dynamic_library_e("sys", JL_RTLD_LOCAL); // function-local static: initialized on the first call only
+    static const char *const prefix = "__"; // only names starting with "__" are looked up
+    if (!sys_hdl) {
+        jl_printf(JL_STDERR, "WARNING: Unable to load sysimage\n");
+        return 0; // 0: not resolved (the caller falls through to its failure path)
+    }
+    if (strncmp(name, prefix, strlen(prefix)) != 0)
+        return 0; // not a "__"-prefixed name: nothing to resolve here
+#if defined(_OS_DARWIN_)
+    // jl_dlsym_e expects an unmangled 'C' symbol name,
+    // so iff we are on Darwin we strip the leading '_' off.
+    static const char *const mangled_prefix = "___"; // "_" followed by prefix
+    if (strncmp(name, mangled_prefix, strlen(mangled_prefix)) == 0) {
+        ++name; // skip exactly one leading '_'
+    }
+#endif
+    return (uintptr_t)jl_dlsym_e(sys_hdl, name); // NOTE(review): presumably 0 when the symbol is missing -- confirm against jl_dlsym_e
+}
+
 #ifdef _OS_LINUX_
 // Resolve non-lock free atomic functions in the libatomic library.
 // This is the library that provides support for c11/c++11 atomic operations.
@@ -542,6 +564,8 @@ void JuliaOJIT::addModule(std::unique_ptr M)
                         if (uint64_t addr = resolve_atomic(Name.c_str()))
                             return JL_SymbolInfo(addr, JITSymbolFlags::Exported);
 #endif
+                        if (uint64_t addr = resolve_libcalls(Name.c_str()))
+                            return JL_SymbolInfo(addr, JITSymbolFlags::Exported);
                         // Return failure code
                         return JL_SymbolInfo(nullptr);
                     },
diff --git a/test/choosetests.jl b/test/choosetests.jl
index 03b91553e40d8..1e242f46fe865 100644
--- a/test/choosetests.jl
+++ b/test/choosetests.jl
@@ -16,7 +16,7 @@ Upon return, `tests` is a vector of fully-expanded test names, and
 function choosetests(choices = [])
     testnames = [
         "linalg", "subarray", "core", "inference", "keywordargs", "numbers",
-        "printf", "char", "strings", "triplequote", "unicode",
+        "printf", "char", "strings", "triplequote", "unicode", "rtlib",
         "dates", "dict", "hashing", "iobuffer", "staged", "offsetarray",
         "arrayops", "tuple", "reduce", "reducedim", "random", "abstractarray",
         "intfuncs", "simdloop", "vecelement", "blas", "sparse",
diff --git a/test/rtlib.jl b/test/rtlib.jl
new file mode 100644
index 0000000000000..ccfc71e30eb76
--- /dev/null
+++ b/test/rtlib.jl
@@
-0,0 +1,118 @@
+# This file is part of Julia. License is MIT: http://julialang.org/license
+# Parts of it are copied from llvm's compiler-rt
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is dual licensed under the MIT and the University of Illinois Open
+# Source Licenses. See LICENSE.TXT for details.
+
+using Base.Test
+import Base.RTLIB
+@testset "RTLIB" begin
+# Float64 arguments, results compared bit-for-bit (===) against Float16 values
+@testset "truncdfhf2" begin
+    @test RTLIB.truncdfhf2(NaN) === NaN16
+    @test RTLIB.truncdfhf2(Inf) === Inf16
+    @test RTLIB.truncdfhf2(-Inf) === -Inf16
+    @test RTLIB.truncdfhf2(0.0) === reinterpret(Float16, 0x0000)
+    @test RTLIB.truncdfhf2(-0.0) === reinterpret(Float16, 0x8000)
+    @test RTLIB.truncdfhf2(3.1415926535) === reinterpret(Float16, 0x4248)
+    @test RTLIB.truncdfhf2(-3.1415926535) === reinterpret(Float16, 0xc248)
+    @test RTLIB.truncdfhf2(0x1.987124876876324p+1000) === reinterpret(Float16, 0x7c00)
+    @test RTLIB.truncdfhf2(0x1.987124876876324p+12) === reinterpret(Float16, 0x6e62)
+    @test RTLIB.truncdfhf2(0x1.0p+0) === reinterpret(Float16, 0x3c00)
+    @test RTLIB.truncdfhf2(0x1.0p-14) === reinterpret(Float16, 0x0400)
+    # denormal
+    @test RTLIB.truncdfhf2(0x1.0p-20) === reinterpret(Float16, 0x0010)
+    @test RTLIB.truncdfhf2(0x1.0p-24) === reinterpret(Float16, 0x0001)
+    @test RTLIB.truncdfhf2(-0x1.0p-24) === reinterpret(Float16, 0x8001)
+    @test RTLIB.truncdfhf2(0x1.5p-25) === reinterpret(Float16, 0x0001)
+    # and back to zero
+    @test RTLIB.truncdfhf2(0x1.0p-25) === reinterpret(Float16, 0x0000)
+    @test RTLIB.truncdfhf2(-0x1.0p-25) === reinterpret(Float16, 0x8000)
+    # max (precise)
+    @test RTLIB.truncdfhf2(65504.0) === reinterpret(Float16, 0x7bff)
+    # max (rounded)
+    @test RTLIB.truncdfhf2(65519.0) === reinterpret(Float16, 0x7bff)
+    # max (to +inf)
+    @test RTLIB.truncdfhf2(65520.0) === reinterpret(Float16, 0x7c00)
+    @test RTLIB.truncdfhf2(-65520.0) === reinterpret(Float16, 0xfc00)
+    @test RTLIB.truncdfhf2(65536.0) === reinterpret(Float16, 0x7c00)
+end
+
+# Float64 argument (the literal is 2^128), expected to give Inf32
+@testset "truncdfsf2" begin
+    @test RTLIB.truncdfsf2(340282366920938463463374607431768211456.0) === Inf32
+end
+
+# Float32 arguments, results compared bit-for-bit (===) against Float16 values
+@testset "truncsfhf2" begin
+    # NaN
+    @test RTLIB.truncsfhf2(NaN32) === reinterpret(Float16, 0x7e00)
+    # inf
+    @test RTLIB.truncsfhf2(Inf32) === reinterpret(Float16, 0x7c00)
+    @test RTLIB.truncsfhf2(-Inf32) === reinterpret(Float16, 0xfc00)
+    # zero
+    @test RTLIB.truncsfhf2(0.0f0) === reinterpret(Float16, 0x0000)
+    @test RTLIB.truncsfhf2(-0.0f0) === reinterpret(Float16, 0x8000)
+    @test RTLIB.truncsfhf2(3.1415926535f0) === reinterpret(Float16, 0x4248)
+    @test RTLIB.truncsfhf2(-3.1415926535f0) === reinterpret(Float16, 0xc248)
+    @test RTLIB.truncsfhf2(Float32(0x1.987124876876324p+100)) === reinterpret(Float16, 0x7c00)
+    @test RTLIB.truncsfhf2(Float32(0x1.987124876876324p+12)) === reinterpret(Float16, 0x6e62)
+    @test RTLIB.truncsfhf2(Float32(0x1.0p+0)) === reinterpret(Float16, 0x3c00)
+    @test RTLIB.truncsfhf2(Float32(0x1.0p-14)) === reinterpret(Float16, 0x0400)
+    # denormal
+    @test RTLIB.truncsfhf2(Float32(0x1.0p-20)) === reinterpret(Float16, 0x0010)
+    @test RTLIB.truncsfhf2(Float32(0x1.0p-24)) === reinterpret(Float16, 0x0001)
+    @test RTLIB.truncsfhf2(Float32(-0x1.0p-24)) === reinterpret(Float16, 0x8001)
+    @test RTLIB.truncsfhf2(Float32(0x1.5p-25)) === reinterpret(Float16, 0x0001)
+    # and back to zero
+    @test RTLIB.truncsfhf2(Float32(0x1.0p-25)) === reinterpret(Float16, 0x0000)
+    @test RTLIB.truncsfhf2(Float32(-0x1.0p-25)) === reinterpret(Float16, 0x8000)
+    # max (precise)
+    @test RTLIB.truncsfhf2(65504.0f0) === reinterpret(Float16, 0x7bff)
+    # max (rounded)
+    @test RTLIB.truncsfhf2(65519.0f0) === reinterpret(Float16, 0x7bff)
+    # max (to +inf)
+    @test RTLIB.truncsfhf2(65520.0f0) === reinterpret(Float16, 0x7c00)
+    @test RTLIB.truncsfhf2(65536.0f0) === reinterpret(Float16, 0x7c00)
+    @test RTLIB.truncsfhf2(-65520.0f0) === reinterpret(Float16, 0xfc00)
+end
+
+# Float16 arguments (given as bit patterns), results compared against Float32 values
+@testset "extendhfsf2" begin
+    ##
+    # Note:
+    # These tests are taken from the compiler-rt test suite, where (as of 3.9.0)
+    # the tests are done with compareResultH (i.e. after casting to UInt16).
+    # Tests that are commented out fail as === Float32 comparisons.
+    # Some tests succeed with ≈ (and are consistent with Julia v0.5 convert).
+    ##
+    # NaN
+    @test RTLIB.extendhfsf2(reinterpret(Float16, 0x7e00)) === NaN32
+    # inf
+    @test RTLIB.extendhfsf2(reinterpret(Float16, 0x7c00)) === Inf32
+    @test RTLIB.extendhfsf2(reinterpret(Float16, 0xfc00)) === -Inf32
+    # zero
+    @test RTLIB.extendhfsf2(reinterpret(Float16, 0x0000)) === 0.0f0
+    @test RTLIB.extendhfsf2(reinterpret(Float16, 0x8000)) === -0.0f0
+    @test RTLIB.extendhfsf2(reinterpret(Float16, 0x4248)) ≈ Float32(π)
+    @test RTLIB.extendhfsf2(reinterpret(Float16, 0xc248)) ≈ Float32(-π)
+    # @test RTLIB.extendhfsf2(reinterpret(Float16, 0x7c00)) === Float32(0x1.987124876876324p+100)
+    @test RTLIB.extendhfsf2(reinterpret(Float16, 0x6e62)) === Float32(0x1.988p+12)
+    @test RTLIB.extendhfsf2(reinterpret(Float16, 0x3c00)) === Float32(0x1.0p+0)
+    @test RTLIB.extendhfsf2(reinterpret(Float16, 0x0400)) === Float32(0x1.0p-14)
+    # denormal
+    @test RTLIB.extendhfsf2(reinterpret(Float16, 0x0010)) === Float32(0x1.0p-20)
+    @test RTLIB.extendhfsf2(reinterpret(Float16, 0x0001)) === Float32(0x1.0p-24)
+    @test RTLIB.extendhfsf2(reinterpret(Float16, 0x8001)) === Float32(-0x1.0p-24)
+    #@test RTLIB.extendhfsf2(reinterpret(Float16, 0x0001)) === Float32(0x1.5p-25)
+    # and back to zero
+    # @test RTLIB.extendhfsf2(reinterpret(Float16, 0x0000)) === Float32(0x1.0p-25)
+    # @test RTLIB.extendhfsf2(reinterpret(Float16, 0x8000)) === Float32(-0x1.0p-25)
+    # max (precise)
+    @test RTLIB.extendhfsf2(reinterpret(Float16, 0x7bff)) === 65504.0f0
+    # max (rounded) -- same input and expected value as the test above
+    @test RTLIB.extendhfsf2(reinterpret(Float16, 0x7bff)) === 65504.0f0
+end
+end
\ No newline at end of file