diff --git a/base/essentials.jl b/base/essentials.jl
index a5279bffbe1fd..6bba04dbbf2cb 100644
--- a/base/essentials.jl
+++ b/base/essentials.jl
@@ -93,8 +93,6 @@ unsafe_convert{T}(::Type{T}, x::T) = x # unsafe_convert (like convert) defaults
 unsafe_convert{P<:Ptr}(::Type{P}, x::Ptr) = convert(P, x)
 
 reinterpret{T}(::Type{T}, x) = box(T, x)
-reinterpret(::Type{Unsigned}, x::Float16) = reinterpret(UInt16,x)
-reinterpret(::Type{Signed}, x::Float16) = reinterpret(Int16,x)
 
 sizeof(x) = Core.sizeof(x)
 
diff --git a/base/float.jl b/base/float.jl
index 4eafa39353b21..db688a80458bb 100644
--- a/base/float.jl
+++ b/base/float.jl
@@ -43,198 +43,26 @@ A not-a-number value of type `Float64`.
 const NaN = NaN64
 
 ## conversions to floating-point ##
-convert(::Type{Float16}, x::Integer) = convert(Float16, convert(Float32,x))
-for t in (Int8,Int16,Int32,Int64,Int128,UInt8,UInt16,UInt32,UInt64,UInt128)
-    @eval promote_rule(::Type{Float16}, ::Type{$t}) = Float16
-end
-promote_rule(::Type{Float16}, ::Type{Bool}) = Float16
-
-for t1 in (Float32,Float64)
-    for st in (Int8,Int16,Int32,Int64)
+for t1 in (Float16,Float32,Float64)
+    for st in (Int8,Int16,Int32,Int64,Int128)
         @eval begin
             convert(::Type{$t1},x::($st)) = box($t1,sitofp($t1,unbox($st,x)))
             promote_rule(::Type{$t1}, ::Type{$st}  ) = $t1
         end
     end
-    for ut in (Bool,UInt8,UInt16,UInt32,UInt64)
+    for ut in (Bool,UInt8,UInt16,UInt32,UInt64,UInt128)
         @eval begin
             convert(::Type{$t1},x::($ut)) = box($t1,uitofp($t1,unbox($ut,x)))
             promote_rule(::Type{$t1}, ::Type{$ut}  ) = $t1
         end
     end
 end
-convert{T<:Integer}(::Type{T}, x::Float16) = convert(T, Float32(x))
-
-
-promote_rule(::Type{Float64}, ::Type{UInt128}) = Float64
-promote_rule(::Type{Float64}, ::Type{Int128}) = Float64
-promote_rule(::Type{Float32}, ::Type{UInt128}) = Float32
-promote_rule(::Type{Float32}, ::Type{Int128}) = Float32
-
-function convert(::Type{Float64}, x::UInt128)
-    x == 0 && return 0.0
-    n = 128-leading_zeros(x) # ndigits0z(x,2)
-    if n <= 53
-        y = ((x % UInt64) << (53-n)) & 0x000f_ffff_ffff_ffff
-    else
-        y = ((x >> (n-54)) % UInt64) & 0x001f_ffff_ffff_ffff # keep 1 extra bit
-        y = (y+1)>>1 # round, ties up (extra leading bit in case of next exponent)
-        y &= ~UInt64(trailing_zeros(x) == (n-54)) # fix last bit to round to even
-    end
-    d = ((n+1022) % UInt64) << 52
-    reinterpret(Float64, d + y)
-end
-
-function convert(::Type{Float64}, x::Int128)
-    x == 0 && return 0.0
-    s = ((x >>> 64) % UInt64) & 0x8000_0000_0000_0000 # sign bit
-    x = abs(x) % UInt128
-    n = 128-leading_zeros(x) # ndigits0z(x,2)
-    if n <= 53
-        y = ((x % UInt64) << (53-n)) & 0x000f_ffff_ffff_ffff
-    else
-        y = ((x >> (n-54)) % UInt64) & 0x001f_ffff_ffff_ffff # keep 1 extra bit
-        y = (y+1)>>1 # round, ties up (extra leading bit in case of next exponent)
-        y &= ~UInt64(trailing_zeros(x) == (n-54)) # fix last bit to round to even
-    end
-    d = ((n+1022) % UInt64) << 52
-    reinterpret(Float64, s | d + y)
-end
-
-function convert(::Type{Float32}, x::UInt128)
-    x == 0 && return 0f0
-    n = 128-leading_zeros(x) # ndigits0z(x,2)
-    if n <= 24
-        y = ((x % UInt32) << (24-n)) & 0x007f_ffff
-    else
-        y = ((x >> (n-25)) % UInt32) & 0x00ff_ffff # keep 1 extra bit
-        y = (y+one(UInt32))>>1 # round, ties up (extra leading bit in case of next exponent)
-        y &= ~UInt32(trailing_zeros(x) == (n-25)) # fix last bit to round to even
-    end
-    d = ((n+126) % UInt32) << 23
-    reinterpret(Float32, d + y)
-end
 
-function convert(::Type{Float32}, x::Int128)
-    x == 0 && return 0f0
-    s = ((x >>> 96) % UInt32) & 0x8000_0000 # sign bit
-    x = abs(x) % UInt128
-    n = 128-leading_zeros(x) # ndigits0z(x,2)
-    if n <= 24
-        y = ((x % UInt32) << (24-n)) & 0x007f_ffff
-    else
-        y = ((x >> (n-25)) % UInt32) & 0x00ff_ffff # keep 1 extra bit
-        y = (y+one(UInt32))>>1 # round, ties up (extra leading bit in case of next exponent)
-        y &= ~UInt32(trailing_zeros(x) == (n-25)) # fix last bit to round to even
-    end
-    d = ((n+126) % UInt32) << 23
-    reinterpret(Float32, s | d + y)
-end
+convert(::Type{Float16}, x::Union{Float32, Float64}) = box(Float16,fptrunc(Float16,x))
+convert(::Type{Float32}, x::Float64) = box(Float32,fptrunc(Float32,x))
 
-function convert(::Type{Float16}, val::Float32)
-    f = reinterpret(UInt32, val)
-    i = (f >> 23) & 0x1ff + 1
-    sh = shifttable[i]
-    f &= 0x007fffff
-    h::UInt16 = basetable[i] + (f >> sh)
-    # round
-    # NOTE: we maybe should ignore NaNs here, but the payload is
-    # getting truncated anyway so "rounding" it might not matter
-    nextbit = (f >> (sh-1)) & 1
-    if nextbit != 0
-        # Round halfway to even or check lower bits
-        if h&1 == 1 || (f & ((1<<(sh-1))-1)) != 0
-            h += 1
-        end
-    end
-    reinterpret(Float16, h)
-end
-
-function convert(::Type{Float32}, val::Float16)
-    local ival::UInt32 = reinterpret(UInt16, val),
-          sign::UInt32 = (ival & 0x8000) >> 15,
-          exp::UInt32  = (ival & 0x7c00) >> 10,
-          sig::UInt32  = (ival & 0x3ff) >> 0,
-          ret::UInt32
-
-    if exp == 0
-        if sig == 0
-            sign = sign << 31
-            ret = sign | exp | sig
-        else
-            n_bit = 1
-            bit = 0x0200
-            while (bit & sig) == 0
-                n_bit = n_bit + 1
-                bit = bit >> 1
-            end
-            sign = sign << 31
-            exp = (-14 - n_bit + 127) << 23
-            sig = ((sig & (~bit)) << n_bit) << (23 - 10)
-            ret = sign | exp | sig
-        end
-    elseif exp == 0x1f
-        if sig == 0  # Inf
-            if sign == 0
-                ret = 0x7f800000
-            else
-                ret = 0xff800000
-            end
-        else  # NaN
-            ret = 0x7fc00000 | (sign<<31)
-        end
-    else
-        sign = sign << 31
-        exp  = (exp - 15 + 127) << 23
-        sig  = sig << (23 - 10)
-        ret = sign | exp | sig
-    end
-    return reinterpret(Float32, ret)
-end
-
-# Float32 -> Float16 algorithm from:
-#   "Fast Half Float Conversion" by Jeroen van der Zijp
-#   ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf
-
-const basetable = Array{UInt16}(512)
-const shifttable = Array{UInt8}(512)
-
-for i = 0:255
-    e = i - 127
-    if e < -24  # Very small numbers map to zero
-        basetable[i|0x000+1] = 0x0000
-        basetable[i|0x100+1] = 0x8000
-        shifttable[i|0x000+1] = 24
-        shifttable[i|0x100+1] = 24
-    elseif e < -14  # Small numbers map to denorms
-        basetable[i|0x000+1] = (0x0400>>(-e-14))
-        basetable[i|0x100+1] = (0x0400>>(-e-14)) | 0x8000
-        shifttable[i|0x000+1] = -e-1
-        shifttable[i|0x100+1] = -e-1
-    elseif e <= 15  # Normal numbers just lose precision
-        basetable[i|0x000+1] = ((e+15)<<10)
-        basetable[i|0x100+1] = ((e+15)<<10) | 0x8000
-        shifttable[i|0x000+1] = 13
-        shifttable[i|0x100+1] = 13
-    elseif e < 128  # Large numbers map to Infinity
-        basetable[i|0x000+1] = 0x7C00
-        basetable[i|0x100+1] = 0xFC00
-        shifttable[i|0x000+1] = 24
-        shifttable[i|0x100+1] = 24
-    else  # Infinity and NaN's stay Infinity and NaN's
-        basetable[i|0x000+1] = 0x7C00
-        basetable[i|0x100+1] = 0xFC00
-        shifttable[i|0x000+1] = 13
-        shifttable[i|0x100+1] = 13
-    end
-end
-#convert(::Type{Float16}, x::Float32) = box(Float16,fptrunc(Float16,x))
-convert(::Type{Float32}, x::Float64) = box(Float32,fptrunc(Float32,unbox(Float64,x)))
-convert(::Type{Float16}, x::Float64) = convert(Float16, convert(Float32,x))
-
-#convert(::Type{Float32}, x::Float16) = box(Float32,fpext(Float32,x))
-convert(::Type{Float64}, x::Float32) = box(Float64,fpext(Float64,unbox(Float32,x)))
-convert(::Type{Float64}, x::Float16) = convert(Float64, convert(Float32,x))
+convert(::Type{Float32}, x::Float16) = box(Float32,fpext(Float32,x))
+convert(::Type{Float64}, x::Union{Float16,Float32}) = box(Float64,fpext(Float64,x))
 
 convert(::Type{AbstractFloat}, x::Bool)    = convert(Float64, x)
 convert(::Type{AbstractFloat}, x::Int8)    = convert(Float64, x)
@@ -253,81 +81,33 @@ float(x) = convert(AbstractFloat, x)
 # for constructing arrays
 float{T<:Number}(::Type{T}) = typeof(float(zero(T)))
 
-for Ti in (Int8, Int16, Int32, Int64)
+typealias IntrinsicFloats Union{Float16,Float32,Float64}
+for Ti in (Int8, Int16, Int32, Int64, Int128)
     @eval begin
-        unsafe_trunc(::Type{$Ti}, x::Float32) = box($Ti,fptosi($Ti,unbox(Float32,x)))
-        unsafe_trunc(::Type{$Ti}, x::Float64) = box($Ti,fptosi($Ti,unbox(Float64,x)))
+        unsafe_trunc(::Type{$Ti}, x::IntrinsicFloats) = box($Ti,fptosi($Ti,x))
     end
 end
-for Ti in (UInt8, UInt16, UInt32, UInt64)
+for Ti in (UInt8, UInt16, UInt32, UInt64, UInt128)
     @eval begin
-        unsafe_trunc(::Type{$Ti}, x::Float32) = box($Ti,fptoui($Ti,unbox(Float32,x)))
-        unsafe_trunc(::Type{$Ti}, x::Float64) = box($Ti,fptoui($Ti,unbox(Float64,x)))
+        unsafe_trunc(::Type{$Ti}, x::IntrinsicFloats) = box($Ti,fptoui($Ti,x))
     end
 end
 
-function unsafe_trunc(::Type{UInt128}, x::Float64)
-    xu = reinterpret(UInt64,x)
-    k = Int(xu >> 52) & 0x07ff - 1075
-    xu = (xu & 0x000f_ffff_ffff_ffff) | 0x0010_0000_0000_0000
-    if k <= 0
-        UInt128(xu >> -k)
-    else
-        UInt128(xu) << k
-    end
-end
-function unsafe_trunc(::Type{Int128}, x::Float64)
-    copysign(unsafe_trunc(UInt128,x) % Int128, x)
-end
-
-function unsafe_trunc(::Type{UInt128}, x::Float32)
-    xu = reinterpret(UInt32,x)
-    k = Int(xu >> 23) & 0x00ff - 150
-    xu = (xu & 0x007f_ffff) | 0x0080_0000
-    if k <= 0
-        UInt128(xu >> -k)
-    else
-        UInt128(xu) << k
-    end
-end
-function unsafe_trunc(::Type{Int128}, x::Float32)
-    copysign(unsafe_trunc(UInt128,x) % Int128, x)
-end
-
-
 # matches convert methods
 # also determines floor, ceil, round
-trunc(::Type{Signed}, x::Float32) = trunc(Int,x)
-trunc(::Type{Signed}, x::Float64) = trunc(Int,x)
-trunc(::Type{Unsigned}, x::Float32) = trunc(UInt,x)
-trunc(::Type{Unsigned}, x::Float64) = trunc(UInt,x)
-trunc(::Type{Integer}, x::Float32) = trunc(Int,x)
-trunc(::Type{Integer}, x::Float64) = trunc(Int,x)
-trunc{T<:Integer}(::Type{T}, x::Float16) = trunc(T, Float32(x))
+trunc(::Type{Signed},   x::IntrinsicFloats) = trunc(Int,x)
+trunc(::Type{Unsigned}, x::IntrinsicFloats) = trunc(UInt,x)
+trunc(::Type{Integer},  x::IntrinsicFloats) = trunc(Int,x)
 
 # fallbacks
 floor{T<:Integer}(::Type{T}, x::AbstractFloat) = trunc(T,floor(x))
-floor{T<:Integer}(::Type{T}, x::Float16) = floor(T, Float32(x))
 ceil{ T<:Integer}(::Type{T}, x::AbstractFloat) = trunc(T,ceil(x))
-ceil{ T<:Integer}(::Type{T}, x::Float16) = ceil(T, Float32(x))
 round{T<:Integer}(::Type{T}, x::AbstractFloat) = trunc(T,round(x))
-round{T<:Integer}(::Type{T}, x::Float16) = round(T, Float32(x))
-
-trunc(x::Float64) = box(Float64,trunc_llvm(unbox(Float64,x)))
-trunc(x::Float32) = box(Float32,trunc_llvm(unbox(Float32,x)))
-trunc(x::Float16) = Float16(trunc(Float32(x)))
-
-floor(x::Float64) = box(Float64,floor_llvm(unbox(Float64,x)))
-floor(x::Float32) = box(Float32,floor_llvm(unbox(Float32,x)))
-floor(x::Float16) = Float16(floor(Float32(x)))
 
-ceil(x::Float64) = box(Float64,ceil_llvm(unbox(Float64,x)))
-ceil(x::Float32) = box(Float32,ceil_llvm(unbox(Float32,x)))
-ceil(x::Float16) = Float16( ceil(Float32(x)))
-
-round(x::Float64) = box(Float64,rint_llvm(unbox(Float64,x)))
-round(x::Float32) = box(Float32,rint_llvm(unbox(Float32,x)))
-round(x::Float16) = Float16(round(Float32(x)))
+trunc{T<:IntrinsicFloats}(x::T) = box(T,trunc_llvm(x))
+floor{T<:IntrinsicFloats}(x::T) = box(T,floor_llvm(x))
+ceil{T<:IntrinsicFloats}(x::T) = box(T,ceil_llvm(x))
+round{T<:IntrinsicFloats}(x::T) = box(T,rint_llvm(x))
 
 ## floating point promotions ##
 promote_rule(::Type{Float32}, ::Type{Float16}) = Float32
@@ -340,40 +120,29 @@ widen(::Type{Float32}) = Float64
 _default_type(T::Union{Type{Real},Type{AbstractFloat}}) = Float64
 
 ## floating point arithmetic ##
--(x::Float64) = box(Float64,neg_float(unbox(Float64,x)))
--(x::Float32) = box(Float32,neg_float(unbox(Float32,x)))
--(x::Float16) = reinterpret(Float16, reinterpret(UInt16,x) $ 0x8000)
+-{T<:IntrinsicFloats}(x::T) = box(T,neg_float(x))
++{T<:IntrinsicFloats}(x::T, y::T) = box(T,add_float(x,y))
+-{T<:IntrinsicFloats}(x::T, y::T) = box(T,sub_float(x,y))
+*{T<:IntrinsicFloats}(x::T, y::T) = box(T,mul_float(x,y))
+/{T<:IntrinsicFloats}(x::T, y::T) = box(T,div_float(x,y))
 
-for op in (:+,:-,:*,:/,:\,:^)
+for op in (:\,:^)
     @eval ($op)(a::Float16, b::Float16) = Float16(($op)(Float32(a), Float32(b)))
 end
-+(x::Float32, y::Float32) = box(Float32,add_float(unbox(Float32,x),unbox(Float32,y)))
-+(x::Float64, y::Float64) = box(Float64,add_float(unbox(Float64,x),unbox(Float64,y)))
--(x::Float32, y::Float32) = box(Float32,sub_float(unbox(Float32,x),unbox(Float32,y)))
--(x::Float64, y::Float64) = box(Float64,sub_float(unbox(Float64,x),unbox(Float64,y)))
-*(x::Float32, y::Float32) = box(Float32,mul_float(unbox(Float32,x),unbox(Float32,y)))
-*(x::Float64, y::Float64) = box(Float64,mul_float(unbox(Float64,x),unbox(Float64,y)))
-/(x::Float32, y::Float32) = box(Float32,div_float(unbox(Float32,x),unbox(Float32,y)))
-/(x::Float64, y::Float64) = box(Float64,div_float(unbox(Float64,x),unbox(Float64,y)))
-
-muladd(x::Float32, y::Float32, z::Float32) = box(Float32,muladd_float(unbox(Float32,x),unbox(Float32,y),unbox(Float32,z)))
-muladd(x::Float64, y::Float64, z::Float64) = box(Float64,muladd_float(unbox(Float64,x),unbox(Float64,y),unbox(Float64,z)))
-function muladd(a::Float16, b::Float16, c::Float16)
-    Float16(muladd(Float32(a), Float32(b), Float32(c)))
-end
+
+muladd{T<:IntrinsicFloats}(x::T, y::T, z::T) = box(T,muladd_float(x,y,z))
 
 # TODO: faster floating point div?
 # TODO: faster floating point fld?
 # TODO: faster floating point mod?
 
-for func in (:div,:fld,:cld,:rem,:mod)
+for func in (:div,:fld)
     @eval begin
         $func(a::Float16,b::Float16) = Float16($func(Float32(a),Float32(b)))
     end
 end
 
-rem(x::Float32, y::Float32) = box(Float32,rem_float(unbox(Float32,x),unbox(Float32,y)))
-rem(x::Float64, y::Float64) = box(Float64,rem_float(unbox(Float64,x),unbox(Float64,y)))
+rem{T<:IntrinsicFloats}(x::T, y::T) = box(T,rem_float(x,y))
 
 cld{T<:AbstractFloat}(x::T, y::T) = -fld(-x,y)
 
@@ -389,33 +158,13 @@ function mod{T<:AbstractFloat}(x::T, y::T)
 end
 
 ## floating point comparisons ##
-function ==(x::Float16, y::Float16)
-    ix = reinterpret(UInt16,x)
-    iy = reinterpret(UInt16,y)
-    if (ix|iy)&0x7fff > 0x7c00 #isnan(x) || isnan(y)
-        return false
-    end
-    if (ix|iy)&0x7fff == 0x0000
-        return true
-    end
-    return ix == iy
-end
-==(x::Float32, y::Float32) = eq_float(unbox(Float32,x),unbox(Float32,y))
-==(x::Float64, y::Float64) = eq_float(unbox(Float64,x),unbox(Float64,y))
-!=(x::Float32, y::Float32) = ne_float(unbox(Float32,x),unbox(Float32,y))
-!=(x::Float64, y::Float64) = ne_float(unbox(Float64,x),unbox(Float64,y))
-<( x::Float32, y::Float32) = lt_float(unbox(Float32,x),unbox(Float32,y))
-<( x::Float64, y::Float64) = lt_float(unbox(Float64,x),unbox(Float64,y))
-<=(x::Float32, y::Float32) = le_float(unbox(Float32,x),unbox(Float32,y))
-<=(x::Float64, y::Float64) = le_float(unbox(Float64,x),unbox(Float64,y))
-
-isequal(x::Float32, y::Float32) = fpiseq(unbox(Float32,x),unbox(Float32,y))
-isequal(x::Float64, y::Float64) = fpiseq(unbox(Float64,x),unbox(Float64,y))
-isless( x::Float32, y::Float32) = fpislt(unbox(Float32,x),unbox(Float32,y))
-isless( x::Float64, y::Float64) = fpislt(unbox(Float64,x),unbox(Float64,y))
-for op in (:<,:<=,:isless)
-    @eval ($op)(a::Float16, b::Float16) = ($op)(Float32(a), Float32(b))
-end
+=={T<:IntrinsicFloats}(x::T, y::T) = eq_float(x,y)
+!={T<:IntrinsicFloats}(x::T, y::T) = ne_float(x,y)
+<{ T<:IntrinsicFloats}(x::T, y::T) = lt_float(x,y)
+<={T<:IntrinsicFloats}(x::T, y::T) = le_float(x,y)
+
+isequal{T<:IntrinsicFloats}(x::T, y::T) = fpiseq(x,y)
+isless{ T<:IntrinsicFloats}(x::T, y::T) = fpislt(x,y)
 
 function cmp(x::AbstractFloat, y::AbstractFloat)
     (isnan(x) || isnan(y)) && throw(DomainError())
@@ -640,6 +389,10 @@ for Ti in (Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UIn
             end
         end
     end
+    @eval begin
+        convert(::Type{$Ti}, x::Float16) = convert($Ti, convert(Float32, x))
+        trunc(::Type{$Ti}, x::Float16) = trunc($Ti, convert(Float32, x))
+    end
 end
 
 @eval begin
@@ -674,14 +427,15 @@ end
 end
 
 ## byte order swaps for arbitrary-endianness serialization/deserialization ##
-bswap(x::Float32) = box(Float32,bswap_int(unbox(Float32,x)))
-bswap(x::Float64) = box(Float64,bswap_int(unbox(Float64,x)))
+bswap{T<:IntrinsicFloats}(x::T) = box(T,bswap_int(x))
 
 # bit patterns
 reinterpret(::Type{Unsigned}, x::Float64) = reinterpret(UInt64,x)
 reinterpret(::Type{Unsigned}, x::Float32) = reinterpret(UInt32,x)
+reinterpret(::Type{Unsigned}, x::Float16) = reinterpret(UInt16,x)
 reinterpret(::Type{Signed}, x::Float64) = reinterpret(Int64,x)
 reinterpret(::Type{Signed}, x::Float32) = reinterpret(Int32,x)
+reinterpret(::Type{Signed}, x::Float16) = reinterpret(Int16,x)
 
 sign_mask(::Type{Float64}) =        0x8000_0000_0000_0000
 exponent_mask(::Type{Float64}) =    0x7ff0_0000_0000_0000
diff --git a/base/int.jl b/base/int.jl
index 33836165e1ca0..984a864040505 100644
--- a/base/int.jl
+++ b/base/int.jl
@@ -333,8 +333,8 @@ for (Ts, Tu) in ((Int8, UInt8), (Int16, UInt16), (Int32, UInt32), (Int64, UInt64
     @eval convert(::Type{Unsigned}, x::$Ts) = convert($Tu, x)
 end
 
-convert{T<:Union{Float32, Float64, Bool}}(::Type{Signed}, x::T) = convert(Int,x)
-convert{T<:Union{Float32, Float64, Bool}}(::Type{Unsigned}, x::T) = convert(UInt,x)
+convert{T<:Union{Float16, Float32, Float64, Bool}}(::Type{Signed}, x::T) = convert(Int,x)
+convert{T<:Union{Float16, Float32, Float64, Bool}}(::Type{Unsigned}, x::T) = convert(UInt,x)
 
 convert(::Type{Integer}, x::Integer) = x
 convert(::Type{Integer}, x::Real) = convert(Signed,x)
diff --git a/base/rtlib/RTLIB.jl b/base/rtlib/RTLIB.jl
new file mode 100644
index 0000000000000..298742b3ab09c
--- /dev/null
+++ b/base/rtlib/RTLIB.jl
@@ -0,0 +1,241 @@
+# This file is a part of Julia. License is MIT: http://julialang.org/license
+"""
+    RTLIB
+
+Implements the runtime library for Julia. The implementations are based on
+llvm's compiler-rt. This implementation follows the compiler-rt naming convention
+and registers the pure Julia implementation as `extern_c` so that LLVM can find them.
+
+As a secondary interface `RTLIB.convert(::Type{T}, x)` is provided.
+"""
+module RTLIB
+
+register(f::Function, rtype::ANY, argt::ANY, name::String) =
+    ccall(:jl_extern_c, Void, (Any, Any, Any, Cstring),
+          f, rtype, argt, name)
+
+# Check if relative include is available
+if isdefined(Base, :INCLUDE_STATE) && Base.INCLUDE_STATE == 1
+    include("rtlib/fp_util.jl")
+    include("rtlib/fp_extend.jl")
+    include("rtlib/fp_trunc.jl")
+    include("rtlib/fp_fixint.jl")
+else
+    include("fp_util.jl")
+    include("fp_extend.jl")
+    include("fp_trunc.jl")
+    include("fp_fixint.jl")
+end
+
+# All these function names are enumerated in lib/CodeGen/TargetLoweringBase.cpp
+# right now we don't have a good way of getting at this information.
+
+###
+# Floating point extend and trunc functions
+###
+
+# "convert Float64 to Float128"
+# extenddftf2(x::Float64) = extendXfYf2(Float128, x)
+# convert(::Type{Float128}, x::Float64) = extenddftf2(x)
+
+# "convert Float32 to Float128"
+# extendsftf2(x::Float32) = extendXfYf2(Float128, x)
+# convert(::Type{Float128}, x::Float32) = extendsftf2(x)
+
+"convert Float32 to Float64"
+extendsfdf2(x::Float32) = extendXfYf2(Float64, x)
+convert(::Type{Float64}, x::Float32) = extendsfdf2(x)
+
+"convert Float16 to Float32"
+extendhfsf2(x::Float16) = extendXfYf2(Float32, x)
+convert(::Type{Float32}, x::Float16) = extendhfsf2(x)
+
+"convert Float32 to Float16"
+truncsfhf2(x::Float32) = truncXfYf2(Float16, x)
+convert(::Type{Float16}, x::Float32) = truncsfhf2(x)
+
+"convert Float64 to Float16"
+truncdfhf2(x::Float64) = truncXfYf2(Float16, x)
+convert(::Type{Float16}, x::Float64) = truncdfhf2(x)
+
+# "convert Float128 to Float16"
+# trunctfhf2(x :: Float128) = truncXfYf2(Float16, x)
+# convert(::Type{Float16}, x::Float128) = trunctfhf2(x)
+
+"convert Float64 to Float32"
+truncdfsf2(x::Float64) = truncXfYf2(Float32, x)
+convert(::Type{Float32}, x::Float64) = truncdfsf2(x)
+
+# "convert Float128 to Float32"
+# trunctfsf2(x :: Float128) = truncXfYf2(Float32, x)
+# convert(::Type{Float32}, x::Float128) = trunctfsf2(x)
+
+# "convert Float128 to Float64"
+# trunctfdf2(x :: Float128) = truncXfYf2(Float64, x)
+# convert(::Type{Float64}, x::Float128) = trunctfdf2(x)
+
+###
+# Conversion between integers and floats
+###
+
+"convert Float32 to Int32"
+fixsfsi(x::Float32) = fixint(Int32, x)
+convert(::Type{Int32}, x::Float32) = fixsfsi(x)
+
+"convert Float32 to Int64"
+fixsfdi(x::Float32) = fixint(Int64, x)
+convert(::Type{Int64}, x::Float32) = fixsfdi(x)
+
+"convert Float32 to Int128"
+fixsfti(x::Float32) = fixint(Int128, x)
+convert(::Type{Int128}, x::Float32) = fixsfti(x)
+
+"convert Float64 to Int32"
+fixdfsi(x::Float64) = fixint(Int32, x)
+convert(::Type{Int32}, x::Float64) = fixdfsi(x)
+
+"convert Float64 to Int64"
+fixdfdi(x::Float64) = fixint(Int64, x)
+convert(::Type{Int64}, x::Float64) = fixdfdi(x)
+
+"convert Float64 to Int128"
+fixdfti(x::Float64) = fixint(Int128, x)
+convert(::Type{Int128}, x::Float64) = fixdfti(x)
+
+# "convert Float128 to Int32"
+# fixtfsi(x::Float128) = fixint(Int32, x)
+# convert(::Type{Int32}, x::Float128) = fixtfsi(x)
+
+# "convert Float128 to Int64"
+# fixtfdi(x::Float128) = fixint(Int64, x)
+# convert(::Type{Int64}, x::Float128) = fixtfdi(x)
+
+# "convert Float128 to Int128"
+# fixtfti(x::Float128) = fixint(Int128, x)
+# convert(::Type{Int128}, x::Float128) = fixtfti(x)
+
+# Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi";
+# Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi";
+# Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti";
+# Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi";
+# Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi";
+# Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti";
+# Names[RTLIB::FPTOUINT_F128_I32] = "__fixunstfsi";
+# Names[RTLIB::FPTOUINT_F128_I64] = "__fixunstfdi";
+# Names[RTLIB::FPTOUINT_F128_I128] = "__fixunstfti";
+# Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf";
+# Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf";
+# Names[RTLIB::SINTTOFP_I32_F128] = "__floatsitf";
+# Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf";
+# Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf";
+# Names[RTLIB::SINTTOFP_I64_F128] = "__floatditf";
+
+"convert Int128 to Float32"
+function floattisf(x::Int128)
+    x == 0 && return 0f0
+    s = ((x >>> 96) % UInt32) & 0x8000_0000 # sign bit
+    x = abs(x) % UInt128
+    n = 128-leading_zeros(x) # ndigits0z(x,2)
+    if n <= 24
+        y = ((x % UInt32) << (24-n)) & 0x007f_ffff
+    else
+        y = ((x >> (n-25)) % UInt32) & 0x00ff_ffff # keep 1 extra bit
+        y = (y+one(UInt32))>>1 # round, ties up (extra leading bit in case of next exponent)
+        y &= ~UInt32(trailing_zeros(x) == (n-25)) # fix last bit to round to even
+    end
+    d = ((n+126) % UInt32) << 23
+    reinterpret(Float32, s | d + y)
+end
+convert(::Type{Float32}, x::Int128) = floattisf(x)
+
+"convert Int128 to Float64"
+function floattidf(x::Int128)
+    x == 0 && return 0.0
+    s = ((x >>> 64) % UInt64) & 0x8000_0000_0000_0000 # sign bit
+    x = abs(x) % UInt128
+    n = 128-leading_zeros(x) # ndigits0z(x,2)
+    if n <= 53
+        y = ((x % UInt64) << (53-n)) & 0x000f_ffff_ffff_ffff
+    else
+        y = ((x >> (n-54)) % UInt64) & 0x001f_ffff_ffff_ffff # keep 1 extra bit
+        y = (y+1)>>1 # round, ties up (extra leading bit in case of next exponent)
+        y &= ~UInt64(trailing_zeros(x) == (n-54)) # fix last bit to round to even
+    end
+    d = ((n+1022) % UInt64) << 52
+    reinterpret(Float64, s | d + y)
+end
+convert(::Type{Float64}, x::Int128) = floattidf(x)
+
+# Names[RTLIB::SINTTOFP_I128_F128] = "__floattitf";
+# Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf";
+# Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf";
+# Names[RTLIB::UINTTOFP_I32_F128] = "__floatunsitf";
+# Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf";
+# Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf";
+# Names[RTLIB::UINTTOFP_I64_F128] = "__floatunditf";
+
+"convert UInt128 to Float32"
+function floatuntisf(x::UInt128)
+    x == 0 && return 0f0
+    n = 128-leading_zeros(x) # ndigits0z(x,2)
+    if n <= 24
+        y = ((x % UInt32) << (24-n)) & 0x007f_ffff
+    else
+        y = ((x >> (n-25)) % UInt32) & 0x00ff_ffff # keep 1 extra bit
+        y = (y+one(UInt32))>>1 # round, ties up (extra leading bit in case of next exponent)
+        y &= ~UInt32(trailing_zeros(x) == (n-25)) # fix last bit to round to even
+    end
+    d = ((n+126) % UInt32) << 23
+    reinterpret(Float32, d + y)
+end
+convert(::Type{Float32}, x::UInt128) = floatuntisf(x)
+
+"convert UInt128 to Float64"
+function floatuntidf(x::UInt128)
+    x == 0 && return 0.0
+    n = 128-leading_zeros(x) # ndigits0z(x,2)
+    if n <= 53
+        y = ((x % UInt64) << (53-n)) & 0x000f_ffff_ffff_ffff
+    else
+        y = ((x >> (n-54)) % UInt64) & 0x001f_ffff_ffff_ffff # keep 1 extra bit
+        y = (y+1)>>1 # round, ties up (extra leading bit in case of next exponent)
+        y &= ~UInt64(trailing_zeros(x) == (n-54)) # fix last bit to round to even
+    end
+    d = ((n+1022) % UInt64) << 52
+    reinterpret(Float64, d + y)
+end
+convert(::Type{Float64}, x::UInt128) = floatuntidf(x)
+
+# Names[RTLIB::UINTTOFP_I128_F128] = "__floatuntitf";
+end
+
+# RTLIB.register(RTLIB.extenddftf2, Float128, Tuple{Float64}, "__extenddftf2")
+# RTLIB.register(RTLIB.extendsftf2, Float128, Tuple{Float32}, "__extendsftf2")
+RTLIB.register(RTLIB.extendsfdf2, Float64, Tuple{Float32}, "__extendsfdf2")
+if is_apple()
+    RTLIB.register(RTLIB.extendhfsf2, Float32, Tuple{Float16}, "__extendhfsf2")
+    RTLIB.register(RTLIB.truncsfhf2, Float16, Tuple{Float32}, "__truncsfhf2")
+else
+    RTLIB.register(RTLIB.extendhfsf2, Float32, Tuple{Float16}, "__gnu_h2f_ieee")
+    RTLIB.register(RTLIB.truncsfhf2, Float16, Tuple{Float32}, "__gnu_f2h_ieee")
+end
+RTLIB.register(RTLIB.truncdfhf2, Float16, Tuple{Float64}, "__truncdfhf2")
+# RTLIB.register(RTLIB.trunctfhf2, Float16, Tuple{Float128}, "__trunctfhf2")
+RTLIB.register(RTLIB.truncdfsf2, Float32, Tuple{Float64}, "__truncdfsf2")
+# RTLIB.register(RTLIB.trunctfsf2, Float32, Tuple{Float128}, "__trunctfsf2")
+# RTLIB.register(RTLIB.trunctfdf2, Float64, Tuple{Float128}, "__trunctfdf2")
+
+RTLIB.register(RTLIB.fixsfsi, Int32, Tuple{Float32}, "__fixsfsi")
+RTLIB.register(RTLIB.fixsfdi, Int64, Tuple{Float32}, "__fixsfdi")
+RTLIB.register(RTLIB.fixsfti, Int128, Tuple{Float32}, "__fixsfti")
+RTLIB.register(RTLIB.fixdfsi, Int32, Tuple{Float64}, "__fixdfsi")
+RTLIB.register(RTLIB.fixdfdi, Int64, Tuple{Float64}, "__fixdfdi")
+RTLIB.register(RTLIB.fixdfti, Int128, Tuple{Float64}, "__fixdfti")
+# RTLIB.register(RTLIB.fixtfsi, Int32, Tuple{Float128}, "__fixtfsi")
+# RTLIB.register(RTLIB.fixtfdi, Int64, Tuple{Float128}, "__fixtfdi")
+# RTLIB.register(RTLIB.fixtfti, Int128, Tuple{Float128}, "__fixtfti")
+
+RTLIB.register(RTLIB.floattisf, Float32, Tuple{Int128}, "__floattisf")
+RTLIB.register(RTLIB.floattidf, Float64, Tuple{Int128}, "__floattidf")
+RTLIB.register(RTLIB.floatuntisf, Float32, Tuple{UInt128}, "__floatuntisf")
+RTLIB.register(RTLIB.floatuntidf, Float64, Tuple{UInt128}, "__floatuntidf")
diff --git a/base/rtlib/fp_extend.jl b/base/rtlib/fp_extend.jl
new file mode 100644
index 0000000000000..66fd5feacdc52
--- /dev/null
+++ b/base/rtlib/fp_extend.jl
@@ -0,0 +1,100 @@
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is dual licensed under the MIT and the University of Illinois Open
+# Source Licenses. See LICENSE.TXT for details.
+#
+#
+# This file implements a fairly generic conversion from a narrower to a wider
+# IEEE-754 floating-point type.  The constants and types defined following the
+# includes below parameterize the conversion.
+#
+# It does not support types that don't use the usual IEEE-754 interchange
+# formats; specifically, some work would be needed to adapt it to
+# (for example) the Intel 80-bit format or PowerPC double-double format.
+#
+# Note please, however, that this implementation is only intended to support
+# *widening* operations; if you need to convert to a *narrower* floating-point
+# type (e.g. double -> float), then this routine will not do what you want it
+# to.
+#
+# It also requires that integer types at least as large as both formats
+# are available on the target platform; this may pose a problem when trying
+# to add support for quad on some 32-bit systems, for example.  You also may
+# run into trouble finding an appropriate CLZ function for wide source types;
+# you will likely need to roll your own on some platforms.
+#
+# Finally, the following assumptions are made:
+#
+# 1. floating-point types and integer types have the same endianness on the
+#    target platform
+#
+# 2. quiet NaNs, if supported, are indicated by the leading bit of the
+#    significand field being set
+
+@inline function extendXfYf2{dst_t<:RTLIB_FLOAT, src_t<:RTLIB_FLOAT}(::Type{dst_t}, a::src_t)
+    # Various constants whose values follow from the type parameters.
+    # Any reasonable optimizer will fold and propagate all of these.
+    const src_rep_t = fptoui(src_t)
+    const dst_rep_t = fptoui(dst_t)
+    const srcSigBits = significand_bits(src_t)
+    const dstSigBits = significand_bits(dst_t)
+
+    const srcBits = nbits(src_t)
+    const srcExpBits = exponent_bits(src_t)
+    const srcInfExp = exponent_inf(src_t)
+    const srcExpBias = exponent_bias(src_t)
+
+    const srcMinNormal = one(src_rep_t) << srcSigBits
+    const srcInfinity = srcInfExp << srcSigBits
+    const srcSignMask = one(src_rep_t) << (srcSigBits + srcExpBits)
+    const srcAbsMask = srcSignMask - one(src_rep_t)
+    const srcQNaN = one(src_rep_t) << (srcSigBits - 1)
+    const srcNaNCode = srcQNaN - one(src_rep_t)
+
+    const dstBits = nbits(dst_t)
+    const dstExpBits = exponent_bits(dst_t)
+    const dstInfExp = exponent_inf(dst_t)
+    const dstExpBias = exponent_bias(dst_t)
+
+    const dstMinNormal = one(dst_rep_t) << dstSigBits
+
+    # Split the bit pattern of `a` into its sign bit and the bits of abs(a).
+    const aRep = reinterpret(src_rep_t, a)
+    const aAbs = aRep & srcAbsMask
+    const sign = aRep & srcSignMask
+
+    local absResult :: dst_rep_t
+
+    if (aAbs - srcMinNormal) < (srcInfinity - srcMinNormal)
+        # a is a normal number.
+        # Extend to the destination type by shifting the significand and
+        # exponent into the proper position and rebiasing the exponent.
+        absResult = (aAbs % dst_rep_t) << (dstSigBits - srcSigBits)
+        absResult += (dstExpBias - srcExpBias) << dstSigBits
+    elseif aAbs >= srcInfinity
+        # a is NaN or infinity.
+        # Start from the destination infinity, then OR in the qNaN bit and the
+        # remaining NaN payload bits, each shifted up by the significand width
+        # difference. NOTE(review): `x % T << n` presumably means `(x % T) << n`.
+        absResult = dstInfExp << dstSigBits
+        absResult |= (aAbs & srcQNaN) % dst_rep_t << (dstSigBits - srcSigBits)
+        absResult |= (aAbs & srcNaNCode) % dst_rep_t << (dstSigBits - srcSigBits)
+    elseif aAbs != zero(src_rep_t) # the C original spells this test `if (aAbs)`
+        # a is denormal.
+        # renormalize the significand and clear the leading bit, then insert
+        # the correct adjusted exponent in the destination type.
+        const scale = leading_zeros(aAbs) - leading_zeros(srcMinNormal)
+        absResult = aAbs % dst_rep_t << (dstSigBits - srcSigBits + scale)
+        absResult $= dstMinNormal
+        const resultExponent = (dstExpBias - srcExpBias - scale + 1) % dst_rep_t
+        absResult |= resultExponent << dstSigBits
+    else
+        # a is zero.
+        absResult = zero(dst_rep_t)
+    end
+
+    # Move the sign bit to the top of the wider pattern and OR it into abs(a).
+    const result = absResult | sign % dst_rep_t << (dstBits - srcBits)
+    return reinterpret(dst_t, result)
+end
diff --git a/base/rtlib/fp_fixint.jl b/base/rtlib/fp_fixint.jl
new file mode 100644
index 0000000000000..fe5a3dfc71533
--- /dev/null
+++ b/base/rtlib/fp_fixint.jl
@@ -0,0 +1,42 @@
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is dual licensed under the MIT and the University of Illinois Open
+# Source Licenses. See LICENSE.TXT for details.
+#
+#
+# This file implements float to integer conversion for the
+# compiler-rt library.
+#
+
+@inline function fixint{fixint_t, fp_t<:RTLIB_FLOAT}(::Type{fixint_t}, a::fp_t)
+    # Convert `a` to the integer type `fixint_t`, truncating toward zero; saturate to
+    # typemax/typemin once the exponent reaches the bit width of `fixint_t`.
+    rep_t = fptoui(fp_t)
+    sigBits = significand_bits(fp_t)
+    # Get masks (all derived from the bit layout of `fp_t`)
+    signBit = one(rep_t) << (sigBits + exponent_bits(fp_t))
+    absMask = signBit - one(rep_t)
+    implicitBit = one(rep_t) << sigBits
+    significandMask = implicitBit - one(rep_t)
+    # Break a into sign, exponent, significand
+    aRep = reinterpret(rep_t, a)
+    aAbs = aRep & absMask
+    sign = ifelse(aRep & signBit != 0, -one(fixint_t), one(fixint_t))
+    # The unbiased exponent has to be signed, otherwise |a| < 1 never compares below zero.
+    exponent = Int(aAbs >> sigBits) - Int(exponent_bias(fp_t))
+    significand = (aAbs & significandMask) | implicitBit
+    # If exponent is negative, the result is zero.
+    if exponent < 0
+        return zero(fixint_t)
+    # If the value is too large for the integer type, saturate.
+    elseif exponent >= nbits(fixint_t)
+        return ifelse(sign == 1, typemax(fixint_t), typemin(fixint_t))
+    # If 0 <= exponent < significandBits, right shift to get the result.
+    elseif exponent < sigBits
+        # Convert to fixint_t before applying the sign so the product stays a fixint_t.
+        return sign * ((significand >> (sigBits - exponent)) % fixint_t)
+    else # Otherwise, shift left.
+        return sign * ((significand % fixint_t) << (exponent - sigBits))
+    end
+end
diff --git a/base/rtlib/fp_trunc.jl b/base/rtlib/fp_trunc.jl
new file mode 100644
index 0000000000000..ca56c11e6cca8
--- /dev/null
+++ b/base/rtlib/fp_trunc.jl
@@ -0,0 +1,135 @@
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is dual licensed under the MIT and the University of Illinois Open
+# Source Licenses. See LICENSE.TXT for details.
+#
+#
+# This file implements a fairly generic conversion from a wider to a narrower
+# IEEE-754 floating-point type in the default (round to nearest, ties to even)
+# rounding mode.  The constants and types defined following the includes below
+# parameterize the conversion.
+#
+# This routine can be trivially adapted to support conversions to
+# half-precision or from quad-precision. It does not support types that don't
+# use the usual IEEE-754 interchange formats; specifically, some work would be
+# needed to adapt it to (for example) the Intel 80-bit format or PowerPC
+# double-double format.
+#
+# Note please, however, that this implementation is only intended to support
+# *narrowing* operations; if you need to convert to a *wider* floating-point
+# type (e.g. float -> double), then this routine will not do what you want it
+# to.
+#
+# It also requires that integer types at least as large as both formats
+# are available on the target platform; this may pose a problem when trying
+# to add support for quad on some 32-bit systems, for example.
+#
+# Finally, the following assumptions are made:
+#
+# 1. floating-point types and integer types have the same endianness on the
+#    target platform
+#
+# 2. quiet NaNs, if supported, are indicated by the leading bit of the
+#    significand field being set
+
+@inline function truncXfYf2{dst_t<:RTLIB_FLOAT, src_t<:RTLIB_FLOAT}(::Type{dst_t}, a::src_t)
+    # Narrow `a` from `src_t` to the narrower type `dst_t`, rounding to nearest with ties
+    # to even, and return the result as a `dst_t`.
+    #
+    # Various constants whose values follow from the type parameters.
+    # Any reasonable optimizer will fold and propagate all of these.
+    src_rep_t = fptoui(src_t)
+    dst_rep_t = fptoui(dst_t)
+    srcSigBits = significand_bits(src_t)
+    dstSigBits = significand_bits(dst_t)
+
+    srcBits = nbits(src_t)
+    srcExpBits = exponent_bits(src_t)
+    srcInfExp = exponent_inf(src_t)
+    srcExpBias = exponent_bias(src_t)
+
+    srcMinNormal = one(src_rep_t) << srcSigBits
+    srcSignificandMask = srcMinNormal - one(src_rep_t)
+    srcInfinity = srcInfExp << srcSigBits
+    srcSignMask = one(src_rep_t) << (srcSigBits + srcExpBits)
+    srcAbsMask = srcSignMask - one(src_rep_t)
+    roundMask = (one(src_rep_t) << (srcSigBits - dstSigBits)) - one(src_rep_t)
+    halfway = one(src_rep_t) << (srcSigBits - dstSigBits - one(src_rep_t))
+    srcQNaN = one(src_rep_t) << (srcSigBits - one(src_rep_t))
+    srcNaNCode = srcQNaN - one(src_rep_t)
+
+    dstBits = nbits(dst_t)
+    dstInfExp = exponent_inf(dst_t)
+    dstExpBias = exponent_bias(dst_t)
+
+    underflowExponent = srcExpBias - dstExpBias + 1
+    overflowExponent = srcExpBias + dstInfExp - dstExpBias
+    underflow::src_rep_t = underflowExponent << srcSigBits
+    overflow::src_rep_t = overflowExponent << srcSigBits
+
+    dstQNaN = one(dst_rep_t) << (dstSigBits - 1)
+    dstNaNCode = dstQNaN - one(dst_rep_t)
+
+    # Break a into a sign and representation of the absolute value
+    aRep = reinterpret(src_rep_t, a)
+    aAbs = aRep & srcAbsMask
+    sign = aRep & srcSignMask
+
+    local absResult :: dst_rep_t
+
+    if (aAbs - underflow) < (aAbs - overflow) # unsigned wraparound: underflow <= aAbs < overflow
+        # The exponent of a is within the range of normal numbers in the
+        # destination format.  We can convert by simply right-shifting with
+        # rounding and adjusting the exponent.
+        absResult = (aAbs >> (srcSigBits - dstSigBits)) % dst_rep_t
+        absResult -= ((srcExpBias - dstExpBias) % dst_rep_t) << dstSigBits
+        roundBits = aAbs & roundMask
+        # Round to nearest
+        if roundBits > halfway
+            absResult += one(dst_rep_t)
+        # Ties to even
+        elseif roundBits == halfway
+            absResult += absResult & one(dst_rep_t)
+        end
+    elseif aAbs > srcInfinity
+        # a is NaN.
+        # Conjure the result by beginning with infinity, setting the qNaN
+        # bit and inserting the (truncated) trailing NaN field.
+        absResult = dstInfExp << dstSigBits
+        absResult |= dstQNaN
+        absResult |= ((aAbs & srcNaNCode) >> (srcSigBits - dstSigBits)) & dstNaNCode
+    elseif aAbs >= overflow
+        # a overflows to infinity.
+        absResult = dstInfExp << dstSigBits
+    else
+        # a underflows on conversion to the destination type or is an exact
+        # zero.  The result may be a denormal or zero.  Extract the exponent
+        # to get the shift amount for the denormalization.
+        aExp = aAbs >> srcSigBits
+        shift = srcExpBias - dstExpBias - aExp + 1
+        significand = (aRep & srcSignificandMask) | srcMinNormal
+        # Right shift by the denormalization amount with sticky.
+        if shift > srcSigBits
+            absResult = zero(dst_rep_t)
+        else
+            # Sticky is a single bit (a C `bool` upstream): 1 iff a nonzero bit is shifted out.
+            lostBits = significand << (srcBits - shift)
+            sticky = ifelse(lostBits != 0, one(src_rep_t), zero(src_rep_t))
+            denormalizedSignificand = (significand >> shift) | sticky
+            absResult = (denormalizedSignificand >> (srcSigBits - dstSigBits)) % dst_rep_t
+            roundBits = denormalizedSignificand & roundMask
+            # Round to nearest
+            if roundBits > halfway
+                absResult += one(dst_rep_t)
+            # Ties to even
+            elseif roundBits == halfway
+                absResult += absResult & one(dst_rep_t)
+            end
+        end
+    end
+
+    # Apply the signbit to (dst_t)abs(a).
+    result = absResult | ((sign >> (srcBits - dstBits)) % dst_rep_t)
+    return reinterpret(dst_t, result)
+end
\ No newline at end of file
diff --git a/base/rtlib/fp_util.jl b/base/rtlib/fp_util.jl
new file mode 100644
index 0000000000000..4dee24fa14326
--- /dev/null
+++ b/base/rtlib/fp_util.jl
@@ -0,0 +1,41 @@
+# This file is a part of Julia. License is MIT: http://julialang.org/license
+import Base: @pure
+
+# Floating-point types handled by the runtime-library routines.
+typealias RTLIB_FLOAT Union{Float16, Float32, Float64}
+const CHAR_BIT = 8 # number of bits in a byte (used by `nbits`)
+# `fptoui(T)` / `fptosi(T)`: unsigned / signed integer type as wide as the float type `T`.
+for (F, U, S) in ((Float16, UInt16, Int16),
+                  (Float32, UInt32, Int32),
+                  (Float64, UInt64, Int64)) # (Float128, UInt128, Int128) is not enabled
+    @eval begin
+        fptoui(::Type{$F}) = $U
+        fptosi(::Type{$F}) = $S
+    end
+end
+
+# `signed(U)` / `unsigned(S)` map an integer type to its counterpart of the same width.
+for (U, S) in ((UInt8, Int8),
+               (UInt16, Int16),
+               (UInt32, Int32),
+               (UInt64, Int64),
+               (UInt128, Int128))
+    @eval begin
+        signed(::Type{$U}) = $S
+        unsigned(::Type{$S}) = $U
+    end
+end
+
+# Width of `T` in bits.
+nbits{T}(::Type{T}) = CHAR_BIT * sizeof(T)
+# Number of explicitly stored significand (fraction) bits of each float type.
+for (F, n) in ((Float16, 10), (Float32, 23), (Float64, 52)) # Float128 would be 112
+    @eval significand_bits(::Type{$F}) = $n
+end
+
+# Exponent field width, the all-ones exponent field value, and the exponent bias.
+@pure exponent_bits{T<:RTLIB_FLOAT}(::Type{T}) = nbits(T) - (significand_bits(T) + 1)
+@pure exponent_inf{T<:RTLIB_FLOAT}(::Type{T}) = ~(~zero(fptoui(T)) << exponent_bits(T))
+@pure exponent_bias{T<:RTLIB_FLOAT}(::Type{T}) = exponent_inf(T) >> 1
+@pure sign_mask{T<:RTLIB_FLOAT}(::Type{T}) = one(fptoui(T)) << (nbits(T) - 1)
+
diff --git a/base/sysimg.jl b/base/sysimg.jl
index 6c739d9c8f36c..b51b5dfaefc90 100644
--- a/base/sysimg.jl
+++ b/base/sysimg.jl
@@ -103,11 +103,16 @@ Array{T}(::Type{T}, m::Integer)                       = Array{T,1}(Int(m))
 Array{T}(::Type{T}, m::Integer,n::Integer)            = Array{T,2}(Int(m),Int(n))
 Array{T}(::Type{T}, m::Integer,n::Integer,o::Integer) = Array{T,3}(Int(m),Int(n),Int(o))
 
+# OS specific stuff part one
+include("osutils.jl")
+include("c.jl")
+
 # numeric operations
 include("hashing.jl")
 include("rounding.jl")
 importall .Rounding
-include("float.jl")
+include("rtlib/RTLIB.jl")
+include("float.jl") # depends on rtlib
 include("complex.jl")
 include("rational.jl")
 include("multinverses.jl")
@@ -138,11 +143,9 @@ typealias StridedVector{T,A<:Union{DenseArray,StridedReshapedArray},I<:Tuple{Var
 typealias StridedMatrix{T,A<:Union{DenseArray,StridedReshapedArray},I<:Tuple{Vararg{Union{RangeIndex, AbstractCartesianIndex}}}}  Union{DenseArray{T,2}, SubArray{T,2,A,I}, StridedReshapedArray{T,2}}
 typealias StridedVecOrMat{T} Union{StridedVector{T}, StridedMatrix{T}}
 
-# For OS specific stuff
+# For OS specific stuff part two
 include(String(vcat(length(Core.ARGS)>=2?Core.ARGS[2].data:"".data, "build_h.jl".data))) # include($BUILDROOT/base/build_h.jl)
 include(String(vcat(length(Core.ARGS)>=2?Core.ARGS[2].data:"".data, "version_git.jl".data))) # include($BUILDROOT/base/version_git.jl)
-include("osutils.jl")
-include("c.jl")
 include("sysinfo.jl")
 
 if !isdefined(Core, :Inference)
diff --git a/src/cgutils.cpp b/src/cgutils.cpp
index 077d6934628d0..b7098679571f3 100644
--- a/src/cgutils.cpp
+++ b/src/cgutils.cpp
@@ -342,12 +342,9 @@ JL_DLLEXPORT Type *julia_type_to_llvm(jl_value_t *jt, bool *isboxed)
             return T_size;
         int nb = jl_datatype_size(jt);
         if (jl_is_floattype(jt)) {
-#ifndef DISABLE_FLOAT16
             if (nb == 2)
                 return T_float16;
-            else
-#endif
-            if (nb == 4)
+            else if (nb == 4)
                 return T_float32;
             else if (nb == 8)
                 return T_float64;
diff --git a/src/codegen.cpp b/src/codegen.cpp
index 5a1bd54d7131d..c5abc914448a0 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -174,8 +174,6 @@ extern void _chkstk(void);
 #define __alignof__ __alignof
 #endif
 
-#define DISABLE_FLOAT16
-
 // llvm state
 #if JL_LLVM_VERSION >= 30900
 JL_DLLEXPORT LLVMContext jl_LLVMContext;
diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp
index 14bf8bc59b01a..2d459b19d1443 100644
--- a/src/intrinsics.cpp
+++ b/src/intrinsics.cpp
@@ -72,12 +72,9 @@ extern "C" JL_DLLEXPORT int8_t jl_is_memdebug() {
 
 static Type *FTnbits(size_t nb)
 {
-#ifndef DISABLE_FLOAT16
     if (nb == 16)
         return T_float16;
-    else
-#endif
-    if (nb == 32)
+    else if (nb == 32)
         return T_float32;
     else if (nb == 64)
         return T_float64;
@@ -109,6 +106,7 @@ static Type *JL_INTT(Type *t)
         return t;
     if (t->isPointerTy())
         return T_size;
+    if (t == T_float16) return T_int16;
     if (t == T_float32) return T_int32;
     if (t == T_float64) return T_int64;
     assert(t == T_void);
@@ -118,18 +116,18 @@ static Type *JL_INTT(Type *t)
 static jl_value_t *JL_JLUINTT(Type *t)
 {
     assert(!t->isIntegerTy());
+    if (t == T_float16) return (jl_value_t*)jl_uint16_type;
     if (t == T_float32) return (jl_value_t*)jl_uint32_type;
     if (t == T_float64) return (jl_value_t*)jl_uint64_type;
-    if (t == T_float16) return (jl_value_t*)jl_uint16_type;
     assert(t == T_void);
     return jl_bottom_type;
 }
 static jl_value_t *JL_JLSINTT(Type *t)
 {
     assert(!t->isIntegerTy());
+    if (t == T_float16) return (jl_value_t*)jl_int16_type;
     if (t == T_float32) return (jl_value_t*)jl_int32_type;
     if (t == T_float64) return (jl_value_t*)jl_int64_type;
-    if (t == T_float16) return (jl_value_t*)jl_int16_type;
     assert(t == T_void);
     return jl_bottom_type;
 }
@@ -180,10 +178,8 @@ static Constant *julia_const_to_llvm(void *ptr, jl_value_t *bt)
         }
         case 2: {
             uint16_t data16 = *(uint16_t*)ptr;
-#ifndef DISABLE_FLOAT16
             if (jl_is_floattype(bt))
                 return ConstantFP::get(jl_LLVMContext, LLVM_FP(APFloat::IEEEhalf,APInt(16,data16)));
-#endif
             return ConstantInt::get(T_int16, data16);
         }
         case 4: {
diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp
index 9fb098edf635f..9eeedaa730741 100644
--- a/src/jitlayers.cpp
+++ b/src/jitlayers.cpp
@@ -308,6 +308,28 @@ void NotifyDebugger(jit_code_entry *JITCodeEntry)
 }
 // ------------------------ END OF TEMPORARY COPY FROM LLVM -----------------
 
+// Resolve llvm libcalls to the implementations in rtlib.jl
+static uint64_t resolve_libcalls(const char *name)
+{
+    static void *sysimg_handle = jl_load_dynamic_library_e("sys", JL_RTLD_LOCAL);
+    static const char *const libcall_prefix = "__";
+    if (sysimg_handle == nullptr) {
+        jl_printf(JL_STDERR, "WARNING: Unable to load sysimage\n");
+        return 0;
+    }
+    // Names without the "__" prefix are not looked up; 0 tells the caller "not found".
+    const bool has_prefix = strncmp(name, libcall_prefix, strlen(libcall_prefix)) == 0;
+    if (!has_prefix)
+        return 0;
+#if defined(_OS_DARWIN_)
+    // jl_dlsym_e expects the unmangled 'C' name: drop one '_' of a "___" prefix.
+    static const char *const darwin_prefix = "___";
+    if (strncmp(name, darwin_prefix, strlen(darwin_prefix)) == 0)
+        name += 1;
+#endif
+    return (uintptr_t)jl_dlsym_e(sysimg_handle, name);
+}
+
 #ifdef _OS_LINUX_
 // Resolve non-lock free atomic functions in the libatomic library.
 // This is the library that provides support for c11/c++11 atomic operations.
@@ -542,6 +564,8 @@ void JuliaOJIT::addModule(std::unique_ptr<Module> M)
                         if (uint64_t addr = resolve_atomic(Name.c_str()))
                             return JL_SymbolInfo(addr, JITSymbolFlags::Exported);
 #endif
+                        if (uint64_t addr = resolve_libcalls(Name.c_str()))
+                            return JL_SymbolInfo(addr, JITSymbolFlags::Exported);
                         // Return failure code
                         return JL_SymbolInfo(nullptr);
                       },
diff --git a/test/choosetests.jl b/test/choosetests.jl
index 03b91553e40d8..1e242f46fe865 100644
--- a/test/choosetests.jl
+++ b/test/choosetests.jl
@@ -16,7 +16,7 @@ Upon return, `tests` is a vector of fully-expanded test names, and
 function choosetests(choices = [])
     testnames = [
         "linalg", "subarray", "core", "inference", "keywordargs", "numbers",
-        "printf", "char", "strings", "triplequote", "unicode",
+        "printf", "char", "strings", "triplequote", "unicode", "rtlib",
         "dates", "dict", "hashing", "iobuffer", "staged", "offsetarray",
         "arrayops", "tuple", "reduce", "reducedim", "random", "abstractarray",
         "intfuncs", "simdloop", "vecelement", "blas", "sparse",
diff --git a/test/rtlib.jl b/test/rtlib.jl
new file mode 100644
index 0000000000000..ccfc71e30eb76
--- /dev/null
+++ b/test/rtlib.jl
@@ -0,0 +1,114 @@
+# This file is part of Julia. License is MIT: http://julialang.org/license
+# Parts of it are copied from llvm's compiler-rt
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is dual licensed under the MIT and the University of Illinois Open
+# Source Licenses. See LICENSE.TXT for details.
+
+using Base.Test
+import Base.RTLIB
+@testset "RTLIB" begin
+@testset "truncdfhf2" begin
+    # (Float64 input, expected Float16 result) pairs, compared with `===`.
+    for (x, expected) in (
+            (NaN, NaN16),
+            (Inf, Inf16),
+            (-Inf, -Inf16),
+            (0.0, reinterpret(Float16, 0x0000)),
+            (-0.0, reinterpret(Float16, 0x8000)),
+            (3.1415926535, reinterpret(Float16, 0x4248)),
+            (-3.1415926535, reinterpret(Float16, 0xc248)),
+            (0x1.987124876876324p+1000, reinterpret(Float16, 0x7c00)),
+            (0x1.987124876876324p+12, reinterpret(Float16, 0x6e62)),
+            (0x1.0p+0, reinterpret(Float16, 0x3c00)),
+            (0x1.0p-14, reinterpret(Float16, 0x0400)),
+            (0x1.0p-20, reinterpret(Float16, 0x0010)),  # denormal
+            (0x1.0p-24, reinterpret(Float16, 0x0001)),
+            (-0x1.0p-24, reinterpret(Float16, 0x8001)),
+            (0x1.5p-25, reinterpret(Float16, 0x0001)),
+            (0x1.0p-25, reinterpret(Float16, 0x0000)),  # and back to zero
+            (-0x1.0p-25, reinterpret(Float16, 0x8000)),
+            (65504.0, reinterpret(Float16, 0x7bff)),    # max (precise)
+            (65519.0, reinterpret(Float16, 0x7bff)),    # max (rounded)
+            (65520.0, reinterpret(Float16, 0x7c00)),    # max (to +inf)
+            (-65520.0, reinterpret(Float16, 0xfc00)),
+            (65536.0, reinterpret(Float16, 0x7c00)),
+        )
+        @test RTLIB.truncdfhf2(x) === expected
+    end
+end
+
+@testset "truncdfsf2" begin
+    @test RTLIB.truncdfsf2(0x1.0p+128) === Inf32 # 2^128, written as a hex float
+end
+
+@testset "truncsfhf2" begin
+    # (Float32 input, bit pattern of the expected Float16 result) pairs.
+    for (x, bits) in (
+            (NaN32, 0x7e00),                                # NaN
+            (Inf32, 0x7c00),                                # inf
+            (-Inf32, 0xfc00),
+            (0.0f0, 0x0000),                                # zero
+            (-0.0f0, 0x8000),
+            (3.1415926535f0, 0x4248),
+            (-3.1415926535f0, 0xc248),
+            (Float32(0x1.987124876876324p+100), 0x7c00),
+            (Float32(0x1.987124876876324p+12), 0x6e62),
+            (Float32(0x1.0p+0), 0x3c00),
+            (Float32(0x1.0p-14), 0x0400),
+            # denormal
+            (Float32(0x1.0p-20), 0x0010),
+            (Float32(0x1.0p-24), 0x0001),
+            (Float32(-0x1.0p-24), 0x8001),
+            (Float32(0x1.5p-25), 0x0001),
+            # and back to zero
+            (Float32(0x1.0p-25), 0x0000),
+            (Float32(-0x1.0p-25), 0x8000),
+            (65504.0f0, 0x7bff),                            # max (precise)
+            (65519.0f0, 0x7bff),                            # max (rounded)
+            # max (to +inf)
+            (65520.0f0, 0x7c00),
+            (65536.0f0, 0x7c00),
+            (-65520.0f0, 0xfc00),
+        )
+        @test RTLIB.truncsfhf2(x) === reinterpret(Float16, bits)
+    end
+end
+
+@testset "extendhfsf2" begin
+    ##
+    # Note:
+    # These tests are taken from the compiler-rt test suite, where as of 3.9.0
+    # the checks are done with compareResultH (i.e. after casting to UInt16).
+    # Tests that are commented out fail as === Float32 comparisons.
+    # Some tests succeed with ≈ (and are consistent with Julia v0.5 convert).
+    ##
+    for (bits, expected) in (
+            (0x7e00, NaN32),                     # NaN
+            (0x7c00, Inf32),                     # inf
+            (0xfc00, -Inf32),
+            (0x0000, 0.0f0),                     # zero
+            (0x8000, -0.0f0),
+            # (0x7c00, Float32(0x1.987124876876324p+100)),
+            (0x6e62, Float32(0x1.988p+12)),
+            (0x3c00, Float32(0x1.0p+0)),
+            (0x0400, Float32(0x1.0p-14)),
+            # denormal
+            (0x0010, Float32(0x1.0p-20)),
+            (0x0001, Float32(0x1.0p-24)),
+            (0x8001, Float32(-0x1.0p-24)),
+            # (0x0001, Float32(0x1.5p-25)),
+            # and back to zero
+            # (0x0000, Float32(0x1.0p-25)),
+            # (0x8000, Float32(-0x1.0p-25)),
+            (0x7bff, 65504.0f0),                 # max (precise)
+            (0x7bff, 65504.0f0),                 # max (rounded)
+        )
+        @test RTLIB.extendhfsf2(reinterpret(Float16, bits)) === expected
+    end
+    # These two only hold approximately, so they are checked with ≈ instead of ===.
+    @test RTLIB.extendhfsf2(reinterpret(Float16, 0x4248)) ≈ Float32(π)
+    @test RTLIB.extendhfsf2(reinterpret(Float16, 0xc248)) ≈ Float32(-π)
+end
+end
\ No newline at end of file