diff --git a/base/sort.jl b/base/sort.jl
index a9ae30f4aa475..c98fafe47d5dd 100644
--- a/base/sort.jl
+++ b/base/sort.jl
@@ -528,344 +528,9 @@ function sort!(v::AbstractVector, lo::Integer, hi::Integer, ::InsertionSortAlg,
     return v
 end
 
-# selectpivot!
-#
-# Given 3 locations in an array (lo, mi, and hi), sort v[lo], v[mi], v[hi]) and
-# choose the middle value as a pivot
-#
-# Upon return, the pivot is in v[lo], and v[hi] is guaranteed to be
-# greater than the pivot
-
-@inline function selectpivot!(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering)
-    @inbounds begin
-        mi = midpoint(lo, hi)
-
-        # sort v[mi] <= v[lo] <= v[hi] such that the pivot is immediately in place
-        if lt(o, v[lo], v[mi])
-            v[mi], v[lo] = v[lo], v[mi]
-        end
-
-        if lt(o, v[hi], v[lo])
-            if lt(o, v[hi], v[mi])
-                v[hi], v[lo], v[mi] = v[lo], v[mi], v[hi]
-            else
-                v[hi], v[lo] = v[lo], v[hi]
-            end
-        end
-
-        # return the pivot
-        return v[lo]
-    end
-end
-
-# partition!
-#
-# select a pivot, and partition v according to the pivot
-
-function partition!(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering)
-    pivot = selectpivot!(v, lo, hi, o)
-    # pivot == v[lo], v[hi] > pivot
-    i, j = lo, hi
-    @inbounds while true
-        i += 1; j -= 1
-        while lt(o, v[i], pivot); i += 1; end;
-        while lt(o, pivot, v[j]); j -= 1; end;
-        i >= j && break
-        v[i], v[j] = v[j], v[i]
-    end
-    v[j], v[lo] = pivot, v[j]
-
-    # v[j] == pivot
-    # v[k] >= pivot for k > j
-    # v[i] <= pivot for i < j
-    return j
-end
-
-function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::QuickSortAlg, o::Ordering)
-    @inbounds while lo < hi
-        hi-lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o)
-        j = partition!(v, lo, hi, o)
-        if j-lo < hi-j
-            # recurse on the smaller chunk
-            # this is necessary to preserve O(log(n))
-            # stack space in the worst case (rather than O(n))
-            lo < (j-1) && sort!(v, lo, j-1, a, o)
-            lo = j+1
-        else
-            j+1 < hi && sort!(v, j+1, hi, a, o)
-            hi = j-1
-        end
-    end
-    return v
-end
-
-function sort!(v::AbstractVector{T}, lo::Integer, hi::Integer, a::MergeSortAlg, o::Ordering,
-        t0::Union{AbstractVector{T}, Nothing}=nothing) where T
-    @inbounds if lo < hi
-        hi-lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o)
-
-        m = midpoint(lo, hi)
-        t = workspace(v, t0, m-lo+1)
-
-        sort!(v, lo,  m,  a, o, t)
-        sort!(v, m+1, hi, a, o, t)
-
-        i, j = 1, lo
-        while j <= m
-            t[i] = v[j]
-            i += 1
-            j += 1
-        end
-
-        i, k = 1, lo
-        while k < j <= hi
-            if lt(o, v[j], t[i])
-                v[k] = v[j]
-                j += 1
-            else
-                v[k] = t[i]
-                i += 1
-            end
-            k += 1
-        end
-        while k < j
-            v[k] = t[i]
-            k += 1
-            i += 1
-        end
-    end
-
-    return v
-end
-
-function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::PartialQuickSort,
-               o::Ordering)
-    @inbounds while lo < hi
-        hi-lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o)
-        j = partition!(v, lo, hi, o)
-
-        if j <= first(a.k)
-            lo = j+1
-        elseif j >= last(a.k)
-            hi = j-1
-        else
-            # recurse on the smaller chunk
-            # this is necessary to preserve O(log(n))
-            # stack space in the worst case (rather than O(n))
-            if j-lo < hi-j
-                lo < (j-1) && sort!(v, lo, j-1, a, o)
-                lo = j+1
-            else
-                hi > (j+1) && sort!(v, j+1, hi, a, o)
-                hi = j-1
-            end
-        end
-    end
-    return v
-end
-
-# This is a stable least significant bit first radix sort.
-#
-# That is, it first sorts the entire vector by the last chunk_size bits, then by the second
-# to last chunk_size bits, and so on. Stability means that it will not reorder two elements
-# that compare equal. This is essential so that the order introduced by earlier,
-# less significant passes is preserved by later passes.
-#
-# Each pass divides the input into 2^chunk_size == mask+1 buckets. To do this, it
-#  * counts the number of entries that fall into each bucket
-#  * uses those counts to compute the indices to move elements of those buckets into
-#  * moves elements into the computed indices in the swap array
-#  * switches the swap and working array
-#
-# In the case of an odd number of passes, the returned vector will === the input vector t,
-# not v. This is one of the many reasons radix_sort! is not exported.
-function radix_sort!(v::AbstractVector{U}, lo::Integer, hi::Integer, bits::Unsigned,
-                     t::AbstractVector{U}, chunk_size=radix_chunk_size_heuristic(lo, hi, bits)) where U <: Unsigned
-    # bits is unsigned for performance reasons.
-    mask = UInt(1) << chunk_size - 1
-    counts = Vector{UInt}(undef, mask+2)
-
-    @inbounds for shift in 0:chunk_size:bits-1
-
-        # counts[2:mask+2] will store the number of elements that fall into each bucket.
-        # if chunk_size = 8, counts[2] is bucket 0x00 and counts[257] is bucket 0xff.
-        counts .= 0
-        for k in lo:hi
-            x = v[k]                  # lookup the element
-            i = (x >> shift)&mask + 2 # compute its bucket's index for this pass
-            counts[i] += 1            # increment that bucket's count
-        end
-
-        counts[1] = lo                # set target index for the first bucket
-        cumsum!(counts, counts)       # set target indices for subsequent buckets
-        # counts[1:mask+1] now stores indices where the first member of each bucket
-        # belongs, not the number of elements in each bucket. We will put the first element
-        # of bucket 0x00 in t[counts[1]], the next element of bucket 0x00 in t[counts[1]+1],
-        # and the last element of bucket 0x00 in t[counts[2]-1].
-
-        for k in lo:hi
-            x = v[k]                  # lookup the element
-            i = (x >> shift)&mask + 1 # compute its bucket's index for this pass
-            j = counts[i]             # lookup the target index
-            t[j] = x                  # put the element where it belongs
-            counts[i] = j + 1         # increment the target index for the next
-        end                           #  ↳ element in this bucket
-
-        v, t = t, v # swap the now sorted destination vector t back into primary vector v
-
-    end
-
-    v
-end
-function radix_chunk_size_heuristic(lo::Integer, hi::Integer, bits::Unsigned)
-    # chunk_size is the number of bits to radix over at once.
-    # We need to allocate an array of size 2^chunk size, and on the other hand the higher
-    # the chunk size the fewer passes we need. Theoretically, chunk size should be based on
-    # the Lambert W function applied to length. Empirically, we use this heuristic:
-    guess = min(10, log(maybe_unsigned(hi-lo))*3/4+3)
-    # TODO the maximum chunk size should be based on architecture cache size.
-
-    # We need iterations * chunk size ≥ bits, and these cld's
-    # make an effort to get iterations * chunk size ≈ bits
-    UInt8(cld(bits, cld(bits, guess)))
-end
-
-# For AbstractVector{Bool}, counting sort is always best.
-# This is an implementation of counting sort specialized for Bools.
-function sort!(v::AbstractVector{B}, lo::Integer, hi::Integer, a::AdaptiveSort, o::Ordering,
-        t::Union{AbstractVector{B}, Nothing}=nothing) where {B <: Bool}
-    first = lt(o, false, true) ? false : lt(o, true, false) ? true : return v
-    count = 0
-    @inbounds for i in lo:hi
-        if v[i] == first
-            count += 1
-        end
-    end
-    @inbounds v[lo:lo+count-1] .= first
-    @inbounds v[lo+count:hi] .= !first
-    v
-end
-
-workspace(v::AbstractVector, ::Nothing, len::Integer) = similar(v, len)
-function workspace(v::AbstractVector{T}, t::AbstractVector{T}, len::Integer) where T
-    length(t) < len ? resize!(t, len) : t
-end
-maybe_unsigned(x::Integer) = x # this is necessary to avoid calling unsigned on BigInt
-maybe_unsigned(x::BitSigned) = unsigned(x)
-function _extrema(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering)
-    mn = mx = v[lo]
-    @inbounds for i in (lo+1):hi
-        vi = v[i]
-        lt(o, vi, mn) && (mn = vi)
-        lt(o, mx, vi) && (mx = vi)
-    end
-    mn, mx
-end
-function sort!(v::AbstractVector{T}, lo::Integer, hi::Integer, a::AdaptiveSort, o::Ordering,
-            t::Union{AbstractVector{T}, Nothing}=nothing) where T
-    # if the sorting task is not UIntMappable, then we can't radix sort or sort_int_range!
-    # so we skip straight to the fallback algorithm which is comparison based.
-    U = UIntMappable(T, o)
-    U === nothing && return sort!(v, lo, hi, a.fallback, o)
-
-    # to avoid introducing excessive detection costs for the trivial sorting problem
-    # and to avoid overflow, we check for small inputs before any other runtime checks
-    hi <= lo && return v
-    lenm1 = maybe_unsigned(hi-lo) # adding 1 would risk overflow
-    # only count sort on a short range can compete with insertion sort when lenm1 < 40
-    # and the optimization is not worth the detection cost, so we use insertion sort.
-    lenm1 < 40 && return sort!(v, lo, hi, SMALL_ALGORITHM, o)
-
-    # For most arrays, a presorted check is cheap (overhead < 5%) and for most large
-    # arrays it is essentially free (<1%). Insertion sort runs in a fast O(n) on presorted
-    # input and this guarantees presorted input will always be efficiently handled
-    issorted(view(v, lo:hi), o) && return v
-
-    # For large arrays, a reverse-sorted check is essentially free (overhead < 1%)
-    if lenm1 >= 500 && issorted(view(v, lo:hi), ReverseOrdering(o))
-        reverse!(view(v, lo:hi))
-        return v
-    end
-
-    # UInt128 does not support fast bit shifting so we never
-    # dispatch to radix sort but we may still perform count sort
-    if sizeof(U) > 8
-        if T <: Integer && o isa DirectOrdering
-            v_min, v_max = _extrema(v, lo, hi, Forward)
-            v_range = maybe_unsigned(v_max-v_min)
-            v_range == 0 && return v # all same
-
-            # we know lenm1 ≥ 40, so this will never underflow.
-            # if lenm1 > 3.7e18 (59 exabytes), then this may incorrectly dispatch to fallback
-            if v_range < 5lenm1-100 # count sort will outperform comparison sort if v's range is small
-                return sort_int_range!(v, Int(v_range+1), v_min, o === Forward ? identity : reverse, lo, hi)
-            end
-        end
-        return sort!(v, lo, hi, a.fallback, o)
-    end
-
-    v_min, v_max = _extrema(v, lo, hi, o)
-    lt(o, v_min, v_max) || return v # all same
-    if T <: Integer && o isa DirectOrdering
-        R = o === Reverse
-        v_range = maybe_unsigned(R ? v_min-v_max : v_max-v_min)
-        if v_range < div(lenm1, 2) # count sort will be superior if v's range is very small
-            return sort_int_range!(v, Int(v_range+1), R ? v_max : v_min, R ? reverse : identity, lo, hi)
-        end
-    end
-
-    u_min, u_max = uint_map(v_min, o), uint_map(v_max, o)
-    u_range = maybe_unsigned(u_max-u_min)
-    if u_range < div(lenm1, 2) # count sort will be superior if u's range is very small
-        u = uint_map!(v, lo, hi, o)
-        sort_int_range!(u, Int(u_range+1), u_min, identity, lo, hi)
-        return uint_unmap!(v, u, lo, hi, o)
-    end
-
-    # if u's range is small, then once we subtract out v_min, we'll get a vector like
-    # UInt16[0x001a, 0x0015, 0x0006, 0x001b, 0x0008, 0x000c, 0x0001, 0x000e, 0x001c, 0x0009]
-    # where we only need to radix over the last few bits (5, in the example).
-    bits = unsigned(8sizeof(u_range) - leading_zeros(u_range))
-
-    # radix sort runs in O(bits * lenm1), insertion sort runs in O(lenm1^2). Radix sort
-    # has a constant factor that is three times higher, so radix runtime is 3bits * lenm1
-    # and insertion runtime is lenm1^2. Empirically, insertion is faster than radix iff
-    # lenm1 < 3bits.
-    # Insertion < Radix
-    #   lenm1^2 < 3 * bits * lenm1
-    #     lenm1 < 3bits
-    if lenm1 < 3bits
-        # at lenm1 = 64*3-1, QuickSort is about 20% faster than InsertionSort.
-        alg = a.fallback === QuickSort && lenm1 > 120 ? QuickSort : SMALL_ALGORITHM
-        return sort!(v, lo, hi, alg, o)
-    end
-
-    # At this point, we are committed to radix sort.
-    u = uint_map!(v, lo, hi, o)
-
-    # we subtract u_min to avoid radixing over unnecessary bits. For example,
-    # Int32[3, -1, 2] uint_maps to UInt32[0x80000003, 0x7fffffff, 0x80000002]
-    # which uses all 32 bits, but once we subtract u_min = 0x7fffffff, we are left with
-    # UInt32[0x00000004, 0x00000000, 0x00000003] which uses only 3 bits, and
-    # Float32[2.012, 400.0, 12.345] uint_maps to UInt32[0x3fff3b63, 0x3c37ffff, 0x414570a4]
-    # which is reduced to UInt32[0x03c73b64, 0x00000000, 0x050d70a5] using only 26 bits.
-    # the overhead for this subtraction is small enough that it is worthwhile in many cases.
-
-    # this is faster than u[lo:hi] .-= u_min as of v1.9.0-DEV.100
-    @inbounds for i in lo:hi
-        u[i] -= u_min
-    end
-
-    u2 = radix_sort!(u, lo, hi, bits, reinterpret(U, workspace(v, t, hi)))
-    uint_unmap!(v, u2, lo, hi, o, u_min)
-end
-
 ## generic sorting methods ##
 
-defalg(v::AbstractArray) = DEFAULT_STABLE
-defalg(v::AbstractArray{<:Union{Number, Missing}}) = DEFAULT_UNSTABLE
-defalg(v::AbstractArray{Missing}) = DEFAULT_UNSTABLE # for method disambiguation
-defalg(v::AbstractArray{Union{}}) = DEFAULT_UNSTABLE # for method disambiguation
+defalg(::Union{AbstractArray, typeof(DEFAULT_UNSTABLE)}) = InsertionSort
 
 function sort!(v::AbstractVector{T}, alg::Algorithm,
         order::Ordering, t::Union{AbstractVector{T}, Nothing}=nothing) where T
@@ -927,29 +592,6 @@ function sort!(v::AbstractVector{T};
     sort!(v, alg, ord(lt,by,rev,order), workspace)
 end
 
-# sort! for vectors of few unique integers
-function sort_int_range!(x::AbstractVector{<:Integer}, rangelen, minval, maybereverse,
-                         lo=firstindex(x), hi=lastindex(x))
-    offs = 1 - minval
-
-    counts = fill(0, rangelen)
-    @inbounds for i = lo:hi
-        counts[x[i] + offs] += 1
-    end
-
-    idx = lo
-    @inbounds for i = maybereverse(1:rangelen)
-        lastidx = idx + counts[i] - 1
-        val = i-offs
-        for j = idx:lastidx
-            x[j] = val
-        end
-        idx = lastidx + 1
-    end
-
-    return x
-end
-
 """
     sort(v; alg::Algorithm=defalg(v), lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward)
 
@@ -1108,7 +750,7 @@ julia> v[p]
 ```
 """
 function sortperm(v::AbstractVector;
-                  alg::Algorithm=DEFAULT_UNSTABLE,
+                  alg::Algorithm=defalg(DEFAULT_UNSTABLE),
                   lt=isless,
                   by=identity,
                   rev::Union{Bool,Nothing}=nothing,
@@ -1155,7 +797,7 @@ julia> v[p]
 ```
 """
 function sortperm!(x::AbstractVector{T}, v::AbstractVector;
-                   alg::Algorithm=DEFAULT_UNSTABLE,
+                   alg::Algorithm=defalg(DEFAULT_UNSTABLE),
                    lt=isless,
                    by=identity,
                    rev::Union{Bool,Nothing}=nothing,
@@ -1230,7 +872,7 @@ julia> sort(A, dims = 2)
 """
 function sort(A::AbstractArray{T};
               dims::Integer,
-              alg::Algorithm=DEFAULT_UNSTABLE,
+              alg::Algorithm=defalg(DEFAULT_UNSTABLE),
               lt=isless,
               by=identity,
               rev::Union{Bool,Nothing}=nothing,
@@ -1260,6 +902,11 @@ end
     Av
 end
 
+workspace(v::AbstractVector, ::Nothing, len::Integer) = similar(v, len)
+function workspace(v::AbstractVector{T}, t::AbstractVector{T}, len::Integer) where T
+    length(t) < len ? resize!(t, len) : t
+end
+
 """
     sort!(A; dims::Integer, alg::Algorithm=defalg(A), lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward)
 
@@ -1311,250 +958,4 @@ function sort!(A::AbstractArray{T};
     A
 end
 
-
-## uint mapping to allow radix sorting primitives other than UInts ##
-
-"""
-    UIntMappable(T::Type, order::Ordering)
-
-Return `typeof(uint_map(x::T, order))` if [`uint_map`](@ref) and
-[`uint_unmap`](@ref) are implemented.
-
-If either is not implemented, return `nothing`.
-"""
-UIntMappable(T::Type, order::Ordering) = nothing
-
-"""
-    uint_map(x, order::Ordering)::Unsigned
-
-Map `x` to an un unsigned integer, maintaining sort order.
-
-The map should be reversible with [`uint_unmap`](@ref), so `isless(order, a, b)` must be
-a linear ordering for `a, b <: typeof(x)`. Satisfies
-`isless(order, a, b) === (uint_map(a, order) < uint_map(b, order))`
-and `x === uint_unmap(typeof(x), uint_map(x, order), order)`
-
-See also: [`UIntMappable`](@ref) [`uint_unmap`](@ref)
-"""
-function uint_map end
-
-"""
-    uint_unmap(T::Type, u::Unsigned, order::Ordering)
-
-Reconstruct the unique value `x::T` that uint_maps to `u`. Satisfies
-`x === uint_unmap(T, uint_map(x::T, order), order)` for all `x <: T`.
-
-See also: [`uint_map`](@ref) [`UIntMappable`](@ref)
-"""
-function uint_unmap end
-
-
-### Primitive Types
-
-# Integers
-uint_map(x::Unsigned, ::ForwardOrdering) = x
-uint_unmap(::Type{T}, u::T, ::ForwardOrdering) where T <: Unsigned = u
-
-uint_map(x::Signed, ::ForwardOrdering) =
-    unsigned(xor(x, typemin(x)))
-uint_unmap(::Type{T}, u::Unsigned, ::ForwardOrdering) where T <: Signed =
-    xor(signed(u), typemin(T))
-
-# unsigned(Int) is not available during bootstrapping.
-for (U, S) in [(UInt8, Int8), (UInt16, Int16), (UInt32, Int32), (UInt64, Int64), (UInt128, Int128)]
-    @eval UIntMappable(::Type{<:Union{$U, $S}}, ::ForwardOrdering) = $U
-end
-
-# Floats are not UIntMappable under regular orderings because they fail on NaN edge cases.
-# uint mappings for floats are defined in Float, where the Left and Right orderings
-# guarantee that there are no NaN values
-
-# Chars
-uint_map(x::Char, ::ForwardOrdering) = reinterpret(UInt32, x)
-uint_unmap(::Type{Char}, u::UInt32, ::ForwardOrdering) = reinterpret(Char, u)
-UIntMappable(::Type{Char}, ::ForwardOrdering) = UInt32
-
-### Reverse orderings
-uint_map(x, rev::ReverseOrdering) = ~uint_map(x, rev.fwd)
-uint_unmap(T::Type, u::Unsigned, rev::ReverseOrdering) = uint_unmap(T, ~u, rev.fwd)
-UIntMappable(T::Type, order::ReverseOrdering) = UIntMappable(T, order.fwd)
-
-
-### Vectors
-
-# Convert v to unsigned integers in place, maintaining sort order.
-function uint_map!(v::AbstractVector, lo::Integer, hi::Integer, order::Ordering)
-    u = reinterpret(UIntMappable(eltype(v), order), v)
-    @inbounds for i in lo:hi
-        u[i] = uint_map(v[i], order)
-    end
-    u
-end
-
-function uint_unmap!(v::AbstractVector, u::AbstractVector{U}, lo::Integer, hi::Integer,
-                     order::Ordering, offset::U=zero(U)) where U <: Unsigned
-    @inbounds for i in lo:hi
-        v[i] = uint_unmap(eltype(v), u[i]+offset, order)
-    end
-    v
-end
-
-
-## fast clever sorting for floats ##
-
-module Float
-using ..Sort
-using ...Order
-using ..Base: @inbounds, AbstractVector, Vector, last, firstindex, lastindex, Missing, Type, reinterpret
-
-import Core.Intrinsics: slt_int
-import ..Sort: sort!, UIntMappable, uint_map, uint_unmap
-import ...Order: lt, DirectOrdering
-
-const Floats = Union{Float32,Float64}
-const FPSortable = Union{ # Mixed Float32 and Float64 are not allowed.
-    AbstractVector{Union{Float32, Missing}},
-    AbstractVector{Union{Float64, Missing}},
-    AbstractVector{Float32},
-    AbstractVector{Float64},
-    AbstractVector{Missing}}
-
-struct Left <: Ordering end
-struct Right <: Ordering end
-
-left(::DirectOrdering) = Left()
-right(::DirectOrdering) = Right()
-
-left(o::Perm) = Perm(left(o.order), o.data)
-right(o::Perm) = Perm(right(o.order), o.data)
-
-lt(::Left, x::T, y::T) where {T<:Floats} = slt_int(y, x)
-lt(::Right, x::T, y::T) where {T<:Floats} = slt_int(x, y)
-
-uint_map(x::Float32, ::Left) = ~reinterpret(UInt32, x)
-uint_unmap(::Type{Float32}, u::UInt32, ::Left) = reinterpret(Float32, ~u)
-uint_map(x::Float32, ::Right) = reinterpret(UInt32, x)
-uint_unmap(::Type{Float32}, u::UInt32, ::Right) = reinterpret(Float32, u)
-UIntMappable(::Type{Float32}, ::Union{Left, Right}) = UInt32
-
-uint_map(x::Float64, ::Left) = ~reinterpret(UInt64, x)
-uint_unmap(::Type{Float64}, u::UInt64, ::Left) = reinterpret(Float64, ~u)
-uint_map(x::Float64, ::Right) = reinterpret(UInt64, x)
-uint_unmap(::Type{Float64}, u::UInt64, ::Right) = reinterpret(Float64, u)
-UIntMappable(::Type{Float64}, ::Union{Left, Right}) = UInt64
-
-isnan(o::DirectOrdering, x::Floats) = (x!=x)
-isnan(o::DirectOrdering, x::Missing) = false
-isnan(o::Perm, i::Integer) = isnan(o.order,o.data[i])
-
-ismissing(o::DirectOrdering, x::Floats) = false
-ismissing(o::DirectOrdering, x::Missing) = true
-ismissing(o::Perm, i::Integer) = ismissing(o.order,o.data[i])
-
-allowsmissing(::AbstractVector{T}, ::DirectOrdering) where {T} = T >: Missing
-allowsmissing(::AbstractVector{<:Integer},
-              ::Perm{<:DirectOrdering,<:AbstractVector{T}}) where {T} =
-    T >: Missing
-
-function specials2left!(testf::Function, v::AbstractVector, o::Ordering,
-                        lo::Integer=firstindex(v), hi::Integer=lastindex(v))
-    i = lo
-    @inbounds while i <= hi && testf(o,v[i])
-        i += 1
-    end
-    j = i + 1
-    @inbounds while j <= hi
-        if testf(o,v[j])
-            v[i], v[j] = v[j], v[i]
-            i += 1
-        end
-        j += 1
-    end
-    return i, hi
-end
-function specials2right!(testf::Function, v::AbstractVector, o::Ordering,
-                         lo::Integer=firstindex(v), hi::Integer=lastindex(v))
-    i = hi
-    @inbounds while lo <= i && testf(o,v[i])
-        i -= 1
-    end
-    j = i - 1
-    @inbounds while lo <= j
-        if testf(o,v[j])
-            v[i], v[j] = v[j], v[i]
-            i -= 1
-        end
-        j -= 1
-    end
-    return lo, i
-end
-
-function specials2left!(v::AbstractVector, a::Algorithm, o::Ordering)
-    lo, hi = firstindex(v), lastindex(v)
-    if allowsmissing(v, o)
-        i, _ = specials2left!((v, o) -> ismissing(v, o) || isnan(v, o), v, o, lo, hi)
-        sort!(v, lo, i-1, a, o)
-        return i, hi
-    else
-        return specials2left!(isnan, v, o, lo, hi)
-    end
-end
-function specials2right!(v::AbstractVector, a::Algorithm, o::Ordering)
-    lo, hi = firstindex(v), lastindex(v)
-    if allowsmissing(v, o)
-        _, i = specials2right!((v, o) -> ismissing(v, o) || isnan(v, o), v, o, lo, hi)
-        sort!(v, i+1, hi, a, o)
-        return lo, i
-    else
-        return specials2right!(isnan, v, o, lo, hi)
-    end
-end
-
-specials2end!(v::AbstractVector, a::Algorithm, o::ForwardOrdering) =
-    specials2right!(v, a, o)
-specials2end!(v::AbstractVector, a::Algorithm, o::ReverseOrdering) =
-    specials2left!(v, a, o)
-specials2end!(v::AbstractVector{<:Integer}, a::Algorithm, o::Perm{<:ForwardOrdering}) =
-    specials2right!(v, a, o)
-specials2end!(v::AbstractVector{<:Integer}, a::Algorithm, o::Perm{<:ReverseOrdering}) =
-    specials2left!(v, a, o)
-
-issignleft(o::ForwardOrdering, x::Floats) = lt(o, x, zero(x))
-issignleft(o::ReverseOrdering, x::Floats) = lt(o, x, -zero(x))
-issignleft(o::Perm, i::Integer) = issignleft(o.order, o.data[i])
-
-function fpsort!(v::AbstractVector, a::Algorithm, o::Ordering,
-        t::Union{AbstractVector, Nothing}=nothing)
-    # fpsort!'s optimizations speed up comparisons, of which there are O(nlogn).
-    # The overhead is O(n). For n < 10, it's not worth it.
-    length(v) < 10 && return sort!(v, firstindex(v), lastindex(v), SMALL_ALGORITHM, o, t)
-
-    i, j = lo, hi = specials2end!(v,a,o)
-    @inbounds while true
-        while i <= j &&  issignleft(o,v[i]); i += 1; end
-        while i <= j && !issignleft(o,v[j]); j -= 1; end
-        i <= j || break
-        v[i], v[j] = v[j], v[i]
-        i += 1; j -= 1
-    end
-    sort!(v, lo, j,  a, left(o), t)
-    sort!(v, i,  hi, a, right(o), t)
-    return v
-end
-
-
-fpsort!(v::AbstractVector, a::Sort.PartialQuickSort, o::Ordering) =
-    sort!(v, firstindex(v), lastindex(v), a, o)
-
-function sort!(v::FPSortable, a::Algorithm, o::DirectOrdering,
-        t::Union{FPSortable, Nothing}=nothing)
-    fpsort!(v, a, o, t)
-end
-function sort!(v::AbstractVector{<:Union{Signed, Unsigned}}, a::Algorithm,
-        o::Perm{<:DirectOrdering,<:FPSortable}, t::Union{AbstractVector, Nothing}=nothing)
-    fpsort!(v, a, o, t)
-end
-
-end # module Sort.Float
-
 end # module Sort
diff --git a/base/sysimg.jl b/base/sysimg.jl
index c68d9f9c82bff..b4281ccca9418 100644
--- a/base/sysimg.jl
+++ b/base/sysimg.jl
@@ -52,6 +52,7 @@ let
         :LibGit2,
         :Profile,
         :SparseArrays,
+        :StandardSortingAlgorithms,
         :UUIDs,
 
         # 3-depth packages
diff --git a/stdlib/StandardSortingAlgorithms/Project.toml b/stdlib/StandardSortingAlgorithms/Project.toml
new file mode 100644
index 0000000000000..69c7921da68ad
--- /dev/null
+++ b/stdlib/StandardSortingAlgorithms/Project.toml
@@ -0,0 +1,15 @@
+name = "StandardSortingAlgorithms"
+uuid = "7744cb9a-8a56-1d63-a5da-e2fdf8a12fa2"
+
+[deps]
+Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+
+[extras]
+Future = "9fa8497b-333b-5362-9e8d-4d0656e87820"
+LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
+Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test", "SparseArrays", "LinearAlgebra", "Future", "Statistics"]
diff --git a/stdlib/StandardSortingAlgorithms/docs/src/index.md b/stdlib/StandardSortingAlgorithms/docs/src/index.md
new file mode 100644
index 0000000000000..0f7636cf2444f
--- /dev/null
+++ b/stdlib/StandardSortingAlgorithms/docs/src/index.md
@@ -0,0 +1,366 @@
+# Random Numbers
+
+```@meta
+DocTestSetup = :(using Random)
+```
+
+Random number generation in Julia uses the [Xoshiro256++](https://prng.di.unimi.it/) algorithm
+by default, with per-`Task` state.
+Other RNG types can be plugged in by inheriting the `AbstractRNG` type; they can then be used to
+obtain multiple streams of random numbers.
+
+The PRNGs (pseudorandom number generators) exported by the `Random` package are:
+* `TaskLocalRNG`: a token that represents use of the currently active Task-local stream, deterministically seeded from the parent task, or by `RandomDevice` (with system randomness) at program start
+* `Xoshiro`: generates a high-quality stream of random numbers with a small state vector and high performance using the Xoshiro256++ algorithm
+* `RandomDevice`: for OS-provided entropy. This may be used for cryptographically secure random numbers (CS(P)RNG).
+* `MersenneTwister`: an alternate high-quality PRNG which was the default in older versions of Julia, and is also quite fast, but requires much more space to store the state vector and generate a random sequence.
+
+Most functions related to random generation accept an optional `AbstractRNG` object as first argument.
+Some also accept dimension specifications `dims...` (which can also be given as a tuple) to generate
+arrays of random values.
+In a multi-threaded program, you should generally use different RNG objects from different threads
+or tasks in order to be thread-safe. However, the default RNG is thread-safe as of Julia 1.3
+(using a per-thread RNG up to version 1.6, and per-task thereafter).
+
+The provided RNGs can generate uniform random numbers of the following types:
+[`Float16`](@ref), [`Float32`](@ref), [`Float64`](@ref), [`BigFloat`](@ref), [`Bool`](@ref),
+[`Int8`](@ref), [`UInt8`](@ref), [`Int16`](@ref), [`UInt16`](@ref), [`Int32`](@ref),
+[`UInt32`](@ref), [`Int64`](@ref), [`UInt64`](@ref), [`Int128`](@ref), [`UInt128`](@ref),
+[`BigInt`](@ref) (or complex numbers of those types).
+Random floating point numbers are generated uniformly in ``[0, 1)``. As `BigInt` represents
+unbounded integers, the interval must be specified (e.g. `rand(big.(1:6))`).
+
+Additionally, normal and exponential distributions are implemented for some `AbstractFloat` and
+`Complex` types, see [`randn`](@ref) and [`randexp`](@ref) for details.
+
+!!! warning
+    Because the precise way in which random numbers are generated is considered an implementation detail, bug fixes and speed improvements may change the stream of numbers that are generated after a version change. Relying on a specific seed or generated stream of numbers during unit testing is thus discouraged - consider testing properties of the methods in question instead.
+
+## Random numbers module
+```@docs
+Random.Random
+```
+
+## Random generation functions
+
+```@docs
+Random.rand
+Random.rand!
+Random.bitrand
+Random.randn
+Random.randn!
+Random.randexp
+Random.randexp!
+Random.randstring
+```
+
+## Subsequences, permutations and shuffling
+
+```@docs
+Random.randsubseq
+Random.randsubseq!
+Random.randperm
+Random.randperm!
+Random.randcycle
+Random.randcycle!
+Random.shuffle
+Random.shuffle!
+```
+
+## Generators (creation and seeding)
+
+```@docs
+Random.default_rng
+Random.seed!
+Random.AbstractRNG
+Random.TaskLocalRNG
+Random.Xoshiro
+Random.MersenneTwister
+Random.RandomDevice
+```
+
+## Hooking into the `Random` API
+
+There are two mostly orthogonal ways to extend `Random` functionalities:
+1) generating random values of custom types
+2) creating new generators
+
+The API for 1) is quite functional, but is relatively recent so it may still have to evolve in subsequent releases of the `Random` module.
+For example, it's typically sufficient to implement one `rand` method in order to have all other usual methods work automatically.
+
+The API for 2) is still rudimentary, and may require more work than strictly necessary from the implementor,
+in order to support usual types of generated values.
+
+### Generating random values of custom types
+
+Generating random values for some distributions may involve various trade-offs. *Pre-computed* values, such as an [alias table](https://en.wikipedia.org/wiki/Alias_method) for discrete distributions, or [“squeezing” functions](https://en.wikipedia.org/wiki/Rejection_sampling) for univariate distributions, can speed up sampling considerably. How much information should be pre-computed can depend on the number of values we plan to draw from a distribution. Also, some random number generators can have certain properties that various algorithms may want to exploit.
+
+The `Random` module defines a customizable framework for obtaining random values that can address these issues. Each invocation of `rand` generates a *sampler* which can be customized with the above trade-offs in mind, by adding methods to `Sampler`, which in turn can dispatch on the random number generator, the object that characterizes the distribution, and a suggestion for the number of repetitions. Currently, for the latter, `Val{1}` (for a single sample) and `Val{Inf}` (for an arbitrary number) are used, with `Random.Repetition` an alias for both.
+
+The object returned by `Sampler` is then used to generate the random values. When implementing the random generation interface for a value `X` that can be sampled from, the implementor should define the method
+
+```julia
+rand(rng, sampler)
+```
+for the particular `sampler` returned by `Sampler(rng, X, repetition)`.
+
+Samplers can be arbitrary values that implement `rand(rng, sampler)`, but for most applications the following predefined samplers may be sufficient:
+
+1. `SamplerType{T}()` can be used for implementing samplers that draw from type `T` (e.g. `rand(Int)`). This is the default returned by `Sampler` for *types*.
+
+2. `SamplerTrivial(self)` is a simple wrapper for `self`, which can be accessed with `[]`. This is the recommended sampler when no pre-computed information is needed (e.g. `rand(1:3)`), and is the default returned by `Sampler` for *values*.
+
+3. `SamplerSimple(self, data)` also contains the additional `data` field, which can be used to store arbitrary pre-computed values, which should be computed in a *custom method* of `Sampler`.
+
+We provide examples for each of these. We assume here that the choice of algorithm is independent of the RNG, so we use `AbstractRNG` in our signatures.
+
+```@docs
+Random.Sampler
+Random.SamplerType
+Random.SamplerTrivial
+Random.SamplerSimple
+```
+
+Decoupling pre-computation from actually generating the values is part of the API, and is also available to the user. As an example, assume that `rand(rng, 1:20)` has to be called repeatedly in a loop: the way to take advantage of this decoupling is as follows:
+
+```julia
+rng = MersenneTwister()
+sp = Random.Sampler(rng, 1:20) # or Random.Sampler(MersenneTwister, 1:20)
+for x in X
+    n = rand(rng, sp) # similar to n = rand(rng, 1:20)
+    # use n
+end
+```
+
+This is the mechanism that is also used in the standard library, e.g. by the default implementation of random array generation (like in `rand(1:20, 10)`).
+
+#### Generating values from a type
+
+Given a type `T`, it's currently assumed that if `rand(T)` is defined, an object of type `T` will be produced. `SamplerType` is the *default sampler for types*. In order to define random generation of values of type `T`, the `rand(rng::AbstractRNG, ::Random.SamplerType{T})` method should be defined, and should return values what `rand(rng, T)` is expected to return.
+
+Let's take the following example: we implement a `Die` type, with a variable number `n` of sides, numbered from `1` to `n`. We want `rand(Die)` to produce a `Die` with a random number of up to 20 sides (and at least 4):
+
+```jldoctest Die
+struct Die
+    nsides::Int # number of sides
+end
+
+Random.rand(rng::AbstractRNG, ::Random.SamplerType{Die}) = Die(rand(rng, 4:20))
+
+# output
+
+```
+
+Scalar and array methods for `Die` now work as expected:
+
+```jldoctest Die; setup = :(Random.seed!(1))
+julia> rand(Die)
+Die(5)
+
+julia> rand(MersenneTwister(0), Die)
+Die(11)
+
+julia> rand(Die, 3)
+3-element Vector{Die}:
+ Die(9)
+ Die(15)
+ Die(14)
+
+julia> a = Vector{Die}(undef, 3); rand!(a)
+3-element Vector{Die}:
+ Die(19)
+ Die(7)
+ Die(17)
+```
+
+#### A simple sampler without pre-computed data
+
+Here we define a sampler for a collection. If no pre-computed data is required, it can be implemented with a `SamplerTrivial` sampler, which is in fact the *default fallback for values*.
+
+In order to define random generation out of objects of type `S`, the following method should be defined: `rand(rng::AbstractRNG, sp::Random.SamplerTrivial{S})`. Here, `sp` simply wraps an object of type `S`, which can be accessed via `sp[]`. Continuing the `Die` example, we want now to define `rand(d::Die)` to produce an `Int` corresponding to one of `d`'s sides:
+
+```jldoctest Die; setup = :(Random.seed!(1))
+julia> Random.rand(rng::AbstractRNG, d::Random.SamplerTrivial{Die}) = rand(rng, 1:d[].nsides);
+
+julia> rand(Die(4))
+1
+
+julia> rand(Die(4), 3)
+3-element Vector{Any}:
+ 2
+ 3
+ 3
+```
+
+Given a collection type `S`, it's currently assumed that if `rand(::S)` is defined, an object of type `eltype(S)` will be produced. In the last example, a `Vector{Any}` is produced; the reason is that `eltype(Die) == Any`. The remedy is to define `Base.eltype(::Type{Die}) = Int`.
+
+#### Generating values for an `AbstractFloat` type
+
+`AbstractFloat` types are special-cased, because by default random values are not produced in the whole type domain, but rather in `[0,1)`. The following method should be implemented for `T <: AbstractFloat`: `Random.rand(::AbstractRNG, ::Random.SamplerTrivial{Random.CloseOpen01{T}})`
+
+#### An optimized sampler with pre-computed data
+
+Consider a discrete distribution, where numbers `1:n` are drawn with given probabilities that sum to one. When many values are needed from this distribution, the fastest method is using an [alias table](https://en.wikipedia.org/wiki/Alias_method). We don't provide the algorithm for building such a table here, but suppose it is available in `make_alias_table(probabilities)` instead, and `draw_number(rng, alias_table)` can be used to draw a random number from it.
+
+Suppose that the distribution is described by
+```julia
+struct DiscreteDistribution{V <: AbstractVector}
+    probabilities::V
+end
+```
+and that we *always* want to build an alias table, regardless of the number of values needed (we learn how to customize this below). The methods
+```julia
+Random.eltype(::Type{<:DiscreteDistribution}) = Int
+
+function Random.Sampler(::Type{<:AbstractRNG}, distribution::DiscreteDistribution, ::Repetition)
+    SamplerSimple(disribution, make_alias_table(distribution.probabilities))
+end
+```
+should be defined to return a sampler with pre-computed data, then
+```julia
+function rand(rng::AbstractRNG, sp::SamplerSimple{<:DiscreteDistribution})
+    draw_number(rng, sp.data)
+end
+```
+will be used to draw the values.
+
+#### Custom sampler types
+
+The `SamplerSimple` type is sufficient for most use cases with precomputed data. However, in order to demonstrate how to use custom sampler types, here we implement something similar to `SamplerSimple`.
+
+Going back to our `Die` example: `rand(::Die)` uses random generation from a range, so there is an opportunity for this optimization. We call our custom sampler `SamplerDie`.
+
+```julia
+import Random: Sampler, rand
+
+struct SamplerDie <: Sampler{Int} # generates values of type Int
+    die::Die
+    sp::Sampler{Int} # this is an abstract type, so this could be improved
+end
+
+Sampler(RNG::Type{<:AbstractRNG}, die::Die, r::Random.Repetition) =
+    SamplerDie(die, Sampler(RNG, 1:die.nsides, r))
+# the `r` parameter will be explained later on
+
+rand(rng::AbstractRNG, sp::SamplerDie) = rand(rng, sp.sp)
+```
+
+It's now possible to get a sampler with `sp = Sampler(rng, die)`, and use `sp` instead of `die` in any `rand` call involving `rng`. In the simplistic example above, `die` doesn't need to be stored in `SamplerDie` but this is often the case in practice.
+
+Of course, this pattern is so frequent that the helper type used above, namely `Random.SamplerSimple`, is available,
+saving us the definition of `SamplerDie`: we could have implemented our decoupling with:
+
+```julia
+Sampler(RNG::Type{<:AbstractRNG}, die::Die, r::Random.Repetition) =
+    SamplerSimple(die, Sampler(RNG, 1:die.nsides, r))
+
+rand(rng::AbstractRNG, sp::SamplerSimple{Die}) = rand(rng, sp.data)
+```
+
+Here, `sp.data` refers to the second parameter in the call to the `SamplerSimple` constructor
+(in this case equal to `Sampler(rng, 1:die.nsides, r)`), while the `Die` object can be accessed
+via `sp[]`.
+
+Like `SamplerDie`, any custom sampler must be a subtype of `Sampler{T}` where `T` is the type
+of the generated values. Note that `SamplerSimple(x, data) isa Sampler{eltype(x)}`,
+so this constrains what the first argument to `SamplerSimple` can be
+(it's recommended to use `SamplerSimple` like in the `Die` example, where
+`x` is simply forwarded while defining a `Sampler` method).
+Similarly, `SamplerTrivial(x) isa Sampler{eltype(x)}`.
+
+Another helper type is currently available for other cases, `Random.SamplerTag`, but is
+considered as internal API, and can break at any time without proper deprecations.
+
+
+#### Using distinct algorithms for scalar or array generation
+
+In some cases, whether one wants to generate only a handful of values or a large number of values
+will have an impact on the choice of algorithm. This is handled with the third parameter of the
+`Sampler` constructor. Let's assume we defined two helper types for `Die`, say `SamplerDie1`
+which should be used to generate only few random values, and `SamplerDieMany` for many values.
+We can use those types as follows:
+
+```julia
+Sampler(RNG::Type{<:AbstractRNG}, die::Die, ::Val{1}) = SamplerDie1(...)
+Sampler(RNG::Type{<:AbstractRNG}, die::Die, ::Val{Inf}) = SamplerDieMany(...)
+```
+
+Of course, `rand` must also be defined on those types (i.e. `rand(::AbstractRNG, ::SamplerDie1)` and `rand(::AbstractRNG, ::SamplerDieMany)`). Note that, as usual, `SamplerTrivial` and `SamplerSimple` can be used if custom types are not necessary.
+
+Note: `Sampler(rng, x)` is simply a shorthand for `Sampler(rng, x, Val(Inf))`, and
+`Random.Repetition` is an alias for `Union{Val{1}, Val{Inf}}`.
+
+
+### Creating new generators
+
+The API is not clearly defined yet, but as a rule of thumb:
+1) any `rand` method producing "basic" types (`isbitstype` integer and floating types in `Base`)
+   should be defined for this specific RNG, if they are needed;
+2) other documented `rand` methods accepting an `AbstractRNG` should work out of the box,
+   (provided the methods from 1) what are relied on are implemented),
+   but can of course be specialized for this RNG if there is room for optimization;
+3) `copy` for pseudo-RNGs should return an independent copy that generates the exact same random sequence as the
+   original from that point when called in the same way. When this is not feasible (e.g. hardware-based RNGs),
+   `copy` must not be implemented.
+
+Concerning 1), a `rand` method may happen to work automatically, but it's not officially
+supported and may break without warnings in a subsequent release.
+
+To define a new `rand` method for an hypothetical `MyRNG` generator, and a value specification `s`
+(e.g. `s == Int`, or `s == 1:10`) of type `S==typeof(s)` or `S==Type{s}` if `s` is a type,
+the same two methods as we saw before must be defined:
+
+1) `Sampler(::Type{MyRNG}, ::S, ::Repetition)`, which returns an object of type say `SamplerS`
+2) `rand(rng::MyRNG, sp::SamplerS)`
+
+It can happen that `Sampler(rng::AbstractRNG, ::S, ::Repetition)` is
+already defined in the `Random` module. It would then be possible to
+skip step 1) in practice (if one wants to specialize generation for
+this particular RNG type), but the corresponding `SamplerS` type is
+considered as internal detail, and may be changed without warning.
+
+
+#### Specializing array generation
+
+In some cases, for a given RNG type, generating an array of random
+values can be more efficient with a specialized method than by merely
+using the decoupling technique explained before. This is for example
+the case for `MersenneTwister`, which natively writes random values in
+an array.
+
+To implement this specialization for `MyRNG`
+and for a specification `s`, producing elements of type `S`,
+the following method can be defined:
+`rand!(rng::MyRNG, a::AbstractArray{S}, ::SamplerS)`,
+where `SamplerS` is the type of the sampler returned by `Sampler(MyRNG, s, Val(Inf))`.
+Instead of `AbstractArray`, it's possible to implement the functionality only for a subtype, e.g. `Array{S}`.
+The non-mutating array method of `rand` will automatically call this specialization internally.
+
+```@meta
+DocTestSetup = nothing
+```
+
+# Reproducibility
+
+By using an RNG parameter initialized with a given seed, you can reproduce the same pseudorandom
+number sequence when running your program multiple times. However, a minor release of Julia (e.g.
+1.3 to 1.4) *may change* the sequence of pseudorandom numbers generated from a specific seed, in
+particular if `MersenneTwister` is used. (Even if the sequence produced by a low-level function like
+[`rand`](@ref) does not change, the output of higher-level functions like [`randsubseq`](@ref) may
+change due to algorithm updates.) Rationale: guaranteeing that pseudorandom streams never change
+prohibits many algorithmic improvements.
+
+If you need to guarantee exact reproducibility of random data, it is advisable to simply *save the
+data* (e.g. as a supplementary attachment in a scientific publication). (You can also, of course,
+specify a particular Julia version and package manifest, especially if you require bit
+reproducibility.)
+
+Software tests that rely on *specific* "random" data should also generally either save the data,
+embed it into the test code, or use third-party packages like
+[StableRNGs.jl](https://github.com/JuliaRandom/StableRNGs.jl). On the other hand, tests that should
+pass for *most* random data (e.g. testing `A \ (A*x) ≈ x` for a random matrix `A = randn(n,n)`) can
+use an RNG with a fixed seed to ensure that simply running the test many times does not encounter a
+failure due to very improbable data (e.g. an extremely ill-conditioned matrix).
+
+The statistical *distribution* from which random samples are drawn *is* guaranteed to be the same
+across any minor Julia releases.
diff --git a/stdlib/StandardSortingAlgorithms/src/AdaptiveSort.jl b/stdlib/StandardSortingAlgorithms/src/AdaptiveSort.jl
new file mode 100644
index 0000000000000..acda37f97f530
--- /dev/null
+++ b/stdlib/StandardSortingAlgorithms/src/AdaptiveSort.jl
@@ -0,0 +1,127 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# For AbstractVector{Bool}, counting sort is always best.
+# This is an implementation of counting sort specialized for Bools.
+function sort!(v::AbstractVector{B}, lo::Integer, hi::Integer, a::AdaptiveSort, o::Ordering,
+        t::Union{AbstractVector{B}, Nothing}=nothing) where {B <: Bool}
+    first = lt(o, false, true) ? false : lt(o, true, false) ? true : return v
+    count = 0
+    @inbounds for i in lo:hi
+        if v[i] == first
+            count += 1
+        end
+    end
+    @inbounds v[lo:lo+count-1] .= first
+    @inbounds v[lo+count:hi] .= !first
+    v
+end
+function sort!(v::AbstractVector{T}, lo::Integer, hi::Integer, a::AdaptiveSort, o::Ordering,
+            t::Union{AbstractVector{T}, Nothing}=nothing) where T
+    # if the sorting task is not UIntMappable, then we can't radix sort or sort_int_range!
+    # so we skip straight to the fallback algorithm which is comparison based.
+    U = UIntMappable(T, o)
+    U === nothing && return sort!(v, lo, hi, a.fallback, o)
+
+    # to avoid introducing excessive detection costs for the trivial sorting problem
+    # and to avoid overflow, we check for small inputs before any other runtime checks
+    hi <= lo && return v
+    lenm1 = maybe_unsigned(hi-lo) # adding 1 would risk overflow
+    # only count sort on a short range can compete with insertion sort when lenm1 < 40
+    # and the optimization is not worth the detection cost, so we use insertion sort.
+    lenm1 < 40 && return sort!(v, lo, hi, SMALL_ALGORITHM, o)
+
+    # For most arrays, a presorted check is cheap (overhead < 5%) and for most large
+    # arrays it is essentially free (<1%). Insertion sort runs in a fast O(n) on presorted
+    # input and this guarantees presorted input will always be efficiently handled
+    issorted(view(v, lo:hi), o) && return v
+
+    # For large arrays, a reverse-sorted check is essentially free (overhead < 1%)
+    if lenm1 >= 500 && issorted(view(v, lo:hi), ReverseOrdering(o))
+        reverse!(view(v, lo:hi))
+        return v
+    end
+
+    # UInt128 does not support fast bit shifting so we never
+    # dispatch to radix sort but we may still perform count sort
+    if sizeof(U) > 8
+        if T <: Integer && o isa DirectOrdering
+            v_min, v_max = _extrema(v, lo, hi, Forward)
+            v_range = maybe_unsigned(v_max-v_min)
+            v_range == 0 && return v # all same
+
+            # we know lenm1 ≥ 40, so this will never underflow.
+            # if lenm1 > 3.7e18 (59 exabytes), then this may incorrectly dispatch to fallback
+            if v_range < 5lenm1-100 # count sort will outperform comparison sort if v's range is small
+                return sort_int_range!(v, Int(v_range+1), v_min, o === Forward ? identity : reverse, lo, hi)
+            end
+        end
+        return sort!(v, lo, hi, a.fallback, o)
+    end
+
+    v_min, v_max = _extrema(v, lo, hi, o)
+    lt(o, v_min, v_max) || return v # all same
+    if T <: Integer && o isa DirectOrdering
+        R = o === Reverse
+        v_range = maybe_unsigned(R ? v_min-v_max : v_max-v_min)
+        if v_range < div(lenm1, 2) # count sort will be superior if v's range is very small
+            return sort_int_range!(v, Int(v_range+1), R ? v_max : v_min, R ? reverse : identity, lo, hi)
+        end
+    end
+
+    u_min, u_max = uint_map(v_min, o), uint_map(v_max, o)
+    u_range = maybe_unsigned(u_max-u_min)
+    if u_range < div(lenm1, 2) # count sort will be superior if u's range is very small
+        u = uint_map!(v, lo, hi, o)
+        sort_int_range!(u, Int(u_range+1), u_min, identity, lo, hi)
+        return uint_unmap!(v, u, lo, hi, o)
+    end
+
+    # if u's range is small, then once we subtract out v_min, we'll get a vector like
+    # UInt16[0x001a, 0x0015, 0x0006, 0x001b, 0x0008, 0x000c, 0x0001, 0x000e, 0x001c, 0x0009]
+    # where we only need to radix over the last few bits (5, in the example).
+    bits = unsigned(8sizeof(u_range) - leading_zeros(u_range))
+
+    # radix sort runs in O(bits * lenm1), insertion sort runs in O(lenm1^2). Radix sort
+    # has a constant factor that is three times higher, so radix runtime is 3bits * lenm1
+    # and insertion runtime is lenm1^2. Empirically, insertion is faster than radix iff
+    # lenm1 < 3bits.
+    # Insertion < Radix
+    #   lenm1^2 < 3 * bits * lenm1
+    #     lenm1 < 3bits
+    if lenm1 < 3bits
+        # at lenm1 = 64*3-1, QuickSort is about 20% faster than InsertionSort.
+        alg = a.fallback === QuickSort && lenm1 > 120 ? QuickSort : SMALL_ALGORITHM
+        return sort!(v, lo, hi, alg, o)
+    end
+
+    # At this point, we are committed to radix sort.
+    u = uint_map!(v, lo, hi, o)
+
+    # we subtract u_min to avoid radixing over unnecessary bits. For example,
+    # Int32[3, -1, 2] uint_maps to UInt32[0x80000003, 0x7fffffff, 0x80000002]
+    # which uses all 32 bits, but once we subtract u_min = 0x7fffffff, we are left with
+    # UInt32[0x00000004, 0x00000000, 0x00000003] which uses only 3 bits, and
+    # Float32[2.012, 400.0, 12.345] uint_maps to UInt32[0x3fff3b63, 0x3c37ffff, 0x414570a4]
+    # which is reduced to UInt32[0x03c73b64, 0x00000000, 0x050d70a5] using only 26 bits.
+    # the overhead for this subtraction is small enough that it is worthwhile in many cases.
+
+    # this is faster than u[lo:hi] .-= u_min as of v1.9.0-DEV.100
+    @inbounds for i in lo:hi
+        u[i] -= u_min
+    end
+
+    u2 = radix_sort!(u, lo, hi, bits, reinterpret(U, workspace(v, t, hi)))
+    uint_unmap!(v, u2, lo, hi, o, u_min)
+end
+
+maybe_unsigned(x::Integer) = x # this is necessary to avoid calling unsigned on BigInt
+maybe_unsigned(x::BitSigned) = unsigned(x)
+function _extrema(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering)
+    mn = mx = v[lo]
+    @inbounds for i in (lo+1):hi
+        vi = v[i]
+        lt(o, vi, mn) && (mn = vi)
+        lt(o, mx, vi) && (mx = vi)
+    end
+    mn, mx
+end
diff --git a/stdlib/StandardSortingAlgorithms/src/Float.jl b/stdlib/StandardSortingAlgorithms/src/Float.jl
new file mode 100644
index 0000000000000..9d488a2a5df8e
--- /dev/null
+++ b/stdlib/StandardSortingAlgorithms/src/Float.jl
@@ -0,0 +1,157 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#  Floating point optimizations
+module Float
+using ..Sort
+using ...Order
+using ..Base: @inbounds, AbstractVector, Vector, last, firstindex, lastindex, Missing, Type, reinterpret
+
+import Core.Intrinsics: slt_int
+import ..StandardSortingAlgorithms: sort!, UIntMappable, uint_map, uint_unmap
+import ...Order: lt, DirectOrdering
+
+const Floats = Union{Float32,Float64}
+const FPSortable = Union{ # Mixed Float32 and Float64 are not allowed.
+    AbstractVector{Union{Float32, Missing}},
+    AbstractVector{Union{Float64, Missing}},
+    AbstractVector{Float32},
+    AbstractVector{Float64},
+    AbstractVector{Missing}}
+
+struct Left <: Ordering end
+struct Right <: Ordering end
+
+left(::DirectOrdering) = Left()
+right(::DirectOrdering) = Right()
+
+left(o::Perm) = Perm(left(o.order), o.data)
+right(o::Perm) = Perm(right(o.order), o.data)
+
+lt(::Left, x::T, y::T) where {T<:Floats} = slt_int(y, x)
+lt(::Right, x::T, y::T) where {T<:Floats} = slt_int(x, y)
+
+uint_map(x::Float32, ::Left) = ~reinterpret(UInt32, x)
+uint_unmap(::Type{Float32}, u::UInt32, ::Left) = reinterpret(Float32, ~u)
+uint_map(x::Float32, ::Right) = reinterpret(UInt32, x)
+uint_unmap(::Type{Float32}, u::UInt32, ::Right) = reinterpret(Float32, u)
+UIntMappable(::Type{Float32}, ::Union{Left, Right}) = UInt32
+
+uint_map(x::Float64, ::Left) = ~reinterpret(UInt64, x)
+uint_unmap(::Type{Float64}, u::UInt64, ::Left) = reinterpret(Float64, ~u)
+uint_map(x::Float64, ::Right) = reinterpret(UInt64, x)
+uint_unmap(::Type{Float64}, u::UInt64, ::Right) = reinterpret(Float64, u)
+UIntMappable(::Type{Float64}, ::Union{Left, Right}) = UInt64
+
+isnan(o::DirectOrdering, x::Floats) = (x!=x)
+isnan(o::DirectOrdering, x::Missing) = false
+isnan(o::Perm, i::Integer) = isnan(o.order,o.data[i])
+
+ismissing(o::DirectOrdering, x::Floats) = false
+ismissing(o::DirectOrdering, x::Missing) = true
+ismissing(o::Perm, i::Integer) = ismissing(o.order,o.data[i])
+
+allowsmissing(::AbstractVector{T}, ::DirectOrdering) where {T} = T >: Missing
+allowsmissing(::AbstractVector{<:Integer},
+              ::Perm{<:DirectOrdering,<:AbstractVector{T}}) where {T} =
+    T >: Missing
+
+function specials2left!(testf::Function, v::AbstractVector, o::Ordering,
+                        lo::Integer=firstindex(v), hi::Integer=lastindex(v))
+    i = lo
+    @inbounds while i <= hi && testf(o,v[i])
+        i += 1
+    end
+    j = i + 1
+    @inbounds while j <= hi
+        if testf(o,v[j])
+            v[i], v[j] = v[j], v[i]
+            i += 1
+        end
+        j += 1
+    end
+    return i, hi
+end
+function specials2right!(testf::Function, v::AbstractVector, o::Ordering,
+                         lo::Integer=firstindex(v), hi::Integer=lastindex(v))
+    i = hi
+    @inbounds while lo <= i && testf(o,v[i])
+        i -= 1
+    end
+    j = i - 1
+    @inbounds while lo <= j
+        if testf(o,v[j])
+            v[i], v[j] = v[j], v[i]
+            i -= 1
+        end
+        j -= 1
+    end
+    return lo, i
+end
+
+function specials2left!(v::AbstractVector, a::Algorithm, o::Ordering)
+    lo, hi = firstindex(v), lastindex(v)
+    if allowsmissing(v, o)
+        i, _ = specials2left!((v, o) -> ismissing(v, o) || isnan(v, o), v, o, lo, hi)
+        sort!(v, lo, i-1, a, o)
+        return i, hi
+    else
+        return specials2left!(isnan, v, o, lo, hi)
+    end
+end
+function specials2right!(v::AbstractVector, a::Algorithm, o::Ordering)
+    lo, hi = firstindex(v), lastindex(v)
+    if allowsmissing(v, o)
+        _, i = specials2right!((v, o) -> ismissing(v, o) || isnan(v, o), v, o, lo, hi)
+        sort!(v, i+1, hi, a, o)
+        return lo, i
+    else
+        return specials2right!(isnan, v, o, lo, hi)
+    end
+end
+
+specials2end!(v::AbstractVector, a::Algorithm, o::ForwardOrdering) =
+    specials2right!(v, a, o)
+specials2end!(v::AbstractVector, a::Algorithm, o::ReverseOrdering) =
+    specials2left!(v, a, o)
+specials2end!(v::AbstractVector{<:Integer}, a::Algorithm, o::Perm{<:ForwardOrdering}) =
+    specials2right!(v, a, o)
+specials2end!(v::AbstractVector{<:Integer}, a::Algorithm, o::Perm{<:ReverseOrdering}) =
+    specials2left!(v, a, o)
+
+issignleft(o::ForwardOrdering, x::Floats) = lt(o, x, zero(x))
+issignleft(o::ReverseOrdering, x::Floats) = lt(o, x, -zero(x))
+issignleft(o::Perm, i::Integer) = issignleft(o.order, o.data[i])
+
+function fpsort!(v::AbstractVector, a::Algorithm, o::Ordering,
+        t::Union{AbstractVector, Nothing}=nothing)
+    # fpsort!'s optimizations speed up comparisons, of which there are O(nlogn).
+    # The overhead is O(n). For n < 10, it's not worth it.
+    length(v) < 10 && return sort!(v, firstindex(v), lastindex(v), SMALL_ALGORITHM, o, t)
+
+    i, j = lo, hi = specials2end!(v,a,o)
+    @inbounds while true
+        while i <= j &&  issignleft(o,v[i]); i += 1; end
+        while i <= j && !issignleft(o,v[j]); j -= 1; end
+        i <= j || break
+        v[i], v[j] = v[j], v[i]
+        i += 1; j -= 1
+    end
+    sort!(v, lo, j,  a, left(o), t)
+    sort!(v, i,  hi, a, right(o), t)
+    return v
+end
+
+
+fpsort!(v::AbstractVector, a::Sort.PartialQuickSort, o::Ordering) =
+    sort!(v, firstindex(v), lastindex(v), a, o)
+
+function sort!(v::FPSortable, a::Algorithm, o::DirectOrdering,
+        t::Union{FPSortable, Nothing}=nothing)
+    fpsort!(v, a, o, t)
+end
+function sort!(v::AbstractVector{<:Union{Signed, Unsigned}}, a::Algorithm,
+        o::Perm{<:DirectOrdering,<:FPSortable}, t::Union{AbstractVector, Nothing}=nothing)
+    fpsort!(v, a, o, t)
+end
+
+end # module Float
diff --git a/stdlib/StandardSortingAlgorithms/src/MergeSort.jl b/stdlib/StandardSortingAlgorithms/src/MergeSort.jl
new file mode 100644
index 0000000000000..e9ff1e55e2b65
--- /dev/null
+++ b/stdlib/StandardSortingAlgorithms/src/MergeSort.jl
@@ -0,0 +1,38 @@
+function sort!(v::AbstractVector{T}, lo::Integer, hi::Integer, a::MergeSortAlg, o::Ordering,
+    t0::Union{AbstractVector{T}, Nothing}=nothing) where T
+@inbounds if lo < hi
+    hi-lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o)
+
+    m = midpoint(lo, hi)
+    t = workspace(v, t0, m-lo+1)
+
+    sort!(v, lo,  m,  a, o, t)
+    sort!(v, m+1, hi, a, o, t)
+
+    i, j = 1, lo
+    while j <= m
+        t[i] = v[j]
+        i += 1
+        j += 1
+    end
+
+    i, k = 1, lo
+    while k < j <= hi
+        if lt(o, v[j], t[i])
+            v[k] = v[j]
+            j += 1
+        else
+            v[k] = t[i]
+            i += 1
+        end
+        k += 1
+    end
+    while k < j
+        v[k] = t[i]
+        k += 1
+        i += 1
+    end
+end
+
+return v
+end
diff --git a/stdlib/StandardSortingAlgorithms/src/QuickSort.jl b/stdlib/StandardSortingAlgorithms/src/QuickSort.jl
new file mode 100644
index 0000000000000..62b2bbb2106d2
--- /dev/null
+++ b/stdlib/StandardSortingAlgorithms/src/QuickSort.jl
@@ -0,0 +1,96 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::QuickSortAlg, o::Ordering)
+    @inbounds while lo < hi
+        hi-lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o)
+        j = partition!(v, lo, hi, o)
+        if j-lo < hi-j
+            # recurse on the smaller chunk
+            # this is necessary to preserve O(log(n))
+            # stack space in the worst case (rather than O(n))
+            lo < (j-1) && sort!(v, lo, j-1, a, o)
+            lo = j+1
+        else
+            j+1 < hi && sort!(v, j+1, hi, a, o)
+            hi = j-1
+        end
+    end
+    return v
+end
+
+function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::PartialQuickSort,
+               o::Ordering)
+    @inbounds while lo < hi
+        hi-lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o)
+        j = partition!(v, lo, hi, o)
+
+        if j <= first(a.k)
+            lo = j+1
+        elseif j >= last(a.k)
+            hi = j-1
+        else
+            # recurse on the smaller chunk
+            # this is necessary to preserve O(log(n))
+            # stack space in the worst case (rather than O(n))
+            if j-lo < hi-j
+                lo < (j-1) && sort!(v, lo, j-1, a, o)
+                lo = j+1
+            else
+                hi > (j+1) && sort!(v, j+1, hi, a, o)
+                hi = j-1
+            end
+        end
+    end
+    return v
+end
+
+# partition!
+#
+# select a pivot, and partition v according to the pivot
+function partition!(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering)
+    pivot = selectpivot!(v, lo, hi, o)
+    # pivot == v[lo], v[hi] > pivot
+    i, j = lo, hi
+    @inbounds while true
+        i += 1; j -= 1
+        while lt(o, v[i], pivot); i += 1; end;
+        while lt(o, pivot, v[j]); j -= 1; end;
+        i >= j && break
+        v[i], v[j] = v[j], v[i]
+    end
+    v[j], v[lo] = pivot, v[j]
+
+    # v[j] == pivot
+    # v[k] >= pivot for k > j
+    # v[i] <= pivot for i < j
+    return j
+end
+
+# selectpivot!
+#
+# Given 3 locations in an array (lo, mi, and hi), sort v[lo], v[mi], v[hi]) and
+# choose the middle value as a pivot
+#
+# Upon return, the pivot is in v[lo], and v[hi] is guaranteed to be
+# greater than the pivot
+@inline function selectpivot!(v::AbstractVector, lo::Integer, hi::Integer, o::Ordering)
+    @inbounds begin
+        mi = midpoint(lo, hi)
+
+        # sort v[mi] <= v[lo] <= v[hi] such that the pivot is immediately in place
+        if lt(o, v[lo], v[mi])
+            v[mi], v[lo] = v[lo], v[mi]
+        end
+
+        if lt(o, v[hi], v[lo])
+            if lt(o, v[hi], v[mi])
+                v[hi], v[lo], v[mi] = v[lo], v[mi], v[hi]
+            else
+                v[hi], v[lo] = v[lo], v[hi]
+            end
+        end
+
+        # return the pivot
+        return v[lo]
+    end
+end
diff --git a/stdlib/StandardSortingAlgorithms/src/RadixSort.jl b/stdlib/StandardSortingAlgorithms/src/RadixSort.jl
new file mode 100644
index 0000000000000..93ba2c976f658
--- /dev/null
+++ b/stdlib/StandardSortingAlgorithms/src/RadixSort.jl
@@ -0,0 +1,65 @@
+# This is a stable least significant bit first radix sort.
+#
+# That is, it first sorts the entire vector by the last chunk_size bits, then by the second
+# to last chunk_size bits, and so on. Stability means that it will not reorder two elements
+# that compare equal. This is essential so that the order introduced by earlier,
+# less significant passes is preserved by later passes.
+#
+# Each pass divides the input into 2^chunk_size == mask+1 buckets. To do this, it
+#  * counts the number of entries that fall into each bucket
+#  * uses those counts to compute the indices to move elements of those buckets into
+#  * moves elements into the computed indices in the swap array
+#  * switches the swap and working array
+#
+# In the case of an odd number of passes, the returned vector will === the input vector t,
+# not v. This is one of the many reasons radix_sort! is not exported.
+function radix_sort!(v::AbstractVector{U}, lo::Integer, hi::Integer, bits::Unsigned,
+                     t::AbstractVector{U}, chunk_size=radix_chunk_size_heuristic(lo, hi, bits)) where U <: Unsigned
+    # bits is unsigned for performance reasons.
+    mask = UInt(1) << chunk_size - 1
+    counts = Vector{UInt}(undef, mask+2)
+
+    @inbounds for shift in 0:chunk_size:bits-1
+
+        # counts[2:mask+2] will store the number of elements that fall into each bucket.
+        # if chunk_size = 8, counts[2] is bucket 0x00 and counts[257] is bucket 0xff.
+        counts .= 0
+        for k in lo:hi
+            x = v[k]                  # lookup the element
+            i = (x >> shift)&mask + 2 # compute its bucket's index for this pass
+            counts[i] += 1            # increment that bucket's count
+        end
+
+        counts[1] = lo                # set target index for the first bucket
+        cumsum!(counts, counts)       # set target indices for subsequent buckets
+        # counts[1:mask+1] now stores indices where the first member of each bucket
+        # belongs, not the number of elements in each bucket. We will put the first element
+        # of bucket 0x00 in t[counts[1]], the next element of bucket 0x00 in t[counts[1]+1],
+        # and the last element of bucket 0x00 in t[counts[2]-1].
+
+        for k in lo:hi
+            x = v[k]                  # lookup the element
+            i = (x >> shift)&mask + 1 # compute its bucket's index for this pass
+            j = counts[i]             # lookup the target index
+            t[j] = x                  # put the element where it belongs
+            counts[i] = j + 1         # increment the target index for the next
+        end                           #  ↳ element in this bucket
+
+        v, t = t, v # swap the now sorted destination vector t back into primary vector v
+
+    end
+
+    v
+end
+function radix_chunk_size_heuristic(lo::Integer, hi::Integer, bits::Unsigned)
+    # chunk_size is the number of bits to radix over at once.
+    # We need to allocate an array of size 2^chunk size, and on the other hand the higher
+    # the chunk size the fewer passes we need. Theoretically, chunk size should be based on
+    # the Lambert W function applied to length. Empirically, we use this heuristic:
+    guess = min(10, log(maybe_unsigned(hi-lo))*3/4+3)
+    # TODO the maximum chunk size should be based on architecture cache size.
+
+    # We need iterations * chunk size ≥ bits, and these cld's
+    # make an effort to get iterations * chunk size ≈ bits
+    UInt8(cld(bits, cld(bits, guess)))
+end
diff --git a/stdlib/StandardSortingAlgorithms/src/StandardSortingAlgorithms.jl b/stdlib/StandardSortingAlgorithms/src/StandardSortingAlgorithms.jl
new file mode 100644
index 0000000000000..7b45676e1f928
--- /dev/null
+++ b/stdlib/StandardSortingAlgorithms/src/StandardSortingAlgorithms.jl
@@ -0,0 +1,24 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+module StandardSortingAlgorithms
+
+using Base.Sort, Base.Order
+using Base.Sort: AdaptiveSort, BitSigned, QuickSortAlg, MergeSortAlg, midpoint, workspace
+import Base.Sort.sort!, Base.Sort.defalg
+
+defalg(::AbstractArray) = DEFAULT_STABLE
+defalg(::AbstractArray{<:Union{Number, Missing}}) = DEFAULT_UNSTABLE
+defalg(::AbstractArray{Missing}) = DEFAULT_UNSTABLE # for method disambiguation
+defalg(::AbstractArray{Union{}}) = DEFAULT_UNSTABLE # for method disambiguation
+
+defalg(::typeof(DEFAULT_UNSTABLE)) = DEFAULT_UNSTABLE
+
+include("AdaptiveSort.jl")
+include("QuickSort.jl")
+include("MergeSort.jl")
+include("RadixSort.jl")
+include("uint_mappings.jl")
+include("sort_int_range.jl")
+include("Float.jl")
+
+end
diff --git a/stdlib/StandardSortingAlgorithms/src/sort_int_range.jl b/stdlib/StandardSortingAlgorithms/src/sort_int_range.jl
new file mode 100644
index 0000000000000..0b6b982072fb1
--- /dev/null
+++ b/stdlib/StandardSortingAlgorithms/src/sort_int_range.jl
@@ -0,0 +1,22 @@
+# sort! for vectors of few unique integers
+function sort_int_range!(x::AbstractVector{<:Integer}, rangelen, minval, maybereverse,
+                         lo=firstindex(x), hi=lastindex(x))
+    offs = 1 - minval
+
+    counts = fill(0, rangelen)
+    @inbounds for i = lo:hi
+        counts[x[i] + offs] += 1
+    end
+
+    idx = lo
+    @inbounds for i = maybereverse(1:rangelen)
+        lastidx = idx + counts[i] - 1
+        val = i-offs
+        for j = idx:lastidx
+            x[j] = val
+        end
+        idx = lastidx + 1
+    end
+
+    return x
+end
diff --git a/stdlib/StandardSortingAlgorithms/src/uint_mappings.jl b/stdlib/StandardSortingAlgorithms/src/uint_mappings.jl
new file mode 100644
index 0000000000000..d6cdf65fb45e1
--- /dev/null
+++ b/stdlib/StandardSortingAlgorithms/src/uint_mappings.jl
@@ -0,0 +1,88 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# uint mapping to allow radix sorting primitives other than UInts
+
+"""
+    UIntMappable(T::Type, order::Ordering)
+
+Return `typeof(uint_map(x::T, order))` if [`uint_map`](@ref) and
+[`uint_unmap`](@ref) are implemented.
+
+If either is not implemented, return `nothing`.
+"""
+UIntMappable(T::Type, order::Ordering) = nothing
+
+"""
+    uint_map(x, order::Ordering)::Unsigned
+
+Map `x` to an un unsigned integer, maintaining sort order.
+
+The map should be reversible with [`uint_unmap`](@ref), so `isless(order, a, b)` must be
+a linear ordering for `a, b <: typeof(x)`. Satisfies
+`isless(order, a, b) === (uint_map(a, order) < uint_map(b, order))`
+and `x === uint_unmap(typeof(x), uint_map(x, order), order)`
+
+See also: [`UIntMappable`](@ref) [`uint_unmap`](@ref)
+"""
+function uint_map end
+
+"""
+    uint_unmap(T::Type, u::Unsigned, order::Ordering)
+
+Reconstruct the unique value `x::T` that uint_maps to `u`. Satisfies
+`x === uint_unmap(T, uint_map(x::T, order), order)` for all `x <: T`.
+
+See also: [`uint_map`](@ref) [`UIntMappable`](@ref)
+"""
+function uint_unmap end
+
+
+## Primitive Types
+
+# Integers
+uint_map(x::Unsigned, ::ForwardOrdering) = x
+uint_unmap(::Type{T}, u::T, ::ForwardOrdering) where T <: Unsigned = u
+
+uint_map(x::Signed, ::ForwardOrdering) =
+    unsigned(xor(x, typemin(x)))
+uint_unmap(::Type{T}, u::Unsigned, ::ForwardOrdering) where T <: Signed =
+    xor(signed(u), typemin(T))
+
+# unsigned(Int) is not available during bootstrapping.
+for (U, S) in [(UInt8, Int8), (UInt16, Int16), (UInt32, Int32), (UInt64, Int64), (UInt128, Int128)]
+    @eval UIntMappable(::Type{<:Union{$U, $S}}, ::ForwardOrdering) = $U
+end
+
+# Floats are not UIntMappable under regular orderings because they fail on NaN edge cases.
+# uint mappings for floats are defined in Float, where the Left and Right orderings
+# guarantee that there are no NaN values
+
+# Chars
+uint_map(x::Char, ::ForwardOrdering) = reinterpret(UInt32, x)
+uint_unmap(::Type{Char}, u::UInt32, ::ForwardOrdering) = reinterpret(Char, u)
+UIntMappable(::Type{Char}, ::ForwardOrdering) = UInt32
+
+## Reverse orderings
+uint_map(x, rev::ReverseOrdering) = ~uint_map(x, rev.fwd)
+uint_unmap(T::Type, u::Unsigned, rev::ReverseOrdering) = uint_unmap(T, ~u, rev.fwd)
+UIntMappable(T::Type, order::ReverseOrdering) = UIntMappable(T, order.fwd)
+
+
+## Vectors
+
+# Convert v to unsigned integers in place, maintaining sort order.
+function uint_map!(v::AbstractVector, lo::Integer, hi::Integer, order::Ordering)
+    u = reinterpret(UIntMappable(eltype(v), order), v)
+    @inbounds for i in lo:hi
+        u[i] = uint_map(v[i], order)
+    end
+    u
+end
+
+function uint_unmap!(v::AbstractVector, u::AbstractVector{U}, lo::Integer, hi::Integer,
+                     order::Ordering, offset::U=zero(U)) where U <: Unsigned
+    @inbounds for i in lo:hi
+        v[i] = uint_unmap(eltype(v), u[i]+offset, order)
+    end
+    v
+end
diff --git a/stdlib/StandardSortingAlgorithms/test/runtests.jl b/stdlib/StandardSortingAlgorithms/test/runtests.jl
new file mode 100644
index 0000000000000..a396cfa9e727d
--- /dev/null
+++ b/stdlib/StandardSortingAlgorithms/test/runtests.jl
@@ -0,0 +1,997 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test, SparseArrays
+using Test: guardseed
+using Statistics: mean
+
+const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
+isdefined(Main, :OffsetArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "OffsetArrays.jl"))
+using .Main.OffsetArrays
+
+using Random
+using Random.DSFMT
+
+using Random: Sampler, SamplerRangeFast, SamplerRangeInt, SamplerRangeNDL, MT_CACHE_F, MT_CACHE_I
+
+import Future # randjump
+
+@testset "Issue #6573" begin
+    Random.seed!(0)
+    rand()
+    x = rand(384)
+    @test findall(x .== rand()) == []
+end
+
+@test rand() != rand()
+@test 0.0 <= rand() < 1.0
+@test rand(UInt32) >= 0
+@test -10 <= rand(-10:-5) <= -5
+@test -10 <= rand(-10:5) <= 5
+@test minimum([rand(Int32(1):Int32(7^7)) for i = 1:100000]) > 0
+@test typeof(rand(false:true)) === Bool
+@test typeof(rand(Char)) === Char
+@test length(randn(4, 5)) == 20
+@test length(randn(ComplexF64, 4, 5)) == 20
+@test length(bitrand(4, 5)) == 20
+
+@test rand(MersenneTwister(0)) == 0.8236475079774124
+@test rand(MersenneTwister(42)) == 0.5331830160438613
+# Try a seed larger than 2^32
+@test rand(MersenneTwister(5294967296)) == 0.3498809918210497
+
+# Test array filling, Issues #7643, #8360
+@test rand(MersenneTwister(0), 1) == [0.8236475079774124]
+let A = zeros(2, 2)
+    rand!(MersenneTwister(0), A)
+    @test A == [0.8236475079774124  0.16456579813368521;
+                0.9103565379264364  0.17732884646626457]
+end
+let A = zeros(2, 2)
+    @test_throws MethodError rand!(MersenneTwister(0), A, 5)
+    @test rand(MersenneTwister(0), Int64, 1) == [-3433174948434291912]
+end
+let A = zeros(Int64, 2, 2)
+    rand!(MersenneTwister(0), A)
+    @test A == [858542123778948672  5715075217119798169;
+                8690327730555225005 8435109092665372532]
+end
+
+# rand from AbstractArray
+let mt = MersenneTwister()
+    @test rand(mt, 0:3:1000) in 0:3:1000
+    @test issubset(rand!(mt, Vector{Int}(undef, 100), 0:3:1000), 0:3:1000)
+    coll = Any[2, UInt128(128), big(619), "string"]
+    @test rand(mt, coll) in coll
+    @test issubset(rand(mt, coll, 2, 3), coll)
+
+    # check API with default RNG:
+    rand(0:3:1000)
+    rand!(Vector{Int}(undef, 100), 0:3:1000)
+    rand(coll)
+    rand(coll, 2, 3)
+end
+
+# randn
+@test randn(MersenneTwister(42)) == -0.5560268761463861
+let A = zeros(2, 2)
+    randn!(MersenneTwister(42), A)
+    @test A == [-0.5560268761463861  0.027155338009193845;
+                -0.444383357109696  -0.29948409035891055]
+end
+
+let B = zeros(ComplexF64, 2)
+    randn!(MersenneTwister(42), B)
+    @test B == [ComplexF64(-0.5560268761463861,-0.444383357109696),
+                ComplexF64(0.027155338009193845,-0.29948409035891055)] * 0.7071067811865475244008
+end
+
+for T in (Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, UInt128, BigInt,
+          Float16, Float32, Float64, Rational{Int})
+    r = rand(convert(T, 97):convert(T, 122))
+    @test typeof(r) == T
+    @test 97 <= r <= 122
+    r = rand(convert(T, 97):convert(T,2):convert(T, 122),2)[1]
+    @test typeof(r) == T
+    @test 97 <= r <= 122
+    @test mod(r,2)==1
+
+    if T<:Integer && !(T===BigInt)
+        x = rand(typemin(T):typemax(T))
+        @test isa(x,T)
+        @test typemin(T) <= x <= typemax(T)
+    end
+end
+
+@test !any([(Random.maxmultiple(i)+i) > 0xFF for i in 0x00:0xFF])
+@test all([(Random.maxmultiple(i)+1) % i for i in 0x01:0xFF] .== 0)
+@test all([(Random.maxmultiple(i)+1+i) > 0xFF for i in 0x00:0xFF])
+@test length(0x00:0xFF)== Random.maxmultiple(0x0)+1
+
+
+if sizeof(Int32) < sizeof(Int)
+    local r = rand(Int32(-1):typemax(Int32))
+    @test typeof(r) == Int32
+    @test -1 <= r <= typemax(Int32)
+    for U = (Int64, UInt64)
+        @test all(div(one(UInt128) << 52, k)*k - 1 == SamplerRangeInt(map(U, 1:k)).u
+                  for k in 13 .+ Int64(2).^(32:51))
+        @test all(div(one(UInt128) << 64, k)*k - 1 == SamplerRangeInt(map(U, 1:k)).u
+                  for k in 13 .+ Int64(2).^(52:62))
+    end
+end
+
+# BigInt specific
+for T in [UInt32, UInt64, UInt128, Int128]
+    local r, s
+    s = big(typemax(T)-1000) : big(typemax(T)) + 10000
+    # s is a 11001-length array
+    @test rand(s) isa BigInt
+    @test sum(rand(s, 1000) .== rand(s, 1000)) <= 20
+    @test big(typemax(T)-1000) <= rand(s) <= big(typemax(T)) + 10000
+    r = rand(s, 1, 2)
+    @test size(r) == (1, 2)
+    @test typeof(r) == Matrix{BigInt}
+    guardseed() do
+        Random.seed!(0)
+        r = rand(s)
+        Random.seed!(0)
+        @test rand(s) == r
+    end
+end
+
+# Test ziggurat tables
+ziggurat_table_size = 256
+nmantissa           = Int64(2)^51 # one bit for the sign
+ziggurat_nor_r      = parse(BigFloat,"3.65415288536100879635194725185604664812733315920964488827246397029393565706474")
+erfc_zigg_root2     = parse(BigFloat,"2.580324876539008898343885504487203185398584536409033046076029509351995983934371e-04")
+nor_section_area    = ziggurat_nor_r*exp(-ziggurat_nor_r^2/2) + erfc_zigg_root2*sqrt(big(π)/2)
+emantissa           = Int64(2)^52
+ziggurat_exp_r      = parse(BigFloat,"7.69711747013104971404462804811408952334296818528283253278834867283241051210533")
+exp_section_area    = (ziggurat_exp_r + 1)*exp(-ziggurat_exp_r)
+
+ki = Vector{UInt64}(undef, ziggurat_table_size)
+wi = Vector{Float64}(undef, ziggurat_table_size)
+fi = Vector{Float64}(undef, ziggurat_table_size)
+# Tables for exponential variates
+ke = Vector{UInt64}(undef, ziggurat_table_size)
+we = Vector{Float64}(undef, ziggurat_table_size)
+fe = Vector{Float64}(undef, ziggurat_table_size)
+function randmtzig_fill_ziggurat_tables() # Operates on the global arrays
+    wib = big.(wi)
+    fib = big.(fi)
+    web = big.(we)
+    feb = big.(fe)
+    # Ziggurat tables for the normal distribution
+    x1 = ziggurat_nor_r
+    wib[256] = x1/nmantissa
+    fib[256] = exp(-0.5*x1*x1)
+    # Index zero is special for tail strip, where Marsaglia and Tsang
+    # defines this as
+    # k_0 = 2^31 * r * f(r) / v, w_0 = 0.5^31 * v / f(r), f_0 = 1,
+    # where v is the area of each strip of the ziggurat.
+    ki[1] = trunc(UInt64,x1*fib[256]/nor_section_area*nmantissa)
+    wib[1] = nor_section_area/fib[256]/nmantissa
+    fib[1] = one(BigFloat)
+
+    for i = 255:-1:2
+        # New x is given by x = f^{-1}(v/x_{i+1} + f(x_{i+1})), thus
+        # need inverse operator of y = exp(-0.5*x*x) -> x = sqrt(-2*ln(y))
+        x = sqrt(-2.0*log(nor_section_area/x1 + fib[i+1]))
+        ki[i+1] = trunc(UInt64,x/x1*nmantissa)
+        wib[i] = x/nmantissa
+        fib[i] = exp(-0.5*x*x)
+        x1 = x
+    end
+
+    ki[2] = UInt64(0)
+
+    # Zigurrat tables for the exponential distribution
+    x1 = ziggurat_exp_r
+    web[256] = x1/emantissa
+    feb[256] = exp(-x1)
+
+    # Index zero is special for tail strip, where Marsaglia and Tsang
+    # defines this as
+    # k_0 = 2^32 * r * f(r) / v, w_0 = 0.5^32 * v / f(r), f_0 = 1,
+    # where v is the area of each strip of the ziggurat.
+    ke[1] = trunc(UInt64,x1*feb[256]/exp_section_area*emantissa)
+    web[1] = exp_section_area/feb[256]/emantissa
+    feb[1] = one(BigFloat)
+
+    for i = 255:-1:2
+        # New x is given by x = f^{-1}(v/x_{i+1} + f(x_{i+1})), thus
+        # need inverse operator of y = exp(-x) -> x = -ln(y)
+        x = -log(exp_section_area/x1 + feb[i+1])
+        ke[i+1] = trunc(UInt64,x/x1*emantissa)
+        web[i] = x/emantissa
+        feb[i] = exp(-x)
+        x1 = x
+    end
+    ke[2] = zero(UInt64)
+
+    wi[:] = wib
+    fi[:] = fib
+    we[:] = web
+    fe[:] = feb
+    return nothing
+end
+randmtzig_fill_ziggurat_tables()
+@test all(ki == Random.ki)
+@test all(wi == Random.wi)
+@test all(fi == Random.fi)
+@test all(ke == Random.ke)
+@test all(we == Random.we)
+@test all(fe == Random.fe)
+
+for U in (Int64, UInt64)
+    @test all(div(one(UInt64) << 52, k)*k - 1 == SamplerRangeInt(map(U, 1:k)).u
+              for k in 13 .+ Int64(2).^(1:30))
+end
+
+#issue 8257
+let i8257 = 1:1/3:100
+    for i = 1:100
+        @test rand(i8257) in i8257
+    end
+end
+
+# test code paths of rand!
+
+let mt = MersenneTwister(0)
+    A128 = Vector{UInt128}()
+    @test length(rand!(mt, A128)) == 0
+    for (i,n) in enumerate([1, 3, 5, 6, 10, 11, 30])
+        resize!(A128, n)
+        rand!(mt, A128)
+        @test length(A128) == n
+        @test A128[end] == UInt128[0x15de6b23025813ad129841f537a04e40,
+                                   0xcfa4db38a2c65bc4f18c07dc91125edf,
+                                   0x33bec08136f19b54290982449b3900d5,
+                                   0xde41af3463e74cb830dad4add353ca20,
+                                   0x066d8695ebf85f833427c93416193e1f,
+                                   0x48fab49cc9fcee1c920d6dae629af446,
+                                   0x4b54632b4619f4eca22675166784d229][i]
+    end
+
+    Random.seed!(mt, 0)
+    Aend = Any[]
+    Bend = Any[]
+    for (i,T) in enumerate([Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, Float16, Float32])
+        A = Vector{T}(undef, 16)
+        B = Vector{T}(undef, 31)
+        rand!(mt, A)
+        rand!(mt, B)
+        push!(Aend, A[end])
+        push!(Bend, B[end])
+    end
+    @test Aend == Any[21, 0x7b, 17385, 0x3086, -1574090021, 0xadcb4460, 6797283068698303107, 0x68a9f9865393cfd6,
+                      33687499368208574024854346399216845930, Float16(0.7744), 0.97259974f0]
+    @test Bend == Any[49, 0x65, -3725, 0x719d, 814246081, 0xdf61843a, -3433174948434291912, 0xd461716f27c91500,
+                      -85900088726243933988214632401750448432, Float16(0.10645), 0.13879478f0]
+
+    Random.seed!(mt, 0)
+    AF64 = Vector{Float64}(undef, Random.dsfmt_get_min_array_size()-1)
+    @test rand!(mt, AF64)[end] == 0.957735065345398
+    @test rand!(mt, AF64)[end] == 0.6492481059865669
+    resize!(AF64, 2*length(mt.vals))
+    @test invoke(rand!, Tuple{MersenneTwister,AbstractArray{Float64},Random.SamplerTrivial{Random.CloseOpen01_64}},
+                 mt, AF64, Random.SamplerTrivial(Random.CloseOpen01()))[end]  == 0.1142787906708973
+end
+
+# Issue #9037
+let mt = MersenneTwister(0)
+    a = Vector{Float64}()
+    resize!(a, 1000) # could be 8-byte aligned
+    b = Vector{Float64}(undef, 1000) # should be 16-byte aligned
+    c8 = Vector{UInt64}(undef, 1001)
+    pc8 = pointer(c8)
+    if Int(pc8) % 16 == 0
+        # Make sure pc8 is not 16-byte aligned since that's what we want to test.
+        # It still has to be 8-byte aligned since it is otherwise invalid on
+        # certain architectures (e.g. ARM)
+        pc8 += 8
+    end
+    c = unsafe_wrap(Array, Ptr{Float64}(pc8), 1000) # Int(pointer(c)) % 16 == 8
+
+    for A in (a, b, c)
+        local A
+        Random.seed!(mt, 0)
+        rand(mt) # this is to fill mt.vals, cf. #9040
+        rand!(mt, A) # must not segfault even if Int(pointer(A)) % 16 != 0
+        @test A[end-4:end] == [0.3371041633752143, 0.41147647589610803, 0.6063082992397912, 0.9103565379264364, 0.16456579813368521]
+    end
+end
+
+# make sure reading 128-bit ints from RandomDevice works
+let a = [rand(RandomDevice(), UInt128) for i=1:10]
+    @test reduce(|, a)>>>64 != 0
+end
+
+# wrapper around Float64 to check fallback random generators
+struct FakeFloat64 <: AbstractFloat
+    x::Float64
+end
+Base.rand(rng::AbstractRNG, ::Random.SamplerTrivial{Random.CloseOpen01{FakeFloat64}}) = FakeFloat64(rand(rng))
+for f in (:sqrt, :log, :log1p, :one, :zero, :abs, :+, :-)
+    @eval Base.$f(x::FakeFloat64) = FakeFloat64($f(x.x))
+end
+for f in (:+, :-, :*, :/)
+    @eval begin
+        Base.$f(x::FakeFloat64, y::FakeFloat64) = FakeFloat64($f(x.x,y.x))
+        Base.$f(x::FakeFloat64, y::Real) = FakeFloat64($f(x.x,y))
+        Base.$f(x::Real, y::FakeFloat64) = FakeFloat64($f(x,y.x))
+    end
+end
+for f in (:<, :<=, :>, :>=, :(==), :(!=))
+    @eval begin
+        Base.$f(x::FakeFloat64, y::FakeFloat64) = $f(x.x,y.x)
+        Base.$f(x::FakeFloat64, y::Real) = $f(x.x,y)
+        Base.$f(x::Real, y::FakeFloat64) = $f(x,y.x)
+    end
+end
+
+# test all rand APIs
+for rng in ([], [MersenneTwister(0)], [RandomDevice()], [Xoshiro()])
+    ftypes = [Float16, Float32, Float64, FakeFloat64, BigFloat]
+    cftypes = [ComplexF16, ComplexF32, ComplexF64, ftypes...]
+    types = [Bool, Char, BigFloat, Base.BitInteger_types..., ftypes...]
+    randset = Set(rand(Int, 20))
+    randdict = Dict(zip(rand(Int,10), rand(Int, 10)))
+    collections = [BitSet(rand(1:100, 20))          => Int,
+                   randset                          => Int,
+                   GenericSet(randset)              => Int,
+                   randdict                         => Pair{Int,Int},
+                   GenericDict(randdict)            => Pair{Int,Int},
+                   1:100                            => Int,
+                   rand(Int, 100)                   => Int,
+                   Int                              => Int,
+                   Float64                          => Float64,
+                   "qwèrtï"                         => Char,
+                   GenericString("qwèrtï")          => Char,
+                   OffsetArray(rand(2, 3), (4, -5)) => Float64]
+    functypes = Dict(rand  => types, randn  => cftypes, randexp  => ftypes,
+                     rand! => types, randn! => cftypes, randexp! => ftypes)
+
+    b2 = big(2)
+    u3 = UInt(3)
+    for f in [rand, randn, randexp]
+        f(rng...)                     ::Float64
+        f(rng..., 5)                  ::Vector{Float64}
+        f(rng..., 2, 3)               ::Array{Float64, 2}
+        f(rng..., b2, u3)             ::Array{Float64, 2}
+        for T in functypes[f]
+            a0 = f(rng..., T)         ::T
+            a1 = f(rng..., T, 5)      ::Vector{T}
+            a2 = f(rng..., T, 2, 3)   ::Array{T, 2}
+            a3 = f(rng..., T, b2, u3) ::Array{T, 2}
+            a4 = f(rng..., T, (2, 3)) ::Array{T, 2}
+            if T <: AbstractFloat && f === rand
+                for a in [a0, a1..., a2..., a3..., a4...]
+                    @test 0.0 <= a < 1.0
+                end
+            end
+        end
+    end
+    for (C, T) in collections
+        a0  = rand(rng..., C)                                                       ::T
+        a1  = rand(rng..., C, 5)                                                    ::Vector{T}
+        a2  = rand(rng..., C, 2, 3)                                                 ::Array{T, 2}
+        a3  = rand(rng..., C, (2, 3))                                               ::Array{T, 2}
+        a4  = rand(rng..., C, b2, u3)                                               ::Array{T, 2}
+        a5  = rand!(rng..., Array{T}(undef, 5), C)                          ::Vector{T}
+        a6  = rand!(rng..., Array{T}(undef, 2, 3), C)                       ::Array{T, 2}
+        a7  = rand!(rng..., GenericArray{T}(undef, 5), C)                   ::GenericArray{T, 1}
+        a8  = rand!(rng..., GenericArray{T}(undef, 2, 3), C)                ::GenericArray{T, 2}
+        a9  = rand!(rng..., OffsetArray(Array{T}(undef, 5), 9), C)          ::OffsetArray{T, 1}
+        a10 = rand!(rng..., OffsetArray(Array{T}(undef, 2, 3), (-2, 4)), C) ::OffsetArray{T, 2}
+        @test size(a1) == (5,)
+        @test size(a2) == size(a3) == (2, 3)
+        for a in [a0, a1..., a2..., a3..., a4..., a5..., a6..., a7..., a8..., a9..., a10...]
+            if C isa Type
+                @test a isa C
+            else
+                @test a in C
+            end
+        end
+    end
+    for C in [1:0, Dict(), Set(), BitSet(), Int[],
+              GenericDict(Dict()), GenericSet(Set()),
+              "", Test.GenericString("")]
+        @test_throws ArgumentError rand(rng..., C)
+        @test_throws ArgumentError rand(rng..., C, 5)
+    end
+    for f! in [rand!, randn!, randexp!]
+        for T in functypes[f!]
+            X = T == Bool ? T[0,1] : T[0,1,2]
+            for A in (Vector{T}(undef, 5),
+                      Matrix{T}(undef, 2, 3),
+                      GenericArray{T}(undef, 5),
+                      GenericArray{T}(undef, 2, 3),
+                      OffsetArray(Array{T}(undef, 5), -3),
+                      OffsetArray(Array{T}(undef, 2, 3), (4, 5)))
+                local A
+                f!(rng..., A)                    ::typeof(A)
+                if f! === rand!
+                    f!(rng..., A, X)             ::typeof(A)
+                    if A isa Array && T !== Char # Char/Integer comparison
+                        f!(rng..., sparse(A))    ::typeof(sparse(A))
+                        f!(rng..., sparse(A), X) ::typeof(sparse(A))
+                    end
+                end
+            end
+        end
+    end
+
+    bitrand(rng..., 5)             ::BitArray{1}
+    bitrand(rng..., 2, 3)          ::BitArray{2}
+    bitrand(rng..., b2, u3)        ::BitArray{2}
+    rand!(rng..., BitVector(undef, 5))     ::BitArray{1}
+    rand!(rng..., BitMatrix(undef, 2, 3))  ::BitArray{2}
+
+    # Test that you cannot call randn or randexp with non-Float types.
+    for r in [randn, randexp]
+        @test_throws MethodError r(Int)
+        @test_throws MethodError r(Int32)
+        @test_throws MethodError r(Bool)
+        @test_throws MethodError r(String)
+        @test_throws MethodError r(AbstractFloat)
+
+        @test_throws MethodError r(Int64, (2,3))
+        @test_throws MethodError r(String, 1)
+
+        @test_throws MethodError r(rng..., Number, (2,3))
+        @test_throws MethodError r(rng..., Any, 1)
+    end
+end
+
+function hist(X, n)
+    v = zeros(Int, n)
+    for x in X
+        v[floor(Int, x*n) + 1] += 1
+    end
+    v
+end
+
+# test uniform distribution of floats
+for rng in [MersenneTwister(), RandomDevice(), Xoshiro()],
+    T in [Float16, Float32, Float64, BigFloat],
+        prec in (T == BigFloat ? [3, 53, 64, 100, 256, 1000] : [256])
+    setprecision(BigFloat, prec) do
+        # array version
+        counts = hist(rand(rng, T, 2000), 4)
+        @test minimum(counts) > 300 # should fail with proba < 1e-26
+        # scalar version
+        counts = hist([rand(rng, T) for i in 1:2000], 4)
+        @test minimum(counts) > 300
+    end
+end
+
+@testset "rand(Bool) uniform distribution" begin
+    for n in [rand(1:8), rand(9:16), rand(17:64)]
+        a = zeros(Bool, n)
+        a8 = unsafe_wrap(Array, Ptr{UInt8}(pointer(a)), length(a); own=false) # unsafely observe the actual bit patterns in `a`
+        as = zeros(Int, n)
+        # we will test statistical properties for each position of a,
+        # but also for 3 linear combinations of positions (for the array version)
+        lcs = unique!.([rand(1:n, 2), rand(1:n, 3), rand(1:n, 5)])
+        aslcs = zeros(Int, 3)
+        for rng = (MersenneTwister(), RandomDevice(), Xoshiro())
+            for scalar = [false, true]
+                fill!(a, 0)
+                fill!(as, 0)
+                fill!(aslcs, 0)
+                for _ = 1:49
+                    if scalar
+                        for i in eachindex(as)
+                            as[i] += rand(rng, Bool)
+                        end
+                    else
+                        as .+= rand!(rng, a)
+                        @test all(x -> x === 0x00 || x === 0x01, a8)
+                        aslcs .+= [xor(getindex.(Ref(a), lcs[i])...) for i in 1:3]
+                    end
+                end
+                @test all(x -> 7 <= x <= 42, as) # for each x, fails with proba ≈ 2/35_000_000
+                if !scalar
+                    @test all(x -> 7 <= x <= 42, aslcs)
+                end
+            end
+        end
+    end
+end
+
+@testset "reproducility of methods for $RNG" for RNG=(MersenneTwister,Xoshiro)
+    mta, mtb = RNG(42), RNG(42)
+
+    @test rand(mta) == rand(mtb)
+    @test rand(mta,10) == rand(mtb,10)
+    @test randn(mta) == randn(mtb)
+    @test randn(mta,10) == randn(mtb,10)
+    @test randexp(mta) == randexp(mtb)
+    @test randexp(mta,10) == randexp(mtb,10)
+    @test rand(mta,1:100) == rand(mtb,1:100)
+    @test rand(mta,1:10,10) == rand(mtb,1:10,10)
+    @test rand(mta,Bool) == rand(mtb,Bool)
+    @test bitrand(mta,10) == bitrand(mtb,10)
+
+    @test randstring(mta) == randstring(mtb)
+    @test randstring(mta,10) == randstring(mtb,10)
+
+    @test randsubseq(mta,1:10,0.4) == randsubseq(mtb,1:10,0.4)
+    @test randsubseq!(mta,Int[],1:10,0.4) == randsubseq!(mtb,Int[],1:10,0.4)
+
+    @test shuffle(mta,Vector(1:10)) == shuffle(mtb,Vector(1:10))
+    @test shuffle!(mta,Vector(1:10)) == shuffle!(mtb,Vector(1:10))
+    @test shuffle(mta,Vector(2:11)) == shuffle(mtb,2:11)
+    @test shuffle!(mta, rand(mta, 2, 3)) == shuffle!(mtb, rand(mtb, 2, 3))
+    @test shuffle(mta, rand(mta, 2, 3)) == shuffle(mtb, rand(mtb, 2, 3))
+
+    @test randperm(mta,10) == randperm(mtb,10)
+    @test sort!(randperm(10)) == sort!(shuffle(1:10)) == 1:10
+    @test randperm(mta,big(10)) == randperm(mtb,big(10)) # cf. #16376
+    @test randperm(0) == []
+    @test_throws ArgumentError randperm(-1)
+
+    let p = randperm(UInt16(12))
+        @test typeof(p) ≡ Vector{UInt16}
+        @test sort!(p) == 1:12
+    end
+
+    A, B = Vector{Int}(undef, 10), Vector{Int}(undef, 10)
+    @test randperm!(mta, A) == randperm!(mtb, B)
+    @test randperm!(A) === A
+
+    @test randcycle(mta,10) == randcycle(mtb,10)
+    @test randcycle!(mta, A) == randcycle!(mtb, B)
+    @test randcycle!(A) === A
+
+    let p = randcycle(UInt16(10))
+        @test typeof(p) ≡ Vector{UInt16}
+        @test sort!(p) == 1:10
+    end
+
+    @test sprand(mta,1,1,0.9) == sprand(mtb,1,1,0.9)
+    @test sprand(mta,10,10,0.3) == sprand(mtb,10,10,0.3)
+end
+
+@testset "MersenneTwister polynomial generation and jump" begin
+    seed = rand(UInt)
+    mta = MersenneTwister(seed)
+    mtb = MersenneTwister(seed)
+    step = 25000*2
+    size = 4
+    jump25000 = "35931a4947eeab70a9abbfaca4a79cfcf2610a35a586c6f4e4bdfa826d538cbfb0432c37321fcec4f6c98de3df06685087032988b0ad9a2144562aa82e06f2f6f256b5b412c524e35383a894da7b04e142c4156290585186d8fc06d3141a778220c2519a851b5a9e5947a3f745b71804631988825e21dba40392ff4c036b30d2d013b45e2be94b5e130a9c6424d2e82f48c855c81bd10757fdb5a91e23e9e312e430514ea31631d8897b4cf26eb39b37be0c92706e5637d4b34c1e4046b741e455df195cb512e8e0f8d578175a3da5e00d7ce247d9b92042b1b515d01f7f89fe661ebccb06dfb77bc0fbb99806921b472ccce58f2166ac058d9cf427ad7d74986e60a56d2fee0a8b680e466a8ea4e508a76c058b6f97b99c9aa5b10297b1a1bd6a8e80f3a79e008fa55a4a8915fbdec78b6b117ad67e195311fe79fc084c33f6db546f5b7602d010fa8b830e3f1b00cef00ee16840178fc7e9aa5f1cee625d43de8488bf6c8bd379ea6f97c55c7a9ee091477a23533d5e52e194bd9d4e17b02a64a2736feb3779fabd5777e448ffee0f2d4b38a8e7441822b882fc6df0bde8541e85c0c78a05936cff0c88a50980b7a84971fba3650991fe2cba425ac4b4289e7b06ce2cfabfcc8a553201e8c74b45e4ae74b6d054e37af95e6fd55e029b7c526b85ecfb3be8db670218ee3dda7b2a54ab1ed26eefe4cd1d2a9c589a6e94d0aa3ebe29e40e616aa0b731061c3d6e247ec610024a1a97b7adb7919308b0fb5dd5d51a58aa2f55d77b88037de7c1a74823c96cb09d22dd7f90dba14eefdcffaab34d323c829f24742f6f6b32b0724a26ae4a81130a8a275d30c21e6245fa27cf26d606a49bccba2980697c32d9efe583c4ee2140569025c4f044d744bc40cec1660d9e4d2de3a4de83bae4f0a9fdb34ef4509b2b4e6c37967a485a52d69d1573bb826bc64c966de9c792b7c2f07b645c56a29381911a98928e48516f246a55bcaa78f3c7d1c30127df5f06ba0a2d6a5e54605a20e60fab30c01a9472cb610ca0ef2418a985af00c7e47539111bf539dd554297d0374a7ff627d879600595b442c8dcffcffa3bbb07e5c7882ff0858142be4deac448698f0917fe2b7a9b686a9df1fa929f06a51aff992a6ee0b0605f8b34b87600cfa0af2475333b78625ce1520c793dc5080218247b4e41bbd7d9dab163470fe17a3d2622cdce979cc5565b0bc04eabaf656f21fa072a18ab33c656b665248ef20321407fef263b1c67316f2c6f236951990099e42d4614d8e08b27aa89d9f4548fa321d4b381d2da04fd7f17d6b9a68adfd0e4427196d25dcad869f8a155c6242f7d072baa5e7405ceb65dfaa3eb864bfe679a17df34273fde5037befe9ed5391b932cee271f59128c61ab3f0fc3f7cf8ff051fbda8382c64579efddd494c79850c56bda73bcd39c20c2820d191995b3335253c3b0ac8f5e5373f40c228886e6c526c2c249a5304578ba2a80f591c34ca1eaa84d6cc9399cf3f1207e61c4acada647e4e87ad5fba84aeeff6b6881d35bda77c74384fc5e279a0f495d509bc882c2b8bc790651a6d7a4ecba23a3f05111e1d8be37c03439fbd484668ceab69a52b7d519b169cbbcf634ee5e3bf78a5f8771f95fea03f2cb889e116a9f5de3abeacb8e42475fb5d022484b02d11f1e406332e0a773098fc4f0baa57cda2863c554f291d4eb74e63e4b3d44b0ed156bff1820003d407a3aaa9e6dfaa226ba7ef2fd0eff90a5482926f47f24f67019edccb6fd329eef30b5fb2125276aa1fe75a702b32c907ab133c72a74e77e0a5eb48fc5176b9d65b75b0038e1a9ed74ec2a3dcd2348fa54256f082abc01a301bacef7380f20ee0411c08a35dafdaae9f9fc123448da28626ffcc654e9d522bc8b8776b13a3310f7eeb4d27290ef4cbc7492fbcb5409d455748a8a1f087430cf5e6f453e2caa0c5343fcf4374cc38bead49941d8ab59b4d5181716c238aa88dbf1c4a2da3a9a9b9435d5ee1d51d27b0655a4308c1252aaf633cd8f44a351ffc8cec65de0b7e4e2556100e2ae9bc511044351109a6254b2d387b1a72c768f43fa7be6b93806e323b55c3e7925ed627dc708fde0954b299b1ca33bb7fbe33e0f9e4ce5b4f26efaf8e5b9507ada4f8658998eb7167afbd4482ee47cc60f4039d6a77c1fb126033bfc2e7c6162ff7561f22e263325c53a014a4ac9390fe6fab5d433c1e9896fe561f22fc8290f3f4560b676f3dfbfd1fe605343a0685349241b83a28d61cc0292d1f638a36d3d87bfa9f72f9df1cfe90692dfda5bd5e698362f5316984cbe73a132a801acbca76b5f5c23d98a217f2159b6cbbcdf8f52d23ea24c9471a31562a651b20e05cd0300ee500a450cfdaa4d2d83f7e7e27f8b7c793cf80f8067dadef77e49a64c373b97bac4dd472e5145072c73d0fdd68d9646c8a8ed9aec6c40bc915ae44ae27391ca0f1a4d2cb1c3d097be614e6eba807f4549d769a5872f268ccf770f2682d844490348f0b6a0d2b51aadbb5523cf708b66f9928eed12b35a39cf42d283b29f5283e1c8ba1f73457af17b14cdfdf9a85b0589acf1f9504e46b0bab8be848dac5673587035b99f56c41d3195bbba1616b149a22193cfb760d6bf2d84861653cd21be9a2d33187cb25d47fbecdd4626d1d97202f460a39b7128cadb77ddf682feca61fb6de0290df598a565a6361a91d76c0c685046489ed4cb1dcc4f1cea849c12dc4a3d38d3010567f387590532b78927e92f0b718c84e882b3df071a78a011d0fd56d4101dcb009914a16a781b240a6fb2440c72b0ffb365be9d3459d114e665a0d35d7b8bd280101d85d1211d939ba0b15ab528c4f9dd2b001172561d211671b96873010ae3c2b8317f773d735698914228764b831423ae19dd4bbb008b9f1bd1e3ebdd626e629a46a9dd70bdd4bb30e2279e83c12bbbead6479b5f9980b1a9c785395520703a9367d931b45c1566c9d314b1745cafc6d0667cc9bc94d0c53a960c829eb09b768ab6bb2133e4fea1d939f3d3f8e1237210cf3577c830f0493073dc1d189abf27402b8b31b7c172c43dbf331a0828adfe737380e763d0ab0bfaaf94ec04830f94380a83718f340c4eeb20d7eb22b94613be84a9ed332ab364efff6cb37eec35d186185cca725e7a748f6bdb427604fb1628d49a7424a5a62a2e930fe142b035503af332fe748d5e63591b9ac54071ca843d5e474a48837de8b80387f3269ab50d2fd99c08c971e015d13fa02c7c315922ce58bdacbf8ee48827851a61fca59882d7eadcce3166dfe012aa9ec849e698e776a4d384f4755b506a222636942a81bbbffa1ff47e4d81fe68120aebcfd1a7e0000fd0cffdc44e1f0cd69ea2b4936564c78af51fed1cc8e34f0b46d6330b4b50ddee09335b7b0be0bc9f7f8e48415e15d08f811653d21bc6dd152742b086caadcc6dff5e27b40da42c2f1ebf3dd2bd51c418718e499859239317fcab10892eadf1c0ebf7a4246bce4cce3617193032f3e41b977dc8650298ac39631c527460364effea0f0bfd043df72ead0406aba1bcd636d65d7b89979eb8e1";
+    jump1e20  = "e172e20c5d2de26b567c0cace9e7c6cc4407bd5ffcd22ca59d37b73d54fdbd937cd3abc6f502e8c186dbd4f1a06b9e2b894f31be77424f94dddfd5a45888a84ca66eeeb242eefe6764ed859dafccae7a6a635b3a63fe9dfbbd5f2d3f2610d39388f53060e84edae75be4f4f2272c0f1f26d1231836ad040ab091550f8a3a5423fb3ab83e068fe2684057f15691c4dc757a3aee4bca8595bf1ad03500d9620a5dbe3b2d64380694895d2f379ca928238293ea267ce14236d5be816a61f018fe4f6bc3c9865f5d4d4186e320ab653d1f3c035ae83e2ad725648a67d3480331e763a1dcdfb5711b56796170b124f5febd723a664a2deefbfa9999d922a108b0e683582ae8d3baacb5bb56683405ea9e6e0d71ddb24b2229c72bb9d07061f2d1fa097ade823b607a2029d6e121ae09d93de01a154199e8e6a6e77c970bda72ba8079b2b3a15dd494a3188b1d94a25ae108a8a5bd0b050e6ce64a365a21420e07fdeebecae02eb68a4304b59283055d22c27d680ea35952834d828c9b9b9dd1a886b4f7fe82fe8f2a962e1e5390e563dc281c799aee2a441b7a813facb6ff5e94c059710dcfe7e6b1635e21ae0dc878dd5f7cc0e1101a74452495a67d23a2672c939f32c81d4a2611073990e92a084cc3a62fd42ee566f29d963a9cc5100ccd0a200f49ce0a74fa891efa1b974d342b7fedf9269e40d9b34e3c59c3d37201aecd5a04f4ae3d0c9a68c7ab78c662390e4cf36cb63ea3539c442efd0bf4aace4b8c8bde93c3d84b4d6290adfae1c5e3fcd457b6f3159e501f17b72ff6bc13d6bf61fbdafabefd16ac1dae0bca667e4e16a2b800732f1d0a9274c8a4c6cccd2db62fc275dc308c31c11cd6fda78de2f81f0e542b76b42b2cc09ed8f965d94c714c9918064f53af5379cfbbc31edf9cbce694f63a75f122048de6e57b094908f749661456813a908027f5d8397ab7962bf75ac779a3e1b7ae3fbc93397a67b486bb849befff1de6162ef2819715a88f41881e366ace692a900796a2806393898dd1750ac2b4ca3d34ca48942322fb6375f0c9a00c9701048ee8d7d7a17e11739177a7ad5027556e85835daf8594d84a97fe6621c0fce1495ae6ab8676cdc992d247acf5a4e5ec8c4755fde28117228d2c3ecf89edb91e93d949e2174924572265e36d176d082ed1be884e51d885ba3cda175c51edcee5042eaf519d292aa05aa4185b03858d710a9d0880b3d4e5111f858a52fe352cbe0a24f06a3d977ae2eb85e2a03a68131d0ab91dac4941067cf90ecd0fce156bcd40b8968cd4aa11e0b4353b14508d79d13ac00af4a4d452496b7f2393699889aa1e508427dbf0be3db91d955feb51e559af57640c6b3f9d5f95609852c28f9462a9869dd93acbdb1aafb2381ebb886a0b3fcec278f8bb0f62c23e157e49b89245b0881268ce594acbddd3605b9eaa77c9ff513e0dbad514914136d96fe2843fe2b4e886a0b718a9b8d1132132110618d0d3595da284cd2a4c9d09386199e4f4d7723983d3a374b51cf20dac5cabb4ff7e7197c2ebd9318463409baa583d6a6115c1b768282ff37b0fe152c97671e400d5ccba7d6875df0bf95c5d91257fedb124de393f31908d0e36251326aa29dd5be86291c80b4bf78f419ec151eeaeff643a58b48ab35ad2cd2c0b77b1965966ef3db6b6373cb2c4b590cef2f16f4d6f62f13a6cbf1a481565b5935edd4e76f7b6a8fd0d74bc336b40a803aec38125c006c877dfdcdb9ba2b7aecab5cafe6076e024c73e3567adf97f607a71d180402c22a20a8388f517484cc4198f97c2fe4f3407e0dc577e61f0f71354aa601cf4e3e42e1edd8722d50f5af3441f68caa568cc1c3a19956c1233f265bb47236afab24ee42b27b0042b90693d77c1923147360ae6503f6ba6abbc9dd52a7b4c36a3b6b55f6a80cfa7f101dd9f1bfc7d7eaf09a5d636b510228f245bfb37b4625025d2c911435cdf6f878113753e0804ab8ecab870ad733b9728d7636b17578b41239393e7de47cbce871137d2b61729dda67b2b84cd3363aad64c5dd5bd172f1f091305b1ff78982abe7dab1588036d097cf497e300e6c78a926048febd1b9462c07f5868928357b74297c87f503056b89f786d22a538b6702e290bca04639a0f1d0939b67f409e5e58e472a6a07fa543e2531c2567ec73c41f6769b6ba94c5aa0a030d006f5b6b1c5fb218b86a8f63a48bc867466f20f699859e87956f48a182d26ed451861dd21201ecc7239037ada67319bdf0849c387c73a110af798b4c5f9018bc97993e060ea2a2937fa2eb095d65ec07009fc407a350f1d6fb3c98a0a5f204be985b0cb6962f0eb7844a179c4598a92ea32d2d706c800034d2e960ded5b476d77073316b933fb3e6ba2f4f24a3b73a1e4d8ed1491d757ecf56fd72465dac0000736744d28d29073091587c8bccad302f7054e8a32bb8724974d9f3e449fc70b2a41f0008b548f717ac0a2c3a6580bfb50774933a578ad6acdcb89940bb406ea540893f097d8a88d1609ed605f25499de939083a0c8a7c6db462df5dfa06c298dd233e249433a54267d5cdc22e5524705b7d6b16b96bb2cb83e00cef62e21d91528a74cf95bfd1d391590c93f4058e9bb02656fd087a5b63d738d1c3b5cf533fd59c81cf9136bfcd3e955c19daf9906ef175791fde6a1d98155d7881e241c3522551cf9fcae42e1e46929ea39fd00943446823f9755085ccc8456a3090b73a3031a201d9c704a4ad4868dd9b6d06205560013973f60d637de2f18354bf4523d9d81dc2a7e78cd42c586364bbe0ed86fde0f081f801c1a4abb830839b7796d9a01f141bec8bd93144104c6dc59170162c0a5a639eb63a0a164970de50eb2e04f027394b26ed48d341f7851994df79d7cd663672a556f25e5e16a3adbe1003d631de938fabfed234df12b5ff3027f4a2da823834cb098e0f977a4eb9614579d5c7a1d400a1a933a657aef8ea1a66743d73b0cf37a7d64e9a63e4c7b09945f0db750b311b39783fb5ea216616751967d480a630d3da7c89d1c7beae20369137e96734a4cfedca56a7887f076fe4fe97534ad3e4f74d1a81750581a5ea214b440c7f30331ab86c257534c71175d1e731303a48b01c589fda4fb0d4368b4dd63d91204cb6fc389b2202aa94391907bfb72902a4031f5589ed5f391c2ce92aa998c200ba3c77d8bd747b9d0a29fa85cda3949a6d2bd0c3402e68f98fd451aa27b6c2dfd170e004577cbdb25e3a1b9852e9f66a370789c47bfce722446dade1b32ceae71ee0e1d96edf7ed08a93e3690056f46c3d8e63f88e53673ee71d72cfedbeba493ee91333120e09e9ce9f9c9a7a400f814ea618b1de48f9805e092f4e20f301fbb65caa83735a2a5c89befe4bce4116dca3688e1e14c6f09a945671dedbb5c0ba526842b6cae31d8b5ff978bae928a17a75c134630dd9de988f6ad3d89a071b33775a9660a40b48ec61ad3f93ac81cb1c65d8b0bab5c214786abd13cc10a8ea2e2a370e86e2fa1a372d83c9697b5e37b281e51507685f714fdaebe49ffc93a5582e1936eaee8e4140a4b72"
+    @test DSFMT.GF2X(jump25000) == DSFMT.calc_jump(25000)
+    @test DSFMT.GF2X(jump1e20)  == DSFMT.calc_jump(big(10)^20)
+
+    # check validity of the implementation of copy(::GF2X)
+    let z = big(1); @assert z !== z+0 end
+
+    # test PRNG jump
+
+    function randjumpvec(m, steps, len) # old version of randjump
+        mts = accumulate(Future.randjump, fill(steps, len-1); init=m)
+        pushfirst!(mts, m)
+        mts
+    end
+
+    mts = randjumpvec(mta, 25000, size)
+    @test length(mts) == 4
+
+    for x in (rand(mts[k], Float64) for j=1:step, k=1:size)
+        @test rand(mtb, Float64) == x
+    end
+
+    @testset "generated RNGs are in a deterministic state (relatively to ==)" begin
+        m = MersenneTwister()
+        @test Future.randjump(m, 25000) == Future.randjump(m, 25000)
+    end
+end
+
+# test that the following is not an error (#16925)
+guardseed() do
+    Random.seed!(typemax(UInt))
+    Random.seed!(typemax(UInt128))
+end
+
+# copy, == and hash
+let seed = rand(UInt32, 10)
+    r = MersenneTwister(seed)
+    @test r == MersenneTwister(seed) # r.vals should be all zeros
+    @test hash(r) == hash(MersenneTwister(seed))
+    s = copy(r)
+    @test s == r && s !== r
+    @test hash(s) == hash(r)
+    skip, len = rand(0:2000, 2)
+    for j=1:skip
+        rand(r)
+        rand(s)
+    end
+    @test rand(r, len) == rand(s, len)
+    @test s == r
+    @test hash(s) == hash(r)
+    h = rand(UInt)
+    @test hash(s, h) == hash(r, h)
+end
+
+# MersenneTwister initialization with invalid values
+@test_throws DomainError DSFMT.DSFMT_state(zeros(Int32, rand(0:DSFMT.JN32-1)))
+
+@test_throws DomainError MersenneTwister(zeros(UInt32, 1), DSFMT.DSFMT_state(),
+                                         zeros(Float64, 10), zeros(UInt128, MT_CACHE_I>>4), 0, 0, 0, 0, -1, -1)
+
+@test_throws DomainError MersenneTwister(zeros(UInt32, 1), DSFMT.DSFMT_state(),
+                                         zeros(Float64, MT_CACHE_F), zeros(UInt128, MT_CACHE_I>>4), -1, 0, 0, 0, -1, -1)
+
+@test_throws DomainError MersenneTwister(zeros(UInt32, 1), DSFMT.DSFMT_state(),
+                                         zeros(Float64, MT_CACHE_F), zeros(UInt128, MT_CACHE_I>>3), 0, 0, 0, 0, -1, -1)
+
+@test_throws DomainError MersenneTwister(zeros(UInt32, 1), DSFMT.DSFMT_state(),
+                                         zeros(Float64, MT_CACHE_F), zeros(UInt128, MT_CACHE_I>>4), 0, -1, 0, 0, -1, -1)
+
+# seed is private to MersenneTwister
+let seed = rand(UInt32, 10)
+    r = MersenneTwister(seed)
+    @test r.seed == seed && r.seed !== seed
+    # RNGs do not share their seed in randjump
+    let r2 = Future.randjump(r, big(10)^20)
+        @test  r.seed !== r2.seed
+        Random.seed!(r2)
+        @test seed == r.seed != r2.seed
+    end
+    resize!(seed, 4)
+    @test r.seed != seed
+end
+
+# Random.seed!(rng, ...) returns rng (#21248)
+guardseed() do
+    g = Random.default_rng()
+    m = MersenneTwister(0)
+    @test Random.seed!() === g
+    @test Random.seed!(rand(UInt)) === g
+    @test Random.seed!(rand(UInt32, rand(1:8))) === g
+    @test Random.seed!(m) === m
+    @test Random.seed!(m, rand(UInt)) === m
+    @test Random.seed!(m, rand(UInt32, rand(1:10))) === m
+    @test Random.seed!(m, rand(1:10)) === m
+end
+
+# Issue 20062 - ensure internal functions reserve_1, reserve are type-stable
+let r = MersenneTwister(0)
+    @inferred Random.reserve_1(r)
+    @inferred Random.reserve(r, 1)
+end
+
+# test randstring API
+let b = ['0':'9';'A':'Z';'a':'z']
+    for rng = [[], [MersenneTwister(0)]]
+        @test length(randstring(rng...)) == 8
+        @test length(randstring(rng..., 20)) == 20
+        @test issubset(randstring(rng...), b)
+        for c = ['a':'z', "qwèrtï", Set(codeunits("gcat"))],
+                len = [8, 20]
+            s = len == 8 ? randstring(rng..., c) : randstring(rng..., c, len)
+            @test length(s) == len
+            if eltype(c) == Char
+                @test issubset(s, c)
+            else # UInt8
+                @test issubset(s, Set(Char(v) for v in c))
+            end
+        end
+    end
+    @test randstring(MersenneTwister(0)) == randstring(MersenneTwister(0), b)
+end
+
+# this shouldn't crash (#22403)
+@test_throws MethodError rand!(Union{UInt,Int}[1, 2, 3])
+
+@testset "$RNG() & Random.seed!(rng::$RNG) initializes randomly" for RNG in (MersenneTwister, RandomDevice, Xoshiro)
+    m = RNG()
+    a = rand(m, Int)
+    m = RNG()
+    @test rand(m, Int) != a
+    # passing `nothing` is equivalent to passing nothing
+    m = RNG(nothing)
+    b = rand(m, Int)
+    @test b != a
+    Random.seed!(m)
+    c = rand(m, Int)
+    @test c ∉ (a, b)
+    Random.seed!(m)
+    @test rand(m, Int) ∉ (a, b, c)
+    Random.seed!(m, nothing)
+    d = rand(m, Int)
+    @test d ∉ (a, b, c)
+    Random.seed!(m, nothing)
+    @test rand(m, Int) ∉ (a, b, c, d)
+end
+
+@testset "$RNG(seed) & Random.seed!(m::$RNG, seed) produce the same stream" for RNG=(MersenneTwister,Xoshiro)
+    seeds = Any[0, 1, 2, 10000, 10001, rand(UInt32, 8), rand(UInt128, 3)...]
+    if RNG == Xoshiro
+        push!(seeds, rand(UInt64, rand(1:4)))
+    end
+    for seed=seeds
+        m = RNG(seed)
+        a = [rand(m) for _=1:100]
+        Random.seed!(m, seed)
+        @test a == [rand(m) for _=1:100]
+    end
+end
+
+@testset "Random.seed!(seed) sets Random.GLOBAL_SEED" begin
+    seeds = Any[0, rand(UInt128), rand(UInt64, 4)]
+
+    for seed=seeds
+        Random.seed!(seed)
+        @test Random.GLOBAL_SEED === seed
+    end
+    # two separate loops as otherwise we are no sure that the second call (with GLOBAL_RNG)
+    # actually sets GLOBAL_SEED
+    for seed=seeds
+        Random.seed!(Random.GLOBAL_RNG, seed)
+        @test Random.GLOBAL_SEED === seed
+    end
+
+    Random.seed!(nothing)
+    seed1 = Random.GLOBAL_SEED
+    @test seed1 isa Vector{UInt64} # could change, but must not be nothing
+
+    Random.seed!(Random.GLOBAL_RNG, nothing)
+    seed2 = Random.GLOBAL_SEED
+    @test seed2 isa Vector{UInt64}
+    @test seed2 != seed1
+
+    Random.seed!()
+    seed3 = Random.GLOBAL_SEED
+    @test seed3 isa Vector{UInt64}
+    @test seed3 != seed2
+
+    Random.seed!(Random.GLOBAL_RNG)
+    seed4 = Random.GLOBAL_SEED
+    @test seed4 isa Vector{UInt64}
+    @test seed4 != seed3
+end
+
+struct RandomStruct23964 end
+@testset "error message when rand not defined for a type" begin
+    @test_throws MethodError rand(nothing)
+    @test_throws MethodError rand(RandomStruct23964())
+end
+
+@testset "rand(::$(typeof(RNG)), ::UnitRange{$T}" for RNG ∈ (MersenneTwister(rand(UInt128)), RandomDevice(), Xoshiro()),
+                                                        T ∈ (Int8, Int16, Int32, UInt32, Int64, Int128, UInt128)
+    for S in (SamplerRangeInt, SamplerRangeFast, SamplerRangeNDL)
+        S == SamplerRangeNDL && sizeof(T) > 8 && continue
+        r = T(1):T(108)
+        @test rand(RNG, S(r)) ∈ r
+        @test rand(RNG, S(typemin(T):typemax(T))) isa T
+        a, b = sort!(rand(-1000:1000, 2) .% T)
+        @test rand(RNG, S(a:b)) ∈ a:b
+    end
+end
+
+@testset "rand! is allocation-free" begin
+    for A in (Array{Int}(undef, 20), Array{Float64}(undef, 5, 4), BitArray(undef, 20), BitArray(undef, 50, 40))
+        rand!(A)
+        @test @allocated(rand!(A)) == 0
+    end
+end
+
+@testset "gentype for UniformBits" begin
+    @test Random.gentype(Random.UInt52()) == UInt64
+    @test Random.gentype(Random.UInt52(UInt128)) == UInt128
+    @test Random.gentype(Random.UInt104()) == UInt128
+end
+
+@testset "shuffle[!]" begin
+    a = []
+    @test shuffle(a) == a # issue #28727
+    @test shuffle!(a) === a
+    a = rand(Int, 1)
+    @test shuffle(a) == a
+end
+
+@testset "rand(::Tuple)" begin
+    for x in (0x1, 1)
+        @test rand((x,)) == 0x1
+        @test rand((x, 2)) ∈ 1:2
+        @test rand((x, 2, 3)) ∈ 1:3
+        @test rand((x, 2, 3, 4)) ∈ 1:4
+        @test rand((x, 2, 3, 4, 5)) ∈ 1:5
+        @test rand((x, 2, 3, 4, 6)) ∈ 1:6
+    end
+end
+
+@testset "GLOBAL_RNG" begin
+    local GLOBAL_RNG = Random.GLOBAL_RNG
+    local LOCAL_RNG = Random.default_rng()
+    @test VERSION < v"2" # deprecate this in v2
+
+    @test Random.seed!(GLOBAL_RNG, nothing) === LOCAL_RNG
+    @test Random.seed!(GLOBAL_RNG, UInt32[0]) === LOCAL_RNG
+    @test Random.seed!(GLOBAL_RNG, 0) === LOCAL_RNG
+    @test Random.seed!(GLOBAL_RNG) === LOCAL_RNG
+
+    xo = Xoshiro()
+    @test copy!(xo, GLOBAL_RNG) === xo
+    @test xo == LOCAL_RNG
+    Random.seed!(xo, 2)
+    @test xo != LOCAL_RNG
+    @test copy!(GLOBAL_RNG, xo) === LOCAL_RNG
+    @test xo == LOCAL_RNG
+    xo2 = copy(GLOBAL_RNG)
+    @test xo2 !== LOCAL_RNG
+    @test xo2 == LOCAL_RNG
+
+    for T in (Random.UInt52Raw{UInt64},
+              Random.UInt104Raw{UInt128},
+              Random.CloseOpen12_64)
+        x = Random.SamplerTrivial(T())
+        @test rand(GLOBAL_RNG, x) === rand(xo, x)
+    end
+    for T in (Int64, UInt64, Int128, UInt128, Bool, Int8, UInt8, Int16, UInt16, Int32, UInt32)
+        x = Random.SamplerType{T}()
+        @test rand(GLOBAL_RNG, x) === rand(xo, x)
+    end
+
+    A = fill(0.0, 100, 100)
+    B = fill(1.0, 100, 100)
+    vA = view(A, :, :)
+    vB = view(B, :, :)
+    I1 = Random.SamplerTrivial(Random.CloseOpen01{Float64}())
+    I2 = Random.SamplerTrivial(Random.CloseOpen12{Float64}())
+    @test rand!(GLOBAL_RNG, A, I1) === A == rand!(xo, B, I1) === B
+    B = fill!(B, 1.0)
+    @test rand!(GLOBAL_RNG, vA, I1) === vA
+    rand!(xo, vB, I1)
+    @test A == B
+    for T in (Float16, Float32)
+        B = fill!(B, 1.0)
+        @test rand!(GLOBAL_RNG, A, I2) === A == rand!(xo, B, I2) === B
+        B = fill!(B, 1.0)
+        @test rand!(GLOBAL_RNG, A, I1) === A == rand!(xo, B, I1) === B
+    end
+    for T in Base.BitInteger_types
+        x = Random.SamplerType{T}()
+        B = fill!(B, 1.0)
+        @test rand!(GLOBAL_RNG, A, x) === A == rand!(xo, B, x) === B
+    end
+    # issue #33170
+    @test Sampler(GLOBAL_RNG, 2:4, Val(1)) isa SamplerRangeNDL
+    @test Sampler(GLOBAL_RNG, 2:4, Val(Inf)) isa SamplerRangeNDL
+
+    rng = copy(GLOBAL_RNG)
+    # make sure _GLOBAL_RNG and the underlying implementation use the same code path
+    @test rand(rng) == rand(GLOBAL_RNG)
+    @test rand(rng) == rand(GLOBAL_RNG)
+    @test rand(rng) == rand(GLOBAL_RNG)
+    @test rand(rng) == rand(GLOBAL_RNG)
+end
+
+@testset "RNGs broadcast as scalars: T" for T in (MersenneTwister, RandomDevice)
+    @test length.(rand.(T(), 1:3)) == 1:3
+end
+
+@testset "generated scalar integers do not overlap" begin
+    m = MersenneTwister()
+    xs = reinterpret(UInt64, m.ints)
+    x = rand(m, UInt128)  # m.idxI % 16 == 0
+    @test x % UInt64 == xs[end-1]
+    x = rand(m, UInt64)
+    @test x == xs[end-2]
+    x = rand(m, UInt64)
+    @test x == xs[end-3]
+    x = rand(m, UInt64)
+    @test x == xs[end-4]
+    x = rand(m, UInt128) # m.idxI % 16 == 8
+    @test (x >> 64) % UInt64 == xs[end-6]
+    @test x % UInt64 == xs[end-7]
+    x = rand(m, UInt64)
+    @test x == xs[end-8] # should not be == xs[end-7]
+
+    s = Set{UInt64}()
+    n = 0
+    for _=1:2000
+        x = rand(m, rand((UInt64, UInt128, Int64, Int128)))
+        if sizeof(x) == 8
+            push!(s, x % UInt64)
+            n += 1
+        else
+            push!(s, x % UInt64, (x >> 64) % UInt64)
+            n += 2
+        end
+    end
+    @test length(s) == n
+end
+
+@testset "show" begin
+    @testset "MersenneTwister" begin
+        m = MersenneTwister(123)
+        @test string(m) == "MersenneTwister(123)"
+        Random.jump!(m, 2*big(10)^20)
+        @test string(m) == "MersenneTwister(123, (200000000000000000000, 0))"
+        @test m == MersenneTwister(123, (200000000000000000000, 0))
+        rand(m)
+        @test string(m) == "MersenneTwister(123, (200000000000000000000, 1002, 0, 1))"
+
+        @test m == MersenneTwister(123, (200000000000000000000, 1002, 0, 1))
+        rand(m, Int64)
+        @test string(m) == "MersenneTwister(123, (200000000000000000000, 2256, 0, 1, 1002, 1))"
+        @test m == MersenneTwister(123, (200000000000000000000, 2256, 0, 1, 1002, 1))
+
+        m = MersenneTwister(0x0ecfd77f89dcd508caa37a17ebb7556b)
+        @test string(m) == "MersenneTwister(0xecfd77f89dcd508caa37a17ebb7556b)"
+        rand(m, Int64)
+        @test string(m) == "MersenneTwister(0xecfd77f89dcd508caa37a17ebb7556b, (0, 1254, 0, 0, 0, 1))"
+        @test m == MersenneTwister(0xecfd77f89dcd508caa37a17ebb7556b, (0, 1254, 0, 0, 0, 1))
+
+        m = MersenneTwister(0); rand(m, Int64); rand(m)
+        @test string(m) == "MersenneTwister(0, (0, 2256, 1254, 1, 0, 1))"
+        @test m == MersenneTwister(0, (0, 2256, 1254, 1, 0, 1))
+    end
+
+    @testset "RandomDevice" begin
+        @test string(RandomDevice()) == "$RandomDevice()"
+    end
+end
+
+@testset "rand[!] for BigInt/BigFloat" begin
+    rng = MersenneTwister()
+    s = Random.SamplerBigInt(MersenneTwister, 1:big(9))
+    x = rand(s)
+    @test x isa BigInt
+    y = rand!(rng, x, s)
+    @test y === x
+    @test x in 1:9
+
+    for t = BigInt[0, 10, big(2)^100]
+        s = Random.Sampler(rng, t:t) # s.nlimbs == 0
+        @test rand(rng, s) == t
+        @test x === rand!(rng, x, s) == t
+
+        s = Random.Sampler(rng, big(-1):t) # s.nlimbs != 0
+        @test rand(rng, s) ∈ -1:t
+        @test x === rand!(rng, x, s) ∈ -1:t
+
+    end
+
+    s = Random.Sampler(MersenneTwister, Random.CloseOpen01(BigFloat))
+    x = rand(s)
+    @test x isa BigFloat
+    y = rand!(rng, x, s)
+    @test y === x
+    @test 0 <= x < 1
+    s = Random.Sampler(MersenneTwister, Random.CloseOpen12(BigFloat))
+    y = rand!(rng, x, s)
+    @test y === x
+    @test 1 <= x < 2
+
+    old_prec = precision(BigFloat)
+    setprecision(100) do
+        x = rand(s) # should use precision of s
+        @test precision(x) == old_prec
+        x = BigFloat()
+        @test_throws ArgumentError rand!(rng, x, s) # incompatible precision
+    end
+    s = setprecision(100) do
+        Random.Sampler(MersenneTwister, Random.CloseOpen01(BigFloat))
+    end
+    x = rand(s) # should use precision of s
+    @test precision(x) == 100
+    x = BigFloat()
+    @test_throws ArgumentError rand!(rng, x, s) # incompatible precision
+end
+
+@testset "shuffle! for BitArray" begin
+    # Test that shuffle! is uniformly random on BitArrays
+    rng = MersenneTwister(123)
+    a = (reshape(1:(4*5), 4, 5) .<= 2) # 4x5 BitMatrix whose first two elements are true, rest are false
+    m = mean(1:50_000) do _
+        shuffle!(rng, a)
+    end # mean result of shuffle!-ing a 50_000 times. If the shuffle! is uniform, then each index has a
+    # 10% chance of having a true in it, so each value should converge to 0.1.
+    @test minimum(m) >= 0.094
+    @test maximum(m) <= 0.106
+end