From cacbb7f26ccd58c07720e24eb6dc5d8d76b3288c Mon Sep 17 00:00:00 2001 From: Andy Ferris Date: Wed, 5 Dec 2018 15:42:43 +1000 Subject: [PATCH 1/2] Make `unique(f, itr)` and `unique!(f, itr)` faster Avoid creation of a `Set{Any}`. --- base/set.jl | 73 +++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 57 insertions(+), 16 deletions(-) diff --git a/base/set.jl b/base/set.jl index 6afff54d7cf0a..cb942b519e453 100644 --- a/base/set.jl +++ b/base/set.jl @@ -167,15 +167,39 @@ julia> unique(x -> x^2, [1, -1, 3, -3, 4]) """ function unique(f, C) out = Vector{eltype(C)}() - seen = Set() - for x in C + + s = iterate(C) + if s === nothing + return out + end + (x, i) = s + y = f(x) + seen = Set{typeof(y)}() + push!(seen, y) + push!(out, x) + + return _unique!(f, out, C, seen, i) +end + +function _unique!(f, out::AbstractVector, C, seen::Set, i) + s = iterate(C, i) + while s !== nothing + (x, i) = s y = f(x) - if !in(y, seen) - push!(seen, y) + if y ∉ seen push!(out, x) + if y isa eltype(seen) + push!(seen, y) + else + seen2 = convert(Set{promote_typejoin(eltype(seen), typeof(y))}, seen) + push!(seen2, y) + return _unique!(f, out, C, seen2, i) + end end + s = iterate(C, i) end - out + + return out end """ @@ -205,22 +229,39 @@ julia> unique!(iseven, [2, 3, 5, 7, 9]) ``` """ function unique!(f, A::AbstractVector) - seen = Set() - idxs = eachindex(A) - y = iterate(idxs) - count = 0 - for x in A - t = f(x) - if t ∉ seen - push!(seen,t) + if length(A) <= 1 + return A + end + + i = firstindex(A) + x = @inbounds A[i] + y = f(x) + seen = Set{typeof(y)}() + push!(seen, y) + return _unique!(f, A, seen, 1, i+1) +end + +function _unique!(f, A::AbstractVector, seen::Set, count::Integer, i::Integer) + while i <= lastindex(A) + x = @inbounds A[i] + y = f(x) + if y ∉ seen count += 1 - A[y[1]] = x - y = iterate(idxs, y[2]) + @inbounds A[count] = x + if y isa eltype(seen) + push!(seen, y) + else + seen2 = convert(Set{promote_typejoin(eltype(seen), typeof(y))}, seen) + push!(seen2, y) + return _unique!(f, A, seen2, count, i+1) + end end + i += 1 end - resize!(A, count) + return resize!(A, count) end + # If A is not grouped, then we will need to keep track of all of the elements that we have # seen so far. _unique!(A::AbstractVector) = unique!(identity, A::AbstractVector) From db5973ac40e277b35a411e6269f2b2a1f25830ae Mon Sep 17 00:00:00 2001 From: Andy Ferris Date: Thu, 6 Dec 2018 23:09:24 +1000 Subject: [PATCH 2/2] Fix unique! for resizable OffsetVector --- base/set.jl | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/base/set.jl b/base/set.jl index cb942b519e453..ff637f2fa9806 100644 --- a/base/set.jl +++ b/base/set.jl @@ -238,27 +238,27 @@ function unique!(f, A::AbstractVector) y = f(x) seen = Set{typeof(y)}() push!(seen, y) - return _unique!(f, A, seen, 1, i+1) + return _unique!(f, A, seen, i, i+1) end -function _unique!(f, A::AbstractVector, seen::Set, count::Integer, i::Integer) +function _unique!(f, A::AbstractVector, seen::Set, current::Integer, i::Integer) while i <= lastindex(A) x = @inbounds A[i] y = f(x) if y ∉ seen - count += 1 - @inbounds A[count] = x + current += 1 + @inbounds A[current] = x if y isa eltype(seen) push!(seen, y) else seen2 = convert(Set{promote_typejoin(eltype(seen), typeof(y))}, seen) push!(seen2, y) - return _unique!(f, A, seen2, count, i+1) + return _unique!(f, A, seen2, current, i+1) end end i += 1 end - return resize!(A, count) + return resize!(A, current - firstindex(A) + 1) end