Skip to content

Commit c2fb1dc

Browse files
authored
Make unique(f, itr) and unique!(f, itr) faster (#30286)
* Make `unique(f, itr)` and `unique!(f, itr)` faster
  Avoid creation of a `Set{Any}`.
* Fix `unique!` for resizable `OffsetVector`
1 parent d7c3926 commit c2fb1dc

File tree

1 file changed

+58
-17
lines changed

1 file changed

+58
-17
lines changed

base/set.jl

Lines changed: 58 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -167,15 +167,39 @@ julia> unique(x -> x^2, [1, -1, 3, -3, 4])
167167
"""
168168
function unique(f, C)
169169
out = Vector{eltype(C)}()
170-
seen = Set()
171-
for x in C
170+
171+
s = iterate(C)
172+
if s === nothing
173+
return out
174+
end
175+
(x, i) = s
176+
y = f(x)
177+
seen = Set{typeof(y)}()
178+
push!(seen, y)
179+
push!(out, x)
180+
181+
return _unique!(f, out, C, seen, i)
182+
end
183+
184+
function _unique!(f, out::AbstractVector, C, seen::Set, i)
185+
s = iterate(C, i)
186+
while s !== nothing
187+
(x, i) = s
172188
y = f(x)
173-
if !in(y, seen)
174-
push!(seen, y)
189+
if y ∉ seen
175190
push!(out, x)
191+
if y isa eltype(seen)
192+
push!(seen, y)
193+
else
194+
seen2 = convert(Set{promote_typejoin(eltype(seen), typeof(y))}, seen)
195+
push!(seen2, y)
196+
return _unique!(f, out, C, seen2, i)
197+
end
176198
end
199+
s = iterate(C, i)
177200
end
178-
out
201+
202+
return out
179203
end
180204

181205
"""
@@ -208,22 +232,39 @@ julia> unique!(iseven, [2, 3, 5, 7, 9])
208232
```
209233
"""
210234
function unique!(f, A::AbstractVector)
211-
seen = Set()
212-
idxs = eachindex(A)
213-
y = iterate(idxs)
214-
count = 0
215-
for x in A
216-
t = f(x)
217-
if t ∉ seen
218-
push!(seen,t)
219-
count += 1
220-
A[y[1]] = x
221-
y = iterate(idxs, y[2])
235+
if length(A) <= 1
236+
return A
237+
end
238+
239+
i = firstindex(A)
240+
x = @inbounds A[i]
241+
y = f(x)
242+
seen = Set{typeof(y)}()
243+
push!(seen, y)
244+
return _unique!(f, A, seen, i, i+1)
245+
end
246+
247+
function _unique!(f, A::AbstractVector, seen::Set, current::Integer, i::Integer)
248+
while i <= lastindex(A)
249+
x = @inbounds A[i]
250+
y = f(x)
251+
if y ∉ seen
252+
current += 1
253+
@inbounds A[current] = x
254+
if y isa eltype(seen)
255+
push!(seen, y)
256+
else
257+
seen2 = convert(Set{promote_typejoin(eltype(seen), typeof(y))}, seen)
258+
push!(seen2, y)
259+
return _unique!(f, A, seen2, current, i+1)
260+
end
222261
end
262+
i += 1
223263
end
224-
resize!(A, count)
264+
return resize!(A, current - firstindex(A) + 1)
225265
end
226266

267+
227268
# If A is not grouped, then we will need to keep track of all of the elements that we have
228269
# seen so far.
229270
_unique!(A::AbstractVector) = unique!(identity, A::AbstractVector)

0 commit comments

Comments
 (0)