Skip to content

Commit 9e7f85d

Browse files
authored
Merge cab4ebd into 0978251
2 parents 0978251 + cab4ebd commit 9e7f85d

File tree

2 files changed

+145
-1
lines changed

2 files changed

+145
-1
lines changed

base/missing.jl

Lines changed: 94 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ IteratorSize(::Type{<:SkipMissing}) = SizeUnknown()
177177
IteratorEltype(::Type{SkipMissing{T}}) where {T} = IteratorEltype(T)
178178
eltype(::Type{SkipMissing{T}}) where {T} = nonmissingtype(eltype(T))
179179

180-
function Base.iterate(itr::SkipMissing, state...)
180+
function iterate(itr::SkipMissing, state...)
181181
y = iterate(itr.x, state...)
182182
y === nothing && return nothing
183183
item, state = y
@@ -189,6 +189,99 @@ function Base.iterate(itr::SkipMissing, state...)
189189
item, state
190190
end
191191

192+
# Optimized mapreduce implementation
193+
# Entry point for reductions over `skipmissing(::AbstractArray)`: look up the index style
# of the wrapped array and hand off to the `_mapreduce` method specialized on that style.
function mapreduce(f, op, itr::SkipMissing{<:AbstractArray})
    style = IndexStyle(itr.x)
    return _mapreduce(f, op, style, itr)
end
194+
195+
"Sentinel indicating that no non-missing value was encountered."
196+
struct AllMissing
    # Field-less singleton: only the type carries information.
end
197+
198+
# Reduce the non-missing entries of a linearly indexed array wrapped in `SkipMissing`.
#
# Arrays holding zero or one non-missing entry are answered directly through
# `mapreduce_empty` / `mapreduce_first`; anything larger is handed to `mapreduce_impl`
# over the full index range.
function _mapreduce(f, op, ::IndexLinear, itr::SkipMissing{<:AbstractArray})
    A = itr.x
    local x
    linear = LinearIndices(A)
    lo, hi = first(linear), last(linear)

    # Scan forward to the first non-missing entry.
    k = lo
    while k <= hi
        @inbounds x = A[k]
        x !== missing && break
        k += 1
    end
    if k > hi
        # Nothing but missing values (or an empty array).
        return mapreduce_empty(f, op, eltype(itr))
    end
    firstval = x

    # Keep scanning to find out whether a second non-missing entry exists.
    k += 1
    while k <= hi
        @inbounds x = A[k]
        x !== missing && break
        k += 1
    end
    if k > hi
        # Exactly one non-missing entry.
        return mapreduce_first(f, op, firstval)
    end

    # At least two non-missing entries exist, so the kernel cannot return the
    # `AllMissing` sentinel; the explicit check below hands that fact to inference.
    result = mapreduce_impl(f, op, itr, lo, hi)
    if result isa AllMissing
        error("got AllMissing for an array with non-missing values")
    end
    return result
end
224+
225+
# Cartesian-indexed inputs are reduced with a plain left fold over the iterator.
function _mapreduce(f, op, ::IndexCartesian, itr::SkipMissing)
    return mapfoldl(f, op, itr)
end
226+
227+
# Convenience method: pick the block size from `pairwise_blocksize(f, op)` and forward
# to the six-argument `mapreduce_impl` method.
function mapreduce_impl(f, op, A::SkipMissing, ifirst::Integer, ilast::Integer)
    blksize = pairwise_blocksize(f, op)
    return mapreduce_impl(f, op, A, ifirst, ilast, blksize)
end
229+
230+
# Reduction kernel over `A[ifirst:ilast]`, where `A` is the array wrapped by `itr`,
# ignoring `missing` entries.
#
# Returns `AllMissing()` when the range holds no non-missing entry; otherwise returns
# the reduced value. Ranges for which `ifirst + blksize > ilast` are reduced in a single
# sequential pass; longer ranges are split at the midpoint and reduced recursively.
@noinline function mapreduce_impl(f, op, itr::SkipMissing{<:AbstractArray},
                                  ifirst::Integer, ilast::Integer, blksize::Int)
    A = itr.x

    # Single-element range.
    if ifirst == ilast
        @inbounds x = A[ifirst]
        return x === missing ? AllMissing() : mapreduce_first(f, op, x)
    end

    # Pairwise portion: split in two, recurse, and merge whichever halves produced
    # a value.
    if !(ifirst + blksize > ilast)
        mid = (ifirst + ilast) >> 1
        left = mapreduce_impl(f, op, itr, ifirst, mid, blksize)
        right = mapreduce_impl(f, op, itr, mid+1, ilast, blksize)
        left isa AllMissing && return right
        right isa AllMissing && return left
        return op(left, right)
    end

    # Sequential portion.
    local x
    k = ifirst
    # Find the first non-missing entry.
    while k <= ilast
        @inbounds x = A[k]
        x !== missing && break
        k += 1
    end
    k > ilast && return AllMissing()
    x1 = x::eltype(itr)

    # Find the second non-missing entry.
    k += 1
    while k <= ilast
        @inbounds x = A[k]
        x !== missing && break
        k += 1
    end
    k > ilast && return mapreduce_first(f, op, x1)
    x2 = x::eltype(itr)

    # Unexpectedly, the following assertion allows SIMD instructions to be emitted
    A[k]::eltype(itr)
    k += 1

    # Seed the accumulator with the first two values, then fold in the rest.
    acc = op(f(x1), f(x2))
    @simd for j = k:ilast
        @inbounds x = A[j]
        if x !== missing
            acc = op(acc, f(x))
        end
    end
    return acc
end
284+
192285
"""
193286
coalesce(x, y...)
194287

test/missing.jl

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,57 @@ end
361361
@test eltype(x) === Any
362362
@test collect(x) == [1, 2, 4]
363363
@test collect(x) isa Vector{Int}
364+
365+
@testset "mapreduce" begin
    # Vary size to test splitting blocks with several configurations of missing values
    for T in (Int, Float64),
        A in (rand(T, 10), rand(T, 1000), rand(T, 10000))
        # Integer sums are exact, so identity (`===`) is required; floating-point sums
        # may be associated differently, so only approximate equality with the plain
        # array sum is required.
        if T === Int
            @test sum(A) === sum(skipmissing(A)) ===
                reduce(+, skipmissing(A)) === mapreduce(identity, +, skipmissing(A))
        else
            @test sum(A) ≈ sum(skipmissing(A)) ===
                reduce(+, skipmissing(A)) === mapreduce(identity, +, skipmissing(A))
        end
        @test mapreduce(cos, *, A) ≈ mapreduce(cos, *, skipmissing(A))

        B = Vector{Union{T,Missing}}(A)
        replace!(x -> rand(Bool) ? x : missing, B)
        # Pin one non-missing value so the `cos`/`*` reductions below are never applied
        # to an empty collection (which throws) when the random mask removes everything.
        B[end] = A[end]
        if T === Int
            @test sum(collect(skipmissing(B))) === sum(skipmissing(B)) ===
                reduce(+, skipmissing(B)) === mapreduce(identity, +, skipmissing(B))
        else
            @test sum(collect(skipmissing(B))) ≈ sum(skipmissing(B)) ===
                reduce(+, skipmissing(B)) === mapreduce(identity, +, skipmissing(B))
        end
        # Compare against the masked array `B`, not the fully populated `A`
        @test mapreduce(cos, *, collect(skipmissing(B))) ≈ mapreduce(cos, *, skipmissing(B))

        # Test block full of missing values
        B[1:length(B)÷2] .= missing
        if T === Int
            @test sum(collect(skipmissing(B))) == sum(skipmissing(B)) ==
                reduce(+, skipmissing(B)) == mapreduce(identity, +, skipmissing(B))
        else
            @test sum(collect(skipmissing(B))) ≈ sum(skipmissing(B)) ==
                reduce(+, skipmissing(B)) == mapreduce(identity, +, skipmissing(B))
        end

        @test mapreduce(cos, *, collect(skipmissing(B))) ≈ mapreduce(cos, *, skipmissing(B))
    end

    # Patterns that exercise code paths for inputs with 1 or 2 non-missing values
    @test sum(skipmissing([1, missing, missing, missing])) === 1
    @test sum(skipmissing([missing, missing, missing, 1])) === 1
    @test sum(skipmissing([1, missing, missing, missing, 2])) === 3
    @test sum(skipmissing([missing, missing, missing, 1, 2])) === 3

    # Inputs with no non-missing value: `+` has a known empty result for `Int`, while
    # reductions with an anonymous `f` or `op` must throw.
    for n in 0:3
        itr = skipmissing(Vector{Union{Int,Missing}}(fill(missing, n)))
        @test sum(itr) == reduce(+, itr) == mapreduce(identity, +, itr) === 0
        @test_throws ArgumentError reduce(x -> x/2, itr)
        @test_throws ArgumentError mapreduce(x -> x/2, +, itr)
    end
end
364415
end
365416

366417
@testset "coalesce" begin

0 commit comments

Comments
 (0)