-
-
Notifications
You must be signed in to change notification settings - Fork 5.7k
Description
With the tuple overhaul merged, I wonder if we're getting closer to being able to hope that SubArray construction might someday become a no-op in certain circumstances. Here's my "elide-friendly" test function:
function sumcols(A)
s = 0.0
for j = 1:size(A,2)
Aj = slice(A, :, j)
for i = 1:size(A,1) # length(Aj)
@inbounds s += A[i,j] # Aj[i]
end
end
s
endNote that this is written in a way that emphasizes that escape-analysis is not needed to make this non-allocating, because Aj is not actually used.
I also took the liberty of locally sprinkling @_inline_meta()s (and equivalent) in all relevant places and rebuilding julia.
For a Matrix{Float64} A, I get this from @time:
julia> @time sumcols(A)
elapsed time: 0.000669812 seconds (859 kB allocated)
24920.401360340726and this from @code_typed sumcols(A):
1-element Array{Any,1}:
:($(Expr(:lambda, Any[:A], Any[Any[:s,symbol("#s2"),:j,:Aj,symbol("#s3"),:i,symbol("######f#8031#8033#8035"),symbol("######s#8032#8034#8036")],Any[Any[:A,Array{Float64,2},0],Any[:s,Float64,2],Any[symbol("#s2"),Int64,2],Any[:j,Int64,18],Any[:Aj,SubArray{Float64,1,Array{Float64,2},Tuple{Colon,Int64},2},18],Any[symbol("#s3"),Int64,2],Any[:i,Int64,18],Any[symbol("######f#8031#8033#8035"),Int64,2],Any[symbol("######s#8032#8034#8036"),Int64,2]],Any[],Any[UnitRange{Int64},Tuple{Int64,Int64},UnitRange{Int64},Tuple{Int64,Int64},Int64,Colon,Int64,Int64,Tuple{Int64},Int64,Int64,Int64,Int64,Int64,Int64]], :(begin # none, line 2:
s = 0.0 # line 3:
GenSym(4) = (top(arraysize))(A::Array{Float64,2},2)::Int64
GenSym(0) = $(Expr(:new, UnitRange{Int64}, 1, :(((top(getfield))(Intrinsics,:select_value))((top(sle_int))(1,GenSym(4))::Bool,GenSym(4),(top(box))(Int64,(top(sub_int))(1,1)))::Int64)))
#s2 = (top(getfield))(GenSym(0),:start)::Int64
unless (top(box))(Bool,(top(not_int))(#s2::Int64 === (top(box))(Int64,(top(add_int))((top(getfield))(GenSym(0),:stop)::Int64,1))::Bool)) goto 1
2:
GenSym(11) = #s2::Int64
GenSym(12) = (top(box))(Int64,(top(add_int))(#s2::Int64,1))
j = GenSym(11)
#s2 = GenSym(12) # line 4:
GenSym(5) = (:)
(top(arraysize))(A::Array{Float64,2},1)::Int64
nothing
checkbounds((top(arraysize))(A::Array{Float64,2},2)::Int64,j::Int64)::Bool
######f#8031#8033#8035 = 1
######s#8032#8034#8036 = 1
######f#8031#8033#8035 = (top(box))(Int64,(top(add_int))(######f#8031#8033#8035::Int64,(top(box))(Int64,(top(mul_int))((top(box))(Int64,(top(sub_int))(1,1)),######s#8032#8034#8036::Int64))))
GenSym(6) = (top(arraysize))(A::Array{Float64,2},1)::Int64
######s#8032#8034#8036 = (top(box))(Int64,(top(mul_int))(######s#8032#8034#8036::Int64,GenSym(6)))
######f#8031#8033#8035 = (top(box))(Int64,(top(add_int))(######f#8031#8033#8035::Int64,(top(box))(Int64,(top(mul_int))((top(box))(Int64,(top(sub_int))(j::Int64,1)),######s#8032#8034#8036::Int64))))
GenSym(7) = (top(arraysize))(A::Array{Float64,2},2)::Int64
######s#8032#8034#8036 = (top(box))(Int64,(top(mul_int))(######s#8032#8034#8036::Int64,GenSym(7)))
GenSym(9) = (top(arraysize))(A::Array{Float64,2},1)::Int64
Aj = $(Expr(:new, SubArray{Float64,1,Array{Float64,2},Tuple{Colon,Int64},2}, :(A::Array{Float64,2}), :(tuple(GenSym(5),j::Int64)::Tuple{Colon,Int64}), :(tuple(GenSym(9))::Tuple{Int64}), :(######f#8031#8033#8035::Int64), :((top(box))(Int64,(top(mul_int))(1,1))))) # line 5:
GenSym(10) = (top(arraysize))(A::Array{Float64,2},1)::Int64
GenSym(2) = $(Expr(:new, UnitRange{Int64}, 1, :(((top(getfield))(Intrinsics,:select_value))((top(sle_int))(1,GenSym(10))::Bool,GenSym(10),(top(box))(Int64,(top(sub_int))(1,1)))::Int64)))
#s3 = (top(getfield))(GenSym(2),:start)::Int64
unless (top(box))(Bool,(top(not_int))(#s3::Int64 === (top(box))(Int64,(top(add_int))((top(getfield))(GenSym(2),:stop)::Int64,1))::Bool)) goto 5
6:
GenSym(13) = #s3::Int64
GenSym(14) = (top(box))(Int64,(top(add_int))(#s3::Int64,1))
i = GenSym(13)
#s3 = GenSym(14) # line 6:
s = (top(box))(Float64,(top(add_float))(s::Float64,(top(arrayref))((top(getfield))(Aj::SubArray{Float64,1,Array{Float64,2},Tuple{Colon,Int64},2},:parent)::Array{Float64,2},(top(box))(Int64,(top(sub_int))((top(box))(Int64,(top(add_int))((top(getfield))(Aj::SubArray{Float64,1,Array{Float64,2},Tuple{Colon,Int64},2},:first_index)::Int64,i::Int64)),1)))::Float64))
7:
unless (top(box))(Bool,(top(not_int))((top(box))(Bool,(top(not_int))(#s3::Int64 === (top(box))(Int64,(top(add_int))((top(getfield))(GenSym(2),:stop)::Int64,1))::Bool)))) goto 6
5:
4:
3:
unless (top(box))(Bool,(top(not_int))((top(box))(Bool,(top(not_int))(#s2::Int64 === (top(box))(Int64,(top(add_int))((top(getfield))(GenSym(0),:stop)::Int64,1))::Bool)))) goto 2
1:
0: # line 9:
return s::Float64
end::Float64))))
From what I can tell, this demonstrates that everything was indeed inlined (a search for call turns up empty).
The corresponding LLVM IR has several alloca/allocobj calls in it, which I presume is the source of the allocation.
Users-list discussion: https://groups.google.com/d/msg/julia-users/sq5gj-3TdQU/NB0bjmqWjZ0J