Skip to content

Eliding SubArray construction (getting greedy) #10899

@timholy

Description

@timholy

With the tuple overhaul merged, I wonder if we're getting closer to being able to hope that SubArray construction might someday become a no-op in certain circumstances. Here's my "elide-friendly" test function:

function sumcols(A)
    s = 0.0
    for j = 1:size(A,2)
        Aj = slice(A, :, j)
        for i = 1:size(A,1)  # length(Aj)
            @inbounds s += A[i,j]  # Aj[i]
        end
    end
    s
end

Note that this is written in a way that emphasizes that escape-analysis is not needed to make this non-allocating, because Aj is not actually used.

I also took the liberty of locally sprinkling @_inline_meta()s (and equivalent) in all relevant places and rebuilding julia.

For a Matrix{Float64} A, I get this from @time:

julia> @time sumcols(A)
elapsed time: 0.000669812 seconds (859 kB allocated)
24920.401360340726

and this from @code_typed sumcols(A):

1-element Array{Any,1}:
 :($(Expr(:lambda, Any[:A], Any[Any[:s,symbol("#s2"),:j,:Aj,symbol("#s3"),:i,symbol("######f#8031#8033#8035"),symbol("######s#8032#8034#8036")],Any[Any[:A,Array{Float64,2},0],Any[:s,Float64,2],Any[symbol("#s2"),Int64,2],Any[:j,Int64,18],Any[:Aj,SubArray{Float64,1,Array{Float64,2},Tuple{Colon,Int64},2},18],Any[symbol("#s3"),Int64,2],Any[:i,Int64,18],Any[symbol("######f#8031#8033#8035"),Int64,2],Any[symbol("######s#8032#8034#8036"),Int64,2]],Any[],Any[UnitRange{Int64},Tuple{Int64,Int64},UnitRange{Int64},Tuple{Int64,Int64},Int64,Colon,Int64,Int64,Tuple{Int64},Int64,Int64,Int64,Int64,Int64,Int64]], :(begin  # none, line 2:
        s = 0.0 # line 3:
        GenSym(4) = (top(arraysize))(A::Array{Float64,2},2)::Int64
        GenSym(0) = $(Expr(:new, UnitRange{Int64}, 1, :(((top(getfield))(Intrinsics,:select_value))((top(sle_int))(1,GenSym(4))::Bool,GenSym(4),(top(box))(Int64,(top(sub_int))(1,1)))::Int64)))
        #s2 = (top(getfield))(GenSym(0),:start)::Int64
        unless (top(box))(Bool,(top(not_int))(#s2::Int64 === (top(box))(Int64,(top(add_int))((top(getfield))(GenSym(0),:stop)::Int64,1))::Bool)) goto 1
        2: 
        GenSym(11) = #s2::Int64
        GenSym(12) = (top(box))(Int64,(top(add_int))(#s2::Int64,1))
        j = GenSym(11)
        #s2 = GenSym(12) # line 4:
        GenSym(5) = (:)
        (top(arraysize))(A::Array{Float64,2},1)::Int64
        nothing
        checkbounds((top(arraysize))(A::Array{Float64,2},2)::Int64,j::Int64)::Bool
        ######f#8031#8033#8035 = 1
        ######s#8032#8034#8036 = 1
        ######f#8031#8033#8035 = (top(box))(Int64,(top(add_int))(######f#8031#8033#8035::Int64,(top(box))(Int64,(top(mul_int))((top(box))(Int64,(top(sub_int))(1,1)),######s#8032#8034#8036::Int64))))
        GenSym(6) = (top(arraysize))(A::Array{Float64,2},1)::Int64
        ######s#8032#8034#8036 = (top(box))(Int64,(top(mul_int))(######s#8032#8034#8036::Int64,GenSym(6)))
        ######f#8031#8033#8035 = (top(box))(Int64,(top(add_int))(######f#8031#8033#8035::Int64,(top(box))(Int64,(top(mul_int))((top(box))(Int64,(top(sub_int))(j::Int64,1)),######s#8032#8034#8036::Int64))))
        GenSym(7) = (top(arraysize))(A::Array{Float64,2},2)::Int64
        ######s#8032#8034#8036 = (top(box))(Int64,(top(mul_int))(######s#8032#8034#8036::Int64,GenSym(7)))
        GenSym(9) = (top(arraysize))(A::Array{Float64,2},1)::Int64
        Aj = $(Expr(:new, SubArray{Float64,1,Array{Float64,2},Tuple{Colon,Int64},2}, :(A::Array{Float64,2}), :(tuple(GenSym(5),j::Int64)::Tuple{Colon,Int64}), :(tuple(GenSym(9))::Tuple{Int64}), :(######f#8031#8033#8035::Int64), :((top(box))(Int64,(top(mul_int))(1,1))))) # line 5:
        GenSym(10) = (top(arraysize))(A::Array{Float64,2},1)::Int64
        GenSym(2) = $(Expr(:new, UnitRange{Int64}, 1, :(((top(getfield))(Intrinsics,:select_value))((top(sle_int))(1,GenSym(10))::Bool,GenSym(10),(top(box))(Int64,(top(sub_int))(1,1)))::Int64)))
        #s3 = (top(getfield))(GenSym(2),:start)::Int64
        unless (top(box))(Bool,(top(not_int))(#s3::Int64 === (top(box))(Int64,(top(add_int))((top(getfield))(GenSym(2),:stop)::Int64,1))::Bool)) goto 5
        6: 
        GenSym(13) = #s3::Int64
        GenSym(14) = (top(box))(Int64,(top(add_int))(#s3::Int64,1))
        i = GenSym(13)
        #s3 = GenSym(14) # line 6:
        s = (top(box))(Float64,(top(add_float))(s::Float64,(top(arrayref))((top(getfield))(Aj::SubArray{Float64,1,Array{Float64,2},Tuple{Colon,Int64},2},:parent)::Array{Float64,2},(top(box))(Int64,(top(sub_int))((top(box))(Int64,(top(add_int))((top(getfield))(Aj::SubArray{Float64,1,Array{Float64,2},Tuple{Colon,Int64},2},:first_index)::Int64,i::Int64)),1)))::Float64))
        7: 
        unless (top(box))(Bool,(top(not_int))((top(box))(Bool,(top(not_int))(#s3::Int64 === (top(box))(Int64,(top(add_int))((top(getfield))(GenSym(2),:stop)::Int64,1))::Bool)))) goto 6
        5: 
        4: 
        3: 
        unless (top(box))(Bool,(top(not_int))((top(box))(Bool,(top(not_int))(#s2::Int64 === (top(box))(Int64,(top(add_int))((top(getfield))(GenSym(0),:stop)::Int64,1))::Bool)))) goto 2
        1: 
        0:  # line 9:
        return s::Float64
    end::Float64))))

From what I can tell, this demonstrates that everything was indeed inlined (a search for call turns up empty).

The corresponding LLVM IR has several alloca/allocobj calls in it, which I presume is the source of the allocation.

Users-list discussion: https://groups.google.com/d/msg/julia-users/sq5gj-3TdQU/NB0bjmqWjZ0J

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions