Labels
compiler:codegen (Generation of LLVM IR and native code), performance (Must go faster)
Description
I have an immutable type that wraps an NTuple and noticed that indexing into the wrapped tuple is much slower than indexing into the naked tuple.
Here is some code that defines indexing into a tuple and into a wrapped tuple:
function get_idx_tuple(n::NTuple, i::Int)
    @inbounds v = n[i]
    return v
end

immutable TupleWrap{N, T}
    data::NTuple{N, T}
end

function get_idx_tuplewrap(n::TupleWrap, i::Int)
    @inbounds v = n.data[i]
    return v
end
tuple = (rand(15)...)
tuple_wrap = TupleWrap((rand(15)...))
Indexing into the tuple:
julia> @code_native get_idx_tuple(tuple, 5)
.text
Filename: none
Source line: 0
pushq %rbp
movq %rsp, %rbp
Source line: 2
vmovsd -8(%rdi,%rsi,8), %xmm0 # xmm0 = mem[0],zero
Source line: 3
popq %rbp
retq
Indexing into the wrapper:
julia> @code_native get_idx_tuplewrap(tuple_wrap, 5)
.text
Filename: none
Source line: 0
pushq %rbp
movq %rsp, %rbp
Source line: 2
vmovsd 112(%rdi), %xmm8 # xmm8 = mem[0],zero
vmovsd 104(%rdi), %xmm9 # xmm9 = mem[0],zero
vmovsd 96(%rdi), %xmm10 # xmm10 = mem[0],zero
vmovsd 88(%rdi), %xmm11 # xmm11 = mem[0],zero
vmovsd 80(%rdi), %xmm12 # xmm12 = mem[0],zero
vmovsd 72(%rdi), %xmm13 # xmm13 = mem[0],zero
vmovsd 64(%rdi), %xmm14 # xmm14 = mem[0],zero
vmovsd 56(%rdi), %xmm7 # xmm7 = mem[0],zero
vmovsd 48(%rdi), %xmm0 # xmm0 = mem[0],zero
vmovsd 40(%rdi), %xmm1 # xmm1 = mem[0],zero
vmovsd 32(%rdi), %xmm2 # xmm2 = mem[0],zero
vmovsd 24(%rdi), %xmm3 # xmm3 = mem[0],zero
vmovsd 16(%rdi), %xmm4 # xmm4 = mem[0],zero
vmovsd (%rdi), %xmm5 # xmm5 = mem[0],zero
vmovsd 8(%rdi), %xmm6 # xmm6 = mem[0],zero
vmovsd %xmm5, -120(%rbp)
vmovsd %xmm6, -112(%rbp)
vmovsd %xmm4, -104(%rbp)
vmovsd %xmm3, -96(%rbp)
vmovsd %xmm2, -88(%rbp)
vmovsd %xmm1, -80(%rbp)
vmovsd %xmm0, -72(%rbp)
vmovsd %xmm7, -64(%rbp)
vmovsd %xmm14, -56(%rbp)
vmovsd %xmm13, -48(%rbp)
vmovsd %xmm12, -40(%rbp)
vmovsd %xmm11, -32(%rbp)
vmovsd %xmm10, -24(%rbp)
vmovsd %xmm9, -16(%rbp)
vmovsd %xmm8, -8(%rbp)
vmovsd -128(%rbp,%rsi,8), %xmm0 # xmm0 = mem[0],zero
Source line: 3
popq %rbp
retq
I am not good at assembler, but it looks like the wrapped indexing copies every element of the tuple onto the stack instead of just reading directly from the offset into the data. For longer tuples this starts to generate extremely long code when (in my layman's guess) the registers run out.
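In case it helps to reproduce the timing difference, here is a minimal benchmarking sketch reusing the definitions above. The helper names, loop count, and the use of a summation accumulator are my own arbitrary choices, and the actual numbers will depend on the machine and Julia version:
function bench_tuple(t, n)
    # accumulate the results so the indexed reads are less likely
    # to be optimized away entirely
    s = 0.0
    for k in 1:n
        s += get_idx_tuple(t, 5)
    end
    return s
end

function bench_tuplewrap(tw, n)
    s = 0.0
    for k in 1:n
        s += get_idx_tuplewrap(tw, 5)
    end
    return s
end

# warm up once to exclude compilation time, then time both paths
bench_tuple(tuple, 1); bench_tuplewrap(tuple_wrap, 1)
@time bench_tuple(tuple, 10^7)
@time bench_tuplewrap(tuple_wrap, 10^7)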