|
| 1 | +# Based on https://benchmarksgame-team.pages.debian.net/benchmarksgame/program/nbody-rust-7.html |
| 2 | + |
| 3 | +module NBody |
| 4 | + |
| 5 | +using StaticArrays, SIMD, Printf |
| 6 | +using Base: llvmcall |
| 7 | + |
# Mass of the Sun in the benchmark's unit system; the planet masses in `main`
# are given as fractions of this value.
const solar_mass = 4π^2
# Scale factor applied to the tabulated planet velocities in `init_bodies!`.
const days_per_year = 365.24
# Number of simulated bodies (the Sun plus four planets).
const NBODIES = 5
# Number of unordered body pairs, n(n-1)/2, computed with integer division.
const NPAIRS = NBODIES * (NBODIES - 1) ÷ 2
# Every index pair (i, j) with i < j, derived from NBODIES rather than a
# hard-coded 5 so the table stays consistent with the body count.
const PAIRS = Tuple((i, j) for i = 1:NBODIES, j = 1:NBODIES if j > i)
| 13 | + |
"""
    Bodies

State of the simulated system. Row `i` of each matrix belongs to body `i`;
the three columns are the Cartesian components.

# Fields
- `x`: positions (`NBODIES × 3`).
- `v`: velocities (`NBODIES × 3`).
- `m`: mass of each body.

The matrix fields spell out the full `MMatrix` type, including the length
parameter, so that the fields are concretely typed.
"""
struct Bodies
    x::MMatrix{NBODIES, 3, Float64, NBODIES * 3}
    v::MMatrix{NBODIES, 3, Float64, NBODIES * 3}
    m::NTuple{NBODIES, Float64}
end
| 19 | + |
"""
    @const_unroll for i = range ... end
    @const_unroll for i = range1, j = range2 ... end

Fully unroll a `for` loop whose iteration ranges can be evaluated at macro
expansion time. The loop is replaced by a sequence of `let` blocks, one per
iteration, each binding the loop variable(s) to a constant value and running a
copy of the body.

Range expressions are evaluated in the module where the macro is invoked.
With several loop variables the copies are emitted in the same order a regular
`for` loop would visit them (last variable varies fastest).

Throws an `ArgumentError` at expansion time when the argument is not a `for`
loop.
"""
macro const_unroll(for_loop)
    if !(for_loop isa Expr && for_loop.head === :for)
        throw(ArgumentError("@const_unroll expects a `for` loop"))
    end
    header, body = for_loop.args

    # `for i = a, j = b` stores its iteration specs in a :block; a single
    # spec is stored directly.
    specs = (header isa Expr && header.head === :block) ? header.args : Any[header]
    loop_syms = Any[spec.args[1] for spec in specs]
    # Evaluate the bounds in the caller's module, not the macro's own module.
    loop_ranges = Any[Core.eval(__module__, spec.args[2]) for spec in specs]

    unrolled = Any[]
    # Iterators.product varies its FIRST iterator fastest, whereas a `for`
    # loop varies its LAST variable fastest; iterate over the reversed ranges
    # and flip each tuple back to recover the `for` ordering.
    for reversed_vals in Iterators.product(reverse(loop_ranges)...)
        vals = reverse(reversed_vals)
        bindings = Any[
            Expr(:(=), esc(sym), esc(val)) for (sym, val) in zip(loop_syms, vals)
        ]
        push!(unrolled, Expr(:let, Expr(:block, bindings...), esc(body)))
    end

    return Expr(:block, unrolled...)
end
| 38 | + |
"""
    init_bodies!(bodies)

Fill `bodies.x` and `bodies.v` with the initial state of the five bodies.
Row 1 is the Sun (all zeros); rows 2-5 are Jupiter, Saturn, Uranus and Neptune.
Each tabulated planet velocity is multiplied by `days_per_year` before being
stored.
"""
function init_bodies!(bodies)
    pos, vel = bodies.x, bodies.v

    # Sun
    pos[1, :] = [0, 0, 0]
    vel[1, :] = [0, 0, 0]

    # Jupiter
    jupiter_pos = [
        4.84143144246472090e+00,
        -1.16032004402742839e+00,
        -1.03622044471123109e-01,
    ]
    jupiter_vel = [
        1.66007664274403694e-03,
        7.69901118419740425e-03,
        -6.90460016972063023e-05,
    ]
    pos[2, :] = jupiter_pos
    vel[2, :] = jupiter_vel .* days_per_year

    # Saturn
    saturn_pos = [
        8.34336671824457987e+00,
        4.12479856412430479e+00,
        -4.03523417114321381e-01,
    ]
    saturn_vel = [
        -2.76742510726862411e-03,
        4.99852801234917238e-03,
        2.30417297573763929e-05,
    ]
    pos[3, :] = saturn_pos
    vel[3, :] = saturn_vel .* days_per_year

    # Uranus
    uranus_pos = [
        1.28943695621391310e+01,
        -1.51111514016986312e+01,
        -2.23307578892655734e-01,
    ]
    uranus_vel = [
        2.96460137564761618e-03,
        2.37847173959480950e-03,
        -2.96589568540237556e-05,
    ]
    pos[4, :] = uranus_pos
    vel[4, :] = uranus_vel .* days_per_year

    # Neptune
    neptune_pos = [
        1.53796971148509165e+01,
        -2.59193146099879641e+01,
        1.79258772950371181e-01,
    ]
    neptune_vel = [
        2.68067772490389322e-03,
        1.62824170038242295e-03,
        -9.51592254519715870e-05,
    ]
    pos[5, :] = neptune_pos
    vel[5, :] = neptune_vel * days_per_year
end
| 93 | + |
# Two-lane Float64 SIMD vector used by the distance kernel in `advance`.
const v2d = Vec{2, Float64}
# Raw two-lane Float64 tuple, the argument/result type of the packed-double
# intrinsic wrappers.
const __m128d = NTuple{2, VecElement{Float64}}
# Raw four-lane Float32 tuple, the argument/result type of the packed-single
# intrinsic wrappers.
const __m128 = NTuple{4, VecElement{Float32}}
| 97 | + |
"""
    rsqrt_pd(v2::v2d)

Return a lane-wise approximation of `1 / sqrt(v2)`. The vector's underlying
tuple is handed to `rsqrt_ccall` and the result is wrapped back into a `v2d`.
"""
@inline function rsqrt_pd(v2::v2d)
    approx_lanes = rsqrt_ccall(v2.elts)
    return v2d(approx_lanes)
end
| 101 | + |
"""
    rsqrt_pd_newton(v2::v2d)

Return a refined lane-wise estimate of `1 / sqrt(v2)`: the approximation from
`rsqrt_pd` improved by a single Newton-Raphson step,
`y * 1.5 - (0.5 * v2 * y) * y^2`.
"""
@inline function rsqrt_pd_newton(v2::v2d)
    y = rsqrt_pd(v2)
    half_v2 = 0.5 * v2
    # We only need one Newton step to achieve desired accuracy
    return y * 1.5 - (half_v2 * y) * (y * y)
end
| 108 | + |
# Approximate reciprocal square root of four packed Float32 lanes, via the
# LLVM intrinsic `llvm.x86.sse.rsqrt.ps`.
function rsqrt(f::__m128)
    return ccall("llvm.x86.sse.rsqrt.ps", llvmcall, __m128, (__m128,), f)
end

# Convert two packed Float64 lanes to Float32, via the LLVM intrinsic
# `llvm.x86.sse2.cvtpd2ps`; the result is a four-lane Float32 vector.
function _mm_cvtpd_ps(f::__m128d)
    return ccall("llvm.x86.sse2.cvtpd2ps", llvmcall, __m128, (__m128d,), f)
end

# Widen the two low Float32 lanes (shuffle indices 0 and 1) to Float64.
function _mm_cvtps_pd(f::__m128)
    return llvmcall(("", "
 %2 = shufflevector <4 x float> %0, <4 x float> undef, <2 x i32> <i32 0, i32 1>
 %3 = fpext <2 x float> %2 to <2 x double>
 ret <2 x double> %3"),
        __m128d,
        Tuple{__m128}, f)
end

# Approximate 1/sqrt of two doubles: narrow to Float32, apply the packed
# single-precision rsqrt, then widen the result back to Float64.
@inline function rsqrt_ccall(f::__m128d)
    narrowed = _mm_cvtpd_ps(f)
    return _mm_cvtps_pd(rsqrt(narrowed))
end
| 118 | + |
"""
    advance(x, v, m, dt, dx, dmag)

Advance the system by one time step `dt`, mutating `x` and `v` in place.

# Arguments
- `x`, `v`: position and velocity matrices (`NBODIES × 3`, one row per body).
- `m`: mass of each body.
- `dt`: time step.
- `dx`: scratch buffer (`NPAIRS × 3`); receives `x[i, :] - x[j, :]` for every
  pair `(i, j)` in `PAIRS`.
- `dmag`: scratch buffer (`NPAIRS`); receives `dt / distance^3` for every pair.

The step runs in three passes: (1) pair separations and magnitudes, computed
two pairs at a time with 2-lane SIMD vectors; (2) velocity updates from each
pair interaction; (3) position updates `x += dt * v`.
"""
@inline function advance(
    x::MMatrix{NBODIES, 3, Float64, NBODIES * 3},
    v::MMatrix{NBODIES, 3, Float64, NBODIES * 3},
    m::NTuple{NBODIES, Float64},
    dt::Float64,
    dx::MMatrix{NPAIRS, 3, Float64, NPAIRS * 3},
    dmag::MVector{NPAIRS, Float64})

    # The raw pointers below do not root `dx`/`dmag`; keep both objects
    # explicitly alive for as long as the pointers are in use.
    GC.@preserve dx dmag begin
        dmag_v2d_ptr = Base.unsafe_convert(Ptr{v2d}, pointer_from_objref(dmag))
        dx_v2d_ptr = Base.unsafe_convert(Ptr{v2d}, pointer_from_objref(dx))

        # Unroll loop to calculate distances + store two at a time
        @inbounds for k1 = 1:2:length(PAIRS)
            k2 = k1 + 1
            # Index of the 2-lane slot covering pairs k1 and k2.
            k_v2d = k2 ÷ 2

            i1, j1 = PAIRS[k1]
            i2, j2 = PAIRS[k2]

            dx1 = v2d((x[i1, 1], x[i2, 1])) - v2d((x[j1, 1], x[j2, 1]))
            dx2 = v2d((x[i1, 2], x[i2, 2])) - v2d((x[j1, 2], x[j2, 2]))
            dx3 = v2d((x[i1, 3], x[i2, 3])) - v2d((x[j1, 3], x[j2, 3]))
            # `dx` is column-major: each column holds NPAIRS doubles, i.e.
            # NPAIRS ÷ 2 two-lane slots, hence the per-column slot offsets.
            unsafe_store!(dx_v2d_ptr, dx1, k_v2d)
            unsafe_store!(dx_v2d_ptr, dx2, k_v2d + NPAIRS ÷ 2)
            unsafe_store!(dx_v2d_ptr, dx3, k_v2d + NPAIRS)

            dsq = dx1^2 + dx2^2 + dx3^2
            drsqrt = rsqrt_pd_newton(dsq)
            # dt / distance^3, written as dt * (1 / distance) / distance^2.
            mag = dt * drsqrt / dsq
            unsafe_store!(dmag_v2d_ptr, mag, k_v2d)
        end
    end

    # Apply each pair interaction to the velocities of both bodies.
    @inbounds for k = 1:length(PAIRS)
        i, j = PAIRS[k]

        dmag_i = dmag[k] * m[i]
        dmag_j = dmag[k] * m[j]
        for d = 1:3
            dx_k = dx[k, d]
            v[i, d] -= dx_k * dmag_j
            v[j, d] += dx_k * dmag_i
        end
    end

    # Move every body along its updated velocity (fully unrolled).
    @inbounds begin
        @const_unroll for i = 1:NBODIES
            @const_unroll for d = 1:3
                x[i, d] += dt * v[i, d]
            end
        end
    end
end
| 172 | + |
"""
    energy(bodies)

Return the total energy of the system: for every body the kinetic term
`0.5 * m * |v|^2`, minus `m[i] * m[j] / distance` for every pair `i < j`.
"""
function energy(bodies)
    pos, vel, mass = bodies.x, bodies.v, bodies.m
    total = 0.0
    for i in 1:NBODIES
        # Kinetic contribution of body i.
        total += 0.5 * mass[i] * sum(vel[i, :].^2)
        # Potential contribution of every pair (i, j) with j > i.
        for j in (i + 1):NBODIES
            delta = pos[i, :] - pos[j, :]
            separation = sqrt(sum(delta .* delta))
            total -= (mass[i] * mass[j]) / separation
        end
    end
    return total
end
| 186 | + |
"""
    init_sun!(bodies)

Set the velocity of body 1 (the Sun) to minus the summed momentum
`sum(v[i, :] * m[i])` of all bodies, divided by `solar_mass`.
"""
function init_sun!(bodies)
    vel, mass = bodies.v, bodies.m
    momentum = [0.0, 0.0, 0.0]
    for body in 1:NBODIES
        momentum = momentum + vel[body, :] * mass[body]
    end
    vel[1, :] = -momentum ./ solar_mass
end
| 194 | + |
"""
    main(iterations::Integer)

Run the simulation: set up the five-body system, print its energy, advance it
`iterations` times with a time step of `0.01`, and print the energy again.
Both energies are printed with nine decimal places, one per line.
"""
function main(iterations::Integer)
    # Static-array lengths are derived from the body/pair counts instead of
    # being repeated as literal numbers.
    x = zeros(MMatrix{NBODIES, 3, Float64, NBODIES * 3})
    v = zeros(MMatrix{NBODIES, 3, Float64, NBODIES * 3})
    # Masses, tabulated as fractions of the solar mass (the Sun first).
    m = NTuple{NBODIES, Float64}((
        1.0,
        9.54791938424326609e-04,
        2.85885980666130812e-04,
        4.36624404335156298e-05,
        5.15138902046611451e-05,
    ) .* solar_mass)
    bodies = Bodies(x, v, m)

    init_bodies!(bodies)
    init_sun!(bodies)
    @printf("%.9f\n", energy(bodies))

    # Scratch buffers reused by every call to `advance`.
    dx = zeros(MMatrix{NPAIRS, 3, Float64, NPAIRS * 3})
    dmag = zeros(MVector{NPAIRS, Float64})
    for _ = 1:iterations
        advance(x, v, m, 0.01, dx, dmag)
    end

    @printf("%.9f\n", energy(bodies))
end
| 222 | + |
| 223 | +end |
| 224 | + |
# Time the simulation twice with the iteration count given as the first
# command-line argument; the first run also pays for JIT compilation.
for _ in 1:2
    @time NBody.main(parse(Int64, ARGS[1]))
end
| 227 | +# using StaticArrays, InteractiveUtils |
| 228 | +# code_native(nb.advance, |
| 229 | +# (MMatrix{nb.NBODIES, 3, Float64, nb.NBODIES * 3}, |
| 230 | +# MMatrix{nb.NBODIES, 3, Float64, nb.NBODIES * 3}, |
| 231 | +# NTuple{nb.NBODIES, Float64}, |
| 232 | +# Float64, |
| 233 | +# MMatrix{nb.NPAIRS, 3, Float64, nb.NPAIRS * 3}, |
| 234 | +# MVector{nb.NPAIRS, Float64})) |
0 commit comments