@@ -864,18 +864,17 @@ usage invertibility is actually a benefit (as is explained below) and adding as
864864little additional memory overhead to each task object as possible is preferred.
865865
866866The goal of jl_rng_split is to perturb the state of each child task's RNG in
867- such a way each that for an entire tree of tasks spawned starting with a given
868- state in a root task, no two tasks have the same RNG state. Moreover, we want to
869- do this in a way that is deterministic and repeatable based on (1) the root
870- task's seed, (2) how many random numbers are generated, and (3) the task tree
871- structure. The RNG state of a parent task is allowed to affect the initial RNG
872- state of a child task, but the mere fact that a child was spawned should not
873- alter the RNG output of the parent. This second requirement rules out using the
874- main RNG to seed children: if we use the main RNG, we either advance it, which
875- affects the parent's RNG stream or, if we don't advance it, then every child
876- would have an identical RNG stream. Therefore some separate state must be
877- maintained and changed upon forking a child task while leaving the main RNG
878- state unchanged.
867+ such a way that for an entire tree of tasks spawned starting with a given root
868+ task state, no two tasks have the same RNG state. Moreover, we want to do this
869+ in a way that is deterministic and repeatable based on (1) the root task's seed,
870+ (2) how many random numbers are generated, and (3) the task tree structure. The
871+ RNG state of a parent task is allowed to affect the initial RNG state of a child
872+ task, but the mere fact that a child was spawned should not alter the RNG output
873+ of the parent. This second requirement rules out using the main RNG to seed
874+ children: if we use the main RNG, we either advance it, which affects the
875+ parent's RNG stream or, if we don't advance it, then every child would have an
876+ identical RNG stream. Therefore some separate state must be maintained and
877+ changed upon forking a child task while leaving the main RNG state unchanged.
879878
880879The basic approach is that used by the DotMix [2] and SplitMix [3] RNG systems:
881880each task is uniquely identified by a sequence of "pedigree" numbers, indicating
@@ -1030,14 +1029,14 @@ cannot have hash collisions. What about parent colliding with child? That can
10301029only happen if all four main RNG registers are perturbed by exactly zero. This
10311030seems unlikely, but could it occur? Consider the core of the output function:
10321031
1033- p ^= p >> ((p >> 59) + 5);
1034- p *= m[i];
1035- p ^= p >> 43
1032+ w ^= w >> ((w >> 59) + 5);
1033+ w *= m[i];
1034+ w ^= w >> 43;
10361035
10371036It's easy to check that this maps zero to zero. An unchanged parent RNG can only
1038- happen if all four `p ` values are zero at the end of this, which implies that
1037+ happen if all four `w` values are zero at the end of this, which implies that
10391038they were all zero at the beginning. However, that is impossible since the four
1040- `p ` values differ from `x` by different additive constants, so they cannot all
1039+ `w` values differ from `x` by different additive constants, so they cannot all
10411040be zero. Stated more generally, the non-collision property is: assuming the main
10421041RNG isn't used between task forks, sibling and parent tasks cannot have RNG
10431042collisions. If the task tree structure is more deeply nested or if there are
@@ -1060,27 +1059,31 @@ void jl_rng_split(uint64_t dst[JL_RNG_SIZE], uint64_t src[JL_RNG_SIZE]) JL_NOTSA
10601059 src [4 ] = dst [4 ] = x * 0xd1342543de82ef95 + 1 ;
10611060 // high spectrum multiplier from https://arxiv.org/abs/2001.05304
10621061
1062+ // random xor constants
10631063 static const uint64_t a [4 ] = {
1064- 0x214c146c88e47cb7 , // random additive offsets...
1064+ 0x214c146c88e47cb7 ,
10651065 0xa66d8cc21285aafa ,
10661066 0x68c7ef2d7b1a54d4 ,
10671067 0xb053a7d7aa238c61
10681068 };
1069+ // random odd multipliers
10691070 static const uint64_t m [4 ] = {
10701071 0xaef17502108ef2d9 , // standard PCG multiplier
1071- 0xf34026eeb86766af , // random odd multipliers...
1072- 0x38fd70ad58dd9fbb ,
1073- 0x6677f9b93ab0c04d
1072+ 0x5329a060d41b0fe3 ,
1073+ 0x1028b28b062ae5b9 ,
1074+ 0x6095c81c297fdbc5
10741075 };
10751076
10761077 // PCG-RXS-M-XS-64 output with four variants
10771078 for (int i = 0 ; i < 4 ; i ++ ) {
1078- uint64_t s = bswap_64 (src [i ]);
1079- uint64_t w = x + a [i ];
1080- w ^= w >> ((w >> 59 ) + 5 );
1081- w *= m [i ];
1082- w ^= w >> 43 ;
1083- dst [i ] = 2 * s * w + s + w ; // (2s+1)(2w+1) ÷ 2
1079+ uint64_t s = src [i ];
1080+ uint64_t w = x ^ a [i ];
1081+ s += w * (2 * s + 1 ); // s = (2s+1)(2w+1)÷2 % 2^64
1082+ s ^= s >> ((s >> 59 ) + 5 );
1083+ s *= m [i ];
1084+ s ^= s >> 43 ;
1085+ // mix key into the state
1086+ dst [i ] = s ;
10841087 }
10851088}
10861089
0 commit comments