Skip to content

Commit 204b32a

Browse files
committed
ballet: Add Reed-Solomon encoder
1 parent 7bd8117 commit 204b32a

18 files changed

+6973
-0
lines changed

src/ballet/reedsol/Local.mk

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
$(call add-hdrs,fd_reedsol.h)
2+
ifdef FD_HAS_GFNI
3+
$(call add-asms,fd_reedsol_gfni_32,fd_ballet)
4+
endif
5+
$(call add-objs,fd_reedsol,fd_ballet)
6+
$(call add-objs,fd_reedsol_internal,fd_ballet)
7+
$(call make-unit-test,test_reedsol,test_reedsol,fd_ballet fd_util)
16 KB
Binary file not shown.
2.5 KB
Binary file not shown.
8 KB
Binary file not shown.

src/ballet/reedsol/fd_reedsol.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#include "fd_reedsol.h"
2+
#include "fd_reedsol_internal.h"
3+
4+
void fd_reedsol_encode_fini( fd_reedsol_t * rs ) {
5+
#if FD_HAS_GFNI
6+
if( FD_LIKELY( (rs->data_shred_cnt==32UL) & (rs->parity_shred_cnt==32UL ) ) )
7+
fd_reedsol_encode_32_32( rs->shred_sz, (uchar const * *)rs->data_shred, rs->parity_shred, rs->scratch );
8+
else
9+
fd_reedsol_encode( rs->shred_sz, (uchar const * *)rs->data_shred, rs->data_shred_cnt, rs->parity_shred, rs->parity_shred_cnt );
10+
#else
11+
fd_reedsol_encode( rs->shred_sz, (uchar const * *)rs->data_shred, rs->data_shred_cnt, rs->parity_shred, rs->parity_shred_cnt );
12+
#endif
13+
14+
rs->data_shred_cnt = 0UL;
15+
rs->parity_shred_cnt = 0UL;
16+
}

src/ballet/reedsol/fd_reedsol.h

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
#ifndef HEADER_fd_src_ballet_reedsol_fd_reedsol_h
2+
#define HEADER_fd_src_ballet_reedsol_fd_reedsol_h
3+
4+
/* fd_reedsol provides APIs for producing Reed-Solomon encoded parity
5+
data and for reconstructing missing data from parity data. The
6+
encoding process is optimized, and highly optimized for Turbine's
7+
typical case.
8+
9+
Reed-Solomon works in GF(2^8), i.e. the codeword size is 1 byte, but
10+
is typically used on each byte of larger pieces of data called
11+
shreds (a Solana-specific term, often called shards elswhere in the
12+
literature). Mathematically, the encoding process forms a vector
13+
from the input data, taking one byte from each shred, and
14+
left-multiplies the vector by a constant matrix in GF(2^8). The
15+
resulting vector contains one byte for each of the parity shreds.
16+
Solana also calls parity shreds "code" shreds, but due to the naming
17+
collision with executable code, we have opted for "parity." This
18+
mathematical structure thus forces each shred to be of identical size
19+
but doesn't otherwise impose any size restrictions. */
20+
21+
#include "../../util/fd_util.h"
22+
23+
// TODO: Define decode API
24+
//#define SET_NAME reedsol_shred_set
25+
//#include "../../util/tmpl/fd_smallset.c"
26+
27+
/* FD_REEDSOL_{DATA, PARITY}_SHREDS_MAX describe the inclusive maximum
28+
number of data and parity shreds that this implementation supports.
29+
These limits are not mathematical limits, but limits based on current
30+
Solana needs and performance. It is common for both shred counts to
31+
be at their maximum values. */
32+
#define FD_REEDSOL_DATA_SHREDS_MAX (32UL)
33+
#define FD_REEDSOL_PARITY_SHREDS_MAX (32UL)
34+
35+
36+
#define FD_REEDSOL_ALIGN (128UL)
37+
#define FD_REEDSOL_FOOTPRINT (1664UL)
38+
39+
struct __attribute__((aligned(FD_REEDSOL_ALIGN))) fd_reedsol_private {
40+
uchar scratch[ 1024 ]; /* Used for the ultra high performance implementation */
41+
42+
/* shred_sz: the size of each shred in bytes (all shreds must be the
43+
same size) */
44+
ulong shred_sz;
45+
46+
/* {data, parity}_shred_cnt: the number of data or parity shreds
47+
(respectively) have been added to the in-process operation */
48+
ulong data_shred_cnt;
49+
ulong parity_shred_cnt;
50+
51+
/* {data, parity}_shred: pointers to the first byte of each shred */
52+
uchar * data_shred[ FD_REEDSOL_DATA_SHREDS_MAX ];
53+
uchar * parity_shred[ FD_REEDSOL_PARITY_SHREDS_MAX ];
54+
55+
/* {data, parity}_shred_valid: whether the shred at the corresponding
56+
index contains valid data. Used only for decoding operations. */
57+
//fd_reedsol_shred_set_t data_shred_valid;
58+
//fd_reedsol_shred_set_t parity_shred_valid;
59+
};
60+
61+
typedef struct fd_reedsol_private fd_reedsol_t;
62+
63+
FD_PROTOTYPES_BEGIN
64+
65+
/* fd_reedsol_{align, footprint} return the alignment and footprint
66+
required in bytes for a fd_reedsol_t. */
67+
static inline FD_FN_CONST ulong fd_reedsol_align( void ) { return FD_REEDSOL_ALIGN; }
68+
static inline FD_FN_CONST ulong fd_reedsol_footprint( void ) { return FD_REEDSOL_FOOTPRINT; }
69+
70+
71+
/* fd_reedsol_encode_init: starts a Reed-Solomon encoding operation that
72+
will encode shreds of size shred_sz. mem is assumed to be a piece
73+
of memory that meets the alignment and size constraints specified
74+
above. Takes a write interest in mem that persists until the
75+
operation is canceled or finalized. shred_sz must be >= 32. Returns
76+
mem. */
77+
78+
static inline fd_reedsol_t *
79+
fd_reedsol_encode_init( void * mem, ulong shred_sz ) {
80+
fd_reedsol_t * rs = (fd_reedsol_t *)mem;
81+
82+
rs->shred_sz = shred_sz;
83+
rs->data_shred_cnt = 0UL;
84+
rs->parity_shred_cnt = 0UL;
85+
86+
return rs;
87+
}
88+
89+
/* fd_reedsol_encode_add_data_shred: adds a shred consisting of the
90+
memory [ptr, ptr+shred_sz) to the in-process Reed-Solomon encoding
91+
operation. Takes a read interest in the shred that persists for
92+
the lifetime of the operation (i.e. until finalized or cancelled).
93+
Data shreds have no alignment restrictions and can overlap with each
94+
other but should not overlap with any parity shreds in the same
95+
encoding operation.
96+
97+
Note: The order in which data shreds are added relative to other data
98+
shreds matters. It impacts the parity data produced by the encoding
99+
operation. */
100+
101+
static inline fd_reedsol_t *
102+
fd_reedsol_encode_add_data_shred( fd_reedsol_t * rs, void const * ptr ) {
103+
/* The argument is const to make it clear that an encoding operation
104+
won't write to the shred, but we store them in the struct as
105+
non-const so that the same struct can be used for encoding and
106+
decoding operations, in which the data shreds actually are
107+
writeable. */
108+
rs->data_shred[ rs->data_shred_cnt++ ] = (uchar *)ptr;
109+
return rs;
110+
}
111+
112+
/* fd_reedsol_encode_add_parity_shred: adds the block of memory
113+
[ptr, ptr+shred_sz) to the in-process Reed-Solomon encoding operation
114+
as the destination of a parity shred. Takes a write interest in the
115+
memory that persists for the lifetime of the operation (i.e. until
116+
finalized or cancelled). Parity shreds have no alignment
117+
restrictions but must not overlap with each other or with data shreds
118+
in the same operation (U.B. if they overlap).
119+
120+
Note: The order in which parity shreds are added matters only insofar
121+
as which data will be written to which location. */
122+
123+
static inline fd_reedsol_t *
124+
fd_reedsol_encode_add_parity_shred( fd_reedsol_t * rs, void * ptr ) {
125+
rs->parity_shred[ rs->parity_shred_cnt++ ] = (uchar *)ptr;
126+
return rs;
127+
}
128+
129+
130+
/* fd_reedsol_encode_cancel cancels an in-progress encoding operation.
131+
Releases any read or write interests in any shreds that were added to
132+
the operation. Upon return, the contents of the parity shreds are
133+
undefined. */
134+
135+
static inline void
136+
fd_reedsol_encode_cancel( fd_reedsol_t * rs ) {
137+
rs->data_shred_cnt = 0UL;
138+
rs->parity_shred_cnt = 0UL;
139+
}
140+
141+
/* fd_reedsol_encode_fini finishes the in-progress encoding operation.
142+
Upon return, the parity shreds will be filled with the correct
143+
Reed-Solomon encoded parity data. Upon return, this will no longer
144+
have any read or write interest in any of the provided shreds. */
145+
void fd_reedsol_encode_fini( fd_reedsol_t * rs );
146+
147+
148+
/* FIXME: Add decode API */
149+
150+
#endif /* HEADER_fd_src_ballet_reedsol_fd_reedsol_h */
151+
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
#ifndef HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_avx2_h
2+
#define HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_avx2_h
3+
#include "../../util/simd/fd_avx.h"
4+
5+
#define FD_REEDSOL_GF_ARITH_DEFINED 1
6+
7+
typedef wb_t gf_t;
8+
#define GF_WIDTH W_FOOTPRINT
9+
#define gf_ldu wb_ldu
10+
#define gf_stu wb_stu
11+
#define gf_zero wb_zero
12+
13+
#ifdef INCLUDE_CONSTANTS
14+
FD_IMPORT_BINARY( fd_reedsol_arith_consts_avx_mul, "src/ballet/reedsol/constants/avx2_constants.bin" );
15+
#undef INCLUDE_CONSTANTS
16+
#else
17+
extern uchar const fd_reedsol_arith_consts_avx_mul[] __attribute__((aligned(128)));
18+
#endif
19+
20+
static uchar const fd_reedsol_arith_scale4[ 256UL ] = {
21+
0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240, 29, 13, 61, 45, 93, 77, 125, 109, 157, 141, 189, 173, 221, 205, 253, 237, 58, 42, 26, 10, 122,
22+
106, 90, 74, 186, 170, 154, 138, 250, 234, 218, 202, 39, 55, 7, 23, 103, 119, 71, 87, 167, 183, 135, 151, 231, 247, 199, 215, 116, 100, 84, 68, 52, 36, 20, 4, 244, 228,
23+
212, 196, 180, 164, 148, 132, 105, 121, 73, 89, 41, 57, 9, 25, 233, 249, 201, 217, 169, 185, 137, 153, 78, 94, 110, 126, 14, 30, 46, 62, 206, 222, 238, 254, 142, 158, 174,
24+
190, 83, 67, 115, 99, 19, 3, 51, 35, 211, 195, 243, 227, 147, 131, 179, 163, 232, 248, 200, 216, 168, 184, 136, 152, 104, 120, 72, 88, 40, 56, 8, 24, 245, 229, 213, 197,
25+
181, 165, 149, 133, 117, 101, 85, 69, 53, 37, 21, 5, 210, 194, 242, 226, 146, 130, 178, 162, 82, 66, 114, 98, 18, 2, 50, 34, 207, 223, 239, 255, 143, 159, 175, 191, 79,
26+
95, 111, 127, 15, 31, 47, 63, 156, 140, 188, 172, 220, 204, 252, 236, 28, 12, 60, 44, 92, 76, 124, 108, 129, 145, 161, 177, 193, 209, 225, 241, 1, 17, 33, 49, 65, 81,
27+
97, 113, 166, 182, 134, 150, 230, 246, 198, 214, 38, 54, 6, 22, 102, 118, 70, 86, 187, 171, 155, 139, 251, 235, 219, 203, 59, 43, 27, 11, 123, 107, 91, 75 }; /* Needs to be available at compile time, not link time, to allow the optimizer to use it */
28+
29+
#define GF_ADD( a, b ) wb_xor( a, b )
30+
#define GF_MUL( a, c ) (__extension__({ \
31+
wb_t lo = wb_and( a, wb_bcast( 0x0F ) ); \
32+
wb_t hi = wb_shru( a, 4 ); \
33+
wb_t p0 = _mm256_shuffle_epi8( wb_ld( fd_reedsol_arith_consts_avx_mul + 32*c ), lo ); \
34+
wb_t p1 = _mm256_shuffle_epi8( wb_ld( fd_reedsol_arith_consts_avx_mul + 32*fd_reedsol_arith_scale4[ c ] ), hi ); \
35+
/* c is known at compile time, so this is not a runtime branch */ \
36+
(c==0) ? wb_zero() : ( (c==1) ? a : wb_xor( p0, p1 ) ); } ))
37+
38+
39+
40+
#endif /*HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_avx2_h */
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#ifndef HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_gfni_h
2+
#define HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_gfni_h
3+
#include "../../util/simd/fd_avx.h"
4+
5+
#define FD_REEDSOL_GF_ARITH_DEFINED 1
6+
7+
typedef wb_t gf_t;
8+
#define GF_WIDTH W_FOOTPRINT
9+
#define gf_ldu wb_ldu
10+
#define gf_stu wb_stu
11+
#define gf_zero wb_zero
12+
13+
#ifdef INCLUDE_CONSTANTS
14+
FD_IMPORT_BINARY( fd_reedsol_arith_consts_gfni_mul, "src/ballet/reedsol/constants/gfni_constants.bin" );
15+
#undef INCLUDE_CONSTANTS
16+
#else
17+
extern uchar const fd_reedsol_arith_consts_gfni_mul[] __attribute__((aligned(128)));
18+
#endif
19+
20+
#define GF_ADD( a, b ) wb_xor( a, b )
21+
22+
/* Older versions of GCC have a bug that cause them to think
23+
_mm256_gf2p8affine_epi64_epi8 is a symmetric in the first two arguments
24+
(other than that the second argument can be a memory address). That's
25+
totally incorrect. It was fixed in GCC 10. See
26+
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92889 for more details. */
27+
#if !FD_USING_CLANG
28+
#define GCC_VERSION (__GNUC__ * 10000 \
29+
+ __GNUC_MINOR__ * 100 \
30+
+ __GNUC_PATCHLEVEL__)
31+
#endif
32+
33+
#if FD_USING_CLANG || (GCC_VERSION >= 100000)
34+
/* c is known at compile time, so this is not a runtime branch */
35+
#define GF_MUL( a, c ) ((c==0) ? wb_zero() : ( (c==1) ? (a) : _mm256_gf2p8affine_epi64_epi8( a, wb_ld( fd_reedsol_arith_consts_gfni_mul + 32*(c) ), 0 ) ))
36+
37+
#else
38+
39+
#define GF_MUL( a, c ) (__extension__({ \
40+
wb_t product; \
41+
__asm__( "vgf2p8affineqb $0x0, %[cons], %[vec], %[out]" \
42+
: [out]"=x"(product) \
43+
: [cons]"xm"( wb_ld( fd_reedsol_arith_consts_gfni_mul + 32*(c) ) ), \
44+
[vec]"x" (a) ); \
45+
(c==0) ? wb_zero() : ( (c==1) ? (a) : product ); }))
46+
47+
#endif
48+
49+
50+
51+
#endif /*HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_gfni_h */
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#ifndef HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_none_h
2+
#define HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_none_h
3+
#include "../../util/fd_util_base.h"
4+
5+
#define FD_REEDSOL_GF_ARITH_DEFINED 1
6+
7+
typedef ulong gf_t; /* One byte stored in a ulong */
8+
#define GF_WIDTH 1UL
9+
static inline gf_t gf_ldu( uchar const * addr ) { return (ulong)(*addr); }
10+
static inline void gf_stu( uchar * addr, gf_t v ) { *addr = (uchar)v; }
11+
#define gf_zero() (0UL)
12+
13+
#ifdef INCLUDE_CONSTANTS
14+
FD_IMPORT_BINARY( fd_reedsol_arith_consts_generic_mul, "src/ballet/reedsol/constants/generic_constants.bin" );
15+
#undef INCLUDE_CONSTANTS
16+
#else
17+
extern uchar const fd_reedsol_arith_consts_generic_mul[] __attribute__((aligned(128)));
18+
#endif
19+
static short const * gf_arith_log_tbl = (short const *)fd_reedsol_arith_consts_generic_mul; /* Indexed [0, 256) */
20+
static uchar const * gf_arith_invlog_tbl = fd_reedsol_arith_consts_generic_mul + 256UL*sizeof(short) + 512UL*sizeof(uchar); /* Indexed [-512, 512) */
21+
22+
#define GF_ADD( a, b ) ((a)^(b))
23+
24+
/* c is known at compile time, so this is not a runtime branch.
25+
Exposing log_tbl at compile time would let the compiler remove a
26+
branch, but we don't care too much about performance in this case. */
27+
#define GF_MUL( a, c ) ((c==0) ? 0UL : ( (c==1) ? (a) : (ulong)gf_arith_invlog_tbl[ gf_arith_log_tbl[ a ] + gf_arith_log_tbl[ c ] ] ))
28+
29+
30+
31+
#endif /*HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_none_h */

0 commit comments

Comments
 (0)