firedancer-io
diff --git a/‎src/ballet/reedsol/Local.mk‎
Lines changed: 7 additions & 0 deletions b/‎src/ballet/reedsol/Local.mk‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎src/ballet/reedsol/constants/avx2_constants.bin‎
16 KB b/‎src/ballet/reedsol/constants/avx2_constants.bin‎
16 KB
diff --git a/‎src/ballet/reedsol/constants/generic_constants.bin‎
2.5 KB b/‎src/ballet/reedsol/constants/generic_constants.bin‎
2.5 KB
diff --git a/‎src/ballet/reedsol/constants/gfni_constants.bin‎
8 KB b/‎src/ballet/reedsol/constants/gfni_constants.bin‎
8 KB
diff --git a/‎src/ballet/reedsol/fd_reedsol.c‎
Lines changed: 16 additions & 0 deletions b/‎src/ballet/reedsol/fd_reedsol.c‎
Lines changed: 16 additions & 0 deletions
diff --git a/‎src/ballet/reedsol/fd_reedsol.h‎
Lines changed: 151 additions & 0 deletions b/‎src/ballet/reedsol/fd_reedsol.h‎
Lines changed: 151 additions & 0 deletions
diff --git a/‎src/ballet/reedsol/fd_reedsol_arith_avx2.h‎
Lines changed: 40 additions & 0 deletions b/‎src/ballet/reedsol/fd_reedsol_arith_avx2.h‎
Lines changed: 40 additions & 0 deletions
diff --git a/‎src/ballet/reedsol/fd_reedsol_arith_gfni.h‎
Lines changed: 51 additions & 0 deletions b/‎src/ballet/reedsol/fd_reedsol_arith_gfni.h‎
Lines changed: 51 additions & 0 deletions
diff --git a/‎src/ballet/reedsol/fd_reedsol_arith_none.h‎
Lines changed: 31 additions & 0 deletions b/‎src/ballet/reedsol/fd_reedsol_arith_none.h‎
Lines changed: 31 additions & 0 deletions
@@ -0,0 +1,7 @@
+$(call add-hdrs,fd_reedsol.h)
+ifdef FD_HAS_GFNI
+$(call add-asms,fd_reedsol_gfni_32,fd_ballet)
+endif
+$(call add-objs,fd_reedsol,fd_ballet)
+$(call add-objs,fd_reedsol_internal,fd_ballet)
+$(call make-unit-test,test_reedsol,test_reedsol,fd_ballet fd_util)
@@ -0,0 +1,16 @@
+#include "fd_reedsol.h"
+#include "fd_reedsol_internal.h"
+
+/* Runs the encoder over the shreds registered in rs and then resets
+   both shred counts to zero.  When built with FD_HAS_GFNI, the case of
+   exactly 32 data and 32 parity shreds is dispatched to
+   fd_reedsol_encode_32_32 (which additionally receives rs->scratch);
+   every other case goes through fd_reedsol_encode. */
+void fd_reedsol_encode_fini( fd_reedsol_t * rs ) {
+  ulong           sz         = rs->shred_sz;
+  ulong           data_cnt   = rs->data_shred_cnt;
+  ulong           parity_cnt = rs->parity_shred_cnt;
+  uchar const * * data       = (uchar const * *)rs->data_shred;
+
+#if FD_HAS_GFNI
+  /* Bitwise & on the two comparisons keeps this a single test */
+  if( FD_LIKELY( (data_cnt==32UL) & (parity_cnt==32UL) ) ) {
+    fd_reedsol_encode_32_32( sz, data, rs->parity_shred, rs->scratch );
+  } else {
+    fd_reedsol_encode( sz, data, data_cnt, rs->parity_shred, parity_cnt );
+  }
+#else
+  fd_reedsol_encode( sz, data, data_cnt, rs->parity_shred, parity_cnt );
+#endif
+
+  /* Operation complete: drop all registered shreds */
+  rs->parity_shred_cnt = 0UL;
+  rs->data_shred_cnt   = 0UL;
+}
@@ -0,0 +1,151 @@
+#ifndef HEADER_fd_src_ballet_reedsol_fd_reedsol_h
+#define HEADER_fd_src_ballet_reedsol_fd_reedsol_h
+
+/* fd_reedsol provides APIs for producing Reed-Solomon encoded parity
+   data and for reconstructing missing data from parity data.  The
+   encoding process is optimized, and highly optimized for Turbine's
+   typical case.
+
+   Reed-Solomon works in GF(2^8), i.e. the codeword size is 1 byte, but
+   is typically used on each byte of larger pieces of data called
+   shreds (a Solana-specific term, often called shards elsewhere in the
+   literature).  Mathematically, the encoding process forms a vector
+   from the input data, taking one byte from each shred, and
+   left-multiplies the vector by a constant matrix in GF(2^8).  The
+   resulting vector contains one byte for each of the parity shreds.
+   Solana also calls parity shreds "code" shreds, but due to the naming
+   collision with executable code, we have opted for "parity."  This
+   mathematical structure thus forces each shred to be of identical size
+   but doesn't otherwise impose any size restrictions. */
+
+#include "../../util/fd_util.h"
+
+// TODO: Define decode API
+//#define SET_NAME reedsol_shred_set
+//#include "../../util/tmpl/fd_smallset.c"
+
+/* FD_REEDSOL_{DATA, PARITY}_SHREDS_MAX describe the inclusive maximum
+   number of data and parity shreds that this implementation supports.
+   These limits are not mathematical limits, but limits based on current
+   Solana needs and performance.  It is common for both shred counts to
+   be at their maximum values. */
+#define FD_REEDSOL_DATA_SHREDS_MAX   (32UL)
+#define FD_REEDSOL_PARITY_SHREDS_MAX (32UL)
+
+
+#define FD_REEDSOL_ALIGN     (128UL)
+#define FD_REEDSOL_FOOTPRINT (1664UL)
+
+struct __attribute__((aligned(FD_REEDSOL_ALIGN))) fd_reedsol_private {
+  uchar scratch[ 1024 ]; /* Used for the ultra high performance implementation
+                            (scratch memory; contents are not part of the API) */
+
+  /* shred_sz: the size of each shred in bytes (all shreds must be the
+     same size).  Set by fd_reedsol_encode_init. */
+  ulong shred_sz;
+
+  /* {data, parity}_shred_cnt: the number of data or parity shreds
+     (respectively) that have been added to the in-process operation.
+     Also the index at which the next added shred pointer is stored. */
+  ulong data_shred_cnt;
+  ulong parity_shred_cnt;
+
+  /* {data, parity}_shred: pointers to the first byte of each shred.
+     Only the first {data, parity}_shred_cnt entries are meaningful. */
+  uchar * data_shred[   FD_REEDSOL_DATA_SHREDS_MAX   ];
+  uchar * parity_shred[ FD_REEDSOL_PARITY_SHREDS_MAX ];
+
+  /* {data, parity}_shred_valid: whether the shred at the corresponding
+     index contains valid data.  Used only for decoding operations.
+     Disabled until the decode API is defined. */
+  //fd_reedsol_shred_set_t data_shred_valid;
+  //fd_reedsol_shred_set_t parity_shred_valid;
+};
+
+typedef struct fd_reedsol_private fd_reedsol_t;
+
+FD_PROTOTYPES_BEGIN
+
+/* fd_reedsol_align returns the alignment in bytes that a memory region
+   must have in order to be used as a fd_reedsol_t. */
+static inline FD_FN_CONST ulong
+fd_reedsol_align( void ) {
+  return FD_REEDSOL_ALIGN;
+}
+
+/* fd_reedsol_footprint returns the size in bytes that a memory region
+   must have in order to be used as a fd_reedsol_t. */
+static inline FD_FN_CONST ulong
+fd_reedsol_footprint( void ) {
+  return FD_REEDSOL_FOOTPRINT;
+}
+
+
+/* fd_reedsol_encode_init: begins a Reed-Solomon encoding operation over
+   shreds that are each shred_sz bytes long.  mem must satisfy the
+   alignment and footprint requirements given above; it is reinterpreted
+   as a fd_reedsol_t and a write interest in it is held until the
+   operation is canceled or finalized.  shred_sz must be >= 32 (this is
+   a caller obligation; it is not checked here).  Returns mem, with no
+   data or parity shreds registered yet. */
+
+static inline fd_reedsol_t *
+fd_reedsol_encode_init( void * mem, ulong shred_sz ) {
+  fd_reedsol_t * state = (fd_reedsol_t *)mem;
+
+  /* Start with an empty shred list; the pointer arrays and scratch
+     area are left untouched. */
+  state->parity_shred_cnt = 0UL;
+  state->data_shred_cnt   = 0UL;
+  state->shred_sz         = shred_sz;
+
+  return state;
+}
+
+/* fd_reedsol_encode_add_data_shred: registers the memory
+   [ptr, ptr+shred_sz) as the next data shred of the in-process
+   Reed-Solomon encoding operation and returns rs.  A read interest in
+   the shred is held for the lifetime of the operation (i.e. until
+   finalized or cancelled).  Data shreds have no alignment restrictions
+   and can overlap with each other but should not overlap with any
+   parity shreds in the same encoding operation.
+
+   No bounds check is performed: the caller must not add more than
+   FD_REEDSOL_DATA_SHREDS_MAX data shreds to one operation.
+
+   Note: The order in which data shreds are added relative to other data
+   shreds matters.  It impacts the parity data produced by the encoding
+   operation. */
+
+static inline fd_reedsol_t *
+fd_reedsol_encode_add_data_shred( fd_reedsol_t * rs, void const * ptr ) {
+  ulong slot = rs->data_shred_cnt;
+
+  /* ptr is const in the signature to advertise that encoding never
+     writes to a data shred.  The struct stores it non-const so the
+     same layout can also serve decoding operations, in which the data
+     shreds actually are writeable. */
+  rs->data_shred[ slot ] = (uchar *)ptr;
+  rs->data_shred_cnt     = slot + 1UL;
+
+  return rs;
+}
+
+/* fd_reedsol_encode_add_parity_shred: registers the block of memory
+   [ptr, ptr+shred_sz) as the destination of the next parity shred of
+   the in-process Reed-Solomon encoding operation and returns rs.  A
+   write interest in the memory is held for the lifetime of the
+   operation (i.e. until finalized or cancelled).  Parity shreds have no
+   alignment restrictions but must not overlap with each other or with
+   data shreds in the same operation (U.B. if they overlap).
+
+   No bounds check is performed: the caller must not add more than
+   FD_REEDSOL_PARITY_SHREDS_MAX parity shreds to one operation.
+
+   Note: The order in which parity shreds are added matters only insofar
+   as which data will be written to which location. */
+
+static inline fd_reedsol_t *
+fd_reedsol_encode_add_parity_shred( fd_reedsol_t * rs, void * ptr ) {
+  ulong slot = rs->parity_shred_cnt;
+
+  rs->parity_shred[ slot ] = (uchar *)ptr;
+  rs->parity_shred_cnt     = slot + 1UL;
+
+  return rs;
+}
+
+
+/* fd_reedsol_encode_cancel abandons an in-progress encoding operation
+   by resetting both shred counts to zero.  This releases any read or
+   write interests in shreds that were added to the operation.  Upon
+   return, the contents of the parity shreds are undefined. */
+
+static inline void
+fd_reedsol_encode_cancel( fd_reedsol_t * rs ) {
+  rs->parity_shred_cnt = 0UL;
+  rs->data_shred_cnt   = 0UL;
+}
+
+/* fd_reedsol_encode_fini finishes the in-progress encoding operation.
+   Upon return, the parity shreds will be filled with the correct
+   Reed-Solomon encoded parity data.  Upon return, this will no longer
+   have any read or write interest in any of the provided shreds (the
+   data and parity shred counts in rs are reset to zero). */
+void fd_reedsol_encode_fini( fd_reedsol_t * rs );
+
+
+/* FIXME: Add decode API */
+
+/* Closes the FD_PROTOTYPES_BEGIN opened above so that the linkage
+   block is balanced when this header is included from C++. */
+FD_PROTOTYPES_END
+
+#endif /* HEADER_fd_src_ballet_reedsol_fd_reedsol_h */
+
@@ -0,0 +1,40 @@
+#ifndef HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_avx2_h
+#define HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_avx2_h
+#include "../../util/simd/fd_avx.h"
+
+#define FD_REEDSOL_GF_ARITH_DEFINED 1
+
+typedef wb_t gf_t;
+#define GF_WIDTH W_FOOTPRINT
+#define gf_ldu wb_ldu
+#define gf_stu wb_stu
+#define gf_zero wb_zero
+
+#ifdef INCLUDE_CONSTANTS
+FD_IMPORT_BINARY( fd_reedsol_arith_consts_avx_mul,    "src/ballet/reedsol/constants/avx2_constants.bin" );
+#undef INCLUDE_CONSTANTS
+#else
+extern uchar const fd_reedsol_arith_consts_avx_mul[]  __attribute__((aligned(128)));
+#endif
+
+static uchar const fd_reedsol_arith_scale4[ 256UL ] = {
+  0,  16,  32,  48,  64,  80,  96, 112, 128, 144, 160, 176, 192, 208, 224, 240,  29,  13,  61,  45,  93,  77, 125, 109, 157, 141, 189, 173, 221, 205, 253, 237,  58,  42,  26,  10, 122,
+  106,  90,  74, 186, 170, 154, 138, 250, 234, 218, 202,  39,  55,   7,  23, 103, 119,  71,  87, 167, 183, 135, 151, 231, 247, 199, 215, 116, 100,  84,  68,  52,  36,  20,   4, 244, 228,
+  212, 196, 180, 164, 148, 132, 105, 121,  73,  89,  41,  57,   9,  25, 233, 249, 201, 217, 169, 185, 137, 153,  78,  94, 110, 126,  14,  30,  46,  62, 206, 222, 238, 254, 142, 158, 174,
+  190,  83,  67, 115,  99,  19,   3,  51,  35, 211, 195, 243, 227, 147, 131, 179, 163, 232, 248, 200, 216, 168, 184, 136, 152, 104, 120,  72,  88,  40,  56,   8,  24, 245, 229, 213, 197,
+  181, 165, 149, 133, 117, 101,  85,  69,  53,  37,  21,   5, 210, 194, 242, 226, 146, 130, 178, 162,  82,  66, 114,  98,  18,   2,  50,  34, 207, 223, 239, 255, 143, 159, 175, 191,  79,
+  95, 111, 127,  15,  31,  47,  63, 156, 140, 188, 172, 220, 204, 252, 236,  28,  12,  60,  44,  92,  76, 124, 108, 129, 145, 161, 177, 193, 209, 225, 241,   1,  17,  33,  49,  65,  81,
+  97, 113, 166, 182, 134, 150, 230, 246, 198, 214,  38,  54,   6,  22, 102, 118,  70,  86, 187, 171, 155, 139, 251, 235, 219, 203,  59,  43,  27,  11, 123, 107,  91,  75 }; /* Needs to be available at compile time, not link time, to allow the optimizer to use it.
+     GF_MUL indexes this table with its constant c to choose which 32-byte
+     entry of fd_reedsol_arith_consts_avx_mul is applied to the high nibble
+     of each byte.  NOTE(review): the entries appear to be 16*c computed in
+     GF(2^8) (e.g. entry 1 is 16 and entry 16 is 29) -- confirm against the
+     generator of avx2_constants.bin. */
+
+#define GF_ADD( a, b ) wb_xor( a, b )
+/* GF_MUL( a, c ) returns the vector a with every byte multiplied by
+   the constant c.  Each byte of a is split into its low and high
+   nibble; each nibble selects a byte from a 32-byte table loaded from
+   fd_reedsol_arith_consts_avx_mul (entry c for the low nibble, entry
+   fd_reedsol_arith_scale4[ c ] for the high nibble) and the two
+   partial results are xored together.  c==0 and c==1 short-circuit to
+   zero and a respectively.  Both arguments are fully parenthesized so
+   that expression arguments (e.g. x^y) expand correctly.  Note that a
+   may be evaluated more than once, so it must be free of side
+   effects. */
+#define GF_MUL( a, c ) (__extension__({                                                                              \
+  wb_t lo = wb_and( (a), wb_bcast( 0x0F ) );                                                                         \
+  wb_t hi = wb_shru( (a), 4 );                                                                                       \
+  wb_t p0 = _mm256_shuffle_epi8( wb_ld( fd_reedsol_arith_consts_avx_mul + 32*(c) ), lo );                            \
+  wb_t p1 = _mm256_shuffle_epi8( wb_ld( fd_reedsol_arith_consts_avx_mul + 32*fd_reedsol_arith_scale4[ (c) ] ), hi ); \
+  /* c is known at compile time, so this is not a runtime branch */                                                  \
+  ((c)==0) ? wb_zero() : ( ((c)==1) ? (a) : wb_xor( p0, p1 ) ); } ))
+
+
+
+#endif /*HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_avx2_h */
@@ -0,0 +1,51 @@
+#ifndef HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_gfni_h
+#define HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_gfni_h
+#include "../../util/simd/fd_avx.h"
+
+#define FD_REEDSOL_GF_ARITH_DEFINED 1
+
+typedef wb_t gf_t;
+#define GF_WIDTH W_FOOTPRINT
+#define gf_ldu wb_ldu
+#define gf_stu wb_stu
+#define gf_zero wb_zero
+
+#ifdef INCLUDE_CONSTANTS
+FD_IMPORT_BINARY( fd_reedsol_arith_consts_gfni_mul,    "src/ballet/reedsol/constants/gfni_constants.bin" );
+#undef INCLUDE_CONSTANTS
+#else
+extern uchar const fd_reedsol_arith_consts_gfni_mul[]  __attribute__((aligned(128)));
+#endif
+
+#define GF_ADD( a, b ) wb_xor( a, b )
+
+/* Older versions of GCC have a bug that causes them to think
+   _mm256_gf2p8affine_epi64_epi8 is symmetric in its first two arguments
+   (other than that the second argument can be a memory address).  That's
+   totally incorrect.  It was fixed in GCC 10.  See
+   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92889 for more details. */
+#if !FD_USING_CLANG
+#define GCC_VERSION (__GNUC__ * 10000 \
+                     + __GNUC_MINOR__ * 100 \
+                     + __GNUC_PATCHLEVEL__)
+#endif
+
+/* GF_MUL( a, c ) returns the vector a with every byte multiplied by
+   the constant c, using the 32-byte entry c of
+   fd_reedsol_arith_consts_gfni_mul as the matrix operand of the GFNI
+   affine instruction.  c==0 and c==1 short-circuit to zero and a
+   respectively.  Both arguments are fully parenthesized so that
+   expression arguments (e.g. x^y) expand correctly.  Note that a may
+   be evaluated more than once, so it must be free of side effects. */
+#if FD_USING_CLANG || (GCC_VERSION >= 100000)
+/* c is known at compile time, so this is not a runtime branch */
+#define GF_MUL( a, c ) (((c)==0) ? wb_zero() : ( ((c)==1) ? (a) : _mm256_gf2p8affine_epi64_epi8( (a), wb_ld( fd_reedsol_arith_consts_gfni_mul + 32*(c) ), 0 ) ))
+
+#else
+
+/* Affected GCC versions: emit the instruction directly so the operand
+   order cannot be swapped by the compiler. */
+#define GF_MUL( a, c )  (__extension__({                                       \
+      wb_t product;                                                            \
+      __asm__( "vgf2p8affineqb $0x0, %[cons], %[vec], %[out]"                  \
+          : [out]"=x"(product)                                                 \
+          : [cons]"xm"( wb_ld( fd_reedsol_arith_consts_gfni_mul + 32*(c) ) ),  \
+            [vec]"x" (a) );                                                    \
+      ((c)==0) ? wb_zero() : ( ((c)==1) ? (a) : product ); }))
+
+#endif
+
+
+
+#endif /*HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_gfni_h */
@@ -0,0 +1,31 @@
+#ifndef HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_none_h
+#define HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_none_h
+#include "../../util/fd_util_base.h"
+
+#define FD_REEDSOL_GF_ARITH_DEFINED 1
+
+/* Generic (non-SIMD) element type: a single GF(2^8) byte carried in a
+   ulong, so one "vector" holds exactly GF_WIDTH==1 byte. */
+typedef ulong gf_t;
+#define GF_WIDTH 1UL
+
+/* gf_ldu reads the byte at addr and widens it to a gf_t. */
+static inline gf_t
+gf_ldu( uchar const * addr ) {
+  uchar byte = *addr;
+  return (ulong)byte;
+}
+
+/* gf_stu narrows v to a byte and writes it to addr. */
+static inline void
+gf_stu( uchar * addr, gf_t v ) {
+  uchar byte = (uchar)v;
+  *addr = byte;
+}
+
+#define gf_zero() (0UL)
+
+#ifdef INCLUDE_CONSTANTS
+FD_IMPORT_BINARY( fd_reedsol_arith_consts_generic_mul, "src/ballet/reedsol/constants/generic_constants.bin" );
+#undef INCLUDE_CONSTANTS
+#else
+extern uchar const fd_reedsol_arith_consts_generic_mul[]  __attribute__((aligned(128)));
+#endif
+static short const * gf_arith_log_tbl     = (short const *)fd_reedsol_arith_consts_generic_mul; /* Indexed [0, 256).  Occupies the first 256 shorts of the constants blob. */
+static uchar const * gf_arith_invlog_tbl  = fd_reedsol_arith_consts_generic_mul + 256UL*sizeof(short) + 512UL*sizeof(uchar); /* Indexed [-512, 512).  Points 512 bytes past the end of the log table so that negative indices stay inside the blob. */
+
+#define GF_ADD( a, b ) ((a)^(b))
+
+/* GF_MUL( a, c ) multiplies the element a by the constant c by adding
+   their entries from gf_arith_log_tbl and looking the sum up in
+   gf_arith_invlog_tbl.  c==0 and c==1 short-circuit to 0 and a
+   respectively.  c is known at compile time, so this is not a runtime
+   branch.  Exposing log_tbl at compile time would let the compiler
+   remove a branch, but we don't care too much about performance in
+   this case.  Both arguments are fully parenthesized so that
+   expression arguments (e.g. x^y) expand correctly. */
+#define GF_MUL( a, c ) (((c)==0) ? 0UL : ( ((c)==1) ? (a) : (ulong)gf_arith_invlog_tbl[ gf_arith_log_tbl[ (a) ] + gf_arith_log_tbl[ (c) ] ] ))
+
+
+
+#endif /*HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_none_h */