From ce968665df9199133548d0d7253bb1a3da682c79 Mon Sep 17 00:00:00 2001 From: Philip Taffet Date: Fri, 17 Mar 2023 16:32:56 -0500 Subject: [PATCH 1/6] ballet: Add Reed-Solomon encoder --- src/ballet/reedsol/Local.mk | 7 + .../reedsol/constants/avx2_constants.bin | Bin 0 -> 16384 bytes .../reedsol/constants/generic_constants.bin | Bin 0 -> 2560 bytes .../reedsol/constants/gfni_constants.bin | Bin 0 -> 8192 bytes src/ballet/reedsol/fd_reedsol.c | 16 + src/ballet/reedsol/fd_reedsol.h | 151 + src/ballet/reedsol/fd_reedsol_arith_avx2.h | 41 + src/ballet/reedsol/fd_reedsol_arith_gfni.h | 51 + src/ballet/reedsol/fd_reedsol_arith_none.h | 33 + src/ballet/reedsol/fd_reedsol_fft.h | 475 ++ src/ballet/reedsol/fd_reedsol_gfni_32.S | 414 ++ src/ballet/reedsol/fd_reedsol_internal.c | 278 ++ src/ballet/reedsol/fd_reedsol_internal.h | 19 + src/ballet/reedsol/fd_reedsol_ppt.h | 4161 +++++++++++++++++ src/ballet/reedsol/gen_tbls.py | 59 + src/ballet/reedsol/generate_fft.py | 251 + src/ballet/reedsol/generate_ppt.py | 374 ++ src/ballet/reedsol/test_reedsol.c | 646 +++ 18 files changed, 6976 insertions(+) create mode 100644 src/ballet/reedsol/Local.mk create mode 100644 src/ballet/reedsol/constants/avx2_constants.bin create mode 100644 src/ballet/reedsol/constants/generic_constants.bin create mode 100644 src/ballet/reedsol/constants/gfni_constants.bin create mode 100644 src/ballet/reedsol/fd_reedsol.c create mode 100644 src/ballet/reedsol/fd_reedsol.h create mode 100644 src/ballet/reedsol/fd_reedsol_arith_avx2.h create mode 100644 src/ballet/reedsol/fd_reedsol_arith_gfni.h create mode 100644 src/ballet/reedsol/fd_reedsol_arith_none.h create mode 100644 src/ballet/reedsol/fd_reedsol_fft.h create mode 100644 src/ballet/reedsol/fd_reedsol_gfni_32.S create mode 100644 src/ballet/reedsol/fd_reedsol_internal.c create mode 100644 src/ballet/reedsol/fd_reedsol_internal.h create mode 100644 src/ballet/reedsol/fd_reedsol_ppt.h create mode 100644 src/ballet/reedsol/gen_tbls.py create mode 100644 src/ballet/reedsol/generate_fft.py create mode 100644 src/ballet/reedsol/generate_ppt.py create mode 100644 src/ballet/reedsol/test_reedsol.c diff --git a/src/ballet/reedsol/Local.mk b/src/ballet/reedsol/Local.mk new file mode 100644 index 00000000000..45282343b23 --- /dev/null +++ b/src/ballet/reedsol/Local.mk @@ -0,0 +1,7 @@ +$(call add-hdrs,fd_reedsol.h) +ifdef FD_HAS_GFNI +$(call add-asms,fd_reedsol_gfni_32,fd_ballet) +endif +$(call add-objs,fd_reedsol,fd_ballet) +$(call add-objs,fd_reedsol_internal,fd_ballet) +$(call make-unit-test,test_reedsol,test_reedsol,fd_ballet fd_util) diff --git a/src/ballet/reedsol/constants/avx2_constants.bin b/src/ballet/reedsol/constants/avx2_constants.bin new file mode 100644 index 0000000000000000000000000000000000000000..d42e912e38edd4dd5c530f28600895ec71750613 GIT binary patch literal 16384 zcmajm=OfpD8wK!72t{a_Wfm!Y-shZ)=s*53EMR0}UdY1AwrDXs#}ZCLFJM~8x`=%V*V5%HdHDrb z6Pj@$Gdt%ZHeP|%tCsUDZ z^g{MUysMWl-K?-(SyD<|l+Y|o*aQWZu251@kP#PKzlqSST)e`Po0U{`jSn2Qcl9PT z+fo7XO$sWyYKP36Y<;~5y=eJrNzv^J#)mX_d%HUwC-mZ#t2Rm~%N#J}x_Vm)vfw>FtzxHzaNPU4J{FSFBgr zqvm^}pgJJ=&&S>dLa*GYd|>zKqqpLxUg)K&`vciZyfB>#6`T89YDyIJzE zx3O+r|MZ8<@|X}p^T}?oIls^7ao4Z6Mfa`-6PjOM%HG{VyR-G(e0g?65}^eYHaU46 zQtNM+{Cc-I;4+~FmBd}0boada)Bh+YqTnu}S1C)JcGzw9;cNHPH2?eugkHT})cd&R zq3Ox??vSLrHwZ1XQ{>z+b;}>`n_dK^R^|~}SoN@9X5Fh*J1ozqR}cLqw21n#puDE{ zE9Cck#@>E5OK4He;~_WNCpo0`PF%cJ|CZ2V+O`qJtsj}!Y8(!|@pPQf;<`>Tcl-bB z;PWuK89G`|=r#H$6H11EZ{fOdC@bjm6GE@u<(N|Qe0I~~BRcoue~u7ZVz)!ugYK_u 
z7M$2sarJvAp(TyIGef5PEj>8a{O!4UkOa#^PI@nLNOp*J2leLL;Lqw^+f*g9S(RuFpAA+NiaCmTEt zNH3@#PrXlQDGT@Vq<5{(8ZtsXzeBGPTH4C3D*jD}t^T&i%mH&g4s zduZ@U6I#*b;Na)di+OQve|&cH2oqY#RrghYOu^9T&BPtdh8~38>Td9ED5+s|qfdgnRyA5B4(6VGCcS8Qt+1zTy8@e*?oSyKu!?<7 zXzhqrW{uFNi*{YAEGdTbM%btr)(Y0z6?U^ z#l76>TKL9&+pFsa3tq$!T0dcU%Z04Zj>7YItyi@96WSnYNaoD7@uTbJYRz~XqX@k# z<@u%~_kJE)`7p|B+f)vr4X^aAv$->GD)ji`S%ud{gx;Ojz2-#4cYUVbl#?P~stIj$ zwM)dMc3PcdF#g!azompW&YZU7_02(f&y{+-gf`9n zwwGll=f;mE7XNOVvk}@X|EtM@@44ACO#8oN?p;XeJq3RZ`9@2wJdr)#7kFUrTGywSk8L6J{#!rQR!)@%^lUuakl?IL=mWPu zoL|$C_$ym~Y4C{RNkShi{p=&$7B^d{$`{_f^)R6i-JSH1u221R+lncpaf2J7&F@dR zt#3|yQ?Z9L{_z@LLR*x-bC&4|9k_IKXW3^?GeTQdzITv+7S$bXzvaOgg9V|js@~Xc zdlB54;3-)=v&@jt)-|t>>>TlXn0`(y=g%s2LLaWnO#f@SLqg|$aOJ=menKC4m~;7u zxw7=`iHQ4sr&kdA=%f6^? z(ZnBP;oD6J?J`)J*XF~se{1;YY?_oRp-&E%WIegSWxd&daz0krh|sRDVuo(+Fuc~M zqj+Y~l~06rdlUbnV4HsNu;F%3mV{A4yT427xhA7g)@UYjl|R29e9CC465AL3%a%)9uIL4!J;$#!-dSgz{Q4}1 zsd&v}LVHbKuD>sRAmfAELaTMRn+feb9rE`ghsXO2o|F7XKgAK+XEyw&-(vTvq*XR6 zPrgeh^qIM+uh9%=zeh6qEb#soLFlvJB4@%_&ixBwb6?`|BaqPNegsX#u5f66prWyU zU-vCSpZ^v7F=3g@)6y-vYt09<2<`hPU^JD_uJ8JWy|TNW+$QvedH>f}g$|EoidgMb ztE(ln->P)wN{MTy=Nvxneq#2F(EdVKch-tzp8I?3je3LSAfW^JQe|&%xNb9c{@=dm zMjeC>6i89Hx9OJCl-Jy$7itd)9ke2L{VkOTdpgw`eSe*pBJ@R`IH|&|RTeJ{p16Ov zeMRU?+{vQ3a`!YJ?tOOrmDdcRgO??&&Dv7BtIfLKal&Vw&>BmUcB=hGD>19-a+X2Z3UjAhQs3*`ZTYwWyumcVM~_dymi}i)z=<1>%)Zzov4`Y z^7~->px$TSQi+RPgieyVc683H@sa7*WBHqt83>&$mwS9>@56TUfA%+aTwX%xluh>z zJ@lHoyYVu^q-TRUp)X6{w`z9#P_-t0(VMd!dkB4H-5t}%XJ6;;jO6*|G@?c5)D5@y zb(|k7QVbUSb)-w5&}nNb^m|W!NnkD6G_3yHj?n26mAjug%%w3rSley%)t=B-MQYUt zkNpke&)e2?;Jpu_GsLR2U)cVNSXR8bb?-+{LT6h3t>k~Ns}w)Y?G!!=-jC4`O|5$JRw!Fbk=xb(k`3zr8 zO};J2`)-`eNazB?(JQij7uNaPwBHe!-$&@{Mq^hMySzn0T$*cFP9G%n4ZXKTQwwe+XT6>d}}=lZBnqLTA?%XqQw2bpQ_oH*C+E(Wiy5eN-uw4mj7Vi_U z3r$?t30>*%tlRzpLj_}}}7c+uUE<2>tM~x%9;S(^hHC%x-r+ ztS59s>VXX(ZlBth*sHAjoCRq}? z{h}Vv%Q!2kH-&avzgD>tx+7SB*>J+awV$(`w)`l)Kx)Azl z;4XpZDf<>psgL5xT3^j0sjJ+MDf-pSa@>|9grvapBH&+i@a%6YhQQrSHEvf;)bgzhOj zdMf`B_n4eXP@<~ncS84;*j>2(gm+%j=(7L*&0~b_yJK^<=&{f@5v{n$Lpwhby8ot! zPR1wMsC{C;Texki2>qhiQ{&3`mP^*_rW*wv?+|+6`YEHtvF%qi6(_nDduJ2+WudD< z%&TolhC9ZF7JA+!^kAOP{_t6;Tkebdey*|bBlOU+;Vs41Sd~jVAPP)&*<- zdEuJFLiMjzRId_xBbk}o6vUi*#kj4X(C>O=%FK>l zI+dKS{z#bnE1^gGWbf>=@jrXDNbm6`pr$*e=n``5V(FeiWsc8;e*aAVfrWka ziHLIRPDRFdg#OU6CitX&_Ojo@3JK>9mJs^m)3t$)yYd8PpYKR<+h0TIPc356w%SFE zA6tbZj%yVY`g5Cb_+iz;#jol%_?@P6iNqvR*pF&FZ#T#~b!4)AR9!o@&^bdip@whDRS*Zd;#9BlPrNj`uwLpPq?H z4a+}j*-Ge{e@osj6`XjvUc5)CrCEW{vp*M4ty=l6K}PxU#^D|bLVx+q`k9yOZJW~O z2H|HfB?&$EmEk+%f^T0~7S1wF&oC4E>-Pm;7BbEJUc&y9_3yt$g#I?e@{euFkGCt9 zz7za3AwcNwUzmQgv;O-m$UDXL{_S!?|Cm_Otg^m4{)F$PJw+92g#J0TvO#%cPwLUr zkq5HMb`$#7$7N5qtQ`#Wag5n@KhK)bzdv)eZ5DnR?s_;?^+BOAq5q8X^=*)Sa>r=j z4Vx7I^MwBUp8uJYe9HrEi!A%ZXm>)-zZM!1*;$uksa|z1ApIDj|GgD_DX!F9xJS3l zH9Fpj(Em~Y|6d=B{V?{0XzY{F*f*dtk49r&iN-t&jd=$e-@|BpFQM^!8jasuX#71! zG=A@) z@$Uf|e=pG3SE8}cM&sTAje8h0=A~%N)6v+sps|lb<9iK_?|C%lJ!s4W(fE6T#_tI< zes7}jJ%YwP1RDEtH14U;xVJ%LAA`oc9*uo28v8yp=0Rx83(%M+qcLwpbL(jeRB>`z|!@A<)>Dqj67##=Q+1_n2th>!I;IhsN)HH0D8Q%nQ(%C!;ZML}MR? 
z#=ZiLdl5A53DDR#qp^=f<6aewdqy@c{LjI3^e9lXzWAL*q5WRPeEhf zhQ_=Ljd=(f^Kvxisc7um(AdYIv9CvCpNq!54;uF%Xxs~;aZiTEy)hd1sA#-bK;xbr zjd>ax^HwzG@o4O8(AejpvF}CW9srGd5j5_J(6~24;~o)>dsQ^#N8t-M$ z*w>)3&qHJ0i^e_xje8L^?upRYH=}WnfX2Nl8u!d-+`FN14~fQm88qHgp>c1I#(NAj z_JL^Zi_q97pfPVo;~oi(dlfY98PM2wqj3+7#=R^W_mpVd+oACu3yt?WXuRh@Q zdm}XN(a^Y8K;xbTjeRE?`*1Y&C1~u^(0Ff!#(O+8-fN)oo(GM4Z#3Qmpm8sX#yv3_ z_nc_l`=N0UhQ_@R8uuh<+#8^=k49r(iNW1ox0y$>4qAZWZ7MB_af8t;wKc#n$4dj&M^+0l6KgvNUq zH15&RxK~8uo)wLIXEgTVXxvMnaZiKBy%ieo@z8j$iNHeoinDp+q6yBrcHBd>Kqf3r@K#cVq@Ag6DLo{J-DZ~m|u#q2hW(wOm$VIBqmJ76@ z6HzS3%PN|XjL&4?6{8qR5dt(~E7y6+GoG`B7YyMBp9rym#S?jjG{3y<$@RVVXIGDQJ3X)JxSvl0J4eU8|MU3HCi9|_U85J49dLJTtF6^1WHxj^ zJ1cR@Y~=<8@2zjPuF2{)J4Sx^7JG7;zUAiH)03pt)tg&+)Ths1KK^)hVMekHMOsIV z{r>Xu#H_}CH@DQC{m=8G!yH<@DbD8gN9ck9Yq;HMjP!X7nkb9e9TV#cg+*%}CcQ1r z@6|;Fg5g%DJ<{m$X$+Pqw>dT@t{ylZlMEy7QWS8QLX9gtnvy_*gK2;s>&RyMP z)Afj26RTUw+c&5krf*MuIPhSHMrZv)E#+e$cU`R?Pn~=2>8C#HJaSpe*X#Q%w|+h9 z)fZnzzs*-;jir43e8G>fI^2-k{mX29cB-qT{Jb6cz4Yf%Kl~VbyjZWWN0##Q2Zc1j z>T{D!I@R@Hi`;1}WqE#y!p4k7+mu@92WqY!;9IV?zZBsa7vW~;Oc3H~y zwNAGSr;}SQ>+CvN_B|eCDck#Ck-Kg$b!4TLS5}pM^3hVZz1;0hy8YCevh-KCrJZIe z$0MXeAw3?{L~?ao6iw%3DaYq`=9jo#-0r*Yyh;0Z{&`C|UPD**)8j?WBQFi|39M%+ z$1eI%5`LBjv!xu*W|gPw@ua3tI@zQ2?B)e6<@jbFQxDaFJG4T!ltUWI zk2I9y9rW*l9&c)?%qarCUy15tDaXHF?;of>+>1T;wC(b{-C_{tQ{Qr zbey}3rCgs`TMnVRao-HDxMJ-wIRu7sy&Q^G)aymv*2*nIzN&lLTFSNC>eNQBANAh5 z5l7;87<|Z5uBYp)XuY1)<(GT8Prg5-yro=Up9?Oi&fMRB|BVmJCYQrdu6JzXWqQ4- zNAJJ?UT4+kQI@DI{ja0?b5Z%aU!8u&5|#NyD=iOHbdHD3X4JAoXWOw5-Gz(JyJ4PO zB10sDLe5%Vs7Nlk(SmI`1DUI&P zMddkLLW+zbDx>ECEpJp*?izbM^IA&z^S(C;-Ix37Yn<=(30rPhN_l+NCb5=BDqh(n z!Pb_R_+{D8pu2Oi@)<+%DO{Y4rFFEtQgL$U*Ko^VDP?!gu{>ISsd2tlR_)7)U(c1Y z>{xz-mS^hWjzyhItbIMqQp&gI_#Q|n+-UFp?MHOQ?r^2Nhy9Ax@=jgaa6{DRC;duW zO4-k|cdV9w>IP{YM@Qn-Txk#6ivQL2Kz%Nyr(E2!OP10;9uBXJbj1yj=sK=EekE7h zOYcu7w7pQnCHll2$ui4Q+D`b5d)j`e``d578FL$}&y}{+sA(r{PtP^}DWm)9Q8%q0YnW!+*C%35cj9SSH;Pi5( zJ;qGfq3w}6pzN|sQ@0$6v6S{XFWejHmYdQsq2J{`xMf^vuPz?%wY^ds5bpT=_ zIe&4(biqZk{ap;VoQuh0;AV{%R7?gVx69!&#N^gGLgNP&lU3)-s6JdwzEO2Go=`EF z-fb{N9{@1%5hxa)?hrk3>Tf}cpi@K!9_6YTTSC36~U)l&089Pm7 zRy#;@GsMo#>8A0Mij|$x6Wy1Klb`IN##1UzX2Pe7bO2zYax`?s(*Z7ScCGwK2V7L% z^J_HTQc)R0*Guy@L}TeRA5R~+XdJDAkY2cGjM1qz9#hfyuBHF34Umb(zB~#~H@Ilr z3vMB0aS=>1HPv`cMes=7QbLL$f>qU$8o#LsPG!p={c#ZtkA!GEry}_6*?T|(AQQp% z&k#JF;Uc(x9f6d}MKB-rQR6)o!F$kG31NmPEbWft=?@o$quF7kCoT%({Yp9>P*M19 zmTRm9kcq;+_#d7waZ$ME{zA&*qA_Ll)$xLg#*==OgeyZd){5D6{Gg(7x@SiEVv!qBUfj;B;ye4pYuv;Z(s*m8Zx(_txZE`a#47<50c^45Gzx%Hl99nv2ztXFuii|&iha6cud7FKYnt2RZAJ4 zb9`{b(`_zZ`6HqJn~EHnE{<%09x4P?iPQ}JQ m=%PQLii16GBc9H4ad4Le>-JD_F*D@V@t%r{mo1lWAN4=Y?(pIO literal 0 HcmV?d00001 diff --git a/src/ballet/reedsol/fd_reedsol.c b/src/ballet/reedsol/fd_reedsol.c new file mode 100644 index 00000000000..39887774a9f --- /dev/null +++ b/src/ballet/reedsol/fd_reedsol.c @@ -0,0 +1,16 @@ +#include "fd_reedsol.h" +#include "fd_reedsol_internal.h" + +void fd_reedsol_encode_fini( fd_reedsol_t * rs ) { +#if FD_HAS_GFNI + if( FD_LIKELY( (rs->data_shred_cnt==32UL) & (rs->parity_shred_cnt==32UL ) ) ) + fd_reedsol_encode_32_32( rs->shred_sz, (uchar const * *)rs->data_shred, rs->parity_shred, rs->scratch ); + else + fd_reedsol_encode( rs->shred_sz, (uchar const * *)rs->data_shred, rs->data_shred_cnt, rs->parity_shred, rs->parity_shred_cnt ); +#else + fd_reedsol_encode( rs->shred_sz, (uchar const * *)rs->data_shred, rs->data_shred_cnt, rs->parity_shred, rs->parity_shred_cnt ); +#endif + + rs->data_shred_cnt = 0UL; + rs->parity_shred_cnt = 0UL; +} diff --git a/src/ballet/reedsol/fd_reedsol.h b/src/ballet/reedsol/fd_reedsol.h new 
file mode 100644
index 00000000000..16607a13477
--- /dev/null
+++ b/src/ballet/reedsol/fd_reedsol.h
@@ -0,0 +1,151 @@
+#ifndef HEADER_fd_src_ballet_reedsol_fd_reedsol_h
+#define HEADER_fd_src_ballet_reedsol_fd_reedsol_h
+
+/* fd_reedsol provides APIs for producing Reed-Solomon encoded parity
+   data and for reconstructing missing data from parity data.  The
+   encoding process is optimized in general, and highly optimized for
+   Turbine's typical case.
+
+   Reed-Solomon works in GF(2^8), i.e. the codeword size is 1 byte, but
+   is typically used on each byte of larger pieces of data called
+   shreds (a Solana-specific term, often called shards elsewhere in the
+   literature).  Mathematically, the encoding process forms a vector
+   from the input data, taking one byte from each shred, and
+   left-multiplies the vector by a constant matrix in GF(2^8).  The
+   resulting vector contains one byte for each of the parity shreds.
+   Solana also calls parity shreds "code" shreds, but due to the naming
+   collision with executable code, we have opted for "parity."  This
+   mathematical structure thus forces each shred to be of identical size
+   but doesn't otherwise impose any size restrictions. */
+
+#include "../../util/fd_util.h"
+
+// TODO: Define decode API
+//#define SET_NAME reedsol_shred_set
+//#include "../../util/tmpl/fd_smallset.c"
+
+/* FD_REEDSOL_{DATA, PARITY}_SHREDS_MAX describe the inclusive maximum
+   number of data and parity shreds that this implementation supports.
+   These limits are not mathematical limits, but limits based on current
+   Solana needs and performance.  It is common for both shred counts to
+   be at their maximum values. */
+#define FD_REEDSOL_DATA_SHREDS_MAX   (32UL)
+#define FD_REEDSOL_PARITY_SHREDS_MAX (32UL)
+
+
+#define FD_REEDSOL_ALIGN     (128UL)
+#define FD_REEDSOL_FOOTPRINT (1664UL)
+
+struct __attribute__((aligned(FD_REEDSOL_ALIGN))) fd_reedsol_private {
+  uchar scratch[ 1024 ]; /* Used for the ultra high performance implementation */
+
+  /* shred_sz: the size of each shred in bytes (all shreds must be the
+     same size) */
+  ulong shred_sz;
+
+  /* {data, parity}_shred_cnt: the number of data or parity shreds
+     (respectively) that have been added to the in-process operation */
+  ulong data_shred_cnt;
+  ulong parity_shred_cnt;
+
+  /* {data, parity}_shred: pointers to the first byte of each shred */
+  uchar * data_shred[ FD_REEDSOL_DATA_SHREDS_MAX ];
+  uchar * parity_shred[ FD_REEDSOL_PARITY_SHREDS_MAX ];
+
+  /* {data, parity}_shred_valid: whether the shred at the corresponding
+     index contains valid data.  Used only for decoding operations. */
+  //fd_reedsol_shred_set_t data_shred_valid;
+  //fd_reedsol_shred_set_t parity_shred_valid;
+};
+
+typedef struct fd_reedsol_private fd_reedsol_t;
+
+FD_PROTOTYPES_BEGIN
+
+/* fd_reedsol_{align, footprint} return the alignment and footprint
+   required in bytes for a fd_reedsol_t. */
+static inline FD_FN_CONST ulong fd_reedsol_align(     void ) { return FD_REEDSOL_ALIGN;     }
+static inline FD_FN_CONST ulong fd_reedsol_footprint( void ) { return FD_REEDSOL_FOOTPRINT; }
+
+
+/* fd_reedsol_encode_init: starts a Reed-Solomon encoding operation that
+   will encode shreds of size shred_sz.  mem is assumed to be a piece
+   of memory that meets the alignment and size constraints specified
+   above.  Takes a write interest in mem that persists until the
+   operation is cancelled or finalized.  shred_sz must be >= 32.  Returns
+   mem.
*/ + +static inline fd_reedsol_t * +fd_reedsol_encode_init( void * mem, ulong shred_sz ) { + fd_reedsol_t * rs = (fd_reedsol_t *)mem; + + rs->shred_sz = shred_sz; + rs->data_shred_cnt = 0UL; + rs->parity_shred_cnt = 0UL; + + return rs; +} + +/* fd_reedsol_encode_add_data_shred: adds a shred consisting of the + memory [ptr, ptr+shred_sz) to the in-process Reed-Solomon encoding + operation. Takes a read interest in the shred that persists for + the lifetime of the operation (i.e. until finalized or cancelled). + Data shreds have no alignment restrictions and can overlap with each + other but should not overlap with any parity shreds in the same + encoding operation. + + Note: The order in which data shreds are added relative to other data + shreds matters. It impacts the parity data produced by the encoding + operation. */ + +static inline fd_reedsol_t * +fd_reedsol_encode_add_data_shred( fd_reedsol_t * rs, void const * ptr ) { + /* The argument is const to make it clear that an encoding operation + won't write to the shred, but we store them in the struct as + non-const so that the same struct can be used for encoding and + decoding operations, in which the data shreds actually are + writeable. */ + rs->data_shred[ rs->data_shred_cnt++ ] = (uchar *)ptr; + return rs; +} + +/* fd_reedsol_encode_add_parity_shred: adds the block of memory + [ptr, ptr+shred_sz) to the in-process Reed-Solomon encoding operation + as the destination of a parity shred. Takes a write interest in the + memory that persists for the lifetime of the operation (i.e. until + finalized or cancelled). Parity shreds have no alignment + restrictions but must not overlap with each other or with data shreds + in the same operation (U.B. if they overlap). + + Note: The order in which parity shreds are added matters only insofar + as which data will be written to which location. */ + +static inline fd_reedsol_t * +fd_reedsol_encode_add_parity_shred( fd_reedsol_t * rs, void * ptr ) { + rs->parity_shred[ rs->parity_shred_cnt++ ] = (uchar *)ptr; + return rs; +} + + +/* fd_reedsol_encode_cancel cancels an in-progress encoding operation. + Releases any read or write interests in any shreds that were added to + the operation. Upon return, the contents of the parity shreds are + undefined. */ + +static inline void +fd_reedsol_encode_cancel( fd_reedsol_t * rs ) { + rs->data_shred_cnt = 0UL; + rs->parity_shred_cnt = 0UL; +} + +/* fd_reedsol_encode_fini finishes the in-progress encoding operation. + Upon return, the parity shreds will be filled with the correct + Reed-Solomon encoded parity data. Upon return, this will no longer + have any read or write interest in any of the provided shreds. 
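+
+   As an illustration only (not part of this API's contract; d_shred,
+   p_shred, d_cnt, p_cnt and shred_sz are hypothetical caller-provided
+   values, with d_cnt<=FD_REEDSOL_DATA_SHREDS_MAX and
+   p_cnt<=FD_REEDSOL_PARITY_SHREDS_MAX), a full encoding pass might
+   look like:
+
+     uchar _mem[ FD_REEDSOL_FOOTPRINT ] __attribute__((aligned(FD_REEDSOL_ALIGN)));
+     fd_reedsol_t * rs = fd_reedsol_encode_init( _mem, shred_sz );
+     for( ulong i=0UL; i<d_cnt; i++ ) fd_reedsol_encode_add_data_shred  ( rs, d_shred[ i ] );
+     for( ulong i=0UL; i<p_cnt; i++ ) fd_reedsol_encode_add_parity_shred( rs, p_shred[ i ] );
+     fd_reedsol_encode_fini( rs ); /* parity shreds now populated */
+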
*/ +void fd_reedsol_encode_fini( fd_reedsol_t * rs ); + + +/* FIXME: Add decode API */ + +#endif /* HEADER_fd_src_ballet_reedsol_fd_reedsol_h */ + diff --git a/src/ballet/reedsol/fd_reedsol_arith_avx2.h b/src/ballet/reedsol/fd_reedsol_arith_avx2.h new file mode 100644 index 00000000000..960e3a6438d --- /dev/null +++ b/src/ballet/reedsol/fd_reedsol_arith_avx2.h @@ -0,0 +1,41 @@ +#ifndef HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_avx2_h +#define HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_avx2_h +#include "../../util/simd/fd_avx.h" + +#define FD_REEDSOL_GF_ARITH_DEFINED 1 + +typedef wb_t gf_t; +#define GF_WIDTH W_FOOTPRINT +#define gf_ldu wb_ldu +#define gf_stu wb_stu +#define gf_zero wb_zero + +#ifdef INCLUDE_CONSTANTS +FD_IMPORT_BINARY( fd_reedsol_arith_consts_avx_mul, "src/ballet/reedsol/constants/avx2_constants.bin" ); +#undef INCLUDE_CONSTANTS +#else +extern uchar const fd_reedsol_arith_consts_avx_mul[] __attribute__((aligned(128))); +#endif + +static uchar const fd_reedsol_arith_scale4[ 256UL ] = { + 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240, 29, 13, 61, 45, 93, 77, 125, 109, 157, 141, 189, 173, 221, 205, 253, 237, 58, 42, 26, 10, 122, + 106, 90, 74, 186, 170, 154, 138, 250, 234, 218, 202, 39, 55, 7, 23, 103, 119, 71, 87, 167, 183, 135, 151, 231, 247, 199, 215, 116, 100, 84, 68, 52, 36, 20, 4, 244, 228, + 212, 196, 180, 164, 148, 132, 105, 121, 73, 89, 41, 57, 9, 25, 233, 249, 201, 217, 169, 185, 137, 153, 78, 94, 110, 126, 14, 30, 46, 62, 206, 222, 238, 254, 142, 158, 174, + 190, 83, 67, 115, 99, 19, 3, 51, 35, 211, 195, 243, 227, 147, 131, 179, 163, 232, 248, 200, 216, 168, 184, 136, 152, 104, 120, 72, 88, 40, 56, 8, 24, 245, 229, 213, 197, + 181, 165, 149, 133, 117, 101, 85, 69, 53, 37, 21, 5, 210, 194, 242, 226, 146, 130, 178, 162, 82, 66, 114, 98, 18, 2, 50, 34, 207, 223, 239, 255, 143, 159, 175, 191, 79, + 95, 111, 127, 15, 31, 47, 63, 156, 140, 188, 172, 220, 204, 252, 236, 28, 12, 60, 44, 92, 76, 124, 108, 129, 145, 161, 177, 193, 209, 225, 241, 1, 17, 33, 49, 65, 81, + 97, 113, 166, 182, 134, 150, 230, 246, 198, 214, 38, 54, 6, 22, 102, 118, 70, 86, 187, 171, 155, 139, 251, 235, 219, 203, 59, 43, 27, 11, 123, 107, 91, 75 }; /* Needs to be available at compile time, not link time, to allow the optimizer to use it */ + +#define GF_ADD wb_xor +#define GF_OR wb_or +#define GF_MUL( a, c ) (__extension__({ \ + wb_t lo = wb_and( a, wb_bcast( 0x0F ) ); \ + wb_t hi = wb_shr( a, 4 ); \ + wb_t p0 = _mm256_shuffle_epi8( wb_ld( fd_reedsol_arith_consts_avx_mul + 32*c ), lo ); \ + wb_t p1 = _mm256_shuffle_epi8( wb_ld( fd_reedsol_arith_consts_avx_mul + 32*fd_reedsol_arith_scale4[ c ] ), hi ); \ + /* c is known at compile time, so this is not a runtime branch */ \ + (c==0) ? wb_zero() : ( (c==1) ? 
a : wb_xor( p0, p1 ) ); } ))
+
+
+
+#endif /*HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_avx2_h */
diff --git a/src/ballet/reedsol/fd_reedsol_arith_gfni.h b/src/ballet/reedsol/fd_reedsol_arith_gfni.h
new file mode 100644
index 00000000000..8c0f47d8424
--- /dev/null
+++ b/src/ballet/reedsol/fd_reedsol_arith_gfni.h
@@ -0,0 +1,51 @@
+#ifndef HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_gfni_h
+#define HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_gfni_h
+#include "../../util/simd/fd_avx.h"
+
+#define FD_REEDSOL_GF_ARITH_DEFINED 1
+
+typedef wb_t gf_t;
+#define GF_WIDTH W_FOOTPRINT
+#define gf_ldu wb_ldu
+#define gf_stu wb_stu
+#define gf_zero wb_zero
+
+#ifdef INCLUDE_CONSTANTS
+FD_IMPORT_BINARY( fd_reedsol_arith_consts_gfni_mul, "src/ballet/reedsol/constants/gfni_constants.bin" );
+#undef INCLUDE_CONSTANTS
+#else
+extern uchar const fd_reedsol_arith_consts_gfni_mul[] __attribute__((aligned(128)));
+#endif
+
+#define GF_ADD( a, b ) wb_xor( a, b )
+
+/* Older versions of GCC have a bug that causes them to think
+   _mm256_gf2p8affine_epi64_epi8 is symmetric in its first two arguments
+   (other than that the second argument can be a memory address).  That's
+   totally incorrect.  It was fixed in GCC 10.  See
+   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92889 for more details. */
+#if !FD_USING_CLANG
+#define GCC_VERSION (__GNUC__ * 10000 \
+                      + __GNUC_MINOR__ * 100 \
+                      + __GNUC_PATCHLEVEL__)
+#endif
+
+#if FD_USING_CLANG || (GCC_VERSION >= 100000)
+/* c is known at compile time, so this is not a runtime branch */
+#define GF_MUL( a, c ) ((c==0) ? wb_zero() : ( (c==1) ? (a) : _mm256_gf2p8affine_epi64_epi8( a, wb_ld( fd_reedsol_arith_consts_gfni_mul + 32*(c) ), 0 ) ))
+
+#else
+
+#define GF_MUL( a, c ) (__extension__({                                            \
+    wb_t product;                                                                  \
+    __asm__( "vgf2p8affineqb $0x0, %[cons], %[vec], %[out]"                        \
+        : [out]"=x"(product)                                                       \
+        : [cons]"xm"( wb_ld( fd_reedsol_arith_consts_gfni_mul + 32*(c) ) ),        \
+          [vec]"x" (a)  );                                                         \
+    (c==0) ? wb_zero() : ( (c==1) ? (a) : product ); }))
+
+#endif
+
+
+
+#endif /*HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_gfni_h */
diff --git a/src/ballet/reedsol/fd_reedsol_arith_none.h b/src/ballet/reedsol/fd_reedsol_arith_none.h
new file mode 100644
index 00000000000..5ffed29172f
--- /dev/null
+++ b/src/ballet/reedsol/fd_reedsol_arith_none.h
@@ -0,0 +1,33 @@
+#ifndef HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_none_h
+#define HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_none_h
+#include "../../util/fd_util_base.h"
+
+#define FD_REEDSOL_GF_ARITH_DEFINED 1
+
+typedef ulong gf_t; /* One byte stored in a ulong */
+#define GF_WIDTH 1UL
+#define W_ATTR
+
+static inline gf_t gf_ldu( uchar const * addr ) { return (ulong)(*addr); }
+static inline void gf_stu( uchar * addr, gf_t v ) { *addr = (uchar)v; }
+#define gf_zero() (0UL)
+
+#ifdef INCLUDE_CONSTANTS
+FD_IMPORT_BINARY( fd_reedsol_arith_consts_generic_mul, "src/ballet/reedsol/constants/generic_constants.bin" );
+#undef INCLUDE_CONSTANTS
+#else
+extern uchar const fd_reedsol_arith_consts_generic_mul[] __attribute__((aligned(128)));
+#endif
+static FD_FN_UNUSED short const * gf_arith_log_tbl    = (short const *)fd_reedsol_arith_consts_generic_mul; /* Indexed [0, 256) */
+static FD_FN_UNUSED uchar const * gf_arith_invlog_tbl = fd_reedsol_arith_consts_generic_mul + 256UL*sizeof(short) + 512UL*sizeof(uchar); /* Indexed [-512, 512) */
+
+#define GF_ADD( a, b ) ((a)^(b))
+
+/* c is known at compile time, so this is not a runtime branch.
+   Exposing log_tbl at compile time would let the compiler remove a
+   branch, but we don't care too much about performance in this case. */
+#define GF_MUL( a, c ) ((c==0) ? 0UL : ( (c==1) ? (a) : (ulong)gf_arith_invlog_tbl[ gf_arith_log_tbl[ a ] + gf_arith_log_tbl[ c ] ] ))
+
+
+
+#endif /*HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_none_h */
diff --git a/src/ballet/reedsol/fd_reedsol_fft.h b/src/ballet/reedsol/fd_reedsol_fft.h
new file mode 100644
index 00000000000..44146fba8b5
--- /dev/null
+++ b/src/ballet/reedsol/fd_reedsol_fft.h
@@ -0,0 +1,475 @@
+
+/* Note: This file is auto generated. */
+#ifndef HEADER_fd_src_ballet_reedsol_fd_reedsol_fft_h
+#define HEADER_fd_src_ballet_reedsol_fd_reedsol_fft_h
+
+/* This file implements the FFT-like operator described in:
+   S. -J. Lin, T. Y. Al-Naffouri, Y. S. Han and W. -H. Chung, "Novel
+   Polynomial Basis With Fast Fourier Transform and Its Application to
+   Reed–Solomon Erasure Codes," in IEEE Transactions on Information
+   Theory, vol. 62, no. 11, pp. 6284-6299, Nov. 2016, doi:
+   10.1109/TIT.2016.2608892.
+
+   The main macros this file provides are FD_REEDSOL_GENERATE_FFT and
+   FD_REEDSOL_GENERATE_IFFT.  The rest of this file consists of
+   auto-generated implementation details.
+
+   Like the normal FFT and IFFT, the operator implemented in this file
+   (and henceforward referred to as FFT and IFFT) transforms between one
+   basis and another.  Rather than transformations of a signal between
+   the frequency domain and the time domain, these operators transform a
+   polynomial between domains we call the "evaluation basis" and the
+   "coefficient basis".
+
+   In the evaluation basis, a polynomial is represented by its values at
+   successive points.  Equivalently, the polynomial is represented as a
+   linear combination of the Lagrange basis polynomials (briefly,
+   e_i(i)=1, e_i(j)=0 when j != i).  In the coefficient basis, a
+   polynomial is represented as a linear combination of basis
+   polynomials for a specific, carefully chosen basis fully described in
+   the paper and summarized below.
+
+   Let N, a power of 2, be the size of the transform.  To define the
+   coefficient basis, we first define s_j(x) for j=0, ..., lg(N)
+       s_j(x) = x*(x+1)*(x+2)*...*(x+(2^j-1))
+   where the multiplication and addition are GF(2^8) operations, but
+   2^j-1 is computed as an integer.  This is equivalent to taking the
+   GF product of all elements that are identical to x in all but the
+   last j bits.  s_j(x) has degree 2^j.
+
+   Now, we define a normalized version, S_j(x) (called s bar in the
+   paper):
+       S_j(x) = s_j(x) / s_j( 2^j )
+   Again, the division is a field operation, but 2^j is an integer
+   operation.
+
+   Finally, the basis elements X_i(x) for i=0, ..., N-1 are defined by
+   interpreting i as a bitmask and taking the product of the
+   corresponding S_j(x) where the bit is set.  For example:
+       X_0(x) = 1,
+       X_3(x) = S_0(x) * S_1(x),
+       X_6(x) = S_1(x) * S_2(x).
+   The multiplication happens in GF(2^8) of course.  X_i(x) is a
+   polynomial of degree i.
+
+   */
+
+#ifndef FD_REEDSOL_GF_ARITH_DEFINED
+#error "You must include fd_reedsol_arith_gfni.h or fd_reedsol_arith_avx2.h before including this file"
+#endif
+
+
+/* FD_REEDSOL_GENERATE_FFT: Inserts code to transform n input values from the
+   coefficient basis to the evaluation basis, i.e. evaluating the
+   polynomial described by the input at points b, b+1, b+2, ... b+n-1
+   (where this arithmetic on b is integer arithmetic, not GF(2^8)
+   arithmetic).
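+
+   For example (an illustrative sketch only; v0..v3 stand for gf_t
+   variables the caller has already loaded), evaluating a 4-element
+   coefficient-basis polynomial at the points 8, 9, 10, 11 in place:
+
+     FD_REEDSOL_GENERATE_FFT( 4, 8, v0, v1, v2, v3 );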
+
+   FD_REEDSOL_GENERATE_IFFT: Inserts code to transform n input values
+   from the evaluation basis to the coefficient basis, describing a
+   polynomial P(x) of degree no more than n such that P(b) = in0,
+   P(b+1)=in1, ... P(b+n-1)=in_{n-1} (where this arithmetic on b is
+   integer arithmetic, not GF(2^8) arithmetic).
+
+   For both macros, n must be a power of 2 (only 4, 8, 16, 32 are
+   emitted by the code generator at the moment), and b must be a
+   non-negative multiple of n no more than 32.  Both b and n must be
+   literal integer values.
+
+   The remaining n arguments should be vector variables of type gf_t.
+   These are used as input and output, since there's no other good way
+   to return n vector values.  As such, this macro is not robust.
+
+   The FFT and IFFT are computed in a vectorized fashion, i.e. the
+   transform of the ith byte is computed and stored in the ith byte of
+   the output for each i independently. */
+
+#define FD_REEDSOL_PRIVATE_EXPAND( M, ... ) M( __VA_ARGS__ )
+
+#define FD_REEDSOL_GENERATE_FFT( n, b, ...) FD_REEDSOL_PRIVATE_EXPAND( FD_REEDSOL_FFT_IMPL_##n, FD_CONCAT4(FD_REEDSOL_FFT_CONSTANTS_, n, _, b), __VA_ARGS__ )
+#define FD_REEDSOL_GENERATE_IFFT( n, b, ...) FD_REEDSOL_PRIVATE_EXPAND( FD_REEDSOL_IFFT_IMPL_##n, FD_CONCAT4(FD_REEDSOL_IFFT_CONSTANTS_, n, _, b), __VA_ARGS__ )
+
+
+
+
+#define FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( inout0, inout1, c)   \
+  do {                                                         \
+    inout0 = GF_ADD( inout0, GF_MUL( inout1, c ) );            \
+    inout1 = GF_ADD( inout1, inout0 );                         \
+  } while( 0 )
+
+
+
+#define FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( inout0, inout1, c)  \
+  do {                                                         \
+    inout1 = GF_ADD( inout1, inout0 );                         \
+    inout0 = GF_ADD( inout0, GF_MUL( inout1, c ) );            \
+  } while( 0 )
+
+
+
+#define FD_REEDSOL_IFFT_CONSTANTS_32_0   0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 0, 6, 28, 26, 120, 126, 100, 98, 0, 22, 97, 119, 0, 11, 0
+#define FD_REEDSOL_IFFT_CONSTANTS_32_32  32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 237, 235, 241, 247, 149, 147, 137, 143, 38, 48, 71, 81, 174, 165, 71
+#define FD_REEDSOL_IFFT_IMPL_32( c_00, c_01, c_02, c_03, c_04, c_05,    \
+    c_06, c_07, c_08, c_09, c_10, c_11, c_12, c_13, c_14, c_15, c_16,   \
+    c_17, c_18, c_19, c_20, c_21, c_22, c_23, c_24, c_25, c_26, c_27,   \
+    c_28, c_29, c_30, in00, in01, in02, in03, in04, in05, in06, in07,   \
+    in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18,   \
+    in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29,   \
+    in30, in31)                                                         \
+  do {                                                                  \
+    FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in00, in01, c_00 );              \
+    FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in02, in03, c_01 );              \
+    FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in04, in05, c_02 );              \
+    FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in06, in07, c_03 );              \
+    FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in08, in09, c_04 );              \
+    FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in10, in11, c_05 );              \
+    FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in12, in13, c_06 );              \
+    FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in14, in15, c_07 );              \
+    FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in16, in17, c_08 );              \
+    FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in18, in19, c_09 );              \
+    FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in20, in21, c_10 );              \
+    FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in22, in23, c_11 );              \
+    FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in24, in25, c_12 );              \
+    FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in26, in27, c_13 );              \
+    FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in28, in29, c_14 );              \
+    FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in30, in31, c_15 );              \
+    FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in00, in02, c_16 );              \
+    FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in04, in06, c_17 );              \
FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in08, in10, c_18 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in12, in14, c_19 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in16, in18, c_20 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in20, in22, c_21 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in24, in26, c_22 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in28, in30, c_23 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in00, in04, c_24 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in08, in12, c_25 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in16, in20, c_26 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in24, in28, c_27 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in00, in08, c_28 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in16, in24, c_29 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in00, in16, c_30 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in08, in24, c_30 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in04, in12, c_28 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in20, in28, c_29 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in04, in20, c_30 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in12, in28, c_30 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in02, in06, c_24 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in10, in14, c_25 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in18, in22, c_26 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in26, in30, c_27 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in02, in10, c_28 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in18, in26, c_29 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in02, in18, c_30 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in10, in26, c_30 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in06, in14, c_28 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in22, in30, c_29 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in06, in22, c_30 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in14, in30, c_30 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in01, in03, c_16 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in05, in07, c_17 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in09, in11, c_18 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in13, in15, c_19 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in17, in19, c_20 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in21, in23, c_21 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in25, in27, c_22 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in29, in31, c_23 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in01, in05, c_24 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in09, in13, c_25 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in17, in21, c_26 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in25, in29, c_27 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in01, in09, c_28 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in17, in25, c_29 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in01, in17, c_30 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in09, in25, c_30 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in05, in13, c_28 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in21, in29, c_29 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in05, in21, c_30 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in13, in29, c_30 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in03, in07, c_24 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in11, in15, c_25 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in19, in23, c_26 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in27, in31, c_27 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in03, in11, c_28 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in19, in27, c_29 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in03, in19, c_30 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in11, in27, c_30 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in07, in15, c_28 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in23, in31, c_29 ); \ + 
FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in07, in23, c_30 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in15, in31, c_30 ); \ + } while( 0 ) + + + +#define FD_REEDSOL_FFT_CONSTANTS_32_0 0, 0, 11, 0, 22, 97, 119, 0, 6, 28, 26, 120, 126, 100, 98, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 +#define FD_REEDSOL_FFT_CONSTANTS_32_32 71, 174, 165, 38, 48, 71, 81, 237, 235, 241, 247, 149, 147, 137, 143, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62 +#define FD_REEDSOL_FFT_IMPL_32( c_00, c_01, c_02, c_03, c_04, c_05 , \ + c_06, c_07, c_08, c_09, c_10, c_11, c_12, c_13, c_14, c_15, c_16, \ + c_17, c_18, c_19, c_20, c_21, c_22, c_23, c_24, c_25, c_26, c_27, \ + c_28, c_29, c_30, in00, in01, in02, in03, in04, in05, in06, in07, \ + in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, \ + in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, \ + in30, in31) \ + do { \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in00, in16, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in08, in24, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in00, in08, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in16, in24, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in04, in20, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in12, in28, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in04, in12, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in20, in28, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in00, in04, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in08, in12, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in16, in20, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in24, in28, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in02, in18, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in10, in26, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in02, in10, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in18, in26, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in06, in22, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in14, in30, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in06, in14, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in22, in30, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in02, in06, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in10, in14, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in18, in22, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in26, in30, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in00, in02, c_07 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in04, in06, c_08 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in08, in10, c_09 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in12, in14, c_10 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in16, in18, c_11 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in20, in22, c_12 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in24, in26, c_13 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in28, in30, c_14 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in01, in17, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in09, in25, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in01, in09, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in17, in25, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in05, in21, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in13, in29, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in05, in13, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in21, in29, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in01, in05, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in09, in13, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in17, in21, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in25, in29, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in03, in19, c_00 ); \ + 
FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in11, in27, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in03, in11, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in19, in27, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in07, in23, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in15, in31, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in07, in15, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in23, in31, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in03, in07, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in11, in15, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in19, in23, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in27, in31, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in01, in03, c_07 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in05, in07, c_08 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in09, in11, c_09 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in13, in15, c_10 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in17, in19, c_11 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in21, in23, c_12 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in25, in27, c_13 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in29, in31, c_14 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in00, in01, c_15 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in02, in03, c_16 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in04, in05, c_17 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in06, in07, c_18 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in08, in09, c_19 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in10, in11, c_20 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in12, in13, c_21 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in14, in15, c_22 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in16, in17, c_23 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in18, in19, c_24 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in20, in21, c_25 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in22, in23, c_26 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in24, in25, c_27 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in26, in27, c_28 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in28, in29, c_29 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in30, in31, c_30 ); \ + } while( 0 ) + + + +#define FD_REEDSOL_IFFT_CONSTANTS_16_0 0, 2, 4, 6, 8, 10, 12, 14, 0, 6, 28, 26, 0, 22, 0 +#define FD_REEDSOL_IFFT_CONSTANTS_16_16 16, 18, 20, 22, 24, 26, 28, 30, 120, 126, 100, 98, 97, 119, 11 +#define FD_REEDSOL_IFFT_CONSTANTS_16_32 32, 34, 36, 38, 40, 42, 44, 46, 237, 235, 241, 247, 38, 48, 174 +#define FD_REEDSOL_IFFT_IMPL_16( c_00, c_01, c_02, c_03, c_04, c_05 , \ + c_06, c_07, c_08, c_09, c_10, c_11, c_12, c_13, c_14, in00, in01 , \ + in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12 , \ + in13, in14, in15) \ + do { \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in00, in01, c_00 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in02, in03, c_01 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in04, in05, c_02 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in06, in07, c_03 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in08, in09, c_04 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in10, in11, c_05 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in12, in13, c_06 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in14, in15, c_07 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in00, in02, c_08 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in04, in06, c_09 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in08, in10, c_10 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in12, in14, c_11 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in00, in04, c_12 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in08, in12, c_13 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in00, in08, c_14 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in04, in12, c_14 ); \ + 
FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in02, in06, c_12 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in10, in14, c_13 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in02, in10, c_14 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in06, in14, c_14 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in01, in03, c_08 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in05, in07, c_09 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in09, in11, c_10 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in13, in15, c_11 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in01, in05, c_12 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in09, in13, c_13 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in01, in09, c_14 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in05, in13, c_14 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in03, in07, c_12 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in11, in15, c_13 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in03, in11, c_14 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in07, in15, c_14 ); \ + } while( 0 ) + + + +#define FD_REEDSOL_FFT_CONSTANTS_16_0 0, 0, 22, 0, 6, 28, 26, 0, 2, 4, 6, 8, 10, 12, 14 +#define FD_REEDSOL_FFT_CONSTANTS_16_16 11, 97, 119, 120, 126, 100, 98, 16, 18, 20, 22, 24, 26, 28, 30 +#define FD_REEDSOL_FFT_CONSTANTS_16_32 174, 38, 48, 237, 235, 241, 247, 32, 34, 36, 38, 40, 42, 44, 46 +#define FD_REEDSOL_FFT_IMPL_16( c_00, c_01, c_02, c_03, c_04, c_05 , \ + c_06, c_07, c_08, c_09, c_10, c_11, c_12, c_13, c_14, in00, in01, \ + in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, \ + in13, in14, in15) \ + do { \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in00, in08, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in04, in12, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in00, in04, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in08, in12, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in02, in10, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in06, in14, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in02, in06, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in10, in14, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in00, in02, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in04, in06, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in08, in10, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in12, in14, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in01, in09, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in05, in13, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in01, in05, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in09, in13, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in03, in11, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in07, in15, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in03, in07, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in11, in15, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in01, in03, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in05, in07, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in09, in11, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in13, in15, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in00, in01, c_07 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in02, in03, c_08 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in04, in05, c_09 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in06, in07, c_10 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in08, in09, c_11 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in10, in11, c_12 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in12, in13, c_13 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in14, in15, c_14 ); \ + } while( 0 ) + + + +#define FD_REEDSOL_IFFT_CONSTANTS_8_0 0, 2, 4, 6, 0, 6, 0 +#define FD_REEDSOL_IFFT_CONSTANTS_8_8 8, 10, 12, 14, 28, 26, 22 +#define FD_REEDSOL_IFFT_CONSTANTS_8_16 16, 18, 20, 
22, 120, 126, 97 +#define FD_REEDSOL_IFFT_CONSTANTS_8_24 24, 26, 28, 30, 100, 98, 119 +#define FD_REEDSOL_IFFT_CONSTANTS_8_32 32, 34, 36, 38, 237, 235, 38 +#define FD_REEDSOL_IFFT_IMPL_8( c_00, c_01, c_02, c_03, c_04, c_05 , \ + c_06, in00, in01, in02, in03, in04, in05, in06, in07) \ + do { \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in00, in01, c_00 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in02, in03, c_01 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in04, in05, c_02 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in06, in07, c_03 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in00, in02, c_04 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in04, in06, c_05 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in00, in04, c_06 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in02, in06, c_06 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in01, in03, c_04 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in05, in07, c_05 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in01, in05, c_06 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in03, in07, c_06 ); \ + } while( 0 ) + + + +#define FD_REEDSOL_FFT_CONSTANTS_8_0 0, 0, 6, 0, 2, 4, 6 +#define FD_REEDSOL_FFT_CONSTANTS_8_8 22, 28, 26, 8, 10, 12, 14 +#define FD_REEDSOL_FFT_CONSTANTS_8_16 97, 120, 126, 16, 18, 20, 22 +#define FD_REEDSOL_FFT_CONSTANTS_8_24 119, 100, 98, 24, 26, 28, 30 +#define FD_REEDSOL_FFT_CONSTANTS_8_32 38, 237, 235, 32, 34, 36, 38 +#define FD_REEDSOL_FFT_IMPL_8( c_00, c_01, c_02, c_03, c_04, c_05 , \ + c_06, in00, in01, in02, in03, in04, in05, in06, in07) \ + do { \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in00, in04, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in02, in06, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in00, in02, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in04, in06, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in01, in05, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in03, in07, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in01, in03, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in05, in07, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in00, in01, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in02, in03, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in04, in05, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in06, in07, c_06 ); \ + } while( 0 ) + + + +#define FD_REEDSOL_IFFT_CONSTANTS_4_0 0, 2, 0 +#define FD_REEDSOL_IFFT_CONSTANTS_4_4 4, 6, 6 +#define FD_REEDSOL_IFFT_CONSTANTS_4_8 8, 10, 28 +#define FD_REEDSOL_IFFT_CONSTANTS_4_12 12, 14, 26 +#define FD_REEDSOL_IFFT_CONSTANTS_4_16 16, 18, 120 +#define FD_REEDSOL_IFFT_CONSTANTS_4_20 20, 22, 126 +#define FD_REEDSOL_IFFT_CONSTANTS_4_24 24, 26, 100 +#define FD_REEDSOL_IFFT_CONSTANTS_4_28 28, 30, 98 +#define FD_REEDSOL_IFFT_CONSTANTS_4_32 32, 34, 237 +#define FD_REEDSOL_IFFT_IMPL_4( c_00, c_01, c_02, in00, in01, in02 , \ + in03) \ + do { \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in00, in01, c_00 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in02, in03, c_01 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in00, in02, c_02 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in01, in03, c_02 ); \ + } while( 0 ) + + + +#define FD_REEDSOL_FFT_CONSTANTS_4_0 0, 0, 2 +#define FD_REEDSOL_FFT_CONSTANTS_4_4 6, 4, 6 +#define FD_REEDSOL_FFT_CONSTANTS_4_8 28, 8, 10 +#define FD_REEDSOL_FFT_CONSTANTS_4_12 26, 12, 14 +#define FD_REEDSOL_FFT_CONSTANTS_4_16 120, 16, 18 +#define FD_REEDSOL_FFT_CONSTANTS_4_20 126, 20, 22 +#define FD_REEDSOL_FFT_CONSTANTS_4_24 100, 24, 26 +#define FD_REEDSOL_FFT_CONSTANTS_4_28 98, 28, 30 +#define FD_REEDSOL_FFT_CONSTANTS_4_32 237, 32, 34 +#define FD_REEDSOL_FFT_IMPL_4( c_00, c_01, c_02, in00, in01, in02 , \ + in03) \ + do { \ + 
FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in00, in02, c_00 );   \
+    FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in01, in03, c_00 );   \
+    FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in00, in01, c_01 );   \
+    FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in02, in03, c_02 );   \
+  } while( 0 )
+
+
+
+#endif /* HEADER_fd_src_ballet_reedsol_fd_reedsol_fft_h */
diff --git a/src/ballet/reedsol/fd_reedsol_gfni_32.S b/src/ballet/reedsol/fd_reedsol_gfni_32.S
new file mode 100644
index 00000000000..5eafeb83253
--- /dev/null
+++ b/src/ballet/reedsol/fd_reedsol_gfni_32.S
@@ -0,0 +1,414 @@
+# void fd_reedsol_encode_32_32( ulong                 shred_sz,     (rdi)
+#                               uchar const * const * data_shred,   (rsi)
+#                               uchar       * const * parity_shred, (rdx)
+#                               uchar       *         _scratch )    (rcx)
+.section .rodata,"a",@progbits
+gfni_const_tbl:
+.align 32
+.incbin "src/ballet/reedsol/constants/gfni_constants.bin"
+.previous
+
+
+fd_reedsol_encode_32_32:
+.globl fd_reedsol_encode_32_32
+.cfi_startproc
+
+# This file implements the FFT-like O(n log n) algorithm for computing Reed
+# Solomon parity as described in
+#   S. -J. Lin, T. Y. Al-Naffouri, Y. S. Han and W. -H. Chung, "Novel
+#   Polynomial Basis With Fast Fourier Transform and Its Application to
+#   Reed–Solomon Erasure Codes," in IEEE Transactions on Information Theory,
+#   vol. 62, no. 11, pp. 6284-6299, Nov. 2016, doi: 10.1109/TIT.2016.2608892.
+#
+# Like any FFT operation, the core of the computation is a "butterfly":
+#
+#   x_i       >----------- + --\-------> y_i
+#                          ^     \
+#                     *C  /       \
+#                        /         V
+#   x_(i+2^k) >--/---------- + --> y_(i+2^k)
+#
+# i.e. y_i = x_i + C*x_(i+2^k),  y_(i+2^k) = x_(i+2^k) + y_i
+#
+# Unlike typical FFT butterflies, these are not symmetric and only require one
+# multiplication, but most of the ideas are similar.  We compute the parity
+# shreds by:
+#   1. Computing an IFFT with no shift on the data.  This finds the polynomial
+#      that interpolates the data, as expressed in the coefficient basis.
+#   2. Computing an FFT with a shift of 32 on the result of step 1.  This
+#      evaluates the polynomial at integers in [32, 64), which exactly gives us
+#      the 32 parity shreds we need.
+#
+# See fd_reedsol_fft.h for more details about the algorithm.  This uses
+# the same algorithm as fd_reedsol_fft.h, but this is at least 10%
+# faster than the best compiled version of the C code.  Depending on the
+# compiler, this can be much, much faster than the compiled C code.
+#
+# With GFNI, Intel gives us some very useful instructions, but they're
+# not exactly the friendliest to use.  vgf2p8mulb seems perfect, but it
+# uses the wrong reduction polynomial.  That's okay though, because we
+# don't need a fully general GF(2^8) component-wise vector
+# multiplication.  We only need GF(2^8) vector scaling, and we can
+# achieve that with vgf2p8affineqb which has similar performance.  Using
+# vgf2p8affineqb requires encoding constants in a bizarre format, but
+# we can just build a table that has these pre-encoded, so it's not a
+# deal-breaker.
+#
+#
+# We have 32 working values, so we can almost do the whole thing in registers,
+# but we do need at least one scratch register.
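+#
+# In terms of the macros in fd_reedsol_fft.h, each 32-byte slice of the shreds
+# is processed roughly as follows (an illustrative sketch only; d00..d31 stand
+# for the 32 gf_t values loaded from the data shreds):
+#   FD_REEDSOL_GENERATE_IFFT( 32, 0,  d00, d01, ..., d30, d31 );   # step 1
+#   FD_REEDSOL_GENERATE_FFT ( 32, 32, d00, d01, ..., d30, d31 );   # step 2
+# after which d00..d31 hold the 32 parity values to store.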
+#
+# Register mapping:
+# * rax: position within each shred
+# * rbx: Temporary scalar variable, used for loading addresses
+# * r_i (for i in [8, 15]): Stores data_shred[ i-8 ]
+# * ymm_i for i in [0, 14) and [15, 31): Stores 32B of data from the ith line
+#   of computation
+# * ymm14: Sometimes stores data from the 14th line, sometimes a scratch
+#   register used for the butterflies
+# * ymm31: Sometimes stores data from the 31st line, sometimes a scratch
+#   register used for the butterflies
+
+
+# Push registers we clobber
+pushq %r15
+.cfi_def_cfa_offset 16
+.cfi_offset 15, -16
+pushq %r14
+.cfi_def_cfa_offset 24
+.cfi_offset 14, -24
+pushq %r13
+.cfi_def_cfa_offset 32
+.cfi_offset 13, -32
+pushq %r12
+.cfi_def_cfa_offset 40
+.cfi_offset 12, -40
+pushq %rbx
+.cfi_def_cfa_offset 48
+.cfi_offset 3, -48
+
+# Load some values from data_shred into scalar registers to save loads later on
+mov (%rsi),%r8
+mov 0x08(%rsi),%r9
+mov 0x10(%rsi),%r10
+mov 0x18(%rsi),%r11
+mov 0x20(%rsi),%r12
+mov 0x28(%rsi),%r13
+mov 0x30(%rsi),%r14
+mov 0x38(%rsi),%r15
+
+mov $0, %rax # Init shred_position
+
+.align 16
+outer_loop:
+  # First handle the ones that we don't need to load from data_shred
+  vmovdqu64 (%r8 ,%rax,1),%ymm0
+  vmovdqu64 (%r9 ,%rax,1),%ymm1
+  vmovdqu64 (%r10,%rax,1),%ymm2
+  vmovdqu64 (%r11,%rax,1),%ymm3
+  vmovdqu64 (%r12,%rax,1),%ymm4
+  vmovdqu64 (%r13,%rax,1),%ymm5
+  vmovdqu64 (%r14,%rax,1),%ymm6
+  vmovdqu64 (%r15,%rax,1),%ymm7
+
+.altmacro
+# load_inputs: load one vector's worth of data from (data_shred[ reg ] +
+# shred_position) into ymm_reg
+.macro load_inputs reg
+  mov (\reg* 8)(%rsi), %rbx
+  vmovdqu64 (%rbx, %rax, 1), %ymm\reg
+.endm
+
+  load_inputs  8
+  load_inputs  9
+  load_inputs 10
+  load_inputs 11
+  load_inputs 12
+  load_inputs 13
+  load_inputs 14
+  load_inputs 15
+  load_inputs 16
+  load_inputs 17
+  load_inputs 18
+  load_inputs 19
+  load_inputs 20
+  load_inputs 21
+  load_inputs 22
+  load_inputs 23
+  load_inputs 24
+  load_inputs 25
+  load_inputs 26
+  load_inputs 27
+  load_inputs 28
+  load_inputs 29
+  load_inputs 30
+  load_inputs 31 # 31 is our scratch right now
+
+  # {i}fft_butterfly{_c0}: emit a butterfly operator on reg0 and reg1
+  # with a constant scalar of const (or 0 if _c0).  Use scratch_reg
+  # for scratch, clobbering it.  reg0 and reg1 are modified in place.
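+  # For example (expansion sketch, not generated output), the invocation
+  # `ifft_butterfly 2, 3, 2, 31` used below emits:
+  #   vpxord         %ymm2, %ymm3, %ymm3
+  #   vgf2p8affineqb $0x00, (gfni_const_tbl+32*2)(%rip), %ymm3, %ymm31
+  #   vpxord         %ymm2, %ymm31, %ymm2
+  # i.e. ymm3 += ymm2, then ymm2 += 2*ymm3, all in GF(2^8).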
+ .macro ifft_butterfly reg0, reg1, const, scratch_reg + vpxord %ymm\reg0, %ymm\reg1, %ymm\reg1 + vgf2p8affineqb $0x00, (gfni_const_tbl+32*(\const))(%rip), %ymm\reg1, %ymm\scratch_reg + vpxord %ymm\reg0, %ymm\scratch_reg, %ymm\reg0 + .endm + .macro ifft_butterfly_c0 reg0, reg1, scratch_reg + vpxord %ymm\reg0, %ymm\reg1, %ymm\reg1 + .endm + + .macro fft_butterfly reg0, reg1, const, scratch_reg + vgf2p8affineqb $0x00, (gfni_const_tbl+32*(\const))(%rip), %ymm\reg1, %ymm\scratch_reg + vpxord %ymm\reg0, %ymm\scratch_reg, %ymm\reg0 + vpxord %ymm\reg1, %ymm\reg0, %ymm\reg1 + .endm + .macro fft_butterfly_c0 reg0, reg1, scratch_reg + vpxord %ymm\reg1, %ymm\reg0, %ymm\reg1 + .endm + + # spill_reload: spill register ymm\spill to its spot in scratch memory + # and reload ymm\reload from its spot + .macro spill_reload spill, reload + vmovdqa64 %ymm\spill, (32*(\spill))(%rcx) + vmovdqa64 (32*(\reload))(%rcx), %ymm\reload + .endm + + # parity_store: store generated parity data in ymm\reg to + # parity_shred[ reg ] + shred_position + .macro parity_store reg + mov ((\reg )* 8)(%rdx), %rbx + vmovdqu64 %ymm\reg, (%rbx, %rax, 1) + .endm + + # Spill ymm31 to its spot so we can use it as scratch and reload it using the spill_reload macro later + vmovdqa64 %ymm31, (32*(31))(%rcx) + + ifft_butterfly_c0 0, 1, 31 # (0, 0, 0) and (0, 0, 1) => (0, 1, 0) and (1, 1, 0) + ifft_butterfly 2, 3, 2, 31 # (0, 0, 2) and (0, 0, 3) => (0, 1, 2) and (1, 1, 2) + ifft_butterfly 4, 5, 4, 31 # (0, 0, 4) and (0, 0, 5) => (0, 1, 4) and (1, 1, 4) + ifft_butterfly 6, 7, 6, 31 # (0, 0, 6) and (0, 0, 7) => (0, 1, 6) and (1, 1, 6) + ifft_butterfly 8, 9, 8, 31 # (0, 0, 8) and (0, 0, 9) => (0, 1, 8) and (1, 1, 8) + ifft_butterfly 10, 11, 10, 31 # (0, 0, 10) and (0, 0, 11) => (0, 1, 10) and (1, 1, 10) + ifft_butterfly 12, 13, 12, 31 # (0, 0, 12) and (0, 0, 13) => (0, 1, 12) and (1, 1, 12) + ifft_butterfly 14, 15, 14, 31 # (0, 0, 14) and (0, 0, 15) => (0, 1, 14) and (1, 1, 14) + ifft_butterfly 16, 17, 16, 31 # (0, 0, 16) and (0, 0, 17) => (0, 1, 16) and (1, 1, 16) + ifft_butterfly 18, 19, 18, 31 # (0, 0, 18) and (0, 0, 19) => (0, 1, 18) and (1, 1, 18) + ifft_butterfly 20, 21, 20, 31 # (0, 0, 20) and (0, 0, 21) => (0, 1, 20) and (1, 1, 20) + ifft_butterfly 22, 23, 22, 31 # (0, 0, 22) and (0, 0, 23) => (0, 1, 22) and (1, 1, 22) + ifft_butterfly 24, 25, 24, 31 # (0, 0, 24) and (0, 0, 25) => (0, 1, 24) and (1, 1, 24) + ifft_butterfly 26, 27, 26, 31 # (0, 0, 26) and (0, 0, 27) => (0, 1, 26) and (1, 1, 26) + ifft_butterfly 28, 29, 28, 31 # (0, 0, 28) and (0, 0, 29) => (0, 1, 28) and (1, 1, 28) + ifft_butterfly_c0 0, 2, 31 # (0, 1, 0) and (0, 1, 2) => (0, 2, 0) and (2, 2, 0) + ifft_butterfly 4, 6, 6, 31 # (0, 1, 4) and (0, 1, 6) => (0, 2, 4) and (2, 2, 4) + ifft_butterfly 8, 10, 28, 31 # (0, 1, 8) and (0, 1, 10) => (0, 2, 8) and (2, 2, 8) + ifft_butterfly 12, 14, 26, 31 # (0, 1, 12) and (0, 1, 14) => (0, 2, 12) and (2, 2, 12) + ifft_butterfly 16, 18, 120, 31 # (0, 1, 16) and (0, 1, 18) => (0, 2, 16) and (2, 2, 16) + ifft_butterfly 20, 22, 126, 31 # (0, 1, 20) and (0, 1, 22) => (0, 2, 20) and (2, 2, 20) + ifft_butterfly 24, 26, 100, 31 # (0, 1, 24) and (0, 1, 26) => (0, 2, 24) and (2, 2, 24) + ifft_butterfly_c0 0, 4, 31 # (0, 2, 0) and (0, 2, 4) => (0, 3, 0) and (4, 3, 0) + ifft_butterfly 8, 12, 22, 31 # (0, 2, 8) and (0, 2, 12) => (0, 3, 8) and (4, 3, 8) + ifft_butterfly 16, 20, 97, 31 # (0, 2, 16) and (0, 2, 20) => (0, 3, 16) and (4, 3, 16) + ifft_butterfly_c0 0, 8, 31 # (0, 3, 0) and (0, 3, 8) => (0, 4, 0) and (8, 4, 0) + ifft_butterfly_c0 4, 12, 
31 # (4, 3, 0) and (4, 3, 8) => (4, 4, 0) and (12, 4, 0) + ifft_butterfly_c0 2, 6, 31 # (2, 2, 0) and (2, 2, 4) => (2, 3, 0) and (6, 3, 0) + ifft_butterfly 10, 14, 22, 31 # (2, 2, 8) and (2, 2, 12) => (2, 3, 8) and (6, 3, 8) + ifft_butterfly 18, 22, 97, 31 # (2, 2, 16) and (2, 2, 20) => (2, 3, 16) and (6, 3, 16) + ifft_butterfly_c0 2, 10, 31 # (2, 3, 0) and (2, 3, 8) => (2, 4, 0) and (10, 4, 0) + ifft_butterfly_c0 6, 14, 31 # (6, 3, 0) and (6, 3, 8) => (6, 4, 0) and (14, 4, 0) + ifft_butterfly_c0 1, 3, 31 # (1, 1, 0) and (1, 1, 2) => (1, 2, 0) and (3, 2, 0) + ifft_butterfly 5, 7, 6, 31 # (1, 1, 4) and (1, 1, 6) => (1, 2, 4) and (3, 2, 4) + ifft_butterfly 9, 11, 28, 31 # (1, 1, 8) and (1, 1, 10) => (1, 2, 8) and (3, 2, 8) + ifft_butterfly 13, 15, 26, 31 # (1, 1, 12) and (1, 1, 14) => (1, 2, 12) and (3, 2, 12) + ifft_butterfly 17, 19, 120, 31 # (1, 1, 16) and (1, 1, 18) => (1, 2, 16) and (3, 2, 16) + ifft_butterfly 21, 23, 126, 31 # (1, 1, 20) and (1, 1, 22) => (1, 2, 20) and (3, 2, 20) + ifft_butterfly 25, 27, 100, 31 # (1, 1, 24) and (1, 1, 26) => (1, 2, 24) and (3, 2, 24) + ifft_butterfly_c0 1, 5, 31 # (1, 2, 0) and (1, 2, 4) => (1, 3, 0) and (5, 3, 0) + ifft_butterfly 9, 13, 22, 31 # (1, 2, 8) and (1, 2, 12) => (1, 3, 8) and (5, 3, 8) + ifft_butterfly 17, 21, 97, 31 # (1, 2, 16) and (1, 2, 20) => (1, 3, 16) and (5, 3, 16) + ifft_butterfly_c0 1, 9, 31 # (1, 3, 0) and (1, 3, 8) => (1, 4, 0) and (9, 4, 0) + ifft_butterfly_c0 5, 13, 31 # (5, 3, 0) and (5, 3, 8) => (5, 4, 0) and (13, 4, 0) + ifft_butterfly_c0 3, 7, 31 # (3, 2, 0) and (3, 2, 4) => (3, 3, 0) and (7, 3, 0) + ifft_butterfly 11, 15, 22, 31 # (3, 2, 8) and (3, 2, 12) => (3, 3, 8) and (7, 3, 8) + ifft_butterfly 19, 23, 97, 31 # (3, 2, 16) and (3, 2, 20) => (3, 3, 16) and (7, 3, 16) + ifft_butterfly_c0 3, 11, 31 # (3, 3, 0) and (3, 3, 8) => (3, 4, 0) and (11, 4, 0) + ifft_butterfly_c0 7, 15, 31 # (7, 3, 0) and (7, 3, 8) => (7, 4, 0) and (15, 4, 0) + spill_reload 14, 31 # spilling (14, 4, 0), reloading (0, 0, 31) + ifft_butterfly 30, 31, 30, 14 # (0, 0, 30) and (0, 0, 31) => (0, 1, 30) and (1, 1, 30) + ifft_butterfly 28, 30, 98, 14 # (0, 1, 28) and (0, 1, 30) => (0, 2, 28) and (2, 2, 28) + ifft_butterfly 24, 28, 119, 14 # (0, 2, 24) and (0, 2, 28) => (0, 3, 24) and (4, 3, 24) + ifft_butterfly 16, 24, 11, 14 # (0, 3, 16) and (0, 3, 24) => (0, 4, 16) and (8, 4, 16) + ifft_butterfly_c0 0, 16, 14 # (0, 4, 0) and (0, 4, 16) => (0, 5, 0) and (16, 5, 0) + ifft_butterfly_c0 8, 24, 14 # (8, 4, 0) and (8, 4, 16) => (8, 5, 0) and (24, 5, 0) + ifft_butterfly 20, 28, 11, 14 # (4, 3, 16) and (4, 3, 24) => (4, 4, 16) and (12, 4, 16) + ifft_butterfly_c0 4, 20, 14 # (4, 4, 0) and (4, 4, 16) => (4, 5, 0) and (20, 5, 0) + ifft_butterfly_c0 12, 28, 14 # (12, 4, 0) and (12, 4, 16) => (12, 5, 0) and (28, 5, 0) + ifft_butterfly 26, 30, 119, 14 # (2, 2, 24) and (2, 2, 28) => (2, 3, 24) and (6, 3, 24) + ifft_butterfly 18, 26, 11, 14 # (2, 3, 16) and (2, 3, 24) => (2, 4, 16) and (10, 4, 16) + ifft_butterfly_c0 2, 18, 14 # (2, 4, 0) and (2, 4, 16) => (2, 5, 0) and (18, 5, 0) + ifft_butterfly_c0 10, 26, 14 # (10, 4, 0) and (10, 4, 16) => (10, 5, 0) and (26, 5, 0) + ifft_butterfly 22, 30, 11, 14 # (6, 3, 16) and (6, 3, 24) => (6, 4, 16) and (14, 4, 16) + ifft_butterfly_c0 6, 22, 14 # (6, 4, 0) and (6, 4, 16) => (6, 5, 0) and (22, 5, 0) + ifft_butterfly 29, 31, 98, 14 # (1, 1, 28) and (1, 1, 30) => (1, 2, 28) and (3, 2, 28) + ifft_butterfly 25, 29, 119, 14 # (1, 2, 24) and (1, 2, 28) => (1, 3, 24) and (5, 3, 24) + ifft_butterfly 17, 25, 11, 14 # (1, 3, 16) and (1, 
3, 24) => (1, 4, 16) and (9, 4, 16) + ifft_butterfly_c0 1, 17, 14 # (1, 4, 0) and (1, 4, 16) => (1, 5, 0) and (17, 5, 0) + ifft_butterfly_c0 9, 25, 14 # (9, 4, 0) and (9, 4, 16) => (9, 5, 0) and (25, 5, 0) + ifft_butterfly 21, 29, 11, 14 # (5, 3, 16) and (5, 3, 24) => (5, 4, 16) and (13, 4, 16) + ifft_butterfly_c0 5, 21, 14 # (5, 4, 0) and (5, 4, 16) => (5, 5, 0) and (21, 5, 0) + ifft_butterfly_c0 13, 29, 14 # (13, 4, 0) and (13, 4, 16) => (13, 5, 0) and (29, 5, 0) + ifft_butterfly 27, 31, 119, 14 # (3, 2, 24) and (3, 2, 28) => (3, 3, 24) and (7, 3, 24) + ifft_butterfly 19, 27, 11, 14 # (3, 3, 16) and (3, 3, 24) => (3, 4, 16) and (11, 4, 16) + ifft_butterfly_c0 3, 19, 14 # (3, 4, 0) and (3, 4, 16) => (3, 5, 0) and (19, 5, 0) + ifft_butterfly_c0 11, 27, 14 # (11, 4, 0) and (11, 4, 16) => (11, 5, 0) and (27, 5, 0) + ifft_butterfly 23, 31, 11, 14 # (7, 3, 16) and (7, 3, 24) => (7, 4, 16) and (15, 4, 16) + ifft_butterfly_c0 7, 23, 14 # (7, 4, 0) and (7, 4, 16) => (7, 5, 0) and (23, 5, 0) + ifft_butterfly_c0 15, 31, 14 # (15, 4, 0) and (15, 4, 16) => (15, 5, 0) and (31, 5, 0) + spill_reload 31, 14 # spilling (31, 5, 0), reloading (14, 4, 0) + ifft_butterfly_c0 14, 30, 31 # (14, 4, 0) and (14, 4, 16) => (14, 5, 0) and (30, 5, 0) + fft_butterfly 0, 16, 71, 31 # (0, 5, 0) and (16, 5, 0) => (0, 4, 0) and (0, 4, 16) + fft_butterfly 8, 24, 71, 31 # (8, 5, 0) and (24, 5, 0) => (8, 4, 0) and (8, 4, 16) + fft_butterfly 0, 8, 174, 31 # (0, 4, 0) and (8, 4, 0) => (0, 3, 0) and (0, 3, 8) + fft_butterfly 16, 24, 165, 31 # (0, 4, 16) and (8, 4, 16) => (0, 3, 16) and (0, 3, 24) + fft_butterfly 4, 20, 71, 31 # (4, 5, 0) and (20, 5, 0) => (4, 4, 0) and (4, 4, 16) + fft_butterfly 12, 28, 71, 31 # (12, 5, 0) and (28, 5, 0) => (12, 4, 0) and (12, 4, 16) + fft_butterfly 4, 12, 174, 31 # (4, 4, 0) and (12, 4, 0) => (4, 3, 0) and (4, 3, 8) + fft_butterfly 20, 28, 165, 31 # (4, 4, 16) and (12, 4, 16) => (4, 3, 16) and (4, 3, 24) + fft_butterfly 0, 4, 38, 31 # (0, 3, 0) and (4, 3, 0) => (0, 2, 0) and (0, 2, 4) + fft_butterfly 8, 12, 48, 31 # (0, 3, 8) and (4, 3, 8) => (0, 2, 8) and (0, 2, 12) + fft_butterfly 16, 20, 71, 31 # (0, 3, 16) and (4, 3, 16) => (0, 2, 16) and (0, 2, 20) + fft_butterfly 24, 28, 81, 31 # (0, 3, 24) and (4, 3, 24) => (0, 2, 24) and (0, 2, 28) + fft_butterfly 2, 18, 71, 31 # (2, 5, 0) and (18, 5, 0) => (2, 4, 0) and (2, 4, 16) + fft_butterfly 10, 26, 71, 31 # (10, 5, 0) and (26, 5, 0) => (10, 4, 0) and (10, 4, 16) + fft_butterfly 2, 10, 174, 31 # (2, 4, 0) and (10, 4, 0) => (2, 3, 0) and (2, 3, 8) + fft_butterfly 18, 26, 165, 31 # (2, 4, 16) and (10, 4, 16) => (2, 3, 16) and (2, 3, 24) + fft_butterfly 6, 22, 71, 31 # (6, 5, 0) and (22, 5, 0) => (6, 4, 0) and (6, 4, 16) + fft_butterfly 14, 30, 71, 31 # (14, 5, 0) and (30, 5, 0) => (14, 4, 0) and (14, 4, 16) + fft_butterfly 6, 14, 174, 31 # (6, 4, 0) and (14, 4, 0) => (6, 3, 0) and (6, 3, 8) + fft_butterfly 22, 30, 165, 31 # (6, 4, 16) and (14, 4, 16) => (6, 3, 16) and (6, 3, 24) + fft_butterfly 2, 6, 38, 31 # (2, 3, 0) and (6, 3, 0) => (2, 2, 0) and (2, 2, 4) + fft_butterfly 10, 14, 48, 31 # (2, 3, 8) and (6, 3, 8) => (2, 2, 8) and (2, 2, 12) + fft_butterfly 18, 22, 71, 31 # (2, 3, 16) and (6, 3, 16) => (2, 2, 16) and (2, 2, 20) + fft_butterfly 26, 30, 81, 31 # (2, 3, 24) and (6, 3, 24) => (2, 2, 24) and (2, 2, 28) + fft_butterfly 0, 2, 237, 31 # (0, 2, 0) and (2, 2, 0) => (0, 1, 0) and (0, 1, 2) + fft_butterfly 4, 6, 235, 31 # (0, 2, 4) and (2, 2, 4) => (0, 1, 4) and (0, 1, 6) + fft_butterfly 8, 10, 241, 31 # (0, 2, 8) and (2, 2, 8) => (0, 1, 8) 
and (0, 1, 10) + fft_butterfly 12, 14, 247, 31 # (0, 2, 12) and (2, 2, 12) => (0, 1, 12) and (0, 1, 14) + fft_butterfly 16, 18, 149, 31 # (0, 2, 16) and (2, 2, 16) => (0, 1, 16) and (0, 1, 18) + fft_butterfly 20, 22, 147, 31 # (0, 2, 20) and (2, 2, 20) => (0, 1, 20) and (0, 1, 22) + fft_butterfly 24, 26, 137, 31 # (0, 2, 24) and (2, 2, 24) => (0, 1, 24) and (0, 1, 26) + fft_butterfly 28, 30, 143, 31 # (0, 2, 28) and (2, 2, 28) => (0, 1, 28) and (0, 1, 30) + fft_butterfly 1, 17, 71, 31 # (1, 5, 0) and (17, 5, 0) => (1, 4, 0) and (1, 4, 16) + fft_butterfly 9, 25, 71, 31 # (9, 5, 0) and (25, 5, 0) => (9, 4, 0) and (9, 4, 16) + fft_butterfly 1, 9, 174, 31 # (1, 4, 0) and (9, 4, 0) => (1, 3, 0) and (1, 3, 8) + fft_butterfly 17, 25, 165, 31 # (1, 4, 16) and (9, 4, 16) => (1, 3, 16) and (1, 3, 24) + fft_butterfly 5, 21, 71, 31 # (5, 5, 0) and (21, 5, 0) => (5, 4, 0) and (5, 4, 16) + fft_butterfly 13, 29, 71, 31 # (13, 5, 0) and (29, 5, 0) => (13, 4, 0) and (13, 4, 16) + fft_butterfly 5, 13, 174, 31 # (5, 4, 0) and (13, 4, 0) => (5, 3, 0) and (5, 3, 8) + fft_butterfly 21, 29, 165, 31 # (5, 4, 16) and (13, 4, 16) => (5, 3, 16) and (5, 3, 24) + fft_butterfly 1, 5, 38, 31 # (1, 3, 0) and (5, 3, 0) => (1, 2, 0) and (1, 2, 4) + fft_butterfly 9, 13, 48, 31 # (1, 3, 8) and (5, 3, 8) => (1, 2, 8) and (1, 2, 12) + fft_butterfly 17, 21, 71, 31 # (1, 3, 16) and (5, 3, 16) => (1, 2, 16) and (1, 2, 20) + fft_butterfly 25, 29, 81, 31 # (1, 3, 24) and (5, 3, 24) => (1, 2, 24) and (1, 2, 28) + fft_butterfly 3, 19, 71, 31 # (3, 5, 0) and (19, 5, 0) => (3, 4, 0) and (3, 4, 16) + fft_butterfly 11, 27, 71, 31 # (11, 5, 0) and (27, 5, 0) => (11, 4, 0) and (11, 4, 16) + fft_butterfly 3, 11, 174, 31 # (3, 4, 0) and (11, 4, 0) => (3, 3, 0) and (3, 3, 8) + fft_butterfly 19, 27, 165, 31 # (3, 4, 16) and (11, 4, 16) => (3, 3, 16) and (3, 3, 24) + fft_butterfly 7, 23, 71, 31 # (7, 5, 0) and (23, 5, 0) => (7, 4, 0) and (7, 4, 16) + spill_reload 14, 31 # spilling (0, 1, 14), reloading (31, 5, 0) + fft_butterfly 15, 31, 71, 14 # (15, 5, 0) and (31, 5, 0) => (15, 4, 0) and (15, 4, 16) + fft_butterfly 7, 15, 174, 14 # (7, 4, 0) and (15, 4, 0) => (7, 3, 0) and (7, 3, 8) + fft_butterfly 23, 31, 165, 14 # (7, 4, 16) and (15, 4, 16) => (7, 3, 16) and (7, 3, 24) + fft_butterfly 3, 7, 38, 14 # (3, 3, 0) and (7, 3, 0) => (3, 2, 0) and (3, 2, 4) + fft_butterfly 11, 15, 48, 14 # (3, 3, 8) and (7, 3, 8) => (3, 2, 8) and (3, 2, 12) + fft_butterfly 19, 23, 71, 14 # (3, 3, 16) and (7, 3, 16) => (3, 2, 16) and (3, 2, 20) + fft_butterfly 27, 31, 81, 14 # (3, 3, 24) and (7, 3, 24) => (3, 2, 24) and (3, 2, 28) + fft_butterfly 1, 3, 237, 14 # (1, 2, 0) and (3, 2, 0) => (1, 1, 0) and (1, 1, 2) + fft_butterfly 5, 7, 235, 14 # (1, 2, 4) and (3, 2, 4) => (1, 1, 4) and (1, 1, 6) + fft_butterfly 9, 11, 241, 14 # (1, 2, 8) and (3, 2, 8) => (1, 1, 8) and (1, 1, 10) + fft_butterfly 13, 15, 247, 14 # (1, 2, 12) and (3, 2, 12) => (1, 1, 12) and (1, 1, 14) + fft_butterfly 17, 19, 149, 14 # (1, 2, 16) and (3, 2, 16) => (1, 1, 16) and (1, 1, 18) + fft_butterfly 21, 23, 147, 14 # (1, 2, 20) and (3, 2, 20) => (1, 1, 20) and (1, 1, 22) + fft_butterfly 25, 27, 137, 14 # (1, 2, 24) and (3, 2, 24) => (1, 1, 24) and (1, 1, 26) + fft_butterfly 29, 31, 143, 14 # (1, 2, 28) and (3, 2, 28) => (1, 1, 28) and (1, 1, 30) + fft_butterfly 0, 1, 32, 14 # (0, 1, 0) and (1, 1, 0) => (0, 0, 0) and (0, 0, 1) + parity_store 0 # storing (0, 0, 0) + parity_store 1 # storing (0, 0, 1) + fft_butterfly 2, 3, 34, 14 # (0, 1, 2) and (1, 1, 2) => (0, 0, 2) and (0, 0, 3) + parity_store 2 # 
storing (0, 0, 2) + parity_store 3 # storing (0, 0, 3) + fft_butterfly 4, 5, 36, 14 # (0, 1, 4) and (1, 1, 4) => (0, 0, 4) and (0, 0, 5) + parity_store 4 # storing (0, 0, 4) + parity_store 5 # storing (0, 0, 5) + fft_butterfly 6, 7, 38, 14 # (0, 1, 6) and (1, 1, 6) => (0, 0, 6) and (0, 0, 7) + parity_store 6 # storing (0, 0, 6) + parity_store 7 # storing (0, 0, 7) + fft_butterfly 8, 9, 40, 14 # (0, 1, 8) and (1, 1, 8) => (0, 0, 8) and (0, 0, 9) + parity_store 8 # storing (0, 0, 8) + parity_store 9 # storing (0, 0, 9) + fft_butterfly 10, 11, 42, 14 # (0, 1, 10) and (1, 1, 10) => (0, 0, 10) and (0, 0, 11) + parity_store 10 # storing (0, 0, 10) + parity_store 11 # storing (0, 0, 11) + fft_butterfly 12, 13, 44, 14 # (0, 1, 12) and (1, 1, 12) => (0, 0, 12) and (0, 0, 13) + parity_store 12 # storing (0, 0, 12) + parity_store 13 # storing (0, 0, 13) + fft_butterfly 16, 17, 48, 14 # (0, 1, 16) and (1, 1, 16) => (0, 0, 16) and (0, 0, 17) + parity_store 16 # storing (0, 0, 16) + parity_store 17 # storing (0, 0, 17) + fft_butterfly 18, 19, 50, 14 # (0, 1, 18) and (1, 1, 18) => (0, 0, 18) and (0, 0, 19) + parity_store 18 # storing (0, 0, 18) + parity_store 19 # storing (0, 0, 19) + fft_butterfly 20, 21, 52, 14 # (0, 1, 20) and (1, 1, 20) => (0, 0, 20) and (0, 0, 21) + parity_store 20 # storing (0, 0, 20) + parity_store 21 # storing (0, 0, 21) + fft_butterfly 22, 23, 54, 14 # (0, 1, 22) and (1, 1, 22) => (0, 0, 22) and (0, 0, 23) + parity_store 22 # storing (0, 0, 22) + parity_store 23 # storing (0, 0, 23) + fft_butterfly 24, 25, 56, 14 # (0, 1, 24) and (1, 1, 24) => (0, 0, 24) and (0, 0, 25) + parity_store 24 # storing (0, 0, 24) + parity_store 25 # storing (0, 0, 25) + fft_butterfly 26, 27, 58, 14 # (0, 1, 26) and (1, 1, 26) => (0, 0, 26) and (0, 0, 27) + parity_store 26 # storing (0, 0, 26) + parity_store 27 # storing (0, 0, 27) + fft_butterfly 28, 29, 60, 14 # (0, 1, 28) and (1, 1, 28) => (0, 0, 28) and (0, 0, 29) + parity_store 28 # storing (0, 0, 28) + parity_store 29 # storing (0, 0, 29) + fft_butterfly 30, 31, 62, 14 # (0, 1, 30) and (1, 1, 30) => (0, 0, 30) and (0, 0, 31) + parity_store 30 # storing (0, 0, 30) + parity_store 31 # storing (0, 0, 31) + spill_reload 31, 14 # spilling (0, 0, 31), reloading (0, 1, 14) + fft_butterfly 14, 15, 46, 31 # (0, 1, 14) and (1, 1, 14) => (0, 0, 14) and (0, 0, 15) + parity_store 14 # storing (0, 0, 14) + parity_store 15 # storing (0, 0, 15) + + + # Advance shred position. Normally it increases by 32, but if the shred size + # is not a multiple of 32, then we clamp it down. E.g. suppose rdi==33. We + # first run through the loop with rax==0. Then we add 32 to rax and test + # 32==33. That's false, so then we reset rax=min(rax, rdi-32), e.g. rax=1. + # We run through the loop again. The second time, we add 32, getting + # rax==33, so then we break. 
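# Note (illustrative, not part of the generated assembly): the position update
# below is equivalent to this C sketch, with shred_pos standing in for rax,
# shred_sz for rdi, and fd_ulong_min being the fd_util helper this patch
# already uses in fd_reedsol_internal.c:
#
#   ulong shred_pos = 0UL;
#   do {
#     /* encode bytes [shred_pos, shred_pos+32) of every shred */
#     shred_pos += 32UL;                                    /* add $0x20, %rax         */
#     if( shred_pos==shred_sz ) break;                      /* cmp %rdi, %rax; je done */
#     shred_pos = fd_ulong_min( shred_pos, shred_sz-32UL ); /* lea/cmp/cmovb clamp     */
#   } while( 1 );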
+ add $0x20, %rax + cmp %rdi, %rax + je done + lea -0x20(%rdi), %rbx + cmp %rax, %rbx + cmovb %rbx, %rax + jmp outer_loop + +done: +popq %rbx +.cfi_def_cfa_offset 40 +popq %r12 +.cfi_def_cfa_offset 32 +popq %r13 +.cfi_def_cfa_offset 24 +popq %r14 +.cfi_def_cfa_offset 16 +popq %r15 +.cfi_def_cfa_offset 8 +ret +.align 16 + + + + +.cfi_endproc diff --git a/src/ballet/reedsol/fd_reedsol_internal.c b/src/ballet/reedsol/fd_reedsol_internal.c new file mode 100644 index 00000000000..8470b8e8040 --- /dev/null +++ b/src/ballet/reedsol/fd_reedsol_internal.c @@ -0,0 +1,278 @@ +#include "../../util/fd_util.h" +#include "fd_reedsol_internal.h" +#define INCLUDE_CONSTANTS +#if FD_HAS_GFNI +#include "fd_reedsol_arith_gfni.h" +#elif FD_HAS_AVX +#include "fd_reedsol_arith_avx2.h" +#else +#include "fd_reedsol_arith_none.h" +#endif +#include "fd_reedsol_fft.h" +#include "fd_reedsol_ppt.h" + + +/* FALLTHRU: Tells the compiler that falling through to the next case + of the switch statement is intentional and not a bug. When brutality + is turned on, this must be used. Clang and GCC differ on what + annotations they accept, but this works for both. */ +#define FALLTHRU __attribute__((fallthrough)); + +void fd_reedsol_encode( ulong shred_sz, + uchar const * const * data_shred, + ulong data_shred_cnt, + uchar * const * parity_shred, + ulong parity_shred_cnt ) { + + if( FD_UNLIKELY( (data_shred_cnt==0) | (parity_shred_cnt==0) ) ) return; /* TODO: Is that the right behavior? */ + + for( ulong shred_pos=0UL; shred_pos0UL ) ) { + /* Produce another 16 parity shreds */ + FD_REEDSOL_GENERATE_FFT( 16, 16, ALL_VARS ); + switch( parity_remaining ) { + default: + case 16UL: gf_stu( parity_shred[ 15UL+parity_produced ] + shred_pos, in15 ); FALLTHRU + case 15UL: gf_stu( parity_shred[ 14UL+parity_produced ] + shred_pos, in14 ); FALLTHRU + case 14UL: gf_stu( parity_shred[ 13UL+parity_produced ] + shred_pos, in13 ); FALLTHRU + case 13UL: gf_stu( parity_shred[ 12UL+parity_produced ] + shred_pos, in12 ); FALLTHRU + case 12UL: gf_stu( parity_shred[ 11UL+parity_produced ] + shred_pos, in11 ); FALLTHRU + case 11UL: gf_stu( parity_shred[ 10UL+parity_produced ] + shred_pos, in10 ); FALLTHRU + case 10UL: gf_stu( parity_shred[ 9UL+parity_produced ] + shred_pos, in09 ); FALLTHRU + case 9UL: gf_stu( parity_shred[ 8UL+parity_produced ] + shred_pos, in08 ); FALLTHRU + case 8UL: gf_stu( parity_shred[ 7UL+parity_produced ] + shred_pos, in07 ); FALLTHRU + case 7UL: gf_stu( parity_shred[ 6UL+parity_produced ] + shred_pos, in06 ); FALLTHRU + case 6UL: gf_stu( parity_shred[ 5UL+parity_produced ] + shred_pos, in05 ); FALLTHRU + case 5UL: gf_stu( parity_shred[ 4UL+parity_produced ] + shred_pos, in04 ); FALLTHRU + case 4UL: gf_stu( parity_shred[ 3UL+parity_produced ] + shred_pos, in03 ); FALLTHRU + case 3UL: gf_stu( parity_shred[ 2UL+parity_produced ] + shred_pos, in02 ); FALLTHRU + case 2UL: gf_stu( parity_shred[ 1UL+parity_produced ] + shred_pos, in01 ); FALLTHRU + case 1UL: gf_stu( parity_shred[ 0UL+parity_produced ] + shred_pos, in00 ); + } + parity_produced += fd_ulong_min( 16UL, parity_remaining ); + parity_remaining = parity_shred_cnt - parity_produced; + } + + /* We might need one more round */ + if( FD_UNLIKELY( parity_remaining>0UL ) ) { + /* TODO: Is it faster to save the output of the ifft/ppt than to recompute?
*/ + FD_REEDSOL_GENERATE_IFFT( 16, 16, ALL_VARS ); + FD_REEDSOL_GENERATE_FFT( 16, 32, ALL_VARS ); + switch( parity_remaining ) { + default: + case 16UL: gf_stu( parity_shred[ 15UL+parity_produced ] + shred_pos, in15 ); FALLTHRU + case 15UL: gf_stu( parity_shred[ 14UL+parity_produced ] + shred_pos, in14 ); FALLTHRU + case 14UL: gf_stu( parity_shred[ 13UL+parity_produced ] + shred_pos, in13 ); FALLTHRU + case 13UL: gf_stu( parity_shred[ 12UL+parity_produced ] + shred_pos, in12 ); FALLTHRU + case 12UL: gf_stu( parity_shred[ 11UL+parity_produced ] + shred_pos, in11 ); FALLTHRU + case 11UL: gf_stu( parity_shred[ 10UL+parity_produced ] + shred_pos, in10 ); FALLTHRU + case 10UL: gf_stu( parity_shred[ 9UL+parity_produced ] + shred_pos, in09 ); FALLTHRU + case 9UL: gf_stu( parity_shred[ 8UL+parity_produced ] + shred_pos, in08 ); FALLTHRU + case 8UL: gf_stu( parity_shred[ 7UL+parity_produced ] + shred_pos, in07 ); FALLTHRU + case 7UL: gf_stu( parity_shred[ 6UL+parity_produced ] + shred_pos, in06 ); FALLTHRU + case 6UL: gf_stu( parity_shred[ 5UL+parity_produced ] + shred_pos, in05 ); FALLTHRU + case 5UL: gf_stu( parity_shred[ 4UL+parity_produced ] + shred_pos, in04 ); FALLTHRU + case 4UL: gf_stu( parity_shred[ 3UL+parity_produced ] + shred_pos, in03 ); FALLTHRU + case 3UL: gf_stu( parity_shred[ 2UL+parity_produced ] + shred_pos, in02 ); FALLTHRU + case 2UL: gf_stu( parity_shred[ 1UL+parity_produced ] + shred_pos, in01 ); FALLTHRU + case 1UL: gf_stu( parity_shred[ 0UL+parity_produced ] + shred_pos, in00 ); + } +#undef ALL_VARS + } + } else { + /* N==32 codepath */ + gf_t in00 = gf_zero(); gf_t in01 = gf_zero(); gf_t in02 = gf_zero(); gf_t in03 = gf_zero(); + gf_t in04 = gf_zero(); gf_t in05 = gf_zero(); gf_t in06 = gf_zero(); gf_t in07 = gf_zero(); + gf_t in08 = gf_zero(); gf_t in09 = gf_zero(); gf_t in10 = gf_zero(); gf_t in11 = gf_zero(); + gf_t in12 = gf_zero(); gf_t in13 = gf_zero(); gf_t in14 = gf_zero(); gf_t in15 = gf_zero(); + gf_t in16 = gf_zero(); gf_t in17 = gf_zero(); gf_t in18 = gf_zero(); gf_t in19 = gf_zero(); + gf_t in20 = gf_zero(); gf_t in21 = gf_zero(); gf_t in22 = gf_zero(); gf_t in23 = gf_zero(); + gf_t in24 = gf_zero(); gf_t in25 = gf_zero(); gf_t in26 = gf_zero(); gf_t in27 = gf_zero(); + gf_t in28 = gf_zero(); gf_t in29 = gf_zero(); gf_t in30 = gf_zero(); gf_t in31 = gf_zero(); + + in15 = gf_ldu( data_shred[ 15 ] + shred_pos ); in14 = gf_ldu( data_shred[ 14 ] + shred_pos ); + in13 = gf_ldu( data_shred[ 13 ] + shred_pos ); in12 = gf_ldu( data_shred[ 12 ] + shred_pos ); + in11 = gf_ldu( data_shred[ 11 ] + shred_pos ); in10 = gf_ldu( data_shred[ 10 ] + shred_pos ); + in09 = gf_ldu( data_shred[ 9 ] + shred_pos ); in08 = gf_ldu( data_shred[ 8 ] + shred_pos ); + in07 = gf_ldu( data_shred[ 7 ] + shred_pos ); in06 = gf_ldu( data_shred[ 6 ] + shred_pos ); + in05 = gf_ldu( data_shred[ 5 ] + shred_pos ); in04 = gf_ldu( data_shred[ 4 ] + shred_pos ); + in03 = gf_ldu( data_shred[ 3 ] + shred_pos ); in02 = gf_ldu( data_shred[ 2 ] + shred_pos ); + in01 = gf_ldu( data_shred[ 1 ] + shred_pos ); in00 = gf_ldu( data_shred[ 0 ] + shred_pos ); +#define ALL_VARS in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, \ + in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31 + + switch( data_shred_cnt ) { + case 32UL: in31 = gf_ldu( data_shred[ 31 ] + shred_pos ); FALLTHRU + case 31UL: in30 = gf_ldu( data_shred[ 30 ] + shred_pos ); FALLTHRU + case 30UL: in29 = gf_ldu( data_shred[ 29 ] + shred_pos ); FALLTHRU + case 29UL: 
in28 = gf_ldu( data_shred[ 28 ] + shred_pos ); FALLTHRU + case 28UL: in27 = gf_ldu( data_shred[ 27 ] + shred_pos ); FALLTHRU + case 27UL: in26 = gf_ldu( data_shred[ 26 ] + shred_pos ); FALLTHRU + case 26UL: in25 = gf_ldu( data_shred[ 25 ] + shred_pos ); FALLTHRU + case 25UL: in24 = gf_ldu( data_shred[ 24 ] + shred_pos ); FALLTHRU + case 24UL: in23 = gf_ldu( data_shred[ 23 ] + shred_pos ); FALLTHRU + case 23UL: in22 = gf_ldu( data_shred[ 22 ] + shred_pos ); FALLTHRU + case 22UL: in21 = gf_ldu( data_shred[ 21 ] + shred_pos ); FALLTHRU + case 21UL: in20 = gf_ldu( data_shred[ 20 ] + shred_pos ); FALLTHRU + case 20UL: in19 = gf_ldu( data_shred[ 19 ] + shred_pos ); FALLTHRU + case 19UL: in18 = gf_ldu( data_shred[ 18 ] + shred_pos ); FALLTHRU + case 18UL: in17 = gf_ldu( data_shred[ 17 ] + shred_pos ); FALLTHRU + case 17UL: in16 = gf_ldu( data_shred[ 16 ] + shred_pos ); + } + switch( data_shred_cnt ) { + case 32UL: FD_REEDSOL_GENERATE_IFFT( 32, 0, ALL_VARS ); break; + case 31UL: FD_REEDSOL_GENERATE_PPT( 32, 31, ALL_VARS ); break; + case 30UL: FD_REEDSOL_GENERATE_PPT( 32, 30, ALL_VARS ); break; + case 29UL: FD_REEDSOL_GENERATE_PPT( 32, 29, ALL_VARS ); break; + case 28UL: FD_REEDSOL_GENERATE_PPT( 32, 28, ALL_VARS ); break; + case 27UL: FD_REEDSOL_GENERATE_PPT( 32, 27, ALL_VARS ); break; + case 26UL: FD_REEDSOL_GENERATE_PPT( 32, 26, ALL_VARS ); break; + case 25UL: FD_REEDSOL_GENERATE_PPT( 32, 25, ALL_VARS ); break; + case 24UL: FD_REEDSOL_GENERATE_PPT( 32, 24, ALL_VARS ); break; + case 23UL: FD_REEDSOL_GENERATE_PPT( 32, 23, ALL_VARS ); break; + case 22UL: FD_REEDSOL_GENERATE_PPT( 32, 22, ALL_VARS ); break; + case 21UL: FD_REEDSOL_GENERATE_PPT( 32, 21, ALL_VARS ); break; + case 20UL: FD_REEDSOL_GENERATE_PPT( 32, 20, ALL_VARS ); break; + case 19UL: FD_REEDSOL_GENERATE_PPT( 32, 19, ALL_VARS ); break; + case 18UL: FD_REEDSOL_GENERATE_PPT( 32, 18, ALL_VARS ); break; + case 17UL: FD_REEDSOL_GENERATE_PPT( 32, 17, ALL_VARS ); break; + } + /* That generated the first 32-data_shred_cnt parity shreds in the + last 32-data_shred_cnt variables. We might only need + parity_shred_cnt of them though. 
*/ + ulong total_shreds = data_shred_cnt+parity_shred_cnt; + switch( data_shred_cnt ) { + case 17UL: if( total_shreds <= 17UL ) break; gf_stu( parity_shred[ 17UL-data_shred_cnt ] + shred_pos, in17 ); in17 = gf_zero(); FALLTHRU + case 18UL: if( total_shreds <= 18UL ) break; gf_stu( parity_shred[ 18UL-data_shred_cnt ] + shred_pos, in18 ); in18 = gf_zero(); FALLTHRU + case 19UL: if( total_shreds <= 19UL ) break; gf_stu( parity_shred[ 19UL-data_shred_cnt ] + shred_pos, in19 ); in19 = gf_zero(); FALLTHRU + case 20UL: if( total_shreds <= 20UL ) break; gf_stu( parity_shred[ 20UL-data_shred_cnt ] + shred_pos, in20 ); in20 = gf_zero(); FALLTHRU + case 21UL: if( total_shreds <= 21UL ) break; gf_stu( parity_shred[ 21UL-data_shred_cnt ] + shred_pos, in21 ); in21 = gf_zero(); FALLTHRU + case 22UL: if( total_shreds <= 22UL ) break; gf_stu( parity_shred[ 22UL-data_shred_cnt ] + shred_pos, in22 ); in22 = gf_zero(); FALLTHRU + case 23UL: if( total_shreds <= 23UL ) break; gf_stu( parity_shred[ 23UL-data_shred_cnt ] + shred_pos, in23 ); in23 = gf_zero(); FALLTHRU + case 24UL: if( total_shreds <= 24UL ) break; gf_stu( parity_shred[ 24UL-data_shred_cnt ] + shred_pos, in24 ); in24 = gf_zero(); FALLTHRU + case 25UL: if( total_shreds <= 25UL ) break; gf_stu( parity_shred[ 25UL-data_shred_cnt ] + shred_pos, in25 ); in25 = gf_zero(); FALLTHRU + case 26UL: if( total_shreds <= 26UL ) break; gf_stu( parity_shred[ 26UL-data_shred_cnt ] + shred_pos, in26 ); in26 = gf_zero(); FALLTHRU + case 27UL: if( total_shreds <= 27UL ) break; gf_stu( parity_shred[ 27UL-data_shred_cnt ] + shred_pos, in27 ); in27 = gf_zero(); FALLTHRU + case 28UL: if( total_shreds <= 28UL ) break; gf_stu( parity_shred[ 28UL-data_shred_cnt ] + shred_pos, in28 ); in28 = gf_zero(); FALLTHRU + case 29UL: if( total_shreds <= 29UL ) break; gf_stu( parity_shred[ 29UL-data_shred_cnt ] + shred_pos, in29 ); in29 = gf_zero(); FALLTHRU + case 30UL: if( total_shreds <= 30UL ) break; gf_stu( parity_shred[ 30UL-data_shred_cnt ] + shred_pos, in30 ); in30 = gf_zero(); FALLTHRU + case 31UL: if( total_shreds <= 31UL ) break; gf_stu( parity_shred[ 31UL-data_shred_cnt ] + shred_pos, in31 ); in31 = gf_zero(); + } + ulong parity_produced = fd_ulong_min( 32UL - data_shred_cnt, parity_shred_cnt ); + ulong parity_remaining = parity_shred_cnt - parity_produced; + if( FD_LIKELY( parity_remaining>0UL ) ) { + /* Produce another 32 parity shreds */ + FD_REEDSOL_GENERATE_FFT( 32, 32, ALL_VARS ); +#undef ALL_VARS + switch( parity_remaining ) { + case 32UL: gf_stu( parity_shred[ 31UL+parity_produced ] + shred_pos, in31 ); FALLTHRU + case 31UL: gf_stu( parity_shred[ 30UL+parity_produced ] + shred_pos, in30 ); FALLTHRU + case 30UL: gf_stu( parity_shred[ 29UL+parity_produced ] + shred_pos, in29 ); FALLTHRU + case 29UL: gf_stu( parity_shred[ 28UL+parity_produced ] + shred_pos, in28 ); FALLTHRU + case 28UL: gf_stu( parity_shred[ 27UL+parity_produced ] + shred_pos, in27 ); FALLTHRU + case 27UL: gf_stu( parity_shred[ 26UL+parity_produced ] + shred_pos, in26 ); FALLTHRU + case 26UL: gf_stu( parity_shred[ 25UL+parity_produced ] + shred_pos, in25 ); FALLTHRU + case 25UL: gf_stu( parity_shred[ 24UL+parity_produced ] + shred_pos, in24 ); FALLTHRU + case 24UL: gf_stu( parity_shred[ 23UL+parity_produced ] + shred_pos, in23 ); FALLTHRU + case 23UL: gf_stu( parity_shred[ 22UL+parity_produced ] + shred_pos, in22 ); FALLTHRU + case 22UL: gf_stu( parity_shred[ 21UL+parity_produced ] + shred_pos, in21 ); FALLTHRU + case 21UL: gf_stu( parity_shred[ 20UL+parity_produced ] + shred_pos, in20 ); FALLTHRU + 
case 20UL: gf_stu( parity_shred[ 19UL+parity_produced ] + shred_pos, in19 ); FALLTHRU + case 19UL: gf_stu( parity_shred[ 18UL+parity_produced ] + shred_pos, in18 ); FALLTHRU + case 18UL: gf_stu( parity_shred[ 17UL+parity_produced ] + shred_pos, in17 ); FALLTHRU + case 17UL: gf_stu( parity_shred[ 16UL+parity_produced ] + shred_pos, in16 ); FALLTHRU + case 16UL: gf_stu( parity_shred[ 15UL+parity_produced ] + shred_pos, in15 ); FALLTHRU + case 15UL: gf_stu( parity_shred[ 14UL+parity_produced ] + shred_pos, in14 ); FALLTHRU + case 14UL: gf_stu( parity_shred[ 13UL+parity_produced ] + shred_pos, in13 ); FALLTHRU + case 13UL: gf_stu( parity_shred[ 12UL+parity_produced ] + shred_pos, in12 ); FALLTHRU + case 12UL: gf_stu( parity_shred[ 11UL+parity_produced ] + shred_pos, in11 ); FALLTHRU + case 11UL: gf_stu( parity_shred[ 10UL+parity_produced ] + shred_pos, in10 ); FALLTHRU + case 10UL: gf_stu( parity_shred[ 9UL+parity_produced ] + shred_pos, in09 ); FALLTHRU + case 9UL: gf_stu( parity_shred[ 8UL+parity_produced ] + shred_pos, in08 ); FALLTHRU + case 8UL: gf_stu( parity_shred[ 7UL+parity_produced ] + shred_pos, in07 ); FALLTHRU + case 7UL: gf_stu( parity_shred[ 6UL+parity_produced ] + shred_pos, in06 ); FALLTHRU + case 6UL: gf_stu( parity_shred[ 5UL+parity_produced ] + shred_pos, in05 ); FALLTHRU + case 5UL: gf_stu( parity_shred[ 4UL+parity_produced ] + shred_pos, in04 ); FALLTHRU + case 4UL: gf_stu( parity_shred[ 3UL+parity_produced ] + shred_pos, in03 ); FALLTHRU + case 3UL: gf_stu( parity_shred[ 2UL+parity_produced ] + shred_pos, in02 ); FALLTHRU + case 2UL: gf_stu( parity_shred[ 1UL+parity_produced ] + shred_pos, in01 ); FALLTHRU + case 1UL: gf_stu( parity_shred[ 0UL+parity_produced ] + shred_pos, in00 ); + } + } + } + /* In order to handle shred sizes that are not divisible by 32, we clamp + shred_pos to shred_sz-32 when shred_sz-32 {(r_offset, i_round+1, omega_)} and {(r_offset+2**i_round, i_round+1, omega_ )}") + butterflies.append((1, r_offset+omega_, r_offset+2**i_round+omega_, ( i_round, omega_ , beta ), (r_offset, i_round, omega_), (r_offset, i_round, omega_ + 2**i_round), (r_offset, i_round+1, omega_), (r_offset+2**i_round, i_round+1, omega_ ) )) + + + butterflies.extend(op_ifft(h, beta, i_round+1, r_offset)) + butterflies.extend(op_ifft(h, beta, i_round+1, r_offset+2**i_round)) + return butterflies + +print_macro("FD_REEDSOL_PRIVATE_FFT_BUTTERFLY", ["inout0", "inout1", "c"], [ + "inout0 = GF_ADD( inout0, GF_MUL( inout1, c ) );", + "inout1 = GF_ADD( inout1, inout0 );" + ]) +print_macro("FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY", ["inout0", "inout1", "c"], [ + "inout1 = GF_ADD( inout1, inout0 );", + "inout0 = GF_ADD( inout0, GF_MUL( inout1, c ) );", + ]) + + + +for N in (32, 16, 8, 4): + inputs = [f"in{j:02}" for j in range(N)] + macro_lines = [ ] + + current_vars = [ (0,0,i) for i in range(32) ] + + butterflies = op_ifft(N, 0, 0, 0) + const_to_cidx = {} + for idx, (t, i0, i1, c, fi0, fi1, fo0, fo1) in enumerate(butterflies): + if not c in const_to_cidx: + const_to_cidx[c] = len(const_to_cidx) + + consts_array = [None]*len(const_to_cidx) + for k,v in const_to_cidx.items(): + consts_array[v] = k + + for shift in range(0, 33, N): + shift_specific = [ f'{(int(sbar[ c[0], c[1]^shift ])):3}' for c in consts_array ] + print(f"#define FD_REEDSOL_IFFT_CONSTANTS_{N}_{shift:<2} " + ', '.join(shift_specific), file=outf) + + + for t, i0, i1, c, fi0, fi1, fo0, fo1 in butterflies: + assert t==1 + assert current_vars[i0] == fi0 + assert current_vars[i1] == fi1 + 
macro_lines.append(f"FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( {inputs[i0]}, {inputs[i1]}, c_{const_to_cidx[c]:02} );") # {fi0} and {fi1} => {fo0} and {fo1}") + current_vars[i0] = fo0 + current_vars[i1] = fo1 + print_macro(f"FD_REEDSOL_IFFT_IMPL_{N}", [f"c_{j:02}" for j in range(len(const_to_cidx))] + inputs, macro_lines) + + + macro_lines = [ ] + butterflies = op_fft(N, shift, 0, 0) + + const_to_cidx = {} + for idx, (t, i0, i1, c, fi0, fi1, fo0, fo1) in enumerate(butterflies): + if not c in const_to_cidx: + const_to_cidx[c] = len(const_to_cidx) + + consts_array = [None]*len(const_to_cidx) + for k,v in const_to_cidx.items(): + consts_array[v] = k + + for shift in range(0, 33, N): + shift_specific = [ f'{int(sbar[ c[0], c[1]^shift ]):3}' for c in consts_array ] + print(f"#define FD_REEDSOL_FFT_CONSTANTS_{N}_{shift:<2} " + ', '.join(shift_specific), file=outf) + + + for t, i0, i1, c, fi0, fi1, fo0, fo1 in butterflies: + assert t==0 + assert current_vars[i0] == fi0 + assert current_vars[i1] == fi1 + macro_lines.append(f"FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( {inputs[i0]}, {inputs[i1]}, c_{const_to_cidx[c]:02} );") # {fi0} and {fi1} => {fo0} and {fo1}") + current_vars[i0] = fo0 + current_vars[i1] = fo1 + print_macro(f"FD_REEDSOL_FFT_IMPL_{N}", [f"c_{j:02}" for j in range(len(const_to_cidx))] + inputs, macro_lines) + + +print("#endif /* HEADER_fd_src_ballet_reedsol_fd_reedsol_fft_h */", file=outf) diff --git a/src/ballet/reedsol/generate_ppt.py b/src/ballet/reedsol/generate_ppt.py new file mode 100644 index 00000000000..3a9e334884a --- /dev/null +++ b/src/ballet/reedsol/generate_ppt.py @@ -0,0 +1,374 @@ +import galois +import numpy as np +import numpy.linalg + +# file 1: fd_reedsol_ppt.h +header = """ +/* Note: This file is auto generated. */ +#ifndef HEADER_fd_src_ballet_reedsol_fd_reedsol_ppt_h +#define HEADER_fd_src_ballet_reedsol_fd_reedsol_ppt_h + +/* This file implements the Principal Pivot Transform for the Reed + Solomon FFT operator as described in: + S. -J. Lin, A. Alloum and T. Al-Naffouri, "Principal pivot + transforms on radix-2 DFT-type matrices," 2017 IEEE International + Symposium on Information Theory (ISIT), Aachen, Germany, 2017, pp. + 2358-2362, doi: 10.1109/ISIT.2017.8006951 + + The main macro this file provides is FD_REEDSOL_GENERATE_PPT. The + rest of this file is auto-generated implementation details. + + + When the number of data shreds we have is not a power of 2, the + approach used in the 32-32 case doesn't apply. I found the paper + extending it to the general case uninterpretable. So we use the + principal pivot transform as an alternative with similar + computational complexity. + + The goal of the first step of the 32-32 case is to find a polynomial + of degree < 32 that interpolates the data shreds. 
If we only have k + data shreds, where k<32, then instead we need a polynomial P of + degree =4: + if k-alpha_offset >= n: + return [ ("IFFT", n, alpha_offset) ] + elif k-alpha_offset <= 0: + return [ ("FFT", n, alpha_offset) ] + + if n == 2: + f = fft_matrix(1, alpha_offset) + if k-alpha_offset <= 0: + matrix = f + elif k-alpha_offset >= 2: + matrix = ifft_matrix(1, alpha_offset) + else: + matrix = GF(np.array([[GF(1)/f[0,0], f[0,1]/f[0,0]], [ f[1,0]/f[0,0], f[1,1]-f[1,0]*f[0,1]/f[0,0]]])) + return [ ("MM22", alpha_offset, alpha_offset+1, matrix) ] + + + B = Bmatr(lg_sz, alpha_offset) + Bupper = np.linalg.inv(B) + Blower = GF(np.array([[GF(1)/B[0,0], B[0,1]/B[0,0]],[B[1,0]/B[0,0], B[1,1] - B[1,0]*B[0,1]/B[0,0]]])) + + operations = [] + n2 = n//2 + for j in range(n2): + in_alpha1 = (j+alpha_offset < k) + in_alpha2 = (j+alpha_offset+n2 < k) + if in_alpha1 and in_alpha2: + # Do nothing + pass + elif in_alpha1 and not in_alpha2: + operations.append( ("COPY_SCRATCH", j+n2+alpha_offset, j+n2) ) # We need this later in the last step + # No need to do anything to the left half. We need U1 to update the right half, but we'll defer that. We can do the Blower[1,1] part though + operations.append( ("SCALE", j+n2+alpha_offset, Blower[1,1]) ) + else: + operations.append( ("MM22", j+alpha_offset, j+alpha_offset+n2, B)) + + + + operations.extend( principal_pivot_transform_k_no_x(lg_sz-1, k, alpha_offset) ) + + # Fixup the part of J2 that needs U1 + for j in range(n2): + in_alpha1 = (j+alpha_offset < k) + in_alpha2 = (j+alpha_offset+n2 < k) + if in_alpha1 and not in_alpha2: + operations.append( ("MULACC", j+n2+alpha_offset, j+alpha_offset, Blower[1,0]) ) + + + operations.extend( principal_pivot_transform_k_no_x(lg_sz-1, k, alpha_offset+n//2) ) + + for j in range(n2): + in_alpha1 = (j+alpha_offset < k) + in_alpha2 = (j+alpha_offset+n2 < k) + if in_alpha2: + operations.append( ("MM22", j+alpha_offset, j+alpha_offset+n2, Bupper)) + # Do nothing + pass + elif in_alpha1 and not in_alpha2: + # No need to do anything to the right half + operations.append( ("SCALE", j+alpha_offset, B[0,0]) ) + operations.append( ("MULACC_SCRATCH", j+alpha_offset, j+n2, B[0,1]) ) + else: # in neither + pass + return operations + +def print_macro(macro_name, args, lines, indent=2): + line1 = "#define " + macro_name + "( " + args[0] + maxwidth = max( map(len, lines)) + indent + 16 + for arg in args[1:]: + if len(line1 + arg)+3 < maxwidth: + line1 += ", " + arg + else: + line1 += " "*(maxwidth - len(line1)-3) + ", \\" + print(line1, file=outf) + line1 = " "*(2*indent) + arg + line1 += ") " + print(line1 + " "*(maxwidth-len(line1)-1) + "\\", file=outf) + + line2 = " "*indent + "do {" + line2 += " "*(maxwidth-len(line2)-1) + "\\" + print(line2, file=outf) + for line in lines: + print(" "*(2*indent) + line + " "*(maxwidth-len(line)-1-2*indent) + "\\", file=outf) + print(" "*indent + "} while( 0 )", file=outf) + print("\n\n", file=outf) + + +print_macro("GF_MUL22", ["inout0", "inout1", "c00", "c01", "c10", "c11"], [ + "gf_t temp = GF_ADD( GF_MUL( inout0, c00 ), GF_MUL( inout1, c01 ) );", + "inout1 = GF_ADD( GF_MUL( inout0, c10 ), GF_MUL( inout1, c11 ) );", + "inout0 = temp;" + ]) + +for N in (16, 32): + for k in range(1,N): + inputs = [f"in{j:02}" for j in range(N)] + + macro_lines = [ ] + operations = principal_pivot_transform_k_no_x(int(np.log2(N)), k, 0) + + scratch_to_declare = set() + + + for op in operations: + if op[0] == "IFFT": + n, shift = op[1:] + macro_lines.append(f"FD_REEDSOL_GENERATE_IFFT( {n}, {shift}, {', 
'.join(inputs[shift:shift+n])} );") + if op[0] == "FFT": + n, shift = op[1:] + macro_lines.append(f"FD_REEDSOL_GENERATE_FFT( {n}, {shift}, {', '.join(inputs[shift:shift+n])} );") + if op[0] == "COPY_SCRATCH": + src, dest = op[1:] + scratch_to_declare.add(f"scratch_{dest}") + macro_lines.append(f"scratch_{dest} = {inputs[src]};") + if op[0] == "SCALE": + srcdest, const = op[1:] + macro_lines.append(f"{inputs[srcdest]} = GF_MUL( {inputs[srcdest]}, {int(const)} );") + if op[0] == "MM22": + srcdest0, srcdest1, matr = op[1:] + macro_lines.append(f"GF_MUL22( {inputs[srcdest0]}, {inputs[srcdest1]}, {int(matr[0,0])}, {int(matr[0,1])}, {int(matr[1,0])}, {int(matr[1,1])} );") + if op[0] == "MULACC": + dest, src, const = op[1:] + macro_lines.append(f"{inputs[dest]} = GF_ADD( GF_MUL( {inputs[src]}, {int(const)} ), {inputs[dest]} );") + if op[0] == "MULACC_SCRATCH": + dest, src_scratch, const = op[1:] + assert f"scratch_{src_scratch}" in scratch_to_declare + macro_lines.append(f"{inputs[dest]} = GF_ADD( GF_MUL( scratch_{src_scratch}, {int(const)} ), {inputs[dest]} );") + + scratch_lines = [] + scratch_to_declare = sorted(list(scratch_to_declare)) + while scratch_to_declare: + scratch_lines.append("gf_t " + ", ".join(scratch_to_declare[:16]) + ";") + scratch_to_declare = scratch_to_declare[16:] + macro_lines = scratch_lines + macro_lines + + print_macro(f"FD_REEDSOL_PPT_IMPL_{N}_{k}", inputs, macro_lines) + + + + first_bytes = GF([0]*1 + [1] +[0]*30) + scratch_first_bytes = GF([0]*32) + for op in operations: + if op[0] == "IFFT": + n, shift = op[1:] + first_bytes[shift:shift+n] = ifft_matrix(int(np.log2(n)), shift) @ GF(first_bytes[shift:shift+n]) + if op[0] == "FFT": + n, shift = op[1:] + first_bytes[shift:shift+n] = fft_matrix(int(np.log2(n)), shift) @ GF(first_bytes[shift:shift+n]) + if op[0] == "COPY_SCRATCH": + src, dest = op[1:] + scratch_first_bytes[dest] = first_bytes[src] + if op[0] == "SCALE": + srcdest, const = op[1:] + first_bytes[srcdest] = first_bytes[srcdest] * const + if op[0] == "MM22": + srcdest0, srcdest1, matr = op[1:] + first_bytes[srcdest0], first_bytes[srcdest1] = matr @ GF(np.array([[first_bytes[srcdest0]], [first_bytes[srcdest1]]])) + if op[0] == "MULACC": + dest, src, const = op[1:] + first_bytes[dest] += first_bytes[src] * const + if op[0] == "MULACC_SCRATCH": + dest, src_scratch, const = op[1:] + first_bytes[dest] += scratch_first_bytes[src_scratch] * const + + + + +print("#endif /* HEADER_fd_src_ballet_reedsol_fd_reedsol_ppt_h */", file=outf) diff --git a/src/ballet/reedsol/test_reedsol.c b/src/ballet/reedsol/test_reedsol.c new file mode 100644 index 00000000000..e72582f8200 --- /dev/null +++ b/src/ballet/reedsol/test_reedsol.c @@ -0,0 +1,646 @@ +#include "fd_reedsol.h" +#include "../../util/fd_util.h" + +#if FD_HAS_GFNI +#include "fd_reedsol_arith_gfni.h" +#elif FD_HAS_AVX +#include "fd_reedsol_arith_avx2.h" +#else +#include "fd_reedsol_arith_none.h" +#endif +#include "fd_reedsol_fft.h" +#include "fd_reedsol_ppt.h" + + +FD_IMPORT_BINARY( fd_reedsol_generic_constants, "src/ballet/reedsol/constants/generic_constants.bin" ); +static short const * log_tbl = (short const *)fd_reedsol_generic_constants; /* Indexed [0, 256) */ +static uchar const * invlog_tbl = fd_reedsol_generic_constants + 256UL*sizeof(short) + 512UL*sizeof(uchar); /* Indexed [-512, 512) */ +static uchar const * matrix_32_32= fd_reedsol_generic_constants + 256UL*sizeof(short) + 1024UL*sizeof(uchar); /* Row major order, 32x32 */ + +#define SHRED_SZ (1024UL) +uchar data_shreds[ SHRED_SZ * 32UL ]; +uchar 
parity_shreds[ SHRED_SZ * 32UL ]; + +FD_STATIC_ASSERT( sizeof(fd_reedsol_t) == FD_REEDSOL_FOOTPRINT, reedsol_footprint ); + +uchar mem[ FD_REEDSOL_FOOTPRINT ] __attribute__((aligned(FD_REEDSOL_ALIGN))); + +static uchar gfmul( uchar a, uchar b ){ return invlog_tbl[ log_tbl[ a ] + log_tbl[ b ] ]; } +static uchar gfinv( uchar a ){ return invlog_tbl[ 255 - log_tbl[ a ] ]; } + +/* Reference implementation using the matrix-based version similar to + the Rust crate. */ +void fd_reedsol_encode_ref( ulong shred_sz, + uchar const * const * data_shred, + ulong data_shred_cnt, + uchar * const * parity_shred, + ulong parity_shred_cnt ) { + + uchar top_matrix[ FD_REEDSOL_DATA_SHREDS_MAX ][ 2UL*FD_REEDSOL_DATA_SHREDS_MAX ]; + uchar main_matrix[ FD_REEDSOL_PARITY_SHREDS_MAX ][ FD_REEDSOL_DATA_SHREDS_MAX ]; + /* Set first row */ + top_matrix[ 0 ][ 0 ] = (uchar)1; + for( ulong j=1UL; j= 0 */ + top_matrix[ i ][ 0 ] = (uchar)1; + for( ulong j=1UL; j= 64 */ + for( ulong i=0UL; i<32UL; i++ ) { + d[ i ] = data_shreds + stride*i; + p[ i ] = parity_shreds + stride*i; + r[ i ] = parity_shreds + stride*(i+FD_REEDSOL_PARITY_SHREDS_MAX); + } + + for( ulong d_cnt=1UL; d_cnt<=FD_REEDSOL_DATA_SHREDS_MAX; d_cnt++ ) { + for( ulong p_cnt=1UL; p_cnt<=FD_REEDSOL_PARITY_SHREDS_MAX; p_cnt++ ) { + for( ulong shred_sz=32UL; shred_sz<=64UL; shred_sz++ ) { + + fd_memset( data_shreds, 0, FD_REEDSOL_DATA_SHREDS_MAX * stride ); + fd_memset( parity_shreds, 0xCC, 2UL*FD_REEDSOL_PARITY_SHREDS_MAX * stride ); + + /* populate data shreds with an identity followed by random data */ + for( ulong i=0UL; i Date: Wed, 19 Apr 2023 17:04:09 -0500 Subject: [PATCH 2/6] Extend to 67 data and parity shreds --- src/ballet/reedsol/Local.mk | 5 +- src/ballet/reedsol/fd_reedsol.c | 20 +- src/ballet/reedsol/fd_reedsol.h | 10 +- src/ballet/reedsol/fd_reedsol_fft.h | 1423 +++ src/ballet/reedsol/fd_reedsol_internal.c | 278 - src/ballet/reedsol/fd_reedsol_internal.h | 32 +- src/ballet/reedsol/fd_reedsol_internal_128.c | 234 + src/ballet/reedsol/fd_reedsol_internal_16.c | 206 + src/ballet/reedsol/fd_reedsol_internal_32.c | 194 + src/ballet/reedsol/fd_reedsol_internal_64.c | 244 + src/ballet/reedsol/fd_reedsol_ppt.h | 9653 +++++++++++++++--- src/ballet/reedsol/generate_dispatch.py | 106 + src/ballet/reedsol/generate_fft.py | 16 +- src/ballet/reedsol/generate_ppt.py | 60 +- src/ballet/reedsol/test_reedsol.c | 132 +- 15 files changed, 10579 insertions(+), 2034 deletions(-) delete mode 100644 src/ballet/reedsol/fd_reedsol_internal.c create mode 100644 src/ballet/reedsol/fd_reedsol_internal_128.c create mode 100644 src/ballet/reedsol/fd_reedsol_internal_16.c create mode 100644 src/ballet/reedsol/fd_reedsol_internal_32.c create mode 100644 src/ballet/reedsol/fd_reedsol_internal_64.c create mode 100644 src/ballet/reedsol/generate_dispatch.py diff --git a/src/ballet/reedsol/Local.mk b/src/ballet/reedsol/Local.mk index 45282343b23..9d7a23f7aa1 100644 --- a/src/ballet/reedsol/Local.mk +++ b/src/ballet/reedsol/Local.mk @@ -3,5 +3,8 @@ ifdef FD_HAS_GFNI $(call add-asms,fd_reedsol_gfni_32,fd_ballet) endif $(call add-objs,fd_reedsol,fd_ballet) -$(call add-objs,fd_reedsol_internal,fd_ballet) +$(call add-objs,fd_reedsol_internal_16,fd_ballet) +$(call add-objs,fd_reedsol_internal_32,fd_ballet) +$(call add-objs,fd_reedsol_internal_64,fd_ballet) +$(call add-objs,fd_reedsol_internal_128,fd_ballet) $(call make-unit-test,test_reedsol,test_reedsol,fd_ballet fd_util) diff --git a/src/ballet/reedsol/fd_reedsol.c b/src/ballet/reedsol/fd_reedsol.c index 39887774a9f..6d9b8193c07 100644 
--- a/src/ballet/reedsol/fd_reedsol.c +++ b/src/ballet/reedsol/fd_reedsol.c @@ -1,15 +1,29 @@ #include "fd_reedsol.h" #include "fd_reedsol_internal.h" +/* Include the constants in one central spot */ +#define INCLUDE_CONSTANTS +#if FD_HAS_GFNI +#include "fd_reedsol_arith_gfni.h" +#elif FD_HAS_AVX +#include "fd_reedsol_arith_avx2.h" +#else +#include "fd_reedsol_arith_none.h" +#endif void fd_reedsol_encode_fini( fd_reedsol_t * rs ) { #if FD_HAS_GFNI if( FD_LIKELY( (rs->data_shred_cnt==32UL) & (rs->parity_shred_cnt==32UL ) ) ) fd_reedsol_encode_32_32( rs->shred_sz, (uchar const * *)rs->data_shred, rs->parity_shred, rs->scratch ); else - fd_reedsol_encode( rs->shred_sz, (uchar const * *)rs->data_shred, rs->data_shred_cnt, rs->parity_shred, rs->parity_shred_cnt ); -#else - fd_reedsol_encode( rs->shred_sz, (uchar const * *)rs->data_shred, rs->data_shred_cnt, rs->parity_shred, rs->parity_shred_cnt ); #endif + if( FD_UNLIKELY( rs->data_shred_cnt<=16UL ) ) + fd_reedsol_encode_16( rs->shred_sz, (uchar const * *)rs->data_shred, rs->data_shred_cnt, rs->parity_shred, rs->parity_shred_cnt ); + else if( FD_LIKELY( rs->data_shred_cnt<=32UL ) ) + fd_reedsol_encode_32( rs->shred_sz, (uchar const * *)rs->data_shred, rs->data_shred_cnt, rs->parity_shred, rs->parity_shred_cnt ); + else if( FD_LIKELY( rs->data_shred_cnt<=64UL ) ) + fd_reedsol_encode_64( rs->shred_sz, (uchar const * *)rs->data_shred, rs->data_shred_cnt, rs->parity_shred, rs->parity_shred_cnt ); + else + fd_reedsol_encode_128( rs->shred_sz, (uchar const * *)rs->data_shred, rs->data_shred_cnt, rs->parity_shred, rs->parity_shred_cnt ); rs->data_shred_cnt = 0UL; rs->parity_shred_cnt = 0UL; diff --git a/src/ballet/reedsol/fd_reedsol.h b/src/ballet/reedsol/fd_reedsol.h index 16607a13477..2ac42ea4318 100644 --- a/src/ballet/reedsol/fd_reedsol.h +++ b/src/ballet/reedsol/fd_reedsol.h @@ -27,14 +27,14 @@ /* FD_REEDSOL_{DATA, PARITY}_SHREDS_MAX describe the inclusive maximum number of data and parity shreds that this implementation supports. These limits are not mathematical limits, but limits based on current - Solana needs and performance. It is common for both shred counts to - be at their maximum values. */ -#define FD_REEDSOL_DATA_SHREDS_MAX (32UL) -#define FD_REEDSOL_PARITY_SHREDS_MAX (32UL) + Solana needs and performance. The common case is for both shred counts + to be set to 32.
*/ +#define FD_REEDSOL_DATA_SHREDS_MAX (67UL) +#define FD_REEDSOL_PARITY_SHREDS_MAX (67UL) #define FD_REEDSOL_ALIGN (128UL) -#define FD_REEDSOL_FOOTPRINT (1664UL) +#define FD_REEDSOL_FOOTPRINT (2176UL) struct __attribute__((aligned(FD_REEDSOL_ALIGN))) fd_reedsol_private { uchar scratch[ 1024 ]; /* Used for the ultra high performance implementation */ diff --git a/src/ballet/reedsol/fd_reedsol_fft.h b/src/ballet/reedsol/fd_reedsol_fft.h index 44146fba8b5..375e93437d3 100644 --- a/src/ballet/reedsol/fd_reedsol_fft.h +++ b/src/ballet/reedsol/fd_reedsol_fft.h @@ -108,8 +108,1395 @@ +#define FD_REEDSOL_IFFT_CONSTANTS_128_0 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126, 0, 6, 28, 26, 120, 126, 100, 98, 237, 235, 241, 247, 149, 147, 137, 143, 179, 181, 175, 169, 203, 205, 215, 209, 94, 88, 66, 68, 38, 32, 58, 60, 0, 22, 97, 119, 38, 48, 71, 81, 183, 161, 214, 192, 145, 135, 240, 230, 0, 11, 174, 165, 33, 42, 143, 132, 0, 71, 189, 250, 0, 218, 0 +#define FD_REEDSOL_IFFT_CONSTANTS_128_128 128, 130, 132, 134, 136, 138, 140, 142, 144, 146, 148, 150, 152, 154, 156, 158, 160, 162, 164, 166, 168, 170, 172, 174, 176, 178, 180, 182, 184, 186, 188, 190, 192, 194, 196, 198, 200, 202, 204, 206, 208, 210, 212, 214, 216, 218, 220, 222, 224, 226, 228, 230, 232, 234, 236, 238, 240, 242, 244, 246, 248, 250, 252, 254, 182, 176, 170, 172, 206, 200, 210, 212, 91, 93, 71, 65, 35, 37, 63, 57, 5, 3, 25, 31, 125, 123, 97, 103, 232, 238, 244, 242, 144, 150, 140, 138, 12, 26, 109, 123, 42, 60, 75, 93, 187, 173, 218, 204, 157, 139, 252, 234, 45, 38, 131, 136, 12, 7, 162, 169, 18, 85, 175, 232, 130, 88, 133 +#define FD_REEDSOL_IFFT_IMPL_128( c_00, c_01, c_02, c_03, c_04, c_05 , \ + c_06, c_07, c_08, c_09, c_10, c_11, c_12, c_13, c_14, c_15, c_16 , \ + c_17, c_18, c_19, c_20, c_21, c_22, c_23, c_24, c_25, c_26, c_27 , \ + c_28, c_29, c_30, c_31, c_32, c_33, c_34, c_35, c_36, c_37, c_38 , \ + c_39, c_40, c_41, c_42, c_43, c_44, c_45, c_46, c_47, c_48, c_49 , \ + c_50, c_51, c_52, c_53, c_54, c_55, c_56, c_57, c_58, c_59, c_60 , \ + c_61, c_62, c_63, c_64, c_65, c_66, c_67, c_68, c_69, c_70, c_71 , \ + c_72, c_73, c_74, c_75, c_76, c_77, c_78, c_79, c_80, c_81, c_82 , \ + c_83, c_84, c_85, c_86, c_87, c_88, c_89, c_90, c_91, c_92, c_93 , \ + c_94, c_95, c_96, c_97, c_98, c_99, c_100, c_101, c_102, c_103, c_104, \ + c_105, c_106, c_107, c_108, c_109, c_110, c_111, c_112, c_113, c_114, \ + c_115, c_116, c_117, c_118, c_119, c_120, c_121, c_122, c_123, c_124, \ + c_125, c_126, in00, in01, in02, in03, in04, in05, in06, in07, in08 , \ + in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19 , \ + in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30 , \ + in31, in32, in33, in34, in35, in36, in37, in38, in39, in40, in41 , \ + in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52 , \ + in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63 , \ + in64, in65, in66, in67, in68, in69, in70, in71, in72, in73, in74 , \ + in75, in76, in77, in78, in79, in80, in81, in82, in83, in84, in85 , \ + in86, in87, in88, in89, in90, in91, in92, in93, in94, in95, in96 , \ + in97, in98, in99, in100, in101, in102, in103, in104, in105, in106 , \ + in107, in108, in109, in110, in111, in112, in113, in114, in115, in116, \ + in117, in118, in119, in120, in121, in122, in123, in124, in125, in126, \ + in127) \ 
+ do { \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in00, in01, c_00 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in02, in03, c_01 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in04, in05, c_02 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in06, in07, c_03 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in08, in09, c_04 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in10, in11, c_05 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in12, in13, c_06 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in14, in15, c_07 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in16, in17, c_08 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in18, in19, c_09 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in20, in21, c_10 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in22, in23, c_11 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in24, in25, c_12 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in26, in27, c_13 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in28, in29, c_14 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in30, in31, c_15 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in32, in33, c_16 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in34, in35, c_17 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in36, in37, c_18 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in38, in39, c_19 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in40, in41, c_20 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in42, in43, c_21 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in44, in45, c_22 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in46, in47, c_23 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in48, in49, c_24 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in50, in51, c_25 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in52, in53, c_26 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in54, in55, c_27 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in56, in57, c_28 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in58, in59, c_29 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in60, in61, c_30 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in62, in63, c_31 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in64, in65, c_32 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in66, in67, c_33 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in68, in69, c_34 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in70, in71, c_35 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in72, in73, c_36 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in74, in75, c_37 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in76, in77, c_38 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in78, in79, c_39 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in80, in81, c_40 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in82, in83, c_41 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in84, in85, c_42 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in86, in87, c_43 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in88, in89, c_44 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in90, in91, c_45 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in92, in93, c_46 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in94, in95, c_47 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in96, in97, c_48 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in98, in99, c_49 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in100, in101, c_50 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in102, in103, c_51 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in104, in105, c_52 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in106, in107, c_53 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in108, in109, c_54 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in110, in111, c_55 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in112, in113, c_56 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in114, in115, c_57 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in116, in117, c_58 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in118, 
in119, c_59 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in120, in121, c_60 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in122, in123, c_61 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in124, in125, c_62 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in126, in127, c_63 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in00, in02, c_64 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in04, in06, c_65 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in08, in10, c_66 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in12, in14, c_67 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in16, in18, c_68 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in20, in22, c_69 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in24, in26, c_70 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in28, in30, c_71 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in32, in34, c_72 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in36, in38, c_73 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in40, in42, c_74 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in44, in46, c_75 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in48, in50, c_76 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in52, in54, c_77 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in56, in58, c_78 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in60, in62, c_79 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in64, in66, c_80 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in68, in70, c_81 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in72, in74, c_82 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in76, in78, c_83 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in80, in82, c_84 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in84, in86, c_85 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in88, in90, c_86 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in92, in94, c_87 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in96, in98, c_88 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in100, in102, c_89 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in104, in106, c_90 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in108, in110, c_91 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in112, in114, c_92 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in116, in118, c_93 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in120, in122, c_94 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in124, in126, c_95 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in00, in04, c_96 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in08, in12, c_97 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in16, in20, c_98 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in24, in28, c_99 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in32, in36, c_100 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in40, in44, c_101 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in48, in52, c_102 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in56, in60, c_103 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in64, in68, c_104 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in72, in76, c_105 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in80, in84, c_106 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in88, in92, c_107 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in96, in100, c_108 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in104, in108, c_109 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in112, in116, c_110 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in120, in124, c_111 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in00, in08, c_112 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in16, in24, c_113 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in32, in40, c_114 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in48, in56, c_115 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in64, in72, c_116 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in80, in88, c_117 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in96, in104, c_118 ); \ + 
FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in112, in120, c_119 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in00, in16, c_120 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in32, in48, c_121 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in64, in80, c_122 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in96, in112, c_123 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in00, in32, c_124 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in64, in96, c_125 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in00, in64, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in32, in96, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in16, in48, c_124 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in80, in112, c_125 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in16, in80, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in48, in112, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in08, in24, c_120 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in40, in56, c_121 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in72, in88, c_122 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in104, in120, c_123 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in08, in40, c_124 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in72, in104, c_125 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in08, in72, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in40, in104, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in24, in56, c_124 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in88, in120, c_125 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in24, in88, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in56, in120, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in04, in12, c_112 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in20, in28, c_113 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in36, in44, c_114 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in52, in60, c_115 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in68, in76, c_116 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in84, in92, c_117 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in100, in108, c_118 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in116, in124, c_119 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in04, in20, c_120 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in36, in52, c_121 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in68, in84, c_122 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in100, in116, c_123 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in04, in36, c_124 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in68, in100, c_125 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in04, in68, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in36, in100, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in20, in52, c_124 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in84, in116, c_125 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in20, in84, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in52, in116, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in12, in28, c_120 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in44, in60, c_121 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in76, in92, c_122 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in108, in124, c_123 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in12, in44, c_124 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in76, in108, c_125 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in12, in76, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in44, in108, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in28, in60, c_124 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in92, in124, c_125 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in28, in92, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in60, in124, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in02, in06, c_96 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in10, in14, 
c_97 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in18, in22, c_98 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in26, in30, c_99 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in34, in38, c_100 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in42, in46, c_101 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in50, in54, c_102 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in58, in62, c_103 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in66, in70, c_104 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in74, in78, c_105 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in82, in86, c_106 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in90, in94, c_107 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in98, in102, c_108 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in106, in110, c_109 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in114, in118, c_110 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in122, in126, c_111 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in02, in10, c_112 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in18, in26, c_113 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in34, in42, c_114 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in50, in58, c_115 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in66, in74, c_116 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in82, in90, c_117 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in98, in106, c_118 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in114, in122, c_119 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in02, in18, c_120 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in34, in50, c_121 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in66, in82, c_122 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in98, in114, c_123 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in02, in34, c_124 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in66, in98, c_125 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in02, in66, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in34, in98, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in18, in50, c_124 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in82, in114, c_125 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in18, in82, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in50, in114, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in10, in26, c_120 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in42, in58, c_121 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in74, in90, c_122 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in106, in122, c_123 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in10, in42, c_124 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in74, in106, c_125 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in10, in74, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in42, in106, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in26, in58, c_124 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in90, in122, c_125 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in26, in90, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in58, in122, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in06, in14, c_112 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in22, in30, c_113 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in38, in46, c_114 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in54, in62, c_115 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in70, in78, c_116 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in86, in94, c_117 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in102, in110, c_118 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in118, in126, c_119 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in06, in22, c_120 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in38, in54, c_121 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in70, in86, c_122 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in102, in118, c_123 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( 
in06, in38, c_124 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in70, in102, c_125 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in06, in70, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in38, in102, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in22, in54, c_124 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in86, in118, c_125 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in22, in86, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in54, in118, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in14, in30, c_120 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in46, in62, c_121 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in78, in94, c_122 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in110, in126, c_123 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in14, in46, c_124 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in78, in110, c_125 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in14, in78, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in46, in110, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in30, in62, c_124 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in94, in126, c_125 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in30, in94, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in62, in126, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in01, in03, c_64 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in05, in07, c_65 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in09, in11, c_66 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in13, in15, c_67 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in17, in19, c_68 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in21, in23, c_69 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in25, in27, c_70 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in29, in31, c_71 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in33, in35, c_72 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in37, in39, c_73 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in41, in43, c_74 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in45, in47, c_75 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in49, in51, c_76 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in53, in55, c_77 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in57, in59, c_78 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in61, in63, c_79 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in65, in67, c_80 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in69, in71, c_81 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in73, in75, c_82 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in77, in79, c_83 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in81, in83, c_84 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in85, in87, c_85 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in89, in91, c_86 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in93, in95, c_87 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in97, in99, c_88 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in101, in103, c_89 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in105, in107, c_90 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in109, in111, c_91 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in113, in115, c_92 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in117, in119, c_93 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in121, in123, c_94 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in125, in127, c_95 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in01, in05, c_96 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in09, in13, c_97 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in17, in21, c_98 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in25, in29, c_99 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in33, in37, c_100 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in41, in45, c_101 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in49, in53, c_102 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in57, in61, c_103 ); \ + 
FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in65, in69, c_104 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in73, in77, c_105 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in81, in85, c_106 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in89, in93, c_107 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in97, in101, c_108 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in105, in109, c_109 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in113, in117, c_110 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in121, in125, c_111 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in01, in09, c_112 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in17, in25, c_113 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in33, in41, c_114 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in49, in57, c_115 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in65, in73, c_116 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in81, in89, c_117 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in97, in105, c_118 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in113, in121, c_119 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in01, in17, c_120 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in33, in49, c_121 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in65, in81, c_122 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in97, in113, c_123 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in01, in33, c_124 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in65, in97, c_125 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in01, in65, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in33, in97, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in17, in49, c_124 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in81, in113, c_125 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in17, in81, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in49, in113, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in09, in25, c_120 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in41, in57, c_121 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in73, in89, c_122 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in105, in121, c_123 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in09, in41, c_124 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in73, in105, c_125 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in09, in73, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in41, in105, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in25, in57, c_124 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in89, in121, c_125 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in25, in89, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in57, in121, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in05, in13, c_112 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in21, in29, c_113 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in37, in45, c_114 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in53, in61, c_115 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in69, in77, c_116 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in85, in93, c_117 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in101, in109, c_118 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in117, in125, c_119 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in05, in21, c_120 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in37, in53, c_121 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in69, in85, c_122 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in101, in117, c_123 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in05, in37, c_124 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in69, in101, c_125 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in05, in69, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in37, in101, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in21, in53, c_124 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in85, in117, c_125 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in21, in85, 
c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in53, in117, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in13, in29, c_120 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in45, in61, c_121 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in77, in93, c_122 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in109, in125, c_123 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in13, in45, c_124 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in77, in109, c_125 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in13, in77, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in45, in109, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in29, in61, c_124 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in93, in125, c_125 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in29, in93, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in61, in125, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in03, in07, c_96 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in11, in15, c_97 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in19, in23, c_98 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in27, in31, c_99 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in35, in39, c_100 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in43, in47, c_101 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in51, in55, c_102 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in59, in63, c_103 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in67, in71, c_104 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in75, in79, c_105 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in83, in87, c_106 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in91, in95, c_107 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in99, in103, c_108 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in107, in111, c_109 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in115, in119, c_110 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in123, in127, c_111 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in03, in11, c_112 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in19, in27, c_113 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in35, in43, c_114 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in51, in59, c_115 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in67, in75, c_116 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in83, in91, c_117 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in99, in107, c_118 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in115, in123, c_119 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in03, in19, c_120 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in35, in51, c_121 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in67, in83, c_122 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in99, in115, c_123 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in03, in35, c_124 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in67, in99, c_125 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in03, in67, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in35, in99, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in19, in51, c_124 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in83, in115, c_125 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in19, in83, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in51, in115, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in11, in27, c_120 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in43, in59, c_121 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in75, in91, c_122 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in107, in123, c_123 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in11, in43, c_124 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in75, in107, c_125 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in11, in75, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in43, in107, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in27, in59, c_124 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in91, 
in123, c_125 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in27, in91, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in59, in123, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in07, in15, c_112 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in23, in31, c_113 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in39, in47, c_114 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in55, in63, c_115 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in71, in79, c_116 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in87, in95, c_117 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in103, in111, c_118 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in119, in127, c_119 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in07, in23, c_120 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in39, in55, c_121 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in71, in87, c_122 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in103, in119, c_123 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in07, in39, c_124 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in71, in103, c_125 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in07, in71, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in39, in103, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in23, in55, c_124 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in87, in119, c_125 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in23, in87, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in55, in119, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in15, in31, c_120 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in47, in63, c_121 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in79, in95, c_122 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in111, in127, c_123 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in15, in47, c_124 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in79, in111, c_125 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in15, in79, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in47, in111, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in31, in63, c_124 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in95, in127, c_125 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in31, in95, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in63, in127, c_126 ); \ + } while( 0 ) + + + +#define FD_REEDSOL_FFT_CONSTANTS_128_0 0, 0, 218, 0, 71, 189, 250, 0, 11, 174, 165, 33, 42, 143, 132, 0, 22, 97, 119, 38, 48, 71, 81, 183, 161, 214, 192, 145, 135, 240, 230, 0, 6, 28, 26, 120, 126, 100, 98, 237, 235, 241, 247, 149, 147, 137, 143, 179, 181, 175, 169, 203, 205, 215, 209, 94, 88, 66, 68, 38, 32, 58, 60, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126 +#define FD_REEDSOL_FFT_CONSTANTS_128_128 133, 130, 88, 18, 85, 175, 232, 45, 38, 131, 136, 12, 7, 162, 169, 12, 26, 109, 123, 42, 60, 75, 93, 187, 173, 218, 204, 157, 139, 252, 234, 182, 176, 170, 172, 206, 200, 210, 212, 91, 93, 71, 65, 35, 37, 63, 57, 5, 3, 25, 31, 125, 123, 97, 103, 232, 238, 244, 242, 144, 150, 140, 138, 128, 130, 132, 134, 136, 138, 140, 142, 144, 146, 148, 150, 152, 154, 156, 158, 160, 162, 164, 166, 168, 170, 172, 174, 176, 178, 180, 182, 184, 186, 188, 190, 192, 194, 196, 198, 200, 202, 204, 206, 208, 210, 212, 214, 216, 218, 220, 222, 224, 226, 228, 230, 232, 234, 236, 238, 240, 242, 244, 246, 248, 250, 252, 254 +#define FD_REEDSOL_FFT_CONSTANTS_128_256 0, 0, 218, 0, 71, 189, 250, 0, 11, 174, 165, 33, 42, 143, 132, 0, 22, 97, 119, 38, 48, 71, 81, 183, 161, 214, 192, 145, 135, 240, 230, 0, 6, 28, 26, 120, 126, 100, 98, 237, 235, 241, 
247, 149, 147, 137, 143, 179, 181, 175, 169, 203, 205, 215, 209, 94, 88, 66, 68, 38, 32, 58, 60, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126 +#define FD_REEDSOL_FFT_IMPL_128( c_00, c_01, c_02, c_03, c_04, c_05 , \ + c_06, c_07, c_08, c_09, c_10, c_11, c_12, c_13, c_14, c_15, c_16 , \ + c_17, c_18, c_19, c_20, c_21, c_22, c_23, c_24, c_25, c_26, c_27 , \ + c_28, c_29, c_30, c_31, c_32, c_33, c_34, c_35, c_36, c_37, c_38 , \ + c_39, c_40, c_41, c_42, c_43, c_44, c_45, c_46, c_47, c_48, c_49 , \ + c_50, c_51, c_52, c_53, c_54, c_55, c_56, c_57, c_58, c_59, c_60 , \ + c_61, c_62, c_63, c_64, c_65, c_66, c_67, c_68, c_69, c_70, c_71 , \ + c_72, c_73, c_74, c_75, c_76, c_77, c_78, c_79, c_80, c_81, c_82 , \ + c_83, c_84, c_85, c_86, c_87, c_88, c_89, c_90, c_91, c_92, c_93 , \ + c_94, c_95, c_96, c_97, c_98, c_99, c_100, c_101, c_102, c_103 , \ + c_104, c_105, c_106, c_107, c_108, c_109, c_110, c_111, c_112, c_113, \ + c_114, c_115, c_116, c_117, c_118, c_119, c_120, c_121, c_122, c_123, \ + c_124, c_125, c_126, in00, in01, in02, in03, in04, in05, in06, in07, \ + in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18 , \ + in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29 , \ + in30, in31, in32, in33, in34, in35, in36, in37, in38, in39, in40 , \ + in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51 , \ + in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62 , \ + in63, in64, in65, in66, in67, in68, in69, in70, in71, in72, in73 , \ + in74, in75, in76, in77, in78, in79, in80, in81, in82, in83, in84 , \ + in85, in86, in87, in88, in89, in90, in91, in92, in93, in94, in95 , \ + in96, in97, in98, in99, in100, in101, in102, in103, in104, in105 , \ + in106, in107, in108, in109, in110, in111, in112, in113, in114, in115, \ + in116, in117, in118, in119, in120, in121, in122, in123, in124, in125, \ + in126, in127) \ + do { \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in00, in64, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in32, in96, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in00, in32, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in64, in96, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in16, in80, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in48, in112, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in16, in48, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in80, in112, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in00, in16, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in32, in48, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in64, in80, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in96, in112, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in08, in72, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in40, in104, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in08, in40, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in72, in104, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in24, in88, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in56, in120, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in24, in56, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in88, in120, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in08, in24, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in40, in56, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in72, in88, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in104, in120, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in00, in08, 
c_07 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in16, in24, c_08 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in32, in40, c_09 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in48, in56, c_10 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in64, in72, c_11 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in80, in88, c_12 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in96, in104, c_13 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in112, in120, c_14 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in04, in68, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in36, in100, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in04, in36, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in68, in100, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in20, in84, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in52, in116, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in20, in52, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in84, in116, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in04, in20, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in36, in52, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in68, in84, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in100, in116, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in12, in76, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in44, in108, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in12, in44, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in76, in108, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in28, in92, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in60, in124, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in28, in60, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in92, in124, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in12, in28, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in44, in60, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in76, in92, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in108, in124, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in04, in12, c_07 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in20, in28, c_08 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in36, in44, c_09 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in52, in60, c_10 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in68, in76, c_11 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in84, in92, c_12 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in100, in108, c_13 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in116, in124, c_14 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in00, in04, c_15 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in08, in12, c_16 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in16, in20, c_17 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in24, in28, c_18 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in32, in36, c_19 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in40, in44, c_20 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in48, in52, c_21 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in56, in60, c_22 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in64, in68, c_23 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in72, in76, c_24 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in80, in84, c_25 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in88, in92, c_26 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in96, in100, c_27 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in104, in108, c_28 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in112, in116, c_29 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in120, in124, c_30 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in02, in66, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in34, in98, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in02, in34, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in66, in98, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in18, in82, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( 
in50, in114, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in18, in50, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in82, in114, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in02, in18, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in34, in50, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in66, in82, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in98, in114, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in10, in74, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in42, in106, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in10, in42, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in74, in106, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in26, in90, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in58, in122, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in26, in58, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in90, in122, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in10, in26, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in42, in58, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in74, in90, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in106, in122, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in02, in10, c_07 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in18, in26, c_08 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in34, in42, c_09 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in50, in58, c_10 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in66, in74, c_11 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in82, in90, c_12 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in98, in106, c_13 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in114, in122, c_14 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in06, in70, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in38, in102, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in06, in38, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in70, in102, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in22, in86, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in54, in118, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in22, in54, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in86, in118, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in06, in22, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in38, in54, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in70, in86, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in102, in118, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in14, in78, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in46, in110, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in14, in46, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in78, in110, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in30, in94, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in62, in126, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in30, in62, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in94, in126, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in14, in30, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in46, in62, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in78, in94, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in110, in126, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in06, in14, c_07 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in22, in30, c_08 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in38, in46, c_09 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in54, in62, c_10 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in70, in78, c_11 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in86, in94, c_12 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in102, in110, c_13 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in118, in126, c_14 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in02, in06, c_15 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in10, in14, c_16 ); \ + 
FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in18, in22, c_17 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in26, in30, c_18 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in34, in38, c_19 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in42, in46, c_20 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in50, in54, c_21 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in58, in62, c_22 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in66, in70, c_23 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in74, in78, c_24 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in82, in86, c_25 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in90, in94, c_26 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in98, in102, c_27 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in106, in110, c_28 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in114, in118, c_29 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in122, in126, c_30 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in00, in02, c_31 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in04, in06, c_32 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in08, in10, c_33 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in12, in14, c_34 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in16, in18, c_35 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in20, in22, c_36 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in24, in26, c_37 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in28, in30, c_38 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in32, in34, c_39 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in36, in38, c_40 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in40, in42, c_41 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in44, in46, c_42 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in48, in50, c_43 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in52, in54, c_44 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in56, in58, c_45 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in60, in62, c_46 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in64, in66, c_47 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in68, in70, c_48 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in72, in74, c_49 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in76, in78, c_50 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in80, in82, c_51 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in84, in86, c_52 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in88, in90, c_53 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in92, in94, c_54 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in96, in98, c_55 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in100, in102, c_56 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in104, in106, c_57 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in108, in110, c_58 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in112, in114, c_59 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in116, in118, c_60 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in120, in122, c_61 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in124, in126, c_62 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in01, in65, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in33, in97, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in01, in33, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in65, in97, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in17, in81, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in49, in113, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in17, in49, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in81, in113, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in01, in17, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in33, in49, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in65, in81, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in97, in113, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in09, in73, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in41, in105, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in09, in41, 
c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in73, in105, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in25, in89, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in57, in121, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in25, in57, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in89, in121, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in09, in25, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in41, in57, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in73, in89, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in105, in121, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in01, in09, c_07 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in17, in25, c_08 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in33, in41, c_09 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in49, in57, c_10 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in65, in73, c_11 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in81, in89, c_12 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in97, in105, c_13 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in113, in121, c_14 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in05, in69, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in37, in101, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in05, in37, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in69, in101, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in21, in85, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in53, in117, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in21, in53, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in85, in117, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in05, in21, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in37, in53, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in69, in85, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in101, in117, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in13, in77, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in45, in109, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in13, in45, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in77, in109, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in29, in93, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in61, in125, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in29, in61, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in93, in125, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in13, in29, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in45, in61, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in77, in93, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in109, in125, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in05, in13, c_07 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in21, in29, c_08 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in37, in45, c_09 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in53, in61, c_10 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in69, in77, c_11 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in85, in93, c_12 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in101, in109, c_13 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in117, in125, c_14 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in01, in05, c_15 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in09, in13, c_16 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in17, in21, c_17 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in25, in29, c_18 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in33, in37, c_19 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in41, in45, c_20 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in49, in53, c_21 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in57, in61, c_22 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in65, in69, c_23 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in73, in77, c_24 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in81, in85, c_25 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( 
in89, in93, c_26 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in97, in101, c_27 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in105, in109, c_28 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in113, in117, c_29 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in121, in125, c_30 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in03, in67, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in35, in99, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in03, in35, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in67, in99, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in19, in83, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in51, in115, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in19, in51, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in83, in115, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in03, in19, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in35, in51, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in67, in83, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in99, in115, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in11, in75, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in43, in107, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in11, in43, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in75, in107, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in27, in91, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in59, in123, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in27, in59, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in91, in123, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in11, in27, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in43, in59, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in75, in91, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in107, in123, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in03, in11, c_07 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in19, in27, c_08 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in35, in43, c_09 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in51, in59, c_10 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in67, in75, c_11 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in83, in91, c_12 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in99, in107, c_13 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in115, in123, c_14 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in07, in71, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in39, in103, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in07, in39, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in71, in103, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in23, in87, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in55, in119, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in23, in55, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in87, in119, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in07, in23, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in39, in55, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in71, in87, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in103, in119, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in15, in79, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in47, in111, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in15, in47, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in79, in111, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in31, in95, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in63, in127, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in31, in63, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in95, in127, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in15, in31, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in47, in63, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in79, in95, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in111, in127, c_06 ); \ + 
FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in07, in15, c_07 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in23, in31, c_08 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in39, in47, c_09 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in55, in63, c_10 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in71, in79, c_11 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in87, in95, c_12 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in103, in111, c_13 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in119, in127, c_14 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in03, in07, c_15 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in11, in15, c_16 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in19, in23, c_17 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in27, in31, c_18 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in35, in39, c_19 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in43, in47, c_20 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in51, in55, c_21 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in59, in63, c_22 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in67, in71, c_23 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in75, in79, c_24 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in83, in87, c_25 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in91, in95, c_26 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in99, in103, c_27 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in107, in111, c_28 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in115, in119, c_29 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in123, in127, c_30 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in01, in03, c_31 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in05, in07, c_32 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in09, in11, c_33 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in13, in15, c_34 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in17, in19, c_35 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in21, in23, c_36 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in25, in27, c_37 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in29, in31, c_38 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in33, in35, c_39 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in37, in39, c_40 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in41, in43, c_41 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in45, in47, c_42 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in49, in51, c_43 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in53, in55, c_44 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in57, in59, c_45 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in61, in63, c_46 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in65, in67, c_47 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in69, in71, c_48 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in73, in75, c_49 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in77, in79, c_50 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in81, in83, c_51 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in85, in87, c_52 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in89, in91, c_53 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in93, in95, c_54 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in97, in99, c_55 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in101, in103, c_56 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in105, in107, c_57 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in109, in111, c_58 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in113, in115, c_59 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in117, in119, c_60 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in121, in123, c_61 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in125, in127, c_62 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in00, in01, c_63 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in02, in03, c_64 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in04, in05, c_65 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in06, in07, c_66 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in08, in09, 
c_67 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in10, in11, c_68 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in12, in13, c_69 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in14, in15, c_70 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in16, in17, c_71 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in18, in19, c_72 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in20, in21, c_73 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in22, in23, c_74 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in24, in25, c_75 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in26, in27, c_76 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in28, in29, c_77 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in30, in31, c_78 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in32, in33, c_79 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in34, in35, c_80 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in36, in37, c_81 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in38, in39, c_82 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in40, in41, c_83 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in42, in43, c_84 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in44, in45, c_85 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in46, in47, c_86 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in48, in49, c_87 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in50, in51, c_88 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in52, in53, c_89 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in54, in55, c_90 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in56, in57, c_91 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in58, in59, c_92 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in60, in61, c_93 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in62, in63, c_94 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in64, in65, c_95 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in66, in67, c_96 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in68, in69, c_97 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in70, in71, c_98 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in72, in73, c_99 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in74, in75, c_100 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in76, in77, c_101 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in78, in79, c_102 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in80, in81, c_103 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in82, in83, c_104 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in84, in85, c_105 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in86, in87, c_106 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in88, in89, c_107 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in90, in91, c_108 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in92, in93, c_109 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in94, in95, c_110 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in96, in97, c_111 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in98, in99, c_112 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in100, in101, c_113 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in102, in103, c_114 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in104, in105, c_115 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in106, in107, c_116 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in108, in109, c_117 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in110, in111, c_118 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in112, in113, c_119 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in114, in115, c_120 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in116, in117, c_121 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in118, in119, c_122 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in120, in121, c_123 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in122, in123, c_124 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in124, in125, c_125 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in126, in127, c_126 ); \ + } while( 0 ) + + + +#define FD_REEDSOL_IFFT_CONSTANTS_64_0 0, 2, 
4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 0, 6, 28, 26, 120, 126, 100, 98, 237, 235, 241, 247, 149, 147, 137, 143, 0, 22, 97, 119, 38, 48, 71, 81, 0, 11, 174, 165, 0, 71, 0 +#define FD_REEDSOL_IFFT_CONSTANTS_64_64 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126, 179, 181, 175, 169, 203, 205, 215, 209, 94, 88, 66, 68, 38, 32, 58, 60, 183, 161, 214, 192, 145, 135, 240, 230, 33, 42, 143, 132, 189, 250, 218 +#define FD_REEDSOL_IFFT_CONSTANTS_64_128 128, 130, 132, 134, 136, 138, 140, 142, 144, 146, 148, 150, 152, 154, 156, 158, 160, 162, 164, 166, 168, 170, 172, 174, 176, 178, 180, 182, 184, 186, 188, 190, 182, 176, 170, 172, 206, 200, 210, 212, 91, 93, 71, 65, 35, 37, 63, 57, 12, 26, 109, 123, 42, 60, 75, 93, 45, 38, 131, 136, 18, 85, 130 +#define FD_REEDSOL_IFFT_IMPL_64( c_00, c_01, c_02, c_03, c_04, c_05 , \ + c_06, c_07, c_08, c_09, c_10, c_11, c_12, c_13, c_14, c_15, c_16 , \ + c_17, c_18, c_19, c_20, c_21, c_22, c_23, c_24, c_25, c_26, c_27 , \ + c_28, c_29, c_30, c_31, c_32, c_33, c_34, c_35, c_36, c_37, c_38 , \ + c_39, c_40, c_41, c_42, c_43, c_44, c_45, c_46, c_47, c_48, c_49 , \ + c_50, c_51, c_52, c_53, c_54, c_55, c_56, c_57, c_58, c_59, c_60 , \ + c_61, c_62, in00, in01, in02, in03, in04, in05, in06, in07, in08 , \ + in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19 , \ + in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30 , \ + in31, in32, in33, in34, in35, in36, in37, in38, in39, in40, in41 , \ + in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52 , \ + in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ + do { \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in00, in01, c_00 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in02, in03, c_01 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in04, in05, c_02 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in06, in07, c_03 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in08, in09, c_04 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in10, in11, c_05 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in12, in13, c_06 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in14, in15, c_07 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in16, in17, c_08 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in18, in19, c_09 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in20, in21, c_10 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in22, in23, c_11 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in24, in25, c_12 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in26, in27, c_13 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in28, in29, c_14 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in30, in31, c_15 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in32, in33, c_16 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in34, in35, c_17 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in36, in37, c_18 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in38, in39, c_19 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in40, in41, c_20 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in42, in43, c_21 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in44, in45, c_22 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in46, in47, c_23 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in48, in49, c_24 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in50, in51, c_25 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in52, in53, c_26 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in54, in55, c_27 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in56, in57, c_28 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in58, in59, c_29 ); \ + 
FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in60, in61, c_30 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in62, in63, c_31 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in00, in02, c_32 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in04, in06, c_33 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in08, in10, c_34 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in12, in14, c_35 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in16, in18, c_36 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in20, in22, c_37 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in24, in26, c_38 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in28, in30, c_39 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in32, in34, c_40 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in36, in38, c_41 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in40, in42, c_42 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in44, in46, c_43 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in48, in50, c_44 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in52, in54, c_45 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in56, in58, c_46 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in60, in62, c_47 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in00, in04, c_48 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in08, in12, c_49 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in16, in20, c_50 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in24, in28, c_51 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in32, in36, c_52 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in40, in44, c_53 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in48, in52, c_54 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in56, in60, c_55 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in00, in08, c_56 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in16, in24, c_57 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in32, in40, c_58 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in48, in56, c_59 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in00, in16, c_60 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in32, in48, c_61 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in00, in32, c_62 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in16, in48, c_62 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in08, in24, c_60 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in40, in56, c_61 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in08, in40, c_62 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in24, in56, c_62 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in04, in12, c_56 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in20, in28, c_57 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in36, in44, c_58 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in52, in60, c_59 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in04, in20, c_60 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in36, in52, c_61 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in04, in36, c_62 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in20, in52, c_62 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in12, in28, c_60 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in44, in60, c_61 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in12, in44, c_62 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in28, in60, c_62 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in02, in06, c_48 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in10, in14, c_49 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in18, in22, c_50 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in26, in30, c_51 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in34, in38, c_52 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in42, in46, c_53 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in50, in54, c_54 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in58, in62, c_55 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in02, in10, c_56 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in18, in26, c_57 ); \ + 
FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in34, in42, c_58 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in50, in58, c_59 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in02, in18, c_60 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in34, in50, c_61 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in02, in34, c_62 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in18, in50, c_62 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in10, in26, c_60 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in42, in58, c_61 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in10, in42, c_62 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in26, in58, c_62 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in06, in14, c_56 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in22, in30, c_57 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in38, in46, c_58 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in54, in62, c_59 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in06, in22, c_60 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in38, in54, c_61 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in06, in38, c_62 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in22, in54, c_62 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in14, in30, c_60 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in46, in62, c_61 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in14, in46, c_62 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in30, in62, c_62 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in01, in03, c_32 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in05, in07, c_33 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in09, in11, c_34 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in13, in15, c_35 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in17, in19, c_36 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in21, in23, c_37 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in25, in27, c_38 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in29, in31, c_39 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in33, in35, c_40 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in37, in39, c_41 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in41, in43, c_42 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in45, in47, c_43 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in49, in51, c_44 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in53, in55, c_45 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in57, in59, c_46 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in61, in63, c_47 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in01, in05, c_48 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in09, in13, c_49 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in17, in21, c_50 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in25, in29, c_51 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in33, in37, c_52 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in41, in45, c_53 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in49, in53, c_54 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in57, in61, c_55 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in01, in09, c_56 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in17, in25, c_57 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in33, in41, c_58 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in49, in57, c_59 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in01, in17, c_60 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in33, in49, c_61 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in01, in33, c_62 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in17, in49, c_62 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in09, in25, c_60 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in41, in57, c_61 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in09, in41, c_62 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in25, in57, c_62 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in05, in13, c_56 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in21, in29, c_57 ); \ + 
FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in37, in45, c_58 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in53, in61, c_59 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in05, in21, c_60 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in37, in53, c_61 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in05, in37, c_62 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in21, in53, c_62 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in13, in29, c_60 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in45, in61, c_61 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in13, in45, c_62 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in29, in61, c_62 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in03, in07, c_48 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in11, in15, c_49 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in19, in23, c_50 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in27, in31, c_51 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in35, in39, c_52 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in43, in47, c_53 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in51, in55, c_54 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in59, in63, c_55 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in03, in11, c_56 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in19, in27, c_57 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in35, in43, c_58 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in51, in59, c_59 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in03, in19, c_60 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in35, in51, c_61 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in03, in35, c_62 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in19, in51, c_62 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in11, in27, c_60 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in43, in59, c_61 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in11, in43, c_62 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in27, in59, c_62 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in07, in15, c_56 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in23, in31, c_57 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in39, in47, c_58 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in55, in63, c_59 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in07, in23, c_60 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in39, in55, c_61 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in07, in39, c_62 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in23, in55, c_62 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in15, in31, c_60 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in47, in63, c_61 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in15, in47, c_62 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in31, in63, c_62 ); \ + } while( 0 ) + + + +#define FD_REEDSOL_FFT_CONSTANTS_64_0 0, 0, 71, 0, 11, 174, 165, 0, 22, 97, 119, 38, 48, 71, 81, 0, 6, 28, 26, 120, 126, 100, 98, 237, 235, 241, 247, 149, 147, 137, 143, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62 +#define FD_REEDSOL_FFT_CONSTANTS_64_64 218, 189, 250, 33, 42, 143, 132, 183, 161, 214, 192, 145, 135, 240, 230, 179, 181, 175, 169, 203, 205, 215, 209, 94, 88, 66, 68, 38, 32, 58, 60, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126 +#define FD_REEDSOL_FFT_CONSTANTS_64_128 130, 18, 85, 45, 38, 131, 136, 12, 26, 109, 123, 42, 60, 75, 93, 182, 176, 170, 172, 206, 200, 210, 212, 91, 93, 71, 65, 35, 37, 63, 57, 128, 130, 132, 134, 136, 138, 140, 142, 144, 146, 148, 150, 152, 154, 156, 158, 160, 162, 164, 166, 168, 170, 172, 174, 176, 178, 180, 182, 184, 186, 188, 190 +#define FD_REEDSOL_FFT_IMPL_64( c_00, c_01, c_02, c_03, c_04, c_05 , \ + c_06, c_07, c_08, c_09, 
c_10, c_11, c_12, c_13, c_14, c_15, c_16, \ + c_17, c_18, c_19, c_20, c_21, c_22, c_23, c_24, c_25, c_26, c_27, \ + c_28, c_29, c_30, c_31, c_32, c_33, c_34, c_35, c_36, c_37, c_38, \ + c_39, c_40, c_41, c_42, c_43, c_44, c_45, c_46, c_47, c_48, c_49, \ + c_50, c_51, c_52, c_53, c_54, c_55, c_56, c_57, c_58, c_59, c_60, \ + c_61, c_62, in00, in01, in02, in03, in04, in05, in06, in07, in08, \ + in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, \ + in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, \ + in31, in32, in33, in34, in35, in36, in37, in38, in39, in40, in41, \ + in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, \ + in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ + do { \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in00, in32, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in16, in48, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in00, in16, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in32, in48, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in08, in40, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in24, in56, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in08, in24, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in40, in56, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in00, in08, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in16, in24, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in32, in40, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in48, in56, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in04, in36, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in20, in52, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in04, in20, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in36, in52, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in12, in44, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in28, in60, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in12, in28, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in44, in60, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in04, in12, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in20, in28, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in36, in44, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in52, in60, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in00, in04, c_07 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in08, in12, c_08 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in16, in20, c_09 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in24, in28, c_10 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in32, in36, c_11 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in40, in44, c_12 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in48, in52, c_13 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in56, in60, c_14 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in02, in34, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in18, in50, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in02, in18, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in34, in50, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in10, in42, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in26, in58, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in10, in26, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in42, in58, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in02, in10, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in18, in26, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in34, in42, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in50, in58, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in06, in38, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in22, in54, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in06, in22, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in38, in54, c_02 ); \ + 
FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in14, in46, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in30, in62, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in14, in30, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in46, in62, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in06, in14, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in22, in30, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in38, in46, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in54, in62, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in02, in06, c_07 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in10, in14, c_08 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in18, in22, c_09 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in26, in30, c_10 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in34, in38, c_11 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in42, in46, c_12 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in50, in54, c_13 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in58, in62, c_14 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in00, in02, c_15 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in04, in06, c_16 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in08, in10, c_17 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in12, in14, c_18 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in16, in18, c_19 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in20, in22, c_20 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in24, in26, c_21 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in28, in30, c_22 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in32, in34, c_23 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in36, in38, c_24 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in40, in42, c_25 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in44, in46, c_26 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in48, in50, c_27 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in52, in54, c_28 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in56, in58, c_29 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in60, in62, c_30 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in01, in33, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in17, in49, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in01, in17, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in33, in49, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in09, in41, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in25, in57, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in09, in25, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in41, in57, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in01, in09, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in17, in25, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in33, in41, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in49, in57, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in05, in37, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in21, in53, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in05, in21, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in37, in53, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in13, in45, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in29, in61, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in13, in29, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in45, in61, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in05, in13, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in21, in29, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in37, in45, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in53, in61, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in01, in05, c_07 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in09, in13, c_08 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in17, in21, c_09 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in25, in29, c_10 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in33, in37, c_11 ); \ + 
FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in41, in45, c_12 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in49, in53, c_13 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in57, in61, c_14 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in03, in35, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in19, in51, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in03, in19, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in35, in51, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in11, in43, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in27, in59, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in11, in27, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in43, in59, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in03, in11, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in19, in27, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in35, in43, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in51, in59, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in07, in39, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in23, in55, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in07, in23, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in39, in55, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in15, in47, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in31, in63, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in15, in31, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in47, in63, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in07, in15, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in23, in31, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in39, in47, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in55, in63, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in03, in07, c_07 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in11, in15, c_08 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in19, in23, c_09 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in27, in31, c_10 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in35, in39, c_11 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in43, in47, c_12 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in51, in55, c_13 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in59, in63, c_14 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in01, in03, c_15 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in05, in07, c_16 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in09, in11, c_17 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in13, in15, c_18 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in17, in19, c_19 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in21, in23, c_20 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in25, in27, c_21 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in29, in31, c_22 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in33, in35, c_23 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in37, in39, c_24 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in41, in43, c_25 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in45, in47, c_26 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in49, in51, c_27 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in53, in55, c_28 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in57, in59, c_29 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in61, in63, c_30 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in00, in01, c_31 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in02, in03, c_32 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in04, in05, c_33 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in06, in07, c_34 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in08, in09, c_35 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in10, in11, c_36 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in12, in13, c_37 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in14, in15, c_38 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in16, in17, c_39 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in18, in19, c_40 ); \ + 
FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in20, in21, c_41 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in22, in23, c_42 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in24, in25, c_43 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in26, in27, c_44 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in28, in29, c_45 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in30, in31, c_46 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in32, in33, c_47 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in34, in35, c_48 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in36, in37, c_49 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in38, in39, c_50 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in40, in41, c_51 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in42, in43, c_52 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in44, in45, c_53 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in46, in47, c_54 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in48, in49, c_55 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in50, in51, c_56 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in52, in53, c_57 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in54, in55, c_58 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in56, in57, c_59 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in58, in59, c_60 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in60, in61, c_61 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in62, in63, c_62 ); \ + } while( 0 ) + + + #define FD_REEDSOL_IFFT_CONSTANTS_32_0 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 0, 6, 28, 26, 120, 126, 100, 98, 0, 22, 97, 119, 0, 11, 0 #define FD_REEDSOL_IFFT_CONSTANTS_32_32 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 237, 235, 241, 247, 149, 147, 137, 143, 38, 48, 71, 81, 174, 165, 71 +#define FD_REEDSOL_IFFT_CONSTANTS_32_64 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 179, 181, 175, 169, 203, 205, 215, 209, 183, 161, 214, 192, 33, 42, 189 +#define FD_REEDSOL_IFFT_CONSTANTS_32_96 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126, 94, 88, 66, 68, 38, 32, 58, 60, 145, 135, 240, 230, 143, 132, 250 #define FD_REEDSOL_IFFT_IMPL_32( c_00, c_01, c_02, c_03, c_04, c_05 , \ c_06, c_07, c_08, c_09, c_10, c_11, c_12, c_13, c_14, c_15, c_16 , \ c_17, c_18, c_19, c_20, c_21, c_22, c_23, c_24, c_25, c_26, c_27 , \ @@ -204,6 +1591,8 @@ #define FD_REEDSOL_FFT_CONSTANTS_32_0 0, 0, 11, 0, 22, 97, 119, 0, 6, 28, 26, 120, 126, 100, 98, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 #define FD_REEDSOL_FFT_CONSTANTS_32_32 71, 174, 165, 38, 48, 71, 81, 237, 235, 241, 247, 149, 147, 137, 143, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62 +#define FD_REEDSOL_FFT_CONSTANTS_32_64 189, 33, 42, 183, 161, 214, 192, 179, 181, 175, 169, 203, 205, 215, 209, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94 +#define FD_REEDSOL_FFT_CONSTANTS_32_96 250, 143, 132, 145, 135, 240, 230, 94, 88, 66, 68, 38, 32, 58, 60, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126 #define FD_REEDSOL_FFT_IMPL_32( c_00, c_01, c_02, c_03, c_04, c_05 , \ c_06, c_07, c_08, c_09, c_10, c_11, c_12, c_13, c_14, c_15, c_16, \ c_17, c_18, c_19, c_20, c_21, c_22, c_23, c_24, c_25, c_26, c_27, \ @@ -299,6 +1688,9 @@ #define FD_REEDSOL_IFFT_CONSTANTS_16_0 0, 2, 4, 6, 8, 10, 12, 14, 0, 6, 28, 26, 0, 22, 0 #define FD_REEDSOL_IFFT_CONSTANTS_16_16 16, 18, 20, 22, 24, 26, 28, 30, 120, 126, 100, 98, 97, 119, 11 #define FD_REEDSOL_IFFT_CONSTANTS_16_32 32, 34, 36, 38, 40, 42, 44, 46, 237, 235, 241, 247, 38, 48, 174 +#define FD_REEDSOL_IFFT_CONSTANTS_16_48 48, 50, 52, 54, 56, 58, 60, 62, 149, 147, 137, 143, 71, 81, 165 +#define 
FD_REEDSOL_IFFT_CONSTANTS_16_64 64, 66, 68, 70, 72, 74, 76, 78, 179, 181, 175, 169, 183, 161, 33 +#define FD_REEDSOL_IFFT_CONSTANTS_16_80 80, 82, 84, 86, 88, 90, 92, 94, 203, 205, 215, 209, 214, 192, 42 #define FD_REEDSOL_IFFT_IMPL_16( c_00, c_01, c_02, c_03, c_04, c_05 , \ c_06, c_07, c_08, c_09, c_10, c_11, c_12, c_13, c_14, in00, in01 , \ in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12 , \ @@ -343,6 +1735,9 @@ #define FD_REEDSOL_FFT_CONSTANTS_16_0 0, 0, 22, 0, 6, 28, 26, 0, 2, 4, 6, 8, 10, 12, 14 #define FD_REEDSOL_FFT_CONSTANTS_16_16 11, 97, 119, 120, 126, 100, 98, 16, 18, 20, 22, 24, 26, 28, 30 #define FD_REEDSOL_FFT_CONSTANTS_16_32 174, 38, 48, 237, 235, 241, 247, 32, 34, 36, 38, 40, 42, 44, 46 +#define FD_REEDSOL_FFT_CONSTANTS_16_48 165, 71, 81, 149, 147, 137, 143, 48, 50, 52, 54, 56, 58, 60, 62 +#define FD_REEDSOL_FFT_CONSTANTS_16_64 33, 183, 161, 179, 181, 175, 169, 64, 66, 68, 70, 72, 74, 76, 78 +#define FD_REEDSOL_FFT_CONSTANTS_16_80 42, 214, 192, 203, 205, 215, 209, 80, 82, 84, 86, 88, 90, 92, 94 #define FD_REEDSOL_FFT_IMPL_16( c_00, c_01, c_02, c_03, c_04, c_05 , \ c_06, c_07, c_08, c_09, c_10, c_11, c_12, c_13, c_14, in00, in01, \ in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, \ @@ -389,6 +1784,11 @@ #define FD_REEDSOL_IFFT_CONSTANTS_8_16 16, 18, 20, 22, 120, 126, 97 #define FD_REEDSOL_IFFT_CONSTANTS_8_24 24, 26, 28, 30, 100, 98, 119 #define FD_REEDSOL_IFFT_CONSTANTS_8_32 32, 34, 36, 38, 237, 235, 38 +#define FD_REEDSOL_IFFT_CONSTANTS_8_40 40, 42, 44, 46, 241, 247, 48 +#define FD_REEDSOL_IFFT_CONSTANTS_8_48 48, 50, 52, 54, 149, 147, 71 +#define FD_REEDSOL_IFFT_CONSTANTS_8_56 56, 58, 60, 62, 137, 143, 81 +#define FD_REEDSOL_IFFT_CONSTANTS_8_64 64, 66, 68, 70, 179, 181, 183 +#define FD_REEDSOL_IFFT_CONSTANTS_8_72 72, 74, 76, 78, 175, 169, 161 #define FD_REEDSOL_IFFT_IMPL_8( c_00, c_01, c_02, c_03, c_04, c_05 , \ c_06, in00, in01, in02, in03, in04, in05, in06, in07) \ do { \ @@ -413,6 +1813,11 @@ #define FD_REEDSOL_FFT_CONSTANTS_8_16 97, 120, 126, 16, 18, 20, 22 #define FD_REEDSOL_FFT_CONSTANTS_8_24 119, 100, 98, 24, 26, 28, 30 #define FD_REEDSOL_FFT_CONSTANTS_8_32 38, 237, 235, 32, 34, 36, 38 +#define FD_REEDSOL_FFT_CONSTANTS_8_40 48, 241, 247, 40, 42, 44, 46 +#define FD_REEDSOL_FFT_CONSTANTS_8_48 71, 149, 147, 48, 50, 52, 54 +#define FD_REEDSOL_FFT_CONSTANTS_8_56 81, 137, 143, 56, 58, 60, 62 +#define FD_REEDSOL_FFT_CONSTANTS_8_64 183, 179, 181, 64, 66, 68, 70 +#define FD_REEDSOL_FFT_CONSTANTS_8_72 161, 175, 169, 72, 74, 76, 78 #define FD_REEDSOL_FFT_IMPL_8( c_00, c_01, c_02, c_03, c_04, c_05 , \ c_06, in00, in01, in02, in03, in04, in05, in06, in07) \ do { \ @@ -441,6 +1846,15 @@ #define FD_REEDSOL_IFFT_CONSTANTS_4_24 24, 26, 100 #define FD_REEDSOL_IFFT_CONSTANTS_4_28 28, 30, 98 #define FD_REEDSOL_IFFT_CONSTANTS_4_32 32, 34, 237 +#define FD_REEDSOL_IFFT_CONSTANTS_4_36 36, 38, 235 +#define FD_REEDSOL_IFFT_CONSTANTS_4_40 40, 42, 241 +#define FD_REEDSOL_IFFT_CONSTANTS_4_44 44, 46, 247 +#define FD_REEDSOL_IFFT_CONSTANTS_4_48 48, 50, 149 +#define FD_REEDSOL_IFFT_CONSTANTS_4_52 52, 54, 147 +#define FD_REEDSOL_IFFT_CONSTANTS_4_56 56, 58, 137 +#define FD_REEDSOL_IFFT_CONSTANTS_4_60 60, 62, 143 +#define FD_REEDSOL_IFFT_CONSTANTS_4_64 64, 66, 179 +#define FD_REEDSOL_IFFT_CONSTANTS_4_68 68, 70, 181 #define FD_REEDSOL_IFFT_IMPL_4( c_00, c_01, c_02, in00, in01, in02 , \ in03) \ do { \ @@ -461,6 +1875,15 @@ #define FD_REEDSOL_FFT_CONSTANTS_4_24 100, 24, 26 #define FD_REEDSOL_FFT_CONSTANTS_4_28 98, 28, 30 #define FD_REEDSOL_FFT_CONSTANTS_4_32 237, 32, 34 
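The FD_REEDSOL_{I}FFT_IMPL_* macros above are long but entirely mechanical: each one is a fixed schedule of two-input butterflies over GF(2^8), parameterized by the precomputed constants in the matching FD_REEDSOL_{I}FFT_CONSTANTS_* lists. As a rough orientation (an illustrative sketch, not code from this patch), each butterfly pair can be modeled as below; gf256_mul, the 0x11D reduction polynomial, and the helper names are stand-ins assumed for illustration rather than the actual primitives supplied by fd_reedsol_arith_none.h / fd_reedsol_arith_avx2.h / fd_reedsol_arith_gfni.h. The property that matters is that an inverse butterfly with the same constant exactly undoes the forward butterfly, which is consistent with how the encoder below chains e.g. FD_REEDSOL_GENERATE_IFFT( 16, 16, ... ) with FD_REEDSOL_GENERATE_FFT( 16, 32, ... ) to derive additional parity from the same data.

  /* Standalone sketch of a GF(2^8) FFT/IFFT butterfly pair.
     Assumptions (not from the patch): gf256_mul and the reduction
     polynomial x^8+x^4+x^3+x^2+1 (0x11D) are used purely for
     illustration; the patch's field is defined by its constant
     tables and arith backends. */

  #include <stdio.h>

  typedef unsigned char uchar_t;

  /* Shift-and-add multiply in GF(2^8); addition in this field is XOR. */
  static uchar_t
  gf256_mul( uchar_t a, uchar_t b ) {
    uchar_t r = 0;
    while( b ) {
      if( b & 1 ) r ^= a;
      uchar_t hi = (uchar_t)(a & 0x80);
      a = (uchar_t)(a << 1);
      if( hi ) a ^= 0x1D;      /* reduce by the (assumed) field polynomial */
      b >>= 1;
    }
    return r;
  }

  /* Forward butterfly: (x, y) -> ( x + c*y, y + (x + c*y) ) */
  static void
  fft_butterfly( uchar_t * x, uchar_t * y, uchar_t c ) {
    *x = (uchar_t)(*x ^ gf256_mul( *y, c ));
    *y = (uchar_t)(*y ^ *x);
  }

  /* Inverse butterfly: applies the same steps in reverse order, so it
     exactly undoes fft_butterfly when given the same constant c. */
  static void
  ifft_butterfly( uchar_t * x, uchar_t * y, uchar_t c ) {
    *y = (uchar_t)(*y ^ *x);
    *x = (uchar_t)(*x ^ gf256_mul( *y, c ));
  }

  int
  main( void ) {
    uchar_t x = 0x37, y = 0xA5, c = 0x0B;
    fft_butterfly ( &x, &y, c );
    ifft_butterfly( &x, &y, c );
    printf( "round trip: x=%02X y=%02X\n", x, y ); /* prints 37 A5 again */
    return 0;
  }

Compiled on its own, the sketch prints the original bytes back after a forward/inverse pass, which is the inverse relationship the encode path relies on when it re-runs an IFFT at one offset followed by an FFT at the next offset to produce more parity shreds.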
+#define FD_REEDSOL_FFT_CONSTANTS_4_36 235, 36, 38 +#define FD_REEDSOL_FFT_CONSTANTS_4_40 241, 40, 42 +#define FD_REEDSOL_FFT_CONSTANTS_4_44 247, 44, 46 +#define FD_REEDSOL_FFT_CONSTANTS_4_48 149, 48, 50 +#define FD_REEDSOL_FFT_CONSTANTS_4_52 147, 52, 54 +#define FD_REEDSOL_FFT_CONSTANTS_4_56 137, 56, 58 +#define FD_REEDSOL_FFT_CONSTANTS_4_60 143, 60, 62 +#define FD_REEDSOL_FFT_CONSTANTS_4_64 179, 64, 66 +#define FD_REEDSOL_FFT_CONSTANTS_4_68 181, 68, 70 #define FD_REEDSOL_FFT_IMPL_4( c_00, c_01, c_02, in00, in01, in02 , \ in03) \ do { \ diff --git a/src/ballet/reedsol/fd_reedsol_internal.c b/src/ballet/reedsol/fd_reedsol_internal.c deleted file mode 100644 index 8470b8e8040..00000000000 --- a/src/ballet/reedsol/fd_reedsol_internal.c +++ /dev/null @@ -1,278 +0,0 @@ -#include "../../util/fd_util.h" -#include "fd_reedsol_internal.h" -#define INCLUDE_CONSTANTS -#if FD_HAS_GFNI -#include "fd_reedsol_arith_gfni.h" -#elif FD_HAS_AVX -#include "fd_reedsol_arith_avx2.h" -#else -#include "fd_reedsol_arith_none.h" -#endif -#include "fd_reedsol_fft.h" -#include "fd_reedsol_ppt.h" - - -/* FALLTHRU: Tells the compiler that falling through to the next case - of the switch statement is intentional and not a bug. When brutality - is turned on, this must be used. Clang an GCC differ on what - annotations they accept, but this works for both. */ -#define FALLTHRU __attribute__((fallthrough)); - -void fd_reedsol_encode( ulong shred_sz, - uchar const * const * data_shred, - ulong data_shred_cnt, - uchar * const * parity_shred, - ulong parity_shred_cnt ) { - - if( FD_UNLIKELY( (data_shred_cnt==0) | (parity_shred_cnt==0) ) ) return; /* TODO: Is that the right behavior? */ - - for( ulong shred_pos=0UL; shred_pos0UL ) ) { - /* Produce another 16 parity shreds */ - FD_REEDSOL_GENERATE_FFT( 16, 16, ALL_VARS ); - switch( parity_remaining ) { - default: - case 16UL: gf_stu( parity_shred[ 15UL+parity_produced ] + shred_pos, in15 ); FALLTHRU - case 15UL: gf_stu( parity_shred[ 14UL+parity_produced ] + shred_pos, in14 ); FALLTHRU - case 14UL: gf_stu( parity_shred[ 13UL+parity_produced ] + shred_pos, in13 ); FALLTHRU - case 13UL: gf_stu( parity_shred[ 12UL+parity_produced ] + shred_pos, in12 ); FALLTHRU - case 12UL: gf_stu( parity_shred[ 11UL+parity_produced ] + shred_pos, in11 ); FALLTHRU - case 11UL: gf_stu( parity_shred[ 10UL+parity_produced ] + shred_pos, in10 ); FALLTHRU - case 10UL: gf_stu( parity_shred[ 9UL+parity_produced ] + shred_pos, in09 ); FALLTHRU - case 9UL: gf_stu( parity_shred[ 8UL+parity_produced ] + shred_pos, in08 ); FALLTHRU - case 8UL: gf_stu( parity_shred[ 7UL+parity_produced ] + shred_pos, in07 ); FALLTHRU - case 7UL: gf_stu( parity_shred[ 6UL+parity_produced ] + shred_pos, in06 ); FALLTHRU - case 6UL: gf_stu( parity_shred[ 5UL+parity_produced ] + shred_pos, in05 ); FALLTHRU - case 5UL: gf_stu( parity_shred[ 4UL+parity_produced ] + shred_pos, in04 ); FALLTHRU - case 4UL: gf_stu( parity_shred[ 3UL+parity_produced ] + shred_pos, in03 ); FALLTHRU - case 3UL: gf_stu( parity_shred[ 2UL+parity_produced ] + shred_pos, in02 ); FALLTHRU - case 2UL: gf_stu( parity_shred[ 1UL+parity_produced ] + shred_pos, in01 ); FALLTHRU - case 1UL: gf_stu( parity_shred[ 0UL+parity_produced ] + shred_pos, in00 ); - } - parity_produced += fd_ulong_min( 16UL, parity_remaining ); - parity_remaining = parity_shred_cnt - parity_produced; - } - - /* We might need one more round */ - if( FD_UNLIKELY( parity_remaining>0UL ) ) { - /* TODO: Is it faster to save the output of the ifft/ppt than to - recompute? 
*/ - FD_REEDSOL_GENERATE_IFFT( 16, 16, ALL_VARS ); - FD_REEDSOL_GENERATE_FFT( 16, 32, ALL_VARS ); - switch( parity_remaining ) { - default: - case 16UL: gf_stu( parity_shred[ 15UL+parity_produced ] + shred_pos, in15 ); FALLTHRU - case 15UL: gf_stu( parity_shred[ 14UL+parity_produced ] + shred_pos, in14 ); FALLTHRU - case 14UL: gf_stu( parity_shred[ 13UL+parity_produced ] + shred_pos, in13 ); FALLTHRU - case 13UL: gf_stu( parity_shred[ 12UL+parity_produced ] + shred_pos, in12 ); FALLTHRU - case 12UL: gf_stu( parity_shred[ 11UL+parity_produced ] + shred_pos, in11 ); FALLTHRU - case 11UL: gf_stu( parity_shred[ 10UL+parity_produced ] + shred_pos, in10 ); FALLTHRU - case 10UL: gf_stu( parity_shred[ 9UL+parity_produced ] + shred_pos, in09 ); FALLTHRU - case 9UL: gf_stu( parity_shred[ 8UL+parity_produced ] + shred_pos, in08 ); FALLTHRU - case 8UL: gf_stu( parity_shred[ 7UL+parity_produced ] + shred_pos, in07 ); FALLTHRU - case 7UL: gf_stu( parity_shred[ 6UL+parity_produced ] + shred_pos, in06 ); FALLTHRU - case 6UL: gf_stu( parity_shred[ 5UL+parity_produced ] + shred_pos, in05 ); FALLTHRU - case 5UL: gf_stu( parity_shred[ 4UL+parity_produced ] + shred_pos, in04 ); FALLTHRU - case 4UL: gf_stu( parity_shred[ 3UL+parity_produced ] + shred_pos, in03 ); FALLTHRU - case 3UL: gf_stu( parity_shred[ 2UL+parity_produced ] + shred_pos, in02 ); FALLTHRU - case 2UL: gf_stu( parity_shred[ 1UL+parity_produced ] + shred_pos, in01 ); FALLTHRU - case 1UL: gf_stu( parity_shred[ 0UL+parity_produced ] + shred_pos, in00 ); - } -#undef ALL_VARS - } - } else { - /* N==32 codepath */ - gf_t in00 = gf_zero(); gf_t in01 = gf_zero(); gf_t in02 = gf_zero(); gf_t in03 = gf_zero(); - gf_t in04 = gf_zero(); gf_t in05 = gf_zero(); gf_t in06 = gf_zero(); gf_t in07 = gf_zero(); - gf_t in08 = gf_zero(); gf_t in09 = gf_zero(); gf_t in10 = gf_zero(); gf_t in11 = gf_zero(); - gf_t in12 = gf_zero(); gf_t in13 = gf_zero(); gf_t in14 = gf_zero(); gf_t in15 = gf_zero(); - gf_t in16 = gf_zero(); gf_t in17 = gf_zero(); gf_t in18 = gf_zero(); gf_t in19 = gf_zero(); - gf_t in20 = gf_zero(); gf_t in21 = gf_zero(); gf_t in22 = gf_zero(); gf_t in23 = gf_zero(); - gf_t in24 = gf_zero(); gf_t in25 = gf_zero(); gf_t in26 = gf_zero(); gf_t in27 = gf_zero(); - gf_t in28 = gf_zero(); gf_t in29 = gf_zero(); gf_t in30 = gf_zero(); gf_t in31 = gf_zero(); - - in15 = gf_ldu( data_shred[ 15 ] + shred_pos ); in14 = gf_ldu( data_shred[ 14 ] + shred_pos ); - in13 = gf_ldu( data_shred[ 13 ] + shred_pos ); in12 = gf_ldu( data_shred[ 12 ] + shred_pos ); - in11 = gf_ldu( data_shred[ 11 ] + shred_pos ); in10 = gf_ldu( data_shred[ 10 ] + shred_pos ); - in09 = gf_ldu( data_shred[ 9 ] + shred_pos ); in08 = gf_ldu( data_shred[ 8 ] + shred_pos ); - in07 = gf_ldu( data_shred[ 7 ] + shred_pos ); in06 = gf_ldu( data_shred[ 6 ] + shred_pos ); - in05 = gf_ldu( data_shred[ 5 ] + shred_pos ); in04 = gf_ldu( data_shred[ 4 ] + shred_pos ); - in03 = gf_ldu( data_shred[ 3 ] + shred_pos ); in02 = gf_ldu( data_shred[ 2 ] + shred_pos ); - in01 = gf_ldu( data_shred[ 1 ] + shred_pos ); in00 = gf_ldu( data_shred[ 0 ] + shred_pos ); -#define ALL_VARS in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, \ - in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31 - - switch( data_shred_cnt ) { - case 32UL: in31 = gf_ldu( data_shred[ 31 ] + shred_pos ); FALLTHRU - case 31UL: in30 = gf_ldu( data_shred[ 30 ] + shred_pos ); FALLTHRU - case 30UL: in29 = gf_ldu( data_shred[ 29 ] + shred_pos ); FALLTHRU - case 29UL: 
in28 = gf_ldu( data_shred[ 28 ] + shred_pos ); FALLTHRU - case 28UL: in27 = gf_ldu( data_shred[ 27 ] + shred_pos ); FALLTHRU - case 27UL: in26 = gf_ldu( data_shred[ 26 ] + shred_pos ); FALLTHRU - case 26UL: in25 = gf_ldu( data_shred[ 25 ] + shred_pos ); FALLTHRU - case 25UL: in24 = gf_ldu( data_shred[ 24 ] + shred_pos ); FALLTHRU - case 24UL: in23 = gf_ldu( data_shred[ 23 ] + shred_pos ); FALLTHRU - case 23UL: in22 = gf_ldu( data_shred[ 22 ] + shred_pos ); FALLTHRU - case 22UL: in21 = gf_ldu( data_shred[ 21 ] + shred_pos ); FALLTHRU - case 21UL: in20 = gf_ldu( data_shred[ 20 ] + shred_pos ); FALLTHRU - case 20UL: in19 = gf_ldu( data_shred[ 19 ] + shred_pos ); FALLTHRU - case 19UL: in18 = gf_ldu( data_shred[ 18 ] + shred_pos ); FALLTHRU - case 18UL: in17 = gf_ldu( data_shred[ 17 ] + shred_pos ); FALLTHRU - case 17UL: in16 = gf_ldu( data_shred[ 16 ] + shred_pos ); - } - switch( data_shred_cnt ) { - case 32UL: FD_REEDSOL_GENERATE_IFFT( 32, 0, ALL_VARS ); break; - case 31UL: FD_REEDSOL_GENERATE_PPT( 32, 31, ALL_VARS ); break; - case 30UL: FD_REEDSOL_GENERATE_PPT( 32, 30, ALL_VARS ); break; - case 29UL: FD_REEDSOL_GENERATE_PPT( 32, 29, ALL_VARS ); break; - case 28UL: FD_REEDSOL_GENERATE_PPT( 32, 28, ALL_VARS ); break; - case 27UL: FD_REEDSOL_GENERATE_PPT( 32, 27, ALL_VARS ); break; - case 26UL: FD_REEDSOL_GENERATE_PPT( 32, 26, ALL_VARS ); break; - case 25UL: FD_REEDSOL_GENERATE_PPT( 32, 25, ALL_VARS ); break; - case 24UL: FD_REEDSOL_GENERATE_PPT( 32, 24, ALL_VARS ); break; - case 23UL: FD_REEDSOL_GENERATE_PPT( 32, 23, ALL_VARS ); break; - case 22UL: FD_REEDSOL_GENERATE_PPT( 32, 22, ALL_VARS ); break; - case 21UL: FD_REEDSOL_GENERATE_PPT( 32, 21, ALL_VARS ); break; - case 20UL: FD_REEDSOL_GENERATE_PPT( 32, 20, ALL_VARS ); break; - case 19UL: FD_REEDSOL_GENERATE_PPT( 32, 19, ALL_VARS ); break; - case 18UL: FD_REEDSOL_GENERATE_PPT( 32, 18, ALL_VARS ); break; - case 17UL: FD_REEDSOL_GENERATE_PPT( 32, 17, ALL_VARS ); break; - } - /* That generated the first 32-data_shred_cnt parity shreds in the - last 32-data_shred_cnt variables. We might only need - parity_shred_cnt of them though. 
*/ - ulong total_shreds = data_shred_cnt+parity_shred_cnt; - switch( data_shred_cnt ) { - case 17UL: if( total_shreds <= 17UL ) break; gf_stu( parity_shred[ 17UL-data_shred_cnt ] + shred_pos, in17 ); in17 = gf_zero(); FALLTHRU - case 18UL: if( total_shreds <= 18UL ) break; gf_stu( parity_shred[ 18UL-data_shred_cnt ] + shred_pos, in18 ); in18 = gf_zero(); FALLTHRU - case 19UL: if( total_shreds <= 19UL ) break; gf_stu( parity_shred[ 19UL-data_shred_cnt ] + shred_pos, in19 ); in19 = gf_zero(); FALLTHRU - case 20UL: if( total_shreds <= 20UL ) break; gf_stu( parity_shred[ 20UL-data_shred_cnt ] + shred_pos, in20 ); in20 = gf_zero(); FALLTHRU - case 21UL: if( total_shreds <= 21UL ) break; gf_stu( parity_shred[ 21UL-data_shred_cnt ] + shred_pos, in21 ); in21 = gf_zero(); FALLTHRU - case 22UL: if( total_shreds <= 22UL ) break; gf_stu( parity_shred[ 22UL-data_shred_cnt ] + shred_pos, in22 ); in22 = gf_zero(); FALLTHRU - case 23UL: if( total_shreds <= 23UL ) break; gf_stu( parity_shred[ 23UL-data_shred_cnt ] + shred_pos, in23 ); in23 = gf_zero(); FALLTHRU - case 24UL: if( total_shreds <= 24UL ) break; gf_stu( parity_shred[ 24UL-data_shred_cnt ] + shred_pos, in24 ); in24 = gf_zero(); FALLTHRU - case 25UL: if( total_shreds <= 25UL ) break; gf_stu( parity_shred[ 25UL-data_shred_cnt ] + shred_pos, in25 ); in25 = gf_zero(); FALLTHRU - case 26UL: if( total_shreds <= 26UL ) break; gf_stu( parity_shred[ 26UL-data_shred_cnt ] + shred_pos, in26 ); in26 = gf_zero(); FALLTHRU - case 27UL: if( total_shreds <= 27UL ) break; gf_stu( parity_shred[ 27UL-data_shred_cnt ] + shred_pos, in27 ); in27 = gf_zero(); FALLTHRU - case 28UL: if( total_shreds <= 28UL ) break; gf_stu( parity_shred[ 28UL-data_shred_cnt ] + shred_pos, in28 ); in28 = gf_zero(); FALLTHRU - case 29UL: if( total_shreds <= 29UL ) break; gf_stu( parity_shred[ 29UL-data_shred_cnt ] + shred_pos, in29 ); in29 = gf_zero(); FALLTHRU - case 30UL: if( total_shreds <= 30UL ) break; gf_stu( parity_shred[ 30UL-data_shred_cnt ] + shred_pos, in30 ); in30 = gf_zero(); FALLTHRU - case 31UL: if( total_shreds <= 31UL ) break; gf_stu( parity_shred[ 31UL-data_shred_cnt ] + shred_pos, in31 ); in31 = gf_zero(); - } - ulong parity_produced = fd_ulong_min( 32UL - data_shred_cnt, parity_shred_cnt ); - ulong parity_remaining = parity_shred_cnt - parity_produced; - if( FD_LIKELY( parity_remaining>0UL ) ) { - /* Produce another 32 parity shreds */ - FD_REEDSOL_GENERATE_FFT( 32, 32, ALL_VARS ); -#undef ALL_VARS - switch( parity_remaining ) { - case 32UL: gf_stu( parity_shred[ 31UL+parity_produced ] + shred_pos, in31 ); FALLTHRU - case 31UL: gf_stu( parity_shred[ 30UL+parity_produced ] + shred_pos, in30 ); FALLTHRU - case 30UL: gf_stu( parity_shred[ 29UL+parity_produced ] + shred_pos, in29 ); FALLTHRU - case 29UL: gf_stu( parity_shred[ 28UL+parity_produced ] + shred_pos, in28 ); FALLTHRU - case 28UL: gf_stu( parity_shred[ 27UL+parity_produced ] + shred_pos, in27 ); FALLTHRU - case 27UL: gf_stu( parity_shred[ 26UL+parity_produced ] + shred_pos, in26 ); FALLTHRU - case 26UL: gf_stu( parity_shred[ 25UL+parity_produced ] + shred_pos, in25 ); FALLTHRU - case 25UL: gf_stu( parity_shred[ 24UL+parity_produced ] + shred_pos, in24 ); FALLTHRU - case 24UL: gf_stu( parity_shred[ 23UL+parity_produced ] + shred_pos, in23 ); FALLTHRU - case 23UL: gf_stu( parity_shred[ 22UL+parity_produced ] + shred_pos, in22 ); FALLTHRU - case 22UL: gf_stu( parity_shred[ 21UL+parity_produced ] + shred_pos, in21 ); FALLTHRU - case 21UL: gf_stu( parity_shred[ 20UL+parity_produced ] + shred_pos, in20 ); FALLTHRU - 
case 20UL: gf_stu( parity_shred[ 19UL+parity_produced ] + shred_pos, in19 ); FALLTHRU - case 19UL: gf_stu( parity_shred[ 18UL+parity_produced ] + shred_pos, in18 ); FALLTHRU - case 18UL: gf_stu( parity_shred[ 17UL+parity_produced ] + shred_pos, in17 ); FALLTHRU - case 17UL: gf_stu( parity_shred[ 16UL+parity_produced ] + shred_pos, in16 ); FALLTHRU - case 16UL: gf_stu( parity_shred[ 15UL+parity_produced ] + shred_pos, in15 ); FALLTHRU - case 15UL: gf_stu( parity_shred[ 14UL+parity_produced ] + shred_pos, in14 ); FALLTHRU - case 14UL: gf_stu( parity_shred[ 13UL+parity_produced ] + shred_pos, in13 ); FALLTHRU - case 13UL: gf_stu( parity_shred[ 12UL+parity_produced ] + shred_pos, in12 ); FALLTHRU - case 12UL: gf_stu( parity_shred[ 11UL+parity_produced ] + shred_pos, in11 ); FALLTHRU - case 11UL: gf_stu( parity_shred[ 10UL+parity_produced ] + shred_pos, in10 ); FALLTHRU - case 10UL: gf_stu( parity_shred[ 9UL+parity_produced ] + shred_pos, in09 ); FALLTHRU - case 9UL: gf_stu( parity_shred[ 8UL+parity_produced ] + shred_pos, in08 ); FALLTHRU - case 8UL: gf_stu( parity_shred[ 7UL+parity_produced ] + shred_pos, in07 ); FALLTHRU - case 7UL: gf_stu( parity_shred[ 6UL+parity_produced ] + shred_pos, in06 ); FALLTHRU - case 6UL: gf_stu( parity_shred[ 5UL+parity_produced ] + shred_pos, in05 ); FALLTHRU - case 5UL: gf_stu( parity_shred[ 4UL+parity_produced ] + shred_pos, in04 ); FALLTHRU - case 4UL: gf_stu( parity_shred[ 3UL+parity_produced ] + shred_pos, in03 ); FALLTHRU - case 3UL: gf_stu( parity_shred[ 2UL+parity_produced ] + shred_pos, in02 ); FALLTHRU - case 2UL: gf_stu( parity_shred[ 1UL+parity_produced ] + shred_pos, in01 ); FALLTHRU - case 1UL: gf_stu( parity_shred[ 0UL+parity_produced ] + shred_pos, in00 ); - } - } - } - /* In order to handle shred sizes that are not divisible by 32, we clamp - shred_pos to shred_sz-32 when shred_sz-320UL ) ) { + /* Produce another 128 parity shreds */ + FD_REEDSOL_GENERATE_FFT( 128, 128, ALL_VARS ); + switch( parity_remaining ) { + default: + case 68UL: gf_stu( parity_shred[ 67UL+parity_produced ] + shred_pos, in67 ); FALLTHRU + case 67UL: gf_stu( parity_shred[ 66UL+parity_produced ] + shred_pos, in66 ); FALLTHRU + case 66UL: gf_stu( parity_shred[ 65UL+parity_produced ] + shred_pos, in65 ); FALLTHRU + case 65UL: gf_stu( parity_shred[ 64UL+parity_produced ] + shred_pos, in64 ); FALLTHRU + case 64UL: gf_stu( parity_shred[ 63UL+parity_produced ] + shred_pos, in63 ); FALLTHRU + case 63UL: gf_stu( parity_shred[ 62UL+parity_produced ] + shred_pos, in62 ); FALLTHRU + case 62UL: gf_stu( parity_shred[ 61UL+parity_produced ] + shred_pos, in61 ); FALLTHRU + case 61UL: gf_stu( parity_shred[ 60UL+parity_produced ] + shred_pos, in60 ); FALLTHRU + case 60UL: gf_stu( parity_shred[ 59UL+parity_produced ] + shred_pos, in59 ); FALLTHRU + case 59UL: gf_stu( parity_shred[ 58UL+parity_produced ] + shred_pos, in58 ); FALLTHRU + case 58UL: gf_stu( parity_shred[ 57UL+parity_produced ] + shred_pos, in57 ); FALLTHRU + case 57UL: gf_stu( parity_shred[ 56UL+parity_produced ] + shred_pos, in56 ); FALLTHRU + case 56UL: gf_stu( parity_shred[ 55UL+parity_produced ] + shred_pos, in55 ); FALLTHRU + case 55UL: gf_stu( parity_shred[ 54UL+parity_produced ] + shred_pos, in54 ); FALLTHRU + case 54UL: gf_stu( parity_shred[ 53UL+parity_produced ] + shred_pos, in53 ); FALLTHRU + case 53UL: gf_stu( parity_shred[ 52UL+parity_produced ] + shred_pos, in52 ); FALLTHRU + case 52UL: gf_stu( parity_shred[ 51UL+parity_produced ] + shred_pos, in51 ); FALLTHRU + case 51UL: gf_stu( parity_shred[ 50UL+parity_produced 
] + shred_pos, in50 ); FALLTHRU + case 50UL: gf_stu( parity_shred[ 49UL+parity_produced ] + shred_pos, in49 ); FALLTHRU + case 49UL: gf_stu( parity_shred[ 48UL+parity_produced ] + shred_pos, in48 ); FALLTHRU + case 48UL: gf_stu( parity_shred[ 47UL+parity_produced ] + shred_pos, in47 ); FALLTHRU + case 47UL: gf_stu( parity_shred[ 46UL+parity_produced ] + shred_pos, in46 ); FALLTHRU + case 46UL: gf_stu( parity_shred[ 45UL+parity_produced ] + shred_pos, in45 ); FALLTHRU + case 45UL: gf_stu( parity_shred[ 44UL+parity_produced ] + shred_pos, in44 ); FALLTHRU + case 44UL: gf_stu( parity_shred[ 43UL+parity_produced ] + shred_pos, in43 ); FALLTHRU + case 43UL: gf_stu( parity_shred[ 42UL+parity_produced ] + shred_pos, in42 ); FALLTHRU + case 42UL: gf_stu( parity_shred[ 41UL+parity_produced ] + shred_pos, in41 ); FALLTHRU + case 41UL: gf_stu( parity_shred[ 40UL+parity_produced ] + shred_pos, in40 ); FALLTHRU + case 40UL: gf_stu( parity_shred[ 39UL+parity_produced ] + shred_pos, in39 ); FALLTHRU + case 39UL: gf_stu( parity_shred[ 38UL+parity_produced ] + shred_pos, in38 ); FALLTHRU + case 38UL: gf_stu( parity_shred[ 37UL+parity_produced ] + shred_pos, in37 ); FALLTHRU + case 37UL: gf_stu( parity_shred[ 36UL+parity_produced ] + shred_pos, in36 ); FALLTHRU + case 36UL: gf_stu( parity_shred[ 35UL+parity_produced ] + shred_pos, in35 ); FALLTHRU + case 35UL: gf_stu( parity_shred[ 34UL+parity_produced ] + shred_pos, in34 ); FALLTHRU + case 34UL: gf_stu( parity_shred[ 33UL+parity_produced ] + shred_pos, in33 ); FALLTHRU + case 33UL: gf_stu( parity_shred[ 32UL+parity_produced ] + shred_pos, in32 ); FALLTHRU + case 32UL: gf_stu( parity_shred[ 31UL+parity_produced ] + shred_pos, in31 ); FALLTHRU + case 31UL: gf_stu( parity_shred[ 30UL+parity_produced ] + shred_pos, in30 ); FALLTHRU + case 30UL: gf_stu( parity_shred[ 29UL+parity_produced ] + shred_pos, in29 ); FALLTHRU + case 29UL: gf_stu( parity_shred[ 28UL+parity_produced ] + shred_pos, in28 ); FALLTHRU + case 28UL: gf_stu( parity_shred[ 27UL+parity_produced ] + shred_pos, in27 ); FALLTHRU + case 27UL: gf_stu( parity_shred[ 26UL+parity_produced ] + shred_pos, in26 ); FALLTHRU + case 26UL: gf_stu( parity_shred[ 25UL+parity_produced ] + shred_pos, in25 ); FALLTHRU + case 25UL: gf_stu( parity_shred[ 24UL+parity_produced ] + shred_pos, in24 ); FALLTHRU + case 24UL: gf_stu( parity_shred[ 23UL+parity_produced ] + shred_pos, in23 ); FALLTHRU + case 23UL: gf_stu( parity_shred[ 22UL+parity_produced ] + shred_pos, in22 ); FALLTHRU + case 22UL: gf_stu( parity_shred[ 21UL+parity_produced ] + shred_pos, in21 ); FALLTHRU + case 21UL: gf_stu( parity_shred[ 20UL+parity_produced ] + shred_pos, in20 ); FALLTHRU + case 20UL: gf_stu( parity_shred[ 19UL+parity_produced ] + shred_pos, in19 ); FALLTHRU + case 19UL: gf_stu( parity_shred[ 18UL+parity_produced ] + shred_pos, in18 ); FALLTHRU + case 18UL: gf_stu( parity_shred[ 17UL+parity_produced ] + shred_pos, in17 ); FALLTHRU + case 17UL: gf_stu( parity_shred[ 16UL+parity_produced ] + shred_pos, in16 ); FALLTHRU + case 16UL: gf_stu( parity_shred[ 15UL+parity_produced ] + shred_pos, in15 ); FALLTHRU + case 15UL: gf_stu( parity_shred[ 14UL+parity_produced ] + shred_pos, in14 ); FALLTHRU + case 14UL: gf_stu( parity_shred[ 13UL+parity_produced ] + shred_pos, in13 ); FALLTHRU + case 13UL: gf_stu( parity_shred[ 12UL+parity_produced ] + shred_pos, in12 ); FALLTHRU + case 12UL: gf_stu( parity_shred[ 11UL+parity_produced ] + shred_pos, in11 ); FALLTHRU + case 11UL: gf_stu( parity_shred[ 10UL+parity_produced ] + shred_pos, in10 ); FALLTHRU + 
case 10UL: gf_stu( parity_shred[ 9UL+parity_produced ] + shred_pos, in09 ); FALLTHRU + case 9UL: gf_stu( parity_shred[ 8UL+parity_produced ] + shred_pos, in08 ); FALLTHRU + case 8UL: gf_stu( parity_shred[ 7UL+parity_produced ] + shred_pos, in07 ); FALLTHRU + case 7UL: gf_stu( parity_shred[ 6UL+parity_produced ] + shred_pos, in06 ); FALLTHRU + case 6UL: gf_stu( parity_shred[ 5UL+parity_produced ] + shred_pos, in05 ); FALLTHRU + case 5UL: gf_stu( parity_shred[ 4UL+parity_produced ] + shred_pos, in04 ); FALLTHRU + case 4UL: gf_stu( parity_shred[ 3UL+parity_produced ] + shred_pos, in03 ); FALLTHRU + case 3UL: gf_stu( parity_shred[ 2UL+parity_produced ] + shred_pos, in02 ); FALLTHRU + case 2UL: gf_stu( parity_shred[ 1UL+parity_produced ] + shred_pos, in01 ); FALLTHRU + case 1UL: gf_stu( parity_shred[ 0UL+parity_produced ] + shred_pos, in00 ); + } + parity_produced += fd_ulong_min( 68UL, parity_remaining ); + parity_remaining = parity_shred_cnt - parity_produced; + } + #undef ALL_VARS + /* In order to handle shred sizes that are not divisible by 32, we clamp + shred_pos to shred_sz-32 when shred_sz-320UL ) ) { + /* Produce another 16 parity shreds */ + FD_REEDSOL_GENERATE_FFT( 16, 16, ALL_VARS ); + switch( parity_remaining ) { + default: + case 16UL: gf_stu( parity_shred[ 15UL+parity_produced ] + shred_pos, in15 ); FALLTHRU + case 15UL: gf_stu( parity_shred[ 14UL+parity_produced ] + shred_pos, in14 ); FALLTHRU + case 14UL: gf_stu( parity_shred[ 13UL+parity_produced ] + shred_pos, in13 ); FALLTHRU + case 13UL: gf_stu( parity_shred[ 12UL+parity_produced ] + shred_pos, in12 ); FALLTHRU + case 12UL: gf_stu( parity_shred[ 11UL+parity_produced ] + shred_pos, in11 ); FALLTHRU + case 11UL: gf_stu( parity_shred[ 10UL+parity_produced ] + shred_pos, in10 ); FALLTHRU + case 10UL: gf_stu( parity_shred[ 9UL+parity_produced ] + shred_pos, in09 ); FALLTHRU + case 9UL: gf_stu( parity_shred[ 8UL+parity_produced ] + shred_pos, in08 ); FALLTHRU + case 8UL: gf_stu( parity_shred[ 7UL+parity_produced ] + shred_pos, in07 ); FALLTHRU + case 7UL: gf_stu( parity_shred[ 6UL+parity_produced ] + shred_pos, in06 ); FALLTHRU + case 6UL: gf_stu( parity_shred[ 5UL+parity_produced ] + shred_pos, in05 ); FALLTHRU + case 5UL: gf_stu( parity_shred[ 4UL+parity_produced ] + shred_pos, in04 ); FALLTHRU + case 4UL: gf_stu( parity_shred[ 3UL+parity_produced ] + shred_pos, in03 ); FALLTHRU + case 3UL: gf_stu( parity_shred[ 2UL+parity_produced ] + shred_pos, in02 ); FALLTHRU + case 2UL: gf_stu( parity_shred[ 1UL+parity_produced ] + shred_pos, in01 ); FALLTHRU + case 1UL: gf_stu( parity_shred[ 0UL+parity_produced ] + shred_pos, in00 ); + } + parity_produced += fd_ulong_min( 16UL, parity_remaining ); + parity_remaining = parity_shred_cnt - parity_produced; + } + if( FD_UNLIKELY( parity_remaining>0UL ) ) { + /* Produce another 16 parity shreds */ + FD_REEDSOL_GENERATE_IFFT( 16, 16, ALL_VARS ); + FD_REEDSOL_GENERATE_FFT( 16, 32, ALL_VARS ); + switch( parity_remaining ) { + default: + case 16UL: gf_stu( parity_shred[ 15UL+parity_produced ] + shred_pos, in15 ); FALLTHRU + case 15UL: gf_stu( parity_shred[ 14UL+parity_produced ] + shred_pos, in14 ); FALLTHRU + case 14UL: gf_stu( parity_shred[ 13UL+parity_produced ] + shred_pos, in13 ); FALLTHRU + case 13UL: gf_stu( parity_shred[ 12UL+parity_produced ] + shred_pos, in12 ); FALLTHRU + case 12UL: gf_stu( parity_shred[ 11UL+parity_produced ] + shred_pos, in11 ); FALLTHRU + case 11UL: gf_stu( parity_shred[ 10UL+parity_produced ] + shred_pos, in10 ); FALLTHRU + case 10UL: gf_stu( parity_shred[ 
9UL+parity_produced ] + shred_pos, in09 ); FALLTHRU + case 9UL: gf_stu( parity_shred[ 8UL+parity_produced ] + shred_pos, in08 ); FALLTHRU + case 8UL: gf_stu( parity_shred[ 7UL+parity_produced ] + shred_pos, in07 ); FALLTHRU + case 7UL: gf_stu( parity_shred[ 6UL+parity_produced ] + shred_pos, in06 ); FALLTHRU + case 6UL: gf_stu( parity_shred[ 5UL+parity_produced ] + shred_pos, in05 ); FALLTHRU + case 5UL: gf_stu( parity_shred[ 4UL+parity_produced ] + shred_pos, in04 ); FALLTHRU + case 4UL: gf_stu( parity_shred[ 3UL+parity_produced ] + shred_pos, in03 ); FALLTHRU + case 3UL: gf_stu( parity_shred[ 2UL+parity_produced ] + shred_pos, in02 ); FALLTHRU + case 2UL: gf_stu( parity_shred[ 1UL+parity_produced ] + shred_pos, in01 ); FALLTHRU + case 1UL: gf_stu( parity_shred[ 0UL+parity_produced ] + shred_pos, in00 ); + } + parity_produced += fd_ulong_min( 16UL, parity_remaining ); + parity_remaining = parity_shred_cnt - parity_produced; + } + if( FD_UNLIKELY( parity_remaining>0UL ) ) { + /* Produce another 16 parity shreds */ + FD_REEDSOL_GENERATE_IFFT( 16, 32, ALL_VARS ); + FD_REEDSOL_GENERATE_FFT( 16, 48, ALL_VARS ); + switch( parity_remaining ) { + default: + case 16UL: gf_stu( parity_shred[ 15UL+parity_produced ] + shred_pos, in15 ); FALLTHRU + case 15UL: gf_stu( parity_shred[ 14UL+parity_produced ] + shred_pos, in14 ); FALLTHRU + case 14UL: gf_stu( parity_shred[ 13UL+parity_produced ] + shred_pos, in13 ); FALLTHRU + case 13UL: gf_stu( parity_shred[ 12UL+parity_produced ] + shred_pos, in12 ); FALLTHRU + case 12UL: gf_stu( parity_shred[ 11UL+parity_produced ] + shred_pos, in11 ); FALLTHRU + case 11UL: gf_stu( parity_shred[ 10UL+parity_produced ] + shred_pos, in10 ); FALLTHRU + case 10UL: gf_stu( parity_shred[ 9UL+parity_produced ] + shred_pos, in09 ); FALLTHRU + case 9UL: gf_stu( parity_shred[ 8UL+parity_produced ] + shred_pos, in08 ); FALLTHRU + case 8UL: gf_stu( parity_shred[ 7UL+parity_produced ] + shred_pos, in07 ); FALLTHRU + case 7UL: gf_stu( parity_shred[ 6UL+parity_produced ] + shred_pos, in06 ); FALLTHRU + case 6UL: gf_stu( parity_shred[ 5UL+parity_produced ] + shred_pos, in05 ); FALLTHRU + case 5UL: gf_stu( parity_shred[ 4UL+parity_produced ] + shred_pos, in04 ); FALLTHRU + case 4UL: gf_stu( parity_shred[ 3UL+parity_produced ] + shred_pos, in03 ); FALLTHRU + case 3UL: gf_stu( parity_shred[ 2UL+parity_produced ] + shred_pos, in02 ); FALLTHRU + case 2UL: gf_stu( parity_shred[ 1UL+parity_produced ] + shred_pos, in01 ); FALLTHRU + case 1UL: gf_stu( parity_shred[ 0UL+parity_produced ] + shred_pos, in00 ); + } + parity_produced += fd_ulong_min( 16UL, parity_remaining ); + parity_remaining = parity_shred_cnt - parity_produced; + } + if( FD_UNLIKELY( parity_remaining>0UL ) ) { + /* Produce another 16 parity shreds */ + FD_REEDSOL_GENERATE_IFFT( 16, 48, ALL_VARS ); + FD_REEDSOL_GENERATE_FFT( 16, 64, ALL_VARS ); + switch( parity_remaining ) { + default: + case 16UL: gf_stu( parity_shred[ 15UL+parity_produced ] + shred_pos, in15 ); FALLTHRU + case 15UL: gf_stu( parity_shred[ 14UL+parity_produced ] + shred_pos, in14 ); FALLTHRU + case 14UL: gf_stu( parity_shred[ 13UL+parity_produced ] + shred_pos, in13 ); FALLTHRU + case 13UL: gf_stu( parity_shred[ 12UL+parity_produced ] + shred_pos, in12 ); FALLTHRU + case 12UL: gf_stu( parity_shred[ 11UL+parity_produced ] + shred_pos, in11 ); FALLTHRU + case 11UL: gf_stu( parity_shred[ 10UL+parity_produced ] + shred_pos, in10 ); FALLTHRU + case 10UL: gf_stu( parity_shred[ 9UL+parity_produced ] + shred_pos, in09 ); FALLTHRU + case 9UL: gf_stu( parity_shred[ 
8UL+parity_produced ] + shred_pos, in08 ); FALLTHRU + case 8UL: gf_stu( parity_shred[ 7UL+parity_produced ] + shred_pos, in07 ); FALLTHRU + case 7UL: gf_stu( parity_shred[ 6UL+parity_produced ] + shred_pos, in06 ); FALLTHRU + case 6UL: gf_stu( parity_shred[ 5UL+parity_produced ] + shred_pos, in05 ); FALLTHRU + case 5UL: gf_stu( parity_shred[ 4UL+parity_produced ] + shred_pos, in04 ); FALLTHRU + case 4UL: gf_stu( parity_shred[ 3UL+parity_produced ] + shred_pos, in03 ); FALLTHRU + case 3UL: gf_stu( parity_shred[ 2UL+parity_produced ] + shred_pos, in02 ); FALLTHRU + case 2UL: gf_stu( parity_shred[ 1UL+parity_produced ] + shred_pos, in01 ); FALLTHRU + case 1UL: gf_stu( parity_shred[ 0UL+parity_produced ] + shred_pos, in00 ); + } + parity_produced += fd_ulong_min( 16UL, parity_remaining ); + parity_remaining = parity_shred_cnt - parity_produced; + } + if( FD_UNLIKELY( parity_remaining>0UL ) ) { + /* Produce another 16 parity shreds */ + FD_REEDSOL_GENERATE_IFFT( 16, 64, ALL_VARS ); + FD_REEDSOL_GENERATE_FFT( 16, 80, ALL_VARS ); + switch( parity_remaining ) { + default: + case 4UL: gf_stu( parity_shred[ 3UL+parity_produced ] + shred_pos, in03 ); FALLTHRU + case 3UL: gf_stu( parity_shred[ 2UL+parity_produced ] + shred_pos, in02 ); FALLTHRU + case 2UL: gf_stu( parity_shred[ 1UL+parity_produced ] + shred_pos, in01 ); FALLTHRU + case 1UL: gf_stu( parity_shred[ 0UL+parity_produced ] + shred_pos, in00 ); + } + parity_produced += fd_ulong_min( 4UL, parity_remaining ); + parity_remaining = parity_shred_cnt - parity_produced; + } + #undef ALL_VARS + /* In order to handle shred sizes that are not divisible by 32, we clamp + shred_pos to shred_sz-32 when shred_sz-320UL ) ) { + /* Produce another 32 parity shreds */ + FD_REEDSOL_GENERATE_FFT( 32, 32, ALL_VARS ); + switch( parity_remaining ) { + default: + case 32UL: gf_stu( parity_shred[ 31UL+parity_produced ] + shred_pos, in31 ); FALLTHRU + case 31UL: gf_stu( parity_shred[ 30UL+parity_produced ] + shred_pos, in30 ); FALLTHRU + case 30UL: gf_stu( parity_shred[ 29UL+parity_produced ] + shred_pos, in29 ); FALLTHRU + case 29UL: gf_stu( parity_shred[ 28UL+parity_produced ] + shred_pos, in28 ); FALLTHRU + case 28UL: gf_stu( parity_shred[ 27UL+parity_produced ] + shred_pos, in27 ); FALLTHRU + case 27UL: gf_stu( parity_shred[ 26UL+parity_produced ] + shred_pos, in26 ); FALLTHRU + case 26UL: gf_stu( parity_shred[ 25UL+parity_produced ] + shred_pos, in25 ); FALLTHRU + case 25UL: gf_stu( parity_shred[ 24UL+parity_produced ] + shred_pos, in24 ); FALLTHRU + case 24UL: gf_stu( parity_shred[ 23UL+parity_produced ] + shred_pos, in23 ); FALLTHRU + case 23UL: gf_stu( parity_shred[ 22UL+parity_produced ] + shred_pos, in22 ); FALLTHRU + case 22UL: gf_stu( parity_shred[ 21UL+parity_produced ] + shred_pos, in21 ); FALLTHRU + case 21UL: gf_stu( parity_shred[ 20UL+parity_produced ] + shred_pos, in20 ); FALLTHRU + case 20UL: gf_stu( parity_shred[ 19UL+parity_produced ] + shred_pos, in19 ); FALLTHRU + case 19UL: gf_stu( parity_shred[ 18UL+parity_produced ] + shred_pos, in18 ); FALLTHRU + case 18UL: gf_stu( parity_shred[ 17UL+parity_produced ] + shred_pos, in17 ); FALLTHRU + case 17UL: gf_stu( parity_shred[ 16UL+parity_produced ] + shred_pos, in16 ); FALLTHRU + case 16UL: gf_stu( parity_shred[ 15UL+parity_produced ] + shred_pos, in15 ); FALLTHRU + case 15UL: gf_stu( parity_shred[ 14UL+parity_produced ] + shred_pos, in14 ); FALLTHRU + case 14UL: gf_stu( parity_shred[ 13UL+parity_produced ] + shred_pos, in13 ); FALLTHRU + case 13UL: gf_stu( parity_shred[ 12UL+parity_produced ] + 
shred_pos, in12 ); FALLTHRU + case 12UL: gf_stu( parity_shred[ 11UL+parity_produced ] + shred_pos, in11 ); FALLTHRU + case 11UL: gf_stu( parity_shred[ 10UL+parity_produced ] + shred_pos, in10 ); FALLTHRU + case 10UL: gf_stu( parity_shred[ 9UL+parity_produced ] + shred_pos, in09 ); FALLTHRU + case 9UL: gf_stu( parity_shred[ 8UL+parity_produced ] + shred_pos, in08 ); FALLTHRU + case 8UL: gf_stu( parity_shred[ 7UL+parity_produced ] + shred_pos, in07 ); FALLTHRU + case 7UL: gf_stu( parity_shred[ 6UL+parity_produced ] + shred_pos, in06 ); FALLTHRU + case 6UL: gf_stu( parity_shred[ 5UL+parity_produced ] + shred_pos, in05 ); FALLTHRU + case 5UL: gf_stu( parity_shred[ 4UL+parity_produced ] + shred_pos, in04 ); FALLTHRU + case 4UL: gf_stu( parity_shred[ 3UL+parity_produced ] + shred_pos, in03 ); FALLTHRU + case 3UL: gf_stu( parity_shred[ 2UL+parity_produced ] + shred_pos, in02 ); FALLTHRU + case 2UL: gf_stu( parity_shred[ 1UL+parity_produced ] + shred_pos, in01 ); FALLTHRU + case 1UL: gf_stu( parity_shred[ 0UL+parity_produced ] + shred_pos, in00 ); + } + parity_produced += fd_ulong_min( 32UL, parity_remaining ); + parity_remaining = parity_shred_cnt - parity_produced; + } + if( FD_UNLIKELY( parity_remaining>0UL ) ) { + /* Produce another 32 parity shreds */ + FD_REEDSOL_GENERATE_IFFT( 32, 32, ALL_VARS ); + FD_REEDSOL_GENERATE_FFT( 32, 64, ALL_VARS ); + switch( parity_remaining ) { + default: + case 32UL: gf_stu( parity_shred[ 31UL+parity_produced ] + shred_pos, in31 ); FALLTHRU + case 31UL: gf_stu( parity_shred[ 30UL+parity_produced ] + shred_pos, in30 ); FALLTHRU + case 30UL: gf_stu( parity_shred[ 29UL+parity_produced ] + shred_pos, in29 ); FALLTHRU + case 29UL: gf_stu( parity_shred[ 28UL+parity_produced ] + shred_pos, in28 ); FALLTHRU + case 28UL: gf_stu( parity_shred[ 27UL+parity_produced ] + shred_pos, in27 ); FALLTHRU + case 27UL: gf_stu( parity_shred[ 26UL+parity_produced ] + shred_pos, in26 ); FALLTHRU + case 26UL: gf_stu( parity_shred[ 25UL+parity_produced ] + shred_pos, in25 ); FALLTHRU + case 25UL: gf_stu( parity_shred[ 24UL+parity_produced ] + shred_pos, in24 ); FALLTHRU + case 24UL: gf_stu( parity_shred[ 23UL+parity_produced ] + shred_pos, in23 ); FALLTHRU + case 23UL: gf_stu( parity_shred[ 22UL+parity_produced ] + shred_pos, in22 ); FALLTHRU + case 22UL: gf_stu( parity_shred[ 21UL+parity_produced ] + shred_pos, in21 ); FALLTHRU + case 21UL: gf_stu( parity_shred[ 20UL+parity_produced ] + shred_pos, in20 ); FALLTHRU + case 20UL: gf_stu( parity_shred[ 19UL+parity_produced ] + shred_pos, in19 ); FALLTHRU + case 19UL: gf_stu( parity_shred[ 18UL+parity_produced ] + shred_pos, in18 ); FALLTHRU + case 18UL: gf_stu( parity_shred[ 17UL+parity_produced ] + shred_pos, in17 ); FALLTHRU + case 17UL: gf_stu( parity_shred[ 16UL+parity_produced ] + shred_pos, in16 ); FALLTHRU + case 16UL: gf_stu( parity_shred[ 15UL+parity_produced ] + shred_pos, in15 ); FALLTHRU + case 15UL: gf_stu( parity_shred[ 14UL+parity_produced ] + shred_pos, in14 ); FALLTHRU + case 14UL: gf_stu( parity_shred[ 13UL+parity_produced ] + shred_pos, in13 ); FALLTHRU + case 13UL: gf_stu( parity_shred[ 12UL+parity_produced ] + shred_pos, in12 ); FALLTHRU + case 12UL: gf_stu( parity_shred[ 11UL+parity_produced ] + shred_pos, in11 ); FALLTHRU + case 11UL: gf_stu( parity_shred[ 10UL+parity_produced ] + shred_pos, in10 ); FALLTHRU + case 10UL: gf_stu( parity_shred[ 9UL+parity_produced ] + shred_pos, in09 ); FALLTHRU + case 9UL: gf_stu( parity_shred[ 8UL+parity_produced ] + shred_pos, in08 ); FALLTHRU + case 8UL: gf_stu( parity_shred[ 
7UL+parity_produced ] + shred_pos, in07 ); FALLTHRU + case 7UL: gf_stu( parity_shred[ 6UL+parity_produced ] + shred_pos, in06 ); FALLTHRU + case 6UL: gf_stu( parity_shred[ 5UL+parity_produced ] + shred_pos, in05 ); FALLTHRU + case 5UL: gf_stu( parity_shred[ 4UL+parity_produced ] + shred_pos, in04 ); FALLTHRU + case 4UL: gf_stu( parity_shred[ 3UL+parity_produced ] + shred_pos, in03 ); FALLTHRU + case 3UL: gf_stu( parity_shred[ 2UL+parity_produced ] + shred_pos, in02 ); FALLTHRU + case 2UL: gf_stu( parity_shred[ 1UL+parity_produced ] + shred_pos, in01 ); FALLTHRU + case 1UL: gf_stu( parity_shred[ 0UL+parity_produced ] + shred_pos, in00 ); + } + parity_produced += fd_ulong_min( 32UL, parity_remaining ); + parity_remaining = parity_shred_cnt - parity_produced; + } + if( FD_UNLIKELY( parity_remaining>0UL ) ) { + /* Produce another 32 parity shreds */ + FD_REEDSOL_GENERATE_IFFT( 32, 64, ALL_VARS ); + FD_REEDSOL_GENERATE_FFT( 32, 96, ALL_VARS ); + switch( parity_remaining ) { + default: + case 4UL: gf_stu( parity_shred[ 3UL+parity_produced ] + shred_pos, in03 ); FALLTHRU + case 3UL: gf_stu( parity_shred[ 2UL+parity_produced ] + shred_pos, in02 ); FALLTHRU + case 2UL: gf_stu( parity_shred[ 1UL+parity_produced ] + shred_pos, in01 ); FALLTHRU + case 1UL: gf_stu( parity_shred[ 0UL+parity_produced ] + shred_pos, in00 ); + } + parity_produced += fd_ulong_min( 4UL, parity_remaining ); + parity_remaining = parity_shred_cnt - parity_produced; + } + #undef ALL_VARS + /* In order to handle shred sizes that are not divisible by 32, we clamp + shred_pos to shred_sz-32 when shred_sz-320UL ) ) { + /* Produce another 64 parity shreds */ + FD_REEDSOL_GENERATE_FFT( 64, 64, ALL_VARS ); + switch( parity_remaining ) { + default: + case 64UL: gf_stu( parity_shred[ 63UL+parity_produced ] + shred_pos, in63 ); FALLTHRU + case 63UL: gf_stu( parity_shred[ 62UL+parity_produced ] + shred_pos, in62 ); FALLTHRU + case 62UL: gf_stu( parity_shred[ 61UL+parity_produced ] + shred_pos, in61 ); FALLTHRU + case 61UL: gf_stu( parity_shred[ 60UL+parity_produced ] + shred_pos, in60 ); FALLTHRU + case 60UL: gf_stu( parity_shred[ 59UL+parity_produced ] + shred_pos, in59 ); FALLTHRU + case 59UL: gf_stu( parity_shred[ 58UL+parity_produced ] + shred_pos, in58 ); FALLTHRU + case 58UL: gf_stu( parity_shred[ 57UL+parity_produced ] + shred_pos, in57 ); FALLTHRU + case 57UL: gf_stu( parity_shred[ 56UL+parity_produced ] + shred_pos, in56 ); FALLTHRU + case 56UL: gf_stu( parity_shred[ 55UL+parity_produced ] + shred_pos, in55 ); FALLTHRU + case 55UL: gf_stu( parity_shred[ 54UL+parity_produced ] + shred_pos, in54 ); FALLTHRU + case 54UL: gf_stu( parity_shred[ 53UL+parity_produced ] + shred_pos, in53 ); FALLTHRU + case 53UL: gf_stu( parity_shred[ 52UL+parity_produced ] + shred_pos, in52 ); FALLTHRU + case 52UL: gf_stu( parity_shred[ 51UL+parity_produced ] + shred_pos, in51 ); FALLTHRU + case 51UL: gf_stu( parity_shred[ 50UL+parity_produced ] + shred_pos, in50 ); FALLTHRU + case 50UL: gf_stu( parity_shred[ 49UL+parity_produced ] + shred_pos, in49 ); FALLTHRU + case 49UL: gf_stu( parity_shred[ 48UL+parity_produced ] + shred_pos, in48 ); FALLTHRU + case 48UL: gf_stu( parity_shred[ 47UL+parity_produced ] + shred_pos, in47 ); FALLTHRU + case 47UL: gf_stu( parity_shred[ 46UL+parity_produced ] + shred_pos, in46 ); FALLTHRU + case 46UL: gf_stu( parity_shred[ 45UL+parity_produced ] + shred_pos, in45 ); FALLTHRU + case 45UL: gf_stu( parity_shred[ 44UL+parity_produced ] + shred_pos, in44 ); FALLTHRU + case 44UL: gf_stu( parity_shred[ 43UL+parity_produced ] + 
shred_pos, in43 ); FALLTHRU + case 43UL: gf_stu( parity_shred[ 42UL+parity_produced ] + shred_pos, in42 ); FALLTHRU + case 42UL: gf_stu( parity_shred[ 41UL+parity_produced ] + shred_pos, in41 ); FALLTHRU + case 41UL: gf_stu( parity_shred[ 40UL+parity_produced ] + shred_pos, in40 ); FALLTHRU + case 40UL: gf_stu( parity_shred[ 39UL+parity_produced ] + shred_pos, in39 ); FALLTHRU + case 39UL: gf_stu( parity_shred[ 38UL+parity_produced ] + shred_pos, in38 ); FALLTHRU + case 38UL: gf_stu( parity_shred[ 37UL+parity_produced ] + shred_pos, in37 ); FALLTHRU + case 37UL: gf_stu( parity_shred[ 36UL+parity_produced ] + shred_pos, in36 ); FALLTHRU + case 36UL: gf_stu( parity_shred[ 35UL+parity_produced ] + shred_pos, in35 ); FALLTHRU + case 35UL: gf_stu( parity_shred[ 34UL+parity_produced ] + shred_pos, in34 ); FALLTHRU + case 34UL: gf_stu( parity_shred[ 33UL+parity_produced ] + shred_pos, in33 ); FALLTHRU + case 33UL: gf_stu( parity_shred[ 32UL+parity_produced ] + shred_pos, in32 ); FALLTHRU + case 32UL: gf_stu( parity_shred[ 31UL+parity_produced ] + shred_pos, in31 ); FALLTHRU + case 31UL: gf_stu( parity_shred[ 30UL+parity_produced ] + shred_pos, in30 ); FALLTHRU + case 30UL: gf_stu( parity_shred[ 29UL+parity_produced ] + shred_pos, in29 ); FALLTHRU + case 29UL: gf_stu( parity_shred[ 28UL+parity_produced ] + shred_pos, in28 ); FALLTHRU + case 28UL: gf_stu( parity_shred[ 27UL+parity_produced ] + shred_pos, in27 ); FALLTHRU + case 27UL: gf_stu( parity_shred[ 26UL+parity_produced ] + shred_pos, in26 ); FALLTHRU + case 26UL: gf_stu( parity_shred[ 25UL+parity_produced ] + shred_pos, in25 ); FALLTHRU + case 25UL: gf_stu( parity_shred[ 24UL+parity_produced ] + shred_pos, in24 ); FALLTHRU + case 24UL: gf_stu( parity_shred[ 23UL+parity_produced ] + shred_pos, in23 ); FALLTHRU + case 23UL: gf_stu( parity_shred[ 22UL+parity_produced ] + shred_pos, in22 ); FALLTHRU + case 22UL: gf_stu( parity_shred[ 21UL+parity_produced ] + shred_pos, in21 ); FALLTHRU + case 21UL: gf_stu( parity_shred[ 20UL+parity_produced ] + shred_pos, in20 ); FALLTHRU + case 20UL: gf_stu( parity_shred[ 19UL+parity_produced ] + shred_pos, in19 ); FALLTHRU + case 19UL: gf_stu( parity_shred[ 18UL+parity_produced ] + shred_pos, in18 ); FALLTHRU + case 18UL: gf_stu( parity_shred[ 17UL+parity_produced ] + shred_pos, in17 ); FALLTHRU + case 17UL: gf_stu( parity_shred[ 16UL+parity_produced ] + shred_pos, in16 ); FALLTHRU + case 16UL: gf_stu( parity_shred[ 15UL+parity_produced ] + shred_pos, in15 ); FALLTHRU + case 15UL: gf_stu( parity_shred[ 14UL+parity_produced ] + shred_pos, in14 ); FALLTHRU + case 14UL: gf_stu( parity_shred[ 13UL+parity_produced ] + shred_pos, in13 ); FALLTHRU + case 13UL: gf_stu( parity_shred[ 12UL+parity_produced ] + shred_pos, in12 ); FALLTHRU + case 12UL: gf_stu( parity_shred[ 11UL+parity_produced ] + shred_pos, in11 ); FALLTHRU + case 11UL: gf_stu( parity_shred[ 10UL+parity_produced ] + shred_pos, in10 ); FALLTHRU + case 10UL: gf_stu( parity_shred[ 9UL+parity_produced ] + shred_pos, in09 ); FALLTHRU + case 9UL: gf_stu( parity_shred[ 8UL+parity_produced ] + shred_pos, in08 ); FALLTHRU + case 8UL: gf_stu( parity_shred[ 7UL+parity_produced ] + shred_pos, in07 ); FALLTHRU + case 7UL: gf_stu( parity_shred[ 6UL+parity_produced ] + shred_pos, in06 ); FALLTHRU + case 6UL: gf_stu( parity_shred[ 5UL+parity_produced ] + shred_pos, in05 ); FALLTHRU + case 5UL: gf_stu( parity_shred[ 4UL+parity_produced ] + shred_pos, in04 ); FALLTHRU + case 4UL: gf_stu( parity_shred[ 3UL+parity_produced ] + shred_pos, in03 ); FALLTHRU + case 3UL: gf_stu( 
parity_shred[ 2UL+parity_produced ] + shred_pos, in02 ); FALLTHRU + case 2UL: gf_stu( parity_shred[ 1UL+parity_produced ] + shred_pos, in01 ); FALLTHRU + case 1UL: gf_stu( parity_shred[ 0UL+parity_produced ] + shred_pos, in00 ); + } + parity_produced += fd_ulong_min( 64UL, parity_remaining ); + parity_remaining = parity_shred_cnt - parity_produced; + } + if( FD_UNLIKELY( parity_remaining>0UL ) ) { + /* Produce another 64 parity shreds */ + FD_REEDSOL_GENERATE_IFFT( 64, 64, ALL_VARS ); + FD_REEDSOL_GENERATE_FFT( 64, 128, ALL_VARS ); + switch( parity_remaining ) { + default: + case 4UL: gf_stu( parity_shred[ 3UL+parity_produced ] + shred_pos, in03 ); FALLTHRU + case 3UL: gf_stu( parity_shred[ 2UL+parity_produced ] + shred_pos, in02 ); FALLTHRU + case 2UL: gf_stu( parity_shred[ 1UL+parity_produced ] + shred_pos, in01 ); FALLTHRU + case 1UL: gf_stu( parity_shred[ 0UL+parity_produced ] + shred_pos, in00 ); + } + parity_produced += fd_ulong_min( 4UL, parity_remaining ); + parity_remaining = parity_shred_cnt - parity_produced; + } + #undef ALL_VARS + /* In order to handle shred sizes that are not divisible by 32, we clamp + shred_pos to shred_sz-32 when shred_sz-32min_data_shreds: + fallthru = " FALLTHRU" + cprint(f"case {k:2}UL: in{k-1:02} = gf_ldu( data_shred[ {k-1:2} ] + shred_pos );"+ fallthru) + cprint("}") + all_vars = [ f'in{k:02}' for k in range(n) ] + cprint(f"#define ALL_VARS " + ", ".join(all_vars)) + cprint("switch( data_shred_cnt ) {") + cprint(f"case {n:2}UL: FD_REEDSOL_GENERATE_IFFT( {n:2}, {0:2}, ALL_VARS ); break;") + for k in range(max_data_shreds-1, min_data_shreds-1, -1): + cprint(f"case {k:2}UL: FD_REEDSOL_GENERATE_PPT( {n:2}, {k:2}, ALL_VARS ); break;") + cprint("}") + cprint(f"/* That generated the first {n}-data_shred_cnt parity shreds in the") + cprint(f" last {n}-data_shred_cnt variables. We might only need") + cprint(f" parity_shred_cnt of them though. 
*/") + + cprint("ulong total_shreds = data_shred_cnt+parity_shred_cnt;") + cprint("switch( data_shred_cnt ) {") + for k in range(min_data_shreds, n): + fallthru = "" + if k0: + cprint( "if( FD_UNLIKELY( parity_remaining>0UL ) ) {") + cprint(f"/* Produce another {n} parity shreds */") + if rep>0: + cprint(f"FD_REEDSOL_GENERATE_IFFT( {n}, {n*rep}, ALL_VARS );") + cprint(f"FD_REEDSOL_GENERATE_FFT( {n}, {n*rep+n}, ALL_VARS );") + cprint("switch( parity_remaining ) {") + cprint("default:") + for k in range(min(n, potential_parity_remaining), 0, -1): + fallthru = "" + if k>1: + fallthru = " FALLTHRU" + cprint(f"case {k:2}UL: gf_stu( parity_shred[ {k-1:2}UL+parity_produced ] + shred_pos, in{k-1:02} );" + fallthru) + cprint("}") + cprint(f"parity_produced += fd_ulong_min( {min(n, potential_parity_remaining)}UL, parity_remaining );") + cprint(f"parity_remaining = parity_shred_cnt - parity_produced;") + cprint("}") + + rep += 1 + potential_parity_remaining -= n + + cprint("#undef ALL_VARS") + cprint("/* In order to handle shred sizes that are not divisible by 32, we clamp") + cprint(" shred_pos to shred_sz-32 when shred_sz-32 #include "fd_reedsol.h" #include "../../util/fd_util.h" @@ -18,8 +19,8 @@ static uchar const * invlog_tbl = fd_reedsol_generic_constants + 256UL*sizeof(s static uchar const * matrix_32_32= fd_reedsol_generic_constants + 256UL*sizeof(short) + 1024UL*sizeof(uchar); /* Row major order, 32x32 */ #define SHRED_SZ (1024UL) -uchar data_shreds[ SHRED_SZ * 32UL ]; -uchar parity_shreds[ SHRED_SZ * 32UL ]; +uchar data_shreds[ SHRED_SZ * FD_REEDSOL_DATA_SHREDS_MAX ]; +uchar parity_shreds[ SHRED_SZ * FD_REEDSOL_PARITY_SHREDS_MAX ]; FD_STATIC_ASSERT( sizeof(fd_reedsol_t) == FD_REEDSOL_FOOTPRINT, reedsol_footprint ); @@ -167,6 +168,8 @@ basic_tests( void ) { typedef uchar linear_chunk_t[ 32UL ]; +#define LINEAR_MAX_DIM (128UL) + /* FFT, PPT, and even Reed-Solomon encoding are all linear functions on each byte of the chunk. 
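Concretely, "linear on each byte" means that over GF(2^8), where addition is XOR, a transform f satisfies f(0) = 0 and f(a + b) = f(a) + f(b) byte for byte, which is what the test below exploits. A self-contained toy illustration (not part of the test; gf256_double is just one example of such a map, multiplication by the constant 2 using one common 0x11d reduction polynomial, and the property shown holds regardless of which field representation this patch uses):

  /* Any GF(2^8) multiply-by-constant is linear over XOR, so it passes
     the same two checks the linearity test applies to FFT/PPT/encode. */
  static uchar gf256_double( uchar a ) { return (uchar)( (uchar)(a<<1) ^ ( (a>>7) ? 0x1d : 0x00 ) ); }

  static void linearity_toy_example( void ) {
    uchar a = (uchar)0x35; uchar b = (uchar)0xd2;
    FD_TEST( gf256_double( (uchar)0 )==(uchar)0 );                                   /* f(0)   == 0         */
    FD_TEST( gf256_double( (uchar)(a^b) )==(uchar)(gf256_double( a )^gf256_double( b )) ); /* f(a+b) == f(a)+f(b) */
  }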
*/ typedef void linear_func_t( linear_chunk_t *, linear_chunk_t * ); @@ -178,11 +181,11 @@ test_linearity( linear_func_t to_test, ulong test_cnt, ulong chunk_sz ) { /* If these fail, the test is wrong */ - FD_TEST( input_cnt <= FD_REEDSOL_DATA_SHREDS_MAX && output_cnt <= FD_REEDSOL_PARITY_SHREDS_MAX ); + FD_TEST( input_cnt <= LINEAR_MAX_DIM && output_cnt <= LINEAR_MAX_DIM ); FD_TEST( chunk_sz <= 32UL ); - linear_chunk_t inputs[ FD_REEDSOL_DATA_SHREDS_MAX ]; - linear_chunk_t outputs[ FD_REEDSOL_PARITY_SHREDS_MAX ]; + linear_chunk_t inputs[ LINEAR_MAX_DIM ]; + linear_chunk_t outputs[ LINEAR_MAX_DIM ]; /* For a linear function, f(0) = 0 */ for( ulong i=0UL; i= 64 */ - for( ulong i=0UL; i<32UL; i++ ) { - d[ i ] = data_shreds + stride*i; - p[ i ] = parity_shreds + stride*i; - r[ i ] = parity_shreds + stride*(i+FD_REEDSOL_PARITY_SHREDS_MAX); - } + for( ulong i=0UL; i Date: Thu, 4 May 2023 13:29:00 -0500 Subject: [PATCH 3/6] reedsol: Add recovery --- src/ballet/reedsol/Local.mk | 14 +- src/ballet/reedsol/fd_reedsol.c | 52 +- src/ballet/reedsol/fd_reedsol.h | 179 +- src/ballet/reedsol/fd_reedsol_arith_avx2.h | 7 + src/ballet/reedsol/fd_reedsol_arith_gfni.h | 14 +- src/ballet/reedsol/fd_reedsol_arith_none.h | 4 + ...internal_128.c => fd_reedsol_encode_128.c} | 0 ...l_internal_16.c => fd_reedsol_encode_16.c} | 0 ...l_internal_32.c => fd_reedsol_encode_32.c} | 0 ...l_internal_64.c => fd_reedsol_encode_64.c} | 0 src/ballet/reedsol/fd_reedsol_fderiv.h | 3423 +++++++++++++++++ src/ballet/reedsol/fd_reedsol_fft.h | 2215 ++++++++++- src/ballet/reedsol/fd_reedsol_internal.h | 62 +- src/ballet/reedsol/fd_reedsol_pi.c | 1195 ++++++ src/ballet/reedsol/fd_reedsol_pi.h | 47 + src/ballet/reedsol/fd_reedsol_recover_128.c | 614 +++ src/ballet/reedsol/fd_reedsol_recover_16.c | 334 ++ src/ballet/reedsol/fd_reedsol_recover_256.c | 744 ++++ src/ballet/reedsol/fd_reedsol_recover_32.c | 350 ++ src/ballet/reedsol/fd_reedsol_recover_64.c | 430 +++ ...enerate_dispatch.py => generate_encode.py} | 14 +- src/ballet/reedsol/generate_fderiv.py | 122 + src/ballet/reedsol/generate_fft.py | 6 +- src/ballet/reedsol/generate_recover.py | 168 + src/ballet/reedsol/test_reedsol.c | 310 ++ 25 files changed, 10261 insertions(+), 43 deletions(-) rename src/ballet/reedsol/{fd_reedsol_internal_128.c => fd_reedsol_encode_128.c} (100%) rename src/ballet/reedsol/{fd_reedsol_internal_16.c => fd_reedsol_encode_16.c} (100%) rename src/ballet/reedsol/{fd_reedsol_internal_32.c => fd_reedsol_encode_32.c} (100%) rename src/ballet/reedsol/{fd_reedsol_internal_64.c => fd_reedsol_encode_64.c} (100%) create mode 100644 src/ballet/reedsol/fd_reedsol_fderiv.h create mode 100644 src/ballet/reedsol/fd_reedsol_pi.c create mode 100644 src/ballet/reedsol/fd_reedsol_pi.h create mode 100644 src/ballet/reedsol/fd_reedsol_recover_128.c create mode 100644 src/ballet/reedsol/fd_reedsol_recover_16.c create mode 100644 src/ballet/reedsol/fd_reedsol_recover_256.c create mode 100644 src/ballet/reedsol/fd_reedsol_recover_32.c create mode 100644 src/ballet/reedsol/fd_reedsol_recover_64.c rename src/ballet/reedsol/{generate_dispatch.py => generate_encode.py} (95%) create mode 100644 src/ballet/reedsol/generate_fderiv.py create mode 100644 src/ballet/reedsol/generate_recover.py diff --git a/src/ballet/reedsol/Local.mk b/src/ballet/reedsol/Local.mk index 9d7a23f7aa1..716efa0a4e6 100644 --- a/src/ballet/reedsol/Local.mk +++ b/src/ballet/reedsol/Local.mk @@ -3,8 +3,14 @@ ifdef FD_HAS_GFNI $(call add-asms,fd_reedsol_gfni_32,fd_ballet) endif $(call add-objs,fd_reedsol,fd_ballet) 
-$(call add-objs,fd_reedsol_internal_16,fd_ballet) -$(call add-objs,fd_reedsol_internal_32,fd_ballet) -$(call add-objs,fd_reedsol_internal_64,fd_ballet) -$(call add-objs,fd_reedsol_internal_128,fd_ballet) +$(call add-objs,fd_reedsol_encode_16,fd_ballet) +$(call add-objs,fd_reedsol_encode_32,fd_ballet) +$(call add-objs,fd_reedsol_encode_64,fd_ballet) +$(call add-objs,fd_reedsol_encode_128,fd_ballet) +$(call add-objs,fd_reedsol_recover_16,fd_ballet) +$(call add-objs,fd_reedsol_recover_32,fd_ballet) +$(call add-objs,fd_reedsol_recover_64,fd_ballet) +$(call add-objs,fd_reedsol_recover_128,fd_ballet) +$(call add-objs,fd_reedsol_recover_256,fd_ballet) +$(call add-objs,fd_reedsol_pi,fd_ballet) $(call make-unit-test,test_reedsol,test_reedsol,fd_ballet fd_util) diff --git a/src/ballet/reedsol/fd_reedsol.c b/src/ballet/reedsol/fd_reedsol.c index 6d9b8193c07..668203de286 100644 --- a/src/ballet/reedsol/fd_reedsol.c +++ b/src/ballet/reedsol/fd_reedsol.c @@ -13,18 +13,58 @@ void fd_reedsol_encode_fini( fd_reedsol_t * rs ) { #if FD_HAS_GFNI if( FD_LIKELY( (rs->data_shred_cnt==32UL) & (rs->parity_shred_cnt==32UL ) ) ) - fd_reedsol_encode_32_32( rs->shred_sz, (uchar const * *)rs->data_shred, rs->parity_shred, rs->scratch ); + fd_reedsol_encode_32_32( rs->shred_sz, rs->encode.data_shred, rs->encode.parity_shred, rs->scratch ); else #endif if( FD_UNLIKELY( rs->data_shred_cnt<=16UL ) ) - fd_reedsol_encode_16( rs->shred_sz, (uchar const * *)rs->data_shred, rs->data_shred_cnt, rs->parity_shred, rs->parity_shred_cnt ); + fd_reedsol_encode_16( rs->shred_sz, rs->encode.data_shred, rs->data_shred_cnt, rs->encode.parity_shred, rs->parity_shred_cnt ); else if( FD_LIKELY( rs->data_shred_cnt<=32UL ) ) - fd_reedsol_encode_32( rs->shred_sz, (uchar const * *)rs->data_shred, rs->data_shred_cnt, rs->parity_shred, rs->parity_shred_cnt ); + fd_reedsol_encode_32( rs->shred_sz, rs->encode.data_shred, rs->data_shred_cnt, rs->encode.parity_shred, rs->parity_shred_cnt ); else if( FD_LIKELY( rs->data_shred_cnt<=64UL ) ) - fd_reedsol_encode_64( rs->shred_sz, (uchar const * *)rs->data_shred, rs->data_shred_cnt, rs->parity_shred, rs->parity_shred_cnt ); + fd_reedsol_encode_64( rs->shred_sz, rs->encode.data_shred, rs->data_shred_cnt, rs->encode.parity_shred, rs->parity_shred_cnt ); else - fd_reedsol_encode_128( rs->shred_sz, (uchar const * *)rs->data_shred, rs->data_shred_cnt, rs->parity_shred, rs->parity_shred_cnt ); + fd_reedsol_encode_128( rs->shred_sz, rs->encode.data_shred, rs->data_shred_cnt, rs->encode.parity_shred, rs->parity_shred_cnt ); - rs->data_shred_cnt = 0UL; + rs->data_shred_cnt = 0UL; rs->parity_shred_cnt = 0UL; } + + +int fd_reedsol_recover_fini( fd_reedsol_t * rs ) { + /* How many shreds do we need to consider in order to find + rs->data_shred_cnt un-erased? 
*/ + ulong unerased = 0UL; + ulong i=0UL; + + ulong data_shred_cnt = rs->data_shred_cnt; + ulong parity_shred_cnt = rs->parity_shred_cnt; + rs->data_shred_cnt = 0UL; + rs->parity_shred_cnt = 0UL; + + for( ; irecover.erased[ i ]; + if( unerased==data_shred_cnt ) break; + } + if( FD_UNLIKELY( unerased != data_shred_cnt ) ) return FD_REEDSOL_ERR_INSUFFICIENT; + + /* if( FD_LIKELY( i==data_shred_cnt ) ) { + // Common case: we have all of the data shreds + if( FD_UNLIKELY( i<=16UL ) ) + return fd_reedsol_recover_first_16( rs->shred_sz, rs->recover.shred, data_shred_cnt, parity_shred_cnt ); + if( FD_LIKELY( i<=32UL ) ) + return fd_reedsol_recover_first_32( rs->shred_sz, rs->recover.shred, data_shred_cnt, parity_shred_cnt ); + if( FD_LIKELY( i<=64UL ) ) + return fd_reedsol_recover_first_64( rs->shred_sz, rs->recover.shred, data_shred_cnt, parity_shred_cnt ); + return fd_reedsol_recover_first_128( rs->shred_sz, rs->recover.shred, data_shred_cnt, parity_shred_cnt ); + } */ + + if( FD_UNLIKELY( i<16UL ) ) + return fd_reedsol_recover_var_16( rs->shred_sz, rs->recover.shred, data_shred_cnt, parity_shred_cnt, rs->recover.erased ); + if( FD_LIKELY( i<32UL ) ) + return fd_reedsol_recover_var_32( rs->shred_sz, rs->recover.shred, data_shred_cnt, parity_shred_cnt, rs->recover.erased ); + if( FD_LIKELY( i<64UL ) ) + return fd_reedsol_recover_var_64( rs->shred_sz, rs->recover.shred, data_shred_cnt, parity_shred_cnt, rs->recover.erased ); + if( FD_LIKELY( i<128UL ) ) + return fd_reedsol_recover_var_128( rs->shred_sz, rs->recover.shred, data_shred_cnt, parity_shred_cnt, rs->recover.erased ); + return fd_reedsol_recover_var_256( rs->shred_sz, rs->recover.shred, data_shred_cnt, parity_shred_cnt, rs->recover.erased ); +} diff --git a/src/ballet/reedsol/fd_reedsol.h b/src/ballet/reedsol/fd_reedsol.h index 2ac42ea4318..3cb320bed72 100644 --- a/src/ballet/reedsol/fd_reedsol.h +++ b/src/ballet/reedsol/fd_reedsol.h @@ -16,13 +16,11 @@ Solana also calls parity shreds "code" shreds, but due to the naming collision with executable code, we have opted for "parity." This mathematical structure thus forces each shred to be of identical size - but doesn't otherwise impose any size restrictions. */ + but doesn't otherwise impose any size restrictions.*/ + #include "../../util/fd_util.h" -// TODO: Define decode API -//#define SET_NAME reedsol_shred_set -//#include "../../util/tmpl/fd_smallset.c" /* FD_REEDSOL_{DATA, PARITY}_SHREDS_MAX describe the inclusive maximum number of data and parity shreds that this implementation supports. @@ -34,7 +32,14 @@ #define FD_REEDSOL_ALIGN (128UL) -#define FD_REEDSOL_FOOTPRINT (2176UL) +#define FD_REEDSOL_FOOTPRINT (2304UL) + +/* Return values for the recover operation, which is the only part that + can fail for non-bug reasons. Their meaning is documented with + fd_reedsol_recover_fini. */ +#define FD_REEDSOL_OK (0) +#define FD_REEDSOL_ERR_INCONSISTENT (-1) +#define FD_REEDSOL_ERR_INSUFFICIENT (-2) struct __attribute__((aligned(FD_REEDSOL_ALIGN))) fd_reedsol_private { uchar scratch[ 1024 ]; /* Used for the ultra high performance implementation */ @@ -48,14 +53,23 @@ struct __attribute__((aligned(FD_REEDSOL_ALIGN))) fd_reedsol_private { ulong data_shred_cnt; ulong parity_shred_cnt; - /* {data, parity}_shred: pointers to the first byte of each shred */ - uchar * data_shred[ FD_REEDSOL_DATA_SHREDS_MAX ]; - uchar * parity_shred[ FD_REEDSOL_PARITY_SHREDS_MAX ]; - - /* {data, parity}_shred_valid: whether the shred at the corresponding - index contains valid data. Used only for decoding operations. 
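As an illustrative walk through the dispatch above (numbers invented for the example): with data_shred_cnt = 32, parity_shred_cnt = 32, and the first 10 data shreds erased, the counting loop leaves unerased at 0 for i = 0..9, then increments it once per received shred until unerased reaches 32 at i = 41 and the loop breaks. Since 41 is at least 32 but less than 64, fd_reedsol_recover_var_64 is the kernel that runs; the choice is driven by how far the loop had to scan (i), not by the shred counts alone.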
*/ - //fd_reedsol_shred_set_t data_shred_valid; - //fd_reedsol_shred_set_t parity_shred_valid; + union { + struct { + /* {data, parity}_shred: pointers to the 1st byte of each shred */ + uchar const * data_shred[ FD_REEDSOL_DATA_SHREDS_MAX ]; + uchar * parity_shred[ FD_REEDSOL_PARITY_SHREDS_MAX ]; + } encode; + struct { + uchar * shred[ FD_REEDSOL_DATA_SHREDS_MAX + FD_REEDSOL_PARITY_SHREDS_MAX ]; + + /* erased: whether the shred at the corresponding + index is an erasure (i.e. wasn't received or was corrupted). + Used only for decoding operations. */ + /* TODO: Is this the right data type? Should it use a fd_smallset + instead? */ + uchar erased[ FD_REEDSOL_DATA_SHREDS_MAX + FD_REEDSOL_PARITY_SHREDS_MAX ]; + } recover; + }; }; typedef struct fd_reedsol_private fd_reedsol_t; @@ -100,12 +114,7 @@ fd_reedsol_encode_init( void * mem, ulong shred_sz ) { static inline fd_reedsol_t * fd_reedsol_encode_add_data_shred( fd_reedsol_t * rs, void const * ptr ) { - /* The argument is const to make it clear that an encoding operation - won't write to the shred, but we store them in the struct as - non-const so that the same struct can be used for encoding and - decoding operations, in which the data shreds actually are - writeable. */ - rs->data_shred[ rs->data_shred_cnt++ ] = (uchar *)ptr; + rs->encode.data_shred[ rs->data_shred_cnt++ ] = (uchar const*)ptr; return rs; } @@ -122,7 +131,7 @@ fd_reedsol_encode_add_data_shred( fd_reedsol_t * rs, void const * ptr ) { static inline fd_reedsol_t * fd_reedsol_encode_add_parity_shred( fd_reedsol_t * rs, void * ptr ) { - rs->parity_shred[ rs->parity_shred_cnt++ ] = (uchar *)ptr; + rs->encode.parity_shred[ rs->parity_shred_cnt++ ] = (uchar *)ptr; return rs; } @@ -144,8 +153,136 @@ fd_reedsol_encode_cancel( fd_reedsol_t * rs ) { have any read or write interest in any of the provided shreds. */ void fd_reedsol_encode_fini( fd_reedsol_t * rs ); +/* fd_reedsol_recover_init: starts a Reed-Solomon recover/decode + operation that will recover shreds of size shred_sz. mem is assumed + to be a piece of memory that meets the alignment and size constraints + specified above. Takes a write interest in mem that persists until + the operation is canceled or finalized. shred_sz must be >= 32. + Returns mem. */ +static inline fd_reedsol_t * +fd_reedsol_recover_init( void * mem, ulong shred_sz ) { + /* TODO: This is the same as encode_init. Should I merge them? */ + fd_reedsol_t * rs = (fd_reedsol_t *)mem; + + rs->shred_sz = shred_sz; + rs->data_shred_cnt = 0UL; + rs->parity_shred_cnt = 0UL; + + return rs; +} + +/* fd_reedsol_recover_add_rcvd_shred adds the shred consisting of the of + memory [ptr, ptr+shred_sz) to the in-process Reed-Solomon recover + operation as a source of data. Takes a read interest in the shred + that persists for the lifetime of the operation (i.e. until finalized + or cancelled). Received shreds have no alignment restrictions and + can overlap with each other (if necessary, but there's no known use + case for doing so), but should not overlap with any erased shreds in + the same recovery operation. + + The shred is treated as a data shred if is_data_shred is non-zero and + as a parity shred if not. Data shreds and parity shreds are mostly + treated identically in the recover operation, but having the right + number of data shreds is important for validating the shreds are + correct. + + Note: The order in which shreds are added (using this function and + fd_reedsol_recover_add_erased_shred) is very important for recovery. 
+ Shreds must be added in the natural index order or the recover + operation will almost certainly fail. In particular, all data shreds + must be added before any parity shreds are added. */ +static inline fd_reedsol_t * +fd_reedsol_recover_add_rcvd_shred( fd_reedsol_t * rs, int is_data_shred, void const * ptr ) { +#if FD_REEDSOL_HANDHOLDING + // assert is_data_shred==1 implies rs->parity_shred_cnt==0 + // data_shred_cnt, parity_shred_cnt won't go over the max + #endif + /* For performance reasons, we need to store all the shred pointers in + one flat array, which means the array needs to be non-const. The + const in the function signature signals that this operation won't + modify the shred. */ + rs->recover.shred[ rs->data_shred_cnt + rs->parity_shred_cnt ] = (void *)ptr; + rs->recover.erased[ rs->data_shred_cnt + rs->parity_shred_cnt ] = (uchar)0; + rs->data_shred_cnt += !!is_data_shred; + rs->parity_shred_cnt += !is_data_shred; + return rs; +} + +/* fd_reedsol_recover_add_erased_shred adds the block of memory [ptr, + ptr+shred_sz) to the in-process Reed-Solomon recover operation as the + destination for a shred that will be recovered. Takes a write + interest in the shred that persists for the lifetime of the operation + (i.e. until finalized or cancelled). Erased shreds have no alignment + restrictions but should not overlap with any other shreds in the same + recover operation. The contents of the the block of memory are + ignored and will be overwritten by the time the operation is + finished. + + The shred is treated as a data shred if is_data_shred is non-zero and + as a parity shred if not. Data shreds and parity shreds are mostly + treated identically in the recover operation, but having the right + number of data shreds is important for validating the shreds are + correct. + + Note: The order in which shreds are added (using this function and + fd_reedsol_recover_add_rcvd_shred) is very important for recovery. + Shreds must be added in the natural index order or the recover + operation will almost certainly fail. In particular, all data shreds + must be added before any parity shreds are added. */ +static inline fd_reedsol_t * +fd_reedsol_recover_add_erased_shred( fd_reedsol_t * rs, int is_data_shred, void * ptr ) { +#if FD_REEDSOL_HANDHOLDING + // assert is_data_shred==1 implies rs->parity_shred_cnt==0 + // data_shred_cnt, parity_shred_cnt won't go over the max + #endif + rs->recover.shred[ rs->data_shred_cnt + rs->parity_shred_cnt ] = ptr; + rs->recover.erased[ rs->data_shred_cnt + rs->parity_shred_cnt ] = (uchar)1; + rs->data_shred_cnt += !!is_data_shred; + rs->parity_shred_cnt += !is_data_shred; + return rs; +} + + +/* fd_reedsol_recover_cancel cancels an in-progress encoding operation. + Releases any read or write interests in any shreds that were added to + the operation. Upon return, the contents of the erased shreds are + undefined. */ +static inline void +fd_reedsol_recover_cancel( fd_reedsol_t * rs ) { + rs->data_shred_cnt = 0UL; + rs->parity_shred_cnt = 0UL; +} + + +/* fd_reedsol_recover_fini finishes the in-progress recover operation. + If successful, upon return, any erased shreds will be filled with the + correct data as recovered by the Reed-Solomon recovery algorithm. + + If the recover operation fails with FD_REEDSOL_ERR_{INCONSISTENT, + INSUFFICIENT} , the contents of any erased shreds are undefined. + + Upon return, this will no longer have any read or write interest in + any of the provided shreds. 
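Pulling the pieces above together, a minimal usage sketch of the recover path (the 32+32 shred counts and the single erasure are hypothetical, and d_shred/p_shred stand for the caller's shred payload pointers):

  uchar mem[ FD_REEDSOL_FOOTPRINT ] __attribute__((aligned(FD_REEDSOL_ALIGN)));
  fd_reedsol_t * rs = fd_reedsol_recover_init( mem, 1024UL /* shred_sz */ );

  /* Add shreds in natural index order: all data shreds first.
     Data shred 7 was never received, so hand its buffer in as erased. */
  for( ulong i=0UL; i<32UL; i++ ) {
    if( i==7UL ) fd_reedsol_recover_add_erased_shred( rs, 1, d_shred[ i ] );
    else         fd_reedsol_recover_add_rcvd_shred  ( rs, 1, d_shred[ i ] );
  }
  /* ... then all parity shreds, all received in this example ... */
  for( ulong j=0UL; j<32UL; j++ ) fd_reedsol_recover_add_rcvd_shred( rs, 0, p_shred[ j ] );

  int err = fd_reedsol_recover_fini( rs );
  if( FD_LIKELY( err==FD_REEDSOL_OK ) ) { /* d_shred[ 7 ] now holds the reconstructed payload */ }
  else { /* FD_REEDSOL_ERR_INSUFFICIENT or FD_REEDSOL_ERR_INCONSISTENT; erased contents undefined */ }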
+ + Returns one of: + FD_REEDSOL_OK if the recover operation was successful + FD_REEDSOL_ERR_INCONSISTENT if the shreds are not consistent with + having come from a Reed-Solomon encoding with the provided number + of data shreds + FD_REEDSOL_ERR_INSUFFICIENT if there's not enough un-erased data to + recover data_shred_cnt data shreds. There must be at least one + un-erased shred (data or parity) for each data shred in the + operation. + -/* FIXME: Add decode API */ + It's worth pointing out that the recovery process differs from + typical network coding theory by making no effort to correct data + corruption. The shred signature verification process should detect + any data corruption, and any shred that fails signature verification + can be treated as an erasure. This prevents the network from forking + if the leader (maliciously) creates data shreds from one version of + the block and parity shreds from another version of the block. */ +int fd_reedsol_recover_fini( fd_reedsol_t * rs ); #endif /* HEADER_fd_src_ballet_reedsol_fd_reedsol_h */ diff --git a/src/ballet/reedsol/fd_reedsol_arith_avx2.h b/src/ballet/reedsol/fd_reedsol_arith_avx2.h index 960e3a6438d..6856fcb34b4 100644 --- a/src/ballet/reedsol/fd_reedsol_arith_avx2.h +++ b/src/ballet/reedsol/fd_reedsol_arith_avx2.h @@ -36,6 +36,13 @@ static uchar const fd_reedsol_arith_scale4[ 256UL ] = { /* c is known at compile time, so this is not a runtime branch */ \ (c==0) ? wb_zero() : ( (c==1) ? a : wb_xor( p0, p1 ) ); } )) +#define GF_MUL_VAR( a, c ) (__extension__({ \ + wb_t lo = wb_and( a, wb_bcast( 0x0F ) ); \ + wb_t hi = wb_shr( a, 4 ); \ + wb_t p0 = _mm256_shuffle_epi8( wb_ld( fd_reedsol_arith_consts_avx_mul + 32*c ), lo ); \ + wb_t p1 = _mm256_shuffle_epi8( wb_ld( fd_reedsol_arith_consts_avx_mul + 32*fd_reedsol_arith_scale4[ c ] ), hi ); \ + wb_xor( p0, p1 ); } )) +#define GF_ANY( x ) (0 != _mm256_movemask_epi8( wb_ne( (x), wb_zero() ) ) ) #endif /*HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_avx2_h */ diff --git a/src/ballet/reedsol/fd_reedsol_arith_gfni.h b/src/ballet/reedsol/fd_reedsol_arith_gfni.h index 8c0f47d8424..d4a420693a9 100644 --- a/src/ballet/reedsol/fd_reedsol_arith_gfni.h +++ b/src/ballet/reedsol/fd_reedsol_arith_gfni.h @@ -17,7 +17,8 @@ FD_IMPORT_BINARY( fd_reedsol_arith_consts_gfni_mul, "src/ballet/reedsol/const extern uchar const fd_reedsol_arith_consts_gfni_mul[] __attribute__((aligned(128))); #endif -#define GF_ADD( a, b ) wb_xor( a, b ) +#define GF_ADD wb_xor +#define GF_OR wb_or /* Older versions of GCC have a bug that cause them to think _mm256_gf2p8affine_epi64_epi8 is a symmetric in the first two arguments @@ -33,6 +34,7 @@ extern uchar const fd_reedsol_arith_consts_gfni_mul[] __attribute__((aligned(12 #if FD_USING_CLANG || (GCC_VERSION >= 100000) /* c is known at compile time, so this is not a runtime branch */ #define GF_MUL( a, c ) ((c==0) ? wb_zero() : ( (c==1) ? (a) : _mm256_gf2p8affine_epi64_epi8( a, wb_ld( fd_reedsol_arith_consts_gfni_mul + 32*(c) ), 0 ) )) +#define GF_MUL_VAR( a, c ) (_mm256_gf2p8affine_epi64_epi8( a, wb_ld( fd_reedsol_arith_consts_gfni_mul + 32*(c) ), 0 ) ) #else @@ -44,8 +46,18 @@ extern uchar const fd_reedsol_arith_consts_gfni_mul[] __attribute__((aligned(12 [vec]"x" (a) ); \ (c==0) ? wb_zero() : ( (c==1) ? 
(a) : product ); })) + +#define GF_MUL_VAR( a, c ) (__extension__({ \ + wb_t product; \ + __asm__( "vgf2p8affineqb $0x0, %[cons], %[vec], %[out]" \ + : [out]"=x"(product) \ + : [cons]"xm"( wb_ld( fd_reedsol_arith_consts_gfni_mul + 32*(c) ) ), \ + [vec]"x" (a) ); \ + (product); })) + #endif +#define GF_ANY( x ) (0 != _mm256_movemask_epi8( wb_ne( (x), wb_zero() ) ) ) #endif /*HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_gfni_h */ diff --git a/src/ballet/reedsol/fd_reedsol_arith_none.h b/src/ballet/reedsol/fd_reedsol_arith_none.h index 5ffed29172f..bcdc3d10060 100644 --- a/src/ballet/reedsol/fd_reedsol_arith_none.h +++ b/src/ballet/reedsol/fd_reedsol_arith_none.h @@ -22,12 +22,16 @@ static FD_FN_UNUSED short const * gf_arith_log_tbl = (short const *)fd_reeds static FD_FN_UNUSED uchar const * gf_arith_invlog_tbl = fd_reedsol_arith_consts_generic_mul + 256UL*sizeof(short) + 512UL*sizeof(uchar); /* Indexed [-512, 512) */ #define GF_ADD( a, b ) ((a)^(b)) +#define GF_OR( a, b ) ((a)|(b)) /* c is known at compile time, so this is not a runtime branch. Exposing log_tbl at compile time would let the compiler remove a branch, but we don't care too much about performance in this case. */ #define GF_MUL( a, c ) ((c==0) ? 0UL : ( (c==1) ? (a) : (ulong)gf_arith_invlog_tbl[ gf_arith_log_tbl[ a ] + gf_arith_log_tbl[ c ] ] )) +#define GF_MUL_VAR( a, c ) ((ulong)gf_arith_invlog_tbl[ gf_arith_log_tbl[ a ] + gf_arith_log_tbl[ c ] ] ) + +#define GF_ANY( x ) (!!(x)) #endif /*HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_none_h */ diff --git a/src/ballet/reedsol/fd_reedsol_internal_128.c b/src/ballet/reedsol/fd_reedsol_encode_128.c similarity index 100% rename from src/ballet/reedsol/fd_reedsol_internal_128.c rename to src/ballet/reedsol/fd_reedsol_encode_128.c diff --git a/src/ballet/reedsol/fd_reedsol_internal_16.c b/src/ballet/reedsol/fd_reedsol_encode_16.c similarity index 100% rename from src/ballet/reedsol/fd_reedsol_internal_16.c rename to src/ballet/reedsol/fd_reedsol_encode_16.c diff --git a/src/ballet/reedsol/fd_reedsol_internal_32.c b/src/ballet/reedsol/fd_reedsol_encode_32.c similarity index 100% rename from src/ballet/reedsol/fd_reedsol_internal_32.c rename to src/ballet/reedsol/fd_reedsol_encode_32.c diff --git a/src/ballet/reedsol/fd_reedsol_internal_64.c b/src/ballet/reedsol/fd_reedsol_encode_64.c similarity index 100% rename from src/ballet/reedsol/fd_reedsol_internal_64.c rename to src/ballet/reedsol/fd_reedsol_encode_64.c diff --git a/src/ballet/reedsol/fd_reedsol_fderiv.h b/src/ballet/reedsol/fd_reedsol_fderiv.h new file mode 100644 index 00000000000..016b47cad0d --- /dev/null +++ b/src/ballet/reedsol/fd_reedsol_fderiv.h @@ -0,0 +1,3423 @@ + +/* Note: This file is auto generated. */ +#ifndef HEADER_fd_src_ballet_reedsol_fd_reedsol_fderiv_h +#define HEADER_fd_src_ballet_reedsol_fd_reedsol_fderiv_h + +/* This file implements the formal derivative computation of a + polynomial stored in the coefficient basis. The computation is + described in section IV of the Lin, et al. paper, and especially part + IV.B. + + The main macro this file provides is FD_REEDSOL_GEN_FDERIV. The rest + of this file is auto-generated implementation details. + + The formal derivative of a polynomial P(x) over a finite field comes + from treating the x as if it were a real value, taking the + derivative, and then re-interpreting the resulting polynomial back as + being over the original finite field. More precisely, it's the + linear operator on polynomials that maps x^n to + x^(n-1) + x^(n-1) + ... 
+ x^(n-1) + |-------------------------------| + n terms + + Since our finite field is GF(2^8), then x^n maps to 0 if n is even + and x^(n-1) if n is odd. + + Basically, this operator is useful because it obeys the formal + equivalent of the product rule. */ + +#ifndef FD_REEDSOL_GF_ARITH_DEFINED +#error "You must include fd_reedsol_arith_gfni.h or fd_reedsol_arith_avx2.h before including this file" +#endif + +/* FD_REEDSOL_GEN_FDERIV: Inserts code to compute the formal derivative + of a polynomial of length n, where both the input and output + polynomial are in the coefficient basis. + + n must be a power of 2 (only 16, 32, 64, 128 are emitted by the code + generator at the moment). + + The n arguments that follow the first should be vector variables of + type gf_t. These are used as input and output, since there's no + other good way to return n vector values. As such, this macro is not + robust. + + The formal derivative is computed in a vectorized fashion, i.e. the + transform of the ith byte is computed and stored in the ith byte of + the output for each i independently. */ + +#define FD_REEDSOL_PRIVATE_EXPAND( M, ... ) M( __VA_ARGS__ ) + +#define FD_REEDSOL_GENERATE_FDERIV( n, ...) FD_REEDSOL_PRIVATE_EXPAND( FD_REEDSOL_FDERIV_IMPL_##n, __VA_ARGS__ ) + +#define FD_REEDSOL_FDERIV_IMPL_16( in00 , \ + in01, in02, in03, in04, in05, in06, in07, \ + in08, in09, in10, in11, in12, in13, in14, \ + in15) \ + do { \ + in00 = GF_MUL( in00, 1 ); \ + in00 = gf_zero(); \ + in01 = GF_MUL( in01, 1 ); \ + in00 = GF_ADD( in00, in01 ); \ + in01 = gf_zero(); \ + in02 = GF_MUL( in02, 122 ); \ + in00 = GF_ADD( in00, in02 ); \ + in02 = gf_zero(); \ + in03 = GF_MUL( in03, 122 ); \ + in02 = GF_ADD( in02, in03 ); \ + in01 = GF_ADD( in01, in03 ); \ + in03 = gf_zero(); \ + in04 = GF_MUL( in04, 32 ); \ + in00 = GF_ADD( in00, in04 ); \ + in04 = gf_zero(); \ + in05 = GF_MUL( in05, 32 ); \ + in04 = GF_ADD( in04, in05 ); \ + in01 = GF_ADD( in01, in05 ); \ + in05 = gf_zero(); \ + in06 = GF_MUL( in06, 251 ); \ + in04 = GF_ADD( in04, in06 ); \ + in02 = GF_ADD( in02, in06 ); \ + in06 = gf_zero(); \ + in07 = GF_MUL( in07, 251 ); \ + in06 = GF_ADD( in06, in07 ); \ + in05 = GF_ADD( in05, in07 ); \ + in03 = GF_ADD( in03, in07 ); \ + in07 = gf_zero(); \ + in08 = GF_MUL( in08, 41 ); \ + in00 = GF_ADD( in00, in08 ); \ + in08 = gf_zero(); \ + in09 = GF_MUL( in09, 41 ); \ + in08 = GF_ADD( in08, in09 ); \ + in01 = GF_ADD( in01, in09 ); \ + in09 = gf_zero(); \ + in10 = GF_MUL( in10, 118 ); \ + in08 = GF_ADD( in08, in10 ); \ + in02 = GF_ADD( in02, in10 ); \ + in10 = gf_zero(); \ + in11 = GF_MUL( in11, 118 ); \ + in10 = GF_ADD( in10, in11 ); \ + in09 = GF_ADD( in09, in11 ); \ + in03 = GF_ADD( in03, in11 ); \ + in11 = gf_zero(); \ + in12 = GF_MUL( in12, 73 ); \ + in08 = GF_ADD( in08, in12 ); \ + in04 = GF_ADD( in04, in12 ); \ + in12 = gf_zero(); \ + in13 = GF_MUL( in13, 73 ); \ + in12 = GF_ADD( in12, in13 ); \ + in09 = GF_ADD( in09, in13 ); \ + in05 = GF_ADD( in05, in13 ); \ + in13 = gf_zero(); \ + in14 = GF_MUL( in14, 102 ); \ + in12 = GF_ADD( in12, in14 ); \ + in10 = GF_ADD( in10, in14 ); \ + in06 = GF_ADD( in06, in14 ); \ + in14 = gf_zero(); \ + in15 = GF_MUL( in15, 102 ); \ + in14 = GF_ADD( in14, in15 ); \ + in13 = GF_ADD( in13, in15 ); \ + in11 = GF_ADD( in11, in15 ); \ + in07 = GF_ADD( in07, in15 ); \ + in15 = gf_zero(); \ + in00 = GF_MUL( in00, 1 ); \ + in01 = GF_MUL( in01, 1 ); \ + in02 = GF_MUL( in02, 6 ); \ + in03 = GF_MUL( in03, 6 ); \ + in04 = GF_MUL( in04, 108 ); \ + in05 = GF_MUL( in05, 108 ); \ + in06 = GF_MUL( in06, 117 
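A quick worked example of the x^n rule stated above, written in ordinary monomial form (illustration only; the generated code below applies the same idea in the coefficient basis): since the usual derivative coefficient n*a_n is taken mod 2,

  P (x) = a3 x^3 + a2 x^2 + a1 x + a0
  P'(x) =    a3 x^2            + a1

so every even-degree term contributes nothing and every odd-degree term drops one degree with its coefficient unchanged.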
); \ + in07 = GF_MUL( in07, 117 ); \ + in08 = GF_MUL( in08, 208 ); \ + in09 = GF_MUL( in09, 208 ); \ + in10 = GF_MUL( in10, 218 ); \ + in11 = GF_MUL( in11, 218 ); \ + in12 = GF_MUL( in12, 136 ); \ + in13 = GF_MUL( in13, 136 ); \ + in14 = GF_MUL( in14, 23 ); \ + in15 = GF_MUL( in15, 23 ); \ + } while( 0 ) + + + +#define FD_REEDSOL_FDERIV_IMPL_32( in00 , \ + in01, in02, in03, in04, in05, in06, in07, \ + in08, in09, in10, in11, in12, in13, in14, \ + in15, in16, in17, in18, in19, in20, in21, \ + in22, in23, in24, in25, in26, in27, in28, \ + in29, in30, in31) \ + do { \ + in00 = GF_MUL( in00, 1 ); \ + in00 = gf_zero(); \ + in01 = GF_MUL( in01, 1 ); \ + in00 = GF_ADD( in00, in01 ); \ + in01 = gf_zero(); \ + in02 = GF_MUL( in02, 122 ); \ + in00 = GF_ADD( in00, in02 ); \ + in02 = gf_zero(); \ + in03 = GF_MUL( in03, 122 ); \ + in02 = GF_ADD( in02, in03 ); \ + in01 = GF_ADD( in01, in03 ); \ + in03 = gf_zero(); \ + in04 = GF_MUL( in04, 32 ); \ + in00 = GF_ADD( in00, in04 ); \ + in04 = gf_zero(); \ + in05 = GF_MUL( in05, 32 ); \ + in04 = GF_ADD( in04, in05 ); \ + in01 = GF_ADD( in01, in05 ); \ + in05 = gf_zero(); \ + in06 = GF_MUL( in06, 251 ); \ + in04 = GF_ADD( in04, in06 ); \ + in02 = GF_ADD( in02, in06 ); \ + in06 = gf_zero(); \ + in07 = GF_MUL( in07, 251 ); \ + in06 = GF_ADD( in06, in07 ); \ + in05 = GF_ADD( in05, in07 ); \ + in03 = GF_ADD( in03, in07 ); \ + in07 = gf_zero(); \ + in08 = GF_MUL( in08, 41 ); \ + in00 = GF_ADD( in00, in08 ); \ + in08 = gf_zero(); \ + in09 = GF_MUL( in09, 41 ); \ + in08 = GF_ADD( in08, in09 ); \ + in01 = GF_ADD( in01, in09 ); \ + in09 = gf_zero(); \ + in10 = GF_MUL( in10, 118 ); \ + in08 = GF_ADD( in08, in10 ); \ + in02 = GF_ADD( in02, in10 ); \ + in10 = gf_zero(); \ + in11 = GF_MUL( in11, 118 ); \ + in10 = GF_ADD( in10, in11 ); \ + in09 = GF_ADD( in09, in11 ); \ + in03 = GF_ADD( in03, in11 ); \ + in11 = gf_zero(); \ + in12 = GF_MUL( in12, 73 ); \ + in08 = GF_ADD( in08, in12 ); \ + in04 = GF_ADD( in04, in12 ); \ + in12 = gf_zero(); \ + in13 = GF_MUL( in13, 73 ); \ + in12 = GF_ADD( in12, in13 ); \ + in09 = GF_ADD( in09, in13 ); \ + in05 = GF_ADD( in05, in13 ); \ + in13 = gf_zero(); \ + in14 = GF_MUL( in14, 102 ); \ + in12 = GF_ADD( in12, in14 ); \ + in10 = GF_ADD( in10, in14 ); \ + in06 = GF_ADD( in06, in14 ); \ + in14 = gf_zero(); \ + in15 = GF_MUL( in15, 102 ); \ + in14 = GF_ADD( in14, in15 ); \ + in13 = GF_ADD( in13, in15 ); \ + in11 = GF_ADD( in11, in15 ); \ + in07 = GF_ADD( in07, in15 ); \ + in15 = gf_zero(); \ + in16 = GF_MUL( in16, 31 ); \ + in00 = GF_ADD( in00, in16 ); \ + in16 = gf_zero(); \ + in17 = GF_MUL( in17, 31 ); \ + in16 = GF_ADD( in16, in17 ); \ + in01 = GF_ADD( in01, in17 ); \ + in17 = gf_zero(); \ + in18 = GF_MUL( in18, 127 ); \ + in16 = GF_ADD( in16, in18 ); \ + in02 = GF_ADD( in02, in18 ); \ + in18 = gf_zero(); \ + in19 = GF_MUL( in19, 127 ); \ + in18 = GF_ADD( in18, in19 ); \ + in17 = GF_ADD( in17, in19 ); \ + in03 = GF_ADD( in03, in19 ); \ + in19 = gf_zero(); \ + in20 = GF_MUL( in20, 199 ); \ + in16 = GF_ADD( in16, in20 ); \ + in04 = GF_ADD( in04, in20 ); \ + in20 = gf_zero(); \ + in21 = GF_MUL( in21, 199 ); \ + in20 = GF_ADD( in20, in21 ); \ + in17 = GF_ADD( in17, in21 ); \ + in05 = GF_ADD( in05, in21 ); \ + in21 = gf_zero(); \ + in22 = GF_MUL( in22, 91 ); \ + in20 = GF_ADD( in20, in22 ); \ + in18 = GF_ADD( in18, in22 ); \ + in06 = GF_ADD( in06, in22 ); \ + in22 = gf_zero(); \ + in23 = GF_MUL( in23, 91 ); \ + in22 = GF_ADD( in22, in23 ); \ + in21 = GF_ADD( in21, in23 ); \ + in19 = GF_ADD( in19, in23 ); \ + in07 = GF_ADD( in07, in23 ); \ + in23 
= gf_zero(); \ + in24 = GF_MUL( in24, 32 ); \ + in16 = GF_ADD( in16, in24 ); \ + in08 = GF_ADD( in08, in24 ); \ + in24 = gf_zero(); \ + in25 = GF_MUL( in25, 32 ); \ + in24 = GF_ADD( in24, in25 ); \ + in17 = GF_ADD( in17, in25 ); \ + in09 = GF_ADD( in09, in25 ); \ + in25 = gf_zero(); \ + in26 = GF_MUL( in26, 251 ); \ + in24 = GF_ADD( in24, in26 ); \ + in18 = GF_ADD( in18, in26 ); \ + in10 = GF_ADD( in10, in26 ); \ + in26 = gf_zero(); \ + in27 = GF_MUL( in27, 251 ); \ + in26 = GF_ADD( in26, in27 ); \ + in25 = GF_ADD( in25, in27 ); \ + in19 = GF_ADD( in19, in27 ); \ + in11 = GF_ADD( in11, in27 ); \ + in27 = gf_zero(); \ + in28 = GF_MUL( in28, 116 ); \ + in24 = GF_ADD( in24, in28 ); \ + in20 = GF_ADD( in20, in28 ); \ + in12 = GF_ADD( in12, in28 ); \ + in28 = gf_zero(); \ + in29 = GF_MUL( in29, 116 ); \ + in28 = GF_ADD( in28, in29 ); \ + in25 = GF_ADD( in25, in29 ); \ + in21 = GF_ADD( in21, in29 ); \ + in13 = GF_ADD( in13, in29 ); \ + in29 = gf_zero(); \ + in30 = GF_MUL( in30, 22 ); \ + in28 = GF_ADD( in28, in30 ); \ + in26 = GF_ADD( in26, in30 ); \ + in22 = GF_ADD( in22, in30 ); \ + in14 = GF_ADD( in14, in30 ); \ + in30 = gf_zero(); \ + in31 = GF_MUL( in31, 22 ); \ + in30 = GF_ADD( in30, in31 ); \ + in29 = GF_ADD( in29, in31 ); \ + in27 = GF_ADD( in27, in31 ); \ + in23 = GF_ADD( in23, in31 ); \ + in15 = GF_ADD( in15, in31 ); \ + in31 = gf_zero(); \ + in00 = GF_MUL( in00, 1 ); \ + in01 = GF_MUL( in01, 1 ); \ + in02 = GF_MUL( in02, 6 ); \ + in03 = GF_MUL( in03, 6 ); \ + in04 = GF_MUL( in04, 108 ); \ + in05 = GF_MUL( in05, 108 ); \ + in06 = GF_MUL( in06, 117 ); \ + in07 = GF_MUL( in07, 117 ); \ + in08 = GF_MUL( in08, 208 ); \ + in09 = GF_MUL( in09, 208 ); \ + in10 = GF_MUL( in10, 218 ); \ + in11 = GF_MUL( in11, 218 ); \ + in12 = GF_MUL( in12, 136 ); \ + in13 = GF_MUL( in13, 136 ); \ + in14 = GF_MUL( in14, 23 ); \ + in15 = GF_MUL( in15, 23 ); \ + in16 = GF_MUL( in16, 42 ); \ + in17 = GF_MUL( in17, 42 ); \ + in18 = GF_MUL( in18, 252 ); \ + in19 = GF_MUL( in19, 252 ); \ + in20 = GF_MUL( in20, 158 ); \ + in21 = GF_MUL( in21, 158 ); \ + in22 = GF_MUL( in22, 99 ); \ + in23 = GF_MUL( in23, 99 ); \ + in24 = GF_MUL( in24, 108 ); \ + in25 = GF_MUL( in25, 108 ); \ + in26 = GF_MUL( in26, 117 ); \ + in27 = GF_MUL( in27, 117 ); \ + in28 = GF_MUL( in28, 233 ); \ + in29 = GF_MUL( in29, 233 ); \ + in30 = GF_MUL( in30, 76 ); \ + in31 = GF_MUL( in31, 76 ); \ + } while( 0 ) + + + +#define FD_REEDSOL_FDERIV_IMPL_64( in00 , \ + in01, in02, in03, in04, in05, in06, in07, \ + in08, in09, in10, in11, in12, in13, in14, \ + in15, in16, in17, in18, in19, in20, in21, \ + in22, in23, in24, in25, in26, in27, in28, \ + in29, in30, in31, in32, in33, in34, in35, \ + in36, in37, in38, in39, in40, in41, in42, \ + in43, in44, in45, in46, in47, in48, in49, \ + in50, in51, in52, in53, in54, in55, in56, \ + in57, in58, in59, in60, in61, in62, in63) \ + do { \ + in00 = GF_MUL( in00, 1 ); \ + in00 = gf_zero(); \ + in01 = GF_MUL( in01, 1 ); \ + in00 = GF_ADD( in00, in01 ); \ + in01 = gf_zero(); \ + in02 = GF_MUL( in02, 122 ); \ + in00 = GF_ADD( in00, in02 ); \ + in02 = gf_zero(); \ + in03 = GF_MUL( in03, 122 ); \ + in02 = GF_ADD( in02, in03 ); \ + in01 = GF_ADD( in01, in03 ); \ + in03 = gf_zero(); \ + in04 = GF_MUL( in04, 32 ); \ + in00 = GF_ADD( in00, in04 ); \ + in04 = gf_zero(); \ + in05 = GF_MUL( in05, 32 ); \ + in04 = GF_ADD( in04, in05 ); \ + in01 = GF_ADD( in01, in05 ); \ + in05 = gf_zero(); \ + in06 = GF_MUL( in06, 251 ); \ + in04 = GF_ADD( in04, in06 ); \ + in02 = GF_ADD( in02, in06 ); \ + in06 = gf_zero(); \ + in07 = GF_MUL( 
in07, 251 ); \ + in06 = GF_ADD( in06, in07 ); \ + in05 = GF_ADD( in05, in07 ); \ + in03 = GF_ADD( in03, in07 ); \ + in07 = gf_zero(); \ + in08 = GF_MUL( in08, 41 ); \ + in00 = GF_ADD( in00, in08 ); \ + in08 = gf_zero(); \ + in09 = GF_MUL( in09, 41 ); \ + in08 = GF_ADD( in08, in09 ); \ + in01 = GF_ADD( in01, in09 ); \ + in09 = gf_zero(); \ + in10 = GF_MUL( in10, 118 ); \ + in08 = GF_ADD( in08, in10 ); \ + in02 = GF_ADD( in02, in10 ); \ + in10 = gf_zero(); \ + in11 = GF_MUL( in11, 118 ); \ + in10 = GF_ADD( in10, in11 ); \ + in09 = GF_ADD( in09, in11 ); \ + in03 = GF_ADD( in03, in11 ); \ + in11 = gf_zero(); \ + in12 = GF_MUL( in12, 73 ); \ + in08 = GF_ADD( in08, in12 ); \ + in04 = GF_ADD( in04, in12 ); \ + in12 = gf_zero(); \ + in13 = GF_MUL( in13, 73 ); \ + in12 = GF_ADD( in12, in13 ); \ + in09 = GF_ADD( in09, in13 ); \ + in05 = GF_ADD( in05, in13 ); \ + in13 = gf_zero(); \ + in14 = GF_MUL( in14, 102 ); \ + in12 = GF_ADD( in12, in14 ); \ + in10 = GF_ADD( in10, in14 ); \ + in06 = GF_ADD( in06, in14 ); \ + in14 = gf_zero(); \ + in15 = GF_MUL( in15, 102 ); \ + in14 = GF_ADD( in14, in15 ); \ + in13 = GF_ADD( in13, in15 ); \ + in11 = GF_ADD( in11, in15 ); \ + in07 = GF_ADD( in07, in15 ); \ + in15 = gf_zero(); \ + in16 = GF_MUL( in16, 31 ); \ + in00 = GF_ADD( in00, in16 ); \ + in16 = gf_zero(); \ + in17 = GF_MUL( in17, 31 ); \ + in16 = GF_ADD( in16, in17 ); \ + in01 = GF_ADD( in01, in17 ); \ + in17 = gf_zero(); \ + in18 = GF_MUL( in18, 127 ); \ + in16 = GF_ADD( in16, in18 ); \ + in02 = GF_ADD( in02, in18 ); \ + in18 = gf_zero(); \ + in19 = GF_MUL( in19, 127 ); \ + in18 = GF_ADD( in18, in19 ); \ + in17 = GF_ADD( in17, in19 ); \ + in03 = GF_ADD( in03, in19 ); \ + in19 = gf_zero(); \ + in20 = GF_MUL( in20, 199 ); \ + in16 = GF_ADD( in16, in20 ); \ + in04 = GF_ADD( in04, in20 ); \ + in20 = gf_zero(); \ + in21 = GF_MUL( in21, 199 ); \ + in20 = GF_ADD( in20, in21 ); \ + in17 = GF_ADD( in17, in21 ); \ + in05 = GF_ADD( in05, in21 ); \ + in21 = gf_zero(); \ + in22 = GF_MUL( in22, 91 ); \ + in20 = GF_ADD( in20, in22 ); \ + in18 = GF_ADD( in18, in22 ); \ + in06 = GF_ADD( in06, in22 ); \ + in22 = gf_zero(); \ + in23 = GF_MUL( in23, 91 ); \ + in22 = GF_ADD( in22, in23 ); \ + in21 = GF_ADD( in21, in23 ); \ + in19 = GF_ADD( in19, in23 ); \ + in07 = GF_ADD( in07, in23 ); \ + in23 = gf_zero(); \ + in24 = GF_MUL( in24, 32 ); \ + in16 = GF_ADD( in16, in24 ); \ + in08 = GF_ADD( in08, in24 ); \ + in24 = gf_zero(); \ + in25 = GF_MUL( in25, 32 ); \ + in24 = GF_ADD( in24, in25 ); \ + in17 = GF_ADD( in17, in25 ); \ + in09 = GF_ADD( in09, in25 ); \ + in25 = gf_zero(); \ + in26 = GF_MUL( in26, 251 ); \ + in24 = GF_ADD( in24, in26 ); \ + in18 = GF_ADD( in18, in26 ); \ + in10 = GF_ADD( in10, in26 ); \ + in26 = gf_zero(); \ + in27 = GF_MUL( in27, 251 ); \ + in26 = GF_ADD( in26, in27 ); \ + in25 = GF_ADD( in25, in27 ); \ + in19 = GF_ADD( in19, in27 ); \ + in11 = GF_ADD( in11, in27 ); \ + in27 = gf_zero(); \ + in28 = GF_MUL( in28, 116 ); \ + in24 = GF_ADD( in24, in28 ); \ + in20 = GF_ADD( in20, in28 ); \ + in12 = GF_ADD( in12, in28 ); \ + in28 = gf_zero(); \ + in29 = GF_MUL( in29, 116 ); \ + in28 = GF_ADD( in28, in29 ); \ + in25 = GF_ADD( in25, in29 ); \ + in21 = GF_ADD( in21, in29 ); \ + in13 = GF_ADD( in13, in29 ); \ + in29 = gf_zero(); \ + in30 = GF_MUL( in30, 22 ); \ + in28 = GF_ADD( in28, in30 ); \ + in26 = GF_ADD( in26, in30 ); \ + in22 = GF_ADD( in22, in30 ); \ + in14 = GF_ADD( in14, in30 ); \ + in30 = gf_zero(); \ + in31 = GF_MUL( in31, 22 ); \ + in30 = GF_ADD( in30, in31 ); \ + in29 = GF_ADD( in29, in31 ); \ + in27 = 
GF_ADD( in27, in31 ); \ + in23 = GF_ADD( in23, in31 ); \ + in15 = GF_ADD( in15, in31 ); \ + in31 = gf_zero(); \ + in32 = GF_MUL( in32, 194 ); \ + in00 = GF_ADD( in00, in32 ); \ + in32 = gf_zero(); \ + in33 = GF_MUL( in33, 194 ); \ + in32 = GF_ADD( in32, in33 ); \ + in01 = GF_ADD( in01, in33 ); \ + in33 = gf_zero(); \ + in34 = GF_MUL( in34, 212 ); \ + in32 = GF_ADD( in32, in34 ); \ + in02 = GF_ADD( in02, in34 ); \ + in34 = gf_zero(); \ + in35 = GF_MUL( in35, 212 ); \ + in34 = GF_ADD( in34, in35 ); \ + in33 = GF_ADD( in33, in35 ); \ + in03 = GF_ADD( in03, in35 ); \ + in35 = gf_zero(); \ + in36 = GF_MUL( in36, 101 ); \ + in32 = GF_ADD( in32, in36 ); \ + in04 = GF_ADD( in04, in36 ); \ + in36 = gf_zero(); \ + in37 = GF_MUL( in37, 101 ); \ + in36 = GF_ADD( in36, in37 ); \ + in33 = GF_ADD( in33, in37 ); \ + in05 = GF_ADD( in05, in37 ); \ + in37 = gf_zero(); \ + in38 = GF_MUL( in38, 159 ); \ + in36 = GF_ADD( in36, in38 ); \ + in34 = GF_ADD( in34, in38 ); \ + in06 = GF_ADD( in06, in38 ); \ + in38 = gf_zero(); \ + in39 = GF_MUL( in39, 159 ); \ + in38 = GF_ADD( in38, in39 ); \ + in37 = GF_ADD( in37, in39 ); \ + in35 = GF_ADD( in35, in39 ); \ + in07 = GF_ADD( in07, in39 ); \ + in39 = gf_zero(); \ + in40 = GF_MUL( in40, 249 ); \ + in32 = GF_ADD( in32, in40 ); \ + in08 = GF_ADD( in08, in40 ); \ + in40 = gf_zero(); \ + in41 = GF_MUL( in41, 249 ); \ + in40 = GF_ADD( in40, in41 ); \ + in33 = GF_ADD( in33, in41 ); \ + in09 = GF_ADD( in09, in41 ); \ + in41 = gf_zero(); \ + in42 = GF_MUL( in42, 165 ); \ + in40 = GF_ADD( in40, in42 ); \ + in34 = GF_ADD( in34, in42 ); \ + in10 = GF_ADD( in10, in42 ); \ + in42 = gf_zero(); \ + in43 = GF_MUL( in43, 165 ); \ + in42 = GF_ADD( in42, in43 ); \ + in41 = GF_ADD( in41, in43 ); \ + in35 = GF_ADD( in35, in43 ); \ + in11 = GF_ADD( in11, in43 ); \ + in43 = gf_zero(); \ + in44 = GF_MUL( in44, 86 ); \ + in40 = GF_ADD( in40, in44 ); \ + in36 = GF_ADD( in36, in44 ); \ + in12 = GF_ADD( in12, in44 ); \ + in44 = gf_zero(); \ + in45 = GF_MUL( in45, 86 ); \ + in44 = GF_ADD( in44, in45 ); \ + in41 = GF_ADD( in41, in45 ); \ + in37 = GF_ADD( in37, in45 ); \ + in13 = GF_ADD( in13, in45 ); \ + in45 = gf_zero(); \ + in46 = GF_MUL( in46, 25 ); \ + in44 = GF_ADD( in44, in46 ); \ + in42 = GF_ADD( in42, in46 ); \ + in38 = GF_ADD( in38, in46 ); \ + in14 = GF_ADD( in14, in46 ); \ + in46 = gf_zero(); \ + in47 = GF_MUL( in47, 25 ); \ + in46 = GF_ADD( in46, in47 ); \ + in45 = GF_ADD( in45, in47 ); \ + in43 = GF_ADD( in43, in47 ); \ + in39 = GF_ADD( in39, in47 ); \ + in15 = GF_ADD( in15, in47 ); \ + in47 = gf_zero(); \ + in48 = GF_MUL( in48, 150 ); \ + in32 = GF_ADD( in32, in48 ); \ + in16 = GF_ADD( in16, in48 ); \ + in48 = gf_zero(); \ + in49 = GF_MUL( in49, 150 ); \ + in48 = GF_ADD( in48, in49 ); \ + in33 = GF_ADD( in33, in49 ); \ + in17 = GF_ADD( in17, in49 ); \ + in49 = gf_zero(); \ + in50 = GF_MUL( in50, 57 ); \ + in48 = GF_ADD( in48, in50 ); \ + in34 = GF_ADD( in34, in50 ); \ + in18 = GF_ADD( in18, in50 ); \ + in50 = gf_zero(); \ + in51 = GF_MUL( in51, 57 ); \ + in50 = GF_ADD( in50, in51 ); \ + in49 = GF_ADD( in49, in51 ); \ + in35 = GF_ADD( in35, in51 ); \ + in19 = GF_ADD( in19, in51 ); \ + in51 = gf_zero(); \ + in52 = GF_MUL( in52, 55 ); \ + in48 = GF_ADD( in48, in52 ); \ + in36 = GF_ADD( in36, in52 ); \ + in20 = GF_ADD( in20, in52 ); \ + in52 = gf_zero(); \ + in53 = GF_MUL( in53, 55 ); \ + in52 = GF_ADD( in52, in53 ); \ + in49 = GF_ADD( in49, in53 ); \ + in37 = GF_ADD( in37, in53 ); \ + in21 = GF_ADD( in21, in53 ); \ + in53 = gf_zero(); \ + in54 = GF_MUL( in54, 115 ); \ + in52 = GF_ADD( 
in52, in54 ); \ + in50 = GF_ADD( in50, in54 ); \ + in38 = GF_ADD( in38, in54 ); \ + in22 = GF_ADD( in22, in54 ); \ + in54 = gf_zero(); \ + in55 = GF_MUL( in55, 115 ); \ + in54 = GF_ADD( in54, in55 ); \ + in53 = GF_ADD( in53, in55 ); \ + in51 = GF_ADD( in51, in55 ); \ + in39 = GF_ADD( in39, in55 ); \ + in23 = GF_ADD( in23, in55 ); \ + in55 = gf_zero(); \ + in56 = GF_MUL( in56, 101 ); \ + in48 = GF_ADD( in48, in56 ); \ + in40 = GF_ADD( in40, in56 ); \ + in24 = GF_ADD( in24, in56 ); \ + in56 = gf_zero(); \ + in57 = GF_MUL( in57, 101 ); \ + in56 = GF_ADD( in56, in57 ); \ + in49 = GF_ADD( in49, in57 ); \ + in41 = GF_ADD( in41, in57 ); \ + in25 = GF_ADD( in25, in57 ); \ + in57 = gf_zero(); \ + in58 = GF_MUL( in58, 159 ); \ + in56 = GF_ADD( in56, in58 ); \ + in50 = GF_ADD( in50, in58 ); \ + in42 = GF_ADD( in42, in58 ); \ + in26 = GF_ADD( in26, in58 ); \ + in58 = gf_zero(); \ + in59 = GF_MUL( in59, 159 ); \ + in58 = GF_ADD( in58, in59 ); \ + in57 = GF_ADD( in57, in59 ); \ + in51 = GF_ADD( in51, in59 ); \ + in43 = GF_ADD( in43, in59 ); \ + in27 = GF_ADD( in27, in59 ); \ + in59 = gf_zero(); \ + in60 = GF_MUL( in60, 60 ); \ + in56 = GF_ADD( in56, in60 ); \ + in52 = GF_ADD( in52, in60 ); \ + in44 = GF_ADD( in44, in60 ); \ + in28 = GF_ADD( in28, in60 ); \ + in60 = gf_zero(); \ + in61 = GF_MUL( in61, 60 ); \ + in60 = GF_ADD( in60, in61 ); \ + in57 = GF_ADD( in57, in61 ); \ + in53 = GF_ADD( in53, in61 ); \ + in45 = GF_ADD( in45, in61 ); \ + in29 = GF_ADD( in29, in61 ); \ + in61 = gf_zero(); \ + in62 = GF_MUL( in62, 10 ); \ + in60 = GF_ADD( in60, in62 ); \ + in58 = GF_ADD( in58, in62 ); \ + in54 = GF_ADD( in54, in62 ); \ + in46 = GF_ADD( in46, in62 ); \ + in30 = GF_ADD( in30, in62 ); \ + in62 = gf_zero(); \ + in63 = GF_MUL( in63, 10 ); \ + in62 = GF_ADD( in62, in63 ); \ + in61 = GF_ADD( in61, in63 ); \ + in59 = GF_ADD( in59, in63 ); \ + in55 = GF_ADD( in55, in63 ); \ + in47 = GF_ADD( in47, in63 ); \ + in31 = GF_ADD( in31, in63 ); \ + in63 = gf_zero(); \ + in00 = GF_MUL( in00, 1 ); \ + in01 = GF_MUL( in01, 1 ); \ + in02 = GF_MUL( in02, 6 ); \ + in03 = GF_MUL( in03, 6 ); \ + in04 = GF_MUL( in04, 108 ); \ + in05 = GF_MUL( in05, 108 ); \ + in06 = GF_MUL( in06, 117 ); \ + in07 = GF_MUL( in07, 117 ); \ + in08 = GF_MUL( in08, 208 ); \ + in09 = GF_MUL( in09, 208 ); \ + in10 = GF_MUL( in10, 218 ); \ + in11 = GF_MUL( in11, 218 ); \ + in12 = GF_MUL( in12, 136 ); \ + in13 = GF_MUL( in13, 136 ); \ + in14 = GF_MUL( in14, 23 ); \ + in15 = GF_MUL( in15, 23 ); \ + in16 = GF_MUL( in16, 42 ); \ + in17 = GF_MUL( in17, 42 ); \ + in18 = GF_MUL( in18, 252 ); \ + in19 = GF_MUL( in19, 252 ); \ + in20 = GF_MUL( in20, 158 ); \ + in21 = GF_MUL( in21, 158 ); \ + in22 = GF_MUL( in22, 99 ); \ + in23 = GF_MUL( in23, 99 ); \ + in24 = GF_MUL( in24, 108 ); \ + in25 = GF_MUL( in25, 108 ); \ + in26 = GF_MUL( in26, 117 ); \ + in27 = GF_MUL( in27, 117 ); \ + in28 = GF_MUL( in28, 233 ); \ + in29 = GF_MUL( in29, 233 ); \ + in30 = GF_MUL( in30, 76 ); \ + in31 = GF_MUL( in31, 76 ); \ + in32 = GF_MUL( in32, 165 ); \ + in33 = GF_MUL( in33, 165 ); \ + in34 = GF_MUL( in34, 249 ); \ + in35 = GF_MUL( in35, 249 ); \ + in36 = GF_MUL( in36, 196 ); \ + in37 = GF_MUL( in37, 196 ); \ + in38 = GF_MUL( in38, 162 ); \ + in39 = GF_MUL( in39, 162 ); \ + in40 = GF_MUL( in40, 212 ); \ + in41 = GF_MUL( in41, 212 ); \ + in42 = GF_MUL( in42, 194 ); \ + in43 = GF_MUL( in43, 194 ); \ + in44 = GF_MUL( in44, 37 ); \ + in45 = GF_MUL( in45, 37 ); \ + in46 = GF_MUL( in46, 222 ); \ + in47 = GF_MUL( in47, 222 ); \ + in48 = GF_MUL( in48, 15 ); \ + in49 = GF_MUL( in49, 15 ); \ + 
in50 = GF_MUL( in50, 34 ); \ + in51 = GF_MUL( in51, 34 ); \ + in52 = GF_MUL( in52, 94 ); \ + in53 = GF_MUL( in53, 94 ); \ + in54 = GF_MUL( in54, 217 ); \ + in55 = GF_MUL( in55, 217 ); \ + in56 = GF_MUL( in56, 196 ); \ + in57 = GF_MUL( in57, 196 ); \ + in58 = GF_MUL( in58, 162 ); \ + in59 = GF_MUL( in59, 162 ); \ + in60 = GF_MUL( in60, 171 ); \ + in61 = GF_MUL( in61, 171 ); \ + in62 = GF_MUL( in62, 221 ); \ + in63 = GF_MUL( in63, 221 ); \ + } while( 0 ) + + + +#define FD_REEDSOL_FDERIV_IMPL_128( in00, in01, \ + in02, in03, in04, in05, in06, in07, in08 , \ + in09, in10, in11, in12, in13, in14, in15 , \ + in16, in17, in18, in19, in20, in21, in22 , \ + in23, in24, in25, in26, in27, in28, in29 , \ + in30, in31, in32, in33, in34, in35, in36 , \ + in37, in38, in39, in40, in41, in42, in43 , \ + in44, in45, in46, in47, in48, in49, in50 , \ + in51, in52, in53, in54, in55, in56, in57 , \ + in58, in59, in60, in61, in62, in63, in64 , \ + in65, in66, in67, in68, in69, in70, in71 , \ + in72, in73, in74, in75, in76, in77, in78 , \ + in79, in80, in81, in82, in83, in84, in85 , \ + in86, in87, in88, in89, in90, in91, in92 , \ + in93, in94, in95, in96, in97, in98, in99 , \ + in100, in101, in102, in103, in104, in105 , \ + in106, in107, in108, in109, in110, in111 , \ + in112, in113, in114, in115, in116, in117 , \ + in118, in119, in120, in121, in122, in123 , \ + in124, in125, in126, in127) \ + do { \ + in00 = GF_MUL( in00, 1 ); \ + in00 = gf_zero(); \ + in01 = GF_MUL( in01, 1 ); \ + in00 = GF_ADD( in00, in01 ); \ + in01 = gf_zero(); \ + in02 = GF_MUL( in02, 122 ); \ + in00 = GF_ADD( in00, in02 ); \ + in02 = gf_zero(); \ + in03 = GF_MUL( in03, 122 ); \ + in02 = GF_ADD( in02, in03 ); \ + in01 = GF_ADD( in01, in03 ); \ + in03 = gf_zero(); \ + in04 = GF_MUL( in04, 32 ); \ + in00 = GF_ADD( in00, in04 ); \ + in04 = gf_zero(); \ + in05 = GF_MUL( in05, 32 ); \ + in04 = GF_ADD( in04, in05 ); \ + in01 = GF_ADD( in01, in05 ); \ + in05 = gf_zero(); \ + in06 = GF_MUL( in06, 251 ); \ + in04 = GF_ADD( in04, in06 ); \ + in02 = GF_ADD( in02, in06 ); \ + in06 = gf_zero(); \ + in07 = GF_MUL( in07, 251 ); \ + in06 = GF_ADD( in06, in07 ); \ + in05 = GF_ADD( in05, in07 ); \ + in03 = GF_ADD( in03, in07 ); \ + in07 = gf_zero(); \ + in08 = GF_MUL( in08, 41 ); \ + in00 = GF_ADD( in00, in08 ); \ + in08 = gf_zero(); \ + in09 = GF_MUL( in09, 41 ); \ + in08 = GF_ADD( in08, in09 ); \ + in01 = GF_ADD( in01, in09 ); \ + in09 = gf_zero(); \ + in10 = GF_MUL( in10, 118 ); \ + in08 = GF_ADD( in08, in10 ); \ + in02 = GF_ADD( in02, in10 ); \ + in10 = gf_zero(); \ + in11 = GF_MUL( in11, 118 ); \ + in10 = GF_ADD( in10, in11 ); \ + in09 = GF_ADD( in09, in11 ); \ + in03 = GF_ADD( in03, in11 ); \ + in11 = gf_zero(); \ + in12 = GF_MUL( in12, 73 ); \ + in08 = GF_ADD( in08, in12 ); \ + in04 = GF_ADD( in04, in12 ); \ + in12 = gf_zero(); \ + in13 = GF_MUL( in13, 73 ); \ + in12 = GF_ADD( in12, in13 ); \ + in09 = GF_ADD( in09, in13 ); \ + in05 = GF_ADD( in05, in13 ); \ + in13 = gf_zero(); \ + in14 = GF_MUL( in14, 102 ); \ + in12 = GF_ADD( in12, in14 ); \ + in10 = GF_ADD( in10, in14 ); \ + in06 = GF_ADD( in06, in14 ); \ + in14 = gf_zero(); \ + in15 = GF_MUL( in15, 102 ); \ + in14 = GF_ADD( in14, in15 ); \ + in13 = GF_ADD( in13, in15 ); \ + in11 = GF_ADD( in11, in15 ); \ + in07 = GF_ADD( in07, in15 ); \ + in15 = gf_zero(); \ + in16 = GF_MUL( in16, 31 ); \ + in00 = GF_ADD( in00, in16 ); \ + in16 = gf_zero(); \ + in17 = GF_MUL( in17, 31 ); \ + in16 = GF_ADD( in16, in17 ); \ + in01 = GF_ADD( in01, in17 ); \ + in17 = gf_zero(); \ + in18 = GF_MUL( in18, 127 ); \ + in16 
= GF_ADD( in16, in18 ); \ + in02 = GF_ADD( in02, in18 ); \ + in18 = gf_zero(); \ + in19 = GF_MUL( in19, 127 ); \ + in18 = GF_ADD( in18, in19 ); \ + in17 = GF_ADD( in17, in19 ); \ + in03 = GF_ADD( in03, in19 ); \ + in19 = gf_zero(); \ + in20 = GF_MUL( in20, 199 ); \ + in16 = GF_ADD( in16, in20 ); \ + in04 = GF_ADD( in04, in20 ); \ + in20 = gf_zero(); \ + in21 = GF_MUL( in21, 199 ); \ + in20 = GF_ADD( in20, in21 ); \ + in17 = GF_ADD( in17, in21 ); \ + in05 = GF_ADD( in05, in21 ); \ + in21 = gf_zero(); \ + in22 = GF_MUL( in22, 91 ); \ + in20 = GF_ADD( in20, in22 ); \ + in18 = GF_ADD( in18, in22 ); \ + in06 = GF_ADD( in06, in22 ); \ + in22 = gf_zero(); \ + in23 = GF_MUL( in23, 91 ); \ + in22 = GF_ADD( in22, in23 ); \ + in21 = GF_ADD( in21, in23 ); \ + in19 = GF_ADD( in19, in23 ); \ + in07 = GF_ADD( in07, in23 ); \ + in23 = gf_zero(); \ + in24 = GF_MUL( in24, 32 ); \ + in16 = GF_ADD( in16, in24 ); \ + in08 = GF_ADD( in08, in24 ); \ + in24 = gf_zero(); \ + in25 = GF_MUL( in25, 32 ); \ + in24 = GF_ADD( in24, in25 ); \ + in17 = GF_ADD( in17, in25 ); \ + in09 = GF_ADD( in09, in25 ); \ + in25 = gf_zero(); \ + in26 = GF_MUL( in26, 251 ); \ + in24 = GF_ADD( in24, in26 ); \ + in18 = GF_ADD( in18, in26 ); \ + in10 = GF_ADD( in10, in26 ); \ + in26 = gf_zero(); \ + in27 = GF_MUL( in27, 251 ); \ + in26 = GF_ADD( in26, in27 ); \ + in25 = GF_ADD( in25, in27 ); \ + in19 = GF_ADD( in19, in27 ); \ + in11 = GF_ADD( in11, in27 ); \ + in27 = gf_zero(); \ + in28 = GF_MUL( in28, 116 ); \ + in24 = GF_ADD( in24, in28 ); \ + in20 = GF_ADD( in20, in28 ); \ + in12 = GF_ADD( in12, in28 ); \ + in28 = gf_zero(); \ + in29 = GF_MUL( in29, 116 ); \ + in28 = GF_ADD( in28, in29 ); \ + in25 = GF_ADD( in25, in29 ); \ + in21 = GF_ADD( in21, in29 ); \ + in13 = GF_ADD( in13, in29 ); \ + in29 = gf_zero(); \ + in30 = GF_MUL( in30, 22 ); \ + in28 = GF_ADD( in28, in30 ); \ + in26 = GF_ADD( in26, in30 ); \ + in22 = GF_ADD( in22, in30 ); \ + in14 = GF_ADD( in14, in30 ); \ + in30 = gf_zero(); \ + in31 = GF_MUL( in31, 22 ); \ + in30 = GF_ADD( in30, in31 ); \ + in29 = GF_ADD( in29, in31 ); \ + in27 = GF_ADD( in27, in31 ); \ + in23 = GF_ADD( in23, in31 ); \ + in15 = GF_ADD( in15, in31 ); \ + in31 = gf_zero(); \ + in32 = GF_MUL( in32, 194 ); \ + in00 = GF_ADD( in00, in32 ); \ + in32 = gf_zero(); \ + in33 = GF_MUL( in33, 194 ); \ + in32 = GF_ADD( in32, in33 ); \ + in01 = GF_ADD( in01, in33 ); \ + in33 = gf_zero(); \ + in34 = GF_MUL( in34, 212 ); \ + in32 = GF_ADD( in32, in34 ); \ + in02 = GF_ADD( in02, in34 ); \ + in34 = gf_zero(); \ + in35 = GF_MUL( in35, 212 ); \ + in34 = GF_ADD( in34, in35 ); \ + in33 = GF_ADD( in33, in35 ); \ + in03 = GF_ADD( in03, in35 ); \ + in35 = gf_zero(); \ + in36 = GF_MUL( in36, 101 ); \ + in32 = GF_ADD( in32, in36 ); \ + in04 = GF_ADD( in04, in36 ); \ + in36 = gf_zero(); \ + in37 = GF_MUL( in37, 101 ); \ + in36 = GF_ADD( in36, in37 ); \ + in33 = GF_ADD( in33, in37 ); \ + in05 = GF_ADD( in05, in37 ); \ + in37 = gf_zero(); \ + in38 = GF_MUL( in38, 159 ); \ + in36 = GF_ADD( in36, in38 ); \ + in34 = GF_ADD( in34, in38 ); \ + in06 = GF_ADD( in06, in38 ); \ + in38 = gf_zero(); \ + in39 = GF_MUL( in39, 159 ); \ + in38 = GF_ADD( in38, in39 ); \ + in37 = GF_ADD( in37, in39 ); \ + in35 = GF_ADD( in35, in39 ); \ + in07 = GF_ADD( in07, in39 ); \ + in39 = gf_zero(); \ + in40 = GF_MUL( in40, 249 ); \ + in32 = GF_ADD( in32, in40 ); \ + in08 = GF_ADD( in08, in40 ); \ + in40 = gf_zero(); \ + in41 = GF_MUL( in41, 249 ); \ + in40 = GF_ADD( in40, in41 ); \ + in33 = GF_ADD( in33, in41 ); \ + in09 = GF_ADD( in09, in41 ); \ + in41 = 
gf_zero(); \ + in42 = GF_MUL( in42, 165 ); \ + in40 = GF_ADD( in40, in42 ); \ + in34 = GF_ADD( in34, in42 ); \ + in10 = GF_ADD( in10, in42 ); \ + in42 = gf_zero(); \ + in43 = GF_MUL( in43, 165 ); \ + in42 = GF_ADD( in42, in43 ); \ + in41 = GF_ADD( in41, in43 ); \ + in35 = GF_ADD( in35, in43 ); \ + in11 = GF_ADD( in11, in43 ); \ + in43 = gf_zero(); \ + in44 = GF_MUL( in44, 86 ); \ + in40 = GF_ADD( in40, in44 ); \ + in36 = GF_ADD( in36, in44 ); \ + in12 = GF_ADD( in12, in44 ); \ + in44 = gf_zero(); \ + in45 = GF_MUL( in45, 86 ); \ + in44 = GF_ADD( in44, in45 ); \ + in41 = GF_ADD( in41, in45 ); \ + in37 = GF_ADD( in37, in45 ); \ + in13 = GF_ADD( in13, in45 ); \ + in45 = gf_zero(); \ + in46 = GF_MUL( in46, 25 ); \ + in44 = GF_ADD( in44, in46 ); \ + in42 = GF_ADD( in42, in46 ); \ + in38 = GF_ADD( in38, in46 ); \ + in14 = GF_ADD( in14, in46 ); \ + in46 = gf_zero(); \ + in47 = GF_MUL( in47, 25 ); \ + in46 = GF_ADD( in46, in47 ); \ + in45 = GF_ADD( in45, in47 ); \ + in43 = GF_ADD( in43, in47 ); \ + in39 = GF_ADD( in39, in47 ); \ + in15 = GF_ADD( in15, in47 ); \ + in47 = gf_zero(); \ + in48 = GF_MUL( in48, 150 ); \ + in32 = GF_ADD( in32, in48 ); \ + in16 = GF_ADD( in16, in48 ); \ + in48 = gf_zero(); \ + in49 = GF_MUL( in49, 150 ); \ + in48 = GF_ADD( in48, in49 ); \ + in33 = GF_ADD( in33, in49 ); \ + in17 = GF_ADD( in17, in49 ); \ + in49 = gf_zero(); \ + in50 = GF_MUL( in50, 57 ); \ + in48 = GF_ADD( in48, in50 ); \ + in34 = GF_ADD( in34, in50 ); \ + in18 = GF_ADD( in18, in50 ); \ + in50 = gf_zero(); \ + in51 = GF_MUL( in51, 57 ); \ + in50 = GF_ADD( in50, in51 ); \ + in49 = GF_ADD( in49, in51 ); \ + in35 = GF_ADD( in35, in51 ); \ + in19 = GF_ADD( in19, in51 ); \ + in51 = gf_zero(); \ + in52 = GF_MUL( in52, 55 ); \ + in48 = GF_ADD( in48, in52 ); \ + in36 = GF_ADD( in36, in52 ); \ + in20 = GF_ADD( in20, in52 ); \ + in52 = gf_zero(); \ + in53 = GF_MUL( in53, 55 ); \ + in52 = GF_ADD( in52, in53 ); \ + in49 = GF_ADD( in49, in53 ); \ + in37 = GF_ADD( in37, in53 ); \ + in21 = GF_ADD( in21, in53 ); \ + in53 = gf_zero(); \ + in54 = GF_MUL( in54, 115 ); \ + in52 = GF_ADD( in52, in54 ); \ + in50 = GF_ADD( in50, in54 ); \ + in38 = GF_ADD( in38, in54 ); \ + in22 = GF_ADD( in22, in54 ); \ + in54 = gf_zero(); \ + in55 = GF_MUL( in55, 115 ); \ + in54 = GF_ADD( in54, in55 ); \ + in53 = GF_ADD( in53, in55 ); \ + in51 = GF_ADD( in51, in55 ); \ + in39 = GF_ADD( in39, in55 ); \ + in23 = GF_ADD( in23, in55 ); \ + in55 = gf_zero(); \ + in56 = GF_MUL( in56, 101 ); \ + in48 = GF_ADD( in48, in56 ); \ + in40 = GF_ADD( in40, in56 ); \ + in24 = GF_ADD( in24, in56 ); \ + in56 = gf_zero(); \ + in57 = GF_MUL( in57, 101 ); \ + in56 = GF_ADD( in56, in57 ); \ + in49 = GF_ADD( in49, in57 ); \ + in41 = GF_ADD( in41, in57 ); \ + in25 = GF_ADD( in25, in57 ); \ + in57 = gf_zero(); \ + in58 = GF_MUL( in58, 159 ); \ + in56 = GF_ADD( in56, in58 ); \ + in50 = GF_ADD( in50, in58 ); \ + in42 = GF_ADD( in42, in58 ); \ + in26 = GF_ADD( in26, in58 ); \ + in58 = gf_zero(); \ + in59 = GF_MUL( in59, 159 ); \ + in58 = GF_ADD( in58, in59 ); \ + in57 = GF_ADD( in57, in59 ); \ + in51 = GF_ADD( in51, in59 ); \ + in43 = GF_ADD( in43, in59 ); \ + in27 = GF_ADD( in27, in59 ); \ + in59 = gf_zero(); \ + in60 = GF_MUL( in60, 60 ); \ + in56 = GF_ADD( in56, in60 ); \ + in52 = GF_ADD( in52, in60 ); \ + in44 = GF_ADD( in44, in60 ); \ + in28 = GF_ADD( in28, in60 ); \ + in60 = gf_zero(); \ + in61 = GF_MUL( in61, 60 ); \ + in60 = GF_ADD( in60, in61 ); \ + in57 = GF_ADD( in57, in61 ); \ + in53 = GF_ADD( in53, in61 ); \ + in45 = GF_ADD( in45, in61 ); \ + in29 = GF_ADD( 
in29, in61 ); \ + in61 = gf_zero(); \ + in62 = GF_MUL( in62, 10 ); \ + in60 = GF_ADD( in60, in62 ); \ + in58 = GF_ADD( in58, in62 ); \ + in54 = GF_ADD( in54, in62 ); \ + in46 = GF_ADD( in46, in62 ); \ + in30 = GF_ADD( in30, in62 ); \ + in62 = gf_zero(); \ + in63 = GF_MUL( in63, 10 ); \ + in62 = GF_ADD( in62, in63 ); \ + in61 = GF_ADD( in61, in63 ); \ + in59 = GF_ADD( in59, in63 ); \ + in55 = GF_ADD( in55, in63 ); \ + in47 = GF_ADD( in47, in63 ); \ + in31 = GF_ADD( in31, in63 ); \ + in63 = gf_zero(); \ + in64 = GF_MUL( in64, 232 ); \ + in00 = GF_ADD( in00, in64 ); \ + in64 = gf_zero(); \ + in65 = GF_MUL( in65, 232 ); \ + in64 = GF_ADD( in64, in65 ); \ + in01 = GF_ADD( in01, in65 ); \ + in65 = gf_zero(); \ + in66 = GF_MUL( in66, 44 ); \ + in64 = GF_ADD( in64, in66 ); \ + in02 = GF_ADD( in02, in66 ); \ + in66 = gf_zero(); \ + in67 = GF_MUL( in67, 44 ); \ + in66 = GF_ADD( in66, in67 ); \ + in65 = GF_ADD( in65, in67 ); \ + in03 = GF_ADD( in03, in67 ); \ + in67 = gf_zero(); \ + in68 = GF_MUL( in68, 76 ); \ + in64 = GF_ADD( in64, in68 ); \ + in04 = GF_ADD( in04, in68 ); \ + in68 = gf_zero(); \ + in69 = GF_MUL( in69, 76 ); \ + in68 = GF_ADD( in68, in69 ); \ + in65 = GF_ADD( in65, in69 ); \ + in05 = GF_ADD( in05, in69 ); \ + in69 = gf_zero(); \ + in70 = GF_MUL( in70, 233 ); \ + in68 = GF_ADD( in68, in70 ); \ + in66 = GF_ADD( in66, in70 ); \ + in06 = GF_ADD( in06, in70 ); \ + in70 = gf_zero(); \ + in71 = GF_MUL( in71, 233 ); \ + in70 = GF_ADD( in70, in71 ); \ + in69 = GF_ADD( in69, in71 ); \ + in67 = GF_ADD( in67, in71 ); \ + in07 = GF_ADD( in07, in71 ); \ + in71 = gf_zero(); \ + in72 = GF_MUL( in72, 183 ); \ + in64 = GF_ADD( in64, in72 ); \ + in08 = GF_ADD( in08, in72 ); \ + in72 = gf_zero(); \ + in73 = GF_MUL( in73, 183 ); \ + in72 = GF_ADD( in72, in73 ); \ + in65 = GF_ADD( in65, in73 ); \ + in09 = GF_ADD( in09, in73 ); \ + in73 = gf_zero(); \ + in74 = GF_MUL( in74, 184 ); \ + in72 = GF_ADD( in72, in74 ); \ + in66 = GF_ADD( in66, in74 ); \ + in10 = GF_ADD( in10, in74 ); \ + in74 = gf_zero(); \ + in75 = GF_MUL( in75, 184 ); \ + in74 = GF_ADD( in74, in75 ); \ + in73 = GF_ADD( in73, in75 ); \ + in67 = GF_ADD( in67, in75 ); \ + in11 = GF_ADD( in11, in75 ); \ + in75 = gf_zero(); \ + in76 = GF_MUL( in76, 99 ); \ + in72 = GF_ADD( in72, in76 ); \ + in68 = GF_ADD( in68, in76 ); \ + in12 = GF_ADD( in12, in76 ); \ + in76 = gf_zero(); \ + in77 = GF_MUL( in77, 99 ); \ + in76 = GF_ADD( in76, in77 ); \ + in73 = GF_ADD( in73, in77 ); \ + in69 = GF_ADD( in69, in77 ); \ + in13 = GF_ADD( in13, in77 ); \ + in77 = gf_zero(); \ + in78 = GF_MUL( in78, 158 ); \ + in76 = GF_ADD( in76, in78 ); \ + in74 = GF_ADD( in74, in78 ); \ + in70 = GF_ADD( in70, in78 ); \ + in14 = GF_ADD( in14, in78 ); \ + in78 = gf_zero(); \ + in79 = GF_MUL( in79, 158 ); \ + in78 = GF_ADD( in78, in79 ); \ + in77 = GF_ADD( in77, in79 ); \ + in75 = GF_ADD( in75, in79 ); \ + in71 = GF_ADD( in71, in79 ); \ + in15 = GF_ADD( in15, in79 ); \ + in79 = gf_zero(); \ + in80 = GF_MUL( in80, 151 ); \ + in64 = GF_ADD( in64, in80 ); \ + in16 = GF_ADD( in16, in80 ); \ + in80 = gf_zero(); \ + in81 = GF_MUL( in81, 151 ); \ + in80 = GF_ADD( in80, in81 ); \ + in65 = GF_ADD( in65, in81 ); \ + in17 = GF_ADD( in17, in81 ); \ + in81 = gf_zero(); \ + in82 = GF_MUL( in82, 67 ); \ + in80 = GF_ADD( in80, in82 ); \ + in66 = GF_ADD( in66, in82 ); \ + in18 = GF_ADD( in18, in82 ); \ + in82 = gf_zero(); \ + in83 = GF_MUL( in83, 67 ); \ + in82 = GF_ADD( in82, in83 ); \ + in81 = GF_ADD( in81, in83 ); \ + in67 = GF_ADD( in67, in83 ); \ + in19 = GF_ADD( in19, in83 ); \ + in83 = 
gf_zero(); \ + in84 = GF_MUL( in84, 23 ); \ + in80 = GF_ADD( in80, in84 ); \ + in68 = GF_ADD( in68, in84 ); \ + in20 = GF_ADD( in20, in84 ); \ + in84 = gf_zero(); \ + in85 = GF_MUL( in85, 23 ); \ + in84 = GF_ADD( in84, in85 ); \ + in81 = GF_ADD( in81, in85 ); \ + in69 = GF_ADD( in69, in85 ); \ + in21 = GF_ADD( in21, in85 ); \ + in85 = gf_zero(); \ + in86 = GF_MUL( in86, 136 ); \ + in84 = GF_ADD( in84, in86 ); \ + in82 = GF_ADD( in82, in86 ); \ + in70 = GF_ADD( in70, in86 ); \ + in22 = GF_ADD( in22, in86 ); \ + in86 = gf_zero(); \ + in87 = GF_MUL( in87, 136 ); \ + in86 = GF_ADD( in86, in87 ); \ + in85 = GF_ADD( in85, in87 ); \ + in83 = GF_ADD( in83, in87 ); \ + in71 = GF_ADD( in71, in87 ); \ + in23 = GF_ADD( in23, in87 ); \ + in87 = gf_zero(); \ + in88 = GF_MUL( in88, 76 ); \ + in80 = GF_ADD( in80, in88 ); \ + in72 = GF_ADD( in72, in88 ); \ + in24 = GF_ADD( in24, in88 ); \ + in88 = gf_zero(); \ + in89 = GF_MUL( in89, 76 ); \ + in88 = GF_ADD( in88, in89 ); \ + in81 = GF_ADD( in81, in89 ); \ + in73 = GF_ADD( in73, in89 ); \ + in25 = GF_ADD( in25, in89 ); \ + in89 = gf_zero(); \ + in90 = GF_MUL( in90, 233 ); \ + in88 = GF_ADD( in88, in90 ); \ + in82 = GF_ADD( in82, in90 ); \ + in74 = GF_ADD( in74, in90 ); \ + in26 = GF_ADD( in26, in90 ); \ + in90 = gf_zero(); \ + in91 = GF_MUL( in91, 233 ); \ + in90 = GF_ADD( in90, in91 ); \ + in89 = GF_ADD( in89, in91 ); \ + in83 = GF_ADD( in83, in91 ); \ + in75 = GF_ADD( in75, in91 ); \ + in27 = GF_ADD( in27, in91 ); \ + in91 = gf_zero(); \ + in92 = GF_MUL( in92, 117 ); \ + in88 = GF_ADD( in88, in92 ); \ + in84 = GF_ADD( in84, in92 ); \ + in76 = GF_ADD( in76, in92 ); \ + in28 = GF_ADD( in28, in92 ); \ + in92 = gf_zero(); \ + in93 = GF_MUL( in93, 117 ); \ + in92 = GF_ADD( in92, in93 ); \ + in89 = GF_ADD( in89, in93 ); \ + in85 = GF_ADD( in85, in93 ); \ + in77 = GF_ADD( in77, in93 ); \ + in29 = GF_ADD( in29, in93 ); \ + in93 = gf_zero(); \ + in94 = GF_MUL( in94, 108 ); \ + in92 = GF_ADD( in92, in94 ); \ + in90 = GF_ADD( in90, in94 ); \ + in86 = GF_ADD( in86, in94 ); \ + in78 = GF_ADD( in78, in94 ); \ + in30 = GF_ADD( in30, in94 ); \ + in94 = gf_zero(); \ + in95 = GF_MUL( in95, 108 ); \ + in94 = GF_ADD( in94, in95 ); \ + in93 = GF_ADD( in93, in95 ); \ + in91 = GF_ADD( in91, in95 ); \ + in87 = GF_ADD( in87, in95 ); \ + in79 = GF_ADD( in79, in95 ); \ + in31 = GF_ADD( in31, in95 ); \ + in95 = gf_zero(); \ + in96 = GF_MUL( in96, 120 ); \ + in64 = GF_ADD( in64, in96 ); \ + in32 = GF_ADD( in32, in96 ); \ + in96 = gf_zero(); \ + in97 = GF_MUL( in97, 120 ); \ + in96 = GF_ADD( in96, in97 ); \ + in65 = GF_ADD( in65, in97 ); \ + in33 = GF_ADD( in33, in97 ); \ + in97 = gf_zero(); \ + in98 = GF_MUL( in98, 20 ); \ + in96 = GF_ADD( in96, in98 ); \ + in66 = GF_ADD( in66, in98 ); \ + in34 = GF_ADD( in34, in98 ); \ + in98 = gf_zero(); \ + in99 = GF_MUL( in99, 20 ); \ + in98 = GF_ADD( in98, in99 ); \ + in97 = GF_ADD( in97, in99 ); \ + in67 = GF_ADD( in67, in99 ); \ + in35 = GF_ADD( in35, in99 ); \ + in99 = gf_zero(); \ + in100 = GF_MUL( in100, 187 ); \ + in96 = GF_ADD( in96, in100 ); \ + in68 = GF_ADD( in68, in100 ); \ + in36 = GF_ADD( in36, in100 ); \ + in100 = gf_zero(); \ + in101 = GF_MUL( in101, 187 ); \ + in100 = GF_ADD( in100, in101 ); \ + in97 = GF_ADD( in97, in101 ); \ + in69 = GF_ADD( in69, in101 ); \ + in37 = GF_ADD( in37, in101 ); \ + in101 = gf_zero(); \ + in102 = GF_MUL( in102, 186 ); \ + in100 = GF_ADD( in100, in102 ); \ + in98 = GF_ADD( in98, in102 ); \ + in70 = GF_ADD( in70, in102 ); \ + in38 = GF_ADD( in38, in102 ); \ + in102 = gf_zero(); \ + in103 = GF_MUL( 
in103, 186 ); \ + in102 = GF_ADD( in102, in103 ); \ + in101 = GF_ADD( in101, in103 ); \ + in99 = GF_ADD( in99, in103 ); \ + in71 = GF_ADD( in71, in103 ); \ + in39 = GF_ADD( in39, in103 ); \ + in103 = gf_zero(); \ + in104 = GF_MUL( in104, 36 ); \ + in96 = GF_ADD( in96, in104 ); \ + in72 = GF_ADD( in72, in104 ); \ + in40 = GF_ADD( in40, in104 ); \ + in104 = gf_zero(); \ + in105 = GF_MUL( in105, 36 ); \ + in104 = GF_ADD( in104, in105 ); \ + in97 = GF_ADD( in97, in105 ); \ + in73 = GF_ADD( in73, in105 ); \ + in41 = GF_ADD( in41, in105 ); \ + in105 = gf_zero(); \ + in106 = GF_MUL( in106, 14 ); \ + in104 = GF_ADD( in104, in106 ); \ + in98 = GF_ADD( in98, in106 ); \ + in74 = GF_ADD( in74, in106 ); \ + in42 = GF_ADD( in42, in106 ); \ + in106 = gf_zero(); \ + in107 = GF_MUL( in107, 14 ); \ + in106 = GF_ADD( in106, in107 ); \ + in105 = GF_ADD( in105, in107 ); \ + in99 = GF_ADD( in99, in107 ); \ + in75 = GF_ADD( in75, in107 ); \ + in43 = GF_ADD( in43, in107 ); \ + in107 = gf_zero(); \ + in108 = GF_MUL( in108, 244 ); \ + in104 = GF_ADD( in104, in108 ); \ + in100 = GF_ADD( in100, in108 ); \ + in76 = GF_ADD( in76, in108 ); \ + in44 = GF_ADD( in44, in108 ); \ + in108 = gf_zero(); \ + in109 = GF_MUL( in109, 244 ); \ + in108 = GF_ADD( in108, in109 ); \ + in105 = GF_ADD( in105, in109 ); \ + in101 = GF_ADD( in101, in109 ); \ + in77 = GF_ADD( in77, in109 ); \ + in45 = GF_ADD( in45, in109 ); \ + in109 = gf_zero(); \ + in110 = GF_MUL( in110, 221 ); \ + in108 = GF_ADD( in108, in110 ); \ + in106 = GF_ADD( in106, in110 ); \ + in102 = GF_ADD( in102, in110 ); \ + in78 = GF_ADD( in78, in110 ); \ + in46 = GF_ADD( in46, in110 ); \ + in110 = gf_zero(); \ + in111 = GF_MUL( in111, 221 ); \ + in110 = GF_ADD( in110, in111 ); \ + in109 = GF_ADD( in109, in111 ); \ + in107 = GF_ADD( in107, in111 ); \ + in103 = GF_ADD( in103, in111 ); \ + in79 = GF_ADD( in79, in111 ); \ + in47 = GF_ADD( in47, in111 ); \ + in111 = gf_zero(); \ + in112 = GF_MUL( in112, 65 ); \ + in96 = GF_ADD( in96, in112 ); \ + in80 = GF_ADD( in80, in112 ); \ + in48 = GF_ADD( in48, in112 ); \ + in112 = gf_zero(); \ + in113 = GF_MUL( in113, 65 ); \ + in112 = GF_ADD( in112, in113 ); \ + in97 = GF_ADD( in97, in113 ); \ + in81 = GF_ADD( in81, in113 ); \ + in49 = GF_ADD( in49, in113 ); \ + in113 = gf_zero(); \ + in114 = GF_MUL( in114, 145 ); \ + in112 = GF_ADD( in112, in114 ); \ + in98 = GF_ADD( in98, in114 ); \ + in82 = GF_ADD( in82, in114 ); \ + in50 = GF_ADD( in50, in114 ); \ + in114 = gf_zero(); \ + in115 = GF_MUL( in115, 145 ); \ + in114 = GF_ADD( in114, in115 ); \ + in113 = GF_ADD( in113, in115 ); \ + in99 = GF_ADD( in99, in115 ); \ + in83 = GF_ADD( in83, in115 ); \ + in51 = GF_ADD( in51, in115 ); \ + in115 = gf_zero(); \ + in116 = GF_MUL( in116, 200 ); \ + in112 = GF_ADD( in112, in116 ); \ + in100 = GF_ADD( in100, in116 ); \ + in84 = GF_ADD( in84, in116 ); \ + in52 = GF_ADD( in52, in116 ); \ + in116 = gf_zero(); \ + in117 = GF_MUL( in117, 200 ); \ + in116 = GF_ADD( in116, in117 ); \ + in113 = GF_ADD( in113, in117 ); \ + in101 = GF_ADD( in101, in117 ); \ + in85 = GF_ADD( in85, in117 ); \ + in53 = GF_ADD( in53, in117 ); \ + in117 = gf_zero(); \ + in118 = GF_MUL( in118, 215 ); \ + in116 = GF_ADD( in116, in118 ); \ + in114 = GF_ADD( in114, in118 ); \ + in102 = GF_ADD( in102, in118 ); \ + in86 = GF_ADD( in86, in118 ); \ + in54 = GF_ADD( in54, in118 ); \ + in118 = gf_zero(); \ + in119 = GF_MUL( in119, 215 ); \ + in118 = GF_ADD( in118, in119 ); \ + in117 = GF_ADD( in117, in119 ); \ + in115 = GF_ADD( in115, in119 ); \ + in103 = GF_ADD( in103, in119 ); \ + in87 = 
GF_ADD( in87, in119 ); \ + in55 = GF_ADD( in55, in119 ); \ + in119 = gf_zero(); \ + in120 = GF_MUL( in120, 187 ); \ + in112 = GF_ADD( in112, in120 ); \ + in104 = GF_ADD( in104, in120 ); \ + in88 = GF_ADD( in88, in120 ); \ + in56 = GF_ADD( in56, in120 ); \ + in120 = gf_zero(); \ + in121 = GF_MUL( in121, 187 ); \ + in120 = GF_ADD( in120, in121 ); \ + in113 = GF_ADD( in113, in121 ); \ + in105 = GF_ADD( in105, in121 ); \ + in89 = GF_ADD( in89, in121 ); \ + in57 = GF_ADD( in57, in121 ); \ + in121 = gf_zero(); \ + in122 = GF_MUL( in122, 186 ); \ + in120 = GF_ADD( in120, in122 ); \ + in114 = GF_ADD( in114, in122 ); \ + in106 = GF_ADD( in106, in122 ); \ + in90 = GF_ADD( in90, in122 ); \ + in58 = GF_ADD( in58, in122 ); \ + in122 = gf_zero(); \ + in123 = GF_MUL( in123, 186 ); \ + in122 = GF_ADD( in122, in123 ); \ + in121 = GF_ADD( in121, in123 ); \ + in115 = GF_ADD( in115, in123 ); \ + in107 = GF_ADD( in107, in123 ); \ + in91 = GF_ADD( in91, in123 ); \ + in59 = GF_ADD( in59, in123 ); \ + in123 = gf_zero(); \ + in124 = GF_MUL( in124, 254 ); \ + in120 = GF_ADD( in120, in124 ); \ + in116 = GF_ADD( in116, in124 ); \ + in108 = GF_ADD( in108, in124 ); \ + in92 = GF_ADD( in92, in124 ); \ + in60 = GF_ADD( in60, in124 ); \ + in124 = gf_zero(); \ + in125 = GF_MUL( in125, 254 ); \ + in124 = GF_ADD( in124, in125 ); \ + in121 = GF_ADD( in121, in125 ); \ + in117 = GF_ADD( in117, in125 ); \ + in109 = GF_ADD( in109, in125 ); \ + in93 = GF_ADD( in93, in125 ); \ + in61 = GF_ADD( in61, in125 ); \ + in125 = gf_zero(); \ + in126 = GF_MUL( in126, 222 ); \ + in124 = GF_ADD( in124, in126 ); \ + in122 = GF_ADD( in122, in126 ); \ + in118 = GF_ADD( in118, in126 ); \ + in110 = GF_ADD( in110, in126 ); \ + in94 = GF_ADD( in94, in126 ); \ + in62 = GF_ADD( in62, in126 ); \ + in126 = gf_zero(); \ + in127 = GF_MUL( in127, 222 ); \ + in126 = GF_ADD( in126, in127 ); \ + in125 = GF_ADD( in125, in127 ); \ + in123 = GF_ADD( in123, in127 ); \ + in119 = GF_ADD( in119, in127 ); \ + in111 = GF_ADD( in111, in127 ); \ + in95 = GF_ADD( in95, in127 ); \ + in63 = GF_ADD( in63, in127 ); \ + in127 = gf_zero(); \ + in00 = GF_MUL( in00, 1 ); \ + in01 = GF_MUL( in01, 1 ); \ + in02 = GF_MUL( in02, 6 ); \ + in03 = GF_MUL( in03, 6 ); \ + in04 = GF_MUL( in04, 108 ); \ + in05 = GF_MUL( in05, 108 ); \ + in06 = GF_MUL( in06, 117 ); \ + in07 = GF_MUL( in07, 117 ); \ + in08 = GF_MUL( in08, 208 ); \ + in09 = GF_MUL( in09, 208 ); \ + in10 = GF_MUL( in10, 218 ); \ + in11 = GF_MUL( in11, 218 ); \ + in12 = GF_MUL( in12, 136 ); \ + in13 = GF_MUL( in13, 136 ); \ + in14 = GF_MUL( in14, 23 ); \ + in15 = GF_MUL( in15, 23 ); \ + in16 = GF_MUL( in16, 42 ); \ + in17 = GF_MUL( in17, 42 ); \ + in18 = GF_MUL( in18, 252 ); \ + in19 = GF_MUL( in19, 252 ); \ + in20 = GF_MUL( in20, 158 ); \ + in21 = GF_MUL( in21, 158 ); \ + in22 = GF_MUL( in22, 99 ); \ + in23 = GF_MUL( in23, 99 ); \ + in24 = GF_MUL( in24, 108 ); \ + in25 = GF_MUL( in25, 108 ); \ + in26 = GF_MUL( in26, 117 ); \ + in27 = GF_MUL( in27, 117 ); \ + in28 = GF_MUL( in28, 233 ); \ + in29 = GF_MUL( in29, 233 ); \ + in30 = GF_MUL( in30, 76 ); \ + in31 = GF_MUL( in31, 76 ); \ + in32 = GF_MUL( in32, 165 ); \ + in33 = GF_MUL( in33, 165 ); \ + in34 = GF_MUL( in34, 249 ); \ + in35 = GF_MUL( in35, 249 ); \ + in36 = GF_MUL( in36, 196 ); \ + in37 = GF_MUL( in37, 196 ); \ + in38 = GF_MUL( in38, 162 ); \ + in39 = GF_MUL( in39, 162 ); \ + in40 = GF_MUL( in40, 212 ); \ + in41 = GF_MUL( in41, 212 ); \ + in42 = GF_MUL( in42, 194 ); \ + in43 = GF_MUL( in43, 194 ); \ + in44 = GF_MUL( in44, 37 ); \ + in45 = GF_MUL( in45, 37 ); \ + in46 = 
GF_MUL( in46, 222 ); \ + in47 = GF_MUL( in47, 222 ); \ + in48 = GF_MUL( in48, 15 ); \ + in49 = GF_MUL( in49, 15 ); \ + in50 = GF_MUL( in50, 34 ); \ + in51 = GF_MUL( in51, 34 ); \ + in52 = GF_MUL( in52, 94 ); \ + in53 = GF_MUL( in53, 94 ); \ + in54 = GF_MUL( in54, 217 ); \ + in55 = GF_MUL( in55, 217 ); \ + in56 = GF_MUL( in56, 196 ); \ + in57 = GF_MUL( in57, 196 ); \ + in58 = GF_MUL( in58, 162 ); \ + in59 = GF_MUL( in59, 162 ); \ + in60 = GF_MUL( in60, 171 ); \ + in61 = GF_MUL( in61, 171 ); \ + in62 = GF_MUL( in62, 221 ); \ + in63 = GF_MUL( in63, 221 ); \ + in64 = GF_MUL( in64, 250 ); \ + in65 = GF_MUL( in65, 250 ); \ + in66 = GF_MUL( in66, 38 ); \ + in67 = GF_MUL( in67, 38 ); \ + in68 = GF_MUL( in68, 22 ); \ + in69 = GF_MUL( in69, 22 ); \ + in70 = GF_MUL( in70, 116 ); \ + in71 = GF_MUL( in71, 116 ); \ + in72 = GF_MUL( in72, 175 ); \ + in73 = GF_MUL( in73, 175 ); \ + in74 = GF_MUL( in74, 197 ); \ + in75 = GF_MUL( in75, 197 ); \ + in76 = GF_MUL( in76, 91 ); \ + in77 = GF_MUL( in77, 91 ); \ + in78 = GF_MUL( in78, 199 ); \ + in79 = GF_MUL( in79, 199 ); \ + in80 = GF_MUL( in80, 92 ); \ + in81 = GF_MUL( in81, 92 ); \ + in82 = GF_MUL( in82, 213 ); \ + in83 = GF_MUL( in83, 213 ); \ + in84 = GF_MUL( in84, 102 ); \ + in85 = GF_MUL( in85, 102 ); \ + in86 = GF_MUL( in86, 73 ); \ + in87 = GF_MUL( in87, 73 ); \ + in88 = GF_MUL( in88, 22 ); \ + in89 = GF_MUL( in89, 22 ); \ + in90 = GF_MUL( in90, 116 ); \ + in91 = GF_MUL( in91, 116 ); \ + in92 = GF_MUL( in92, 251 ); \ + in93 = GF_MUL( in93, 251 ); \ + in94 = GF_MUL( in94, 32 ); \ + in95 = GF_MUL( in95, 32 ); \ + in96 = GF_MUL( in96, 219 ); \ + in97 = GF_MUL( in97, 219 ); \ + in98 = GF_MUL( in98, 224 ); \ + in99 = GF_MUL( in99, 224 ); \ + in100 = GF_MUL( in100, 123 ); \ + in101 = GF_MUL( in101, 123 ); \ + in102 = GF_MUL( in102, 7 ); \ + in103 = GF_MUL( in103, 7 ); \ + in104 = GF_MUL( in104, 96 ); \ + in105 = GF_MUL( in105, 96 ); \ + in106 = GF_MUL( in106, 93 ); \ + in107 = GF_MUL( in107, 93 ); \ + in108 = GF_MUL( in108, 3 ); \ + in109 = GF_MUL( in109, 3 ); \ + in110 = GF_MUL( in110, 10 ); \ + in111 = GF_MUL( in111, 10 ); \ + in112 = GF_MUL( in112, 95 ); \ + in113 = GF_MUL( in113, 95 ); \ + in114 = GF_MUL( in114, 223 ); \ + in115 = GF_MUL( in115, 223 ); \ + in116 = GF_MUL( in116, 210 ); \ + in117 = GF_MUL( in117, 210 ); \ + in118 = GF_MUL( in118, 214 ); \ + in119 = GF_MUL( in119, 214 ); \ + in120 = GF_MUL( in120, 123 ); \ + in121 = GF_MUL( in121, 123 ); \ + in122 = GF_MUL( in122, 7 ); \ + in123 = GF_MUL( in123, 7 ); \ + in124 = GF_MUL( in124, 126 ); \ + in125 = GF_MUL( in125, 126 ); \ + in126 = GF_MUL( in126, 25 ); \ + in127 = GF_MUL( in127, 25 ); \ + } while( 0 ) + + + +#define FD_REEDSOL_FDERIV_IMPL_256( in00, in01, \ + in02, in03, in04, in05, in06, in07, in08 , \ + in09, in10, in11, in12, in13, in14, in15 , \ + in16, in17, in18, in19, in20, in21, in22 , \ + in23, in24, in25, in26, in27, in28, in29 , \ + in30, in31, in32, in33, in34, in35, in36 , \ + in37, in38, in39, in40, in41, in42, in43 , \ + in44, in45, in46, in47, in48, in49, in50 , \ + in51, in52, in53, in54, in55, in56, in57 , \ + in58, in59, in60, in61, in62, in63, in64 , \ + in65, in66, in67, in68, in69, in70, in71 , \ + in72, in73, in74, in75, in76, in77, in78 , \ + in79, in80, in81, in82, in83, in84, in85 , \ + in86, in87, in88, in89, in90, in91, in92 , \ + in93, in94, in95, in96, in97, in98, in99 , \ + in100, in101, in102, in103, in104, in105 , \ + in106, in107, in108, in109, in110, in111 , \ + in112, in113, in114, in115, in116, in117 , \ + in118, in119, in120, in121, in122, in123 , \ + 
in124, in125, in126, in127, in128, in129 , \ + in130, in131, in132, in133, in134, in135 , \ + in136, in137, in138, in139, in140, in141 , \ + in142, in143, in144, in145, in146, in147 , \ + in148, in149, in150, in151, in152, in153 , \ + in154, in155, in156, in157, in158, in159 , \ + in160, in161, in162, in163, in164, in165 , \ + in166, in167, in168, in169, in170, in171 , \ + in172, in173, in174, in175, in176, in177 , \ + in178, in179, in180, in181, in182, in183 , \ + in184, in185, in186, in187, in188, in189 , \ + in190, in191, in192, in193, in194, in195 , \ + in196, in197, in198, in199, in200, in201 , \ + in202, in203, in204, in205, in206, in207 , \ + in208, in209, in210, in211, in212, in213 , \ + in214, in215, in216, in217, in218, in219 , \ + in220, in221, in222, in223, in224, in225 , \ + in226, in227, in228, in229, in230, in231 , \ + in232, in233, in234, in235, in236, in237 , \ + in238, in239, in240, in241, in242, in243 , \ + in244, in245, in246, in247, in248, in249 , \ + in250, in251, in252, in253, in254, in255) \ + do { \ + in00 = GF_MUL( in00, 1 ); \ + in00 = gf_zero(); \ + in01 = GF_MUL( in01, 1 ); \ + in00 = GF_ADD( in00, in01 ); \ + in01 = gf_zero(); \ + in02 = GF_MUL( in02, 122 ); \ + in00 = GF_ADD( in00, in02 ); \ + in02 = gf_zero(); \ + in03 = GF_MUL( in03, 122 ); \ + in02 = GF_ADD( in02, in03 ); \ + in01 = GF_ADD( in01, in03 ); \ + in03 = gf_zero(); \ + in04 = GF_MUL( in04, 32 ); \ + in00 = GF_ADD( in00, in04 ); \ + in04 = gf_zero(); \ + in05 = GF_MUL( in05, 32 ); \ + in04 = GF_ADD( in04, in05 ); \ + in01 = GF_ADD( in01, in05 ); \ + in05 = gf_zero(); \ + in06 = GF_MUL( in06, 251 ); \ + in04 = GF_ADD( in04, in06 ); \ + in02 = GF_ADD( in02, in06 ); \ + in06 = gf_zero(); \ + in07 = GF_MUL( in07, 251 ); \ + in06 = GF_ADD( in06, in07 ); \ + in05 = GF_ADD( in05, in07 ); \ + in03 = GF_ADD( in03, in07 ); \ + in07 = gf_zero(); \ + in08 = GF_MUL( in08, 41 ); \ + in00 = GF_ADD( in00, in08 ); \ + in08 = gf_zero(); \ + in09 = GF_MUL( in09, 41 ); \ + in08 = GF_ADD( in08, in09 ); \ + in01 = GF_ADD( in01, in09 ); \ + in09 = gf_zero(); \ + in10 = GF_MUL( in10, 118 ); \ + in08 = GF_ADD( in08, in10 ); \ + in02 = GF_ADD( in02, in10 ); \ + in10 = gf_zero(); \ + in11 = GF_MUL( in11, 118 ); \ + in10 = GF_ADD( in10, in11 ); \ + in09 = GF_ADD( in09, in11 ); \ + in03 = GF_ADD( in03, in11 ); \ + in11 = gf_zero(); \ + in12 = GF_MUL( in12, 73 ); \ + in08 = GF_ADD( in08, in12 ); \ + in04 = GF_ADD( in04, in12 ); \ + in12 = gf_zero(); \ + in13 = GF_MUL( in13, 73 ); \ + in12 = GF_ADD( in12, in13 ); \ + in09 = GF_ADD( in09, in13 ); \ + in05 = GF_ADD( in05, in13 ); \ + in13 = gf_zero(); \ + in14 = GF_MUL( in14, 102 ); \ + in12 = GF_ADD( in12, in14 ); \ + in10 = GF_ADD( in10, in14 ); \ + in06 = GF_ADD( in06, in14 ); \ + in14 = gf_zero(); \ + in15 = GF_MUL( in15, 102 ); \ + in14 = GF_ADD( in14, in15 ); \ + in13 = GF_ADD( in13, in15 ); \ + in11 = GF_ADD( in11, in15 ); \ + in07 = GF_ADD( in07, in15 ); \ + in15 = gf_zero(); \ + in16 = GF_MUL( in16, 31 ); \ + in00 = GF_ADD( in00, in16 ); \ + in16 = gf_zero(); \ + in17 = GF_MUL( in17, 31 ); \ + in16 = GF_ADD( in16, in17 ); \ + in01 = GF_ADD( in01, in17 ); \ + in17 = gf_zero(); \ + in18 = GF_MUL( in18, 127 ); \ + in16 = GF_ADD( in16, in18 ); \ + in02 = GF_ADD( in02, in18 ); \ + in18 = gf_zero(); \ + in19 = GF_MUL( in19, 127 ); \ + in18 = GF_ADD( in18, in19 ); \ + in17 = GF_ADD( in17, in19 ); \ + in03 = GF_ADD( in03, in19 ); \ + in19 = gf_zero(); \ + in20 = GF_MUL( in20, 199 ); \ + in16 = GF_ADD( in16, in20 ); \ + in04 = GF_ADD( in04, in20 ); \ + in20 = gf_zero(); \ + in21 
= GF_MUL( in21, 199 ); \ + in20 = GF_ADD( in20, in21 ); \ + in17 = GF_ADD( in17, in21 ); \ + in05 = GF_ADD( in05, in21 ); \ + in21 = gf_zero(); \ + in22 = GF_MUL( in22, 91 ); \ + in20 = GF_ADD( in20, in22 ); \ + in18 = GF_ADD( in18, in22 ); \ + in06 = GF_ADD( in06, in22 ); \ + in22 = gf_zero(); \ + in23 = GF_MUL( in23, 91 ); \ + in22 = GF_ADD( in22, in23 ); \ + in21 = GF_ADD( in21, in23 ); \ + in19 = GF_ADD( in19, in23 ); \ + in07 = GF_ADD( in07, in23 ); \ + in23 = gf_zero(); \ + in24 = GF_MUL( in24, 32 ); \ + in16 = GF_ADD( in16, in24 ); \ + in08 = GF_ADD( in08, in24 ); \ + in24 = gf_zero(); \ + in25 = GF_MUL( in25, 32 ); \ + in24 = GF_ADD( in24, in25 ); \ + in17 = GF_ADD( in17, in25 ); \ + in09 = GF_ADD( in09, in25 ); \ + in25 = gf_zero(); \ + in26 = GF_MUL( in26, 251 ); \ + in24 = GF_ADD( in24, in26 ); \ + in18 = GF_ADD( in18, in26 ); \ + in10 = GF_ADD( in10, in26 ); \ + in26 = gf_zero(); \ + in27 = GF_MUL( in27, 251 ); \ + in26 = GF_ADD( in26, in27 ); \ + in25 = GF_ADD( in25, in27 ); \ + in19 = GF_ADD( in19, in27 ); \ + in11 = GF_ADD( in11, in27 ); \ + in27 = gf_zero(); \ + in28 = GF_MUL( in28, 116 ); \ + in24 = GF_ADD( in24, in28 ); \ + in20 = GF_ADD( in20, in28 ); \ + in12 = GF_ADD( in12, in28 ); \ + in28 = gf_zero(); \ + in29 = GF_MUL( in29, 116 ); \ + in28 = GF_ADD( in28, in29 ); \ + in25 = GF_ADD( in25, in29 ); \ + in21 = GF_ADD( in21, in29 ); \ + in13 = GF_ADD( in13, in29 ); \ + in29 = gf_zero(); \ + in30 = GF_MUL( in30, 22 ); \ + in28 = GF_ADD( in28, in30 ); \ + in26 = GF_ADD( in26, in30 ); \ + in22 = GF_ADD( in22, in30 ); \ + in14 = GF_ADD( in14, in30 ); \ + in30 = gf_zero(); \ + in31 = GF_MUL( in31, 22 ); \ + in30 = GF_ADD( in30, in31 ); \ + in29 = GF_ADD( in29, in31 ); \ + in27 = GF_ADD( in27, in31 ); \ + in23 = GF_ADD( in23, in31 ); \ + in15 = GF_ADD( in15, in31 ); \ + in31 = gf_zero(); \ + in32 = GF_MUL( in32, 194 ); \ + in00 = GF_ADD( in00, in32 ); \ + in32 = gf_zero(); \ + in33 = GF_MUL( in33, 194 ); \ + in32 = GF_ADD( in32, in33 ); \ + in01 = GF_ADD( in01, in33 ); \ + in33 = gf_zero(); \ + in34 = GF_MUL( in34, 212 ); \ + in32 = GF_ADD( in32, in34 ); \ + in02 = GF_ADD( in02, in34 ); \ + in34 = gf_zero(); \ + in35 = GF_MUL( in35, 212 ); \ + in34 = GF_ADD( in34, in35 ); \ + in33 = GF_ADD( in33, in35 ); \ + in03 = GF_ADD( in03, in35 ); \ + in35 = gf_zero(); \ + in36 = GF_MUL( in36, 101 ); \ + in32 = GF_ADD( in32, in36 ); \ + in04 = GF_ADD( in04, in36 ); \ + in36 = gf_zero(); \ + in37 = GF_MUL( in37, 101 ); \ + in36 = GF_ADD( in36, in37 ); \ + in33 = GF_ADD( in33, in37 ); \ + in05 = GF_ADD( in05, in37 ); \ + in37 = gf_zero(); \ + in38 = GF_MUL( in38, 159 ); \ + in36 = GF_ADD( in36, in38 ); \ + in34 = GF_ADD( in34, in38 ); \ + in06 = GF_ADD( in06, in38 ); \ + in38 = gf_zero(); \ + in39 = GF_MUL( in39, 159 ); \ + in38 = GF_ADD( in38, in39 ); \ + in37 = GF_ADD( in37, in39 ); \ + in35 = GF_ADD( in35, in39 ); \ + in07 = GF_ADD( in07, in39 ); \ + in39 = gf_zero(); \ + in40 = GF_MUL( in40, 249 ); \ + in32 = GF_ADD( in32, in40 ); \ + in08 = GF_ADD( in08, in40 ); \ + in40 = gf_zero(); \ + in41 = GF_MUL( in41, 249 ); \ + in40 = GF_ADD( in40, in41 ); \ + in33 = GF_ADD( in33, in41 ); \ + in09 = GF_ADD( in09, in41 ); \ + in41 = gf_zero(); \ + in42 = GF_MUL( in42, 165 ); \ + in40 = GF_ADD( in40, in42 ); \ + in34 = GF_ADD( in34, in42 ); \ + in10 = GF_ADD( in10, in42 ); \ + in42 = gf_zero(); \ + in43 = GF_MUL( in43, 165 ); \ + in42 = GF_ADD( in42, in43 ); \ + in41 = GF_ADD( in41, in43 ); \ + in35 = GF_ADD( in35, in43 ); \ + in11 = GF_ADD( in11, in43 ); \ + in43 = gf_zero(); \ + in44 = 
GF_MUL( in44, 86 ); \ + in40 = GF_ADD( in40, in44 ); \ + in36 = GF_ADD( in36, in44 ); \ + in12 = GF_ADD( in12, in44 ); \ + in44 = gf_zero(); \ + in45 = GF_MUL( in45, 86 ); \ + in44 = GF_ADD( in44, in45 ); \ + in41 = GF_ADD( in41, in45 ); \ + in37 = GF_ADD( in37, in45 ); \ + in13 = GF_ADD( in13, in45 ); \ + in45 = gf_zero(); \ + in46 = GF_MUL( in46, 25 ); \ + in44 = GF_ADD( in44, in46 ); \ + in42 = GF_ADD( in42, in46 ); \ + in38 = GF_ADD( in38, in46 ); \ + in14 = GF_ADD( in14, in46 ); \ + in46 = gf_zero(); \ + in47 = GF_MUL( in47, 25 ); \ + in46 = GF_ADD( in46, in47 ); \ + in45 = GF_ADD( in45, in47 ); \ + in43 = GF_ADD( in43, in47 ); \ + in39 = GF_ADD( in39, in47 ); \ + in15 = GF_ADD( in15, in47 ); \ + in47 = gf_zero(); \ + in48 = GF_MUL( in48, 150 ); \ + in32 = GF_ADD( in32, in48 ); \ + in16 = GF_ADD( in16, in48 ); \ + in48 = gf_zero(); \ + in49 = GF_MUL( in49, 150 ); \ + in48 = GF_ADD( in48, in49 ); \ + in33 = GF_ADD( in33, in49 ); \ + in17 = GF_ADD( in17, in49 ); \ + in49 = gf_zero(); \ + in50 = GF_MUL( in50, 57 ); \ + in48 = GF_ADD( in48, in50 ); \ + in34 = GF_ADD( in34, in50 ); \ + in18 = GF_ADD( in18, in50 ); \ + in50 = gf_zero(); \ + in51 = GF_MUL( in51, 57 ); \ + in50 = GF_ADD( in50, in51 ); \ + in49 = GF_ADD( in49, in51 ); \ + in35 = GF_ADD( in35, in51 ); \ + in19 = GF_ADD( in19, in51 ); \ + in51 = gf_zero(); \ + in52 = GF_MUL( in52, 55 ); \ + in48 = GF_ADD( in48, in52 ); \ + in36 = GF_ADD( in36, in52 ); \ + in20 = GF_ADD( in20, in52 ); \ + in52 = gf_zero(); \ + in53 = GF_MUL( in53, 55 ); \ + in52 = GF_ADD( in52, in53 ); \ + in49 = GF_ADD( in49, in53 ); \ + in37 = GF_ADD( in37, in53 ); \ + in21 = GF_ADD( in21, in53 ); \ + in53 = gf_zero(); \ + in54 = GF_MUL( in54, 115 ); \ + in52 = GF_ADD( in52, in54 ); \ + in50 = GF_ADD( in50, in54 ); \ + in38 = GF_ADD( in38, in54 ); \ + in22 = GF_ADD( in22, in54 ); \ + in54 = gf_zero(); \ + in55 = GF_MUL( in55, 115 ); \ + in54 = GF_ADD( in54, in55 ); \ + in53 = GF_ADD( in53, in55 ); \ + in51 = GF_ADD( in51, in55 ); \ + in39 = GF_ADD( in39, in55 ); \ + in23 = GF_ADD( in23, in55 ); \ + in55 = gf_zero(); \ + in56 = GF_MUL( in56, 101 ); \ + in48 = GF_ADD( in48, in56 ); \ + in40 = GF_ADD( in40, in56 ); \ + in24 = GF_ADD( in24, in56 ); \ + in56 = gf_zero(); \ + in57 = GF_MUL( in57, 101 ); \ + in56 = GF_ADD( in56, in57 ); \ + in49 = GF_ADD( in49, in57 ); \ + in41 = GF_ADD( in41, in57 ); \ + in25 = GF_ADD( in25, in57 ); \ + in57 = gf_zero(); \ + in58 = GF_MUL( in58, 159 ); \ + in56 = GF_ADD( in56, in58 ); \ + in50 = GF_ADD( in50, in58 ); \ + in42 = GF_ADD( in42, in58 ); \ + in26 = GF_ADD( in26, in58 ); \ + in58 = gf_zero(); \ + in59 = GF_MUL( in59, 159 ); \ + in58 = GF_ADD( in58, in59 ); \ + in57 = GF_ADD( in57, in59 ); \ + in51 = GF_ADD( in51, in59 ); \ + in43 = GF_ADD( in43, in59 ); \ + in27 = GF_ADD( in27, in59 ); \ + in59 = gf_zero(); \ + in60 = GF_MUL( in60, 60 ); \ + in56 = GF_ADD( in56, in60 ); \ + in52 = GF_ADD( in52, in60 ); \ + in44 = GF_ADD( in44, in60 ); \ + in28 = GF_ADD( in28, in60 ); \ + in60 = gf_zero(); \ + in61 = GF_MUL( in61, 60 ); \ + in60 = GF_ADD( in60, in61 ); \ + in57 = GF_ADD( in57, in61 ); \ + in53 = GF_ADD( in53, in61 ); \ + in45 = GF_ADD( in45, in61 ); \ + in29 = GF_ADD( in29, in61 ); \ + in61 = gf_zero(); \ + in62 = GF_MUL( in62, 10 ); \ + in60 = GF_ADD( in60, in62 ); \ + in58 = GF_ADD( in58, in62 ); \ + in54 = GF_ADD( in54, in62 ); \ + in46 = GF_ADD( in46, in62 ); \ + in30 = GF_ADD( in30, in62 ); \ + in62 = gf_zero(); \ + in63 = GF_MUL( in63, 10 ); \ + in62 = GF_ADD( in62, in63 ); \ + in61 = GF_ADD( in61, in63 ); \ + in59 = 
GF_ADD( in59, in63 ); \ + in55 = GF_ADD( in55, in63 ); \ + in47 = GF_ADD( in47, in63 ); \ + in31 = GF_ADD( in31, in63 ); \ + in63 = gf_zero(); \ + in64 = GF_MUL( in64, 232 ); \ + in00 = GF_ADD( in00, in64 ); \ + in64 = gf_zero(); \ + in65 = GF_MUL( in65, 232 ); \ + in64 = GF_ADD( in64, in65 ); \ + in01 = GF_ADD( in01, in65 ); \ + in65 = gf_zero(); \ + in66 = GF_MUL( in66, 44 ); \ + in64 = GF_ADD( in64, in66 ); \ + in02 = GF_ADD( in02, in66 ); \ + in66 = gf_zero(); \ + in67 = GF_MUL( in67, 44 ); \ + in66 = GF_ADD( in66, in67 ); \ + in65 = GF_ADD( in65, in67 ); \ + in03 = GF_ADD( in03, in67 ); \ + in67 = gf_zero(); \ + in68 = GF_MUL( in68, 76 ); \ + in64 = GF_ADD( in64, in68 ); \ + in04 = GF_ADD( in04, in68 ); \ + in68 = gf_zero(); \ + in69 = GF_MUL( in69, 76 ); \ + in68 = GF_ADD( in68, in69 ); \ + in65 = GF_ADD( in65, in69 ); \ + in05 = GF_ADD( in05, in69 ); \ + in69 = gf_zero(); \ + in70 = GF_MUL( in70, 233 ); \ + in68 = GF_ADD( in68, in70 ); \ + in66 = GF_ADD( in66, in70 ); \ + in06 = GF_ADD( in06, in70 ); \ + in70 = gf_zero(); \ + in71 = GF_MUL( in71, 233 ); \ + in70 = GF_ADD( in70, in71 ); \ + in69 = GF_ADD( in69, in71 ); \ + in67 = GF_ADD( in67, in71 ); \ + in07 = GF_ADD( in07, in71 ); \ + in71 = gf_zero(); \ + in72 = GF_MUL( in72, 183 ); \ + in64 = GF_ADD( in64, in72 ); \ + in08 = GF_ADD( in08, in72 ); \ + in72 = gf_zero(); \ + in73 = GF_MUL( in73, 183 ); \ + in72 = GF_ADD( in72, in73 ); \ + in65 = GF_ADD( in65, in73 ); \ + in09 = GF_ADD( in09, in73 ); \ + in73 = gf_zero(); \ + in74 = GF_MUL( in74, 184 ); \ + in72 = GF_ADD( in72, in74 ); \ + in66 = GF_ADD( in66, in74 ); \ + in10 = GF_ADD( in10, in74 ); \ + in74 = gf_zero(); \ + in75 = GF_MUL( in75, 184 ); \ + in74 = GF_ADD( in74, in75 ); \ + in73 = GF_ADD( in73, in75 ); \ + in67 = GF_ADD( in67, in75 ); \ + in11 = GF_ADD( in11, in75 ); \ + in75 = gf_zero(); \ + in76 = GF_MUL( in76, 99 ); \ + in72 = GF_ADD( in72, in76 ); \ + in68 = GF_ADD( in68, in76 ); \ + in12 = GF_ADD( in12, in76 ); \ + in76 = gf_zero(); \ + in77 = GF_MUL( in77, 99 ); \ + in76 = GF_ADD( in76, in77 ); \ + in73 = GF_ADD( in73, in77 ); \ + in69 = GF_ADD( in69, in77 ); \ + in13 = GF_ADD( in13, in77 ); \ + in77 = gf_zero(); \ + in78 = GF_MUL( in78, 158 ); \ + in76 = GF_ADD( in76, in78 ); \ + in74 = GF_ADD( in74, in78 ); \ + in70 = GF_ADD( in70, in78 ); \ + in14 = GF_ADD( in14, in78 ); \ + in78 = gf_zero(); \ + in79 = GF_MUL( in79, 158 ); \ + in78 = GF_ADD( in78, in79 ); \ + in77 = GF_ADD( in77, in79 ); \ + in75 = GF_ADD( in75, in79 ); \ + in71 = GF_ADD( in71, in79 ); \ + in15 = GF_ADD( in15, in79 ); \ + in79 = gf_zero(); \ + in80 = GF_MUL( in80, 151 ); \ + in64 = GF_ADD( in64, in80 ); \ + in16 = GF_ADD( in16, in80 ); \ + in80 = gf_zero(); \ + in81 = GF_MUL( in81, 151 ); \ + in80 = GF_ADD( in80, in81 ); \ + in65 = GF_ADD( in65, in81 ); \ + in17 = GF_ADD( in17, in81 ); \ + in81 = gf_zero(); \ + in82 = GF_MUL( in82, 67 ); \ + in80 = GF_ADD( in80, in82 ); \ + in66 = GF_ADD( in66, in82 ); \ + in18 = GF_ADD( in18, in82 ); \ + in82 = gf_zero(); \ + in83 = GF_MUL( in83, 67 ); \ + in82 = GF_ADD( in82, in83 ); \ + in81 = GF_ADD( in81, in83 ); \ + in67 = GF_ADD( in67, in83 ); \ + in19 = GF_ADD( in19, in83 ); \ + in83 = gf_zero(); \ + in84 = GF_MUL( in84, 23 ); \ + in80 = GF_ADD( in80, in84 ); \ + in68 = GF_ADD( in68, in84 ); \ + in20 = GF_ADD( in20, in84 ); \ + in84 = gf_zero(); \ + in85 = GF_MUL( in85, 23 ); \ + in84 = GF_ADD( in84, in85 ); \ + in81 = GF_ADD( in81, in85 ); \ + in69 = GF_ADD( in69, in85 ); \ + in21 = GF_ADD( in21, in85 ); \ + in85 = gf_zero(); \ + in86 = GF_MUL( 
in86, 136 ); \ + in84 = GF_ADD( in84, in86 ); \ + in82 = GF_ADD( in82, in86 ); \ + in70 = GF_ADD( in70, in86 ); \ + in22 = GF_ADD( in22, in86 ); \ + in86 = gf_zero(); \ + in87 = GF_MUL( in87, 136 ); \ + in86 = GF_ADD( in86, in87 ); \ + in85 = GF_ADD( in85, in87 ); \ + in83 = GF_ADD( in83, in87 ); \ + in71 = GF_ADD( in71, in87 ); \ + in23 = GF_ADD( in23, in87 ); \ + in87 = gf_zero(); \ + in88 = GF_MUL( in88, 76 ); \ + in80 = GF_ADD( in80, in88 ); \ + in72 = GF_ADD( in72, in88 ); \ + in24 = GF_ADD( in24, in88 ); \ + in88 = gf_zero(); \ + in89 = GF_MUL( in89, 76 ); \ + in88 = GF_ADD( in88, in89 ); \ + in81 = GF_ADD( in81, in89 ); \ + in73 = GF_ADD( in73, in89 ); \ + in25 = GF_ADD( in25, in89 ); \ + in89 = gf_zero(); \ + in90 = GF_MUL( in90, 233 ); \ + in88 = GF_ADD( in88, in90 ); \ + in82 = GF_ADD( in82, in90 ); \ + in74 = GF_ADD( in74, in90 ); \ + in26 = GF_ADD( in26, in90 ); \ + in90 = gf_zero(); \ + in91 = GF_MUL( in91, 233 ); \ + in90 = GF_ADD( in90, in91 ); \ + in89 = GF_ADD( in89, in91 ); \ + in83 = GF_ADD( in83, in91 ); \ + in75 = GF_ADD( in75, in91 ); \ + in27 = GF_ADD( in27, in91 ); \ + in91 = gf_zero(); \ + in92 = GF_MUL( in92, 117 ); \ + in88 = GF_ADD( in88, in92 ); \ + in84 = GF_ADD( in84, in92 ); \ + in76 = GF_ADD( in76, in92 ); \ + in28 = GF_ADD( in28, in92 ); \ + in92 = gf_zero(); \ + in93 = GF_MUL( in93, 117 ); \ + in92 = GF_ADD( in92, in93 ); \ + in89 = GF_ADD( in89, in93 ); \ + in85 = GF_ADD( in85, in93 ); \ + in77 = GF_ADD( in77, in93 ); \ + in29 = GF_ADD( in29, in93 ); \ + in93 = gf_zero(); \ + in94 = GF_MUL( in94, 108 ); \ + in92 = GF_ADD( in92, in94 ); \ + in90 = GF_ADD( in90, in94 ); \ + in86 = GF_ADD( in86, in94 ); \ + in78 = GF_ADD( in78, in94 ); \ + in30 = GF_ADD( in30, in94 ); \ + in94 = gf_zero(); \ + in95 = GF_MUL( in95, 108 ); \ + in94 = GF_ADD( in94, in95 ); \ + in93 = GF_ADD( in93, in95 ); \ + in91 = GF_ADD( in91, in95 ); \ + in87 = GF_ADD( in87, in95 ); \ + in79 = GF_ADD( in79, in95 ); \ + in31 = GF_ADD( in31, in95 ); \ + in95 = gf_zero(); \ + in96 = GF_MUL( in96, 120 ); \ + in64 = GF_ADD( in64, in96 ); \ + in32 = GF_ADD( in32, in96 ); \ + in96 = gf_zero(); \ + in97 = GF_MUL( in97, 120 ); \ + in96 = GF_ADD( in96, in97 ); \ + in65 = GF_ADD( in65, in97 ); \ + in33 = GF_ADD( in33, in97 ); \ + in97 = gf_zero(); \ + in98 = GF_MUL( in98, 20 ); \ + in96 = GF_ADD( in96, in98 ); \ + in66 = GF_ADD( in66, in98 ); \ + in34 = GF_ADD( in34, in98 ); \ + in98 = gf_zero(); \ + in99 = GF_MUL( in99, 20 ); \ + in98 = GF_ADD( in98, in99 ); \ + in97 = GF_ADD( in97, in99 ); \ + in67 = GF_ADD( in67, in99 ); \ + in35 = GF_ADD( in35, in99 ); \ + in99 = gf_zero(); \ + in100 = GF_MUL( in100, 187 ); \ + in96 = GF_ADD( in96, in100 ); \ + in68 = GF_ADD( in68, in100 ); \ + in36 = GF_ADD( in36, in100 ); \ + in100 = gf_zero(); \ + in101 = GF_MUL( in101, 187 ); \ + in100 = GF_ADD( in100, in101 ); \ + in97 = GF_ADD( in97, in101 ); \ + in69 = GF_ADD( in69, in101 ); \ + in37 = GF_ADD( in37, in101 ); \ + in101 = gf_zero(); \ + in102 = GF_MUL( in102, 186 ); \ + in100 = GF_ADD( in100, in102 ); \ + in98 = GF_ADD( in98, in102 ); \ + in70 = GF_ADD( in70, in102 ); \ + in38 = GF_ADD( in38, in102 ); \ + in102 = gf_zero(); \ + in103 = GF_MUL( in103, 186 ); \ + in102 = GF_ADD( in102, in103 ); \ + in101 = GF_ADD( in101, in103 ); \ + in99 = GF_ADD( in99, in103 ); \ + in71 = GF_ADD( in71, in103 ); \ + in39 = GF_ADD( in39, in103 ); \ + in103 = gf_zero(); \ + in104 = GF_MUL( in104, 36 ); \ + in96 = GF_ADD( in96, in104 ); \ + in72 = GF_ADD( in72, in104 ); \ + in40 = GF_ADD( in40, in104 ); \ + in104 = gf_zero(); \ 
+ in105 = GF_MUL( in105, 36 ); \ + in104 = GF_ADD( in104, in105 ); \ + in97 = GF_ADD( in97, in105 ); \ + in73 = GF_ADD( in73, in105 ); \ + in41 = GF_ADD( in41, in105 ); \ + in105 = gf_zero(); \ + in106 = GF_MUL( in106, 14 ); \ + in104 = GF_ADD( in104, in106 ); \ + in98 = GF_ADD( in98, in106 ); \ + in74 = GF_ADD( in74, in106 ); \ + in42 = GF_ADD( in42, in106 ); \ + in106 = gf_zero(); \ + in107 = GF_MUL( in107, 14 ); \ + in106 = GF_ADD( in106, in107 ); \ + in105 = GF_ADD( in105, in107 ); \ + in99 = GF_ADD( in99, in107 ); \ + in75 = GF_ADD( in75, in107 ); \ + in43 = GF_ADD( in43, in107 ); \ + in107 = gf_zero(); \ + in108 = GF_MUL( in108, 244 ); \ + in104 = GF_ADD( in104, in108 ); \ + in100 = GF_ADD( in100, in108 ); \ + in76 = GF_ADD( in76, in108 ); \ + in44 = GF_ADD( in44, in108 ); \ + in108 = gf_zero(); \ + in109 = GF_MUL( in109, 244 ); \ + in108 = GF_ADD( in108, in109 ); \ + in105 = GF_ADD( in105, in109 ); \ + in101 = GF_ADD( in101, in109 ); \ + in77 = GF_ADD( in77, in109 ); \ + in45 = GF_ADD( in45, in109 ); \ + in109 = gf_zero(); \ + in110 = GF_MUL( in110, 221 ); \ + in108 = GF_ADD( in108, in110 ); \ + in106 = GF_ADD( in106, in110 ); \ + in102 = GF_ADD( in102, in110 ); \ + in78 = GF_ADD( in78, in110 ); \ + in46 = GF_ADD( in46, in110 ); \ + in110 = gf_zero(); \ + in111 = GF_MUL( in111, 221 ); \ + in110 = GF_ADD( in110, in111 ); \ + in109 = GF_ADD( in109, in111 ); \ + in107 = GF_ADD( in107, in111 ); \ + in103 = GF_ADD( in103, in111 ); \ + in79 = GF_ADD( in79, in111 ); \ + in47 = GF_ADD( in47, in111 ); \ + in111 = gf_zero(); \ + in112 = GF_MUL( in112, 65 ); \ + in96 = GF_ADD( in96, in112 ); \ + in80 = GF_ADD( in80, in112 ); \ + in48 = GF_ADD( in48, in112 ); \ + in112 = gf_zero(); \ + in113 = GF_MUL( in113, 65 ); \ + in112 = GF_ADD( in112, in113 ); \ + in97 = GF_ADD( in97, in113 ); \ + in81 = GF_ADD( in81, in113 ); \ + in49 = GF_ADD( in49, in113 ); \ + in113 = gf_zero(); \ + in114 = GF_MUL( in114, 145 ); \ + in112 = GF_ADD( in112, in114 ); \ + in98 = GF_ADD( in98, in114 ); \ + in82 = GF_ADD( in82, in114 ); \ + in50 = GF_ADD( in50, in114 ); \ + in114 = gf_zero(); \ + in115 = GF_MUL( in115, 145 ); \ + in114 = GF_ADD( in114, in115 ); \ + in113 = GF_ADD( in113, in115 ); \ + in99 = GF_ADD( in99, in115 ); \ + in83 = GF_ADD( in83, in115 ); \ + in51 = GF_ADD( in51, in115 ); \ + in115 = gf_zero(); \ + in116 = GF_MUL( in116, 200 ); \ + in112 = GF_ADD( in112, in116 ); \ + in100 = GF_ADD( in100, in116 ); \ + in84 = GF_ADD( in84, in116 ); \ + in52 = GF_ADD( in52, in116 ); \ + in116 = gf_zero(); \ + in117 = GF_MUL( in117, 200 ); \ + in116 = GF_ADD( in116, in117 ); \ + in113 = GF_ADD( in113, in117 ); \ + in101 = GF_ADD( in101, in117 ); \ + in85 = GF_ADD( in85, in117 ); \ + in53 = GF_ADD( in53, in117 ); \ + in117 = gf_zero(); \ + in118 = GF_MUL( in118, 215 ); \ + in116 = GF_ADD( in116, in118 ); \ + in114 = GF_ADD( in114, in118 ); \ + in102 = GF_ADD( in102, in118 ); \ + in86 = GF_ADD( in86, in118 ); \ + in54 = GF_ADD( in54, in118 ); \ + in118 = gf_zero(); \ + in119 = GF_MUL( in119, 215 ); \ + in118 = GF_ADD( in118, in119 ); \ + in117 = GF_ADD( in117, in119 ); \ + in115 = GF_ADD( in115, in119 ); \ + in103 = GF_ADD( in103, in119 ); \ + in87 = GF_ADD( in87, in119 ); \ + in55 = GF_ADD( in55, in119 ); \ + in119 = gf_zero(); \ + in120 = GF_MUL( in120, 187 ); \ + in112 = GF_ADD( in112, in120 ); \ + in104 = GF_ADD( in104, in120 ); \ + in88 = GF_ADD( in88, in120 ); \ + in56 = GF_ADD( in56, in120 ); \ + in120 = gf_zero(); \ + in121 = GF_MUL( in121, 187 ); \ + in120 = GF_ADD( in120, in121 ); \ + in113 = GF_ADD( in113, 
in121 ); \ + in105 = GF_ADD( in105, in121 ); \ + in89 = GF_ADD( in89, in121 ); \ + in57 = GF_ADD( in57, in121 ); \ + in121 = gf_zero(); \ + in122 = GF_MUL( in122, 186 ); \ + in120 = GF_ADD( in120, in122 ); \ + in114 = GF_ADD( in114, in122 ); \ + in106 = GF_ADD( in106, in122 ); \ + in90 = GF_ADD( in90, in122 ); \ + in58 = GF_ADD( in58, in122 ); \ + in122 = gf_zero(); \ + in123 = GF_MUL( in123, 186 ); \ + in122 = GF_ADD( in122, in123 ); \ + in121 = GF_ADD( in121, in123 ); \ + in115 = GF_ADD( in115, in123 ); \ + in107 = GF_ADD( in107, in123 ); \ + in91 = GF_ADD( in91, in123 ); \ + in59 = GF_ADD( in59, in123 ); \ + in123 = gf_zero(); \ + in124 = GF_MUL( in124, 254 ); \ + in120 = GF_ADD( in120, in124 ); \ + in116 = GF_ADD( in116, in124 ); \ + in108 = GF_ADD( in108, in124 ); \ + in92 = GF_ADD( in92, in124 ); \ + in60 = GF_ADD( in60, in124 ); \ + in124 = gf_zero(); \ + in125 = GF_MUL( in125, 254 ); \ + in124 = GF_ADD( in124, in125 ); \ + in121 = GF_ADD( in121, in125 ); \ + in117 = GF_ADD( in117, in125 ); \ + in109 = GF_ADD( in109, in125 ); \ + in93 = GF_ADD( in93, in125 ); \ + in61 = GF_ADD( in61, in125 ); \ + in125 = gf_zero(); \ + in126 = GF_MUL( in126, 222 ); \ + in124 = GF_ADD( in124, in126 ); \ + in122 = GF_ADD( in122, in126 ); \ + in118 = GF_ADD( in118, in126 ); \ + in110 = GF_ADD( in110, in126 ); \ + in94 = GF_ADD( in94, in126 ); \ + in62 = GF_ADD( in62, in126 ); \ + in126 = gf_zero(); \ + in127 = GF_MUL( in127, 222 ); \ + in126 = GF_ADD( in126, in127 ); \ + in125 = GF_ADD( in125, in127 ); \ + in123 = GF_ADD( in123, in127 ); \ + in119 = GF_ADD( in119, in127 ); \ + in111 = GF_ADD( in111, in127 ); \ + in95 = GF_ADD( in95, in127 ); \ + in63 = GF_ADD( in63, in127 ); \ + in127 = gf_zero(); \ + in128 = GF_MUL( in128, 71 ); \ + in00 = GF_ADD( in00, in128 ); \ + in128 = gf_zero(); \ + in129 = GF_MUL( in129, 71 ); \ + in128 = GF_ADD( in128, in129 ); \ + in01 = GF_ADD( in01, in129 ); \ + in129 = gf_zero(); \ + in130 = GF_MUL( in130, 144 ); \ + in128 = GF_ADD( in128, in130 ); \ + in02 = GF_ADD( in02, in130 ); \ + in130 = gf_zero(); \ + in131 = GF_MUL( in131, 144 ); \ + in130 = GF_ADD( in130, in131 ); \ + in129 = GF_ADD( in129, in131 ); \ + in03 = GF_ADD( in03, in131 ); \ + in131 = gf_zero(); \ + in132 = GF_MUL( in132, 8 ); \ + in128 = GF_ADD( in128, in132 ); \ + in04 = GF_ADD( in04, in132 ); \ + in132 = gf_zero(); \ + in133 = GF_MUL( in133, 8 ); \ + in132 = GF_ADD( in132, in133 ); \ + in129 = GF_ADD( in129, in133 ); \ + in05 = GF_ADD( in05, in133 ); \ + in133 = gf_zero(); \ + in134 = GF_MUL( in134, 247 ); \ + in132 = GF_ADD( in132, in134 ); \ + in130 = GF_ADD( in130, in134 ); \ + in06 = GF_ADD( in06, in134 ); \ + in134 = gf_zero(); \ + in135 = GF_MUL( in135, 247 ); \ + in134 = GF_ADD( in134, in135 ); \ + in133 = GF_ADD( in133, in135 ); \ + in131 = GF_ADD( in131, in135 ); \ + in07 = GF_ADD( in07, in135 ); \ + in135 = gf_zero(); \ + in136 = GF_MUL( in136, 77 ); \ + in128 = GF_ADD( in128, in136 ); \ + in08 = GF_ADD( in08, in136 ); \ + in136 = gf_zero(); \ + in137 = GF_MUL( in137, 77 ); \ + in136 = GF_ADD( in136, in137 ); \ + in129 = GF_ADD( in129, in137 ); \ + in09 = GF_ADD( in09, in137 ); \ + in137 = gf_zero(); \ + in138 = GF_MUL( in138, 147 ); \ + in136 = GF_ADD( in136, in138 ); \ + in130 = GF_ADD( in130, in138 ); \ + in10 = GF_ADD( in10, in138 ); \ + in138 = gf_zero(); \ + in139 = GF_MUL( in139, 147 ); \ + in138 = GF_ADD( in138, in139 ); \ + in137 = GF_ADD( in137, in139 ); \ + in131 = GF_ADD( in131, in139 ); \ + in11 = GF_ADD( in11, in139 ); \ + in139 = gf_zero(); \ + in140 = GF_MUL( in140, 85 ); \ + 
in136 = GF_ADD( in136, in140 ); \ + in132 = GF_ADD( in132, in140 ); \ + in12 = GF_ADD( in12, in140 ); \ + in140 = gf_zero(); \ + in141 = GF_MUL( in141, 85 ); \ + in140 = GF_ADD( in140, in141 ); \ + in137 = GF_ADD( in137, in141 ); \ + in133 = GF_ADD( in133, in141 ); \ + in13 = GF_ADD( in13, in141 ); \ + in141 = gf_zero(); \ + in142 = GF_MUL( in142, 151 ); \ + in140 = GF_ADD( in140, in142 ); \ + in138 = GF_ADD( in138, in142 ); \ + in134 = GF_ADD( in134, in142 ); \ + in14 = GF_ADD( in14, in142 ); \ + in142 = gf_zero(); \ + in143 = GF_MUL( in143, 151 ); \ + in142 = GF_ADD( in142, in143 ); \ + in141 = GF_ADD( in141, in143 ); \ + in139 = GF_ADD( in139, in143 ); \ + in135 = GF_ADD( in135, in143 ); \ + in15 = GF_ADD( in15, in143 ); \ + in143 = gf_zero(); \ + in144 = GF_MUL( in144, 206 ); \ + in128 = GF_ADD( in128, in144 ); \ + in16 = GF_ADD( in16, in144 ); \ + in144 = gf_zero(); \ + in145 = GF_MUL( in145, 206 ); \ + in144 = GF_ADD( in144, in145 ); \ + in129 = GF_ADD( in129, in145 ); \ + in17 = GF_ADD( in17, in145 ); \ + in145 = gf_zero(); \ + in146 = GF_MUL( in146, 214 ); \ + in144 = GF_ADD( in144, in146 ); \ + in130 = GF_ADD( in130, in146 ); \ + in18 = GF_ADD( in18, in146 ); \ + in146 = gf_zero(); \ + in147 = GF_MUL( in147, 214 ); \ + in146 = GF_ADD( in146, in147 ); \ + in145 = GF_ADD( in145, in147 ); \ + in131 = GF_ADD( in131, in147 ); \ + in19 = GF_ADD( in19, in147 ); \ + in147 = gf_zero(); \ + in148 = GF_MUL( in148, 248 ); \ + in144 = GF_ADD( in144, in148 ); \ + in132 = GF_ADD( in132, in148 ); \ + in20 = GF_ADD( in20, in148 ); \ + in148 = gf_zero(); \ + in149 = GF_MUL( in149, 248 ); \ + in148 = GF_ADD( in148, in149 ); \ + in145 = GF_ADD( in145, in149 ); \ + in133 = GF_ADD( in133, in149 ); \ + in21 = GF_ADD( in21, in149 ); \ + in149 = gf_zero(); \ + in150 = GF_MUL( in150, 223 ); \ + in148 = GF_ADD( in148, in150 ); \ + in146 = GF_ADD( in146, in150 ); \ + in134 = GF_ADD( in134, in150 ); \ + in22 = GF_ADD( in22, in150 ); \ + in150 = gf_zero(); \ + in151 = GF_MUL( in151, 223 ); \ + in150 = GF_ADD( in150, in151 ); \ + in149 = GF_ADD( in149, in151 ); \ + in147 = GF_ADD( in147, in151 ); \ + in135 = GF_ADD( in135, in151 ); \ + in23 = GF_ADD( in23, in151 ); \ + in151 = gf_zero(); \ + in152 = GF_MUL( in152, 8 ); \ + in144 = GF_ADD( in144, in152 ); \ + in136 = GF_ADD( in136, in152 ); \ + in24 = GF_ADD( in24, in152 ); \ + in152 = gf_zero(); \ + in153 = GF_MUL( in153, 8 ); \ + in152 = GF_ADD( in152, in153 ); \ + in145 = GF_ADD( in145, in153 ); \ + in137 = GF_ADD( in137, in153 ); \ + in25 = GF_ADD( in25, in153 ); \ + in153 = gf_zero(); \ + in154 = GF_MUL( in154, 247 ); \ + in152 = GF_ADD( in152, in154 ); \ + in146 = GF_ADD( in146, in154 ); \ + in138 = GF_ADD( in138, in154 ); \ + in26 = GF_ADD( in26, in154 ); \ + in154 = gf_zero(); \ + in155 = GF_MUL( in155, 247 ); \ + in154 = GF_ADD( in154, in155 ); \ + in153 = GF_ADD( in153, in155 ); \ + in147 = GF_ADD( in147, in155 ); \ + in139 = GF_ADD( in139, in155 ); \ + in27 = GF_ADD( in27, in155 ); \ + in155 = gf_zero(); \ + in156 = GF_MUL( in156, 29 ); \ + in152 = GF_ADD( in152, in156 ); \ + in148 = GF_ADD( in148, in156 ); \ + in140 = GF_ADD( in140, in156 ); \ + in28 = GF_ADD( in28, in156 ); \ + in156 = gf_zero(); \ + in157 = GF_MUL( in157, 29 ); \ + in156 = GF_ADD( in156, in157 ); \ + in153 = GF_ADD( in153, in157 ); \ + in149 = GF_ADD( in149, in157 ); \ + in141 = GF_ADD( in141, in157 ); \ + in29 = GF_ADD( in29, in157 ); \ + in157 = gf_zero(); \ + in158 = GF_MUL( in158, 139 ); \ + in156 = GF_ADD( in156, in158 ); \ + in154 = GF_ADD( in154, in158 ); \ + in150 = GF_ADD( 
in150, in158 ); \ + in142 = GF_ADD( in142, in158 ); \ + in30 = GF_ADD( in30, in158 ); \ + in158 = gf_zero(); \ + in159 = GF_MUL( in159, 139 ); \ + in158 = GF_ADD( in158, in159 ); \ + in157 = GF_ADD( in157, in159 ); \ + in155 = GF_ADD( in155, in159 ); \ + in151 = GF_ADD( in151, in159 ); \ + in143 = GF_ADD( in143, in159 ); \ + in31 = GF_ADD( in31, in159 ); \ + in159 = gf_zero(); \ + in160 = GF_MUL( in160, 190 ); \ + in128 = GF_ADD( in128, in160 ); \ + in32 = GF_ADD( in32, in160 ); \ + in160 = gf_zero(); \ + in161 = GF_MUL( in161, 190 ); \ + in160 = GF_ADD( in160, in161 ); \ + in129 = GF_ADD( in129, in161 ); \ + in33 = GF_ADD( in33, in161 ); \ + in161 = gf_zero(); \ + in162 = GF_MUL( in162, 53 ); \ + in160 = GF_ADD( in160, in162 ); \ + in130 = GF_ADD( in130, in162 ); \ + in34 = GF_ADD( in34, in162 ); \ + in162 = gf_zero(); \ + in163 = GF_MUL( in163, 53 ); \ + in162 = GF_ADD( in162, in163 ); \ + in161 = GF_ADD( in161, in163 ); \ + in131 = GF_ADD( in131, in163 ); \ + in35 = GF_ADD( in35, in163 ); \ + in163 = gf_zero(); \ + in164 = GF_MUL( in164, 94 ); \ + in160 = GF_ADD( in160, in164 ); \ + in132 = GF_ADD( in132, in164 ); \ + in36 = GF_ADD( in36, in164 ); \ + in164 = gf_zero(); \ + in165 = GF_MUL( in165, 94 ); \ + in164 = GF_ADD( in164, in165 ); \ + in161 = GF_ADD( in161, in165 ); \ + in133 = GF_ADD( in133, in165 ); \ + in37 = GF_ADD( in37, in165 ); \ + in165 = gf_zero(); \ + in166 = GF_MUL( in166, 238 ); \ + in164 = GF_ADD( in164, in166 ); \ + in162 = GF_ADD( in162, in166 ); \ + in134 = GF_ADD( in134, in166 ); \ + in38 = GF_ADD( in38, in166 ); \ + in166 = gf_zero(); \ + in167 = GF_MUL( in167, 238 ); \ + in166 = GF_ADD( in166, in167 ); \ + in165 = GF_ADD( in165, in167 ); \ + in163 = GF_ADD( in163, in167 ); \ + in135 = GF_ADD( in135, in167 ); \ + in39 = GF_ADD( in39, in167 ); \ + in167 = gf_zero(); \ + in168 = GF_MUL( in168, 121 ); \ + in160 = GF_ADD( in160, in168 ); \ + in136 = GF_ADD( in136, in168 ); \ + in40 = GF_ADD( in40, in168 ); \ + in168 = gf_zero(); \ + in169 = GF_MUL( in169, 121 ); \ + in168 = GF_ADD( in168, in169 ); \ + in161 = GF_ADD( in161, in169 ); \ + in137 = GF_ADD( in137, in169 ); \ + in41 = GF_ADD( in41, in169 ); \ + in169 = gf_zero(); \ + in170 = GF_MUL( in170, 110 ); \ + in168 = GF_ADD( in168, in170 ); \ + in162 = GF_ADD( in162, in170 ); \ + in138 = GF_ADD( in138, in170 ); \ + in42 = GF_ADD( in42, in170 ); \ + in170 = gf_zero(); \ + in171 = GF_MUL( in171, 110 ); \ + in170 = GF_ADD( in170, in171 ); \ + in169 = GF_ADD( in169, in171 ); \ + in163 = GF_ADD( in163, in171 ); \ + in139 = GF_ADD( in139, in171 ); \ + in43 = GF_ADD( in43, in171 ); \ + in171 = gf_zero(); \ + in172 = GF_MUL( in172, 155 ); \ + in168 = GF_ADD( in168, in172 ); \ + in164 = GF_ADD( in164, in172 ); \ + in140 = GF_ADD( in140, in172 ); \ + in44 = GF_ADD( in44, in172 ); \ + in172 = gf_zero(); \ + in173 = GF_MUL( in173, 155 ); \ + in172 = GF_ADD( in172, in173 ); \ + in169 = GF_ADD( in169, in173 ); \ + in165 = GF_ADD( in165, in173 ); \ + in141 = GF_ADD( in141, in173 ); \ + in45 = GF_ADD( in45, in173 ); \ + in173 = gf_zero(); \ + in174 = GF_MUL( in174, 65 ); \ + in172 = GF_ADD( in172, in174 ); \ + in170 = GF_ADD( in170, in174 ); \ + in166 = GF_ADD( in166, in174 ); \ + in142 = GF_ADD( in142, in174 ); \ + in46 = GF_ADD( in46, in174 ); \ + in174 = gf_zero(); \ + in175 = GF_MUL( in175, 65 ); \ + in174 = GF_ADD( in174, in175 ); \ + in173 = GF_ADD( in173, in175 ); \ + in171 = GF_ADD( in171, in175 ); \ + in167 = GF_ADD( in167, in175 ); \ + in143 = GF_ADD( in143, in175 ); \ + in47 = GF_ADD( in47, in175 ); \ + in175 = 
gf_zero(); \ + in176 = GF_MUL( in176, 171 ); \ + in160 = GF_ADD( in160, in176 ); \ + in144 = GF_ADD( in144, in176 ); \ + in48 = GF_ADD( in48, in176 ); \ + in176 = gf_zero(); \ + in177 = GF_MUL( in177, 171 ); \ + in176 = GF_ADD( in176, in177 ); \ + in161 = GF_ADD( in161, in177 ); \ + in145 = GF_ADD( in145, in177 ); \ + in49 = GF_ADD( in49, in177 ); \ + in177 = gf_zero(); \ + in178 = GF_MUL( in178, 73 ); \ + in176 = GF_ADD( in176, in178 ); \ + in162 = GF_ADD( in162, in178 ); \ + in146 = GF_ADD( in146, in178 ); \ + in50 = GF_ADD( in50, in178 ); \ + in178 = gf_zero(); \ + in179 = GF_MUL( in179, 73 ); \ + in178 = GF_ADD( in178, in179 ); \ + in177 = GF_ADD( in177, in179 ); \ + in163 = GF_ADD( in163, in179 ); \ + in147 = GF_ADD( in147, in179 ); \ + in51 = GF_ADD( in51, in179 ); \ + in179 = gf_zero(); \ + in180 = GF_MUL( in180, 196 ); \ + in176 = GF_ADD( in176, in180 ); \ + in164 = GF_ADD( in164, in180 ); \ + in148 = GF_ADD( in148, in180 ); \ + in52 = GF_ADD( in52, in180 ); \ + in180 = gf_zero(); \ + in181 = GF_MUL( in181, 196 ); \ + in180 = GF_ADD( in180, in181 ); \ + in177 = GF_ADD( in177, in181 ); \ + in165 = GF_ADD( in165, in181 ); \ + in149 = GF_ADD( in149, in181 ); \ + in53 = GF_ADD( in53, in181 ); \ + in181 = gf_zero(); \ + in182 = GF_MUL( in182, 213 ); \ + in180 = GF_ADD( in180, in182 ); \ + in178 = GF_ADD( in178, in182 ); \ + in166 = GF_ADD( in166, in182 ); \ + in150 = GF_ADD( in150, in182 ); \ + in54 = GF_ADD( in54, in182 ); \ + in182 = gf_zero(); \ + in183 = GF_MUL( in183, 213 ); \ + in182 = GF_ADD( in182, in183 ); \ + in181 = GF_ADD( in181, in183 ); \ + in179 = GF_ADD( in179, in183 ); \ + in167 = GF_ADD( in167, in183 ); \ + in151 = GF_ADD( in151, in183 ); \ + in55 = GF_ADD( in55, in183 ); \ + in183 = gf_zero(); \ + in184 = GF_MUL( in184, 94 ); \ + in176 = GF_ADD( in176, in184 ); \ + in168 = GF_ADD( in168, in184 ); \ + in152 = GF_ADD( in152, in184 ); \ + in56 = GF_ADD( in56, in184 ); \ + in184 = gf_zero(); \ + in185 = GF_MUL( in185, 94 ); \ + in184 = GF_ADD( in184, in185 ); \ + in177 = GF_ADD( in177, in185 ); \ + in169 = GF_ADD( in169, in185 ); \ + in153 = GF_ADD( in153, in185 ); \ + in57 = GF_ADD( in57, in185 ); \ + in185 = gf_zero(); \ + in186 = GF_MUL( in186, 238 ); \ + in184 = GF_ADD( in184, in186 ); \ + in178 = GF_ADD( in178, in186 ); \ + in170 = GF_ADD( in170, in186 ); \ + in154 = GF_ADD( in154, in186 ); \ + in58 = GF_ADD( in58, in186 ); \ + in186 = gf_zero(); \ + in187 = GF_MUL( in187, 238 ); \ + in186 = GF_ADD( in186, in187 ); \ + in185 = GF_ADD( in185, in187 ); \ + in179 = GF_ADD( in179, in187 ); \ + in171 = GF_ADD( in171, in187 ); \ + in155 = GF_ADD( in155, in187 ); \ + in59 = GF_ADD( in59, in187 ); \ + in187 = gf_zero(); \ + in188 = GF_MUL( in188, 15 ); \ + in184 = GF_ADD( in184, in188 ); \ + in180 = GF_ADD( in180, in188 ); \ + in172 = GF_ADD( in172, in188 ); \ + in156 = GF_ADD( in156, in188 ); \ + in60 = GF_ADD( in60, in188 ); \ + in188 = gf_zero(); \ + in189 = GF_MUL( in189, 15 ); \ + in188 = GF_ADD( in188, in189 ); \ + in185 = GF_ADD( in185, in189 ); \ + in181 = GF_ADD( in181, in189 ); \ + in173 = GF_ADD( in173, in189 ); \ + in157 = GF_ADD( in157, in189 ); \ + in61 = GF_ADD( in61, in189 ); \ + in189 = gf_zero(); \ + in190 = GF_MUL( in190, 140 ); \ + in188 = GF_ADD( in188, in190 ); \ + in186 = GF_ADD( in186, in190 ); \ + in182 = GF_ADD( in182, in190 ); \ + in174 = GF_ADD( in174, in190 ); \ + in158 = GF_ADD( in158, in190 ); \ + in62 = GF_ADD( in62, in190 ); \ + in190 = gf_zero(); \ + in191 = GF_MUL( in191, 140 ); \ + in190 = GF_ADD( in190, in191 ); \ + in189 = GF_ADD( in189, 
in191 ); \ + in187 = GF_ADD( in187, in191 ); \ + in183 = GF_ADD( in183, in191 ); \ + in175 = GF_ADD( in175, in191 ); \ + in159 = GF_ADD( in159, in191 ); \ + in63 = GF_ADD( in63, in191 ); \ + in191 = gf_zero(); \ + in192 = GF_MUL( in192, 58 ); \ + in128 = GF_ADD( in128, in192 ); \ + in64 = GF_ADD( in64, in192 ); \ + in192 = gf_zero(); \ + in193 = GF_MUL( in193, 58 ); \ + in192 = GF_ADD( in192, in193 ); \ + in129 = GF_ADD( in129, in193 ); \ + in65 = GF_ADD( in65, in193 ); \ + in193 = gf_zero(); \ + in194 = GF_MUL( in194, 11 ); \ + in192 = GF_ADD( in192, in194 ); \ + in130 = GF_ADD( in130, in194 ); \ + in66 = GF_ADD( in66, in194 ); \ + in194 = gf_zero(); \ + in195 = GF_MUL( in195, 11 ); \ + in194 = GF_ADD( in194, in195 ); \ + in193 = GF_ADD( in193, in195 ); \ + in131 = GF_ADD( in131, in195 ); \ + in67 = GF_ADD( in67, in195 ); \ + in195 = gf_zero(); \ + in196 = GF_MUL( in196, 19 ); \ + in192 = GF_ADD( in192, in196 ); \ + in132 = GF_ADD( in132, in196 ); \ + in68 = GF_ADD( in68, in196 ); \ + in196 = gf_zero(); \ + in197 = GF_MUL( in197, 19 ); \ + in196 = GF_ADD( in196, in197 ); \ + in193 = GF_ADD( in193, in197 ); \ + in133 = GF_ADD( in133, in197 ); \ + in69 = GF_ADD( in69, in197 ); \ + in197 = gf_zero(); \ + in198 = GF_MUL( in198, 125 ); \ + in196 = GF_ADD( in196, in198 ); \ + in194 = GF_ADD( in194, in198 ); \ + in134 = GF_ADD( in134, in198 ); \ + in70 = GF_ADD( in70, in198 ); \ + in198 = gf_zero(); \ + in199 = GF_MUL( in199, 125 ); \ + in198 = GF_ADD( in198, in199 ); \ + in197 = GF_ADD( in197, in199 ); \ + in195 = GF_ADD( in195, in199 ); \ + in135 = GF_ADD( in135, in199 ); \ + in71 = GF_ADD( in71, in199 ); \ + in199 = gf_zero(); \ + in200 = GF_MUL( in200, 228 ); \ + in192 = GF_ADD( in192, in200 ); \ + in136 = GF_ADD( in136, in200 ); \ + in72 = GF_ADD( in72, in200 ); \ + in200 = gf_zero(); \ + in201 = GF_MUL( in201, 228 ); \ + in200 = GF_ADD( in200, in201 ); \ + in193 = GF_ADD( in193, in201 ); \ + in137 = GF_ADD( in137, in201 ); \ + in73 = GF_ADD( in73, in201 ); \ + in201 = gf_zero(); \ + in202 = GF_MUL( in202, 46 ); \ + in200 = GF_ADD( in200, in202 ); \ + in194 = GF_ADD( in194, in202 ); \ + in138 = GF_ADD( in138, in202 ); \ + in74 = GF_ADD( in74, in202 ); \ + in202 = gf_zero(); \ + in203 = GF_MUL( in203, 46 ); \ + in202 = GF_ADD( in202, in203 ); \ + in201 = GF_ADD( in201, in203 ); \ + in195 = GF_ADD( in195, in203 ); \ + in139 = GF_ADD( in139, in203 ); \ + in75 = GF_ADD( in75, in203 ); \ + in203 = gf_zero(); \ + in204 = GF_MUL( in204, 209 ); \ + in200 = GF_ADD( in200, in204 ); \ + in196 = GF_ADD( in196, in204 ); \ + in140 = GF_ADD( in140, in204 ); \ + in76 = GF_ADD( in76, in204 ); \ + in204 = gf_zero(); \ + in205 = GF_MUL( in205, 209 ); \ + in204 = GF_ADD( in204, in205 ); \ + in201 = GF_ADD( in201, in205 ); \ + in197 = GF_ADD( in197, in205 ); \ + in141 = GF_ADD( in141, in205 ); \ + in77 = GF_ADD( in77, in205 ); \ + in205 = gf_zero(); \ + in206 = GF_MUL( in206, 169 ); \ + in204 = GF_ADD( in204, in206 ); \ + in202 = GF_ADD( in202, in206 ); \ + in198 = GF_ADD( in198, in206 ); \ + in142 = GF_ADD( in142, in206 ); \ + in78 = GF_ADD( in78, in206 ); \ + in206 = gf_zero(); \ + in207 = GF_MUL( in207, 169 ); \ + in206 = GF_ADD( in206, in207 ); \ + in205 = GF_ADD( in205, in207 ); \ + in203 = GF_ADD( in203, in207 ); \ + in199 = GF_ADD( in199, in207 ); \ + in143 = GF_ADD( in143, in207 ); \ + in79 = GF_ADD( in79, in207 ); \ + in207 = gf_zero(); \ + in208 = GF_MUL( in208, 236 ); \ + in192 = GF_ADD( in192, in208 ); \ + in144 = GF_ADD( in144, in208 ); \ + in80 = GF_ADD( in80, in208 ); \ + in208 = gf_zero(); \ + 
in209 = GF_MUL( in209, 236 ); \ + in208 = GF_ADD( in208, in209 ); \ + in193 = GF_ADD( in193, in209 ); \ + in145 = GF_ADD( in145, in209 ); \ + in81 = GF_ADD( in81, in209 ); \ + in209 = gf_zero(); \ + in210 = GF_MUL( in210, 217 ); \ + in208 = GF_ADD( in208, in210 ); \ + in194 = GF_ADD( in194, in210 ); \ + in146 = GF_ADD( in146, in210 ); \ + in82 = GF_ADD( in82, in210 ); \ + in210 = gf_zero(); \ + in211 = GF_MUL( in211, 217 ); \ + in210 = GF_ADD( in210, in211 ); \ + in209 = GF_ADD( in209, in211 ); \ + in195 = GF_ADD( in195, in211 ); \ + in147 = GF_ADD( in147, in211 ); \ + in83 = GF_ADD( in83, in211 ); \ + in211 = gf_zero(); \ + in212 = GF_MUL( in212, 204 ); \ + in208 = GF_ADD( in208, in212 ); \ + in196 = GF_ADD( in196, in212 ); \ + in148 = GF_ADD( in148, in212 ); \ + in84 = GF_ADD( in84, in212 ); \ + in212 = gf_zero(); \ + in213 = GF_MUL( in213, 204 ); \ + in212 = GF_ADD( in212, in213 ); \ + in209 = GF_ADD( in209, in213 ); \ + in197 = GF_ADD( in197, in213 ); \ + in149 = GF_ADD( in149, in213 ); \ + in85 = GF_ADD( in85, in213 ); \ + in213 = gf_zero(); \ + in214 = GF_MUL( in214, 34 ); \ + in212 = GF_ADD( in212, in214 ); \ + in210 = GF_ADD( in210, in214 ); \ + in198 = GF_ADD( in198, in214 ); \ + in150 = GF_ADD( in150, in214 ); \ + in86 = GF_ADD( in86, in214 ); \ + in214 = gf_zero(); \ + in215 = GF_MUL( in215, 34 ); \ + in214 = GF_ADD( in214, in215 ); \ + in213 = GF_ADD( in213, in215 ); \ + in211 = GF_ADD( in211, in215 ); \ + in199 = GF_ADD( in199, in215 ); \ + in151 = GF_ADD( in151, in215 ); \ + in87 = GF_ADD( in87, in215 ); \ + in215 = gf_zero(); \ + in216 = GF_MUL( in216, 19 ); \ + in208 = GF_ADD( in208, in216 ); \ + in200 = GF_ADD( in200, in216 ); \ + in152 = GF_ADD( in152, in216 ); \ + in88 = GF_ADD( in88, in216 ); \ + in216 = gf_zero(); \ + in217 = GF_MUL( in217, 19 ); \ + in216 = GF_ADD( in216, in217 ); \ + in209 = GF_ADD( in209, in217 ); \ + in201 = GF_ADD( in201, in217 ); \ + in153 = GF_ADD( in153, in217 ); \ + in89 = GF_ADD( in89, in217 ); \ + in217 = gf_zero(); \ + in218 = GF_MUL( in218, 125 ); \ + in216 = GF_ADD( in216, in218 ); \ + in210 = GF_ADD( in210, in218 ); \ + in202 = GF_ADD( in202, in218 ); \ + in154 = GF_ADD( in154, in218 ); \ + in90 = GF_ADD( in90, in218 ); \ + in218 = gf_zero(); \ + in219 = GF_MUL( in219, 125 ); \ + in218 = GF_ADD( in218, in219 ); \ + in217 = GF_ADD( in217, in219 ); \ + in211 = GF_ADD( in211, in219 ); \ + in203 = GF_ADD( in203, in219 ); \ + in155 = GF_ADD( in155, in219 ); \ + in91 = GF_ADD( in91, in219 ); \ + in219 = gf_zero(); \ + in220 = GF_MUL( in220, 90 ); \ + in216 = GF_ADD( in216, in220 ); \ + in212 = GF_ADD( in212, in220 ); \ + in204 = GF_ADD( in204, in220 ); \ + in156 = GF_ADD( in156, in220 ); \ + in92 = GF_ADD( in92, in220 ); \ + in220 = gf_zero(); \ + in221 = GF_MUL( in221, 90 ); \ + in220 = GF_ADD( in220, in221 ); \ + in217 = GF_ADD( in217, in221 ); \ + in213 = GF_ADD( in213, in221 ); \ + in205 = GF_ADD( in205, in221 ); \ + in157 = GF_ADD( in157, in221 ); \ + in93 = GF_ADD( in93, in221 ); \ + in221 = gf_zero(); \ + in222 = GF_MUL( in222, 27 ); \ + in220 = GF_ADD( in220, in222 ); \ + in218 = GF_ADD( in218, in222 ); \ + in214 = GF_ADD( in214, in222 ); \ + in206 = GF_ADD( in206, in222 ); \ + in158 = GF_ADD( in158, in222 ); \ + in94 = GF_ADD( in94, in222 ); \ + in222 = gf_zero(); \ + in223 = GF_MUL( in223, 27 ); \ + in222 = GF_ADD( in222, in223 ); \ + in221 = GF_ADD( in221, in223 ); \ + in219 = GF_ADD( in219, in223 ); \ + in215 = GF_ADD( in215, in223 ); \ + in207 = GF_ADD( in207, in223 ); \ + in159 = GF_ADD( in159, in223 ); \ + in95 = GF_ADD( in95, 
in223 ); \ + in223 = gf_zero(); \ + in224 = GF_MUL( in224, 30 ); \ + in192 = GF_ADD( in192, in224 ); \ + in160 = GF_ADD( in160, in224 ); \ + in96 = GF_ADD( in96, in224 ); \ + in224 = gf_zero(); \ + in225 = GF_MUL( in225, 30 ); \ + in224 = GF_ADD( in224, in225 ); \ + in193 = GF_ADD( in193, in225 ); \ + in161 = GF_ADD( in161, in225 ); \ + in97 = GF_ADD( in97, in225 ); \ + in225 = gf_zero(); \ + in226 = GF_MUL( in226, 5 ); \ + in224 = GF_ADD( in224, in226 ); \ + in194 = GF_ADD( in194, in226 ); \ + in162 = GF_ADD( in162, in226 ); \ + in98 = GF_ADD( in98, in226 ); \ + in226 = gf_zero(); \ + in227 = GF_MUL( in227, 5 ); \ + in226 = GF_ADD( in226, in227 ); \ + in225 = GF_ADD( in225, in227 ); \ + in195 = GF_ADD( in195, in227 ); \ + in163 = GF_ADD( in163, in227 ); \ + in99 = GF_ADD( in99, in227 ); \ + in227 = gf_zero(); \ + in228 = GF_MUL( in228, 231 ); \ + in224 = GF_ADD( in224, in228 ); \ + in196 = GF_ADD( in196, in228 ); \ + in164 = GF_ADD( in164, in228 ); \ + in100 = GF_ADD( in100, in228 ); \ + in228 = gf_zero(); \ + in229 = GF_MUL( in229, 231 ); \ + in228 = GF_ADD( in228, in229 ); \ + in225 = GF_ADD( in225, in229 ); \ + in197 = GF_ADD( in197, in229 ); \ + in165 = GF_ADD( in165, in229 ); \ + in101 = GF_ADD( in101, in229 ); \ + in229 = gf_zero(); \ + in230 = GF_MUL( in230, 160 ); \ + in228 = GF_ADD( in228, in230 ); \ + in226 = GF_ADD( in226, in230 ); \ + in198 = GF_ADD( in198, in230 ); \ + in166 = GF_ADD( in166, in230 ); \ + in102 = GF_ADD( in102, in230 ); \ + in230 = gf_zero(); \ + in231 = GF_MUL( in231, 160 ); \ + in230 = GF_ADD( in230, in231 ); \ + in229 = GF_ADD( in229, in231 ); \ + in227 = GF_ADD( in227, in231 ); \ + in199 = GF_ADD( in199, in231 ); \ + in167 = GF_ADD( in167, in231 ); \ + in103 = GF_ADD( in103, in231 ); \ + in231 = gf_zero(); \ + in232 = GF_MUL( in232, 9 ); \ + in224 = GF_ADD( in224, in232 ); \ + in200 = GF_ADD( in200, in232 ); \ + in168 = GF_ADD( in168, in232 ); \ + in104 = GF_ADD( in104, in232 ); \ + in232 = gf_zero(); \ + in233 = GF_MUL( in233, 9 ); \ + in232 = GF_ADD( in232, in233 ); \ + in225 = GF_ADD( in225, in233 ); \ + in201 = GF_ADD( in201, in233 ); \ + in169 = GF_ADD( in169, in233 ); \ + in105 = GF_ADD( in105, in233 ); \ + in233 = gf_zero(); \ + in234 = GF_MUL( in234, 141 ); \ + in232 = GF_ADD( in232, in234 ); \ + in226 = GF_ADD( in226, in234 ); \ + in202 = GF_ADD( in202, in234 ); \ + in170 = GF_ADD( in170, in234 ); \ + in106 = GF_ADD( in106, in234 ); \ + in234 = gf_zero(); \ + in235 = GF_MUL( in235, 141 ); \ + in234 = GF_ADD( in234, in235 ); \ + in233 = GF_ADD( in233, in235 ); \ + in227 = GF_ADD( in227, in235 ); \ + in203 = GF_ADD( in203, in235 ); \ + in171 = GF_ADD( in171, in235 ); \ + in107 = GF_ADD( in107, in235 ); \ + in235 = gf_zero(); \ + in236 = GF_MUL( in236, 61 ); \ + in232 = GF_ADD( in232, in236 ); \ + in228 = GF_ADD( in228, in236 ); \ + in204 = GF_ADD( in204, in236 ); \ + in172 = GF_ADD( in172, in236 ); \ + in108 = GF_ADD( in108, in236 ); \ + in236 = gf_zero(); \ + in237 = GF_MUL( in237, 61 ); \ + in236 = GF_ADD( in236, in237 ); \ + in233 = GF_ADD( in233, in237 ); \ + in229 = GF_ADD( in229, in237 ); \ + in205 = GF_ADD( in205, in237 ); \ + in173 = GF_ADD( in173, in237 ); \ + in109 = GF_ADD( in109, in237 ); \ + in237 = gf_zero(); \ + in238 = GF_MUL( in238, 112 ); \ + in236 = GF_ADD( in236, in238 ); \ + in234 = GF_ADD( in234, in238 ); \ + in230 = GF_ADD( in230, in238 ); \ + in206 = GF_ADD( in206, in238 ); \ + in174 = GF_ADD( in174, in238 ); \ + in110 = GF_ADD( in110, in238 ); \ + in238 = gf_zero(); \ + in239 = GF_MUL( in239, 112 ); \ + in238 = GF_ADD( 
in238, in239 ); \ + in237 = GF_ADD( in237, in239 ); \ + in235 = GF_ADD( in235, in239 ); \ + in231 = GF_ADD( in231, in239 ); \ + in207 = GF_ADD( in207, in239 ); \ + in175 = GF_ADD( in175, in239 ); \ + in111 = GF_ADD( in111, in239 ); \ + in239 = gf_zero(); \ + in240 = GF_MUL( in240, 87 ); \ + in224 = GF_ADD( in224, in240 ); \ + in208 = GF_ADD( in208, in240 ); \ + in176 = GF_ADD( in176, in240 ); \ + in112 = GF_ADD( in112, in240 ); \ + in240 = gf_zero(); \ + in241 = GF_MUL( in241, 87 ); \ + in240 = GF_ADD( in240, in241 ); \ + in225 = GF_ADD( in225, in241 ); \ + in209 = GF_ADD( in209, in241 ); \ + in177 = GF_ADD( in177, in241 ); \ + in113 = GF_ADD( in113, in241 ); \ + in241 = gf_zero(); \ + in242 = GF_MUL( in242, 99 ); \ + in240 = GF_ADD( in240, in242 ); \ + in226 = GF_ADD( in226, in242 ); \ + in210 = GF_ADD( in210, in242 ); \ + in178 = GF_ADD( in178, in242 ); \ + in114 = GF_ADD( in114, in242 ); \ + in242 = gf_zero(); \ + in243 = GF_MUL( in243, 99 ); \ + in242 = GF_ADD( in242, in243 ); \ + in241 = GF_ADD( in241, in243 ); \ + in227 = GF_ADD( in227, in243 ); \ + in211 = GF_ADD( in211, in243 ); \ + in179 = GF_ADD( in179, in243 ); \ + in115 = GF_ADD( in115, in243 ); \ + in243 = gf_zero(); \ + in244 = GF_MUL( in244, 50 ); \ + in240 = GF_ADD( in240, in244 ); \ + in228 = GF_ADD( in228, in244 ); \ + in212 = GF_ADD( in212, in244 ); \ + in180 = GF_ADD( in180, in244 ); \ + in116 = GF_ADD( in116, in244 ); \ + in244 = gf_zero(); \ + in245 = GF_MUL( in245, 50 ); \ + in244 = GF_ADD( in244, in245 ); \ + in241 = GF_ADD( in241, in245 ); \ + in229 = GF_ADD( in229, in245 ); \ + in213 = GF_ADD( in213, in245 ); \ + in181 = GF_ADD( in181, in245 ); \ + in117 = GF_ADD( in117, in245 ); \ + in245 = gf_zero(); \ + in246 = GF_MUL( in246, 252 ); \ + in244 = GF_ADD( in244, in246 ); \ + in242 = GF_ADD( in242, in246 ); \ + in230 = GF_ADD( in230, in246 ); \ + in214 = GF_ADD( in214, in246 ); \ + in182 = GF_ADD( in182, in246 ); \ + in118 = GF_ADD( in118, in246 ); \ + in246 = gf_zero(); \ + in247 = GF_MUL( in247, 252 ); \ + in246 = GF_ADD( in246, in247 ); \ + in245 = GF_ADD( in245, in247 ); \ + in243 = GF_ADD( in243, in247 ); \ + in231 = GF_ADD( in231, in247 ); \ + in215 = GF_ADD( in215, in247 ); \ + in183 = GF_ADD( in183, in247 ); \ + in119 = GF_ADD( in119, in247 ); \ + in247 = gf_zero(); \ + in248 = GF_MUL( in248, 231 ); \ + in240 = GF_ADD( in240, in248 ); \ + in232 = GF_ADD( in232, in248 ); \ + in216 = GF_ADD( in216, in248 ); \ + in184 = GF_ADD( in184, in248 ); \ + in120 = GF_ADD( in120, in248 ); \ + in248 = gf_zero(); \ + in249 = GF_MUL( in249, 231 ); \ + in248 = GF_ADD( in248, in249 ); \ + in241 = GF_ADD( in241, in249 ); \ + in233 = GF_ADD( in233, in249 ); \ + in217 = GF_ADD( in217, in249 ); \ + in185 = GF_ADD( in185, in249 ); \ + in121 = GF_ADD( in121, in249 ); \ + in249 = gf_zero(); \ + in250 = GF_MUL( in250, 160 ); \ + in248 = GF_ADD( in248, in250 ); \ + in242 = GF_ADD( in242, in250 ); \ + in234 = GF_ADD( in234, in250 ); \ + in218 = GF_ADD( in218, in250 ); \ + in186 = GF_ADD( in186, in250 ); \ + in122 = GF_ADD( in122, in250 ); \ + in250 = gf_zero(); \ + in251 = GF_MUL( in251, 160 ); \ + in250 = GF_ADD( in250, in251 ); \ + in249 = GF_ADD( in249, in251 ); \ + in243 = GF_ADD( in243, in251 ); \ + in235 = GF_ADD( in235, in251 ); \ + in219 = GF_ADD( in219, in251 ); \ + in187 = GF_ADD( in187, in251 ); \ + in123 = GF_ADD( in123, in251 ); \ + in251 = gf_zero(); \ + in252 = GF_MUL( in252, 177 ); \ + in248 = GF_ADD( in248, in252 ); \ + in244 = GF_ADD( in244, in252 ); \ + in236 = GF_ADD( in236, in252 ); \ + in220 = GF_ADD( in220, 
in252 ); \ + in188 = GF_ADD( in188, in252 ); \ + in124 = GF_ADD( in124, in252 ); \ + in252 = gf_zero(); \ + in253 = GF_MUL( in253, 177 ); \ + in252 = GF_ADD( in252, in253 ); \ + in249 = GF_ADD( in249, in253 ); \ + in245 = GF_ADD( in245, in253 ); \ + in237 = GF_ADD( in237, in253 ); \ + in221 = GF_ADD( in221, in253 ); \ + in189 = GF_ADD( in189, in253 ); \ + in125 = GF_ADD( in125, in253 ); \ + in253 = gf_zero(); \ + in254 = GF_MUL( in254, 185 ); \ + in252 = GF_ADD( in252, in254 ); \ + in250 = GF_ADD( in250, in254 ); \ + in246 = GF_ADD( in246, in254 ); \ + in238 = GF_ADD( in238, in254 ); \ + in222 = GF_ADD( in222, in254 ); \ + in190 = GF_ADD( in190, in254 ); \ + in126 = GF_ADD( in126, in254 ); \ + in254 = gf_zero(); \ + in255 = GF_MUL( in255, 185 ); \ + in254 = GF_ADD( in254, in255 ); \ + in253 = GF_ADD( in253, in255 ); \ + in251 = GF_ADD( in251, in255 ); \ + in247 = GF_ADD( in247, in255 ); \ + in239 = GF_ADD( in239, in255 ); \ + in223 = GF_ADD( in223, in255 ); \ + in191 = GF_ADD( in191, in255 ); \ + in127 = GF_ADD( in127, in255 ); \ + in255 = gf_zero(); \ + in00 = GF_MUL( in00, 1 ); \ + in01 = GF_MUL( in01, 1 ); \ + in02 = GF_MUL( in02, 6 ); \ + in03 = GF_MUL( in03, 6 ); \ + in04 = GF_MUL( in04, 108 ); \ + in05 = GF_MUL( in05, 108 ); \ + in06 = GF_MUL( in06, 117 ); \ + in07 = GF_MUL( in07, 117 ); \ + in08 = GF_MUL( in08, 208 ); \ + in09 = GF_MUL( in09, 208 ); \ + in10 = GF_MUL( in10, 218 ); \ + in11 = GF_MUL( in11, 218 ); \ + in12 = GF_MUL( in12, 136 ); \ + in13 = GF_MUL( in13, 136 ); \ + in14 = GF_MUL( in14, 23 ); \ + in15 = GF_MUL( in15, 23 ); \ + in16 = GF_MUL( in16, 42 ); \ + in17 = GF_MUL( in17, 42 ); \ + in18 = GF_MUL( in18, 252 ); \ + in19 = GF_MUL( in19, 252 ); \ + in20 = GF_MUL( in20, 158 ); \ + in21 = GF_MUL( in21, 158 ); \ + in22 = GF_MUL( in22, 99 ); \ + in23 = GF_MUL( in23, 99 ); \ + in24 = GF_MUL( in24, 108 ); \ + in25 = GF_MUL( in25, 108 ); \ + in26 = GF_MUL( in26, 117 ); \ + in27 = GF_MUL( in27, 117 ); \ + in28 = GF_MUL( in28, 233 ); \ + in29 = GF_MUL( in29, 233 ); \ + in30 = GF_MUL( in30, 76 ); \ + in31 = GF_MUL( in31, 76 ); \ + in32 = GF_MUL( in32, 165 ); \ + in33 = GF_MUL( in33, 165 ); \ + in34 = GF_MUL( in34, 249 ); \ + in35 = GF_MUL( in35, 249 ); \ + in36 = GF_MUL( in36, 196 ); \ + in37 = GF_MUL( in37, 196 ); \ + in38 = GF_MUL( in38, 162 ); \ + in39 = GF_MUL( in39, 162 ); \ + in40 = GF_MUL( in40, 212 ); \ + in41 = GF_MUL( in41, 212 ); \ + in42 = GF_MUL( in42, 194 ); \ + in43 = GF_MUL( in43, 194 ); \ + in44 = GF_MUL( in44, 37 ); \ + in45 = GF_MUL( in45, 37 ); \ + in46 = GF_MUL( in46, 222 ); \ + in47 = GF_MUL( in47, 222 ); \ + in48 = GF_MUL( in48, 15 ); \ + in49 = GF_MUL( in49, 15 ); \ + in50 = GF_MUL( in50, 34 ); \ + in51 = GF_MUL( in51, 34 ); \ + in52 = GF_MUL( in52, 94 ); \ + in53 = GF_MUL( in53, 94 ); \ + in54 = GF_MUL( in54, 217 ); \ + in55 = GF_MUL( in55, 217 ); \ + in56 = GF_MUL( in56, 196 ); \ + in57 = GF_MUL( in57, 196 ); \ + in58 = GF_MUL( in58, 162 ); \ + in59 = GF_MUL( in59, 162 ); \ + in60 = GF_MUL( in60, 171 ); \ + in61 = GF_MUL( in61, 171 ); \ + in62 = GF_MUL( in62, 221 ); \ + in63 = GF_MUL( in63, 221 ); \ + in64 = GF_MUL( in64, 250 ); \ + in65 = GF_MUL( in65, 250 ); \ + in66 = GF_MUL( in66, 38 ); \ + in67 = GF_MUL( in67, 38 ); \ + in68 = GF_MUL( in68, 22 ); \ + in69 = GF_MUL( in69, 22 ); \ + in70 = GF_MUL( in70, 116 ); \ + in71 = GF_MUL( in71, 116 ); \ + in72 = GF_MUL( in72, 175 ); \ + in73 = GF_MUL( in73, 175 ); \ + in74 = GF_MUL( in74, 197 ); \ + in75 = GF_MUL( in75, 197 ); \ + in76 = GF_MUL( in76, 91 ); \ + in77 = GF_MUL( in77, 91 ); \ + in78 = GF_MUL( 
in78, 199 ); \ + in79 = GF_MUL( in79, 199 ); \ + in80 = GF_MUL( in80, 92 ); \ + in81 = GF_MUL( in81, 92 ); \ + in82 = GF_MUL( in82, 213 ); \ + in83 = GF_MUL( in83, 213 ); \ + in84 = GF_MUL( in84, 102 ); \ + in85 = GF_MUL( in85, 102 ); \ + in86 = GF_MUL( in86, 73 ); \ + in87 = GF_MUL( in87, 73 ); \ + in88 = GF_MUL( in88, 22 ); \ + in89 = GF_MUL( in89, 22 ); \ + in90 = GF_MUL( in90, 116 ); \ + in91 = GF_MUL( in91, 116 ); \ + in92 = GF_MUL( in92, 251 ); \ + in93 = GF_MUL( in93, 251 ); \ + in94 = GF_MUL( in94, 32 ); \ + in95 = GF_MUL( in95, 32 ); \ + in96 = GF_MUL( in96, 219 ); \ + in97 = GF_MUL( in97, 219 ); \ + in98 = GF_MUL( in98, 224 ); \ + in99 = GF_MUL( in99, 224 ); \ + in100 = GF_MUL( in100, 123 ); \ + in101 = GF_MUL( in101, 123 ); \ + in102 = GF_MUL( in102, 7 ); \ + in103 = GF_MUL( in103, 7 ); \ + in104 = GF_MUL( in104, 96 ); \ + in105 = GF_MUL( in105, 96 ); \ + in106 = GF_MUL( in106, 93 ); \ + in107 = GF_MUL( in107, 93 ); \ + in108 = GF_MUL( in108, 3 ); \ + in109 = GF_MUL( in109, 3 ); \ + in110 = GF_MUL( in110, 10 ); \ + in111 = GF_MUL( in111, 10 ); \ + in112 = GF_MUL( in112, 95 ); \ + in113 = GF_MUL( in113, 95 ); \ + in114 = GF_MUL( in114, 223 ); \ + in115 = GF_MUL( in115, 223 ); \ + in116 = GF_MUL( in116, 210 ); \ + in117 = GF_MUL( in117, 210 ); \ + in118 = GF_MUL( in118, 214 ); \ + in119 = GF_MUL( in119, 214 ); \ + in120 = GF_MUL( in120, 123 ); \ + in121 = GF_MUL( in121, 123 ); \ + in122 = GF_MUL( in122, 7 ); \ + in123 = GF_MUL( in123, 7 ); \ + in124 = GF_MUL( in124, 126 ); \ + in125 = GF_MUL( in125, 126 ); \ + in126 = GF_MUL( in126, 25 ); \ + in127 = GF_MUL( in127, 25 ); \ + in128 = GF_MUL( in128, 4 ); \ + in129 = GF_MUL( in129, 4 ); \ + in130 = GF_MUL( in130, 24 ); \ + in131 = GF_MUL( in131, 24 ); \ + in132 = GF_MUL( in132, 173 ); \ + in133 = GF_MUL( in133, 173 ); \ + in134 = GF_MUL( in134, 201 ); \ + in135 = GF_MUL( in135, 201 ); \ + in136 = GF_MUL( in136, 103 ); \ + in137 = GF_MUL( in137, 103 ); \ + in138 = GF_MUL( in138, 79 ); \ + in139 = GF_MUL( in139, 79 ); \ + in140 = GF_MUL( in140, 26 ); \ + in141 = GF_MUL( in141, 26 ); \ + in142 = GF_MUL( in142, 92 ); \ + in143 = GF_MUL( in143, 92 ); \ + in144 = GF_MUL( in144, 168 ); \ + in145 = GF_MUL( in145, 168 ); \ + in146 = GF_MUL( in146, 215 ); \ + in147 = GF_MUL( in147, 215 ); \ + in148 = GF_MUL( in148, 66 ); \ + in149 = GF_MUL( in149, 66 ); \ + in150 = GF_MUL( in150, 145 ); \ + in151 = GF_MUL( in151, 145 ); \ + in152 = GF_MUL( in152, 173 ); \ + in153 = GF_MUL( in153, 173 ); \ + in154 = GF_MUL( in154, 201 ); \ + in155 = GF_MUL( in155, 201 ); \ + in156 = GF_MUL( in156, 131 ); \ + in157 = GF_MUL( in157, 131 ); \ + in158 = GF_MUL( in158, 45 ); \ + in159 = GF_MUL( in159, 45 ); \ + in160 = GF_MUL( in160, 174 ); \ + in161 = GF_MUL( in161, 174 ); \ + in162 = GF_MUL( in162, 195 ); \ + in163 = GF_MUL( in163, 195 ); \ + in164 = GF_MUL( in164, 55 ); \ + in165 = GF_MUL( in165, 55 ); \ + in166 = GF_MUL( in166, 178 ); \ + in167 = GF_MUL( in167, 178 ); \ + in168 = GF_MUL( in168, 119 ); \ + in169 = GF_MUL( in169, 119 ); \ + in170 = GF_MUL( in170, 47 ); \ + in171 = GF_MUL( in171, 47 ); \ + in172 = GF_MUL( in172, 148 ); \ + in173 = GF_MUL( in173, 148 ); \ + in174 = GF_MUL( in174, 95 ); \ + in175 = GF_MUL( in175, 95 ); \ + in176 = GF_MUL( in176, 60 ); \ + in177 = GF_MUL( in177, 60 ); \ + in178 = GF_MUL( in178, 136 ); \ + in179 = GF_MUL( in179, 136 ); \ + in180 = GF_MUL( in180, 101 ); \ + in181 = GF_MUL( in181, 101 ); \ + in182 = GF_MUL( in182, 67 ); \ + in183 = GF_MUL( in183, 67 ); \ + in184 = GF_MUL( in184, 55 ); \ + in185 = GF_MUL( in185, 55 ); \ + 
in186 = GF_MUL( in186, 178 ); \ + in187 = GF_MUL( in187, 178 ); \ + in188 = GF_MUL( in188, 150 ); \ + in189 = GF_MUL( in189, 150 ); \ + in190 = GF_MUL( in190, 83 ); \ + in191 = GF_MUL( in191, 83 ); \ + in192 = GF_MUL( in192, 207 ); \ + in193 = GF_MUL( in193, 207 ); \ + in194 = GF_MUL( in194, 152 ); \ + in195 = GF_MUL( in195, 152 ); \ + in196 = GF_MUL( in196, 88 ); \ + in197 = GF_MUL( in197, 88 ); \ + in198 = GF_MUL( in198, 205 ); \ + in199 = GF_MUL( in199, 205 ); \ + in200 = GF_MUL( in200, 134 ); \ + in201 = GF_MUL( in201, 134 ); \ + in202 = GF_MUL( in202, 51 ); \ + in203 = GF_MUL( in203, 51 ); \ + in204 = GF_MUL( in204, 113 ); \ + in205 = GF_MUL( in205, 113 ); \ + in206 = GF_MUL( in206, 59 ); \ + in207 = GF_MUL( in207, 59 ); \ + in208 = GF_MUL( in208, 109 ); \ + in209 = GF_MUL( in209, 109 ); \ + in210 = GF_MUL( in210, 115 ); \ + in211 = GF_MUL( in211, 115 ); \ + in212 = GF_MUL( in212, 133 ); \ + in213 = GF_MUL( in213, 133 ); \ + in214 = GF_MUL( in214, 57 ); \ + in215 = GF_MUL( in215, 57 ); \ + in216 = GF_MUL( in216, 88 ); \ + in217 = GF_MUL( in217, 88 ); \ + in218 = GF_MUL( in218, 205 ); \ + in219 = GF_MUL( in219, 205 ); \ + in220 = GF_MUL( in220, 203 ); \ + in221 = GF_MUL( in221, 203 ); \ + in222 = GF_MUL( in222, 128 ); \ + in223 = GF_MUL( in223, 128 ); \ + in224 = GF_MUL( in224, 75 ); \ + in225 = GF_MUL( in225, 75 ); \ + in226 = GF_MUL( in226, 167 ); \ + in227 = GF_MUL( in227, 167 ); \ + in228 = GF_MUL( in228, 241 ); \ + in229 = GF_MUL( in229, 241 ); \ + in230 = GF_MUL( in230, 28 ); \ + in231 = GF_MUL( in231, 28 ); \ + in232 = GF_MUL( in232, 157 ); \ + in233 = GF_MUL( in233, 157 ); \ + in234 = GF_MUL( in234, 105 ); \ + in235 = GF_MUL( in235, 105 ); \ + in236 = GF_MUL( in236, 12 ); \ + in237 = GF_MUL( in237, 12 ); \ + in238 = GF_MUL( in238, 40 ); \ + in239 = GF_MUL( in239, 40 ); \ + in240 = GF_MUL( in240, 97 ); \ + in241 = GF_MUL( in241, 97 ); \ + in242 = GF_MUL( in242, 91 ); \ + in243 = GF_MUL( in243, 91 ); \ + in244 = GF_MUL( in244, 111 ); \ + in245 = GF_MUL( in245, 111 ); \ + in246 = GF_MUL( in246, 127 ); \ + in247 = GF_MUL( in247, 127 ); \ + in248 = GF_MUL( in248, 241 ); \ + in249 = GF_MUL( in249, 241 ); \ + in250 = GF_MUL( in250, 28 ); \ + in251 = GF_MUL( in251, 28 ); \ + in252 = GF_MUL( in252, 229 ); \ + in253 = GF_MUL( in253, 229 ); \ + in254 = GF_MUL( in254, 100 ); \ + in255 = GF_MUL( in255, 100 ); \ + } while( 0 ) + + + +#endif /* HEADER_fd_src_ballet_reedsol_fd_reedsol_fderiv_h */ diff --git a/src/ballet/reedsol/fd_reedsol_fft.h b/src/ballet/reedsol/fd_reedsol_fft.h index 375e93437d3..53277d25063 100644 --- a/src/ballet/reedsol/fd_reedsol_fft.h +++ b/src/ballet/reedsol/fd_reedsol_fft.h @@ -108,6 +108,2166 @@ +#define FD_REEDSOL_IFFT_CONSTANTS_256_0 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 142, 144, 146, 148, 150, 152, 154, 156, 158, 160, 162, 164, 166, 168, 170, 172, 174, 176, 178, 180, 182, 184, 186, 188, 190, 192, 194, 196, 198, 200, 202, 204, 206, 208, 210, 212, 214, 216, 218, 220, 222, 224, 226, 228, 230, 232, 234, 236, 238, 240, 242, 244, 246, 248, 250, 252, 254, 0, 6, 28, 26, 120, 126, 100, 98, 237, 235, 241, 247, 149, 147, 137, 143, 179, 181, 175, 169, 203, 205, 215, 209, 94, 88, 66, 68, 38, 32, 58, 60, 182, 176, 170, 172, 206, 200, 210, 212, 91, 93, 71, 65, 35, 37, 63, 57, 5, 3, 25, 31, 125, 123, 97, 103, 232, 238, 
244, 242, 144, 150, 140, 138, 0, 22, 97, 119, 38, 48, 71, 81, 183, 161, 214, 192, 145, 135, 240, 230, 12, 26, 109, 123, 42, 60, 75, 93, 187, 173, 218, 204, 157, 139, 252, 234, 0, 11, 174, 165, 33, 42, 143, 132, 45, 38, 131, 136, 12, 7, 162, 169, 0, 71, 189, 250, 18, 85, 175, 232, 0, 218, 130, 88, 0, 133, 0 +#define FD_REEDSOL_IFFT_IMPL_256( c_00, c_01, c_02, c_03, c_04, c_05 , \ + c_06, c_07, c_08, c_09, c_10, c_11, c_12, c_13, c_14, c_15, c_16 , \ + c_17, c_18, c_19, c_20, c_21, c_22, c_23, c_24, c_25, c_26, c_27 , \ + c_28, c_29, c_30, c_31, c_32, c_33, c_34, c_35, c_36, c_37, c_38 , \ + c_39, c_40, c_41, c_42, c_43, c_44, c_45, c_46, c_47, c_48, c_49 , \ + c_50, c_51, c_52, c_53, c_54, c_55, c_56, c_57, c_58, c_59, c_60 , \ + c_61, c_62, c_63, c_64, c_65, c_66, c_67, c_68, c_69, c_70, c_71 , \ + c_72, c_73, c_74, c_75, c_76, c_77, c_78, c_79, c_80, c_81, c_82 , \ + c_83, c_84, c_85, c_86, c_87, c_88, c_89, c_90, c_91, c_92, c_93 , \ + c_94, c_95, c_96, c_97, c_98, c_99, c_100, c_101, c_102, c_103, c_104, \ + c_105, c_106, c_107, c_108, c_109, c_110, c_111, c_112, c_113, c_114, \ + c_115, c_116, c_117, c_118, c_119, c_120, c_121, c_122, c_123, c_124, \ + c_125, c_126, c_127, c_128, c_129, c_130, c_131, c_132, c_133, c_134, \ + c_135, c_136, c_137, c_138, c_139, c_140, c_141, c_142, c_143, c_144, \ + c_145, c_146, c_147, c_148, c_149, c_150, c_151, c_152, c_153, c_154, \ + c_155, c_156, c_157, c_158, c_159, c_160, c_161, c_162, c_163, c_164, \ + c_165, c_166, c_167, c_168, c_169, c_170, c_171, c_172, c_173, c_174, \ + c_175, c_176, c_177, c_178, c_179, c_180, c_181, c_182, c_183, c_184, \ + c_185, c_186, c_187, c_188, c_189, c_190, c_191, c_192, c_193, c_194, \ + c_195, c_196, c_197, c_198, c_199, c_200, c_201, c_202, c_203, c_204, \ + c_205, c_206, c_207, c_208, c_209, c_210, c_211, c_212, c_213, c_214, \ + c_215, c_216, c_217, c_218, c_219, c_220, c_221, c_222, c_223, c_224, \ + c_225, c_226, c_227, c_228, c_229, c_230, c_231, c_232, c_233, c_234, \ + c_235, c_236, c_237, c_238, c_239, c_240, c_241, c_242, c_243, c_244, \ + c_245, c_246, c_247, c_248, c_249, c_250, c_251, c_252, c_253, c_254, \ + in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10 , \ + in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21 , \ + in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32 , \ + in33, in34, in35, in36, in37, in38, in39, in40, in41, in42, in43 , \ + in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54 , \ + in55, in56, in57, in58, in59, in60, in61, in62, in63, in64, in65 , \ + in66, in67, in68, in69, in70, in71, in72, in73, in74, in75, in76 , \ + in77, in78, in79, in80, in81, in82, in83, in84, in85, in86, in87 , \ + in88, in89, in90, in91, in92, in93, in94, in95, in96, in97, in98 , \ + in99, in100, in101, in102, in103, in104, in105, in106, in107, in108 , \ + in109, in110, in111, in112, in113, in114, in115, in116, in117, in118, \ + in119, in120, in121, in122, in123, in124, in125, in126, in127, in128, \ + in129, in130, in131, in132, in133, in134, in135, in136, in137, in138, \ + in139, in140, in141, in142, in143, in144, in145, in146, in147, in148, \ + in149, in150, in151, in152, in153, in154, in155, in156, in157, in158, \ + in159, in160, in161, in162, in163, in164, in165, in166, in167, in168, \ + in169, in170, in171, in172, in173, in174, in175, in176, in177, in178, \ + in179, in180, in181, in182, in183, in184, in185, in186, in187, in188, \ + in189, in190, in191, in192, in193, in194, in195, in196, in197, in198, \ + in199, in200, in201, in202, in203, 
in204, in205, in206, in207, in208, \ + in209, in210, in211, in212, in213, in214, in215, in216, in217, in218, \ + in219, in220, in221, in222, in223, in224, in225, in226, in227, in228, \ + in229, in230, in231, in232, in233, in234, in235, in236, in237, in238, \ + in239, in240, in241, in242, in243, in244, in245, in246, in247, in248, \ + in249, in250, in251, in252, in253, in254, in255) \ + do { \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in00, in01, c_00 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in02, in03, c_01 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in04, in05, c_02 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in06, in07, c_03 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in08, in09, c_04 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in10, in11, c_05 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in12, in13, c_06 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in14, in15, c_07 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in16, in17, c_08 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in18, in19, c_09 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in20, in21, c_10 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in22, in23, c_11 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in24, in25, c_12 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in26, in27, c_13 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in28, in29, c_14 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in30, in31, c_15 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in32, in33, c_16 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in34, in35, c_17 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in36, in37, c_18 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in38, in39, c_19 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in40, in41, c_20 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in42, in43, c_21 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in44, in45, c_22 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in46, in47, c_23 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in48, in49, c_24 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in50, in51, c_25 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in52, in53, c_26 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in54, in55, c_27 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in56, in57, c_28 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in58, in59, c_29 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in60, in61, c_30 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in62, in63, c_31 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in64, in65, c_32 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in66, in67, c_33 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in68, in69, c_34 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in70, in71, c_35 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in72, in73, c_36 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in74, in75, c_37 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in76, in77, c_38 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in78, in79, c_39 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in80, in81, c_40 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in82, in83, c_41 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in84, in85, c_42 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in86, in87, c_43 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in88, in89, c_44 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in90, in91, c_45 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in92, in93, c_46 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in94, in95, c_47 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in96, in97, c_48 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in98, in99, c_49 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in100, in101, c_50 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in102, in103, c_51 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in104, in105, c_52 ); \ + 
FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in106, in107, c_53 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in108, in109, c_54 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in110, in111, c_55 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in112, in113, c_56 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in114, in115, c_57 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in116, in117, c_58 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in118, in119, c_59 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in120, in121, c_60 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in122, in123, c_61 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in124, in125, c_62 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in126, in127, c_63 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in128, in129, c_64 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in130, in131, c_65 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in132, in133, c_66 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in134, in135, c_67 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in136, in137, c_68 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in138, in139, c_69 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in140, in141, c_70 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in142, in143, c_71 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in144, in145, c_72 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in146, in147, c_73 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in148, in149, c_74 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in150, in151, c_75 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in152, in153, c_76 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in154, in155, c_77 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in156, in157, c_78 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in158, in159, c_79 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in160, in161, c_80 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in162, in163, c_81 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in164, in165, c_82 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in166, in167, c_83 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in168, in169, c_84 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in170, in171, c_85 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in172, in173, c_86 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in174, in175, c_87 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in176, in177, c_88 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in178, in179, c_89 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in180, in181, c_90 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in182, in183, c_91 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in184, in185, c_92 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in186, in187, c_93 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in188, in189, c_94 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in190, in191, c_95 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in192, in193, c_96 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in194, in195, c_97 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in196, in197, c_98 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in198, in199, c_99 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in200, in201, c_100 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in202, in203, c_101 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in204, in205, c_102 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in206, in207, c_103 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in208, in209, c_104 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in210, in211, c_105 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in212, in213, c_106 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in214, in215, c_107 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in216, in217, c_108 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in218, in219, c_109 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in220, in221, c_110 ); \ + 
FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in222, in223, c_111 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in224, in225, c_112 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in226, in227, c_113 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in228, in229, c_114 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in230, in231, c_115 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in232, in233, c_116 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in234, in235, c_117 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in236, in237, c_118 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in238, in239, c_119 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in240, in241, c_120 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in242, in243, c_121 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in244, in245, c_122 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in246, in247, c_123 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in248, in249, c_124 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in250, in251, c_125 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in252, in253, c_126 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in254, in255, c_127 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in00, in02, c_128 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in04, in06, c_129 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in08, in10, c_130 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in12, in14, c_131 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in16, in18, c_132 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in20, in22, c_133 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in24, in26, c_134 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in28, in30, c_135 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in32, in34, c_136 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in36, in38, c_137 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in40, in42, c_138 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in44, in46, c_139 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in48, in50, c_140 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in52, in54, c_141 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in56, in58, c_142 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in60, in62, c_143 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in64, in66, c_144 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in68, in70, c_145 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in72, in74, c_146 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in76, in78, c_147 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in80, in82, c_148 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in84, in86, c_149 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in88, in90, c_150 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in92, in94, c_151 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in96, in98, c_152 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in100, in102, c_153 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in104, in106, c_154 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in108, in110, c_155 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in112, in114, c_156 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in116, in118, c_157 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in120, in122, c_158 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in124, in126, c_159 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in128, in130, c_160 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in132, in134, c_161 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in136, in138, c_162 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in140, in142, c_163 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in144, in146, c_164 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in148, in150, c_165 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in152, in154, c_166 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in156, in158, c_167 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in160, in162, c_168 ); \ + 
FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in164, in166, c_169 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in168, in170, c_170 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in172, in174, c_171 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in176, in178, c_172 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in180, in182, c_173 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in184, in186, c_174 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in188, in190, c_175 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in192, in194, c_176 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in196, in198, c_177 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in200, in202, c_178 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in204, in206, c_179 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in208, in210, c_180 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in212, in214, c_181 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in216, in218, c_182 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in220, in222, c_183 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in224, in226, c_184 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in228, in230, c_185 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in232, in234, c_186 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in236, in238, c_187 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in240, in242, c_188 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in244, in246, c_189 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in248, in250, c_190 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in252, in254, c_191 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in00, in04, c_192 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in08, in12, c_193 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in16, in20, c_194 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in24, in28, c_195 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in32, in36, c_196 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in40, in44, c_197 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in48, in52, c_198 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in56, in60, c_199 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in64, in68, c_200 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in72, in76, c_201 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in80, in84, c_202 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in88, in92, c_203 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in96, in100, c_204 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in104, in108, c_205 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in112, in116, c_206 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in120, in124, c_207 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in128, in132, c_208 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in136, in140, c_209 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in144, in148, c_210 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in152, in156, c_211 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in160, in164, c_212 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in168, in172, c_213 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in176, in180, c_214 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in184, in188, c_215 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in192, in196, c_216 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in200, in204, c_217 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in208, in212, c_218 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in216, in220, c_219 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in224, in228, c_220 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in232, in236, c_221 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in240, in244, c_222 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in248, in252, c_223 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in00, in08, c_224 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in16, in24, c_225 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in32, in40, 
c_226 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in48, in56, c_227 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in64, in72, c_228 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in80, in88, c_229 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in96, in104, c_230 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in112, in120, c_231 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in128, in136, c_232 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in144, in152, c_233 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in160, in168, c_234 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in176, in184, c_235 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in192, in200, c_236 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in208, in216, c_237 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in224, in232, c_238 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in240, in248, c_239 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in00, in16, c_240 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in32, in48, c_241 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in64, in80, c_242 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in96, in112, c_243 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in128, in144, c_244 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in160, in176, c_245 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in192, in208, c_246 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in224, in240, c_247 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in00, in32, c_248 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in64, in96, c_249 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in128, in160, c_250 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in192, in224, c_251 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in00, in64, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in128, in192, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in00, in128, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in64, in192, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in32, in96, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in160, in224, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in32, in160, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in96, in224, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in16, in48, c_248 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in80, in112, c_249 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in144, in176, c_250 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in208, in240, c_251 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in16, in80, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in144, in208, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in16, in144, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in80, in208, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in48, in112, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in176, in240, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in48, in176, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in112, in240, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in08, in24, c_240 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in40, in56, c_241 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in72, in88, c_242 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in104, in120, c_243 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in136, in152, c_244 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in168, in184, c_245 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in200, in216, c_246 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in232, in248, c_247 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in08, in40, c_248 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in72, in104, c_249 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in136, in168, c_250 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in200, in232, c_251 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in08, in72, 
c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in136, in200, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in08, in136, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in72, in200, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in40, in104, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in168, in232, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in40, in168, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in104, in232, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in24, in56, c_248 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in88, in120, c_249 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in152, in184, c_250 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in216, in248, c_251 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in24, in88, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in152, in216, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in24, in152, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in88, in216, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in56, in120, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in184, in248, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in56, in184, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in120, in248, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in04, in12, c_224 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in20, in28, c_225 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in36, in44, c_226 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in52, in60, c_227 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in68, in76, c_228 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in84, in92, c_229 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in100, in108, c_230 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in116, in124, c_231 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in132, in140, c_232 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in148, in156, c_233 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in164, in172, c_234 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in180, in188, c_235 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in196, in204, c_236 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in212, in220, c_237 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in228, in236, c_238 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in244, in252, c_239 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in04, in20, c_240 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in36, in52, c_241 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in68, in84, c_242 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in100, in116, c_243 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in132, in148, c_244 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in164, in180, c_245 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in196, in212, c_246 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in228, in244, c_247 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in04, in36, c_248 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in68, in100, c_249 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in132, in164, c_250 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in196, in228, c_251 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in04, in68, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in132, in196, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in04, in132, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in68, in196, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in36, in100, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in164, in228, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in36, in164, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in100, in228, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in20, in52, c_248 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in84, in116, c_249 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in148, in180, 
c_250 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in212, in244, c_251 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in20, in84, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in148, in212, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in20, in148, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in84, in212, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in52, in116, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in180, in244, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in52, in180, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in116, in244, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in12, in28, c_240 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in44, in60, c_241 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in76, in92, c_242 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in108, in124, c_243 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in140, in156, c_244 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in172, in188, c_245 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in204, in220, c_246 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in236, in252, c_247 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in12, in44, c_248 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in76, in108, c_249 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in140, in172, c_250 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in204, in236, c_251 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in12, in76, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in140, in204, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in12, in140, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in76, in204, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in44, in108, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in172, in236, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in44, in172, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in108, in236, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in28, in60, c_248 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in92, in124, c_249 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in156, in188, c_250 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in220, in252, c_251 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in28, in92, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in156, in220, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in28, in156, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in92, in220, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in60, in124, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in188, in252, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in60, in188, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in124, in252, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in02, in06, c_192 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in10, in14, c_193 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in18, in22, c_194 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in26, in30, c_195 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in34, in38, c_196 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in42, in46, c_197 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in50, in54, c_198 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in58, in62, c_199 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in66, in70, c_200 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in74, in78, c_201 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in82, in86, c_202 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in90, in94, c_203 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in98, in102, c_204 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in106, in110, c_205 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in114, in118, c_206 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in122, in126, c_207 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in130, in134, c_208 ); \ + 
FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in138, in142, c_209 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in146, in150, c_210 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in154, in158, c_211 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in162, in166, c_212 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in170, in174, c_213 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in178, in182, c_214 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in186, in190, c_215 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in194, in198, c_216 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in202, in206, c_217 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in210, in214, c_218 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in218, in222, c_219 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in226, in230, c_220 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in234, in238, c_221 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in242, in246, c_222 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in250, in254, c_223 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in02, in10, c_224 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in18, in26, c_225 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in34, in42, c_226 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in50, in58, c_227 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in66, in74, c_228 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in82, in90, c_229 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in98, in106, c_230 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in114, in122, c_231 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in130, in138, c_232 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in146, in154, c_233 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in162, in170, c_234 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in178, in186, c_235 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in194, in202, c_236 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in210, in218, c_237 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in226, in234, c_238 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in242, in250, c_239 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in02, in18, c_240 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in34, in50, c_241 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in66, in82, c_242 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in98, in114, c_243 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in130, in146, c_244 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in162, in178, c_245 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in194, in210, c_246 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in226, in242, c_247 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in02, in34, c_248 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in66, in98, c_249 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in130, in162, c_250 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in194, in226, c_251 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in02, in66, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in130, in194, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in02, in130, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in66, in194, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in34, in98, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in162, in226, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in34, in162, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in98, in226, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in18, in50, c_248 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in82, in114, c_249 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in146, in178, c_250 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in210, in242, c_251 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in18, in82, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in146, in210, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in18, in146, c_254 ); 
\ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in82, in210, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in50, in114, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in178, in242, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in50, in178, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in114, in242, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in10, in26, c_240 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in42, in58, c_241 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in74, in90, c_242 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in106, in122, c_243 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in138, in154, c_244 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in170, in186, c_245 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in202, in218, c_246 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in234, in250, c_247 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in10, in42, c_248 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in74, in106, c_249 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in138, in170, c_250 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in202, in234, c_251 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in10, in74, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in138, in202, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in10, in138, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in74, in202, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in42, in106, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in170, in234, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in42, in170, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in106, in234, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in26, in58, c_248 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in90, in122, c_249 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in154, in186, c_250 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in218, in250, c_251 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in26, in90, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in154, in218, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in26, in154, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in90, in218, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in58, in122, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in186, in250, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in58, in186, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in122, in250, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in06, in14, c_224 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in22, in30, c_225 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in38, in46, c_226 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in54, in62, c_227 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in70, in78, c_228 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in86, in94, c_229 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in102, in110, c_230 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in118, in126, c_231 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in134, in142, c_232 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in150, in158, c_233 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in166, in174, c_234 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in182, in190, c_235 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in198, in206, c_236 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in214, in222, c_237 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in230, in238, c_238 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in246, in254, c_239 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in06, in22, c_240 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in38, in54, c_241 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in70, in86, c_242 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in102, in118, c_243 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in134, in150, c_244 ); \ + 
FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in166, in182, c_245 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in198, in214, c_246 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in230, in246, c_247 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in06, in38, c_248 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in70, in102, c_249 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in134, in166, c_250 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in198, in230, c_251 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in06, in70, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in134, in198, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in06, in134, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in70, in198, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in38, in102, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in166, in230, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in38, in166, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in102, in230, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in22, in54, c_248 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in86, in118, c_249 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in150, in182, c_250 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in214, in246, c_251 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in22, in86, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in150, in214, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in22, in150, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in86, in214, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in54, in118, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in182, in246, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in54, in182, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in118, in246, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in14, in30, c_240 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in46, in62, c_241 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in78, in94, c_242 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in110, in126, c_243 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in142, in158, c_244 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in174, in190, c_245 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in206, in222, c_246 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in238, in254, c_247 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in14, in46, c_248 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in78, in110, c_249 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in142, in174, c_250 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in206, in238, c_251 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in14, in78, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in142, in206, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in14, in142, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in78, in206, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in46, in110, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in174, in238, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in46, in174, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in110, in238, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in30, in62, c_248 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in94, in126, c_249 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in158, in190, c_250 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in222, in254, c_251 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in30, in94, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in158, in222, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in30, in158, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in94, in222, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in62, in126, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in190, in254, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in62, in190, c_254 ); \ + 
FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in126, in254, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in01, in03, c_128 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in05, in07, c_129 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in09, in11, c_130 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in13, in15, c_131 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in17, in19, c_132 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in21, in23, c_133 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in25, in27, c_134 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in29, in31, c_135 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in33, in35, c_136 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in37, in39, c_137 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in41, in43, c_138 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in45, in47, c_139 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in49, in51, c_140 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in53, in55, c_141 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in57, in59, c_142 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in61, in63, c_143 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in65, in67, c_144 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in69, in71, c_145 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in73, in75, c_146 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in77, in79, c_147 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in81, in83, c_148 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in85, in87, c_149 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in89, in91, c_150 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in93, in95, c_151 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in97, in99, c_152 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in101, in103, c_153 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in105, in107, c_154 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in109, in111, c_155 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in113, in115, c_156 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in117, in119, c_157 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in121, in123, c_158 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in125, in127, c_159 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in129, in131, c_160 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in133, in135, c_161 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in137, in139, c_162 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in141, in143, c_163 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in145, in147, c_164 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in149, in151, c_165 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in153, in155, c_166 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in157, in159, c_167 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in161, in163, c_168 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in165, in167, c_169 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in169, in171, c_170 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in173, in175, c_171 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in177, in179, c_172 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in181, in183, c_173 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in185, in187, c_174 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in189, in191, c_175 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in193, in195, c_176 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in197, in199, c_177 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in201, in203, c_178 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in205, in207, c_179 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in209, in211, c_180 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in213, in215, c_181 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in217, in219, c_182 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in221, in223, c_183 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in225, in227, c_184 ); \ + 
FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in229, in231, c_185 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in233, in235, c_186 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in237, in239, c_187 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in241, in243, c_188 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in245, in247, c_189 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in249, in251, c_190 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in253, in255, c_191 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in01, in05, c_192 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in09, in13, c_193 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in17, in21, c_194 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in25, in29, c_195 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in33, in37, c_196 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in41, in45, c_197 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in49, in53, c_198 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in57, in61, c_199 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in65, in69, c_200 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in73, in77, c_201 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in81, in85, c_202 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in89, in93, c_203 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in97, in101, c_204 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in105, in109, c_205 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in113, in117, c_206 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in121, in125, c_207 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in129, in133, c_208 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in137, in141, c_209 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in145, in149, c_210 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in153, in157, c_211 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in161, in165, c_212 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in169, in173, c_213 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in177, in181, c_214 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in185, in189, c_215 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in193, in197, c_216 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in201, in205, c_217 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in209, in213, c_218 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in217, in221, c_219 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in225, in229, c_220 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in233, in237, c_221 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in241, in245, c_222 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in249, in253, c_223 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in01, in09, c_224 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in17, in25, c_225 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in33, in41, c_226 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in49, in57, c_227 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in65, in73, c_228 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in81, in89, c_229 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in97, in105, c_230 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in113, in121, c_231 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in129, in137, c_232 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in145, in153, c_233 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in161, in169, c_234 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in177, in185, c_235 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in193, in201, c_236 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in209, in217, c_237 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in225, in233, c_238 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in241, in249, c_239 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in01, in17, c_240 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in33, in49, c_241 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in65, in81, c_242 ); \ + 
FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in97, in113, c_243 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in129, in145, c_244 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in161, in177, c_245 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in193, in209, c_246 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in225, in241, c_247 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in01, in33, c_248 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in65, in97, c_249 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in129, in161, c_250 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in193, in225, c_251 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in01, in65, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in129, in193, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in01, in129, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in65, in193, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in33, in97, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in161, in225, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in33, in161, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in97, in225, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in17, in49, c_248 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in81, in113, c_249 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in145, in177, c_250 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in209, in241, c_251 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in17, in81, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in145, in209, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in17, in145, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in81, in209, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in49, in113, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in177, in241, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in49, in177, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in113, in241, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in09, in25, c_240 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in41, in57, c_241 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in73, in89, c_242 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in105, in121, c_243 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in137, in153, c_244 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in169, in185, c_245 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in201, in217, c_246 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in233, in249, c_247 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in09, in41, c_248 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in73, in105, c_249 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in137, in169, c_250 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in201, in233, c_251 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in09, in73, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in137, in201, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in09, in137, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in73, in201, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in41, in105, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in169, in233, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in41, in169, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in105, in233, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in25, in57, c_248 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in89, in121, c_249 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in153, in185, c_250 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in217, in249, c_251 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in25, in89, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in153, in217, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in25, in153, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in89, in217, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in57, in121, c_252 ); \ + 
FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in185, in249, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in57, in185, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in121, in249, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in05, in13, c_224 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in21, in29, c_225 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in37, in45, c_226 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in53, in61, c_227 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in69, in77, c_228 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in85, in93, c_229 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in101, in109, c_230 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in117, in125, c_231 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in133, in141, c_232 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in149, in157, c_233 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in165, in173, c_234 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in181, in189, c_235 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in197, in205, c_236 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in213, in221, c_237 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in229, in237, c_238 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in245, in253, c_239 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in05, in21, c_240 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in37, in53, c_241 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in69, in85, c_242 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in101, in117, c_243 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in133, in149, c_244 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in165, in181, c_245 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in197, in213, c_246 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in229, in245, c_247 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in05, in37, c_248 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in69, in101, c_249 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in133, in165, c_250 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in197, in229, c_251 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in05, in69, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in133, in197, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in05, in133, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in69, in197, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in37, in101, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in165, in229, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in37, in165, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in101, in229, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in21, in53, c_248 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in85, in117, c_249 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in149, in181, c_250 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in213, in245, c_251 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in21, in85, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in149, in213, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in21, in149, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in85, in213, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in53, in117, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in181, in245, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in53, in181, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in117, in245, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in13, in29, c_240 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in45, in61, c_241 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in77, in93, c_242 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in109, in125, c_243 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in141, in157, c_244 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in173, in189, c_245 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in205, in221, c_246 ); \ + 
FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in237, in253, c_247 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in13, in45, c_248 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in77, in109, c_249 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in141, in173, c_250 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in205, in237, c_251 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in13, in77, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in141, in205, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in13, in141, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in77, in205, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in45, in109, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in173, in237, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in45, in173, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in109, in237, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in29, in61, c_248 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in93, in125, c_249 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in157, in189, c_250 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in221, in253, c_251 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in29, in93, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in157, in221, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in29, in157, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in93, in221, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in61, in125, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in189, in253, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in61, in189, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in125, in253, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in03, in07, c_192 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in11, in15, c_193 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in19, in23, c_194 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in27, in31, c_195 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in35, in39, c_196 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in43, in47, c_197 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in51, in55, c_198 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in59, in63, c_199 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in67, in71, c_200 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in75, in79, c_201 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in83, in87, c_202 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in91, in95, c_203 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in99, in103, c_204 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in107, in111, c_205 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in115, in119, c_206 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in123, in127, c_207 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in131, in135, c_208 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in139, in143, c_209 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in147, in151, c_210 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in155, in159, c_211 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in163, in167, c_212 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in171, in175, c_213 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in179, in183, c_214 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in187, in191, c_215 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in195, in199, c_216 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in203, in207, c_217 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in211, in215, c_218 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in219, in223, c_219 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in227, in231, c_220 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in235, in239, c_221 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in243, in247, c_222 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in251, in255, c_223 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in03, in11, c_224 ); \ + 
FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in19, in27, c_225 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in35, in43, c_226 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in51, in59, c_227 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in67, in75, c_228 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in83, in91, c_229 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in99, in107, c_230 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in115, in123, c_231 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in131, in139, c_232 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in147, in155, c_233 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in163, in171, c_234 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in179, in187, c_235 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in195, in203, c_236 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in211, in219, c_237 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in227, in235, c_238 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in243, in251, c_239 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in03, in19, c_240 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in35, in51, c_241 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in67, in83, c_242 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in99, in115, c_243 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in131, in147, c_244 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in163, in179, c_245 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in195, in211, c_246 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in227, in243, c_247 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in03, in35, c_248 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in67, in99, c_249 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in131, in163, c_250 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in195, in227, c_251 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in03, in67, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in131, in195, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in03, in131, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in67, in195, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in35, in99, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in163, in227, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in35, in163, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in99, in227, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in19, in51, c_248 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in83, in115, c_249 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in147, in179, c_250 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in211, in243, c_251 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in19, in83, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in147, in211, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in19, in147, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in83, in211, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in51, in115, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in179, in243, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in51, in179, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in115, in243, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in11, in27, c_240 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in43, in59, c_241 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in75, in91, c_242 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in107, in123, c_243 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in139, in155, c_244 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in171, in187, c_245 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in203, in219, c_246 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in235, in251, c_247 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in11, in43, c_248 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in75, in107, c_249 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in139, in171, c_250 ); \ + 
FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in203, in235, c_251 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in11, in75, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in139, in203, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in11, in139, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in75, in203, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in43, in107, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in171, in235, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in43, in171, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in107, in235, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in27, in59, c_248 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in91, in123, c_249 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in155, in187, c_250 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in219, in251, c_251 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in27, in91, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in155, in219, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in27, in155, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in91, in219, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in59, in123, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in187, in251, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in59, in187, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in123, in251, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in07, in15, c_224 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in23, in31, c_225 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in39, in47, c_226 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in55, in63, c_227 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in71, in79, c_228 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in87, in95, c_229 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in103, in111, c_230 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in119, in127, c_231 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in135, in143, c_232 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in151, in159, c_233 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in167, in175, c_234 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in183, in191, c_235 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in199, in207, c_236 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in215, in223, c_237 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in231, in239, c_238 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in247, in255, c_239 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in07, in23, c_240 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in39, in55, c_241 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in71, in87, c_242 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in103, in119, c_243 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in135, in151, c_244 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in167, in183, c_245 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in199, in215, c_246 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in231, in247, c_247 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in07, in39, c_248 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in71, in103, c_249 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in135, in167, c_250 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in199, in231, c_251 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in07, in71, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in135, in199, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in07, in135, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in71, in199, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in39, in103, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in167, in231, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in39, in167, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in103, in231, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in23, in55, c_248 ); \ + 
FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in87, in119, c_249 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in151, in183, c_250 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in215, in247, c_251 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in23, in87, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in151, in215, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in23, in151, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in87, in215, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in55, in119, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in183, in247, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in55, in183, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in119, in247, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in15, in31, c_240 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in47, in63, c_241 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in79, in95, c_242 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in111, in127, c_243 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in143, in159, c_244 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in175, in191, c_245 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in207, in223, c_246 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in239, in255, c_247 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in15, in47, c_248 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in79, in111, c_249 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in143, in175, c_250 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in207, in239, c_251 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in15, in79, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in143, in207, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in15, in143, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in79, in207, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in47, in111, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in175, in239, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in47, in175, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in111, in239, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in31, in63, c_248 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in95, in127, c_249 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in159, in191, c_250 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in223, in255, c_251 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in31, in95, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in159, in223, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in31, in159, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in95, in223, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in63, in127, c_252 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in191, in255, c_253 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in63, in191, c_254 ); \ + FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in127, in255, c_254 ); \ + } while( 0 ) + + + +#define FD_REEDSOL_FFT_CONSTANTS_256_0 0, 0, 133, 0, 218, 130, 88, 0, 71, 189, 250, 18, 85, 175, 232, 0, 11, 174, 165, 33, 42, 143, 132, 45, 38, 131, 136, 12, 7, 162, 169, 0, 22, 97, 119, 38, 48, 71, 81, 183, 161, 214, 192, 145, 135, 240, 230, 12, 26, 109, 123, 42, 60, 75, 93, 187, 173, 218, 204, 157, 139, 252, 234, 0, 6, 28, 26, 120, 126, 100, 98, 237, 235, 241, 247, 149, 147, 137, 143, 179, 181, 175, 169, 203, 205, 215, 209, 94, 88, 66, 68, 38, 32, 58, 60, 182, 176, 170, 172, 206, 200, 210, 212, 91, 93, 71, 65, 35, 37, 63, 57, 5, 3, 25, 31, 125, 123, 97, 103, 232, 238, 244, 242, 144, 150, 140, 138, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126, 128, 130, 132, 134, 136, 
138, 140, 142, 144, 146, 148, 150, 152, 154, 156, 158, 160, 162, 164, 166, 168, 170, 172, 174, 176, 178, 180, 182, 184, 186, 188, 190, 192, 194, 196, 198, 200, 202, 204, 206, 208, 210, 212, 214, 216, 218, 220, 222, 224, 226, 228, 230, 232, 234, 236, 238, 240, 242, 244, 246, 248, 250, 252, 254 +#define FD_REEDSOL_FFT_IMPL_256( c_00, c_01, c_02, c_03, c_04, c_05 , \ + c_06, c_07, c_08, c_09, c_10, c_11, c_12, c_13, c_14, c_15, c_16 , \ + c_17, c_18, c_19, c_20, c_21, c_22, c_23, c_24, c_25, c_26, c_27 , \ + c_28, c_29, c_30, c_31, c_32, c_33, c_34, c_35, c_36, c_37, c_38 , \ + c_39, c_40, c_41, c_42, c_43, c_44, c_45, c_46, c_47, c_48, c_49 , \ + c_50, c_51, c_52, c_53, c_54, c_55, c_56, c_57, c_58, c_59, c_60 , \ + c_61, c_62, c_63, c_64, c_65, c_66, c_67, c_68, c_69, c_70, c_71 , \ + c_72, c_73, c_74, c_75, c_76, c_77, c_78, c_79, c_80, c_81, c_82 , \ + c_83, c_84, c_85, c_86, c_87, c_88, c_89, c_90, c_91, c_92, c_93 , \ + c_94, c_95, c_96, c_97, c_98, c_99, c_100, c_101, c_102, c_103 , \ + c_104, c_105, c_106, c_107, c_108, c_109, c_110, c_111, c_112, c_113, \ + c_114, c_115, c_116, c_117, c_118, c_119, c_120, c_121, c_122, c_123, \ + c_124, c_125, c_126, c_127, c_128, c_129, c_130, c_131, c_132, c_133, \ + c_134, c_135, c_136, c_137, c_138, c_139, c_140, c_141, c_142, c_143, \ + c_144, c_145, c_146, c_147, c_148, c_149, c_150, c_151, c_152, c_153, \ + c_154, c_155, c_156, c_157, c_158, c_159, c_160, c_161, c_162, c_163, \ + c_164, c_165, c_166, c_167, c_168, c_169, c_170, c_171, c_172, c_173, \ + c_174, c_175, c_176, c_177, c_178, c_179, c_180, c_181, c_182, c_183, \ + c_184, c_185, c_186, c_187, c_188, c_189, c_190, c_191, c_192, c_193, \ + c_194, c_195, c_196, c_197, c_198, c_199, c_200, c_201, c_202, c_203, \ + c_204, c_205, c_206, c_207, c_208, c_209, c_210, c_211, c_212, c_213, \ + c_214, c_215, c_216, c_217, c_218, c_219, c_220, c_221, c_222, c_223, \ + c_224, c_225, c_226, c_227, c_228, c_229, c_230, c_231, c_232, c_233, \ + c_234, c_235, c_236, c_237, c_238, c_239, c_240, c_241, c_242, c_243, \ + c_244, c_245, c_246, c_247, c_248, c_249, c_250, c_251, c_252, c_253, \ + c_254, in00, in01, in02, in03, in04, in05, in06, in07, in08, in09 , \ + in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20 , \ + in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31 , \ + in32, in33, in34, in35, in36, in37, in38, in39, in40, in41, in42 , \ + in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53 , \ + in54, in55, in56, in57, in58, in59, in60, in61, in62, in63, in64 , \ + in65, in66, in67, in68, in69, in70, in71, in72, in73, in74, in75 , \ + in76, in77, in78, in79, in80, in81, in82, in83, in84, in85, in86 , \ + in87, in88, in89, in90, in91, in92, in93, in94, in95, in96, in97 , \ + in98, in99, in100, in101, in102, in103, in104, in105, in106, in107 , \ + in108, in109, in110, in111, in112, in113, in114, in115, in116, in117, \ + in118, in119, in120, in121, in122, in123, in124, in125, in126, in127, \ + in128, in129, in130, in131, in132, in133, in134, in135, in136, in137, \ + in138, in139, in140, in141, in142, in143, in144, in145, in146, in147, \ + in148, in149, in150, in151, in152, in153, in154, in155, in156, in157, \ + in158, in159, in160, in161, in162, in163, in164, in165, in166, in167, \ + in168, in169, in170, in171, in172, in173, in174, in175, in176, in177, \ + in178, in179, in180, in181, in182, in183, in184, in185, in186, in187, \ + in188, in189, in190, in191, in192, in193, in194, in195, in196, in197, \ + in198, in199, in200, in201, in202, in203, in204, in205, 
in206, in207, \ + in208, in209, in210, in211, in212, in213, in214, in215, in216, in217, \ + in218, in219, in220, in221, in222, in223, in224, in225, in226, in227, \ + in228, in229, in230, in231, in232, in233, in234, in235, in236, in237, \ + in238, in239, in240, in241, in242, in243, in244, in245, in246, in247, \ + in248, in249, in250, in251, in252, in253, in254, in255) \ + do { \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in00, in128, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in64, in192, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in00, in64, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in128, in192, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in32, in160, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in96, in224, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in32, in96, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in160, in224, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in00, in32, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in64, in96, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in128, in160, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in192, in224, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in16, in144, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in80, in208, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in16, in80, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in144, in208, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in48, in176, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in112, in240, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in48, in112, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in176, in240, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in16, in48, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in80, in112, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in144, in176, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in208, in240, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in00, in16, c_07 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in32, in48, c_08 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in64, in80, c_09 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in96, in112, c_10 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in128, in144, c_11 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in160, in176, c_12 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in192, in208, c_13 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in224, in240, c_14 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in08, in136, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in72, in200, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in08, in72, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in136, in200, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in40, in168, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in104, in232, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in40, in104, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in168, in232, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in08, in40, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in72, in104, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in136, in168, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in200, in232, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in24, in152, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in88, in216, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in24, in88, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in152, in216, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in56, in184, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in120, in248, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in56, in120, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in184, in248, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in24, in56, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( 
in88, in120, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in152, in184, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in216, in248, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in08, in24, c_07 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in40, in56, c_08 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in72, in88, c_09 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in104, in120, c_10 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in136, in152, c_11 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in168, in184, c_12 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in200, in216, c_13 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in232, in248, c_14 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in00, in08, c_15 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in16, in24, c_16 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in32, in40, c_17 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in48, in56, c_18 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in64, in72, c_19 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in80, in88, c_20 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in96, in104, c_21 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in112, in120, c_22 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in128, in136, c_23 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in144, in152, c_24 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in160, in168, c_25 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in176, in184, c_26 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in192, in200, c_27 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in208, in216, c_28 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in224, in232, c_29 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in240, in248, c_30 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in04, in132, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in68, in196, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in04, in68, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in132, in196, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in36, in164, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in100, in228, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in36, in100, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in164, in228, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in04, in36, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in68, in100, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in132, in164, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in196, in228, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in20, in148, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in84, in212, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in20, in84, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in148, in212, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in52, in180, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in116, in244, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in52, in116, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in180, in244, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in20, in52, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in84, in116, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in148, in180, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in212, in244, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in04, in20, c_07 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in36, in52, c_08 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in68, in84, c_09 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in100, in116, c_10 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in132, in148, c_11 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in164, in180, c_12 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in196, in212, c_13 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in228, in244, c_14 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in12, in140, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( 
in76, in204, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in12, in76, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in140, in204, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in44, in172, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in108, in236, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in44, in108, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in172, in236, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in12, in44, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in76, in108, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in140, in172, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in204, in236, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in28, in156, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in92, in220, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in28, in92, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in156, in220, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in60, in188, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in124, in252, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in60, in124, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in188, in252, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in28, in60, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in92, in124, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in156, in188, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in220, in252, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in12, in28, c_07 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in44, in60, c_08 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in76, in92, c_09 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in108, in124, c_10 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in140, in156, c_11 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in172, in188, c_12 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in204, in220, c_13 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in236, in252, c_14 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in04, in12, c_15 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in20, in28, c_16 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in36, in44, c_17 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in52, in60, c_18 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in68, in76, c_19 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in84, in92, c_20 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in100, in108, c_21 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in116, in124, c_22 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in132, in140, c_23 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in148, in156, c_24 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in164, in172, c_25 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in180, in188, c_26 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in196, in204, c_27 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in212, in220, c_28 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in228, in236, c_29 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in244, in252, c_30 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in00, in04, c_31 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in08, in12, c_32 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in16, in20, c_33 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in24, in28, c_34 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in32, in36, c_35 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in40, in44, c_36 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in48, in52, c_37 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in56, in60, c_38 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in64, in68, c_39 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in72, in76, c_40 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in80, in84, c_41 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in88, in92, c_42 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in96, in100, c_43 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in104, in108, 
c_44 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in112, in116, c_45 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in120, in124, c_46 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in128, in132, c_47 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in136, in140, c_48 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in144, in148, c_49 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in152, in156, c_50 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in160, in164, c_51 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in168, in172, c_52 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in176, in180, c_53 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in184, in188, c_54 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in192, in196, c_55 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in200, in204, c_56 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in208, in212, c_57 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in216, in220, c_58 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in224, in228, c_59 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in232, in236, c_60 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in240, in244, c_61 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in248, in252, c_62 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in02, in130, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in66, in194, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in02, in66, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in130, in194, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in34, in162, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in98, in226, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in34, in98, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in162, in226, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in02, in34, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in66, in98, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in130, in162, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in194, in226, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in18, in146, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in82, in210, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in18, in82, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in146, in210, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in50, in178, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in114, in242, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in50, in114, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in178, in242, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in18, in50, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in82, in114, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in146, in178, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in210, in242, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in02, in18, c_07 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in34, in50, c_08 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in66, in82, c_09 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in98, in114, c_10 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in130, in146, c_11 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in162, in178, c_12 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in194, in210, c_13 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in226, in242, c_14 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in10, in138, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in74, in202, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in10, in74, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in138, in202, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in42, in170, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in106, in234, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in42, in106, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in170, in234, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in10, in42, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( 
in74, in106, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in138, in170, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in202, in234, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in26, in154, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in90, in218, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in26, in90, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in154, in218, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in58, in186, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in122, in250, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in58, in122, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in186, in250, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in26, in58, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in90, in122, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in154, in186, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in218, in250, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in10, in26, c_07 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in42, in58, c_08 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in74, in90, c_09 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in106, in122, c_10 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in138, in154, c_11 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in170, in186, c_12 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in202, in218, c_13 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in234, in250, c_14 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in02, in10, c_15 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in18, in26, c_16 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in34, in42, c_17 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in50, in58, c_18 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in66, in74, c_19 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in82, in90, c_20 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in98, in106, c_21 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in114, in122, c_22 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in130, in138, c_23 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in146, in154, c_24 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in162, in170, c_25 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in178, in186, c_26 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in194, in202, c_27 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in210, in218, c_28 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in226, in234, c_29 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in242, in250, c_30 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in06, in134, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in70, in198, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in06, in70, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in134, in198, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in38, in166, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in102, in230, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in38, in102, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in166, in230, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in06, in38, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in70, in102, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in134, in166, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in198, in230, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in22, in150, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in86, in214, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in22, in86, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in150, in214, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in54, in182, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in118, in246, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in54, in118, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in182, in246, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in22, in54, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( 
in86, in118, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in150, in182, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in214, in246, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in06, in22, c_07 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in38, in54, c_08 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in70, in86, c_09 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in102, in118, c_10 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in134, in150, c_11 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in166, in182, c_12 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in198, in214, c_13 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in230, in246, c_14 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in14, in142, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in78, in206, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in14, in78, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in142, in206, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in46, in174, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in110, in238, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in46, in110, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in174, in238, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in14, in46, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in78, in110, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in142, in174, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in206, in238, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in30, in158, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in94, in222, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in30, in94, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in158, in222, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in62, in190, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in126, in254, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in62, in126, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in190, in254, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in30, in62, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in94, in126, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in158, in190, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in222, in254, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in14, in30, c_07 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in46, in62, c_08 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in78, in94, c_09 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in110, in126, c_10 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in142, in158, c_11 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in174, in190, c_12 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in206, in222, c_13 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in238, in254, c_14 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in06, in14, c_15 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in22, in30, c_16 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in38, in46, c_17 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in54, in62, c_18 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in70, in78, c_19 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in86, in94, c_20 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in102, in110, c_21 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in118, in126, c_22 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in134, in142, c_23 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in150, in158, c_24 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in166, in174, c_25 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in182, in190, c_26 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in198, in206, c_27 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in214, in222, c_28 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in230, in238, c_29 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in246, in254, c_30 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in02, in06, c_31 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( 
in10, in14, c_32 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in18, in22, c_33 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in26, in30, c_34 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in34, in38, c_35 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in42, in46, c_36 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in50, in54, c_37 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in58, in62, c_38 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in66, in70, c_39 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in74, in78, c_40 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in82, in86, c_41 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in90, in94, c_42 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in98, in102, c_43 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in106, in110, c_44 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in114, in118, c_45 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in122, in126, c_46 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in130, in134, c_47 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in138, in142, c_48 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in146, in150, c_49 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in154, in158, c_50 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in162, in166, c_51 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in170, in174, c_52 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in178, in182, c_53 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in186, in190, c_54 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in194, in198, c_55 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in202, in206, c_56 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in210, in214, c_57 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in218, in222, c_58 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in226, in230, c_59 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in234, in238, c_60 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in242, in246, c_61 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in250, in254, c_62 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in00, in02, c_63 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in04, in06, c_64 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in08, in10, c_65 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in12, in14, c_66 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in16, in18, c_67 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in20, in22, c_68 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in24, in26, c_69 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in28, in30, c_70 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in32, in34, c_71 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in36, in38, c_72 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in40, in42, c_73 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in44, in46, c_74 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in48, in50, c_75 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in52, in54, c_76 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in56, in58, c_77 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in60, in62, c_78 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in64, in66, c_79 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in68, in70, c_80 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in72, in74, c_81 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in76, in78, c_82 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in80, in82, c_83 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in84, in86, c_84 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in88, in90, c_85 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in92, in94, c_86 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in96, in98, c_87 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in100, in102, c_88 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in104, in106, c_89 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in108, in110, c_90 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in112, in114, c_91 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in116, in118, c_92 ); \ + 
FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in120, in122, c_93 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in124, in126, c_94 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in128, in130, c_95 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in132, in134, c_96 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in136, in138, c_97 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in140, in142, c_98 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in144, in146, c_99 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in148, in150, c_100 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in152, in154, c_101 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in156, in158, c_102 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in160, in162, c_103 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in164, in166, c_104 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in168, in170, c_105 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in172, in174, c_106 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in176, in178, c_107 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in180, in182, c_108 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in184, in186, c_109 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in188, in190, c_110 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in192, in194, c_111 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in196, in198, c_112 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in200, in202, c_113 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in204, in206, c_114 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in208, in210, c_115 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in212, in214, c_116 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in216, in218, c_117 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in220, in222, c_118 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in224, in226, c_119 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in228, in230, c_120 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in232, in234, c_121 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in236, in238, c_122 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in240, in242, c_123 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in244, in246, c_124 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in248, in250, c_125 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in252, in254, c_126 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in01, in129, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in65, in193, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in01, in65, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in129, in193, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in33, in161, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in97, in225, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in33, in97, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in161, in225, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in01, in33, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in65, in97, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in129, in161, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in193, in225, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in17, in145, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in81, in209, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in17, in81, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in145, in209, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in49, in177, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in113, in241, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in49, in113, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in177, in241, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in17, in49, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in81, in113, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in145, in177, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in209, in241, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in01, in17, c_07 ); \ + 
FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in33, in49, c_08 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in65, in81, c_09 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in97, in113, c_10 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in129, in145, c_11 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in161, in177, c_12 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in193, in209, c_13 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in225, in241, c_14 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in09, in137, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in73, in201, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in09, in73, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in137, in201, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in41, in169, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in105, in233, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in41, in105, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in169, in233, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in09, in41, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in73, in105, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in137, in169, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in201, in233, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in25, in153, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in89, in217, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in25, in89, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in153, in217, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in57, in185, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in121, in249, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in57, in121, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in185, in249, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in25, in57, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in89, in121, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in153, in185, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in217, in249, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in09, in25, c_07 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in41, in57, c_08 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in73, in89, c_09 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in105, in121, c_10 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in137, in153, c_11 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in169, in185, c_12 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in201, in217, c_13 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in233, in249, c_14 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in01, in09, c_15 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in17, in25, c_16 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in33, in41, c_17 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in49, in57, c_18 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in65, in73, c_19 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in81, in89, c_20 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in97, in105, c_21 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in113, in121, c_22 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in129, in137, c_23 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in145, in153, c_24 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in161, in169, c_25 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in177, in185, c_26 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in193, in201, c_27 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in209, in217, c_28 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in225, in233, c_29 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in241, in249, c_30 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in05, in133, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in69, in197, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in05, in69, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in133, in197, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in37, in165, c_00 ); \ + 
FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in101, in229, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in37, in101, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in165, in229, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in05, in37, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in69, in101, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in133, in165, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in197, in229, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in21, in149, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in85, in213, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in21, in85, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in149, in213, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in53, in181, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in117, in245, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in53, in117, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in181, in245, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in21, in53, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in85, in117, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in149, in181, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in213, in245, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in05, in21, c_07 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in37, in53, c_08 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in69, in85, c_09 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in101, in117, c_10 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in133, in149, c_11 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in165, in181, c_12 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in197, in213, c_13 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in229, in245, c_14 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in13, in141, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in77, in205, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in13, in77, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in141, in205, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in45, in173, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in109, in237, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in45, in109, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in173, in237, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in13, in45, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in77, in109, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in141, in173, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in205, in237, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in29, in157, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in93, in221, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in29, in93, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in157, in221, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in61, in189, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in125, in253, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in61, in125, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in189, in253, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in29, in61, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in93, in125, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in157, in189, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in221, in253, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in13, in29, c_07 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in45, in61, c_08 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in77, in93, c_09 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in109, in125, c_10 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in141, in157, c_11 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in173, in189, c_12 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in205, in221, c_13 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in237, in253, c_14 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in05, in13, c_15 ); \ + 
FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in21, in29, c_16 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in37, in45, c_17 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in53, in61, c_18 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in69, in77, c_19 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in85, in93, c_20 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in101, in109, c_21 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in117, in125, c_22 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in133, in141, c_23 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in149, in157, c_24 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in165, in173, c_25 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in181, in189, c_26 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in197, in205, c_27 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in213, in221, c_28 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in229, in237, c_29 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in245, in253, c_30 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in01, in05, c_31 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in09, in13, c_32 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in17, in21, c_33 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in25, in29, c_34 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in33, in37, c_35 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in41, in45, c_36 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in49, in53, c_37 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in57, in61, c_38 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in65, in69, c_39 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in73, in77, c_40 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in81, in85, c_41 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in89, in93, c_42 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in97, in101, c_43 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in105, in109, c_44 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in113, in117, c_45 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in121, in125, c_46 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in129, in133, c_47 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in137, in141, c_48 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in145, in149, c_49 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in153, in157, c_50 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in161, in165, c_51 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in169, in173, c_52 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in177, in181, c_53 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in185, in189, c_54 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in193, in197, c_55 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in201, in205, c_56 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in209, in213, c_57 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in217, in221, c_58 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in225, in229, c_59 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in233, in237, c_60 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in241, in245, c_61 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in249, in253, c_62 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in03, in131, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in67, in195, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in03, in67, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in131, in195, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in35, in163, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in99, in227, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in35, in99, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in163, in227, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in03, in35, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in67, in99, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in131, in163, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in195, in227, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in19, in147, c_00 ); \ + 
FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in83, in211, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in19, in83, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in147, in211, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in51, in179, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in115, in243, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in51, in115, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in179, in243, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in19, in51, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in83, in115, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in147, in179, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in211, in243, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in03, in19, c_07 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in35, in51, c_08 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in67, in83, c_09 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in99, in115, c_10 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in131, in147, c_11 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in163, in179, c_12 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in195, in211, c_13 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in227, in243, c_14 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in11, in139, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in75, in203, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in11, in75, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in139, in203, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in43, in171, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in107, in235, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in43, in107, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in171, in235, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in11, in43, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in75, in107, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in139, in171, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in203, in235, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in27, in155, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in91, in219, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in27, in91, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in155, in219, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in59, in187, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in123, in251, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in59, in123, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in187, in251, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in27, in59, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in91, in123, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in155, in187, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in219, in251, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in11, in27, c_07 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in43, in59, c_08 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in75, in91, c_09 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in107, in123, c_10 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in139, in155, c_11 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in171, in187, c_12 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in203, in219, c_13 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in235, in251, c_14 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in03, in11, c_15 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in19, in27, c_16 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in35, in43, c_17 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in51, in59, c_18 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in67, in75, c_19 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in83, in91, c_20 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in99, in107, c_21 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in115, in123, c_22 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in131, in139, c_23 ); \ + 
FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in147, in155, c_24 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in163, in171, c_25 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in179, in187, c_26 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in195, in203, c_27 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in211, in219, c_28 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in227, in235, c_29 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in243, in251, c_30 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in07, in135, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in71, in199, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in07, in71, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in135, in199, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in39, in167, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in103, in231, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in39, in103, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in167, in231, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in07, in39, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in71, in103, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in135, in167, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in199, in231, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in23, in151, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in87, in215, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in23, in87, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in151, in215, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in55, in183, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in119, in247, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in55, in119, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in183, in247, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in23, in55, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in87, in119, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in151, in183, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in215, in247, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in07, in23, c_07 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in39, in55, c_08 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in71, in87, c_09 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in103, in119, c_10 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in135, in151, c_11 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in167, in183, c_12 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in199, in215, c_13 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in231, in247, c_14 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in15, in143, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in79, in207, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in15, in79, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in143, in207, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in47, in175, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in111, in239, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in47, in111, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in175, in239, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in15, in47, c_03 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in79, in111, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in143, in175, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in207, in239, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in31, in159, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in95, in223, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in31, in95, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in159, in223, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in63, in191, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in127, in255, c_00 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in63, in127, c_01 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in191, in255, c_02 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in31, in63, c_03 ); 
\ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in95, in127, c_04 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in159, in191, c_05 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in223, in255, c_06 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in15, in31, c_07 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in47, in63, c_08 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in79, in95, c_09 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in111, in127, c_10 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in143, in159, c_11 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in175, in191, c_12 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in207, in223, c_13 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in239, in255, c_14 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in07, in15, c_15 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in23, in31, c_16 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in39, in47, c_17 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in55, in63, c_18 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in71, in79, c_19 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in87, in95, c_20 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in103, in111, c_21 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in119, in127, c_22 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in135, in143, c_23 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in151, in159, c_24 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in167, in175, c_25 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in183, in191, c_26 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in199, in207, c_27 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in215, in223, c_28 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in231, in239, c_29 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in247, in255, c_30 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in03, in07, c_31 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in11, in15, c_32 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in19, in23, c_33 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in27, in31, c_34 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in35, in39, c_35 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in43, in47, c_36 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in51, in55, c_37 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in59, in63, c_38 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in67, in71, c_39 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in75, in79, c_40 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in83, in87, c_41 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in91, in95, c_42 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in99, in103, c_43 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in107, in111, c_44 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in115, in119, c_45 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in123, in127, c_46 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in131, in135, c_47 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in139, in143, c_48 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in147, in151, c_49 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in155, in159, c_50 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in163, in167, c_51 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in171, in175, c_52 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in179, in183, c_53 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in187, in191, c_54 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in195, in199, c_55 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in203, in207, c_56 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in211, in215, c_57 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in219, in223, c_58 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in227, in231, c_59 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in235, in239, c_60 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in243, in247, c_61 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in251, in255, c_62 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in01, in03, c_63 ); 
\ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in05, in07, c_64 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in09, in11, c_65 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in13, in15, c_66 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in17, in19, c_67 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in21, in23, c_68 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in25, in27, c_69 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in29, in31, c_70 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in33, in35, c_71 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in37, in39, c_72 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in41, in43, c_73 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in45, in47, c_74 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in49, in51, c_75 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in53, in55, c_76 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in57, in59, c_77 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in61, in63, c_78 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in65, in67, c_79 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in69, in71, c_80 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in73, in75, c_81 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in77, in79, c_82 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in81, in83, c_83 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in85, in87, c_84 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in89, in91, c_85 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in93, in95, c_86 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in97, in99, c_87 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in101, in103, c_88 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in105, in107, c_89 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in109, in111, c_90 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in113, in115, c_91 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in117, in119, c_92 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in121, in123, c_93 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in125, in127, c_94 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in129, in131, c_95 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in133, in135, c_96 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in137, in139, c_97 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in141, in143, c_98 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in145, in147, c_99 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in149, in151, c_100 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in153, in155, c_101 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in157, in159, c_102 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in161, in163, c_103 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in165, in167, c_104 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in169, in171, c_105 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in173, in175, c_106 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in177, in179, c_107 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in181, in183, c_108 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in185, in187, c_109 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in189, in191, c_110 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in193, in195, c_111 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in197, in199, c_112 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in201, in203, c_113 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in205, in207, c_114 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in209, in211, c_115 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in213, in215, c_116 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in217, in219, c_117 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in221, in223, c_118 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in225, in227, c_119 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in229, in231, c_120 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in233, in235, c_121 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in237, in239, c_122 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( 
in241, in243, c_123 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in245, in247, c_124 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in249, in251, c_125 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in253, in255, c_126 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in00, in01, c_127 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in02, in03, c_128 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in04, in05, c_129 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in06, in07, c_130 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in08, in09, c_131 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in10, in11, c_132 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in12, in13, c_133 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in14, in15, c_134 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in16, in17, c_135 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in18, in19, c_136 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in20, in21, c_137 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in22, in23, c_138 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in24, in25, c_139 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in26, in27, c_140 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in28, in29, c_141 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in30, in31, c_142 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in32, in33, c_143 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in34, in35, c_144 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in36, in37, c_145 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in38, in39, c_146 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in40, in41, c_147 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in42, in43, c_148 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in44, in45, c_149 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in46, in47, c_150 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in48, in49, c_151 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in50, in51, c_152 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in52, in53, c_153 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in54, in55, c_154 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in56, in57, c_155 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in58, in59, c_156 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in60, in61, c_157 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in62, in63, c_158 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in64, in65, c_159 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in66, in67, c_160 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in68, in69, c_161 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in70, in71, c_162 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in72, in73, c_163 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in74, in75, c_164 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in76, in77, c_165 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in78, in79, c_166 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in80, in81, c_167 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in82, in83, c_168 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in84, in85, c_169 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in86, in87, c_170 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in88, in89, c_171 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in90, in91, c_172 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in92, in93, c_173 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in94, in95, c_174 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in96, in97, c_175 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in98, in99, c_176 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in100, in101, c_177 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in102, in103, c_178 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in104, in105, c_179 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in106, in107, c_180 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in108, in109, c_181 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in110, in111, c_182 ); \ + 
FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in112, in113, c_183 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in114, in115, c_184 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in116, in117, c_185 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in118, in119, c_186 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in120, in121, c_187 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in122, in123, c_188 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in124, in125, c_189 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in126, in127, c_190 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in128, in129, c_191 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in130, in131, c_192 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in132, in133, c_193 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in134, in135, c_194 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in136, in137, c_195 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in138, in139, c_196 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in140, in141, c_197 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in142, in143, c_198 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in144, in145, c_199 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in146, in147, c_200 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in148, in149, c_201 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in150, in151, c_202 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in152, in153, c_203 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in154, in155, c_204 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in156, in157, c_205 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in158, in159, c_206 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in160, in161, c_207 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in162, in163, c_208 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in164, in165, c_209 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in166, in167, c_210 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in168, in169, c_211 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in170, in171, c_212 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in172, in173, c_213 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in174, in175, c_214 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in176, in177, c_215 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in178, in179, c_216 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in180, in181, c_217 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in182, in183, c_218 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in184, in185, c_219 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in186, in187, c_220 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in188, in189, c_221 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in190, in191, c_222 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in192, in193, c_223 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in194, in195, c_224 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in196, in197, c_225 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in198, in199, c_226 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in200, in201, c_227 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in202, in203, c_228 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in204, in205, c_229 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in206, in207, c_230 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in208, in209, c_231 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in210, in211, c_232 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in212, in213, c_233 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in214, in215, c_234 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in216, in217, c_235 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in218, in219, c_236 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in220, in221, c_237 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in222, in223, c_238 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in224, in225, c_239 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in226, in227, c_240 ); \ + 
FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in228, in229, c_241 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in230, in231, c_242 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in232, in233, c_243 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in234, in235, c_244 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in236, in237, c_245 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in238, in239, c_246 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in240, in241, c_247 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in242, in243, c_248 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in244, in245, c_249 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in246, in247, c_250 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in248, in249, c_251 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in250, in251, c_252 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in252, in253, c_253 ); \ + FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in254, in255, c_254 ); \ + } while( 0 ) + + + #define FD_REEDSOL_IFFT_CONSTANTS_128_0 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126, 0, 6, 28, 26, 120, 126, 100, 98, 237, 235, 241, 247, 149, 147, 137, 143, 179, 181, 175, 169, 203, 205, 215, 209, 94, 88, 66, 68, 38, 32, 58, 60, 0, 22, 97, 119, 38, 48, 71, 81, 183, 161, 214, 192, 145, 135, 240, 230, 0, 11, 174, 165, 33, 42, 143, 132, 0, 71, 189, 250, 0, 218, 0 #define FD_REEDSOL_IFFT_CONSTANTS_128_128 128, 130, 132, 134, 136, 138, 140, 142, 144, 146, 148, 150, 152, 154, 156, 158, 160, 162, 164, 166, 168, 170, 172, 174, 176, 178, 180, 182, 184, 186, 188, 190, 192, 194, 196, 198, 200, 202, 204, 206, 208, 210, 212, 214, 216, 218, 220, 222, 224, 226, 228, 230, 232, 234, 236, 238, 240, 242, 244, 246, 248, 250, 252, 254, 182, 176, 170, 172, 206, 200, 210, 212, 91, 93, 71, 65, 35, 37, 63, 57, 5, 3, 25, 31, 125, 123, 97, 103, 232, 238, 244, 242, 144, 150, 140, 138, 12, 26, 109, 123, 42, 60, 75, 93, 187, 173, 218, 204, 157, 139, 252, 234, 45, 38, 131, 136, 12, 7, 162, 169, 18, 85, 175, 232, 130, 88, 133 #define FD_REEDSOL_IFFT_IMPL_128( c_00, c_01, c_02, c_03, c_04, c_05 , \ @@ -590,7 +2750,6 @@ #define FD_REEDSOL_FFT_CONSTANTS_128_0 0, 0, 218, 0, 71, 189, 250, 0, 11, 174, 165, 33, 42, 143, 132, 0, 22, 97, 119, 38, 48, 71, 81, 183, 161, 214, 192, 145, 135, 240, 230, 0, 6, 28, 26, 120, 126, 100, 98, 237, 235, 241, 247, 149, 147, 137, 143, 179, 181, 175, 169, 203, 205, 215, 209, 94, 88, 66, 68, 38, 32, 58, 60, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126 #define FD_REEDSOL_FFT_CONSTANTS_128_128 133, 130, 88, 18, 85, 175, 232, 45, 38, 131, 136, 12, 7, 162, 169, 12, 26, 109, 123, 42, 60, 75, 93, 187, 173, 218, 204, 157, 139, 252, 234, 182, 176, 170, 172, 206, 200, 210, 212, 91, 93, 71, 65, 35, 37, 63, 57, 5, 3, 25, 31, 125, 123, 97, 103, 232, 238, 244, 242, 144, 150, 140, 138, 128, 130, 132, 134, 136, 138, 140, 142, 144, 146, 148, 150, 152, 154, 156, 158, 160, 162, 164, 166, 168, 170, 172, 174, 176, 178, 180, 182, 184, 186, 188, 190, 192, 194, 196, 198, 200, 202, 204, 206, 208, 210, 212, 214, 216, 218, 220, 222, 224, 226, 228, 230, 232, 234, 236, 238, 240, 242, 244, 246, 248, 250, 252, 254 -#define FD_REEDSOL_FFT_CONSTANTS_128_256 0, 0, 218, 0, 71, 189, 250, 0, 11, 174, 165, 33, 42, 143, 132, 0, 22, 97, 119, 38, 
48, 71, 81, 183, 161, 214, 192, 145, 135, 240, 230, 0, 6, 28, 26, 120, 126, 100, 98, 237, 235, 241, 247, 149, 147, 137, 143, 179, 181, 175, 169, 203, 205, 215, 209, 94, 88, 66, 68, 38, 32, 58, 60, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126 #define FD_REEDSOL_FFT_IMPL_128( c_00, c_01, c_02, c_03, c_04, c_05 , \ c_06, c_07, c_08, c_09, c_10, c_11, c_12, c_13, c_14, c_15, c_16 , \ c_17, c_18, c_19, c_20, c_21, c_22, c_23, c_24, c_25, c_26, c_27 , \ @@ -1497,6 +3656,7 @@ #define FD_REEDSOL_IFFT_CONSTANTS_32_32 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 237, 235, 241, 247, 149, 147, 137, 143, 38, 48, 71, 81, 174, 165, 71 #define FD_REEDSOL_IFFT_CONSTANTS_32_64 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 179, 181, 175, 169, 203, 205, 215, 209, 183, 161, 214, 192, 33, 42, 189 #define FD_REEDSOL_IFFT_CONSTANTS_32_96 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126, 94, 88, 66, 68, 38, 32, 58, 60, 145, 135, 240, 230, 143, 132, 250 +#define FD_REEDSOL_IFFT_CONSTANTS_32_128 128, 130, 132, 134, 136, 138, 140, 142, 144, 146, 148, 150, 152, 154, 156, 158, 182, 176, 170, 172, 206, 200, 210, 212, 12, 26, 109, 123, 45, 38, 18 #define FD_REEDSOL_IFFT_IMPL_32( c_00, c_01, c_02, c_03, c_04, c_05 , \ c_06, c_07, c_08, c_09, c_10, c_11, c_12, c_13, c_14, c_15, c_16 , \ c_17, c_18, c_19, c_20, c_21, c_22, c_23, c_24, c_25, c_26, c_27 , \ @@ -1593,6 +3753,7 @@ #define FD_REEDSOL_FFT_CONSTANTS_32_32 71, 174, 165, 38, 48, 71, 81, 237, 235, 241, 247, 149, 147, 137, 143, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62 #define FD_REEDSOL_FFT_CONSTANTS_32_64 189, 33, 42, 183, 161, 214, 192, 179, 181, 175, 169, 203, 205, 215, 209, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94 #define FD_REEDSOL_FFT_CONSTANTS_32_96 250, 143, 132, 145, 135, 240, 230, 94, 88, 66, 68, 38, 32, 58, 60, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126 +#define FD_REEDSOL_FFT_CONSTANTS_32_128 18, 45, 38, 12, 26, 109, 123, 182, 176, 170, 172, 206, 200, 210, 212, 128, 130, 132, 134, 136, 138, 140, 142, 144, 146, 148, 150, 152, 154, 156, 158 #define FD_REEDSOL_FFT_IMPL_32( c_00, c_01, c_02, c_03, c_04, c_05 , \ c_06, c_07, c_08, c_09, c_10, c_11, c_12, c_13, c_14, c_15, c_16, \ c_17, c_18, c_19, c_20, c_21, c_22, c_23, c_24, c_25, c_26, c_27, \ @@ -1691,6 +3852,9 @@ #define FD_REEDSOL_IFFT_CONSTANTS_16_48 48, 50, 52, 54, 56, 58, 60, 62, 149, 147, 137, 143, 71, 81, 165 #define FD_REEDSOL_IFFT_CONSTANTS_16_64 64, 66, 68, 70, 72, 74, 76, 78, 179, 181, 175, 169, 183, 161, 33 #define FD_REEDSOL_IFFT_CONSTANTS_16_80 80, 82, 84, 86, 88, 90, 92, 94, 203, 205, 215, 209, 214, 192, 42 +#define FD_REEDSOL_IFFT_CONSTANTS_16_96 96, 98, 100, 102, 104, 106, 108, 110, 94, 88, 66, 68, 145, 135, 143 +#define FD_REEDSOL_IFFT_CONSTANTS_16_112 112, 114, 116, 118, 120, 122, 124, 126, 38, 32, 58, 60, 240, 230, 132 +#define FD_REEDSOL_IFFT_CONSTANTS_16_128 128, 130, 132, 134, 136, 138, 140, 142, 182, 176, 170, 172, 12, 26, 45 #define FD_REEDSOL_IFFT_IMPL_16( c_00, c_01, c_02, c_03, c_04, c_05 , \ c_06, c_07, c_08, c_09, c_10, c_11, c_12, c_13, c_14, in00, in01 , \ in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12 , \ @@ -1738,6 +3902,9 @@ #define FD_REEDSOL_FFT_CONSTANTS_16_48 165, 71, 81, 149, 147, 137, 143, 48, 
50, 52, 54, 56, 58, 60, 62 #define FD_REEDSOL_FFT_CONSTANTS_16_64 33, 183, 161, 179, 181, 175, 169, 64, 66, 68, 70, 72, 74, 76, 78 #define FD_REEDSOL_FFT_CONSTANTS_16_80 42, 214, 192, 203, 205, 215, 209, 80, 82, 84, 86, 88, 90, 92, 94 +#define FD_REEDSOL_FFT_CONSTANTS_16_96 143, 145, 135, 94, 88, 66, 68, 96, 98, 100, 102, 104, 106, 108, 110 +#define FD_REEDSOL_FFT_CONSTANTS_16_112 132, 240, 230, 38, 32, 58, 60, 112, 114, 116, 118, 120, 122, 124, 126 +#define FD_REEDSOL_FFT_CONSTANTS_16_128 45, 12, 26, 182, 176, 170, 172, 128, 130, 132, 134, 136, 138, 140, 142 #define FD_REEDSOL_FFT_IMPL_16( c_00, c_01, c_02, c_03, c_04, c_05 , \ c_06, c_07, c_08, c_09, c_10, c_11, c_12, c_13, c_14, in00, in01, \ in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, \ @@ -1789,6 +3956,13 @@ #define FD_REEDSOL_IFFT_CONSTANTS_8_56 56, 58, 60, 62, 137, 143, 81 #define FD_REEDSOL_IFFT_CONSTANTS_8_64 64, 66, 68, 70, 179, 181, 183 #define FD_REEDSOL_IFFT_CONSTANTS_8_72 72, 74, 76, 78, 175, 169, 161 +#define FD_REEDSOL_IFFT_CONSTANTS_8_80 80, 82, 84, 86, 203, 205, 214 +#define FD_REEDSOL_IFFT_CONSTANTS_8_88 88, 90, 92, 94, 215, 209, 192 +#define FD_REEDSOL_IFFT_CONSTANTS_8_96 96, 98, 100, 102, 94, 88, 145 +#define FD_REEDSOL_IFFT_CONSTANTS_8_104 104, 106, 108, 110, 66, 68, 135 +#define FD_REEDSOL_IFFT_CONSTANTS_8_112 112, 114, 116, 118, 38, 32, 240 +#define FD_REEDSOL_IFFT_CONSTANTS_8_120 120, 122, 124, 126, 58, 60, 230 +#define FD_REEDSOL_IFFT_CONSTANTS_8_128 128, 130, 132, 134, 182, 176, 12 #define FD_REEDSOL_IFFT_IMPL_8( c_00, c_01, c_02, c_03, c_04, c_05 , \ c_06, in00, in01, in02, in03, in04, in05, in06, in07) \ do { \ @@ -1818,6 +3992,13 @@ #define FD_REEDSOL_FFT_CONSTANTS_8_56 81, 137, 143, 56, 58, 60, 62 #define FD_REEDSOL_FFT_CONSTANTS_8_64 183, 179, 181, 64, 66, 68, 70 #define FD_REEDSOL_FFT_CONSTANTS_8_72 161, 175, 169, 72, 74, 76, 78 +#define FD_REEDSOL_FFT_CONSTANTS_8_80 214, 203, 205, 80, 82, 84, 86 +#define FD_REEDSOL_FFT_CONSTANTS_8_88 192, 215, 209, 88, 90, 92, 94 +#define FD_REEDSOL_FFT_CONSTANTS_8_96 145, 94, 88, 96, 98, 100, 102 +#define FD_REEDSOL_FFT_CONSTANTS_8_104 135, 66, 68, 104, 106, 108, 110 +#define FD_REEDSOL_FFT_CONSTANTS_8_112 240, 38, 32, 112, 114, 116, 118 +#define FD_REEDSOL_FFT_CONSTANTS_8_120 230, 58, 60, 120, 122, 124, 126 +#define FD_REEDSOL_FFT_CONSTANTS_8_128 12, 182, 176, 128, 130, 132, 134 #define FD_REEDSOL_FFT_IMPL_8( c_00, c_01, c_02, c_03, c_04, c_05 , \ c_06, in00, in01, in02, in03, in04, in05, in06, in07) \ do { \ @@ -1855,6 +4036,22 @@ #define FD_REEDSOL_IFFT_CONSTANTS_4_60 60, 62, 143 #define FD_REEDSOL_IFFT_CONSTANTS_4_64 64, 66, 179 #define FD_REEDSOL_IFFT_CONSTANTS_4_68 68, 70, 181 +#define FD_REEDSOL_IFFT_CONSTANTS_4_72 72, 74, 175 +#define FD_REEDSOL_IFFT_CONSTANTS_4_76 76, 78, 169 +#define FD_REEDSOL_IFFT_CONSTANTS_4_80 80, 82, 203 +#define FD_REEDSOL_IFFT_CONSTANTS_4_84 84, 86, 205 +#define FD_REEDSOL_IFFT_CONSTANTS_4_88 88, 90, 215 +#define FD_REEDSOL_IFFT_CONSTANTS_4_92 92, 94, 209 +#define FD_REEDSOL_IFFT_CONSTANTS_4_96 96, 98, 94 +#define FD_REEDSOL_IFFT_CONSTANTS_4_100 100, 102, 88 +#define FD_REEDSOL_IFFT_CONSTANTS_4_104 104, 106, 66 +#define FD_REEDSOL_IFFT_CONSTANTS_4_108 108, 110, 68 +#define FD_REEDSOL_IFFT_CONSTANTS_4_112 112, 114, 38 +#define FD_REEDSOL_IFFT_CONSTANTS_4_116 116, 118, 32 +#define FD_REEDSOL_IFFT_CONSTANTS_4_120 120, 122, 58 +#define FD_REEDSOL_IFFT_CONSTANTS_4_124 124, 126, 60 +#define FD_REEDSOL_IFFT_CONSTANTS_4_128 128, 130, 182 +#define FD_REEDSOL_IFFT_CONSTANTS_4_132 132, 134, 176 #define FD_REEDSOL_IFFT_IMPL_4( 
c_00, c_01, c_02, in00, in01, in02 , \ in03) \ do { \ @@ -1884,6 +4081,22 @@ #define FD_REEDSOL_FFT_CONSTANTS_4_60 143, 60, 62 #define FD_REEDSOL_FFT_CONSTANTS_4_64 179, 64, 66 #define FD_REEDSOL_FFT_CONSTANTS_4_68 181, 68, 70 +#define FD_REEDSOL_FFT_CONSTANTS_4_72 175, 72, 74 +#define FD_REEDSOL_FFT_CONSTANTS_4_76 169, 76, 78 +#define FD_REEDSOL_FFT_CONSTANTS_4_80 203, 80, 82 +#define FD_REEDSOL_FFT_CONSTANTS_4_84 205, 84, 86 +#define FD_REEDSOL_FFT_CONSTANTS_4_88 215, 88, 90 +#define FD_REEDSOL_FFT_CONSTANTS_4_92 209, 92, 94 +#define FD_REEDSOL_FFT_CONSTANTS_4_96 94, 96, 98 +#define FD_REEDSOL_FFT_CONSTANTS_4_100 88, 100, 102 +#define FD_REEDSOL_FFT_CONSTANTS_4_104 66, 104, 106 +#define FD_REEDSOL_FFT_CONSTANTS_4_108 68, 108, 110 +#define FD_REEDSOL_FFT_CONSTANTS_4_112 38, 112, 114 +#define FD_REEDSOL_FFT_CONSTANTS_4_116 32, 116, 118 +#define FD_REEDSOL_FFT_CONSTANTS_4_120 58, 120, 122 +#define FD_REEDSOL_FFT_CONSTANTS_4_124 60, 124, 126 +#define FD_REEDSOL_FFT_CONSTANTS_4_128 182, 128, 130 +#define FD_REEDSOL_FFT_CONSTANTS_4_132 176, 132, 134 #define FD_REEDSOL_FFT_IMPL_4( c_00, c_01, c_02, in00, in01, in02 , \ in03) \ do { \ diff --git a/src/ballet/reedsol/fd_reedsol_internal.h b/src/ballet/reedsol/fd_reedsol_internal.h index a7dd4a73cd9..84b1b764ca7 100644 --- a/src/ballet/reedsol/fd_reedsol_internal.h +++ b/src/ballet/reedsol/fd_reedsol_internal.h @@ -2,7 +2,8 @@ #define HEADER_fd_src_ballet_reedsol_fd_reedsol_internal_h #include "../../util/fd_util_base.h" -/* Contains function declarations for the interal encoding functions. */ +/* Contains function declarations for the internal encoding and recovery + functions. */ /* FALLTHRU: Tells the compiler that falling through to the next case of the switch statement is intentional and not a bug. When brutality @@ -38,4 +39,63 @@ void fd_reedsol_encode_32_32( ulong shred_sz, uchar * _scratch ); #endif +/* fd_reedsol_recover_{first, var}_{n}: Verifies the consistency + of the Reed-Solomon encoded data, and recovers any missing data. + At least data_shred_cnt of the first n shreds must be un-erased, + which implies data_shred_cnt <= n. + + The _first variant imposes the additional constraint that the first + data_shred_cnt shreds must be un-erased, as is the case when no packets + have been lost. This version is faster. + + Unlike the encode operations, the math doesn't care much whether a + shred is a data shred or parity shred for recover operations, hence + the function only has one shred array. The parity shreds come + immediately after the data shreds. + + For each value of i in [0, data_shred_cnt+parity_shred_cnt), erased[ + i ] must be 0 (if shred[ i ] contains valid data) or 1 if shred[ i ] + is an erasure (i.e. wasn't received, was corrupted, etc.). If + erased[ i ]==1, the contents of shred[ i ] are ignored on entry, and + upon return, shred[ i ][ j ] will be overwritten with the correct + data for j in [0, shred_sz). + + Note that since data_shred_cnt+parity_shred_cnt<=134, shred[ i ] and + erased[ i ] for i>=134 are completely ignored.
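+
+   Purely as an illustrative sketch (not part of this header: d, p,
+   shred_sz and the setup below are hypothetical, and the _64 variant
+   is picked arbitrarily), a caller might do something like:
+
+     uchar * shred [ 134 ];    (d data shreds first, then p parity shreds)
+     uchar   erased[ 134 ];    (erased[ i ]==1 iff shred i was not received intact)
+     ... point shred[ i ] at the shred payloads and fill erased[ i ] ...
+     int err = fd_reedsol_recover_var_64( shred_sz, shred, d, p, erased );
+     ... err is FD_REEDSOL_OK on success; other return values are described below ...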
+ + Returns one of: + FD_REEDSOL_OK if okay + FD_REEDSOL_ERR_INCONSISTENT if the shreds are not consistent with + having come from a Reed-Solomon encoding of data_shred_cnt data + shreds + FD_REEDSOL_ERR_INSUFFICIENT if there's not enough un-erased data to + recover data_shred_cnt data shreds + */ + +int fd_reedsol_recover_var_16( ulong shred_sz, + uchar * const * shred, + ulong data_shred_cnt, + ulong parity_shred_cnt, + uchar const * erased ); +int fd_reedsol_recover_var_32( ulong shred_sz, + uchar * const * shred, + ulong data_shred_cnt, + ulong parity_shred_cnt, + uchar const * erased ); +int fd_reedsol_recover_var_64( ulong shred_sz, + uchar * const * shred, + ulong data_shred_cnt, + ulong parity_shred_cnt, + uchar const * erased ); +int fd_reedsol_recover_var_128( ulong shred_sz, + uchar * const * shred, + ulong data_shred_cnt, + ulong parity_shred_cnt, + uchar const * erased ); +int fd_reedsol_recover_var_256( ulong shred_sz, + uchar * const * shred, + ulong data_shred_cnt, + ulong parity_shred_cnt, + uchar const * erased ); + +#endif /* HEADER_fd_src_ballet_reedsol_fd_reedsol_internal_h */ diff --git a/src/ballet/reedsol/fd_reedsol_pi.c b/src/ballet/reedsol/fd_reedsol_pi.c new file mode 100644 index 00000000000..8b651da12c8 --- /dev/null +++ b/src/ballet/reedsol/fd_reedsol_pi.c @@ -0,0 +1,1195 @@ +#include +#include "fd_reedsol_pi.h" +#if FD_HAS_AVX +#include "../../util/simd/fd_avx.h" +#include "../../util/simd/fd_sse.h" +#include +#endif + +#if FD_HAS_GFNI +#include "fd_reedsol_arith_gfni.h" +#elif FD_HAS_AVX +#include "fd_reedsol_arith_avx2.h" +#else +#include "fd_reedsol_arith_none.h" +#endif + +/* TODO: Move this high-level overview + + The main Lin, et al. paper proposes a clever method for dealing with + erasures. Suppose there is a polynomial P(x) of degree >23). + We need at least 31 bits of precision for the product, so + mulh_epu16 is perfect. */ + return ws_and( ws_bcast( 0xFF ), ws_add( x, ws_shru( ws_mulhi( x, ws_bcast( (short)0x8081 ) ), 7 ) ) ); +} + +/* The following macros implement the unscaled Fast Walsh-Hadamard + transform. As alluded to above, this gives us a way to compute Pi + and Pi' in O(n lg n) time. These are designed for use within this + file, not external use. + + Unlike the rest of the similar-seeming components in fd_reedsol (e.g. + FFT, PPT), this computes the transform within a single (or few) AVX + vectors, not in parallel across each component of the vector. I.e. if + FD_HAS_AVX, to compute a 16-element FWHT, you pass one AVX vector + (16*short), not 16 vectors. + + Also unlike the rest of the similar-seeming components in fd_reedsol, + this works on the group Z/255Z (integers mod 255). Since 255 is not + a prime, this is not a field, but the FD_REEDSOL_FWHT macros only need addition, + subtraction, and division by powers of 2 (which have inverses mod + 255), so it's not a problem.
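+
+   For reference only (a scalar sketch added for illustration, not used
+   by this file), the transform these macros compute is the standard
+   in-place Walsh-Hadamard butterfly, with the mod-255 reduction
+   deferred just as the short-based macros below defer it:
+
+     void fwht_ref( long * x, ulong n ) {      (n a power of 2, n<=256)
+       for( ulong h=1UL; h<n; h<<=1UL )
+         for( ulong i=0UL; i<n; i+=2UL*h )
+           for( ulong j=i; j<i+h; j++ ) {
+             long a = x[ j ]; long b = x[ j+h ];
+             x[ j ] = a+b; x[ j+h ] = a-b;     (reduce mod 255 at the end)
+           }
+     }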
*/ + +#if FD_REEDSOL_PI_USE_SHORT + +#define FD_REEDSOL_FWHT_16( x ) do { ws_t _x = (x); \ + _x = ws_add( _mm256_setr_m128i( _mm256_extracti128_si256( _x, 1 ), _mm256_extracti128_si256( _x, 0 ) ), \ + ws_adjust_sign( _x, _mm256_setr_epi16( 1,1,1,1, 1,1,1,1, -1,-1,-1,-1, -1,-1,-1,-1 ) ) ); \ + _x = ws_add( _mm256_shuffle_epi32( _x, 0x4E ), \ + ws_adjust_sign( _x, _mm256_setr_epi16( 1,1,1,1, -1,-1,-1,-1, 1,1,1,1, -1,-1,-1,-1 ) ) ); \ + _x = ws_add( _mm256_shuffle_epi32( _x, 0xB1 ), \ + ws_adjust_sign( _x, _mm256_setr_epi16( 1,1,-1,-1, 1,1,-1,-1, 1,1,-1,-1, 1,1,-1,-1 ) ) ); \ + _x = ws_add( _mm256_shuffle_epi8( _x, _mm256_setr_epi8( 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13, \ + 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13 ) ), \ + ws_adjust_sign( _x, _mm256_setr_epi16( 1,-1,1,-1, 1,-1,1,-1, 1,-1,1,-1, 1,-1,1,-1 ) ) ); \ + (x) = _x; } while( 0 ) + +#define FD_REEDSOL_FWHT_32( x0, x1 ) do { \ + ws_t _y0i = (x0); ws_t _y1i = (x1); \ + ws_t _y0 = ws_add( _y0i, _y1i ); ws_t _y1 = ws_sub( _y0i, _y1i ); \ + FD_REEDSOL_FWHT_16( _y0 ); FD_REEDSOL_FWHT_16( _y1 ); \ + (x0) = _y0; (x1) = _y1; \ +} while( 0 ) + + +#define FD_REEDSOL_FWHT_64( x0, x1, x2, x3 ) do { \ + ws_t _z0, _z1, _z2, _z3; ws_t _z0i, _z1i, _z2i, _z3i; \ + _z0i = (x0); _z1i = (x1); _z2i = (x2); _z3i = (x3); \ + _z0 = ws_add( _z0i, _z2i ); _z1 = ws_add( _z1i, _z3i ); _z2 = ws_sub( _z0i, _z2i ); _z3 = ws_sub( _z1i, _z3i ); \ + FD_REEDSOL_FWHT_32( _z0, _z1 ); FD_REEDSOL_FWHT_32( _z2, _z3 ); \ + (x0) = _z0; (x1) = _z1; (x2) = _z2; (x3) = _z3; \ +} while( 0 ) + +#define FD_REEDSOL_FWHT_128( x0, x1, x2, x3, x4, x5, x6, x7 ) do { \ + ws_t _w0, _w1, _w2, _w3, _w4, _w5, _w6, _w7; \ + ws_t _w0i, _w1i, _w2i, _w3i, _w4i, _w5i, _w6i, _w7i; \ + _w0i = (x0); _w1i = (x1); _w2i = (x2); _w3i = (x3); \ + _w4i = (x4); _w5i = (x5); _w6i = (x6); _w7i = (x7); \ + _w0 = ws_add( _w0i, _w4i ); _w1 = ws_add( _w1i, _w5i ); _w2 = ws_add( _w2i, _w6i ); _w3 = ws_add( _w3i, _w7i ); \ + _w4 = ws_sub( _w0i, _w4i ); _w5 = ws_sub( _w1i, _w5i ); _w6 = ws_sub( _w2i, _w6i ); _w7 = ws_sub( _w3i, _w7i ); \ + FD_REEDSOL_FWHT_64( _w0, _w1, _w2, _w3 ); FD_REEDSOL_FWHT_64( _w4, _w5, _w6, _w7 ); \ + (x0) = _w0; (x1) = _w1; (x2) = _w2; (x3) = _w3; (x4) = _w4; (x5) = _w5; (x6) = _w6; (x7) = _w7; \ +} while( 0 ) + +#define FD_REEDSOL_FWHT_256( x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15 ) do { \ + ws_t _v0, _v1, _v2, _v3, _v4, _v5, _v6, _v7, _v8, _v9, _v10, _v11, _v12, _v13, _v14, _v15; \ + ws_t _v0i, _v1i, _v2i, _v3i, _v4i, _v5i, _v6i, _v7i, _v8i, _v9i, _v10i, _v11i, _v12i, _v13i, _v14i, _v15i; \ + _v0i = (x0); _v1i = (x1); _v2i = (x2); _v3i = (x3); \ + _v4i = (x4); _v5i = (x5); _v6i = (x6); _v7i = (x7); \ + _v8i = (x8); _v9i = (x9); _v10i = (x10); _v11i = (x11); \ + _v12i = (x12); _v13i = (x13); _v14i = (x14); _v15i = (x15); \ + _v0 = ws_add( _v0i, _v8i ); _v1 = ws_add( _v1i, _v9i ); _v2 = ws_add( _v2i, _v10i ); _v3 = ws_add( _v3i, _v11i ); \ + _v4 = ws_add( _v4i, _v12i ); _v5 = ws_add( _v5i, _v13i ); _v6 = ws_add( _v6i, _v14i ); _v7 = ws_add( _v7i, _v15i ); \ + _v8 = ws_sub( _v0i, _v8i ); _v9 = ws_sub( _v1i, _v9i ); _v10 = ws_sub( _v2i, _v10i ); _v11 = ws_sub( _v3i, _v11i ); \ + _v12 = ws_sub( _v4i, _v12i ); _v13 = ws_sub( _v5i, _v13i ); _v14 = ws_sub( _v6i, _v14i ); _v15 = ws_sub( _v7i, _v15i ); \ + FD_REEDSOL_FWHT_128( _v0, _v1, _v2, _v3, _v4, _v5, _v6, _v7 ); FD_REEDSOL_FWHT_128( _v8, _v9, _v10, _v11, _v12, _v13, _v14, _v15 ); \ + (x0) = _v0; (x1) = _v1; (x2) = _v2; (x3) = _v3; (x4) = _v4; (x5) = _v5; (x6) = _v6; (x7) = _v7; \ + (x8) = 
_v8; (x9) = _v9; (x10) = _v10; (x11) = _v11; (x12) = _v12; (x13) = _v13; (x14) = _v14; (x15) = _v15; \ +} while( 0 ) + +#else /* FD_REEDSOL_PI_USE_SHORT */ + +static inline wb_t +add_mod_255( wb_t a, wb_t b ) { + wb_t sum = wb_add( a, b ); + wb_t overflowed = wb_lt( sum, a ); + return wb_sub( sum, overflowed ); +} + +#define FD_REEDSOL_FWHT_16( x ) do { wb_t _x = (x); \ + wb_t negated, unshifted, shifted; \ + /* Shift by 8 elements (8B) */ \ + negated = wb_sub( wb_bcast( 0xFF ), _x ); \ + unshifted = _mm256_blend_epi32( _x, negated, 0xCC ); \ + shifted = _mm256_shuffle_epi32( _x, 0x4E ); \ + _x = add_mod_255( unshifted, shifted ); \ + /* Shift by 4 elements (4B) */ \ + negated = wb_sub( wb_bcast( 0xFF ), _x ); \ + unshifted = _mm256_blend_epi32( _x, negated, 0xAA ); \ + shifted = _mm256_shuffle_epi32( _x, 0xB1 ); \ + _x = add_mod_255( unshifted, shifted ); \ + /* Shift by 2 elements (2B) */ \ + negated = wb_sub( wb_bcast( 0xFF ), _x ); \ + unshifted = _mm256_blend_epi16( _x, negated, 0xAA ); \ + shifted = wb_exch_adj_pair( _x ); \ + _x = add_mod_255( unshifted, shifted ); \ + /* Shift by 1 element (1B) */ \ + negated = wb_sub( wb_bcast( 0xFF ), _x ); \ + unshifted = _mm256_blendv_epi8( _x, negated, wb_bcast_pair( 0x01, 0xFF ) ); \ + shifted = wb_exch_adj( _x ); \ + _x = add_mod_255( unshifted, shifted ); \ + (x) = _x; \ +} while( 0 ) + +#define FD_REEDSOL_FWHT_32( x ) do { wb_t _y = (x); \ + wb_t negated, unshifted, shifted; \ + /* Shift by 16 elements (16B) */ \ + negated = wb_sub( wb_bcast( 0xFF ), _y ); \ + unshifted = _mm256_blend_epi32( _y, negated, 0xF0 ); \ + shifted = _mm256_setr_m128i( _mm256_extracti128_si256( _y, 1 ), _mm256_extracti128_si256( _y, 0 ) ); \ + _y = add_mod_255( unshifted, shifted ); \ + FD_REEDSOL_FWHT_16( _y ); \ + (x) = _y; \ +} while( 0 ) + +#define FD_REEDSOL_FWHT_64( x0, x1 ) do { wb_t _z0i = (x0); wb_t _z1i = (x1); \ + wb_t _z0 = add_mod_255( _z0i, _z1i ); wb_t _z1 = add_mod_255( _z0i, wb_sub( wb_bcast( 0xFF ), _z1i ) ); \ + FD_REEDSOL_FWHT_32( _z0 ); FD_REEDSOL_FWHT_32( _z1 ); \ + (x0) = _z0; (x1) = _z1; \ +} while( 0 ) + +#define FD_REEDSOL_FWHT_128( x0, x1, x2, x3 ) do { wb_t _w0i = (x0); wb_t _w1i = (x1); wb_t _w2i = (x2); wb_t _w3i = (x3); \ + wb_t _w0, _w1, _w2, _w3; \ + _w0 = add_mod_255( _w0i, _w2i ); _w1 = add_mod_255( _w1i, _w3i ); \ + _w2 = add_mod_255( _w0i, wb_sub( wb_bcast( 0xFF ), _w2i ) ); _w3 = add_mod_255( _w1i, wb_sub( wb_bcast( 0xFF ), _w3i ) ); \ + FD_REEDSOL_FWHT_64( _w0, _w1 ); FD_REEDSOL_FWHT_64( _w2, _w3 ); \ + (x0) = _w0; (x1) = _w1; (x2) = _w2; (x3) = _w3; \ +} while( 0 ) + +#define FD_REEDSOL_FWHT_256( x0, x1, x2, x3, x4, x5, x6, x7 ) do { \ + wb_t _v0, _v1, _v2, _v3, _v4, _v5, _v6, _v7; \ + wb_t _v0i, _v1i, _v2i, _v3i, _v4i, _v5i, _v6i, _v7i; \ + _v0i = (x0); _v1i = (x1); _v2i = (x2); _v3i = (x3); \ + _v4i = (x4); _v5i = (x5); _v6i = (x6); _v7i = (x7); \ + _v0 = add_mod_255( _v0i, _v4i ); _v1 = add_mod_255( _v1i, _v5i ); \ + _v2 = add_mod_255( _v2i, _v6i ); _v3 = add_mod_255( _v3i, _v7i ); \ + _v4 = add_mod_255( _v0i, wb_sub( wb_bcast( 0xFF ), _v4i ) ); _v5 = add_mod_255( _v1i, wb_sub( wb_bcast( 0xFF ), _v5i ) ); \ + _v6 = add_mod_255( _v2i, wb_sub( wb_bcast( 0xFF ), _v6i ) ); _v7 = add_mod_255( _v3i, wb_sub( wb_bcast( 0xFF ), _v7i ) ); \ + FD_REEDSOL_FWHT_128( _v0, _v1, _v2, _v3 ); FD_REEDSOL_FWHT_128( _v4, _v5, _v6, _v7 ); \ + (x0) = _v0; (x1) = _v1; (x2) = _v2; (x3) = _v3; (x4) = _v4; (x5) = _v5; (x6) = _v6; (x7) = _v7; \ +} while( 0 ) +#endif + +/* Casts each element of a to a uchar, forming a 16-element uchar vector. 
Then + casts each element of b to a uchar, forming a second 16-element uchar + vector. Concatenates the two 16-element vectors to form a single + 32-element wb_t (a first, then b). */ +static inline wb_t +compact_ws( ws_t a, ws_t b ) { + /* There's also _mm256_packus_epi16, but it's no better than this */ + wb_t shuffled0 = _mm256_shuffle_epi8(a, wb( 0, 2, 4, 6, 8, 10, 12, 14, 128,128,128,128,128,128,128,128, + 128, 128,128,128,128,128,128,128, 0, 2, 4, 6, 8, 10, 12, 14 ) ); + wb_t shuffled1 = _mm256_shuffle_epi8(b, wb( 0, 2, 4, 6, 8, 10, 12, 14, 128,128,128,128,128,128,128,128, + 128, 128,128,128,128,128,128,128, 0, 2, 4, 6, 8, 10, 12, 14 ) ); + return _mm256_setr_m128i( + _mm_or_si128( _mm256_extracti128_si256( shuffled0, 0 ), _mm256_extracti128_si256( shuffled0, 1 ) ), + _mm_or_si128( _mm256_extracti128_si256( shuffled1, 0 ), _mm256_extracti128_si256( shuffled1, 1 ) ) ); +} + +/* exp_{n}( x ) computes n^x_i in GF(2^8) for each byte x_i in the + vector x. That's exponentiation, not xor. For example, exp_76 + interprets 76 as an element of GF(2^8) and x_i as an integer, and + computes the product of multiplying GF(76) times itself x_i times. + Recall that exponentiation is an operator from (GF(2^8) x (Z/255Z)) + -> GF(2^8), so x is interpreted mod 255. (equivalently, observe + n^255=1). As an input, x=255 is okay and is the same as x=0. */ +static inline wb_t +exp_76( wb_t x ) { + /* Decompose x = xh3*0x80 + xh2*0x40 + xh1*0x20 + xh0*0x10 + xl + where 0<=xl<16 and 0<=xh_j<=1 for each j. Then + + 76^x = (76^xl) * (76^0x10)^xh0 * (76^0x20)^xh1 * + (76^0x40)^xh2 * (76^0x80)^xh3 + = (76^xl) * 2^xh0 * 4^xh1 * 16^xh2 * 29^xh3. + + We use vpshufb to implement the 4-bit lookup table 76^xl. For + the rest, we're either multiplying by a constant or not doing the + multiply, so we can use our normal GF_MUL with a blend.
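+
+     The same decomposition in scalar form (illustrative only; gf_mul
+     here stands for any GF(2^8) multiply and exp76_lo for the
+     16-entry table baked into the vpshufb constant below):
+
+       uchar r = exp76_lo[ x & 0x0F ];         (76^(x&15), table lookup)
+       if( x & 0x10 ) r = gf_mul( r,  2 );     (times 76^0x10 ==  2)
+       if( x & 0x20 ) r = gf_mul( r,  4 );     (times 76^0x20 ==  4)
+       if( x & 0x40 ) r = gf_mul( r, 16 );     (times 76^0x40 == 16)
+       if( x & 0x80 ) r = gf_mul( r, 29 );     (times 76^0x80 == 29)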
*/ + + wb_t low = wb_and( x, wb_bcast( 0xF ) ); + wb_t exp_low = _mm256_shuffle_epi8( wb( 1, 76, 157, 70, 95, 253, 217, 129, 133, 168, 230, 227, 130, 81, 18, 44, + 1, 76, 157, 70, 95, 253, 217, 129, 133, 168, 230, 227, 130, 81, 18, 44 ), + low ); + wb_t with0 = _mm256_blendv_epi8( exp_low, GF_MUL( exp_low, 2 ), _mm256_slli_epi16( x, 3 ) ); + wb_t with1 = _mm256_blendv_epi8( with0, GF_MUL( with0, 4 ), _mm256_slli_epi16( x, 2 ) ); + wb_t with2 = _mm256_blendv_epi8( with1, GF_MUL( with1, 16 ), _mm256_slli_epi16( x, 1 ) ); + wb_t with3 = _mm256_blendv_epi8( with2, GF_MUL( with2, 29 ), x ); + return with3; +} + +static inline wb_t +exp_29( wb_t x ) { + wb_t low = wb_and( x, wb_bcast( 0xF ) ); + wb_t exp_low = _mm256_shuffle_epi8( wb( 1, 29, 76, 143, 157, 106, 70, 93, 95, 101, 253, 254, 217, 13, 129, 59, + 1, 29, 76, 143, 157, 106, 70, 93, 95, 101, 253, 254, 217, 13, 129, 59 ), + low ); + wb_t with0 = _mm256_blendv_epi8( exp_low, GF_MUL( exp_low, 133 ), _mm256_slli_epi16( x, 3 ) ); + wb_t with1 = _mm256_blendv_epi8( with0, GF_MUL( with0, 2 ), _mm256_slli_epi16( x, 2 ) ); + wb_t with2 = _mm256_blendv_epi8( with1, GF_MUL( with1, 4 ), _mm256_slli_epi16( x, 1 ) ); + wb_t with3 = _mm256_blendv_epi8( with2, GF_MUL( with2, 16 ), x ); + return with3; +} +static inline wb_t +exp_16( wb_t x ) { + wb_t low = wb_and( x, wb_bcast( 0xF ) ); + wb_t exp_low = _mm256_shuffle_epi8( wb( 1, 16, 29, 205, 76, 180, 143, 24, 157, 37, 106, 238, 70, 20, 93, 185, + 1, 16, 29, 205, 76, 180, 143, 24, 157, 37, 106, 238, 70, 20, 93, 185 ), + low ); + wb_t with0 = _mm256_blendv_epi8( exp_low, GF_MUL( exp_low, 95 ), _mm256_slli_epi16( x, 3 ) ); + wb_t with1 = _mm256_blendv_epi8( with0, GF_MUL( with0, 133 ), _mm256_slli_epi16( x, 2 ) ); + wb_t with2 = _mm256_blendv_epi8( with1, GF_MUL( with1, 2 ), _mm256_slli_epi16( x, 1 ) ); + wb_t with3 = _mm256_blendv_epi8( with2, GF_MUL( with2, 4 ), x ); + return with3; +} +static inline wb_t +exp_4( wb_t x ) { + wb_t low = wb_and( x, wb_bcast( 0xF ) ); + wb_t exp_low = _mm256_shuffle_epi8( wb( 1, 4, 16, 64, 29, 116, 205, 19, 76, 45, 180, 234, 143, 6, 24, 96, + 1, 4, 16, 64, 29, 116, 205, 19, 76, 45, 180, 234, 143, 6, 24, 96 ), + low ); + wb_t with0 = _mm256_blendv_epi8( exp_low, GF_MUL( exp_low, 157 ), _mm256_slli_epi16( x, 3 ) ); + wb_t with1 = _mm256_blendv_epi8( with0, GF_MUL( with0, 95 ), _mm256_slli_epi16( x, 2 ) ); + wb_t with2 = _mm256_blendv_epi8( with1, GF_MUL( with1, 133 ), _mm256_slli_epi16( x, 1 ) ); + wb_t with3 = _mm256_blendv_epi8( with2, GF_MUL( with2, 2 ), x ); + return with3; +} + +static inline wb_t +exp_2( wb_t x ) { + wb_t low = wb_and( x, wb_bcast( 0xF ) ); + wb_t exp_low = _mm256_shuffle_epi8( wb( 1, 2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38, + 1, 2, 4, 8, 16, 32, 64, 128, 29, 58, 116, 232, 205, 135, 19, 38), + low ); + wb_t with0 = _mm256_blendv_epi8( exp_low, GF_MUL( exp_low, 76 ), _mm256_slli_epi16( x, 3 ) ); + wb_t with1 = _mm256_blendv_epi8( with0, GF_MUL( with0, 157 ), _mm256_slli_epi16( x, 2 ) ); + wb_t with2 = _mm256_blendv_epi8( with1, GF_MUL( with1, 95 ), _mm256_slli_epi16( x, 1 ) ); + wb_t with3 = _mm256_blendv_epi8( with2, GF_MUL( with2, 133 ), x ); + return with3; +} + +#endif /* FD_HAS_AVX */ + +/* l_twiddle_{N} stores the size N FWHT of what the paper calls L~, i.e. + ( 0, Log(1), Log(2), Log(3), ... Log(N-1) ) + + The discrete log uses a primitive element of 2, and the FWHT is taken + mod 255, which means all of the values can fit in a uchar. 
However, + Intel doesn't give us a multiplication instruction for 8-bit + integers, which means that we'd have to zero-extend these values + anyways. + + Although L~ for a smaller size is a subset of that for a larger size, + because we also precompute the value of the FWHT, we store the + variables separately. Perhaps a good compiler could + constant-propagate through the AVX instructions, but it's just 4 + values of N, so I prefer not to depend on that. */ +static const short fwht_l_twiddle_16 [ 16 ] = {0xca,0xa1,0x6a,0xa9,0x73,0xfc,0xe2,0x44,0x93,0x74,0x08,0x7f,0x96,0x8c,0x42,0xf2}; +static const short fwht_l_twiddle_32 [ 32 ] = {0x24,0x8f,0xc2,0x7e,0x49,0x89,0x74,0xdc,0x4f,0x95,0x43,0xb4,0x09,0xba,0x03,0x83, + 0x71,0xb3,0x12,0xd4,0x9d,0x70,0x51,0xab,0xd7,0x53,0xcc,0x4a,0x24,0x5e,0x81,0x62}; +static const short fwht_l_twiddle_64 [ 64 ] = {0x05,0x81,0x9a,0x82,0x07,0x7c,0x3c,0xbe,0xd3,0xbc,0xed,0x23,0xc2,0x24,0xee,0xc8, + 0x3f,0x5d,0x11,0x18,0x8a,0xf9,0x1c,0x4b,0x0e,0x02,0x8e,0xe4,0x77,0x8c,0x97,0x6d, + 0x43,0x9d,0xea,0x7a,0x8b,0x96,0xac,0xfa,0xca,0x6e,0x98,0x46,0x4f,0x51,0x17,0x3e, + 0xa3,0x0a,0x13,0x91,0xb0,0xe6,0x86,0x0c,0xa1,0xa4,0x0b,0xaf,0xd0,0x30,0x6b,0x57}; +static const short fwht_l_twiddle_128[ 128 ] = {0xfe,0x89,0x15,0xeb,0x48,0xea,0x04,0xfe,0x32,0xd9,0xca,0x2c,0x1e,0x58,0x8d,0xed, + 0x6f,0x36,0x53,0x24,0xb2,0x27,0x3e,0x06,0xec,0x96,0x41,0x05,0xbe,0x1d,0xb1,0xdd, + 0x18,0x64,0xf4,0xc3,0x16,0x0a,0x2e,0x00,0xde,0x34,0xaf,0x42,0xd7,0x5e,0x92,0x02, + 0xbf,0x5a,0x6a,0x97,0xe1,0x39,0xd0,0xf6,0x66,0x86,0xb5,0x61,0x8a,0xa2,0x8f,0x49, + 0x0b,0x79,0x20,0x19,0xc5,0x0e,0x74,0x7e,0x75,0x9f,0x11,0x1a,0x67,0xef,0x50,0xa3, + 0x0f,0x84,0xce,0x0c,0x62,0xcc,0xf9,0x90,0x2f,0x6d,0xdb,0xc4,0x30,0xfb,0x7d,0xfc, + 0x6e,0xd6,0xe0,0x31,0x01,0x23,0x2b,0xf5,0xb6,0xa8,0x81,0x4a,0xc6,0x44,0x9b,0x7a, + 0x87,0xb9,0xbb,0x8b,0x7f,0x94,0x3c,0x21,0xdc,0xc2,0x60,0xfd,0x17,0xbd,0x47,0x65}; +static const short fwht_l_twiddle_256[ 256 ] = {0x00,0xfc,0xfb,0x15,0x2d,0xfa,0xc1,0x14,0x62,0x2c,0xd9,0xf9,0xc0,0x45,0x13,0xe8, + 0x01,0x61,0x86,0x2b,0xd8,0xba,0xf8,0x5d,0xbf,0x7a,0x44,0x6a,0x07,0x12,0xf1,0xe7, + 0x00,0xdc,0x60,0x0a,0x1f,0x85,0x1c,0x2a,0x8b,0xd7,0x92,0xb9,0xf7,0x82,0x5c,0xad, + 0x19,0xbe,0xb1,0x79,0x43,0x3d,0x69,0x9e,0x06,0x75,0x11,0x27,0x70,0xf0,0xd2,0xe6, + 0xfe,0x2f,0xdb,0xea,0x88,0x5f,0x7c,0x09,0x0c,0x1e,0x8d,0x84,0x1b,0x3f,0x29,0xd4, + 0x31,0x8a,0x8f,0xd6,0x91,0xcb,0xb8,0xc9,0xf6,0xb6,0x81,0x39,0xc7,0x5b,0x55,0xac, + 0x18,0x65,0xbd,0xf4,0x22,0xb0,0xb4,0x78,0x7f,0x42,0x34,0x3c,0x68,0x37,0x9d,0x4e, + 0xc5,0x05,0x96,0x74,0x10,0x59,0x26,0x9a,0x6f,0xa3,0xef,0x53,0x4b,0xd1,0xaa,0xe5, + 0xfd,0x16,0x2e,0xc2,0x63,0xda,0x46,0xe9,0x02,0x87,0xbb,0x5e,0x7b,0x6b,0x08,0xf2, + 0xdd,0x0b,0x20,0x1d,0x8c,0x93,0x83,0xae,0x1a,0xb2,0x3e,0x9f,0x76,0x28,0x71,0xd3, + 0x30,0xeb,0x89,0x7d,0x0d,0x8e,0x40,0xd5,0x32,0x90,0xcc,0xca,0xb7,0x3a,0xc8,0x56, + 0x66,0xf5,0x23,0xb5,0x80,0x35,0x38,0x4f,0xc6,0x97,0x5a,0x9b,0xa4,0x54,0x4c,0xab, + 0x17,0xc3,0x64,0x47,0x03,0xbc,0x6c,0xf3,0xde,0x21,0x94,0xaf,0xb3,0xa0,0x77,0x72, + 0xec,0x7e,0x0e,0x41,0x33,0xcd,0x3b,0x57,0x67,0x24,0x36,0x50,0x98,0x9c,0xa5,0x4d, + 0xc4,0x48,0x04,0x6d,0xdf,0x95,0xa1,0x73,0xed,0x0f,0xce,0x58,0x25,0x51,0x99,0xa6, + 0x49,0x6e,0xe0,0xa2,0xee,0xcf,0x52,0xa7,0x4a,0xe1,0xd0,0xa8,0xe2,0xa9,0xe3,0xe4}; + + +#if !FD_HAS_AVX +static void +gen_pi_noavx_generic( uchar const * is_erased, + uchar * output, + ulong sz, + const short * l_twiddle ) { + long scratch[ 256 ]; + + for( ulong i=0UL; i>23). + We need at least 31 bits of precision for the product, so + mulh_epu16 is perfect. 
*/ + log_pi = ws_and( ws_bcast( 0xFF ), ws_add( log_pi, ws_shru( ws_mulhi( log_pi, ws_bcast( (short)0x8081 ) ), 7 ) ) ); + /* Now 0<=log_pi < 255 */ + + /* Since our FWHT implementation is unscaled, we've computed a value + 16 times larger than what we'd like. 16^-1 == 16 (mod 255), but + we're just going to use this in the exponentiation, so we can + compute this implicitly. + 2^(log_pi * 16^-1) = 2^(16*log_pi) = (2^16)^log_pi = 76^log_pi + (where 2 is the primitive element we used for the logs, an element + of GF(2^8) ). */ + + wb_t compact_log_pi = compact_ws( log_pi, ws_zero() ); + wb_t pi = exp_76( compact_log_pi ); + + vb_st( output, _mm256_extracti128_si256( pi, 0 ) ); + +#else + wb_t erased_vec = _mm256_setr_m128i( vb_ldu( is_erased ), _mm_setzero_si128() ); + wb_t to_transform = erased_vec; + FD_REEDSOL_FWHT_16( to_transform ); + ws_t transformed = _mm256_cvtepu8_epi16( _mm256_extracti128_si256( to_transform, 0 ) ); + /* product is congruent to FWHT( R~) * FWHT( L~ ) mod 255. + 0<=product<256*255, so product is interpreted as unsigned. */ + ws_t product = ws_mullo( transformed, ws_ld( fwht_l_twiddle_16 ) ); + + /* Compute mod 255, using the same approach as above. */ + product = ws_and( ws_bcast( 0xFF ), ws_add( product, ws_shru( ws_mulhi( product, ws_bcast( (short)0x8081 ) ), 7 ) ) ); + wb_t compact_product = compact_ws( product, ws_zero() ); + + FD_REEDSOL_FWHT_16( compact_product ); + + /* Negate the ones corresponding to erasures */ + compact_product = wb_if( wb_eq( erased_vec, wb_zero() ), compact_product, wb_sub( wb_bcast( 255 ), compact_product ) ); + + wb_t pi = exp_76( compact_product ); + vb_st( output, _mm256_extracti128_si256( pi, 0 ) ); +#endif +#else /* No AVX implementation */ + + gen_pi_noavx_generic( is_erased, output, 16UL, fwht_l_twiddle_16 ); + +#endif +} + + +void +fd_reedsol_gen_pi_32( uchar const * is_erased, + uchar * output ) { +#if FD_HAS_AVX +#if FD_REEDSOL_PI_USE_SHORT + ws_t erased_vec0 = _mm256_cvtepu8_epi16( vb_ld( is_erased ) ); + ws_t erased_vec1 = _mm256_cvtepu8_epi16( vb_ld( is_erased + 16UL ) ); + + ws_t transformed0 = erased_vec0; + ws_t transformed1 = erased_vec1; + FD_REEDSOL_FWHT_32( transformed0, transformed1 ); /* FWHT( R~ ) */ + /* |transformed| <= 32 */ + + /* product is congruent to FWHT( R~) * FWHT( L~ ) mod 255 . + |product| <= 32*255, but definitely may be negative */ + ws_t product0 = ws_mullo( transformed0, ws_ld( fwht_l_twiddle_32 ) ); + ws_t product1 = ws_mullo( transformed1, ws_ld( fwht_l_twiddle_32 + 16UL ) ); + + /* log_pi is congruent (mod 255) to what the paper calls + R_w = FWHT( FWHT( L~ ) * FWHT( R~ ) ). + |log_pi| <= 6945 using the same approach as above. */ + FD_REEDSOL_FWHT_32( product0, product1 ); + ws_t log_pi0 = product0; + ws_t log_pi1 = product1; + + /* Negate the ones corresponding to erasures to compute 1/Pi' */ + log_pi0 = ws_adjust_sign( log_pi0, ws_sub( ws_bcast( 1 ), ws_shl( erased_vec0, 1 ) ) ); + log_pi1 = ws_adjust_sign( log_pi1, ws_sub( ws_bcast( 1 ), ws_shl( erased_vec1, 1 ) ) ); + + log_pi0 = ws_add( log_pi0, ws_bcast( (short)(255*28) ) ); + log_pi1 = ws_add( log_pi1, ws_bcast( (short)(255*28) ) ); /* Now 0<= log_pi <= 14085 < 2^15 */ + + /* GCC informs me that for a ushort x, + (x%255) == 0xFF & ( x + (x*0x8081)>>23). + We need at least 31 bits of precision for the product, so + mulh_epu16 is perfect. 
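+
+     (With explicit parentheses that is x + ((x*0x8081)>>23), i.e.
+     shift before add, which is what the ws_mulhi/ws_shru/ws_add
+     sequence below computes.  Quick sanity check with x = 510, an
+     arbitrary in-range value: 510*0x8081 = 0x10000FE, the unsigned
+     high 16 bits are 0x100, shifting right by 7 gives 2, and
+     (510+2) & 0xFF = 0 = 510 mod 255.)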
*/ + log_pi0 = ws_and( ws_bcast( 0xFF ), ws_add( log_pi0, ws_shru( ws_mulhi( log_pi0, ws_bcast( (short)0x8081 ) ), 7 ) ) ); + log_pi1 = ws_and( ws_bcast( 0xFF ), ws_add( log_pi1, ws_shru( ws_mulhi( log_pi1, ws_bcast( (short)0x8081 ) ), 7 ) ) ); + /* Now 0<=log_pi < 255 */ + + /* Since our FWHT implementation is unscaled, we've computed a value + 32 times larger than what we'd like. 32^-1 == 8 (mod 255), but + we're just going to use this in the exponentiation, so we can + compute this implicitly. + 2^(log_pi * 32^-1) = 2^(8*log_pi) = (2^8)^log_pi = 29^log_pi + (where 2 is the primitive element we used for the logs, an element + of GF(2^8) ). */ + + wb_t compact_log_pi = compact_ws( log_pi0, log_pi1 ); + wb_t pi = exp_29( compact_log_pi ); + + wb_st( output, pi ); + +#else + wb_t erased_vec = wb_ld( is_erased ); + wb_t to_transform = erased_vec; + FD_REEDSOL_FWHT_32( to_transform ); + ws_t transformed0 = _mm256_cvtepu8_epi16( _mm256_extracti128_si256( to_transform, 0 ) ); + ws_t transformed1 = _mm256_cvtepu8_epi16( _mm256_extracti128_si256( to_transform, 1 ) ); + + /* product is congruent to FWHT( R~) * FWHT( L~ ) mod 255. + 0<=product<256*255, so product is interpreted as unsigned. */ + ws_t product0 = ws_mullo( transformed0, ws_ld( fwht_l_twiddle_32 ) ); + ws_t product1 = ws_mullo( transformed1, ws_ld( fwht_l_twiddle_32 + 16UL ) ); + + /* Compute mod 255, using the same approach as above. */ + product0 = ws_and( ws_bcast( 0xFF ), ws_add( product0, ws_shru( ws_mulhi( product0, ws_bcast( (short)0x8081 ) ), 7 ) ) ); + product1 = ws_and( ws_bcast( 0xFF ), ws_add( product1, ws_shru( ws_mulhi( product1, ws_bcast( (short)0x8081 ) ), 7 ) ) ); + wb_t compact_product = compact_ws( product0, product1 ); + + FD_REEDSOL_FWHT_32( compact_product ); + + /* Negate the ones corresponding to erasures */ + compact_product = wb_if( wb_eq( erased_vec, wb_zero() ), compact_product, wb_sub( wb_bcast( 255 ), compact_product ) ); + + wb_t pi = exp_29( compact_product ); + wb_st( output, pi ); +#endif +#else /* No AVX implementation */ + + gen_pi_noavx_generic( is_erased, output, 32UL, fwht_l_twiddle_32 ); + +#endif +} + +void +fd_reedsol_gen_pi_64( uchar const * is_erased, + uchar * output ) { +#if FD_HAS_AVX +#if FD_REEDSOL_PI_USE_SHORT + ws_t erased_vec0 = _mm256_cvtepu8_epi16( vb_ld( is_erased ) ); + ws_t erased_vec1 = _mm256_cvtepu8_epi16( vb_ld( is_erased + 16UL ) ); + ws_t erased_vec2 = _mm256_cvtepu8_epi16( vb_ld( is_erased + 32UL ) ); + ws_t erased_vec3 = _mm256_cvtepu8_epi16( vb_ld( is_erased + 48UL ) ); + + ws_t transformed0 = erased_vec0; + ws_t transformed1 = erased_vec1; + ws_t transformed2 = erased_vec2; + ws_t transformed3 = erased_vec3; + FD_REEDSOL_FWHT_64( transformed0, transformed1, transformed2, transformed3 ); /* FWHT( R~ ) */ + /* |transformed| <= 64 */ + + /* product is congruent to FWHT( R~) * FWHT( L~ ) mod 255 . + |product| <= 64*255, but definitely may be negative */ + ws_t product0 = ws_mullo( transformed0, ws_ld( fwht_l_twiddle_64 ) ); + ws_t product1 = ws_mullo( transformed1, ws_ld( fwht_l_twiddle_64 + 16UL ) ); + ws_t product2 = ws_mullo( transformed2, ws_ld( fwht_l_twiddle_64 + 32UL ) ); + ws_t product3 = ws_mullo( transformed3, ws_ld( fwht_l_twiddle_64 + 48UL ) ); + + /* log_pi is congruent (mod 255) to what the paper calls + R_w = FWHT( FWHT( L~ ) * FWHT( R~ ) ). + |log_pi| <= 18918 using the same approach as above. 
*/ + FD_REEDSOL_FWHT_64( product0, product1, product2, product3 ); + ws_t log_pi0 = product0; + ws_t log_pi1 = product1; + ws_t log_pi2 = product2; + ws_t log_pi3 = product3; + + /* Negate the ones corresponding to erasures to compute 1/Pi' */ + log_pi0 = ws_adjust_sign( log_pi0, ws_sub( ws_bcast( 1 ), ws_shl( erased_vec0, 1 ) ) ); + log_pi1 = ws_adjust_sign( log_pi1, ws_sub( ws_bcast( 1 ), ws_shl( erased_vec1, 1 ) ) ); + log_pi2 = ws_adjust_sign( log_pi2, ws_sub( ws_bcast( 1 ), ws_shl( erased_vec2, 1 ) ) ); + log_pi3 = ws_adjust_sign( log_pi3, ws_sub( ws_bcast( 1 ), ws_shl( erased_vec3, 1 ) ) ); + + log_pi0 = ws_add( log_pi0, ws_bcast( (short)(255*75) ) ); + log_pi1 = ws_add( log_pi1, ws_bcast( (short)(255*75) ) ); + log_pi2 = ws_add( log_pi2, ws_bcast( (short)(255*75) ) ); + log_pi3 = ws_add( log_pi3, ws_bcast( (short)(255*75) ) ); + /* Now 0<= log_pi <= 38043 < 2^16 (okay, since the next step treats it as unsigned */ + + /* GCC informs me that for a ushort x, + (x%255) == 0xFF & ( x + (x*0x8081)>>23). + We need at least 31 bits of precision for the product, so + mulh_epu16 is perfect. */ + log_pi0 = ws_and( ws_bcast( 0xFF ), ws_add( log_pi0, ws_shru( ws_mulhi( log_pi0, ws_bcast( (short)0x8081 ) ), 7 ) ) ); + log_pi1 = ws_and( ws_bcast( 0xFF ), ws_add( log_pi1, ws_shru( ws_mulhi( log_pi1, ws_bcast( (short)0x8081 ) ), 7 ) ) ); + log_pi2 = ws_and( ws_bcast( 0xFF ), ws_add( log_pi2, ws_shru( ws_mulhi( log_pi2, ws_bcast( (short)0x8081 ) ), 7 ) ) ); + log_pi3 = ws_and( ws_bcast( 0xFF ), ws_add( log_pi3, ws_shru( ws_mulhi( log_pi3, ws_bcast( (short)0x8081 ) ), 7 ) ) ); + /* Now 0<=log_pi < 255 */ + + /* Since our FWHT implementation is unscaled, we've computed a value + 64 times larger than what we'd like. 64^-1 == 4 (mod 255), but + we're just going to use this in the exponentiation, so we can + compute this implicitly. + 2^(log_pi * 64^-1) = 2^(4*log_pi) = (2^4)^log_pi = 16^log_pi + (where 2 is the primitive element we used for the logs, an element + of GF(2^8) ). */ + + wb_t compact_log_pi0 = compact_ws( log_pi0, log_pi1 ); + wb_t compact_log_pi1 = compact_ws( log_pi2, log_pi3 ); + wb_t pi0 = exp_16( compact_log_pi0 ); + wb_t pi1 = exp_16( compact_log_pi1 ); + + wb_st( output, pi0 ); + wb_st( output+32UL, pi1 ); + +#else + wb_t erased_vec0 = wb_ld( is_erased ); + wb_t erased_vec1 = wb_ld( is_erased + 32UL ); + wb_t to_transform0 = erased_vec0; + wb_t to_transform1 = erased_vec1; + + FD_REEDSOL_FWHT_64( to_transform0, to_transform1 ); + + ws_t transformed0 = _mm256_cvtepu8_epi16( _mm256_extracti128_si256( to_transform0, 0 ) ); + ws_t transformed1 = _mm256_cvtepu8_epi16( _mm256_extracti128_si256( to_transform0, 1 ) ); + ws_t transformed2 = _mm256_cvtepu8_epi16( _mm256_extracti128_si256( to_transform1, 0 ) ); + ws_t transformed3 = _mm256_cvtepu8_epi16( _mm256_extracti128_si256( to_transform1, 1 ) ); + + /* product is congruent to FWHT( R~) * FWHT( L~ ) mod 255. + 0<=product<256*255, so product is interpreted as unsigned. */ + ws_t product0 = ws_mullo( transformed0, ws_ld( fwht_l_twiddle_64 ) ); + ws_t product1 = ws_mullo( transformed1, ws_ld( fwht_l_twiddle_64 + 16UL ) ); + ws_t product2 = ws_mullo( transformed2, ws_ld( fwht_l_twiddle_64 + 32UL ) ); + ws_t product3 = ws_mullo( transformed3, ws_ld( fwht_l_twiddle_64 + 48UL ) ); + + /* Compute mod 255, using the same approach as above. 
*/ + product0 = ws_and( ws_bcast( 0xFF ), ws_add( product0, ws_shru( ws_mulhi( product0, ws_bcast( (short)0x8081 ) ), 7 ) ) ); + product1 = ws_and( ws_bcast( 0xFF ), ws_add( product1, ws_shru( ws_mulhi( product1, ws_bcast( (short)0x8081 ) ), 7 ) ) ); + product2 = ws_and( ws_bcast( 0xFF ), ws_add( product2, ws_shru( ws_mulhi( product2, ws_bcast( (short)0x8081 ) ), 7 ) ) ); + product3 = ws_and( ws_bcast( 0xFF ), ws_add( product3, ws_shru( ws_mulhi( product3, ws_bcast( (short)0x8081 ) ), 7 ) ) ); + + wb_t compact_product0 = compact_ws( product0, product1 ); + wb_t compact_product1 = compact_ws( product2, product3 ); + + FD_REEDSOL_FWHT_64( compact_product0, compact_product1 ); + + /* Negate the ones corresponding to erasures */ + compact_product0 = wb_if( wb_eq( erased_vec0, wb_zero() ), compact_product0, wb_sub( wb_bcast( 255 ), compact_product0 ) ); + compact_product1 = wb_if( wb_eq( erased_vec1, wb_zero() ), compact_product1, wb_sub( wb_bcast( 255 ), compact_product1 ) ); + + wb_t pi0 = exp_16( compact_product0 ); + wb_t pi1 = exp_16( compact_product1 ); + wb_st( output , pi0 ); + wb_st( output + 32UL, pi1 ); +#endif +#else /* No AVX implementation */ + + gen_pi_noavx_generic( is_erased, output, 64UL, fwht_l_twiddle_64 ); + +#endif +} + +void +fd_reedsol_gen_pi_128( uchar const * is_erased, + uchar * output ) { +#if FD_HAS_AVX +#if FD_REEDSOL_PI_USE_SHORT + ws_t erased_vec0 = _mm256_cvtepu8_epi16( vb_ld( is_erased ) ); + ws_t erased_vec1 = _mm256_cvtepu8_epi16( vb_ld( is_erased + 16UL ) ); + ws_t erased_vec2 = _mm256_cvtepu8_epi16( vb_ld( is_erased + 32UL ) ); + ws_t erased_vec3 = _mm256_cvtepu8_epi16( vb_ld( is_erased + 48UL ) ); + ws_t erased_vec4 = _mm256_cvtepu8_epi16( vb_ld( is_erased + 64UL ) ); + ws_t erased_vec5 = _mm256_cvtepu8_epi16( vb_ld( is_erased + 80UL ) ); + ws_t erased_vec6 = _mm256_cvtepu8_epi16( vb_ld( is_erased + 96UL ) ); + ws_t erased_vec7 = _mm256_cvtepu8_epi16( vb_ld( is_erased + 112UL ) ); + + ws_t transformed0 = erased_vec0; + ws_t transformed1 = erased_vec1; + ws_t transformed2 = erased_vec2; + ws_t transformed3 = erased_vec3; + ws_t transformed4 = erased_vec4; + ws_t transformed5 = erased_vec5; + ws_t transformed6 = erased_vec6; + ws_t transformed7 = erased_vec7; + FD_REEDSOL_FWHT_128( transformed0, transformed1, transformed2, transformed3, transformed4, transformed5, transformed6, transformed7 ); /* FWHT( R~ ) */ + /* |transformed| <= 128 */ + + /* product is congruent to FWHT( R~) * FWHT( L~ ) mod 255 . + -16256 <= product <= 32512 */ + ws_t product0 = ws_mullo( transformed0, ws_ld( fwht_l_twiddle_128 ) ); + ws_t product1 = ws_mullo( transformed1, ws_ld( fwht_l_twiddle_128 + 16UL ) ); + ws_t product2 = ws_mullo( transformed2, ws_ld( fwht_l_twiddle_128 + 32UL ) ); + ws_t product3 = ws_mullo( transformed3, ws_ld( fwht_l_twiddle_128 + 48UL ) ); + ws_t product4 = ws_mullo( transformed4, ws_ld( fwht_l_twiddle_128 + 64UL ) ); + ws_t product5 = ws_mullo( transformed5, ws_ld( fwht_l_twiddle_128 + 80UL ) ); + ws_t product6 = ws_mullo( transformed6, ws_ld( fwht_l_twiddle_128 + 96UL ) ); + ws_t product7 = ws_mullo( transformed7, ws_ld( fwht_l_twiddle_128 + 112UL ) ); + + /* We need to reduce these mod 255 to prevent overflow in the next + step. 0 <= product+64*255 <= 48832 < 2^16. The mod operation + treats the input as unsigned though, so this is okay. 
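+
+     (Concretely: -16256 + 64*255 = 64 and 32512 + 64*255 = 48832, so
+     the biased products land in [64, 48832] and fit in a ushort.)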
*/ + product0 = ws_add( product0, ws_bcast( (short)64*255 ) ); + product1 = ws_add( product1, ws_bcast( (short)64*255 ) ); + product2 = ws_add( product2, ws_bcast( (short)64*255 ) ); + product3 = ws_add( product3, ws_bcast( (short)64*255 ) ); + product4 = ws_add( product4, ws_bcast( (short)64*255 ) ); + product5 = ws_add( product5, ws_bcast( (short)64*255 ) ); + product6 = ws_add( product6, ws_bcast( (short)64*255 ) ); + product7 = ws_add( product7, ws_bcast( (short)64*255 ) ); + + product0 = ws_and( ws_bcast( 0xFF ), ws_add( product0, ws_shru( ws_mulhi( product0, ws_bcast( (short)0x8081 ) ), 7 ) ) ); + product1 = ws_and( ws_bcast( 0xFF ), ws_add( product1, ws_shru( ws_mulhi( product1, ws_bcast( (short)0x8081 ) ), 7 ) ) ); + product2 = ws_and( ws_bcast( 0xFF ), ws_add( product2, ws_shru( ws_mulhi( product2, ws_bcast( (short)0x8081 ) ), 7 ) ) ); + product3 = ws_and( ws_bcast( 0xFF ), ws_add( product3, ws_shru( ws_mulhi( product3, ws_bcast( (short)0x8081 ) ), 7 ) ) ); + product4 = ws_and( ws_bcast( 0xFF ), ws_add( product4, ws_shru( ws_mulhi( product4, ws_bcast( (short)0x8081 ) ), 7 ) ) ); + product5 = ws_and( ws_bcast( 0xFF ), ws_add( product5, ws_shru( ws_mulhi( product5, ws_bcast( (short)0x8081 ) ), 7 ) ) ); + product6 = ws_and( ws_bcast( 0xFF ), ws_add( product6, ws_shru( ws_mulhi( product6, ws_bcast( (short)0x8081 ) ), 7 ) ) ); + product7 = ws_and( ws_bcast( 0xFF ), ws_add( product7, ws_shru( ws_mulhi( product7, ws_bcast( (short)0x8081 ) ), 7 ) ) ); + + /* Now 0 <= product < 255 */ + + /* log_pi is congruent (mod 255) to what the paper calls + R_w = FWHT( FWHT( L~ ) * FWHT( R~ ) ). + |log_pi| <= 128*255 */ + FD_REEDSOL_FWHT_128( product0, product1, product2, product3, product4, product5, product6, product7 ); + ws_t log_pi0 = product0; + ws_t log_pi1 = product1; + ws_t log_pi2 = product2; + ws_t log_pi3 = product3; + ws_t log_pi4 = product4; + ws_t log_pi5 = product5; + ws_t log_pi6 = product6; + ws_t log_pi7 = product7; + + /* Negate the ones corresponding to erasures to compute 1/Pi' */ + log_pi0 = ws_adjust_sign( log_pi0, ws_sub( ws_bcast( 1 ), ws_shl( erased_vec0, 1 ) ) ); + log_pi1 = ws_adjust_sign( log_pi1, ws_sub( ws_bcast( 1 ), ws_shl( erased_vec1, 1 ) ) ); + log_pi2 = ws_adjust_sign( log_pi2, ws_sub( ws_bcast( 1 ), ws_shl( erased_vec2, 1 ) ) ); + log_pi3 = ws_adjust_sign( log_pi3, ws_sub( ws_bcast( 1 ), ws_shl( erased_vec3, 1 ) ) ); + log_pi4 = ws_adjust_sign( log_pi4, ws_sub( ws_bcast( 1 ), ws_shl( erased_vec4, 1 ) ) ); + log_pi5 = ws_adjust_sign( log_pi5, ws_sub( ws_bcast( 1 ), ws_shl( erased_vec5, 1 ) ) ); + log_pi6 = ws_adjust_sign( log_pi6, ws_sub( ws_bcast( 1 ), ws_shl( erased_vec6, 1 ) ) ); + log_pi7 = ws_adjust_sign( log_pi7, ws_sub( ws_bcast( 1 ), ws_shl( erased_vec7, 1 ) ) ); + + log_pi0 = ws_add( log_pi0, ws_bcast( (short)(255*128) ) ); + log_pi1 = ws_add( log_pi1, ws_bcast( (short)(255*128) ) ); + log_pi2 = ws_add( log_pi2, ws_bcast( (short)(255*128) ) ); + log_pi3 = ws_add( log_pi3, ws_bcast( (short)(255*128) ) ); + log_pi4 = ws_add( log_pi4, ws_bcast( (short)(255*128) ) ); + log_pi5 = ws_add( log_pi5, ws_bcast( (short)(255*128) ) ); + log_pi6 = ws_add( log_pi6, ws_bcast( (short)(255*128) ) ); + log_pi7 = ws_add( log_pi7, ws_bcast( (short)(255*128) ) ); /* Now 0<= log_pi <= 65152 < 2^16 */ + + /* GCC informs me that for a ushort x, + (x%255) == 0xFF & ( x + (x*0x8081)>>23). + We need at least 31 bits of precision for the product, so + mulh_epu16 is perfect. 
*/ + log_pi0 = ws_and( ws_bcast( 0xFF ), ws_add( log_pi0, ws_shru( ws_mulhi( log_pi0, ws_bcast( (short)0x8081 ) ), 7 ) ) ); + log_pi1 = ws_and( ws_bcast( 0xFF ), ws_add( log_pi1, ws_shru( ws_mulhi( log_pi1, ws_bcast( (short)0x8081 ) ), 7 ) ) ); + log_pi2 = ws_and( ws_bcast( 0xFF ), ws_add( log_pi2, ws_shru( ws_mulhi( log_pi2, ws_bcast( (short)0x8081 ) ), 7 ) ) ); + log_pi3 = ws_and( ws_bcast( 0xFF ), ws_add( log_pi3, ws_shru( ws_mulhi( log_pi3, ws_bcast( (short)0x8081 ) ), 7 ) ) ); + log_pi4 = ws_and( ws_bcast( 0xFF ), ws_add( log_pi4, ws_shru( ws_mulhi( log_pi4, ws_bcast( (short)0x8081 ) ), 7 ) ) ); + log_pi5 = ws_and( ws_bcast( 0xFF ), ws_add( log_pi5, ws_shru( ws_mulhi( log_pi5, ws_bcast( (short)0x8081 ) ), 7 ) ) ); + log_pi6 = ws_and( ws_bcast( 0xFF ), ws_add( log_pi6, ws_shru( ws_mulhi( log_pi6, ws_bcast( (short)0x8081 ) ), 7 ) ) ); + log_pi7 = ws_and( ws_bcast( 0xFF ), ws_add( log_pi7, ws_shru( ws_mulhi( log_pi7, ws_bcast( (short)0x8081 ) ), 7 ) ) ); + /* Now 0<=log_pi < 255 */ + + /* Since our FWHT implementation is unscaled, we've computed a value + 128 times larger than what we'd like. 128^-1 == 2 (mod 255), but + we're just going to use this in the exponentiation, so we can + compute this implicitly. + 2^(log_pi * 128^-1) = 2^(2*log_pi) = (2^2)^log_pi = 4^log_pi + (where 2 is the primitive element we used for the logs, an element + of GF(2^8) ). */ + + wb_t compact_log_pi0 = compact_ws( log_pi0, log_pi1 ); + wb_t compact_log_pi1 = compact_ws( log_pi2, log_pi3 ); + wb_t compact_log_pi2 = compact_ws( log_pi4, log_pi5 ); + wb_t compact_log_pi3 = compact_ws( log_pi6, log_pi7 ); + wb_t pi0 = exp_4( compact_log_pi0 ); + wb_t pi1 = exp_4( compact_log_pi1 ); + wb_t pi2 = exp_4( compact_log_pi2 ); + wb_t pi3 = exp_4( compact_log_pi3 ); + + wb_st( output, pi0 ); + wb_st( output + 32UL, pi1 ); + wb_st( output + 64UL, pi2 ); + wb_st( output + 96UL, pi3 ); + +#else + wb_t erased_vec0 = wb_ld( is_erased ); + wb_t erased_vec1 = wb_ld( is_erased + 32UL ); + wb_t erased_vec2 = wb_ld( is_erased + 64UL ); + wb_t erased_vec3 = wb_ld( is_erased + 96UL ); + wb_t to_transform0 = erased_vec0; + wb_t to_transform1 = erased_vec1; + wb_t to_transform2 = erased_vec2; + wb_t to_transform3 = erased_vec3; + + FD_REEDSOL_FWHT_128( to_transform0, to_transform1, to_transform2, to_transform3 ); + + ws_t transformed0 = _mm256_cvtepu8_epi16( _mm256_extracti128_si256( to_transform0, 0 ) ); + ws_t transformed1 = _mm256_cvtepu8_epi16( _mm256_extracti128_si256( to_transform0, 1 ) ); + ws_t transformed2 = _mm256_cvtepu8_epi16( _mm256_extracti128_si256( to_transform1, 0 ) ); + ws_t transformed3 = _mm256_cvtepu8_epi16( _mm256_extracti128_si256( to_transform1, 1 ) ); + ws_t transformed4 = _mm256_cvtepu8_epi16( _mm256_extracti128_si256( to_transform2, 0 ) ); + ws_t transformed5 = _mm256_cvtepu8_epi16( _mm256_extracti128_si256( to_transform2, 1 ) ); + ws_t transformed6 = _mm256_cvtepu8_epi16( _mm256_extracti128_si256( to_transform3, 0 ) ); + ws_t transformed7 = _mm256_cvtepu8_epi16( _mm256_extracti128_si256( to_transform3, 1 ) ); + + /* product is congruent to FWHT( R~) * FWHT( L~ ) mod 255. + 0<=product<256*255, so product is interpreted as unsigned. 
*/ + ws_t product0 = ws_mullo( transformed0, ws_ld( fwht_l_twiddle_128 ) ); + ws_t product1 = ws_mullo( transformed1, ws_ld( fwht_l_twiddle_128 + 16UL ) ); + ws_t product2 = ws_mullo( transformed2, ws_ld( fwht_l_twiddle_128 + 32UL ) ); + ws_t product3 = ws_mullo( transformed3, ws_ld( fwht_l_twiddle_128 + 48UL ) ); + ws_t product4 = ws_mullo( transformed4, ws_ld( fwht_l_twiddle_128 + 64UL ) ); + ws_t product5 = ws_mullo( transformed5, ws_ld( fwht_l_twiddle_128 + 80UL ) ); + ws_t product6 = ws_mullo( transformed6, ws_ld( fwht_l_twiddle_128 + 96UL ) ); + ws_t product7 = ws_mullo( transformed7, ws_ld( fwht_l_twiddle_128 + 112UL ) ); + + /* Compute mod 255, using the same approach as above. */ + product0 = ws_and( ws_bcast( 0xFF ), ws_add( product0, ws_shru( ws_mulhi( product0, ws_bcast( (short)0x8081 ) ), 7 ) ) ); + product1 = ws_and( ws_bcast( 0xFF ), ws_add( product1, ws_shru( ws_mulhi( product1, ws_bcast( (short)0x8081 ) ), 7 ) ) ); + product2 = ws_and( ws_bcast( 0xFF ), ws_add( product2, ws_shru( ws_mulhi( product2, ws_bcast( (short)0x8081 ) ), 7 ) ) ); + product3 = ws_and( ws_bcast( 0xFF ), ws_add( product3, ws_shru( ws_mulhi( product3, ws_bcast( (short)0x8081 ) ), 7 ) ) ); + product4 = ws_and( ws_bcast( 0xFF ), ws_add( product4, ws_shru( ws_mulhi( product4, ws_bcast( (short)0x8081 ) ), 7 ) ) ); + product5 = ws_and( ws_bcast( 0xFF ), ws_add( product5, ws_shru( ws_mulhi( product5, ws_bcast( (short)0x8081 ) ), 7 ) ) ); + product6 = ws_and( ws_bcast( 0xFF ), ws_add( product6, ws_shru( ws_mulhi( product6, ws_bcast( (short)0x8081 ) ), 7 ) ) ); + product7 = ws_and( ws_bcast( 0xFF ), ws_add( product7, ws_shru( ws_mulhi( product7, ws_bcast( (short)0x8081 ) ), 7 ) ) ); + wb_t compact_product0 = compact_ws( product0, product1 ); + wb_t compact_product1 = compact_ws( product2, product3 ); + wb_t compact_product2 = compact_ws( product4, product5 ); + wb_t compact_product3 = compact_ws( product6, product7 ); + + FD_REEDSOL_FWHT_128( compact_product0, compact_product1, compact_product2, compact_product3 ); + + /* Negate the ones corresponding to erasures */ + compact_product0 = wb_if( wb_eq( erased_vec0, wb_zero() ), compact_product0, wb_sub( wb_bcast( 255 ), compact_product0 ) ); + compact_product1 = wb_if( wb_eq( erased_vec1, wb_zero() ), compact_product1, wb_sub( wb_bcast( 255 ), compact_product1 ) ); + compact_product2 = wb_if( wb_eq( erased_vec2, wb_zero() ), compact_product2, wb_sub( wb_bcast( 255 ), compact_product2 ) ); + compact_product3 = wb_if( wb_eq( erased_vec3, wb_zero() ), compact_product3, wb_sub( wb_bcast( 255 ), compact_product3 ) ); + + wb_t pi0 = exp_4( compact_product0 ); + wb_t pi1 = exp_4( compact_product1 ); + wb_t pi2 = exp_4( compact_product2 ); + wb_t pi3 = exp_4( compact_product3 ); + wb_st( output, pi0 ); + wb_st( output + 32UL, pi1 ); + wb_st( output + 64UL, pi2 ); + wb_st( output + 96UL, pi3 ); +#endif +#else /* No AVX implementation */ + + gen_pi_noavx_generic( is_erased, output, 128UL, fwht_l_twiddle_128 ); + +#endif +} + +void +fd_reedsol_gen_pi_256( uchar const * is_erased, + uchar * output ) { +#if FD_HAS_AVX +#if FD_REEDSOL_PI_USE_SHORT + ws_t erased_vec0 = _mm256_cvtepu8_epi16( vb_ld( is_erased ) ); + ws_t erased_vec1 = _mm256_cvtepu8_epi16( vb_ld( is_erased + 16UL ) ); + ws_t erased_vec2 = _mm256_cvtepu8_epi16( vb_ld( is_erased + 32UL ) ); + ws_t erased_vec3 = _mm256_cvtepu8_epi16( vb_ld( is_erased + 48UL ) ); + ws_t erased_vec4 = _mm256_cvtepu8_epi16( vb_ld( is_erased + 64UL ) ); + ws_t erased_vec5 = _mm256_cvtepu8_epi16( vb_ld( is_erased + 80UL ) ); + ws_t 
erased_vec6 = _mm256_cvtepu8_epi16( vb_ld( is_erased + 96UL ) ); + ws_t erased_vec7 = _mm256_cvtepu8_epi16( vb_ld( is_erased + 112UL ) ); + ws_t erased_vec8 = _mm256_cvtepu8_epi16( vb_ld( is_erased + 128UL ) ); + ws_t erased_vec9 = _mm256_cvtepu8_epi16( vb_ld( is_erased + 144UL ) ); + ws_t erased_vec10 = _mm256_cvtepu8_epi16( vb_ld( is_erased + 160UL ) ); + ws_t erased_vec11 = _mm256_cvtepu8_epi16( vb_ld( is_erased + 176UL ) ); + ws_t erased_vec12 = _mm256_cvtepu8_epi16( vb_ld( is_erased + 192UL ) ); + ws_t erased_vec13 = _mm256_cvtepu8_epi16( vb_ld( is_erased + 208UL ) ); + ws_t erased_vec14 = _mm256_cvtepu8_epi16( vb_ld( is_erased + 224UL ) ); + ws_t erased_vec15 = _mm256_cvtepu8_epi16( vb_ld( is_erased + 240UL ) ); + + ws_t transformed0 = erased_vec0; + ws_t transformed1 = erased_vec1; + ws_t transformed2 = erased_vec2; + ws_t transformed3 = erased_vec3; + ws_t transformed4 = erased_vec4; + ws_t transformed5 = erased_vec5; + ws_t transformed6 = erased_vec6; + ws_t transformed7 = erased_vec7; + ws_t transformed8 = erased_vec8; + ws_t transformed9 = erased_vec9; + ws_t transformed10 = erased_vec10; + ws_t transformed11 = erased_vec11; + ws_t transformed12 = erased_vec12; + ws_t transformed13 = erased_vec13; + ws_t transformed14 = erased_vec14; + ws_t transformed15 = erased_vec15; + FD_REEDSOL_FWHT_256( transformed0, transformed1, transformed2, transformed3, transformed4, transformed5, transformed6, transformed7, + transformed8, transformed9, transformed10, transformed11, transformed12, transformed13, transformed14, transformed15 ); /* FWHT( R~ ) */ + /* |transformed| <= 256 */ + + /* product is congruent to FWHT( R~) * FWHT( L~ ) mod 255 . + -32512 <= product <= 32512 */ + ws_t product0 = ws_mullo( transformed0, ws_ld( fwht_l_twiddle_256 ) ); + ws_t product1 = ws_mullo( transformed1, ws_ld( fwht_l_twiddle_256 + 16UL ) ); + ws_t product2 = ws_mullo( transformed2, ws_ld( fwht_l_twiddle_256 + 32UL ) ); + ws_t product3 = ws_mullo( transformed3, ws_ld( fwht_l_twiddle_256 + 48UL ) ); + ws_t product4 = ws_mullo( transformed4, ws_ld( fwht_l_twiddle_256 + 64UL ) ); + ws_t product5 = ws_mullo( transformed5, ws_ld( fwht_l_twiddle_256 + 80UL ) ); + ws_t product6 = ws_mullo( transformed6, ws_ld( fwht_l_twiddle_256 + 96UL ) ); + ws_t product7 = ws_mullo( transformed7, ws_ld( fwht_l_twiddle_256 + 112UL ) ); + ws_t product8 = ws_mullo( transformed8, ws_ld( fwht_l_twiddle_256 + 128UL ) ); + ws_t product9 = ws_mullo( transformed9, ws_ld( fwht_l_twiddle_256 + 144UL ) ); + ws_t product10 = ws_mullo( transformed10, ws_ld( fwht_l_twiddle_256 + 160UL ) ); + ws_t product11 = ws_mullo( transformed11, ws_ld( fwht_l_twiddle_256 + 176UL ) ); + ws_t product12 = ws_mullo( transformed12, ws_ld( fwht_l_twiddle_256 + 192UL ) ); + ws_t product13 = ws_mullo( transformed13, ws_ld( fwht_l_twiddle_256 + 208UL ) ); + ws_t product14 = ws_mullo( transformed14, ws_ld( fwht_l_twiddle_256 + 224UL ) ); + ws_t product15 = ws_mullo( transformed15, ws_ld( fwht_l_twiddle_256 + 240UL ) ); + + /* We need to reduce these mod 255 to prevent overflow in the next + step. 0 <= product+128*255 <= 65152 < 2^16. The mod operation + treats the input as unsigned though, so this is okay (but hanging + in there by a thread!). 
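+
+     (The thread: 32512 + 128*255 = 65152, leaving only
+     65535 - 65152 = 383 of headroom before a ushort would wrap.)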
*/ + product0 = ws_mod255( ws_add( product0, ws_bcast( (short)128*255 ) ) ); + product1 = ws_mod255( ws_add( product1, ws_bcast( (short)128*255 ) ) ); + product2 = ws_mod255( ws_add( product2, ws_bcast( (short)128*255 ) ) ); + product3 = ws_mod255( ws_add( product3, ws_bcast( (short)128*255 ) ) ); + product4 = ws_mod255( ws_add( product4, ws_bcast( (short)128*255 ) ) ); + product5 = ws_mod255( ws_add( product5, ws_bcast( (short)128*255 ) ) ); + product6 = ws_mod255( ws_add( product6, ws_bcast( (short)128*255 ) ) ); + product7 = ws_mod255( ws_add( product7, ws_bcast( (short)128*255 ) ) ); + product8 = ws_mod255( ws_add( product8, ws_bcast( (short)128*255 ) ) ); + product9 = ws_mod255( ws_add( product9, ws_bcast( (short)128*255 ) ) ); + product10 = ws_mod255( ws_add( product10, ws_bcast( (short)128*255 ) ) ); + product11 = ws_mod255( ws_add( product11, ws_bcast( (short)128*255 ) ) ); + product12 = ws_mod255( ws_add( product12, ws_bcast( (short)128*255 ) ) ); + product13 = ws_mod255( ws_add( product13, ws_bcast( (short)128*255 ) ) ); + product14 = ws_mod255( ws_add( product14, ws_bcast( (short)128*255 ) ) ); + product15 = ws_mod255( ws_add( product15, ws_bcast( (short)128*255 ) ) ); + + /* Now 0 <= product < 255 */ + + /* log_pi is congruent (mod 255) to what the paper calls + R_w = FWHT( FWHT( L~ ) * FWHT( R~ ) ). + If we do the FWHT in the normal way, it might overflow, so we need to inline it and stick a mod in the middle */ + ws_t log_pi0 = ws_mod255( ws_add( product0, product8 ) ); ws_t log_pi1 = ws_mod255( ws_add( product1, product9 ) ); + ws_t log_pi2 = ws_mod255( ws_add( product2, product10 ) ); ws_t log_pi3 = ws_mod255( ws_add( product3, product11 ) ); + ws_t log_pi4 = ws_mod255( ws_add( product4, product12 ) ); ws_t log_pi5 = ws_mod255( ws_add( product5, product13 ) ); + ws_t log_pi6 = ws_mod255( ws_add( product6, product14 ) ); ws_t log_pi7 = ws_mod255( ws_add( product7, product15 ) ); + ws_t log_pi8 = ws_mod255( ws_add( ws_sub( product0, product8 ), ws_bcast( (short)255*2 ) ) ); + ws_t log_pi9 = ws_mod255( ws_add( ws_sub( product1, product9 ), ws_bcast( (short)255*2 ) ) ); + ws_t log_pi10 = ws_mod255( ws_add( ws_sub( product2, product10 ), ws_bcast( (short)255*2 ) ) ); + ws_t log_pi11 = ws_mod255( ws_add( ws_sub( product3, product11 ), ws_bcast( (short)255*2 ) ) ); + ws_t log_pi12 = ws_mod255( ws_add( ws_sub( product4, product12 ), ws_bcast( (short)255*2 ) ) ); + ws_t log_pi13 = ws_mod255( ws_add( ws_sub( product5, product13 ), ws_bcast( (short)255*2 ) ) ); + ws_t log_pi14 = ws_mod255( ws_add( ws_sub( product6, product14 ), ws_bcast( (short)255*2 ) ) ); + ws_t log_pi15 = ws_mod255( ws_add( ws_sub( product7, product15 ), ws_bcast( (short)255*2 ) ) ); + + FD_REEDSOL_FWHT_128( log_pi0, log_pi1, log_pi2, log_pi3, log_pi4, log_pi5, log_pi6, log_pi7 ); + FD_REEDSOL_FWHT_128( log_pi8, log_pi9, log_pi10, log_pi11, log_pi12, log_pi13, log_pi14, log_pi15 ); + /* Now |log_pi| <= 128*255 */ + + /* Negate the ones corresponding to erasures to compute 1/Pi' */ + log_pi0 = ws_adjust_sign( log_pi0, ws_sub( ws_bcast( 1 ), ws_shl( erased_vec0, 1 ) ) ); + log_pi1 = ws_adjust_sign( log_pi1, ws_sub( ws_bcast( 1 ), ws_shl( erased_vec1, 1 ) ) ); + log_pi2 = ws_adjust_sign( log_pi2, ws_sub( ws_bcast( 1 ), ws_shl( erased_vec2, 1 ) ) ); + log_pi3 = ws_adjust_sign( log_pi3, ws_sub( ws_bcast( 1 ), ws_shl( erased_vec3, 1 ) ) ); + log_pi4 = ws_adjust_sign( log_pi4, ws_sub( ws_bcast( 1 ), ws_shl( erased_vec4, 1 ) ) ); + log_pi5 = ws_adjust_sign( log_pi5, ws_sub( ws_bcast( 1 ), ws_shl( erased_vec5, 1 ) ) ); + 
log_pi6 = ws_adjust_sign( log_pi6, ws_sub( ws_bcast( 1 ), ws_shl( erased_vec6, 1 ) ) ); + log_pi7 = ws_adjust_sign( log_pi7, ws_sub( ws_bcast( 1 ), ws_shl( erased_vec7, 1 ) ) ); + log_pi8 = ws_adjust_sign( log_pi8, ws_sub( ws_bcast( 1 ), ws_shl( erased_vec8, 1 ) ) ); + log_pi9 = ws_adjust_sign( log_pi9, ws_sub( ws_bcast( 1 ), ws_shl( erased_vec9, 1 ) ) ); + log_pi10 = ws_adjust_sign( log_pi10, ws_sub( ws_bcast( 1 ), ws_shl( erased_vec10, 1 ) ) ); + log_pi11 = ws_adjust_sign( log_pi11, ws_sub( ws_bcast( 1 ), ws_shl( erased_vec11, 1 ) ) ); + log_pi12 = ws_adjust_sign( log_pi12, ws_sub( ws_bcast( 1 ), ws_shl( erased_vec12, 1 ) ) ); + log_pi13 = ws_adjust_sign( log_pi13, ws_sub( ws_bcast( 1 ), ws_shl( erased_vec13, 1 ) ) ); + log_pi14 = ws_adjust_sign( log_pi14, ws_sub( ws_bcast( 1 ), ws_shl( erased_vec14, 1 ) ) ); + log_pi15 = ws_adjust_sign( log_pi15, ws_sub( ws_bcast( 1 ), ws_shl( erased_vec15, 1 ) ) ); + + + /* After the addition below, 0<= log_pi <= 65152 < 2^16. The mod + brings it back to 0 <= log_pi < 255. */ + log_pi0 = ws_mod255( ws_add( log_pi0, ws_bcast( (short)(255*128) ) ) ); + log_pi1 = ws_mod255( ws_add( log_pi1, ws_bcast( (short)(255*128) ) ) ); + log_pi2 = ws_mod255( ws_add( log_pi2, ws_bcast( (short)(255*128) ) ) ); + log_pi3 = ws_mod255( ws_add( log_pi3, ws_bcast( (short)(255*128) ) ) ); + log_pi4 = ws_mod255( ws_add( log_pi4, ws_bcast( (short)(255*128) ) ) ); + log_pi5 = ws_mod255( ws_add( log_pi5, ws_bcast( (short)(255*128) ) ) ); + log_pi6 = ws_mod255( ws_add( log_pi6, ws_bcast( (short)(255*128) ) ) ); + log_pi7 = ws_mod255( ws_add( log_pi7, ws_bcast( (short)(255*128) ) ) ); + log_pi8 = ws_mod255( ws_add( log_pi8, ws_bcast( (short)(255*128) ) ) ); + log_pi9 = ws_mod255( ws_add( log_pi9, ws_bcast( (short)(255*128) ) ) ); + log_pi10 = ws_mod255( ws_add( log_pi10, ws_bcast( (short)(255*128) ) ) ); + log_pi11 = ws_mod255( ws_add( log_pi11, ws_bcast( (short)(255*128) ) ) ); + log_pi12 = ws_mod255( ws_add( log_pi12, ws_bcast( (short)(255*128) ) ) ); + log_pi13 = ws_mod255( ws_add( log_pi13, ws_bcast( (short)(255*128) ) ) ); + log_pi14 = ws_mod255( ws_add( log_pi14, ws_bcast( (short)(255*128) ) ) ); + log_pi15 = ws_mod255( ws_add( log_pi15, ws_bcast( (short)(255*128) ) ) ); + + /* Since our FWHT implementation is unscaled, we've computed a value + 256 times larger than what we'd like. 256^-1==1^-1 == 1 (mod 255), + so we don't need to do anything special. 
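+
+     (This mirrors the smaller kernels: 16^-1 = 16 mod 255 gives exp_76
+     since 2^16 = 76, 32^-1 = 8 gives exp_29 since 2^8 = 29, 64^-1 = 4
+     gives exp_16, 128^-1 = 2 gives exp_4, and here 256^-1 = 1 means we
+     can use exp_2 directly.)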
*/ + + wb_t compact_log_pi0 = compact_ws( log_pi0, log_pi1 ); + wb_t compact_log_pi1 = compact_ws( log_pi2, log_pi3 ); + wb_t compact_log_pi2 = compact_ws( log_pi4, log_pi5 ); + wb_t compact_log_pi3 = compact_ws( log_pi6, log_pi7 ); + wb_t compact_log_pi4 = compact_ws( log_pi8, log_pi9 ); + wb_t compact_log_pi5 = compact_ws( log_pi10, log_pi11 ); + wb_t compact_log_pi6 = compact_ws( log_pi12, log_pi13 ); + wb_t compact_log_pi7 = compact_ws( log_pi14, log_pi15 ); + wb_t pi0 = exp_2( compact_log_pi0 ); + wb_t pi1 = exp_2( compact_log_pi1 ); + wb_t pi2 = exp_2( compact_log_pi2 ); + wb_t pi3 = exp_2( compact_log_pi3 ); + wb_t pi4 = exp_2( compact_log_pi4 ); + wb_t pi5 = exp_2( compact_log_pi5 ); + wb_t pi6 = exp_2( compact_log_pi6 ); + wb_t pi7 = exp_2( compact_log_pi7 ); + + wb_st( output, pi0 ); + wb_st( output + 32UL, pi1 ); + wb_st( output + 64UL, pi2 ); + wb_st( output + 96UL, pi3 ); + wb_st( output + 128UL, pi4 ); + wb_st( output + 160UL, pi5 ); + wb_st( output + 192UL, pi6 ); + wb_st( output + 224UL, pi7 ); + +#else + wb_t erased_vec0 = wb_ld( is_erased ); + wb_t erased_vec1 = wb_ld( is_erased + 32UL ); + wb_t erased_vec2 = wb_ld( is_erased + 64UL ); + wb_t erased_vec3 = wb_ld( is_erased + 96UL ); + wb_t erased_vec4 = wb_ld( is_erased + 128UL ); + wb_t erased_vec5 = wb_ld( is_erased + 160UL ); + wb_t erased_vec6 = wb_ld( is_erased + 192UL ); + wb_t erased_vec7 = wb_ld( is_erased + 224UL ); + wb_t to_transform0 = erased_vec0; + wb_t to_transform1 = erased_vec1; + wb_t to_transform2 = erased_vec2; + wb_t to_transform3 = erased_vec3; + wb_t to_transform4 = erased_vec4; + wb_t to_transform5 = erased_vec5; + wb_t to_transform6 = erased_vec6; + wb_t to_transform7 = erased_vec7; + + FD_REEDSOL_FWHT_256( to_transform0, to_transform1, to_transform2, to_transform3, + to_transform4, to_transform5, to_transform6, to_transform7 ); + + ws_t transformed0 = _mm256_cvtepu8_epi16( _mm256_extracti128_si256( to_transform0, 0 ) ); + ws_t transformed1 = _mm256_cvtepu8_epi16( _mm256_extracti128_si256( to_transform0, 1 ) ); + ws_t transformed2 = _mm256_cvtepu8_epi16( _mm256_extracti128_si256( to_transform1, 0 ) ); + ws_t transformed3 = _mm256_cvtepu8_epi16( _mm256_extracti128_si256( to_transform1, 1 ) ); + ws_t transformed4 = _mm256_cvtepu8_epi16( _mm256_extracti128_si256( to_transform2, 0 ) ); + ws_t transformed5 = _mm256_cvtepu8_epi16( _mm256_extracti128_si256( to_transform2, 1 ) ); + ws_t transformed6 = _mm256_cvtepu8_epi16( _mm256_extracti128_si256( to_transform3, 0 ) ); + ws_t transformed7 = _mm256_cvtepu8_epi16( _mm256_extracti128_si256( to_transform3, 1 ) ); + ws_t transformed8 = _mm256_cvtepu8_epi16( _mm256_extracti128_si256( to_transform4, 0 ) ); + ws_t transformed9 = _mm256_cvtepu8_epi16( _mm256_extracti128_si256( to_transform4, 1 ) ); + ws_t transformed10 = _mm256_cvtepu8_epi16( _mm256_extracti128_si256( to_transform5, 0 ) ); + ws_t transformed11 = _mm256_cvtepu8_epi16( _mm256_extracti128_si256( to_transform5, 1 ) ); + ws_t transformed12 = _mm256_cvtepu8_epi16( _mm256_extracti128_si256( to_transform6, 0 ) ); + ws_t transformed13 = _mm256_cvtepu8_epi16( _mm256_extracti128_si256( to_transform6, 1 ) ); + ws_t transformed14 = _mm256_cvtepu8_epi16( _mm256_extracti128_si256( to_transform7, 0 ) ); + ws_t transformed15 = _mm256_cvtepu8_epi16( _mm256_extracti128_si256( to_transform7, 1 ) ); + + /* product is congruent to FWHT( R~) * FWHT( L~ ) mod 255. + 0<=product<256*255, so product is interpreted as unsigned. 
*/ + ws_t product0 = ws_mod255( ws_mullo( transformed0, ws_ld( fwht_l_twiddle_256 ) ) ); + ws_t product1 = ws_mod255( ws_mullo( transformed1, ws_ld( fwht_l_twiddle_256 + 16UL ) ) ); + ws_t product2 = ws_mod255( ws_mullo( transformed2, ws_ld( fwht_l_twiddle_256 + 32UL ) ) ); + ws_t product3 = ws_mod255( ws_mullo( transformed3, ws_ld( fwht_l_twiddle_256 + 48UL ) ) ); + ws_t product4 = ws_mod255( ws_mullo( transformed4, ws_ld( fwht_l_twiddle_256 + 64UL ) ) ); + ws_t product5 = ws_mod255( ws_mullo( transformed5, ws_ld( fwht_l_twiddle_256 + 80UL ) ) ); + ws_t product6 = ws_mod255( ws_mullo( transformed6, ws_ld( fwht_l_twiddle_256 + 96UL ) ) ); + ws_t product7 = ws_mod255( ws_mullo( transformed7, ws_ld( fwht_l_twiddle_256 + 112UL ) ) ); + ws_t product8 = ws_mod255( ws_mullo( transformed8, ws_ld( fwht_l_twiddle_256 + 128UL ) ) ); + ws_t product9 = ws_mod255( ws_mullo( transformed9, ws_ld( fwht_l_twiddle_256 + 144UL ) ) ); + ws_t product10 = ws_mod255( ws_mullo( transformed10, ws_ld( fwht_l_twiddle_256 + 160UL ) ) ); + ws_t product11 = ws_mod255( ws_mullo( transformed11, ws_ld( fwht_l_twiddle_256 + 176UL ) ) ); + ws_t product12 = ws_mod255( ws_mullo( transformed12, ws_ld( fwht_l_twiddle_256 + 192UL ) ) ); + ws_t product13 = ws_mod255( ws_mullo( transformed13, ws_ld( fwht_l_twiddle_256 + 208UL ) ) ); + ws_t product14 = ws_mod255( ws_mullo( transformed14, ws_ld( fwht_l_twiddle_256 + 224UL ) ) ); + ws_t product15 = ws_mod255( ws_mullo( transformed15, ws_ld( fwht_l_twiddle_256 + 240UL ) ) ); + + wb_t compact_product0 = compact_ws( product0, product1 ); + wb_t compact_product1 = compact_ws( product2, product3 ); + wb_t compact_product2 = compact_ws( product4, product5 ); + wb_t compact_product3 = compact_ws( product6, product7 ); + wb_t compact_product4 = compact_ws( product8, product9 ); + wb_t compact_product5 = compact_ws( product10, product11 ); + wb_t compact_product6 = compact_ws( product12, product13 ); + wb_t compact_product7 = compact_ws( product14, product15 ); + + FD_REEDSOL_FWHT_256( compact_product0, compact_product1, compact_product2, compact_product3, + compact_product4, compact_product5, compact_product6, compact_product7 ); + + /* Negate the ones corresponding to erasures */ + compact_product0 = wb_if( wb_eq( erased_vec0, wb_zero() ), compact_product0, wb_sub( wb_bcast( 255 ), compact_product0 ) ); + compact_product1 = wb_if( wb_eq( erased_vec1, wb_zero() ), compact_product1, wb_sub( wb_bcast( 255 ), compact_product1 ) ); + compact_product2 = wb_if( wb_eq( erased_vec2, wb_zero() ), compact_product2, wb_sub( wb_bcast( 255 ), compact_product2 ) ); + compact_product3 = wb_if( wb_eq( erased_vec3, wb_zero() ), compact_product3, wb_sub( wb_bcast( 255 ), compact_product3 ) ); + compact_product4 = wb_if( wb_eq( erased_vec4, wb_zero() ), compact_product4, wb_sub( wb_bcast( 255 ), compact_product4 ) ); + compact_product5 = wb_if( wb_eq( erased_vec5, wb_zero() ), compact_product5, wb_sub( wb_bcast( 255 ), compact_product5 ) ); + compact_product6 = wb_if( wb_eq( erased_vec6, wb_zero() ), compact_product6, wb_sub( wb_bcast( 255 ), compact_product6 ) ); + compact_product7 = wb_if( wb_eq( erased_vec7, wb_zero() ), compact_product7, wb_sub( wb_bcast( 255 ), compact_product7 ) ); + + wb_t pi0 = exp_2( compact_product0 ); + wb_t pi1 = exp_2( compact_product1 ); + wb_t pi2 = exp_2( compact_product2 ); + wb_t pi3 = exp_2( compact_product3 ); + wb_t pi4 = exp_2( compact_product4 ); + wb_t pi5 = exp_2( compact_product5 ); + wb_t pi6 = exp_2( compact_product6 ); + wb_t pi7 = exp_2( compact_product7 ); + 
wb_st( output, pi0 ); + wb_st( output + 32UL, pi1 ); + wb_st( output + 64UL, pi2 ); + wb_st( output + 96UL, pi3 ); + wb_st( output + 128UL, pi4 ); + wb_st( output + 160UL, pi5 ); + wb_st( output + 192UL, pi6 ); + wb_st( output + 224UL, pi7 ); +#endif +#else /* No AVX implementation */ + + gen_pi_noavx_generic( is_erased, output, 256UL, fwht_l_twiddle_256 ); + +#endif +} diff --git a/src/ballet/reedsol/fd_reedsol_pi.h b/src/ballet/reedsol/fd_reedsol_pi.h new file mode 100644 index 00000000000..1af598dabc7 --- /dev/null +++ b/src/ballet/reedsol/fd_reedsol_pi.h @@ -0,0 +1,47 @@ +#ifndef HEADER_fd_src_ballet_reedsol_fd_reedsol_pi_h +#define HEADER_fd_src_ballet_reedsol_fd_reedsol_pi_h +#include "../../util/fd_util_base.h" + +/* This file generates what + S. -J. Lin, T. Y. Al-Naffouri, Y. S. Han and W. -H. Chung, "Novel + Polynomial Basis With Fast Fourier Transform and Its Application to + Reed–Solomon Erasure Codes," in IEEE Transactions on Information + Theory, vol. 62, no. 11, pp. 6284-6299, Nov. 2016, doi: + 10.1109/TIT.2016.2608892. + and + Didier, Frédéric. "Efficient erasure decoding of Reed-Solomon + codes." arXiv preprint arXiv:0901.1886 (2009). + call Pi and 1/Pi'. For more information about Pi and Pi', see the + implementation or the papers referenced above. + + The main set of functions this file exposes is + + void fd_reedsol_gen_pi_{N}( uchar const * is_erased, uchar * output ) + + for N in {16, 32, 64, 128, 256}. Since Pi is only needed for elements + that are not erased, Pi' is only needed for elements that are erased, + and it is computationally beneficial to compute them at the same + time, this function computes them both. + + is_erased and output must point to the first element of arrays + indexed [0, N). They must be aligned to 32 bytes. + + Upon return, output[i] stores Pi(i) if is_erased[i]==0 and 1/Pi'(i) + if is_erased[i]==1. It's undefined behavior for is_erased to contain + something other than 0 or 1. + + Pi and Pi' are both elements of GF(2^8) stored in their normal byte + representation. */ +void fd_reedsol_gen_pi_16 ( uchar const * is_erased, uchar * output ); +void fd_reedsol_gen_pi_32 ( uchar const * is_erased, uchar * output ); +void fd_reedsol_gen_pi_64 ( uchar const * is_erased, uchar * output ); +void fd_reedsol_gen_pi_128( uchar const * is_erased, uchar * output ); +void fd_reedsol_gen_pi_256( uchar const * is_erased, uchar * output ); + +/* The following are the pre-computed values for common cases. + They're exposed in this header so that the values to multiply are + known at compile time to eliminate loads on the critical path. 
*/ +/* TODO: Decide on pre-computed cases and add them */ + + +#endif /* HEADER_fd_src_ballet_reedsol_fd_reedsol_pi_h */ diff --git a/src/ballet/reedsol/fd_reedsol_recover_128.c b/src/ballet/reedsol/fd_reedsol_recover_128.c new file mode 100644 index 00000000000..728bb443799 --- /dev/null +++ b/src/ballet/reedsol/fd_reedsol_recover_128.c @@ -0,0 +1,614 @@ +#include "../../util/fd_util.h" +#include "fd_reedsol.h" +#include "fd_reedsol_internal.h" +#if FD_HAS_GFNI +#include "fd_reedsol_arith_gfni.h" +#elif FD_HAS_AVX +#include "fd_reedsol_arith_avx2.h" +#else +#include "fd_reedsol_arith_none.h" +#endif +#include "fd_reedsol_fft.h" +#include "fd_reedsol_ppt.h" +#include "fd_reedsol_fderiv.h" +#include "fd_reedsol_pi.h" + +int fd_reedsol_recover_var_128( ulong shred_sz, + uchar * const * shred, + ulong data_shred_cnt, + ulong parity_shred_cnt, + uchar const * erased ) { + uchar _erased[ 128 ] W_ATTR; + uchar pi[ 128 ] W_ATTR; + ulong shred_cnt = data_shred_cnt + parity_shred_cnt; + ulong loaded_cnt = 0UL; + for( ulong i=0UL; i<128UL; i++) { + int load_shred = ((i=shred_cnt, do nothing, which will keep the value of the + shred if it existed in the variable. */ + #define STORE_COMPARE_RELOAD( n, var ) do{ \ + if( erased[ n ] ) gf_stu( shred[ n ] + shred_pos, var ); \ + else if( _erased[ n ] ) diff = GF_OR( diff, GF_ADD( var, gf_ldu( shred[ n ] + shred_pos ) ) ); \ + else var = gf_ldu( shred[ n ] + shred_pos ); \ + } while( 0 ) + #define STORE_COMPARE( n, var ) do{ \ + if( erased[ n ] ) gf_stu( shred[ n ] + shred_pos, var ); \ + else diff = GF_OR( diff, GF_ADD( var, gf_ldu( shred[ n ] + shred_pos ) ) ); \ + } while( 0 ) + switch( fd_ulong_min( shred_cnt, 128UL ) ) { + case 128UL: STORE_COMPARE_RELOAD( 127, in127 ); FALLTHRU + case 127UL: STORE_COMPARE_RELOAD( 126, in126 ); FALLTHRU + case 126UL: STORE_COMPARE_RELOAD( 125, in125 ); FALLTHRU + case 125UL: STORE_COMPARE_RELOAD( 124, in124 ); FALLTHRU + case 124UL: STORE_COMPARE_RELOAD( 123, in123 ); FALLTHRU + case 123UL: STORE_COMPARE_RELOAD( 122, in122 ); FALLTHRU + case 122UL: STORE_COMPARE_RELOAD( 121, in121 ); FALLTHRU + case 121UL: STORE_COMPARE_RELOAD( 120, in120 ); FALLTHRU + case 120UL: STORE_COMPARE_RELOAD( 119, in119 ); FALLTHRU + case 119UL: STORE_COMPARE_RELOAD( 118, in118 ); FALLTHRU + case 118UL: STORE_COMPARE_RELOAD( 117, in117 ); FALLTHRU + case 117UL: STORE_COMPARE_RELOAD( 116, in116 ); FALLTHRU + case 116UL: STORE_COMPARE_RELOAD( 115, in115 ); FALLTHRU + case 115UL: STORE_COMPARE_RELOAD( 114, in114 ); FALLTHRU + case 114UL: STORE_COMPARE_RELOAD( 113, in113 ); FALLTHRU + case 113UL: STORE_COMPARE_RELOAD( 112, in112 ); FALLTHRU + case 112UL: STORE_COMPARE_RELOAD( 111, in111 ); FALLTHRU + case 111UL: STORE_COMPARE_RELOAD( 110, in110 ); FALLTHRU + case 110UL: STORE_COMPARE_RELOAD( 109, in109 ); FALLTHRU + case 109UL: STORE_COMPARE_RELOAD( 108, in108 ); FALLTHRU + case 108UL: STORE_COMPARE_RELOAD( 107, in107 ); FALLTHRU + case 107UL: STORE_COMPARE_RELOAD( 106, in106 ); FALLTHRU + case 106UL: STORE_COMPARE_RELOAD( 105, in105 ); FALLTHRU + case 105UL: STORE_COMPARE_RELOAD( 104, in104 ); FALLTHRU + case 104UL: STORE_COMPARE_RELOAD( 103, in103 ); FALLTHRU + case 103UL: STORE_COMPARE_RELOAD( 102, in102 ); FALLTHRU + case 102UL: STORE_COMPARE_RELOAD( 101, in101 ); FALLTHRU + case 101UL: STORE_COMPARE_RELOAD( 100, in100 ); FALLTHRU + case 100UL: STORE_COMPARE_RELOAD( 99, in99 ); FALLTHRU + case 99UL: STORE_COMPARE_RELOAD( 98, in98 ); FALLTHRU + case 98UL: STORE_COMPARE_RELOAD( 97, in97 ); FALLTHRU + case 97UL: STORE_COMPARE_RELOAD( 96, in96 
); FALLTHRU + case 96UL: STORE_COMPARE_RELOAD( 95, in95 ); FALLTHRU + case 95UL: STORE_COMPARE_RELOAD( 94, in94 ); FALLTHRU + case 94UL: STORE_COMPARE_RELOAD( 93, in93 ); FALLTHRU + case 93UL: STORE_COMPARE_RELOAD( 92, in92 ); FALLTHRU + case 92UL: STORE_COMPARE_RELOAD( 91, in91 ); FALLTHRU + case 91UL: STORE_COMPARE_RELOAD( 90, in90 ); FALLTHRU + case 90UL: STORE_COMPARE_RELOAD( 89, in89 ); FALLTHRU + case 89UL: STORE_COMPARE_RELOAD( 88, in88 ); FALLTHRU + case 88UL: STORE_COMPARE_RELOAD( 87, in87 ); FALLTHRU + case 87UL: STORE_COMPARE_RELOAD( 86, in86 ); FALLTHRU + case 86UL: STORE_COMPARE_RELOAD( 85, in85 ); FALLTHRU + case 85UL: STORE_COMPARE_RELOAD( 84, in84 ); FALLTHRU + case 84UL: STORE_COMPARE_RELOAD( 83, in83 ); FALLTHRU + case 83UL: STORE_COMPARE_RELOAD( 82, in82 ); FALLTHRU + case 82UL: STORE_COMPARE_RELOAD( 81, in81 ); FALLTHRU + case 81UL: STORE_COMPARE_RELOAD( 80, in80 ); FALLTHRU + case 80UL: STORE_COMPARE_RELOAD( 79, in79 ); FALLTHRU + case 79UL: STORE_COMPARE_RELOAD( 78, in78 ); FALLTHRU + case 78UL: STORE_COMPARE_RELOAD( 77, in77 ); FALLTHRU + case 77UL: STORE_COMPARE_RELOAD( 76, in76 ); FALLTHRU + case 76UL: STORE_COMPARE_RELOAD( 75, in75 ); FALLTHRU + case 75UL: STORE_COMPARE_RELOAD( 74, in74 ); FALLTHRU + case 74UL: STORE_COMPARE_RELOAD( 73, in73 ); FALLTHRU + case 73UL: STORE_COMPARE_RELOAD( 72, in72 ); FALLTHRU + case 72UL: STORE_COMPARE_RELOAD( 71, in71 ); FALLTHRU + case 71UL: STORE_COMPARE_RELOAD( 70, in70 ); FALLTHRU + case 70UL: STORE_COMPARE_RELOAD( 69, in69 ); FALLTHRU + case 69UL: STORE_COMPARE_RELOAD( 68, in68 ); FALLTHRU + case 68UL: STORE_COMPARE_RELOAD( 67, in67 ); FALLTHRU + case 67UL: STORE_COMPARE_RELOAD( 66, in66 ); FALLTHRU + case 66UL: STORE_COMPARE_RELOAD( 65, in65 ); FALLTHRU + case 65UL: STORE_COMPARE_RELOAD( 64, in64 ); FALLTHRU + case 64UL: STORE_COMPARE_RELOAD( 63, in63 ); FALLTHRU + case 63UL: STORE_COMPARE_RELOAD( 62, in62 ); FALLTHRU + case 62UL: STORE_COMPARE_RELOAD( 61, in61 ); FALLTHRU + case 61UL: STORE_COMPARE_RELOAD( 60, in60 ); FALLTHRU + case 60UL: STORE_COMPARE_RELOAD( 59, in59 ); FALLTHRU + case 59UL: STORE_COMPARE_RELOAD( 58, in58 ); FALLTHRU + case 58UL: STORE_COMPARE_RELOAD( 57, in57 ); FALLTHRU + case 57UL: STORE_COMPARE_RELOAD( 56, in56 ); FALLTHRU + case 56UL: STORE_COMPARE_RELOAD( 55, in55 ); FALLTHRU + case 55UL: STORE_COMPARE_RELOAD( 54, in54 ); FALLTHRU + case 54UL: STORE_COMPARE_RELOAD( 53, in53 ); FALLTHRU + case 53UL: STORE_COMPARE_RELOAD( 52, in52 ); FALLTHRU + case 52UL: STORE_COMPARE_RELOAD( 51, in51 ); FALLTHRU + case 51UL: STORE_COMPARE_RELOAD( 50, in50 ); FALLTHRU + case 50UL: STORE_COMPARE_RELOAD( 49, in49 ); FALLTHRU + case 49UL: STORE_COMPARE_RELOAD( 48, in48 ); FALLTHRU + case 48UL: STORE_COMPARE_RELOAD( 47, in47 ); FALLTHRU + case 47UL: STORE_COMPARE_RELOAD( 46, in46 ); FALLTHRU + case 46UL: STORE_COMPARE_RELOAD( 45, in45 ); FALLTHRU + case 45UL: STORE_COMPARE_RELOAD( 44, in44 ); FALLTHRU + case 44UL: STORE_COMPARE_RELOAD( 43, in43 ); FALLTHRU + case 43UL: STORE_COMPARE_RELOAD( 42, in42 ); FALLTHRU + case 42UL: STORE_COMPARE_RELOAD( 41, in41 ); FALLTHRU + case 41UL: STORE_COMPARE_RELOAD( 40, in40 ); FALLTHRU + case 40UL: STORE_COMPARE_RELOAD( 39, in39 ); FALLTHRU + case 39UL: STORE_COMPARE_RELOAD( 38, in38 ); FALLTHRU + case 38UL: STORE_COMPARE_RELOAD( 37, in37 ); FALLTHRU + case 37UL: STORE_COMPARE_RELOAD( 36, in36 ); FALLTHRU + case 36UL: STORE_COMPARE_RELOAD( 35, in35 ); FALLTHRU + case 35UL: STORE_COMPARE_RELOAD( 34, in34 ); FALLTHRU + case 34UL: STORE_COMPARE_RELOAD( 33, in33 ); FALLTHRU + case 33UL: 
STORE_COMPARE_RELOAD( 32, in32 ); FALLTHRU + case 32UL: STORE_COMPARE_RELOAD( 31, in31 ); FALLTHRU + case 31UL: STORE_COMPARE_RELOAD( 30, in30 ); FALLTHRU + case 30UL: STORE_COMPARE_RELOAD( 29, in29 ); FALLTHRU + case 29UL: STORE_COMPARE_RELOAD( 28, in28 ); FALLTHRU + case 28UL: STORE_COMPARE_RELOAD( 27, in27 ); FALLTHRU + case 27UL: STORE_COMPARE_RELOAD( 26, in26 ); FALLTHRU + case 26UL: STORE_COMPARE_RELOAD( 25, in25 ); FALLTHRU + case 25UL: STORE_COMPARE_RELOAD( 24, in24 ); FALLTHRU + case 24UL: STORE_COMPARE_RELOAD( 23, in23 ); FALLTHRU + case 23UL: STORE_COMPARE_RELOAD( 22, in22 ); FALLTHRU + case 22UL: STORE_COMPARE_RELOAD( 21, in21 ); FALLTHRU + case 21UL: STORE_COMPARE_RELOAD( 20, in20 ); FALLTHRU + case 20UL: STORE_COMPARE_RELOAD( 19, in19 ); FALLTHRU + case 19UL: STORE_COMPARE_RELOAD( 18, in18 ); FALLTHRU + case 18UL: STORE_COMPARE_RELOAD( 17, in17 ); FALLTHRU + case 17UL: STORE_COMPARE_RELOAD( 16, in16 ); FALLTHRU + case 16UL: STORE_COMPARE_RELOAD( 15, in15 ); FALLTHRU + case 15UL: STORE_COMPARE_RELOAD( 14, in14 ); FALLTHRU + case 14UL: STORE_COMPARE_RELOAD( 13, in13 ); FALLTHRU + case 13UL: STORE_COMPARE_RELOAD( 12, in12 ); FALLTHRU + case 12UL: STORE_COMPARE_RELOAD( 11, in11 ); FALLTHRU + case 11UL: STORE_COMPARE_RELOAD( 10, in10 ); FALLTHRU + case 10UL: STORE_COMPARE_RELOAD( 9, in09 ); FALLTHRU + case 9UL: STORE_COMPARE_RELOAD( 8, in08 ); FALLTHRU + case 8UL: STORE_COMPARE_RELOAD( 7, in07 ); FALLTHRU + case 7UL: STORE_COMPARE_RELOAD( 6, in06 ); FALLTHRU + case 6UL: STORE_COMPARE_RELOAD( 5, in05 ); FALLTHRU + case 5UL: STORE_COMPARE_RELOAD( 4, in04 ); FALLTHRU + case 4UL: STORE_COMPARE_RELOAD( 3, in03 ); FALLTHRU + case 3UL: STORE_COMPARE_RELOAD( 2, in02 ); FALLTHRU + case 2UL: STORE_COMPARE_RELOAD( 1, in01 ); FALLTHRU + case 1UL: STORE_COMPARE_RELOAD( 0, in00 ); + } + + ulong shreds_remaining = shred_cnt-fd_ulong_min( shred_cnt, 128UL ); + if( shreds_remaining>0UL ) { + FD_REEDSOL_GENERATE_IFFT( 128, 0, ALL_VARS ); + FD_REEDSOL_GENERATE_FFT( 128, 128, ALL_VARS ); + + switch( fd_ulong_min( shreds_remaining, 128UL ) ) { + case 7UL: STORE_COMPARE( 134, in06 ); FALLTHRU + case 6UL: STORE_COMPARE( 133, in05 ); FALLTHRU + case 5UL: STORE_COMPARE( 132, in04 ); FALLTHRU + case 4UL: STORE_COMPARE( 131, in03 ); FALLTHRU + case 3UL: STORE_COMPARE( 130, in02 ); FALLTHRU + case 2UL: STORE_COMPARE( 129, in01 ); FALLTHRU + case 1UL: STORE_COMPARE( 128, in00 ); + } + shreds_remaining -= fd_ulong_min( shreds_remaining, 128UL ); + } + if( FD_UNLIKELY( GF_ANY( diff ) ) ) return FD_REEDSOL_ERR_INCONSISTENT; + shred_pos += GF_WIDTH; + shred_pos = fd_ulong_if( ((shred_sz-GF_WIDTH)=shred_cnt, do nothing, which will keep the value of the + shred if it existed in the variable. 
*/ + #define STORE_COMPARE_RELOAD( n, var ) do{ \ + if( erased[ n ] ) gf_stu( shred[ n ] + shred_pos, var ); \ + else if( _erased[ n ] ) diff = GF_OR( diff, GF_ADD( var, gf_ldu( shred[ n ] + shred_pos ) ) ); \ + else var = gf_ldu( shred[ n ] + shred_pos ); \ + } while( 0 ) + #define STORE_COMPARE( n, var ) do{ \ + if( erased[ n ] ) gf_stu( shred[ n ] + shred_pos, var ); \ + else diff = GF_OR( diff, GF_ADD( var, gf_ldu( shred[ n ] + shred_pos ) ) ); \ + } while( 0 ) + switch( fd_ulong_min( shred_cnt, 16UL ) ) { + case 16UL: STORE_COMPARE_RELOAD( 15, in15 ); FALLTHRU + case 15UL: STORE_COMPARE_RELOAD( 14, in14 ); FALLTHRU + case 14UL: STORE_COMPARE_RELOAD( 13, in13 ); FALLTHRU + case 13UL: STORE_COMPARE_RELOAD( 12, in12 ); FALLTHRU + case 12UL: STORE_COMPARE_RELOAD( 11, in11 ); FALLTHRU + case 11UL: STORE_COMPARE_RELOAD( 10, in10 ); FALLTHRU + case 10UL: STORE_COMPARE_RELOAD( 9, in09 ); FALLTHRU + case 9UL: STORE_COMPARE_RELOAD( 8, in08 ); FALLTHRU + case 8UL: STORE_COMPARE_RELOAD( 7, in07 ); FALLTHRU + case 7UL: STORE_COMPARE_RELOAD( 6, in06 ); FALLTHRU + case 6UL: STORE_COMPARE_RELOAD( 5, in05 ); FALLTHRU + case 5UL: STORE_COMPARE_RELOAD( 4, in04 ); FALLTHRU + case 4UL: STORE_COMPARE_RELOAD( 3, in03 ); FALLTHRU + case 3UL: STORE_COMPARE_RELOAD( 2, in02 ); FALLTHRU + case 2UL: STORE_COMPARE_RELOAD( 1, in01 ); FALLTHRU + case 1UL: STORE_COMPARE_RELOAD( 0, in00 ); + } + + ulong shreds_remaining = shred_cnt-fd_ulong_min( shred_cnt, 16UL ); + if( shreds_remaining>0UL ) { + FD_REEDSOL_GENERATE_IFFT( 16, 0, ALL_VARS ); + FD_REEDSOL_GENERATE_FFT( 16, 16, ALL_VARS ); + + switch( fd_ulong_min( shreds_remaining, 16UL ) ) { + case 16UL: STORE_COMPARE( 31, in15 ); FALLTHRU + case 15UL: STORE_COMPARE( 30, in14 ); FALLTHRU + case 14UL: STORE_COMPARE( 29, in13 ); FALLTHRU + case 13UL: STORE_COMPARE( 28, in12 ); FALLTHRU + case 12UL: STORE_COMPARE( 27, in11 ); FALLTHRU + case 11UL: STORE_COMPARE( 26, in10 ); FALLTHRU + case 10UL: STORE_COMPARE( 25, in09 ); FALLTHRU + case 9UL: STORE_COMPARE( 24, in08 ); FALLTHRU + case 8UL: STORE_COMPARE( 23, in07 ); FALLTHRU + case 7UL: STORE_COMPARE( 22, in06 ); FALLTHRU + case 6UL: STORE_COMPARE( 21, in05 ); FALLTHRU + case 5UL: STORE_COMPARE( 20, in04 ); FALLTHRU + case 4UL: STORE_COMPARE( 19, in03 ); FALLTHRU + case 3UL: STORE_COMPARE( 18, in02 ); FALLTHRU + case 2UL: STORE_COMPARE( 17, in01 ); FALLTHRU + case 1UL: STORE_COMPARE( 16, in00 ); + } + shreds_remaining -= fd_ulong_min( shreds_remaining, 16UL ); + } + if( shreds_remaining>0UL ) { + FD_REEDSOL_GENERATE_IFFT( 16, 16, ALL_VARS ); + FD_REEDSOL_GENERATE_FFT( 16, 32, ALL_VARS ); + + switch( fd_ulong_min( shreds_remaining, 16UL ) ) { + case 16UL: STORE_COMPARE( 47, in15 ); FALLTHRU + case 15UL: STORE_COMPARE( 46, in14 ); FALLTHRU + case 14UL: STORE_COMPARE( 45, in13 ); FALLTHRU + case 13UL: STORE_COMPARE( 44, in12 ); FALLTHRU + case 12UL: STORE_COMPARE( 43, in11 ); FALLTHRU + case 11UL: STORE_COMPARE( 42, in10 ); FALLTHRU + case 10UL: STORE_COMPARE( 41, in09 ); FALLTHRU + case 9UL: STORE_COMPARE( 40, in08 ); FALLTHRU + case 8UL: STORE_COMPARE( 39, in07 ); FALLTHRU + case 7UL: STORE_COMPARE( 38, in06 ); FALLTHRU + case 6UL: STORE_COMPARE( 37, in05 ); FALLTHRU + case 5UL: STORE_COMPARE( 36, in04 ); FALLTHRU + case 4UL: STORE_COMPARE( 35, in03 ); FALLTHRU + case 3UL: STORE_COMPARE( 34, in02 ); FALLTHRU + case 2UL: STORE_COMPARE( 33, in01 ); FALLTHRU + case 1UL: STORE_COMPARE( 32, in00 ); + } + shreds_remaining -= fd_ulong_min( shreds_remaining, 16UL ); + } + if( shreds_remaining>0UL ) { + FD_REEDSOL_GENERATE_IFFT( 
16, 32, ALL_VARS ); + FD_REEDSOL_GENERATE_FFT( 16, 48, ALL_VARS ); + + switch( fd_ulong_min( shreds_remaining, 16UL ) ) { + case 16UL: STORE_COMPARE( 63, in15 ); FALLTHRU + case 15UL: STORE_COMPARE( 62, in14 ); FALLTHRU + case 14UL: STORE_COMPARE( 61, in13 ); FALLTHRU + case 13UL: STORE_COMPARE( 60, in12 ); FALLTHRU + case 12UL: STORE_COMPARE( 59, in11 ); FALLTHRU + case 11UL: STORE_COMPARE( 58, in10 ); FALLTHRU + case 10UL: STORE_COMPARE( 57, in09 ); FALLTHRU + case 9UL: STORE_COMPARE( 56, in08 ); FALLTHRU + case 8UL: STORE_COMPARE( 55, in07 ); FALLTHRU + case 7UL: STORE_COMPARE( 54, in06 ); FALLTHRU + case 6UL: STORE_COMPARE( 53, in05 ); FALLTHRU + case 5UL: STORE_COMPARE( 52, in04 ); FALLTHRU + case 4UL: STORE_COMPARE( 51, in03 ); FALLTHRU + case 3UL: STORE_COMPARE( 50, in02 ); FALLTHRU + case 2UL: STORE_COMPARE( 49, in01 ); FALLTHRU + case 1UL: STORE_COMPARE( 48, in00 ); + } + shreds_remaining -= fd_ulong_min( shreds_remaining, 16UL ); + } + if( shreds_remaining>0UL ) { + FD_REEDSOL_GENERATE_IFFT( 16, 48, ALL_VARS ); + FD_REEDSOL_GENERATE_FFT( 16, 64, ALL_VARS ); + + switch( fd_ulong_min( shreds_remaining, 16UL ) ) { + case 16UL: STORE_COMPARE( 79, in15 ); FALLTHRU + case 15UL: STORE_COMPARE( 78, in14 ); FALLTHRU + case 14UL: STORE_COMPARE( 77, in13 ); FALLTHRU + case 13UL: STORE_COMPARE( 76, in12 ); FALLTHRU + case 12UL: STORE_COMPARE( 75, in11 ); FALLTHRU + case 11UL: STORE_COMPARE( 74, in10 ); FALLTHRU + case 10UL: STORE_COMPARE( 73, in09 ); FALLTHRU + case 9UL: STORE_COMPARE( 72, in08 ); FALLTHRU + case 8UL: STORE_COMPARE( 71, in07 ); FALLTHRU + case 7UL: STORE_COMPARE( 70, in06 ); FALLTHRU + case 6UL: STORE_COMPARE( 69, in05 ); FALLTHRU + case 5UL: STORE_COMPARE( 68, in04 ); FALLTHRU + case 4UL: STORE_COMPARE( 67, in03 ); FALLTHRU + case 3UL: STORE_COMPARE( 66, in02 ); FALLTHRU + case 2UL: STORE_COMPARE( 65, in01 ); FALLTHRU + case 1UL: STORE_COMPARE( 64, in00 ); + } + shreds_remaining -= fd_ulong_min( shreds_remaining, 16UL ); + } + if( shreds_remaining>0UL ) { + FD_REEDSOL_GENERATE_IFFT( 16, 64, ALL_VARS ); + FD_REEDSOL_GENERATE_FFT( 16, 80, ALL_VARS ); + + switch( fd_ulong_min( shreds_remaining, 16UL ) ) { + case 16UL: STORE_COMPARE( 95, in15 ); FALLTHRU + case 15UL: STORE_COMPARE( 94, in14 ); FALLTHRU + case 14UL: STORE_COMPARE( 93, in13 ); FALLTHRU + case 13UL: STORE_COMPARE( 92, in12 ); FALLTHRU + case 12UL: STORE_COMPARE( 91, in11 ); FALLTHRU + case 11UL: STORE_COMPARE( 90, in10 ); FALLTHRU + case 10UL: STORE_COMPARE( 89, in09 ); FALLTHRU + case 9UL: STORE_COMPARE( 88, in08 ); FALLTHRU + case 8UL: STORE_COMPARE( 87, in07 ); FALLTHRU + case 7UL: STORE_COMPARE( 86, in06 ); FALLTHRU + case 6UL: STORE_COMPARE( 85, in05 ); FALLTHRU + case 5UL: STORE_COMPARE( 84, in04 ); FALLTHRU + case 4UL: STORE_COMPARE( 83, in03 ); FALLTHRU + case 3UL: STORE_COMPARE( 82, in02 ); FALLTHRU + case 2UL: STORE_COMPARE( 81, in01 ); FALLTHRU + case 1UL: STORE_COMPARE( 80, in00 ); + } + shreds_remaining -= fd_ulong_min( shreds_remaining, 16UL ); + } + if( shreds_remaining>0UL ) { + FD_REEDSOL_GENERATE_IFFT( 16, 80, ALL_VARS ); + FD_REEDSOL_GENERATE_FFT( 16, 96, ALL_VARS ); + + switch( fd_ulong_min( shreds_remaining, 16UL ) ) { + case 16UL: STORE_COMPARE( 111, in15 ); FALLTHRU + case 15UL: STORE_COMPARE( 110, in14 ); FALLTHRU + case 14UL: STORE_COMPARE( 109, in13 ); FALLTHRU + case 13UL: STORE_COMPARE( 108, in12 ); FALLTHRU + case 12UL: STORE_COMPARE( 107, in11 ); FALLTHRU + case 11UL: STORE_COMPARE( 106, in10 ); FALLTHRU + case 10UL: STORE_COMPARE( 105, in09 ); FALLTHRU + case 9UL: STORE_COMPARE( 104, 
in08 ); FALLTHRU + case 8UL: STORE_COMPARE( 103, in07 ); FALLTHRU + case 7UL: STORE_COMPARE( 102, in06 ); FALLTHRU + case 6UL: STORE_COMPARE( 101, in05 ); FALLTHRU + case 5UL: STORE_COMPARE( 100, in04 ); FALLTHRU + case 4UL: STORE_COMPARE( 99, in03 ); FALLTHRU + case 3UL: STORE_COMPARE( 98, in02 ); FALLTHRU + case 2UL: STORE_COMPARE( 97, in01 ); FALLTHRU + case 1UL: STORE_COMPARE( 96, in00 ); + } + shreds_remaining -= fd_ulong_min( shreds_remaining, 16UL ); + } + if( shreds_remaining>0UL ) { + FD_REEDSOL_GENERATE_IFFT( 16, 96, ALL_VARS ); + FD_REEDSOL_GENERATE_FFT( 16, 112, ALL_VARS ); + + switch( fd_ulong_min( shreds_remaining, 16UL ) ) { + case 16UL: STORE_COMPARE( 127, in15 ); FALLTHRU + case 15UL: STORE_COMPARE( 126, in14 ); FALLTHRU + case 14UL: STORE_COMPARE( 125, in13 ); FALLTHRU + case 13UL: STORE_COMPARE( 124, in12 ); FALLTHRU + case 12UL: STORE_COMPARE( 123, in11 ); FALLTHRU + case 11UL: STORE_COMPARE( 122, in10 ); FALLTHRU + case 10UL: STORE_COMPARE( 121, in09 ); FALLTHRU + case 9UL: STORE_COMPARE( 120, in08 ); FALLTHRU + case 8UL: STORE_COMPARE( 119, in07 ); FALLTHRU + case 7UL: STORE_COMPARE( 118, in06 ); FALLTHRU + case 6UL: STORE_COMPARE( 117, in05 ); FALLTHRU + case 5UL: STORE_COMPARE( 116, in04 ); FALLTHRU + case 4UL: STORE_COMPARE( 115, in03 ); FALLTHRU + case 3UL: STORE_COMPARE( 114, in02 ); FALLTHRU + case 2UL: STORE_COMPARE( 113, in01 ); FALLTHRU + case 1UL: STORE_COMPARE( 112, in00 ); + } + shreds_remaining -= fd_ulong_min( shreds_remaining, 16UL ); + } + if( shreds_remaining>0UL ) { + FD_REEDSOL_GENERATE_IFFT( 16, 112, ALL_VARS ); + FD_REEDSOL_GENERATE_FFT( 16, 128, ALL_VARS ); + + switch( fd_ulong_min( shreds_remaining, 16UL ) ) { + case 7UL: STORE_COMPARE( 134, in06 ); FALLTHRU + case 6UL: STORE_COMPARE( 133, in05 ); FALLTHRU + case 5UL: STORE_COMPARE( 132, in04 ); FALLTHRU + case 4UL: STORE_COMPARE( 131, in03 ); FALLTHRU + case 3UL: STORE_COMPARE( 130, in02 ); FALLTHRU + case 2UL: STORE_COMPARE( 129, in01 ); FALLTHRU + case 1UL: STORE_COMPARE( 128, in00 ); + } + shreds_remaining -= fd_ulong_min( shreds_remaining, 16UL ); + } + if( FD_UNLIKELY( GF_ANY( diff ) ) ) return FD_REEDSOL_ERR_INCONSISTENT; + shred_pos += GF_WIDTH; + shred_pos = fd_ulong_if( ((shred_sz-GF_WIDTH)=shred_cnt, do nothing, which will keep the value of the + shred if it existed in the variable. 
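+
+   Although this variant switches on fd_ulong_min( shred_cnt, 256UL ),
+   the case labels only go up to 134UL: an operation involves at most
+   FD_REEDSOL_DATA_SHREDS_MAX + FD_REEDSOL_PARITY_SHREDS_MAX = 67+67 =
+   134 shreds (data plus parity), so larger counts cannot occur.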
*/ + #define STORE_COMPARE_RELOAD( n, var ) do{ \ + if( erased[ n ] ) gf_stu( shred[ n ] + shred_pos, var ); \ + else if( _erased[ n ] ) diff = GF_OR( diff, GF_ADD( var, gf_ldu( shred[ n ] + shred_pos ) ) ); \ + else var = gf_ldu( shred[ n ] + shred_pos ); \ + } while( 0 ) + #define STORE_COMPARE( n, var ) do{ \ + if( erased[ n ] ) gf_stu( shred[ n ] + shred_pos, var ); \ + else diff = GF_OR( diff, GF_ADD( var, gf_ldu( shred[ n ] + shred_pos ) ) ); \ + } while( 0 ) + switch( fd_ulong_min( shred_cnt, 256UL ) ) { + case 134UL: STORE_COMPARE_RELOAD( 133, in133 ); FALLTHRU + case 133UL: STORE_COMPARE_RELOAD( 132, in132 ); FALLTHRU + case 132UL: STORE_COMPARE_RELOAD( 131, in131 ); FALLTHRU + case 131UL: STORE_COMPARE_RELOAD( 130, in130 ); FALLTHRU + case 130UL: STORE_COMPARE_RELOAD( 129, in129 ); FALLTHRU + case 129UL: STORE_COMPARE_RELOAD( 128, in128 ); FALLTHRU + case 128UL: STORE_COMPARE_RELOAD( 127, in127 ); FALLTHRU + case 127UL: STORE_COMPARE_RELOAD( 126, in126 ); FALLTHRU + case 126UL: STORE_COMPARE_RELOAD( 125, in125 ); FALLTHRU + case 125UL: STORE_COMPARE_RELOAD( 124, in124 ); FALLTHRU + case 124UL: STORE_COMPARE_RELOAD( 123, in123 ); FALLTHRU + case 123UL: STORE_COMPARE_RELOAD( 122, in122 ); FALLTHRU + case 122UL: STORE_COMPARE_RELOAD( 121, in121 ); FALLTHRU + case 121UL: STORE_COMPARE_RELOAD( 120, in120 ); FALLTHRU + case 120UL: STORE_COMPARE_RELOAD( 119, in119 ); FALLTHRU + case 119UL: STORE_COMPARE_RELOAD( 118, in118 ); FALLTHRU + case 118UL: STORE_COMPARE_RELOAD( 117, in117 ); FALLTHRU + case 117UL: STORE_COMPARE_RELOAD( 116, in116 ); FALLTHRU + case 116UL: STORE_COMPARE_RELOAD( 115, in115 ); FALLTHRU + case 115UL: STORE_COMPARE_RELOAD( 114, in114 ); FALLTHRU + case 114UL: STORE_COMPARE_RELOAD( 113, in113 ); FALLTHRU + case 113UL: STORE_COMPARE_RELOAD( 112, in112 ); FALLTHRU + case 112UL: STORE_COMPARE_RELOAD( 111, in111 ); FALLTHRU + case 111UL: STORE_COMPARE_RELOAD( 110, in110 ); FALLTHRU + case 110UL: STORE_COMPARE_RELOAD( 109, in109 ); FALLTHRU + case 109UL: STORE_COMPARE_RELOAD( 108, in108 ); FALLTHRU + case 108UL: STORE_COMPARE_RELOAD( 107, in107 ); FALLTHRU + case 107UL: STORE_COMPARE_RELOAD( 106, in106 ); FALLTHRU + case 106UL: STORE_COMPARE_RELOAD( 105, in105 ); FALLTHRU + case 105UL: STORE_COMPARE_RELOAD( 104, in104 ); FALLTHRU + case 104UL: STORE_COMPARE_RELOAD( 103, in103 ); FALLTHRU + case 103UL: STORE_COMPARE_RELOAD( 102, in102 ); FALLTHRU + case 102UL: STORE_COMPARE_RELOAD( 101, in101 ); FALLTHRU + case 101UL: STORE_COMPARE_RELOAD( 100, in100 ); FALLTHRU + case 100UL: STORE_COMPARE_RELOAD( 99, in99 ); FALLTHRU + case 99UL: STORE_COMPARE_RELOAD( 98, in98 ); FALLTHRU + case 98UL: STORE_COMPARE_RELOAD( 97, in97 ); FALLTHRU + case 97UL: STORE_COMPARE_RELOAD( 96, in96 ); FALLTHRU + case 96UL: STORE_COMPARE_RELOAD( 95, in95 ); FALLTHRU + case 95UL: STORE_COMPARE_RELOAD( 94, in94 ); FALLTHRU + case 94UL: STORE_COMPARE_RELOAD( 93, in93 ); FALLTHRU + case 93UL: STORE_COMPARE_RELOAD( 92, in92 ); FALLTHRU + case 92UL: STORE_COMPARE_RELOAD( 91, in91 ); FALLTHRU + case 91UL: STORE_COMPARE_RELOAD( 90, in90 ); FALLTHRU + case 90UL: STORE_COMPARE_RELOAD( 89, in89 ); FALLTHRU + case 89UL: STORE_COMPARE_RELOAD( 88, in88 ); FALLTHRU + case 88UL: STORE_COMPARE_RELOAD( 87, in87 ); FALLTHRU + case 87UL: STORE_COMPARE_RELOAD( 86, in86 ); FALLTHRU + case 86UL: STORE_COMPARE_RELOAD( 85, in85 ); FALLTHRU + case 85UL: STORE_COMPARE_RELOAD( 84, in84 ); FALLTHRU + case 84UL: STORE_COMPARE_RELOAD( 83, in83 ); FALLTHRU + case 83UL: STORE_COMPARE_RELOAD( 82, in82 ); FALLTHRU + case 82UL: 
STORE_COMPARE_RELOAD( 81, in81 ); FALLTHRU + case 81UL: STORE_COMPARE_RELOAD( 80, in80 ); FALLTHRU + case 80UL: STORE_COMPARE_RELOAD( 79, in79 ); FALLTHRU + case 79UL: STORE_COMPARE_RELOAD( 78, in78 ); FALLTHRU + case 78UL: STORE_COMPARE_RELOAD( 77, in77 ); FALLTHRU + case 77UL: STORE_COMPARE_RELOAD( 76, in76 ); FALLTHRU + case 76UL: STORE_COMPARE_RELOAD( 75, in75 ); FALLTHRU + case 75UL: STORE_COMPARE_RELOAD( 74, in74 ); FALLTHRU + case 74UL: STORE_COMPARE_RELOAD( 73, in73 ); FALLTHRU + case 73UL: STORE_COMPARE_RELOAD( 72, in72 ); FALLTHRU + case 72UL: STORE_COMPARE_RELOAD( 71, in71 ); FALLTHRU + case 71UL: STORE_COMPARE_RELOAD( 70, in70 ); FALLTHRU + case 70UL: STORE_COMPARE_RELOAD( 69, in69 ); FALLTHRU + case 69UL: STORE_COMPARE_RELOAD( 68, in68 ); FALLTHRU + case 68UL: STORE_COMPARE_RELOAD( 67, in67 ); FALLTHRU + case 67UL: STORE_COMPARE_RELOAD( 66, in66 ); FALLTHRU + case 66UL: STORE_COMPARE_RELOAD( 65, in65 ); FALLTHRU + case 65UL: STORE_COMPARE_RELOAD( 64, in64 ); FALLTHRU + case 64UL: STORE_COMPARE_RELOAD( 63, in63 ); FALLTHRU + case 63UL: STORE_COMPARE_RELOAD( 62, in62 ); FALLTHRU + case 62UL: STORE_COMPARE_RELOAD( 61, in61 ); FALLTHRU + case 61UL: STORE_COMPARE_RELOAD( 60, in60 ); FALLTHRU + case 60UL: STORE_COMPARE_RELOAD( 59, in59 ); FALLTHRU + case 59UL: STORE_COMPARE_RELOAD( 58, in58 ); FALLTHRU + case 58UL: STORE_COMPARE_RELOAD( 57, in57 ); FALLTHRU + case 57UL: STORE_COMPARE_RELOAD( 56, in56 ); FALLTHRU + case 56UL: STORE_COMPARE_RELOAD( 55, in55 ); FALLTHRU + case 55UL: STORE_COMPARE_RELOAD( 54, in54 ); FALLTHRU + case 54UL: STORE_COMPARE_RELOAD( 53, in53 ); FALLTHRU + case 53UL: STORE_COMPARE_RELOAD( 52, in52 ); FALLTHRU + case 52UL: STORE_COMPARE_RELOAD( 51, in51 ); FALLTHRU + case 51UL: STORE_COMPARE_RELOAD( 50, in50 ); FALLTHRU + case 50UL: STORE_COMPARE_RELOAD( 49, in49 ); FALLTHRU + case 49UL: STORE_COMPARE_RELOAD( 48, in48 ); FALLTHRU + case 48UL: STORE_COMPARE_RELOAD( 47, in47 ); FALLTHRU + case 47UL: STORE_COMPARE_RELOAD( 46, in46 ); FALLTHRU + case 46UL: STORE_COMPARE_RELOAD( 45, in45 ); FALLTHRU + case 45UL: STORE_COMPARE_RELOAD( 44, in44 ); FALLTHRU + case 44UL: STORE_COMPARE_RELOAD( 43, in43 ); FALLTHRU + case 43UL: STORE_COMPARE_RELOAD( 42, in42 ); FALLTHRU + case 42UL: STORE_COMPARE_RELOAD( 41, in41 ); FALLTHRU + case 41UL: STORE_COMPARE_RELOAD( 40, in40 ); FALLTHRU + case 40UL: STORE_COMPARE_RELOAD( 39, in39 ); FALLTHRU + case 39UL: STORE_COMPARE_RELOAD( 38, in38 ); FALLTHRU + case 38UL: STORE_COMPARE_RELOAD( 37, in37 ); FALLTHRU + case 37UL: STORE_COMPARE_RELOAD( 36, in36 ); FALLTHRU + case 36UL: STORE_COMPARE_RELOAD( 35, in35 ); FALLTHRU + case 35UL: STORE_COMPARE_RELOAD( 34, in34 ); FALLTHRU + case 34UL: STORE_COMPARE_RELOAD( 33, in33 ); FALLTHRU + case 33UL: STORE_COMPARE_RELOAD( 32, in32 ); FALLTHRU + case 32UL: STORE_COMPARE_RELOAD( 31, in31 ); FALLTHRU + case 31UL: STORE_COMPARE_RELOAD( 30, in30 ); FALLTHRU + case 30UL: STORE_COMPARE_RELOAD( 29, in29 ); FALLTHRU + case 29UL: STORE_COMPARE_RELOAD( 28, in28 ); FALLTHRU + case 28UL: STORE_COMPARE_RELOAD( 27, in27 ); FALLTHRU + case 27UL: STORE_COMPARE_RELOAD( 26, in26 ); FALLTHRU + case 26UL: STORE_COMPARE_RELOAD( 25, in25 ); FALLTHRU + case 25UL: STORE_COMPARE_RELOAD( 24, in24 ); FALLTHRU + case 24UL: STORE_COMPARE_RELOAD( 23, in23 ); FALLTHRU + case 23UL: STORE_COMPARE_RELOAD( 22, in22 ); FALLTHRU + case 22UL: STORE_COMPARE_RELOAD( 21, in21 ); FALLTHRU + case 21UL: STORE_COMPARE_RELOAD( 20, in20 ); FALLTHRU + case 20UL: STORE_COMPARE_RELOAD( 19, in19 ); FALLTHRU + case 19UL: STORE_COMPARE_RELOAD( 18, 
in18 ); FALLTHRU + case 18UL: STORE_COMPARE_RELOAD( 17, in17 ); FALLTHRU + case 17UL: STORE_COMPARE_RELOAD( 16, in16 ); FALLTHRU + case 16UL: STORE_COMPARE_RELOAD( 15, in15 ); FALLTHRU + case 15UL: STORE_COMPARE_RELOAD( 14, in14 ); FALLTHRU + case 14UL: STORE_COMPARE_RELOAD( 13, in13 ); FALLTHRU + case 13UL: STORE_COMPARE_RELOAD( 12, in12 ); FALLTHRU + case 12UL: STORE_COMPARE_RELOAD( 11, in11 ); FALLTHRU + case 11UL: STORE_COMPARE_RELOAD( 10, in10 ); FALLTHRU + case 10UL: STORE_COMPARE_RELOAD( 9, in09 ); FALLTHRU + case 9UL: STORE_COMPARE_RELOAD( 8, in08 ); FALLTHRU + case 8UL: STORE_COMPARE_RELOAD( 7, in07 ); FALLTHRU + case 7UL: STORE_COMPARE_RELOAD( 6, in06 ); FALLTHRU + case 6UL: STORE_COMPARE_RELOAD( 5, in05 ); FALLTHRU + case 5UL: STORE_COMPARE_RELOAD( 4, in04 ); FALLTHRU + case 4UL: STORE_COMPARE_RELOAD( 3, in03 ); FALLTHRU + case 3UL: STORE_COMPARE_RELOAD( 2, in02 ); FALLTHRU + case 2UL: STORE_COMPARE_RELOAD( 1, in01 ); FALLTHRU + case 1UL: STORE_COMPARE_RELOAD( 0, in00 ); + } + + if( FD_UNLIKELY( GF_ANY( diff ) ) ) return FD_REEDSOL_ERR_INCONSISTENT; + shred_pos += GF_WIDTH; + shred_pos = fd_ulong_if( ((shred_sz-GF_WIDTH)=shred_cnt, do nothing, which will keep the value of the + shred if it existed in the variable. */ + #define STORE_COMPARE_RELOAD( n, var ) do{ \ + if( erased[ n ] ) gf_stu( shred[ n ] + shred_pos, var ); \ + else if( _erased[ n ] ) diff = GF_OR( diff, GF_ADD( var, gf_ldu( shred[ n ] + shred_pos ) ) ); \ + else var = gf_ldu( shred[ n ] + shred_pos ); \ + } while( 0 ) + #define STORE_COMPARE( n, var ) do{ \ + if( erased[ n ] ) gf_stu( shred[ n ] + shred_pos, var ); \ + else diff = GF_OR( diff, GF_ADD( var, gf_ldu( shred[ n ] + shred_pos ) ) ); \ + } while( 0 ) + switch( fd_ulong_min( shred_cnt, 32UL ) ) { + case 32UL: STORE_COMPARE_RELOAD( 31, in31 ); FALLTHRU + case 31UL: STORE_COMPARE_RELOAD( 30, in30 ); FALLTHRU + case 30UL: STORE_COMPARE_RELOAD( 29, in29 ); FALLTHRU + case 29UL: STORE_COMPARE_RELOAD( 28, in28 ); FALLTHRU + case 28UL: STORE_COMPARE_RELOAD( 27, in27 ); FALLTHRU + case 27UL: STORE_COMPARE_RELOAD( 26, in26 ); FALLTHRU + case 26UL: STORE_COMPARE_RELOAD( 25, in25 ); FALLTHRU + case 25UL: STORE_COMPARE_RELOAD( 24, in24 ); FALLTHRU + case 24UL: STORE_COMPARE_RELOAD( 23, in23 ); FALLTHRU + case 23UL: STORE_COMPARE_RELOAD( 22, in22 ); FALLTHRU + case 22UL: STORE_COMPARE_RELOAD( 21, in21 ); FALLTHRU + case 21UL: STORE_COMPARE_RELOAD( 20, in20 ); FALLTHRU + case 20UL: STORE_COMPARE_RELOAD( 19, in19 ); FALLTHRU + case 19UL: STORE_COMPARE_RELOAD( 18, in18 ); FALLTHRU + case 18UL: STORE_COMPARE_RELOAD( 17, in17 ); FALLTHRU + case 17UL: STORE_COMPARE_RELOAD( 16, in16 ); FALLTHRU + case 16UL: STORE_COMPARE_RELOAD( 15, in15 ); FALLTHRU + case 15UL: STORE_COMPARE_RELOAD( 14, in14 ); FALLTHRU + case 14UL: STORE_COMPARE_RELOAD( 13, in13 ); FALLTHRU + case 13UL: STORE_COMPARE_RELOAD( 12, in12 ); FALLTHRU + case 12UL: STORE_COMPARE_RELOAD( 11, in11 ); FALLTHRU + case 11UL: STORE_COMPARE_RELOAD( 10, in10 ); FALLTHRU + case 10UL: STORE_COMPARE_RELOAD( 9, in09 ); FALLTHRU + case 9UL: STORE_COMPARE_RELOAD( 8, in08 ); FALLTHRU + case 8UL: STORE_COMPARE_RELOAD( 7, in07 ); FALLTHRU + case 7UL: STORE_COMPARE_RELOAD( 6, in06 ); FALLTHRU + case 6UL: STORE_COMPARE_RELOAD( 5, in05 ); FALLTHRU + case 5UL: STORE_COMPARE_RELOAD( 4, in04 ); FALLTHRU + case 4UL: STORE_COMPARE_RELOAD( 3, in03 ); FALLTHRU + case 3UL: STORE_COMPARE_RELOAD( 2, in02 ); FALLTHRU + case 2UL: STORE_COMPARE_RELOAD( 1, in01 ); FALLTHRU + case 1UL: STORE_COMPARE_RELOAD( 0, in00 ); + } + + ulong 
shreds_remaining = shred_cnt-fd_ulong_min( shred_cnt, 32UL ); + if( shreds_remaining>0UL ) { + FD_REEDSOL_GENERATE_IFFT( 32, 0, ALL_VARS ); + FD_REEDSOL_GENERATE_FFT( 32, 32, ALL_VARS ); + + switch( fd_ulong_min( shreds_remaining, 32UL ) ) { + case 32UL: STORE_COMPARE( 63, in31 ); FALLTHRU + case 31UL: STORE_COMPARE( 62, in30 ); FALLTHRU + case 30UL: STORE_COMPARE( 61, in29 ); FALLTHRU + case 29UL: STORE_COMPARE( 60, in28 ); FALLTHRU + case 28UL: STORE_COMPARE( 59, in27 ); FALLTHRU + case 27UL: STORE_COMPARE( 58, in26 ); FALLTHRU + case 26UL: STORE_COMPARE( 57, in25 ); FALLTHRU + case 25UL: STORE_COMPARE( 56, in24 ); FALLTHRU + case 24UL: STORE_COMPARE( 55, in23 ); FALLTHRU + case 23UL: STORE_COMPARE( 54, in22 ); FALLTHRU + case 22UL: STORE_COMPARE( 53, in21 ); FALLTHRU + case 21UL: STORE_COMPARE( 52, in20 ); FALLTHRU + case 20UL: STORE_COMPARE( 51, in19 ); FALLTHRU + case 19UL: STORE_COMPARE( 50, in18 ); FALLTHRU + case 18UL: STORE_COMPARE( 49, in17 ); FALLTHRU + case 17UL: STORE_COMPARE( 48, in16 ); FALLTHRU + case 16UL: STORE_COMPARE( 47, in15 ); FALLTHRU + case 15UL: STORE_COMPARE( 46, in14 ); FALLTHRU + case 14UL: STORE_COMPARE( 45, in13 ); FALLTHRU + case 13UL: STORE_COMPARE( 44, in12 ); FALLTHRU + case 12UL: STORE_COMPARE( 43, in11 ); FALLTHRU + case 11UL: STORE_COMPARE( 42, in10 ); FALLTHRU + case 10UL: STORE_COMPARE( 41, in09 ); FALLTHRU + case 9UL: STORE_COMPARE( 40, in08 ); FALLTHRU + case 8UL: STORE_COMPARE( 39, in07 ); FALLTHRU + case 7UL: STORE_COMPARE( 38, in06 ); FALLTHRU + case 6UL: STORE_COMPARE( 37, in05 ); FALLTHRU + case 5UL: STORE_COMPARE( 36, in04 ); FALLTHRU + case 4UL: STORE_COMPARE( 35, in03 ); FALLTHRU + case 3UL: STORE_COMPARE( 34, in02 ); FALLTHRU + case 2UL: STORE_COMPARE( 33, in01 ); FALLTHRU + case 1UL: STORE_COMPARE( 32, in00 ); + } + shreds_remaining -= fd_ulong_min( shreds_remaining, 32UL ); + } + if( shreds_remaining>0UL ) { + FD_REEDSOL_GENERATE_IFFT( 32, 32, ALL_VARS ); + FD_REEDSOL_GENERATE_FFT( 32, 64, ALL_VARS ); + + switch( fd_ulong_min( shreds_remaining, 32UL ) ) { + case 32UL: STORE_COMPARE( 95, in31 ); FALLTHRU + case 31UL: STORE_COMPARE( 94, in30 ); FALLTHRU + case 30UL: STORE_COMPARE( 93, in29 ); FALLTHRU + case 29UL: STORE_COMPARE( 92, in28 ); FALLTHRU + case 28UL: STORE_COMPARE( 91, in27 ); FALLTHRU + case 27UL: STORE_COMPARE( 90, in26 ); FALLTHRU + case 26UL: STORE_COMPARE( 89, in25 ); FALLTHRU + case 25UL: STORE_COMPARE( 88, in24 ); FALLTHRU + case 24UL: STORE_COMPARE( 87, in23 ); FALLTHRU + case 23UL: STORE_COMPARE( 86, in22 ); FALLTHRU + case 22UL: STORE_COMPARE( 85, in21 ); FALLTHRU + case 21UL: STORE_COMPARE( 84, in20 ); FALLTHRU + case 20UL: STORE_COMPARE( 83, in19 ); FALLTHRU + case 19UL: STORE_COMPARE( 82, in18 ); FALLTHRU + case 18UL: STORE_COMPARE( 81, in17 ); FALLTHRU + case 17UL: STORE_COMPARE( 80, in16 ); FALLTHRU + case 16UL: STORE_COMPARE( 79, in15 ); FALLTHRU + case 15UL: STORE_COMPARE( 78, in14 ); FALLTHRU + case 14UL: STORE_COMPARE( 77, in13 ); FALLTHRU + case 13UL: STORE_COMPARE( 76, in12 ); FALLTHRU + case 12UL: STORE_COMPARE( 75, in11 ); FALLTHRU + case 11UL: STORE_COMPARE( 74, in10 ); FALLTHRU + case 10UL: STORE_COMPARE( 73, in09 ); FALLTHRU + case 9UL: STORE_COMPARE( 72, in08 ); FALLTHRU + case 8UL: STORE_COMPARE( 71, in07 ); FALLTHRU + case 7UL: STORE_COMPARE( 70, in06 ); FALLTHRU + case 6UL: STORE_COMPARE( 69, in05 ); FALLTHRU + case 5UL: STORE_COMPARE( 68, in04 ); FALLTHRU + case 4UL: STORE_COMPARE( 67, in03 ); FALLTHRU + case 3UL: STORE_COMPARE( 66, in02 ); FALLTHRU + case 2UL: STORE_COMPARE( 65, in01 ); FALLTHRU 
+ case 1UL: STORE_COMPARE( 64, in00 ); + } + shreds_remaining -= fd_ulong_min( shreds_remaining, 32UL ); + } + if( shreds_remaining>0UL ) { + FD_REEDSOL_GENERATE_IFFT( 32, 64, ALL_VARS ); + FD_REEDSOL_GENERATE_FFT( 32, 96, ALL_VARS ); + + switch( fd_ulong_min( shreds_remaining, 32UL ) ) { + case 32UL: STORE_COMPARE( 127, in31 ); FALLTHRU + case 31UL: STORE_COMPARE( 126, in30 ); FALLTHRU + case 30UL: STORE_COMPARE( 125, in29 ); FALLTHRU + case 29UL: STORE_COMPARE( 124, in28 ); FALLTHRU + case 28UL: STORE_COMPARE( 123, in27 ); FALLTHRU + case 27UL: STORE_COMPARE( 122, in26 ); FALLTHRU + case 26UL: STORE_COMPARE( 121, in25 ); FALLTHRU + case 25UL: STORE_COMPARE( 120, in24 ); FALLTHRU + case 24UL: STORE_COMPARE( 119, in23 ); FALLTHRU + case 23UL: STORE_COMPARE( 118, in22 ); FALLTHRU + case 22UL: STORE_COMPARE( 117, in21 ); FALLTHRU + case 21UL: STORE_COMPARE( 116, in20 ); FALLTHRU + case 20UL: STORE_COMPARE( 115, in19 ); FALLTHRU + case 19UL: STORE_COMPARE( 114, in18 ); FALLTHRU + case 18UL: STORE_COMPARE( 113, in17 ); FALLTHRU + case 17UL: STORE_COMPARE( 112, in16 ); FALLTHRU + case 16UL: STORE_COMPARE( 111, in15 ); FALLTHRU + case 15UL: STORE_COMPARE( 110, in14 ); FALLTHRU + case 14UL: STORE_COMPARE( 109, in13 ); FALLTHRU + case 13UL: STORE_COMPARE( 108, in12 ); FALLTHRU + case 12UL: STORE_COMPARE( 107, in11 ); FALLTHRU + case 11UL: STORE_COMPARE( 106, in10 ); FALLTHRU + case 10UL: STORE_COMPARE( 105, in09 ); FALLTHRU + case 9UL: STORE_COMPARE( 104, in08 ); FALLTHRU + case 8UL: STORE_COMPARE( 103, in07 ); FALLTHRU + case 7UL: STORE_COMPARE( 102, in06 ); FALLTHRU + case 6UL: STORE_COMPARE( 101, in05 ); FALLTHRU + case 5UL: STORE_COMPARE( 100, in04 ); FALLTHRU + case 4UL: STORE_COMPARE( 99, in03 ); FALLTHRU + case 3UL: STORE_COMPARE( 98, in02 ); FALLTHRU + case 2UL: STORE_COMPARE( 97, in01 ); FALLTHRU + case 1UL: STORE_COMPARE( 96, in00 ); + } + shreds_remaining -= fd_ulong_min( shreds_remaining, 32UL ); + } + if( shreds_remaining>0UL ) { + FD_REEDSOL_GENERATE_IFFT( 32, 96, ALL_VARS ); + FD_REEDSOL_GENERATE_FFT( 32, 128, ALL_VARS ); + + switch( fd_ulong_min( shreds_remaining, 32UL ) ) { + case 7UL: STORE_COMPARE( 134, in06 ); FALLTHRU + case 6UL: STORE_COMPARE( 133, in05 ); FALLTHRU + case 5UL: STORE_COMPARE( 132, in04 ); FALLTHRU + case 4UL: STORE_COMPARE( 131, in03 ); FALLTHRU + case 3UL: STORE_COMPARE( 130, in02 ); FALLTHRU + case 2UL: STORE_COMPARE( 129, in01 ); FALLTHRU + case 1UL: STORE_COMPARE( 128, in00 ); + } + shreds_remaining -= fd_ulong_min( shreds_remaining, 32UL ); + } + if( FD_UNLIKELY( GF_ANY( diff ) ) ) return FD_REEDSOL_ERR_INCONSISTENT; + shred_pos += GF_WIDTH; + shred_pos = fd_ulong_if( ((shred_sz-GF_WIDTH)=shred_cnt, do nothing, which will keep the value of the + shred if it existed in the variable. 
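+
+   Each FD_REEDSOL_GENERATE_IFFT( 64, off, ... ) followed by
+   FD_REEDSOL_GENERATE_FFT( 64, off+64, ... ) below shifts the window
+   that the in00..in63 variables cover by 64 positions: after the first
+   pair, in00..in63 map to shred indices 64..127, and after the second,
+   in00..in06 map to indices 128..134.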
*/ + #define STORE_COMPARE_RELOAD( n, var ) do{ \ + if( erased[ n ] ) gf_stu( shred[ n ] + shred_pos, var ); \ + else if( _erased[ n ] ) diff = GF_OR( diff, GF_ADD( var, gf_ldu( shred[ n ] + shred_pos ) ) ); \ + else var = gf_ldu( shred[ n ] + shred_pos ); \ + } while( 0 ) + #define STORE_COMPARE( n, var ) do{ \ + if( erased[ n ] ) gf_stu( shred[ n ] + shred_pos, var ); \ + else diff = GF_OR( diff, GF_ADD( var, gf_ldu( shred[ n ] + shred_pos ) ) ); \ + } while( 0 ) + switch( fd_ulong_min( shred_cnt, 64UL ) ) { + case 64UL: STORE_COMPARE_RELOAD( 63, in63 ); FALLTHRU + case 63UL: STORE_COMPARE_RELOAD( 62, in62 ); FALLTHRU + case 62UL: STORE_COMPARE_RELOAD( 61, in61 ); FALLTHRU + case 61UL: STORE_COMPARE_RELOAD( 60, in60 ); FALLTHRU + case 60UL: STORE_COMPARE_RELOAD( 59, in59 ); FALLTHRU + case 59UL: STORE_COMPARE_RELOAD( 58, in58 ); FALLTHRU + case 58UL: STORE_COMPARE_RELOAD( 57, in57 ); FALLTHRU + case 57UL: STORE_COMPARE_RELOAD( 56, in56 ); FALLTHRU + case 56UL: STORE_COMPARE_RELOAD( 55, in55 ); FALLTHRU + case 55UL: STORE_COMPARE_RELOAD( 54, in54 ); FALLTHRU + case 54UL: STORE_COMPARE_RELOAD( 53, in53 ); FALLTHRU + case 53UL: STORE_COMPARE_RELOAD( 52, in52 ); FALLTHRU + case 52UL: STORE_COMPARE_RELOAD( 51, in51 ); FALLTHRU + case 51UL: STORE_COMPARE_RELOAD( 50, in50 ); FALLTHRU + case 50UL: STORE_COMPARE_RELOAD( 49, in49 ); FALLTHRU + case 49UL: STORE_COMPARE_RELOAD( 48, in48 ); FALLTHRU + case 48UL: STORE_COMPARE_RELOAD( 47, in47 ); FALLTHRU + case 47UL: STORE_COMPARE_RELOAD( 46, in46 ); FALLTHRU + case 46UL: STORE_COMPARE_RELOAD( 45, in45 ); FALLTHRU + case 45UL: STORE_COMPARE_RELOAD( 44, in44 ); FALLTHRU + case 44UL: STORE_COMPARE_RELOAD( 43, in43 ); FALLTHRU + case 43UL: STORE_COMPARE_RELOAD( 42, in42 ); FALLTHRU + case 42UL: STORE_COMPARE_RELOAD( 41, in41 ); FALLTHRU + case 41UL: STORE_COMPARE_RELOAD( 40, in40 ); FALLTHRU + case 40UL: STORE_COMPARE_RELOAD( 39, in39 ); FALLTHRU + case 39UL: STORE_COMPARE_RELOAD( 38, in38 ); FALLTHRU + case 38UL: STORE_COMPARE_RELOAD( 37, in37 ); FALLTHRU + case 37UL: STORE_COMPARE_RELOAD( 36, in36 ); FALLTHRU + case 36UL: STORE_COMPARE_RELOAD( 35, in35 ); FALLTHRU + case 35UL: STORE_COMPARE_RELOAD( 34, in34 ); FALLTHRU + case 34UL: STORE_COMPARE_RELOAD( 33, in33 ); FALLTHRU + case 33UL: STORE_COMPARE_RELOAD( 32, in32 ); FALLTHRU + case 32UL: STORE_COMPARE_RELOAD( 31, in31 ); FALLTHRU + case 31UL: STORE_COMPARE_RELOAD( 30, in30 ); FALLTHRU + case 30UL: STORE_COMPARE_RELOAD( 29, in29 ); FALLTHRU + case 29UL: STORE_COMPARE_RELOAD( 28, in28 ); FALLTHRU + case 28UL: STORE_COMPARE_RELOAD( 27, in27 ); FALLTHRU + case 27UL: STORE_COMPARE_RELOAD( 26, in26 ); FALLTHRU + case 26UL: STORE_COMPARE_RELOAD( 25, in25 ); FALLTHRU + case 25UL: STORE_COMPARE_RELOAD( 24, in24 ); FALLTHRU + case 24UL: STORE_COMPARE_RELOAD( 23, in23 ); FALLTHRU + case 23UL: STORE_COMPARE_RELOAD( 22, in22 ); FALLTHRU + case 22UL: STORE_COMPARE_RELOAD( 21, in21 ); FALLTHRU + case 21UL: STORE_COMPARE_RELOAD( 20, in20 ); FALLTHRU + case 20UL: STORE_COMPARE_RELOAD( 19, in19 ); FALLTHRU + case 19UL: STORE_COMPARE_RELOAD( 18, in18 ); FALLTHRU + case 18UL: STORE_COMPARE_RELOAD( 17, in17 ); FALLTHRU + case 17UL: STORE_COMPARE_RELOAD( 16, in16 ); FALLTHRU + case 16UL: STORE_COMPARE_RELOAD( 15, in15 ); FALLTHRU + case 15UL: STORE_COMPARE_RELOAD( 14, in14 ); FALLTHRU + case 14UL: STORE_COMPARE_RELOAD( 13, in13 ); FALLTHRU + case 13UL: STORE_COMPARE_RELOAD( 12, in12 ); FALLTHRU + case 12UL: STORE_COMPARE_RELOAD( 11, in11 ); FALLTHRU + case 11UL: STORE_COMPARE_RELOAD( 10, in10 ); FALLTHRU + case 10UL: 
STORE_COMPARE_RELOAD( 9, in09 ); FALLTHRU + case 9UL: STORE_COMPARE_RELOAD( 8, in08 ); FALLTHRU + case 8UL: STORE_COMPARE_RELOAD( 7, in07 ); FALLTHRU + case 7UL: STORE_COMPARE_RELOAD( 6, in06 ); FALLTHRU + case 6UL: STORE_COMPARE_RELOAD( 5, in05 ); FALLTHRU + case 5UL: STORE_COMPARE_RELOAD( 4, in04 ); FALLTHRU + case 4UL: STORE_COMPARE_RELOAD( 3, in03 ); FALLTHRU + case 3UL: STORE_COMPARE_RELOAD( 2, in02 ); FALLTHRU + case 2UL: STORE_COMPARE_RELOAD( 1, in01 ); FALLTHRU + case 1UL: STORE_COMPARE_RELOAD( 0, in00 ); + } + + ulong shreds_remaining = shred_cnt-fd_ulong_min( shred_cnt, 64UL ); + if( shreds_remaining>0UL ) { + FD_REEDSOL_GENERATE_IFFT( 64, 0, ALL_VARS ); + FD_REEDSOL_GENERATE_FFT( 64, 64, ALL_VARS ); + + switch( fd_ulong_min( shreds_remaining, 64UL ) ) { + case 64UL: STORE_COMPARE( 127, in63 ); FALLTHRU + case 63UL: STORE_COMPARE( 126, in62 ); FALLTHRU + case 62UL: STORE_COMPARE( 125, in61 ); FALLTHRU + case 61UL: STORE_COMPARE( 124, in60 ); FALLTHRU + case 60UL: STORE_COMPARE( 123, in59 ); FALLTHRU + case 59UL: STORE_COMPARE( 122, in58 ); FALLTHRU + case 58UL: STORE_COMPARE( 121, in57 ); FALLTHRU + case 57UL: STORE_COMPARE( 120, in56 ); FALLTHRU + case 56UL: STORE_COMPARE( 119, in55 ); FALLTHRU + case 55UL: STORE_COMPARE( 118, in54 ); FALLTHRU + case 54UL: STORE_COMPARE( 117, in53 ); FALLTHRU + case 53UL: STORE_COMPARE( 116, in52 ); FALLTHRU + case 52UL: STORE_COMPARE( 115, in51 ); FALLTHRU + case 51UL: STORE_COMPARE( 114, in50 ); FALLTHRU + case 50UL: STORE_COMPARE( 113, in49 ); FALLTHRU + case 49UL: STORE_COMPARE( 112, in48 ); FALLTHRU + case 48UL: STORE_COMPARE( 111, in47 ); FALLTHRU + case 47UL: STORE_COMPARE( 110, in46 ); FALLTHRU + case 46UL: STORE_COMPARE( 109, in45 ); FALLTHRU + case 45UL: STORE_COMPARE( 108, in44 ); FALLTHRU + case 44UL: STORE_COMPARE( 107, in43 ); FALLTHRU + case 43UL: STORE_COMPARE( 106, in42 ); FALLTHRU + case 42UL: STORE_COMPARE( 105, in41 ); FALLTHRU + case 41UL: STORE_COMPARE( 104, in40 ); FALLTHRU + case 40UL: STORE_COMPARE( 103, in39 ); FALLTHRU + case 39UL: STORE_COMPARE( 102, in38 ); FALLTHRU + case 38UL: STORE_COMPARE( 101, in37 ); FALLTHRU + case 37UL: STORE_COMPARE( 100, in36 ); FALLTHRU + case 36UL: STORE_COMPARE( 99, in35 ); FALLTHRU + case 35UL: STORE_COMPARE( 98, in34 ); FALLTHRU + case 34UL: STORE_COMPARE( 97, in33 ); FALLTHRU + case 33UL: STORE_COMPARE( 96, in32 ); FALLTHRU + case 32UL: STORE_COMPARE( 95, in31 ); FALLTHRU + case 31UL: STORE_COMPARE( 94, in30 ); FALLTHRU + case 30UL: STORE_COMPARE( 93, in29 ); FALLTHRU + case 29UL: STORE_COMPARE( 92, in28 ); FALLTHRU + case 28UL: STORE_COMPARE( 91, in27 ); FALLTHRU + case 27UL: STORE_COMPARE( 90, in26 ); FALLTHRU + case 26UL: STORE_COMPARE( 89, in25 ); FALLTHRU + case 25UL: STORE_COMPARE( 88, in24 ); FALLTHRU + case 24UL: STORE_COMPARE( 87, in23 ); FALLTHRU + case 23UL: STORE_COMPARE( 86, in22 ); FALLTHRU + case 22UL: STORE_COMPARE( 85, in21 ); FALLTHRU + case 21UL: STORE_COMPARE( 84, in20 ); FALLTHRU + case 20UL: STORE_COMPARE( 83, in19 ); FALLTHRU + case 19UL: STORE_COMPARE( 82, in18 ); FALLTHRU + case 18UL: STORE_COMPARE( 81, in17 ); FALLTHRU + case 17UL: STORE_COMPARE( 80, in16 ); FALLTHRU + case 16UL: STORE_COMPARE( 79, in15 ); FALLTHRU + case 15UL: STORE_COMPARE( 78, in14 ); FALLTHRU + case 14UL: STORE_COMPARE( 77, in13 ); FALLTHRU + case 13UL: STORE_COMPARE( 76, in12 ); FALLTHRU + case 12UL: STORE_COMPARE( 75, in11 ); FALLTHRU + case 11UL: STORE_COMPARE( 74, in10 ); FALLTHRU + case 10UL: STORE_COMPARE( 73, in09 ); FALLTHRU + case 9UL: STORE_COMPARE( 72, in08 ); FALLTHRU + case 
8UL: STORE_COMPARE( 71, in07 ); FALLTHRU + case 7UL: STORE_COMPARE( 70, in06 ); FALLTHRU + case 6UL: STORE_COMPARE( 69, in05 ); FALLTHRU + case 5UL: STORE_COMPARE( 68, in04 ); FALLTHRU + case 4UL: STORE_COMPARE( 67, in03 ); FALLTHRU + case 3UL: STORE_COMPARE( 66, in02 ); FALLTHRU + case 2UL: STORE_COMPARE( 65, in01 ); FALLTHRU + case 1UL: STORE_COMPARE( 64, in00 ); + } + shreds_remaining -= fd_ulong_min( shreds_remaining, 64UL ); + } + if( shreds_remaining>0UL ) { + FD_REEDSOL_GENERATE_IFFT( 64, 64, ALL_VARS ); + FD_REEDSOL_GENERATE_FFT( 64, 128, ALL_VARS ); + + switch( fd_ulong_min( shreds_remaining, 64UL ) ) { + case 7UL: STORE_COMPARE( 134, in06 ); FALLTHRU + case 6UL: STORE_COMPARE( 133, in05 ); FALLTHRU + case 5UL: STORE_COMPARE( 132, in04 ); FALLTHRU + case 4UL: STORE_COMPARE( 131, in03 ); FALLTHRU + case 3UL: STORE_COMPARE( 130, in02 ); FALLTHRU + case 2UL: STORE_COMPARE( 129, in01 ); FALLTHRU + case 1UL: STORE_COMPARE( 128, in00 ); + } + shreds_remaining -= fd_ulong_min( shreds_remaining, 64UL ); + } + if( FD_UNLIKELY( GF_ANY( diff ) ) ) return FD_REEDSOL_ERR_INCONSISTENT; + shred_pos += GF_WIDTH; + shred_pos = fd_ulong_if( ((shred_sz-GF_WIDTH)=shred_cnt, do nothing, which will keep the value of the") + cprint(" shred if it existed in the variable. */") + + + cprint("""#define STORE_COMPARE_RELOAD( n, var ) do{ \\ + if( erased[ n ] ) gf_stu( shred[ n ] + shred_pos, var ); \\ + else if( _erased[ n ] ) diff = GF_OR( diff, GF_ADD( var, gf_ldu( shred[ n ] + shred_pos ) ) ); \\ + else var = gf_ldu( shred[ n ] + shred_pos ); \\ + } while( 0 )""") + cprint("""#define STORE_COMPARE( n, var ) do{ \\ + if( erased[ n ] ) gf_stu( shred[ n ] + shred_pos, var ); \\ + else diff = GF_OR( diff, GF_ADD( var, gf_ldu( shred[ n ] + shred_pos ) ) ); \\ + } while( 0 )""") + cprint(f"switch( fd_ulong_min( shred_cnt, {n}UL ) ) " + "{") + for k in range(min(n, max_shreds)-1, -1, -1): + fallthru = "" + if k>0: + fallthru = " FALLTHRU" + cprint(f"case {k+1:2}UL: STORE_COMPARE_RELOAD( {k:2}, in{k:02} );{fallthru}") + cprint("}") + cprint("") + + if max_shreds > n: + cprint(f"ulong shreds_remaining = shred_cnt-fd_ulong_min( shred_cnt, {n}UL );") + + potential_shreds_remaining = max_shreds - n + chunk_cnt = 0 + while potential_shreds_remaining>0: + cprint("if( shreds_remaining>0UL ) {") + cprint(f"FD_REEDSOL_GENERATE_IFFT( {n}, {n*chunk_cnt:2}, ALL_VARS );") + cprint(f"FD_REEDSOL_GENERATE_FFT( {n}, {n*(chunk_cnt+1):2}, ALL_VARS );") + cprint("") + cprint(f"switch( fd_ulong_min( shreds_remaining, {n}UL ) ) " + "{") + for k in range(min(n-1, potential_shreds_remaining), -1, -1): + fallthru = "" + if k>0: + fallthru = " FALLTHRU" + cprint(f"case {k+1:2}UL: STORE_COMPARE( {k+n*(chunk_cnt+1):2}, in{k:02} );{fallthru}") + cprint("}") + cprint(f'shreds_remaining -= fd_ulong_min( shreds_remaining, {n}UL );') + cprint("}") + + potential_shreds_remaining -= n + chunk_cnt += 1 + + cprint("if( FD_UNLIKELY( GF_ANY( diff ) ) ) return FD_REEDSOL_ERR_INCONSISTENT;") + + cprint('shred_pos += GF_WIDTH;') + cprint('shred_pos = fd_ulong_if( ((shred_sz-GF_WIDTH)p_cnt ) ) { FD_TEST( retval==FD_REEDSOL_ERR_INSUFFICIENT ); continue; } + + FD_TEST( FD_REEDSOL_OK==retval ); + + for( ulong i=0UL; i Date: Tue, 22 Aug 2023 17:44:39 -0500 Subject: [PATCH 4/6] Minor polishing From review. Mostly cosmetic stuff. The most significant change is the Galois field math implementation can be selected at compile time independent of build target. As part of this, fd_reedsol_internal.h and fd_reedsol_pi.h were merged into an fd_reedsol_private.h. 
The rest is mostly naming tweaks, inclusion tweaks, macro robustness, error naming strerror functions and the like. --- src/ballet/reedsol/fd_reedsol.c | 102 +++++--- src/ballet/reedsol/fd_reedsol.h | 270 +++++++++++--------- src/ballet/reedsol/fd_reedsol_arith_avx2.h | 95 ++++--- src/ballet/reedsol/fd_reedsol_arith_gfni.h | 84 +++--- src/ballet/reedsol/fd_reedsol_arith_none.h | 41 +-- src/ballet/reedsol/fd_reedsol_encode_128.c | 22 +- src/ballet/reedsol/fd_reedsol_encode_16.c | 22 +- src/ballet/reedsol/fd_reedsol_encode_32.c | 22 +- src/ballet/reedsol/fd_reedsol_encode_64.c | 22 +- src/ballet/reedsol/fd_reedsol_fderiv.h | 17 +- src/ballet/reedsol/fd_reedsol_fft.h | 43 +--- src/ballet/reedsol/fd_reedsol_gfni_32.S | 19 +- src/ballet/reedsol/fd_reedsol_internal.h | 101 -------- src/ballet/reedsol/fd_reedsol_pi.c | 64 ++--- src/ballet/reedsol/fd_reedsol_pi.h | 47 ---- src/ballet/reedsol/fd_reedsol_ppt.h | 145 +---------- src/ballet/reedsol/fd_reedsol_private.h | 204 +++++++++++++++ src/ballet/reedsol/fd_reedsol_recover_128.c | 31 +-- src/ballet/reedsol/fd_reedsol_recover_16.c | 31 +-- src/ballet/reedsol/fd_reedsol_recover_256.c | 31 +-- src/ballet/reedsol/fd_reedsol_recover_32.c | 31 +-- src/ballet/reedsol/fd_reedsol_recover_64.c | 31 +-- src/ballet/reedsol/generate_encode.py | 16 +- src/ballet/reedsol/generate_fderiv.py | 6 +- src/ballet/reedsol/generate_fft.py | 21 +- src/ballet/reedsol/generate_ppt.py | 27 +- src/ballet/reedsol/generate_recover.py | 27 +- src/ballet/reedsol/test_reedsol.c | 73 ++---- 28 files changed, 712 insertions(+), 933 deletions(-) delete mode 100644 src/ballet/reedsol/fd_reedsol_internal.h delete mode 100644 src/ballet/reedsol/fd_reedsol_pi.h create mode 100644 src/ballet/reedsol/fd_reedsol_private.h diff --git a/src/ballet/reedsol/fd_reedsol.c b/src/ballet/reedsol/fd_reedsol.c index 668203de286..d7faadef6c8 100644 --- a/src/ballet/reedsol/fd_reedsol.c +++ b/src/ballet/reedsol/fd_reedsol.c @@ -1,70 +1,88 @@ -#include "fd_reedsol.h" -#include "fd_reedsol_internal.h" +#include "fd_reedsol_private.h" + /* Include the constants in one central spot */ -#define INCLUDE_CONSTANTS -#if FD_HAS_GFNI -#include "fd_reedsol_arith_gfni.h" -#elif FD_HAS_AVX -#include "fd_reedsol_arith_avx2.h" + +#if FD_REEDSOL_ARITH_IMPL==0 +FD_IMPORT_BINARY( fd_reedsol_arith_consts_generic_mul, "src/ballet/reedsol/constants/generic_constants.bin" ); +#elif FD_REEDSOL_ARITH_IMPL==1 +FD_IMPORT_BINARY( fd_reedsol_arith_consts_avx_mul, "src/ballet/reedsol/constants/avx2_constants.bin" ); #else -#include "fd_reedsol_arith_none.h" +FD_IMPORT_BINARY( fd_reedsol_arith_consts_gfni_mul, "src/ballet/reedsol/constants/gfni_constants.bin" ); #endif -void fd_reedsol_encode_fini( fd_reedsol_t * rs ) { -#if FD_HAS_GFNI +void +fd_reedsol_encode_fini( fd_reedsol_t * rs ) { + +# if FD_REEDSOL_ARITH_IMPL==2 if( FD_LIKELY( (rs->data_shred_cnt==32UL) & (rs->parity_shred_cnt==32UL ) ) ) - fd_reedsol_encode_32_32( rs->shred_sz, rs->encode.data_shred, rs->encode.parity_shred, rs->scratch ); + fd_reedsol_private_encode_32_32( rs->shred_sz, rs->encode.data_shred, rs->encode.parity_shred, rs->scratch ); else -#endif - if( FD_UNLIKELY( rs->data_shred_cnt<=16UL ) ) - fd_reedsol_encode_16( rs->shred_sz, rs->encode.data_shred, rs->data_shred_cnt, rs->encode.parity_shred, rs->parity_shred_cnt ); - else if( FD_LIKELY( rs->data_shred_cnt<=32UL ) ) - fd_reedsol_encode_32( rs->shred_sz, rs->encode.data_shred, rs->data_shred_cnt, rs->encode.parity_shred, rs->parity_shred_cnt ); - else if( FD_LIKELY( rs->data_shred_cnt<=64UL ) ) - 
fd_reedsol_encode_64( rs->shred_sz, rs->encode.data_shred, rs->data_shred_cnt, rs->encode.parity_shred, rs->parity_shred_cnt ); - else - fd_reedsol_encode_128( rs->shred_sz, rs->encode.data_shred, rs->data_shred_cnt, rs->encode.parity_shred, rs->parity_shred_cnt ); +# endif + if( FD_UNLIKELY( rs->data_shred_cnt<=16UL ) ) + fd_reedsol_private_encode_16 ( rs->shred_sz, rs->encode.data_shred, rs->data_shred_cnt, rs->encode.parity_shred, rs->parity_shred_cnt ); + else if( FD_LIKELY( rs->data_shred_cnt<=32UL ) ) + fd_reedsol_private_encode_32 ( rs->shred_sz, rs->encode.data_shred, rs->data_shred_cnt, rs->encode.parity_shred, rs->parity_shred_cnt ); + else if( FD_LIKELY( rs->data_shred_cnt<=64UL ) ) + fd_reedsol_private_encode_64 ( rs->shred_sz, rs->encode.data_shred, rs->data_shred_cnt, rs->encode.parity_shred, rs->parity_shred_cnt ); + else + fd_reedsol_private_encode_128( rs->shred_sz, rs->encode.data_shred, rs->data_shred_cnt, rs->encode.parity_shred, rs->parity_shred_cnt ); rs->data_shred_cnt = 0UL; rs->parity_shred_cnt = 0UL; } - -int fd_reedsol_recover_fini( fd_reedsol_t * rs ) { - /* How many shreds do we need to consider in order to find - rs->data_shred_cnt un-erased? */ - ulong unerased = 0UL; - ulong i=0UL; +int +fd_reedsol_recover_fini( fd_reedsol_t * rs ) { ulong data_shred_cnt = rs->data_shred_cnt; ulong parity_shred_cnt = rs->parity_shred_cnt; + rs->data_shred_cnt = 0UL; rs->parity_shred_cnt = 0UL; + /* How many shreds do we need to consider in order to find + rs->data_shred_cnt un-erased? */ + + ulong unerased = 0UL; + ulong i = 0UL; for( ; irecover.erased[ i ]; if( unerased==data_shred_cnt ) break; } - if( FD_UNLIKELY( unerased != data_shred_cnt ) ) return FD_REEDSOL_ERR_INSUFFICIENT; + if( FD_UNLIKELY( unerased!=data_shred_cnt ) ) return FD_REEDSOL_ERR_PARTIAL; - /* if( FD_LIKELY( i==data_shred_cnt ) ) { - // Common case: we have all of the data shreds - if( FD_UNLIKELY( i<=16UL ) ) - return fd_reedsol_recover_first_16( rs->shred_sz, rs->recover.shred, data_shred_cnt, parity_shred_cnt ); - if( FD_LIKELY( i<=32UL ) ) - return fd_reedsol_recover_first_32( rs->shred_sz, rs->recover.shred, data_shred_cnt, parity_shred_cnt ); - if( FD_LIKELY( i<=64UL ) ) - return fd_reedsol_recover_first_64( rs->shred_sz, rs->recover.shred, data_shred_cnt, parity_shred_cnt ); - return fd_reedsol_recover_first_128( rs->shred_sz, rs->recover.shred, data_shred_cnt, parity_shred_cnt ); - } */ +# if 0 /* TODO: Add first variant for slightly more performance */ + if( FD_LIKELY( i==data_shred_cnt ) ) { + // Common case: we have all of the data shreds + if( FD_UNLIKELY( i<=16UL ) ) + return fd_reedsol_private_recover_first_16( rs->shred_sz, rs->recover.shred, data_shred_cnt, parity_shred_cnt ); + if( FD_LIKELY( i<=32UL ) ) + return fd_reedsol_private_recover_first_32( rs->shred_sz, rs->recover.shred, data_shred_cnt, parity_shred_cnt ); + if( FD_LIKELY( i<=64UL ) ) + return fd_reedsol_private_recover_first_64( rs->shred_sz, rs->recover.shred, data_shred_cnt, parity_shred_cnt ); + return fd_reedsol_private_recover_first_128( rs->shred_sz, rs->recover.shred, data_shred_cnt, parity_shred_cnt ); + } +# endif if( FD_UNLIKELY( i<16UL ) ) - return fd_reedsol_recover_var_16( rs->shred_sz, rs->recover.shred, data_shred_cnt, parity_shred_cnt, rs->recover.erased ); + return fd_reedsol_private_recover_var_16( rs->shred_sz, rs->recover.shred, data_shred_cnt, parity_shred_cnt, rs->recover.erased ); if( FD_LIKELY( i<32UL ) ) - return fd_reedsol_recover_var_32( rs->shred_sz, rs->recover.shred, data_shred_cnt, 
parity_shred_cnt, rs->recover.erased ); + return fd_reedsol_private_recover_var_32( rs->shred_sz, rs->recover.shred, data_shred_cnt, parity_shred_cnt, rs->recover.erased ); if( FD_LIKELY( i<64UL ) ) - return fd_reedsol_recover_var_64( rs->shred_sz, rs->recover.shred, data_shred_cnt, parity_shred_cnt, rs->recover.erased ); + return fd_reedsol_private_recover_var_64( rs->shred_sz, rs->recover.shred, data_shred_cnt, parity_shred_cnt, rs->recover.erased ); if( FD_LIKELY( i<128UL ) ) - return fd_reedsol_recover_var_128( rs->shred_sz, rs->recover.shred, data_shred_cnt, parity_shred_cnt, rs->recover.erased ); - return fd_reedsol_recover_var_256( rs->shred_sz, rs->recover.shred, data_shred_cnt, parity_shred_cnt, rs->recover.erased ); + return fd_reedsol_private_recover_var_128( rs->shred_sz, rs->recover.shred, data_shred_cnt, parity_shred_cnt, rs->recover.erased ); + + return fd_reedsol_private_recover_var_256( rs->shred_sz, rs->recover.shred, data_shred_cnt, parity_shred_cnt, rs->recover.erased ); +} + +char const * +fd_reedsol_strerror( int err ) { + switch( err ) { + case FD_REEDSOL_SUCCESS: return "success"; + case FD_REEDSOL_ERR_CORRUPT: return "corrupt"; + case FD_REEDSOL_ERR_PARTIAL: return "partial"; + default: break; + } + return "unknown"; } diff --git a/src/ballet/reedsol/fd_reedsol.h b/src/ballet/reedsol/fd_reedsol.h index 3cb320bed72..a9d19e76e52 100644 --- a/src/ballet/reedsol/fd_reedsol.h +++ b/src/ballet/reedsol/fd_reedsol.h @@ -18,57 +18,53 @@ mathematical structure thus forces each shred to be of identical size but doesn't otherwise impose any size restrictions.*/ +#include "../fd_ballet_base.h" -#include "../../util/fd_util.h" - - -/* FD_REEDSOL_{DATA, PARITY}_SHREDS_MAX describe the inclusive maximum +/* FD_REEDSOL_{DATA,PARITY}_SHREDS_MAX describe the inclusive maximum number of data and parity shreds that this implementation supports. These limits are not mathematical limits, but limits based on current Solana needs and performance. The common case for both shred counts to be set to 32. */ + #define FD_REEDSOL_DATA_SHREDS_MAX (67UL) #define FD_REEDSOL_PARITY_SHREDS_MAX (67UL) - #define FD_REEDSOL_ALIGN (128UL) -#define FD_REEDSOL_FOOTPRINT (2304UL) +#define FD_REEDSOL_FOOTPRINT (2304UL) /* 18*ALIGN */ -/* Return values for the recover operation, which is the only part that - can fail for non-bug reasons. Their meaning is documented with - fd_reedsol_recover_fini. */ -#define FD_REEDSOL_OK (0) -#define FD_REEDSOL_ERR_INCONSISTENT (-1) -#define FD_REEDSOL_ERR_INSUFFICIENT (-2) +/* FD_REEDSOL_SUCCESS, FD_REEDSOL_ERR_* are error code return values used + by the recover operation, which is the only part that can fail for + non-bug reasons. Their meaning is documented with + fd_reedsol_recover_fini. SUCCESS must be zero, ERR_* are negative + and distinct. 
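+   Keeping SUCCESS at zero lets callers branch directly on the return
+   value, e.g. if( FD_UNLIKELY( fd_reedsol_recover_fini( rs ) ) ) { ... }.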
*/ -struct __attribute__((aligned(FD_REEDSOL_ALIGN))) fd_reedsol_private { - uchar scratch[ 1024 ]; /* Used for the ultra high performance implementation */ +#define FD_REEDSOL_SUCCESS (0) +#define FD_REEDSOL_ERR_CORRUPT (-1) +#define FD_REEDSOL_ERR_PARTIAL (-2) - /* shred_sz: the size of each shred in bytes (all shreds must be the - same size) */ - ulong shred_sz; +struct __attribute__((aligned(FD_REEDSOL_ALIGN))) fd_reedsol_private { + uchar scratch[ 1024 ]; // Used for the ultra high performance implementation - /* {data, parity}_shred_cnt: the number of data or parity shreds - (respectively) have been added to the in-process operation */ - ulong data_shred_cnt; - ulong parity_shred_cnt; + ulong shred_sz; // shred_sz: the size of each shred in bytes (all shreds must be the same size) + ulong data_shred_cnt; // {data,parity}_shred_cnt: the number of data or parity shreds + ulong parity_shred_cnt; // (respectively) have been added to the in-process operation union { + struct { - /* {data, parity}_shred: pointers to the 1st byte of each shred */ - uchar const * data_shred[ FD_REEDSOL_DATA_SHREDS_MAX ]; - uchar * parity_shred[ FD_REEDSOL_PARITY_SHREDS_MAX ]; + uchar const * data_shred [ FD_REEDSOL_DATA_SHREDS_MAX ]; // {data,parity}_shred: pointers to the 1st byte of each shred + uchar * parity_shred[ FD_REEDSOL_PARITY_SHREDS_MAX ]; } encode; + struct { uchar * shred[ FD_REEDSOL_DATA_SHREDS_MAX + FD_REEDSOL_PARITY_SHREDS_MAX ]; - - /* erased: whether the shred at the corresponding - index is an erasure (i.e. wasn't received or was corrupted). - Used only for decoding operations. */ - /* TODO: Is this the right data type? Should it use a fd_smallset - instead? */ + /* erased: whether the shred at the corresponding index is an + erasure (i.e. wasn't received or was corrupted). Used only for + decoding operations. TODO: Is this the right data type? Should + it use a fd_smallset instead? */ uchar erased[ FD_REEDSOL_DATA_SHREDS_MAX + FD_REEDSOL_PARITY_SHREDS_MAX ]; } recover; + }; }; @@ -76,73 +72,82 @@ typedef struct fd_reedsol_private fd_reedsol_t; FD_PROTOTYPES_BEGIN -/* fd_reedsol_{align, footprint} return the alignment and footprint +/* fd_reedsol_{align,footprint} return the alignment and footprint required in bytes for a fd_reedsol_t. */ + static inline FD_FN_CONST ulong fd_reedsol_align( void ) { return FD_REEDSOL_ALIGN; } static inline FD_FN_CONST ulong fd_reedsol_footprint( void ) { return FD_REEDSOL_FOOTPRINT; } - /* fd_reedsol_encode_init: starts a Reed-Solomon encoding operation that - will encode shreds of size shred_sz. mem is assumed to be a piece - of memory that meets the alignment and size constraints specified - above. Takes a write interest in mem that persists until the - operation is canceled or finalized. shred_sz must be >= 32. Returns - mem. */ + will encode shreds of size shred_sz. mem is assumed to be a piece of + memory that meets the alignment and size constraints specified above. + Takes a write interest in mem that persists until the operation is + aborted or finalized. shred_sz must be >= 32. Returns mem as a a + newly initialized encoder. Every call to fd_reedsol_encode_init + should be paired with a call to fd_reedsol_encode_fini (normal + execution) or fd_reedsol_encode_abort (abnormal execution). 
*/ static inline fd_reedsol_t * -fd_reedsol_encode_init( void * mem, ulong shred_sz ) { +fd_reedsol_encode_init( void * mem, + ulong shred_sz ) { fd_reedsol_t * rs = (fd_reedsol_t *)mem; - - rs->shred_sz = shred_sz; + rs->shred_sz = shred_sz; rs->data_shred_cnt = 0UL; rs->parity_shred_cnt = 0UL; - return rs; } /* fd_reedsol_encode_add_data_shred: adds a shred consisting of the - memory [ptr, ptr+shred_sz) to the in-process Reed-Solomon encoding - operation. Takes a read interest in the shred that persists for - the lifetime of the operation (i.e. until finalized or cancelled). - Data shreds have no alignment restrictions and can overlap with each - other but should not overlap with any parity shreds in the same - encoding operation. + memory [ptr,ptr+shred_sz) to the in-process Reed-Solomon encoding + operation. Takes a read interest in the shred that persists for the + lifetime of the operation (i.e. until finalized or aborted). Data + shreds have no alignment restrictions and can overlap with each other + but should not overlap with any parity shreds in the same encoding + operation. Note: The order in which data shreds are added relative to other data shreds matters. It impacts the parity data produced by the encoding - operation. */ + operation. + + Assumes rs is initialized as an encoder and returns rs (still + initialized as an encoder). */ static inline fd_reedsol_t * -fd_reedsol_encode_add_data_shred( fd_reedsol_t * rs, void const * ptr ) { - rs->encode.data_shred[ rs->data_shred_cnt++ ] = (uchar const*)ptr; +fd_reedsol_encode_add_data_shred( fd_reedsol_t * rs, + void const * ptr ) { + rs->encode.data_shred[ rs->data_shred_cnt++ ] = (uchar const*) ptr; return rs; } /* fd_reedsol_encode_add_parity_shred: adds the block of memory - [ptr, ptr+shred_sz) to the in-process Reed-Solomon encoding operation + [ptr,ptr+shred_sz) to the in-process Reed-Solomon encoding operation as the destination of a parity shred. Takes a write interest in the memory that persists for the lifetime of the operation (i.e. until - finalized or cancelled). Parity shreds have no alignment + finalized or aborted). Parity shreds have no alignment restrictions but must not overlap with each other or with data shreds in the same operation (U.B. if they overlap). Note: The order in which parity shreds are added matters only insofar - as which data will be written to which location. */ + as which data will be written to which location. + + Assumes rs is initialized as an encoder and returns rs (still + initialized as an encoder). */ static inline fd_reedsol_t * -fd_reedsol_encode_add_parity_shred( fd_reedsol_t * rs, void * ptr ) { +fd_reedsol_encode_add_parity_shred( fd_reedsol_t * rs, + void * ptr ) { rs->encode.parity_shred[ rs->parity_shred_cnt++ ] = (uchar *)ptr; return rs; } - -/* fd_reedsol_encode_cancel cancels an in-progress encoding operation. +/* fd_reedsol_encode_abort aborts an in-progress encoding operation. Releases any read or write interests in any shreds that were added to the operation. Upon return, the contents of the parity shreds are - undefined. */ + undefined. Assumes rs is initialized as an encoder, rs will not be + initialized on return. */ static inline void -fd_reedsol_encode_cancel( fd_reedsol_t * rs ) { +fd_reedsol_encode_abort( fd_reedsol_t * rs ) { rs->data_shred_cnt = 0UL; rs->parity_shred_cnt = 0UL; } @@ -150,24 +155,29 @@ fd_reedsol_encode_cancel( fd_reedsol_t * rs ) { /* fd_reedsol_encode_fini finishes the in-progress encoding operation. 
Upon return, the parity shreds will be filled with the correct Reed-Solomon encoded parity data. Upon return, this will no longer - have any read or write interest in any of the provided shreds. */ -void fd_reedsol_encode_fini( fd_reedsol_t * rs ); + have any read or write interest in any of the provided shreds. + Assumes rs is initialized as an encoder, rs will not be initialized + on return. */ + +void +fd_reedsol_encode_fini( fd_reedsol_t * rs ); /* fd_reedsol_recover_init: starts a Reed-Solomon recover/decode operation that will recover shreds of size shred_sz. mem is assumed - to be a piece of memory that meets the alignment and size constraints - specified above. Takes a write interest in mem that persists until - the operation is canceled or finalized. shred_sz must be >= 32. - Returns mem. */ + to be an unused piece of memory that meets the alignment and size + constraints specified above. Takes a write interest in mem that + persists until the operation is aborted or finalized. shred_sz must + be >= 32. Returns mem as a newly initialized recoverer. Every call + to fd_reedsol_recover_init should be paired with a call to + fd_reedsol_recover_fini (normal execution) or + fd_reedsol_recover_abort (abnormal execution). */ + static inline fd_reedsol_t * fd_reedsol_recover_init( void * mem, ulong shred_sz ) { - /* TODO: This is the same as encode_init. Should I merge them? */ fd_reedsol_t * rs = (fd_reedsol_t *)mem; - - rs->shred_sz = shred_sz; + rs->shred_sz = shred_sz; rs->data_shred_cnt = 0UL; rs->parity_shred_cnt = 0UL; - return rs; } @@ -175,10 +185,10 @@ fd_reedsol_recover_init( void * mem, ulong shred_sz ) { memory [ptr, ptr+shred_sz) to the in-process Reed-Solomon recover operation as a source of data. Takes a read interest in the shred that persists for the lifetime of the operation (i.e. until finalized - or cancelled). Received shreds have no alignment restrictions and - can overlap with each other (if necessary, but there's no known use - case for doing so), but should not overlap with any erased shreds in - the same recovery operation. + or aborted). Received shreds have no alignment restrictions and can + overlap with each other (if necessary, but there's no known use case + for doing so), but should not overlap with any erased shreds in the + same recovery operation. The shred is treated as a data shred if is_data_shred is non-zero and as a parity shred if not. Data shreds and parity shreds are mostly @@ -190,29 +200,37 @@ fd_reedsol_recover_init( void * mem, ulong shred_sz ) { fd_reedsol_recover_add_erased_shred) is very important for recovery. Shreds must be added in the natural index order or the recover operation will almost certainly fail. In particular, all data shreds - must be added before any parity shreds are added. */ + must be added before any parity shreds are added. + + Assumes rs is initialized as a recoverer, returns rs (still + initialized as a recoverer). 
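+
+   For example, a typical recover sequence (sketch only; d_cnt, p_cnt,
+   d_rcvd, p_rcvd, d_shred and p_shred are hypothetical caller-side
+   names, not part of this API) adds every data shred in index order
+   and then every parity shred in index order, and then inspects the
+   return value of fd_reedsol_recover_fini:
+
+     fd_reedsol_t * rs = fd_reedsol_recover_init( mem, shred_sz );
+     for( ulong i=0UL; i<d_cnt; i++ )
+       rs = d_rcvd[ i ] ? fd_reedsol_recover_add_rcvd_shred  ( rs, 1, d_shred[ i ] )
+                        : fd_reedsol_recover_add_erased_shred( rs, 1, d_shred[ i ] );
+     for( ulong i=0UL; i<p_cnt; i++ )
+       rs = p_rcvd[ i ] ? fd_reedsol_recover_add_rcvd_shred  ( rs, 0, p_shred[ i ] )
+                        : fd_reedsol_recover_add_erased_shred( rs, 0, p_shred[ i ] );
+     int err = fd_reedsol_recover_fini( rs );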
*/ + static inline fd_reedsol_t * -fd_reedsol_recover_add_rcvd_shred( fd_reedsol_t * rs, int is_data_shred, void const * ptr ) { -#if FD_REEDSOL_HANDHOLDING - // assert is_data_shred==1 implies rs->parity_shred_cnt==0 - // data_shred_cnt, parity_shred_cnt won't go over the max - #endif +fd_reedsol_recover_add_rcvd_shred( fd_reedsol_t * rs, + int is_data_shred, + void const * ptr ) { + + /* Assumes is_data_shred==1 implies rs->parity_shred_cnt==0 and + data_shred_cnt, parity_shred_cnt won't go over the max */ + /* For performance reasons, we need to store all the shred pointers in one flat array, which means the array needs to be non-const. The const in the function signature signals that this operation won't modify the shred. */ - rs->recover.shred[ rs->data_shred_cnt + rs->parity_shred_cnt ] = (void *)ptr; + + rs->recover.shred [ rs->data_shred_cnt + rs->parity_shred_cnt ] = (void *)ptr; rs->recover.erased[ rs->data_shred_cnt + rs->parity_shred_cnt ] = (uchar)0; - rs->data_shred_cnt += !!is_data_shred; - rs->parity_shred_cnt += !is_data_shred; + rs->data_shred_cnt += !!is_data_shred; + rs->parity_shred_cnt += !is_data_shred; + return rs; } -/* fd_reedsol_recover_add_erased_shred adds the block of memory [ptr, - ptr+shred_sz) to the in-process Reed-Solomon recover operation as the - destination for a shred that will be recovered. Takes a write +/* fd_reedsol_recover_add_erased_shred adds the block of memory + [ptr,ptr+shred_sz) to the in-process Reed-Solomon recover operation + as the destination for a shred that will be recovered. Takes a write interest in the shred that persists for the lifetime of the operation - (i.e. until finalized or cancelled). Erased shreds have no alignment + (i.e. until finalized or aborted). Erased shreds have no alignment restrictions but should not overlap with any other shreds in the same recover operation. The contents of the the block of memory are ignored and will be overwritten by the time the operation is @@ -228,52 +246,60 @@ fd_reedsol_recover_add_rcvd_shred( fd_reedsol_t * rs, int is_data_shred, void co fd_reedsol_recover_add_rcvd_shred) is very important for recovery. Shreds must be added in the natural index order or the recover operation will almost certainly fail. In particular, all data shreds - must be added before any parity shreds are added. */ + must be added before any parity shreds are added. + + Assumes rs is initialized as a recoverer, returns rs (still + initialized as a recoverer). */ + static inline fd_reedsol_t * -fd_reedsol_recover_add_erased_shred( fd_reedsol_t * rs, int is_data_shred, void * ptr ) { -#if FD_REEDSOL_HANDHOLDING - // assert is_data_shred==1 implies rs->parity_shred_cnt==0 - // data_shred_cnt, parity_shred_cnt won't go over the max - #endif - rs->recover.shred[ rs->data_shred_cnt + rs->parity_shred_cnt ] = ptr; +fd_reedsol_recover_add_erased_shred( fd_reedsol_t * rs, + int is_data_shred, + void * ptr ) { + + /* Assumes assert is_data_shred==1 implies rs->parity_shred_cnt==0 and + data_shred_cnt, parity_shred_cnt won't go over the max */ + + rs->recover.shred [ rs->data_shred_cnt + rs->parity_shred_cnt ] = ptr; rs->recover.erased[ rs->data_shred_cnt + rs->parity_shred_cnt ] = (uchar)1; - rs->data_shred_cnt += !!is_data_shred; - rs->parity_shred_cnt += !is_data_shred; + rs->data_shred_cnt += !!is_data_shred; + rs->parity_shred_cnt += !is_data_shred; + return rs; } - -/* fd_reedsol_recover_cancel cancels an in-progress encoding operation. +/* fd_reedsol_recover_abort aborts an in-progress encoding operation. 
Releases any read or write interests in any shreds that were added to the operation. Upon return, the contents of the erased shreds are - undefined. */ + undefined. Assumes rs is initialized and rs will not be initialized + on return. */ + static inline void -fd_reedsol_recover_cancel( fd_reedsol_t * rs ) { +fd_reedsol_recover_abort( fd_reedsol_t * rs ) { rs->data_shred_cnt = 0UL; rs->parity_shred_cnt = 0UL; } - /* fd_reedsol_recover_fini finishes the in-progress recover operation. If successful, upon return, any erased shreds will be filled with the - correct data as recovered by the Reed-Solomon recovery algorithm. - - If the recover operation fails with FD_REEDSOL_ERR_{INCONSISTENT, - INSUFFICIENT} , the contents of any erased shreds are undefined. + correct data as recovered by the Reed-Solomon recovery algorithm. If + the recover operation fails with FD_REEDSOL_ERR_{CORRUPT,PARTIAL}, + the contents of any erased shreds are undefined. Upon return, this will no longer have any read or write interest in any of the provided shreds. Returns one of: - FD_REEDSOL_OK if the recover operation was successful - FD_REEDSOL_ERR_INCONSISTENT if the shreds are not consistent with - having come from a Reed-Solomon encoding with the provided number - of data shreds - FD_REEDSOL_ERR_INSUFFICIENT if there's not enough un-erased data to - recover data_shred_cnt data shreds. There must be at least one - un-erased shred (data or parity) for each data shred in the - operation. + FD_REEDSOL_SUCCESS if the recover operation was successful + + FD_REEDSOL_ERR_CORRUPT if the shreds are not consistent with having + come from a Reed-Solomon encoding with the provided number of data + shreds + + FD_REEDSOL_ERR_PARTIAL if there's not enough un-erased data to + recover data_shred_cnt data shreds. There must be at least one + un-erased shred (data or parity) for each data shred in the + operation. It's worth pointing out that the recovery process differs from typical network coding theory by making no effort to correct data @@ -281,8 +307,24 @@ fd_reedsol_recover_cancel( fd_reedsol_t * rs ) { any data corruption, and any shred that fails signature verification can be treated as an erasure. This prevents the network from forking if the leader (maliciously) creates data shreds from one version of - the block and parity shreds from another version of the block. */ -int fd_reedsol_recover_fini( fd_reedsol_t * rs ); + the block and parity shreds from another version of the block. -#endif /* HEADER_fd_src_ballet_reedsol_fd_reedsol_h */ + Assumes rs is initialized as a recoverer, rs will not be initialized + on return. */ + +int +fd_reedsol_recover_fini( fd_reedsol_t * rs ); +/* Misc APIs */ + +/* fd_reedsol_strerror converts a FD_REEDSOL_SUCCESS / FD_REEDSOL_ERR_* + code into a human readable cstr. The lifetime of the returned + pointer is infinite. The returned pointer is always to a non-NULL + cstr. 
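+   For example (assuming fd_util's FD_LOG_WARNING is in scope), a
+   failed recover can be reported as
+     FD_LOG_WARNING(( "recover failed (%i-%s)", err, fd_reedsol_strerror( err ) ));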
*/ + +FD_FN_CONST char const * +fd_reedsol_strerror( int err ); + +FD_PROTOTYPES_END + +#endif /* HEADER_fd_src_ballet_reedsol_fd_reedsol_h */ diff --git a/src/ballet/reedsol/fd_reedsol_arith_avx2.h b/src/ballet/reedsol/fd_reedsol_arith_avx2.h index 6856fcb34b4..a40090aae47 100644 --- a/src/ballet/reedsol/fd_reedsol_arith_avx2.h +++ b/src/ballet/reedsol/fd_reedsol_arith_avx2.h @@ -1,48 +1,75 @@ #ifndef HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_avx2_h #define HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_avx2_h -#include "../../util/simd/fd_avx.h" -#define FD_REEDSOL_GF_ARITH_DEFINED 1 +#ifndef HEADER_fd_src_ballet_reedsol_fd_reedsol_private_h +#error "Do not include this file directly; use fd_reedsol_private.h" +#endif + +#include "../../util/simd/fd_avx.h" typedef wb_t gf_t; + #define GF_WIDTH W_FOOTPRINT -#define gf_ldu wb_ldu -#define gf_stu wb_stu + +FD_PROTOTYPES_BEGIN + +#define gf_ldu wb_ldu +#define gf_stu wb_stu #define gf_zero wb_zero -#ifdef INCLUDE_CONSTANTS -FD_IMPORT_BINARY( fd_reedsol_arith_consts_avx_mul, "src/ballet/reedsol/constants/avx2_constants.bin" ); -#undef INCLUDE_CONSTANTS -#else extern uchar const fd_reedsol_arith_consts_avx_mul[] __attribute__((aligned(128))); -#endif + +/* TODO: This linkage is kinda wonky (maybe use FD_FN_UNUSED) if this + include gets used more generally. The below currently needs to be + available at compile time, not link time, to allow the optimizer to + use it. */ static uchar const fd_reedsol_arith_scale4[ 256UL ] = { - 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240, 29, 13, 61, 45, 93, 77, 125, 109, 157, 141, 189, 173, 221, 205, 253, 237, 58, 42, 26, 10, 122, - 106, 90, 74, 186, 170, 154, 138, 250, 234, 218, 202, 39, 55, 7, 23, 103, 119, 71, 87, 167, 183, 135, 151, 231, 247, 199, 215, 116, 100, 84, 68, 52, 36, 20, 4, 244, 228, - 212, 196, 180, 164, 148, 132, 105, 121, 73, 89, 41, 57, 9, 25, 233, 249, 201, 217, 169, 185, 137, 153, 78, 94, 110, 126, 14, 30, 46, 62, 206, 222, 238, 254, 142, 158, 174, - 190, 83, 67, 115, 99, 19, 3, 51, 35, 211, 195, 243, 227, 147, 131, 179, 163, 232, 248, 200, 216, 168, 184, 136, 152, 104, 120, 72, 88, 40, 56, 8, 24, 245, 229, 213, 197, - 181, 165, 149, 133, 117, 101, 85, 69, 53, 37, 21, 5, 210, 194, 242, 226, 146, 130, 178, 162, 82, 66, 114, 98, 18, 2, 50, 34, 207, 223, 239, 255, 143, 159, 175, 191, 79, - 95, 111, 127, 15, 31, 47, 63, 156, 140, 188, 172, 220, 204, 252, 236, 28, 12, 60, 44, 92, 76, 124, 108, 129, 145, 161, 177, 193, 209, 225, 241, 1, 17, 33, 49, 65, 81, - 97, 113, 166, 182, 134, 150, 230, 246, 198, 214, 38, 54, 6, 22, 102, 118, 70, 86, 187, 171, 155, 139, 251, 235, 219, 203, 59, 43, 27, 11, 123, 107, 91, 75 }; /* Needs to be available at compile time, not link time, to allow the optimizer to use it */ + 0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240, + 29, 13, 61, 45, 93, 77, 125, 109, 157, 141, 189, 173, 221, 205, 253, 237, + 58, 42, 26, 10, 122, 106, 90, 74, 186, 170, 154, 138, 250, 234, 218, 202, + 39, 55, 7, 23, 103, 119, 71, 87, 167, 183, 135, 151, 231, 247, 199, 215, + 116, 100, 84, 68, 52, 36, 20, 4, 244, 228, 212, 196, 180, 164, 148, 132, + 105, 121, 73, 89, 41, 57, 9, 25, 233, 249, 201, 217, 169, 185, 137, 153, + 78, 94, 110, 126, 14, 30, 46, 62, 206, 222, 238, 254, 142, 158, 174, 190, + 83, 67, 115, 99, 19, 3, 51, 35, 211, 195, 243, 227, 147, 131, 179, 163, + 232, 248, 200, 216, 168, 184, 136, 152, 104, 120, 72, 88, 40, 56, 8, 24, + 245, 229, 213, 197, 181, 165, 149, 133, 117, 101, 85, 69, 53, 37, 21, 5, + 210, 194, 242, 226, 
146, 130, 178, 162, 82, 66, 114, 98, 18, 2, 50, 34, + 207, 223, 239, 255, 143, 159, 175, 191, 79, 95, 111, 127, 15, 31, 47, 63, + 156, 140, 188, 172, 220, 204, 252, 236, 28, 12, 60, 44, 92, 76, 124, 108, + 129, 145, 161, 177, 193, 209, 225, 241, 1, 17, 33, 49, 65, 81, 97, 113, + 166, 182, 134, 150, 230, 246, 198, 214, 38, 54, 6, 22, 102, 118, 70, 86, + 187, 171, 155, 139, 251, 235, 219, 203, 59, 43, 27, 11, 123, 107, 91, 75 +}; #define GF_ADD wb_xor + #define GF_OR wb_or -#define GF_MUL( a, c ) (__extension__({ \ - wb_t lo = wb_and( a, wb_bcast( 0x0F ) ); \ - wb_t hi = wb_shr( a, 4 ); \ - wb_t p0 = _mm256_shuffle_epi8( wb_ld( fd_reedsol_arith_consts_avx_mul + 32*c ), lo ); \ - wb_t p1 = _mm256_shuffle_epi8( wb_ld( fd_reedsol_arith_consts_avx_mul + 32*fd_reedsol_arith_scale4[ c ] ), hi ); \ - /* c is known at compile time, so this is not a runtime branch */ \ - (c==0) ? wb_zero() : ( (c==1) ? a : wb_xor( p0, p1 ) ); } )) - -#define GF_MUL_VAR( a, c ) (__extension__({ \ - wb_t lo = wb_and( a, wb_bcast( 0x0F ) ); \ - wb_t hi = wb_shr( a, 4 ); \ - wb_t p0 = _mm256_shuffle_epi8( wb_ld( fd_reedsol_arith_consts_avx_mul + 32*c ), lo ); \ - wb_t p1 = _mm256_shuffle_epi8( wb_ld( fd_reedsol_arith_consts_avx_mul + 32*fd_reedsol_arith_scale4[ c ] ), hi ); \ - wb_xor( p0, p1 ); } )) - -#define GF_ANY( x ) (0 != _mm256_movemask_epi8( wb_ne( (x), wb_zero() ) ) ) - -#endif /*HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_avx2_h */ + +#define GF_MUL( a, c ) (__extension__({ \ + wb_t _a = (a); \ + int _c = (c); \ + wb_t _lo = wb_and( _a, wb_bcast( 0x0F ) ); \ + wb_t _hi = wb_shr( _a, 4 ); \ + wb_t _p0 = _mm256_shuffle_epi8( wb_ld( fd_reedsol_arith_consts_avx_mul + 32*_c ), _lo ); \ + wb_t _p1 = _mm256_shuffle_epi8( wb_ld( fd_reedsol_arith_consts_avx_mul + 32*fd_reedsol_arith_scale4[ _c ] ), _hi ); \ + /* c is known at compile time, so this is not a runtime branch */ \ + (_c==0) ? wb_zero() : ( (_c==1) ? 
_a : wb_xor( _p0, _p1 ) ); \ + })) + +#define GF_MUL_VAR( a, c ) (__extension__({ \ + wb_t _a = (a); \ + int _c = (c); \ + wb_t _lo = wb_and( _a, wb_bcast( 0x0F ) ); \ + wb_t _hi = wb_shr( _a, 4 ); \ + wb_t _p0 = _mm256_shuffle_epi8( wb_ld( fd_reedsol_arith_consts_avx_mul + 32*_c ), _lo ); \ + wb_t _p1 = _mm256_shuffle_epi8( wb_ld( fd_reedsol_arith_consts_avx_mul + 32*fd_reedsol_arith_scale4[ _c ] ), _hi ); \ + wb_xor( _p0, _p1 ); \ + })) + +#define GF_ANY( x ) (0 != _mm256_movemask_epi8( wb_ne( (x), wb_zero() ) )) + +FD_PROTOTYPES_END + +#endif /* HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_avx2_h */ diff --git a/src/ballet/reedsol/fd_reedsol_arith_gfni.h b/src/ballet/reedsol/fd_reedsol_arith_gfni.h index d4a420693a9..6200cd600b2 100644 --- a/src/ballet/reedsol/fd_reedsol_arith_gfni.h +++ b/src/ballet/reedsol/fd_reedsol_arith_gfni.h @@ -1,63 +1,77 @@ #ifndef HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_gfni_h #define HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_gfni_h -#include "../../util/simd/fd_avx.h" -#define FD_REEDSOL_GF_ARITH_DEFINED 1 +#ifndef HEADER_fd_src_ballet_reedsol_fd_reedsol_private_h +#error "Do not include this file directly; use fd_reedsol_private.h" +#endif + +#include "../../util/simd/fd_avx.h" typedef wb_t gf_t; + #define GF_WIDTH W_FOOTPRINT -#define gf_ldu wb_ldu -#define gf_stu wb_stu + +FD_PROTOTYPES_BEGIN + +#define gf_ldu wb_ldu +#define gf_stu wb_stu #define gf_zero wb_zero -#ifdef INCLUDE_CONSTANTS -FD_IMPORT_BINARY( fd_reedsol_arith_consts_gfni_mul, "src/ballet/reedsol/constants/gfni_constants.bin" ); -#undef INCLUDE_CONSTANTS -#else extern uchar const fd_reedsol_arith_consts_gfni_mul[] __attribute__((aligned(128))); -#endif #define GF_ADD wb_xor + #define GF_OR wb_or /* Older versions of GCC have a bug that cause them to think - _mm256_gf2p8affine_epi64_epi8 is a symmetric in the first two arguments - (other than that the second argument can be a memory address). That's - totally incorrect. It was fixed in GCC 10. See + _mm256_gf2p8affine_epi64_epi8 is a symmetric in the first two + arguments (other than that the second argument can be a memory + address). That's totally incorrect. It was fixed in GCC 10. See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92889 for more details. */ + #if !FD_USING_CLANG -#define GCC_VERSION (__GNUC__ * 10000 \ - + __GNUC_MINOR__ * 100 \ - + __GNUC_PATCHLEVEL__) +#define GCC_VERSION (__GNUC__*10000 + __GNUC_MINOR__*100 + __GNUC_PATCHLEVEL__) #endif #if FD_USING_CLANG || (GCC_VERSION >= 100000) -/* c is known at compile time, so this is not a runtime branch */ -#define GF_MUL( a, c ) ((c==0) ? wb_zero() : ( (c==1) ? (a) : _mm256_gf2p8affine_epi64_epi8( a, wb_ld( fd_reedsol_arith_consts_gfni_mul + 32*(c) ), 0 ) )) -#define GF_MUL_VAR( a, c ) (_mm256_gf2p8affine_epi64_epi8( a, wb_ld( fd_reedsol_arith_consts_gfni_mul + 32*(c) ), 0 ) ) -#else +#define GF_MUL( a, c ) (__extension__({ \ + wb_t _a = (a); \ + int _c = (c); \ + /* c is known at compile time, so this is not a runtime branch */ \ + ((_c==0) ? wb_zero() : ((_c==1) ? _a : \ + _mm256_gf2p8affine_epi64_epi8( _a, wb_ld( fd_reedsol_arith_consts_gfni_mul + 32*_c ), 0 ) )); \ + })) + +#define GF_MUL_VAR( a, c ) (_mm256_gf2p8affine_epi64_epi8( (a), wb_ld( fd_reedsol_arith_consts_gfni_mul + 32*(c) ), 0 )) -#define GF_MUL( a, c ) (__extension__({ \ - wb_t product; \ - __asm__( "vgf2p8affineqb $0x0, %[cons], %[vec], %[out]" \ - : [out]"=x"(product) \ - : [cons]"xm"( wb_ld( fd_reedsol_arith_consts_gfni_mul + 32*(c) ) ), \ - [vec]"x" (a) ); \ - (c==0) ? wb_zero() : ( (c==1) ? 
(a) : product ); })) +#else +#define GF_MUL( a, c ) (__extension__({ \ + wb_t _a = (a); \ + int _c = (c); \ + wb_t _product; \ + __asm__( "vgf2p8affineqb $0x0, %[cons], %[vec], %[out]" \ + : [out]"=x" (_product) \ + : [cons]"xm" (wb_ld( fd_reedsol_arith_consts_gfni_mul + 32*_c )), \ + [vec]"x" (_a) ); \ + /* c is known at compile time, so this is not a runtime branch */ \ + (_c==0) ? wb_zero() : ( (_c==1) ? (_a) : _product ); \ + })) -#define GF_MUL_VAR( a, c ) (__extension__({ \ - wb_t product; \ - __asm__( "vgf2p8affineqb $0x0, %[cons], %[vec], %[out]" \ - : [out]"=x"(product) \ - : [cons]"xm"( wb_ld( fd_reedsol_arith_consts_gfni_mul + 32*(c) ) ), \ - [vec]"x" (a) ); \ - (product); })) +#define GF_MUL_VAR( a, c ) (__extension__({ \ + wb_t _product; \ + __asm__( "vgf2p8affineqb $0x0, %[cons], %[vec], %[out]" \ + : [out]"=x" (_product) \ + : [cons]"xm" (wb_ld( fd_reedsol_arith_consts_gfni_mul + 32*(c) )), \ + [vec]"x" (a) ); \ + (_product); \ + })) #endif -#define GF_ANY( x ) (0 != _mm256_movemask_epi8( wb_ne( (x), wb_zero() ) ) ) +#define GF_ANY( x ) (0 != _mm256_movemask_epi8( wb_ne( (x), wb_zero() ) )) +FD_PROTOTYPES_END -#endif /*HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_gfni_h */ +#endif /* HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_gfni_h */ diff --git a/src/ballet/reedsol/fd_reedsol_arith_none.h b/src/ballet/reedsol/fd_reedsol_arith_none.h index bcdc3d10060..058e3129e3e 100644 --- a/src/ballet/reedsol/fd_reedsol_arith_none.h +++ b/src/ballet/reedsol/fd_reedsol_arith_none.h @@ -1,37 +1,46 @@ #ifndef HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_none_h #define HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_none_h -#include "../../util/fd_util_base.h" -#define FD_REEDSOL_GF_ARITH_DEFINED 1 +#ifndef HEADER_fd_src_ballet_reedsol_fd_reedsol_private_h +#error "Do not include this file directly; use fd_reedsol_private.h" +#endif typedef ulong gf_t; /* One byte stored in a ulong */ + #define GF_WIDTH 1UL #define W_ATTR +FD_PROTOTYPES_BEGIN + static inline gf_t gf_ldu( uchar const * addr ) { return (ulong)(*addr); } static inline void gf_stu( uchar * addr, gf_t v ) { *addr = (uchar)v; } -#define gf_zero() (0UL) -#ifdef INCLUDE_CONSTANTS -FD_IMPORT_BINARY( fd_reedsol_arith_consts_generic_mul, "src/ballet/reedsol/constants/generic_constants.bin" ); -#undef INCLUDE_CONSTANTS -#else -extern uchar const fd_reedsol_arith_consts_generic_mul[] __attribute__((aligned(128))); -#endif -static FD_FN_UNUSED short const * gf_arith_log_tbl = (short const *)fd_reedsol_arith_consts_generic_mul; /* Indexed [0, 256) */ -static FD_FN_UNUSED uchar const * gf_arith_invlog_tbl = fd_reedsol_arith_consts_generic_mul + 256UL*sizeof(short) + 512UL*sizeof(uchar); /* Indexed [-512, 512) */ +#define gf_zero() (0UL) #define GF_ADD( a, b ) ((a)^(b)) + #define GF_OR( a, b ) ((a)|(b)) -/* c is known at compile time, so this is not a runtime branch. - Exposing log_tbl at compile time would let the compiler remove a +/* Exposing log_tbl at compile time would let the compiler remove a branch, but we don't care too much about performance in this case. */ -#define GF_MUL( a, c ) ((c==0) ? 0UL : ( (c==1) ? (a) : (ulong)gf_arith_invlog_tbl[ gf_arith_log_tbl[ a ] + gf_arith_log_tbl[ c ] ] )) -#define GF_MUL_VAR( a, c ) ((ulong)gf_arith_invlog_tbl[ gf_arith_log_tbl[ a ] + gf_arith_log_tbl[ c ] ] ) +#define GF_MUL( a, c ) (__extension__({ \ + ulong _a = (a); \ + int _c = (c); \ + /* c is known at compile time, so this is not a runtime branch. */ \ + ((_c==0) ? 0UL : ( (_c==1) ? 
_a : (ulong)gf_arith_invlog_tbl[ gf_arith_log_tbl[ _a ] + gf_arith_log_tbl[ _c ] ] )); \ + })) + +#define GF_MUL_VAR( a, c ) ((ulong)gf_arith_invlog_tbl[ gf_arith_log_tbl[ (a) ] + gf_arith_log_tbl[ (c) ] ] ) #define GF_ANY( x ) (!!(x)) +extern uchar const fd_reedsol_arith_consts_generic_mul[] __attribute__((aligned(128))); + +FD_FN_UNUSED static short const * gf_arith_log_tbl = (short const *)fd_reedsol_arith_consts_generic_mul; /* Indexed [0,256) */ +FD_FN_UNUSED static uchar const * gf_arith_invlog_tbl = + fd_reedsol_arith_consts_generic_mul + 256UL*sizeof(short) + 512UL*sizeof(uchar); /* Indexed [-512,512) */ + +FD_PROTOTYPES_END -#endif /*HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_none_h */ +#endif /* HEADER_fd_src_ballet_reedsol_fd_reedsol_arith_none_h */ diff --git a/src/ballet/reedsol/fd_reedsol_encode_128.c b/src/ballet/reedsol/fd_reedsol_encode_128.c index 4ca932116ef..bb9605896a0 100644 --- a/src/ballet/reedsol/fd_reedsol_encode_128.c +++ b/src/ballet/reedsol/fd_reedsol_encode_128.c @@ -1,19 +1,11 @@ -#include "../../util/fd_util.h" -#include "fd_reedsol_internal.h" -#if FD_HAS_GFNI -#include "fd_reedsol_arith_gfni.h" -#elif FD_HAS_AVX -#include "fd_reedsol_arith_avx2.h" -#else -#include "fd_reedsol_arith_none.h" -#endif -#include "fd_reedsol_fft.h" #include "fd_reedsol_ppt.h" -void fd_reedsol_encode_128( ulong shred_sz, - uchar const * const * data_shred, - ulong data_shred_cnt, - uchar * const * parity_shred, - ulong parity_shred_cnt ) { + +void +fd_reedsol_private_encode_128( ulong shred_sz, + uchar const * const * data_shred, + ulong data_shred_cnt, + uchar * const * parity_shred, + ulong parity_shred_cnt ) { for( ulong shred_pos=0UL; shred_pos=134 are completely ignored. - - Returns one of: - FD_REEDSOL_OK if okay - FD_REEDSOL_ERR_INCONSISTENT if the shreds are not consistent with - having come from a Reed-Solomon encoding of data_shred_cnt data - shreds - FD_REEDSOL_ERR_INSUFFICIENT if there's not enough un-erased data to - recover data_shred_cnt data shreds - */ - -int fd_reedsol_recover_var_16( ulong shred_sz, - uchar * const * shred, - ulong data_shred_cnt, - ulong parity_shred_cnt, - uchar const * erased ); -int fd_reedsol_recover_var_32( ulong shred_sz, - uchar * const * shred, - ulong data_shred_cnt, - ulong parity_shred_cnt, - uchar const * erased ); -int fd_reedsol_recover_var_64( ulong shred_sz, - uchar * const * shred, - ulong data_shred_cnt, - ulong parity_shred_cnt, - uchar const * erased ); -int fd_reedsol_recover_var_128( ulong shred_sz, - uchar * const * shred, - ulong data_shred_cnt, - ulong parity_shred_cnt, - uchar const * erased ); -int fd_reedsol_recover_var_256( ulong shred_sz, - uchar * const * shred, - ulong data_shred_cnt, - ulong parity_shred_cnt, - uchar const * erased ); - -#endif /* HEADER_fd_src_ballet_reedsol_fd_reedsol_internal_h */ diff --git a/src/ballet/reedsol/fd_reedsol_pi.c b/src/ballet/reedsol/fd_reedsol_pi.c index 8b651da12c8..d1da559d5ca 100644 --- a/src/ballet/reedsol/fd_reedsol_pi.c +++ b/src/ballet/reedsol/fd_reedsol_pi.c @@ -1,18 +1,4 @@ -#include -#include "fd_reedsol_pi.h" -#if FD_HAS_AVX -#include "../../util/simd/fd_avx.h" -#include "../../util/simd/fd_sse.h" -#include -#endif - -#if FD_HAS_GFNI -#include "fd_reedsol_arith_gfni.h" -#elif FD_HAS_AVX -#include "fd_reedsol_arith_avx2.h" -#else -#include "fd_reedsol_arith_none.h" -#endif +#include "fd_reedsol_private.h" /* TODO: Move this high-level overview @@ -57,7 +43,7 @@ Fast Walsh-Hadamard transform in Appendix A that the code in this implementation also uses. 
*/ -#if FD_HAS_AVX +#if FD_REEDSOL_ARITH_IMPL>0 /* When using AVX, the representation used for internal computation can be done with unsigned chars or with shorts. They give the same @@ -72,6 +58,7 @@ overcomplete representation of the integers mod 255 (yes, unfortunately not mod 256). In particular, the value 255 is allowed, which is interchangeable with 0. */ + #ifndef FD_REEDSOL_PI_USE_SHORT #define FD_REEDSOL_PI_USE_SHORT 0 #endif @@ -79,6 +66,8 @@ /* Define some helper macros like what we have in util/simd for a vector of shorts. */ +#include "../../util/simd/fd_sse.h" + #define ws_t __m256i #define ws_add(a,b) _mm256_add_epi16( (a), (b) ) #define ws_sub(a,b) _mm256_sub_epi16( (a), (b) ) @@ -113,8 +102,8 @@ ws_mod255( ws_t x ) { Unlike the rest of the similar-seeming components in fd_reedsol (e.g. FFT, PPT), this computes the transform within a single (or few) AVX vectors, not in parallel across each component of the vector. I.e. if - FD_HAS_AVX, to compute a 16-element FWHD, you pass one AVX vector - (16*short), not 16 vectors. + FD_REEDSOL_ARITH_IMPL>0, to compute a 16-element FWHD, you pass one + AVX vector (16*short), not 16 vectors. Also unlike the rest of the similar-seeming components in fd_reedsol, this works on the group Z/255Z (integers mod 255). Since 255 is not @@ -143,7 +132,6 @@ ws_mod255( ws_t x ) { (x0) = _y0; (x1) = _y1; \ } while( 0 ) - #define FD_REEDSOL_FWHT_64( x0, x1, x2, x3 ) do { \ ws_t _z0, _z1, _z2, _z3; ws_t _z0i, _z1i, _z2i, _z3i; \ _z0i = (x0); _z1i = (x1); _z2i = (x2); _z3i = (x3); \ @@ -349,7 +337,7 @@ exp_2( wb_t x ) { return with3; } -#endif /* FD_HAS_AVX */ +#endif /* FD_REEDSOL_ARITH_IMPL>0 */ /* l_twiddle_{N} stores the size N FWHT of what the paper calls L~, i.e. ( 0, Log(1), Log(2), Log(3), ... Log(N-1) ) @@ -397,8 +385,7 @@ static const short fwht_l_twiddle_256[ 256 ] = {0x00,0xfc,0xfb,0x15,0x2d,0xfa,0x 0xc4,0x48,0x04,0x6d,0xdf,0x95,0xa1,0x73,0xed,0x0f,0xce,0x58,0x25,0x51,0x99,0xa6, 0x49,0x6e,0xe0,0xa2,0xee,0xcf,0x52,0xa7,0x4a,0xe1,0xd0,0xa8,0xe2,0xa9,0xe3,0xe4}; - -#if !FD_HAS_AVX +#if FD_REEDSOL_ARITH_IMPL==0 static void gen_pi_noavx_generic( uchar const * is_erased, uchar * output, @@ -420,7 +407,6 @@ gen_pi_noavx_generic( uchar const * is_erased, for( ulong i=0UL; i0 #if FD_REEDSOL_PI_USE_SHORT ws_t erased_vec = _mm256_cvtepu8_epi16( vb_ld( is_erased ) ); @@ -518,11 +504,10 @@ fd_reedsol_gen_pi_16( uchar const * is_erased, #endif } - void -fd_reedsol_gen_pi_32( uchar const * is_erased, - uchar * output ) { -#if FD_HAS_AVX +fd_reedsol_private_gen_pi_32( uchar const * is_erased, + uchar * output ) { +#if FD_REEDSOL_ARITH_IMPL>0 #if FD_REEDSOL_PI_USE_SHORT ws_t erased_vec0 = _mm256_cvtepu8_epi16( vb_ld( is_erased ) ); ws_t erased_vec1 = _mm256_cvtepu8_epi16( vb_ld( is_erased + 16UL ) ); @@ -605,9 +590,9 @@ fd_reedsol_gen_pi_32( uchar const * is_erased, } void -fd_reedsol_gen_pi_64( uchar const * is_erased, - uchar * output ) { -#if FD_HAS_AVX +fd_reedsol_private_gen_pi_64( uchar const * is_erased, + uchar * output ) { +#if FD_REEDSOL_ARITH_IMPL>0 #if FD_REEDSOL_PI_USE_SHORT ws_t erased_vec0 = _mm256_cvtepu8_epi16( vb_ld( is_erased ) ); ws_t erased_vec1 = _mm256_cvtepu8_epi16( vb_ld( is_erased + 16UL ) ); @@ -723,9 +708,9 @@ fd_reedsol_gen_pi_64( uchar const * is_erased, } void -fd_reedsol_gen_pi_128( uchar const * is_erased, - uchar * output ) { -#if FD_HAS_AVX +fd_reedsol_private_gen_pi_128( uchar const * is_erased, + uchar * output ) { +#if FD_REEDSOL_ARITH_IMPL>0 #if FD_REEDSOL_PI_USE_SHORT ws_t erased_vec0 = _mm256_cvtepu8_epi16( vb_ld( 
is_erased ) ); ws_t erased_vec1 = _mm256_cvtepu8_epi16( vb_ld( is_erased + 16UL ) ); @@ -920,9 +905,9 @@ fd_reedsol_gen_pi_128( uchar const * is_erased, } void -fd_reedsol_gen_pi_256( uchar const * is_erased, - uchar * output ) { -#if FD_HAS_AVX +fd_reedsol_private_gen_pi_256( uchar const * is_erased, + uchar * output ) { +#if FD_REEDSOL_ARITH_IMPL>0 #if FD_REEDSOL_PI_USE_SHORT ws_t erased_vec0 = _mm256_cvtepu8_epi16( vb_ld( is_erased ) ); ws_t erased_vec1 = _mm256_cvtepu8_epi16( vb_ld( is_erased + 16UL ) ); @@ -1041,7 +1026,6 @@ fd_reedsol_gen_pi_256( uchar const * is_erased, log_pi14 = ws_adjust_sign( log_pi14, ws_sub( ws_bcast( 1 ), ws_shl( erased_vec14, 1 ) ) ); log_pi15 = ws_adjust_sign( log_pi15, ws_sub( ws_bcast( 1 ), ws_shl( erased_vec15, 1 ) ) ); - /* After the addition below, 0<= log_pi <= 65152 < 2^16. The mod brings it back to 0 <= log_pi < 255. */ log_pi0 = ws_mod255( ws_add( log_pi0, ws_bcast( (short)(255*128) ) ) ); diff --git a/src/ballet/reedsol/fd_reedsol_pi.h b/src/ballet/reedsol/fd_reedsol_pi.h deleted file mode 100644 index 1af598dabc7..00000000000 --- a/src/ballet/reedsol/fd_reedsol_pi.h +++ /dev/null @@ -1,47 +0,0 @@ -#ifndef HEADER_fd_src_ballet_reedsol_fd_reedsol_pi_h -#define HEADER_fd_src_ballet_reedsol_fd_reedsol_pi_h -#include "../../util/fd_util_base.h" - -/* This file generates what - S. -J. Lin, T. Y. Al-Naffouri, Y. S. Han and W. -H. Chung, "Novel - Polynomial Basis With Fast Fourier Transform and Its Application to - Reed–Solomon Erasure Codes," in IEEE Transactions on Information - Theory, vol. 62, no. 11, pp. 6284-6299, Nov. 2016, doi: - 10.1109/TIT.2016.2608892. - and - Didier, Frédéric. "Efficient erasure decoding of Reed-Solomon - codes." arXiv preprint arXiv:0901.1886 (2009). - call Pi and 1/Pi'. For more information about Pi and Pi', see the - implementation or the papers referenced above. - - The main set of functions this file exposes is - - void fd_reedsol_gen_pi_{N}( uchar const * is_erased, uchar * output ) - - for N in {16, 32, 64, 128, 256}. Since Pi is only needed for elements - that are not erased, Pi' is only needed for elements that are erased, - and it is computationally beneficial to compute them at the same - time, this function computes them both. - - is_erased and output must point to the first element of arrays - indexed [0, N). They must be aligned to 32 bytes. - - Upon return, output[i] stores Pi(i) if is_erased[i]==0 and 1/Pi'(i) - if is_erased[i]==1. It's undefined behavior for is_erased to contain - something other than 0 or 1. - - Pi and Pi' are both elements of GF(2^8) stored in their normal byte - representation. */ -void fd_reedsol_gen_pi_16 ( uchar const * is_erased, uchar * output ); -void fd_reedsol_gen_pi_32 ( uchar const * is_erased, uchar * output ); -void fd_reedsol_gen_pi_64 ( uchar const * is_erased, uchar * output ); -void fd_reedsol_gen_pi_128( uchar const * is_erased, uchar * output ); -void fd_reedsol_gen_pi_256( uchar const * is_erased, uchar * output ); - -/* The following are the pre-computed values for common cases. - They're exposed in this header so that the values to multiply are - known at compile time to eliminate loads on the critical path. 
*/ -/* TODO: Decide on pre-computed cases and add them */ - - -#endif /* HEADER_fd_src_ballet_reedsol_fd_reedsol_pi_h */ diff --git a/src/ballet/reedsol/fd_reedsol_ppt.h b/src/ballet/reedsol/fd_reedsol_ppt.h index 815675be99b..c425987f54d 100644 --- a/src/ballet/reedsol/fd_reedsol_ppt.h +++ b/src/ballet/reedsol/fd_reedsol_ppt.h @@ -1,8 +1,9 @@ - /* Note: This file is auto generated. */ #ifndef HEADER_fd_src_ballet_reedsol_fd_reedsol_ppt_h #define HEADER_fd_src_ballet_reedsol_fd_reedsol_ppt_h +#include "fd_reedsol_fft.h" + /* This file implements the Principal Pivot Transform for the Reed Solomon FFT operator as described in: S. -J. Lin, A. Alloum and T. Al-Naffouri, "Principal pivot @@ -13,7 +14,6 @@ The main macro this file provides is FD_REEDSOL_GENERATE_PPT. The rest of this file is auto-generated implementation details. - When the number of data shreds we have is not a power of 2, the approach used in the 32-32 case doesn't apply. I found the paper extending it to the general case uninterpretable. So we use the @@ -77,12 +77,6 @@ arbitrary subset of them. This file only implements the specific case. */ -#include "fd_reedsol_fft.h" -#ifndef FD_REEDSOL_GF_ARITH_DEFINED -#error "You must include fd_reedsol_arith_gfni.h or fd_reedsol_arith_avx2.h before including this file" -#endif - - /* FD_REEDSOL_GENERATE_PPT: Inserts code to compute the principal pivot transform of size n (must be a power of 2, currently only 16 and 32 are emitted by the code generator) and when you have k known @@ -102,9 +96,6 @@ #define FD_REEDSOL_GENERATE_PPT(n, k, ...) FD_REEDSOL_PPT_IMPL_##n##_##k( __VA_ARGS__ ) - - - #define GF_MUL22( inout0, inout1, c00, c01, c10, c11) \ do { \ gf_t temp = GF_ADD( GF_MUL( inout0, c00 ), GF_MUL( inout1, c01 ) ); \ @@ -112,8 +103,6 @@ inout0 = temp; \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_16_1( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09 , \ in10, in11, in12, in13, in14, in15) \ do { \ @@ -150,8 +139,6 @@ in00 = GF_ADD( GF_MUL( scratch_8, 0 ), in00 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_16_2( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09 , \ in10, in11, in12, in13, in14, in15) \ do { \ @@ -200,8 +187,6 @@ in01 = GF_ADD( GF_MUL( scratch_9, 0 ), in01 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_16_3( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, \ in11, in12, in13, in14, in15) \ do { \ @@ -254,8 +239,6 @@ in02 = GF_ADD( GF_MUL( scratch_10, 0 ), in02 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_16_4( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, \ in13, in14, in15) \ do { \ @@ -309,8 +292,6 @@ in03 = GF_ADD( GF_MUL( scratch_11, 0 ), in03 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_16_5( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, \ in15) \ do { \ @@ -371,8 +352,6 @@ in04 = GF_ADD( GF_MUL( scratch_12, 0 ), in04 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_16_6( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15) \ do { \ gf_t scratch_10, scratch_11, scratch_12, scratch_13, scratch_2, scratch_3, scratch_6, scratch_7, scratch_8, scratch_9; \ @@ -436,8 +415,6 @@ in05 = GF_ADD( GF_MUL( scratch_13, 0 ), in05 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_16_7( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14 , \ in15) \ do { \ @@ -498,8 +475,6 @@ in06 = GF_ADD( GF_MUL( scratch_14, 0 ), in06 ); \ } while( 0 ) - - #define 
FD_REEDSOL_PPT_IMPL_16_8( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12 , \ in13, in14, in15) \ do { \ @@ -548,8 +523,6 @@ in07 = GF_ADD( GF_MUL( scratch_15, 0 ), in07 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_16_9( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14 , \ in15) \ do { \ @@ -610,8 +583,6 @@ in07 = GF_ADD( GF_MUL( scratch_15, 0 ), in07 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_16_10( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15) \ do { \ gf_t scratch_10, scratch_11, scratch_12, scratch_13, scratch_14, scratch_15, scratch_2, scratch_3, scratch_4, scratch_5; \ @@ -675,8 +646,6 @@ in07 = GF_ADD( GF_MUL( scratch_15, 0 ), in07 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_16_11( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, \ in15) \ do { \ @@ -737,8 +706,6 @@ in07 = GF_ADD( GF_MUL( scratch_15, 0 ), in07 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_16_12( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, \ in13, in14, in15) \ do { \ @@ -792,8 +759,6 @@ in07 = GF_ADD( GF_MUL( scratch_15, 0 ), in07 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_16_13( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, \ in11, in12, in13, in14, in15) \ do { \ @@ -846,8 +811,6 @@ in07 = GF_ADD( GF_MUL( scratch_15, 0 ), in07 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_16_14( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09 , \ in10, in11, in12, in13, in14, in15) \ do { \ @@ -896,8 +859,6 @@ in07 = GF_ADD( GF_MUL( scratch_15, 0 ), in07 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_16_15( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09 , \ in10, in11, in12, in13, in14, in15) \ do { \ @@ -934,8 +895,6 @@ in07 = GF_ADD( GF_MUL( scratch_15, 0 ), in07 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_32_17( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28 , \ in29, in30, in31) \ do { \ @@ -1050,8 +1009,6 @@ in15 = GF_ADD( GF_MUL( scratch_31, 0 ), in15 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_32_18( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28 , \ in29, in30, in31) \ do { \ @@ -1174,8 +1131,6 @@ in15 = GF_ADD( GF_MUL( scratch_31, 0 ), in15 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_32_19( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28 , \ in29, in30, in31) \ do { \ @@ -1298,8 +1253,6 @@ in15 = GF_ADD( GF_MUL( scratch_31, 0 ), in15 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_32_20( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28 , \ in29, in30, in31) \ do { \ @@ -1419,8 +1372,6 @@ in15 = GF_ADD( GF_MUL( scratch_31, 0 ), in15 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_32_21( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28 , \ in29, in30, in31) \ do { \ @@ -1543,8 +1494,6 @@ in15 = GF_ADD( GF_MUL( scratch_31, 
0 ), in15 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_32_22( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28 , \ in29, in30, in31) \ do { \ @@ -1667,8 +1616,6 @@ in15 = GF_ADD( GF_MUL( scratch_31, 0 ), in15 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_32_23( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28 , \ in29, in30, in31) \ do { \ @@ -1783,8 +1730,6 @@ in15 = GF_ADD( GF_MUL( scratch_31, 0 ), in15 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_32_24( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28 , \ in29, in30, in31) \ do { \ @@ -1882,8 +1827,6 @@ in15 = GF_ADD( GF_MUL( scratch_31, 0 ), in15 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_32_25( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28 , \ in29, in30, in31) \ do { \ @@ -1989,8 +1932,6 @@ in15 = GF_ADD( GF_MUL( scratch_31, 0 ), in15 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_32_26( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, \ in29, in30, in31) \ do { \ @@ -2096,8 +2037,6 @@ in15 = GF_ADD( GF_MUL( scratch_31, 0 ), in15 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_32_27( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, \ in25, in26, in27, in28, in29, in30, in31) \ do { \ @@ -2195,8 +2134,6 @@ in15 = GF_ADD( GF_MUL( scratch_31, 0 ), in15 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_32_28( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, \ in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31) \ do { \ @@ -2283,8 +2220,6 @@ in15 = GF_ADD( GF_MUL( scratch_31, 0 ), in15 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_32_29( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17 , \ in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31) \ do { \ @@ -2366,8 +2301,6 @@ in15 = GF_ADD( GF_MUL( scratch_31, 0 ), in15 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_32_30( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17 , \ in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31) \ do { \ @@ -2441,8 +2374,6 @@ in15 = GF_ADD( GF_MUL( scratch_31, 0 ), in15 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_32_31( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17 , \ in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31) \ do { \ @@ -2500,8 +2431,6 @@ in15 = GF_ADD( GF_MUL( scratch_31, 0 ), in15 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_64_33( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ 
in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -2718,8 +2647,6 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_64_34( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -2948,8 +2875,6 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_64_35( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -3182,8 +3107,6 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_64_36( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -3417,8 +3340,6 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_64_37( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -3659,8 +3580,6 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_64_38( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -3905,8 +3824,6 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_64_39( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -4147,8 +4064,6 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_64_40( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, 
in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -4377,8 +4292,6 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_64_41( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -4619,8 +4532,6 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_64_42( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -4865,8 +4776,6 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_64_43( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -5107,8 +5016,6 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_64_44( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -5342,8 +5249,6 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_64_45( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -5576,8 +5481,6 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_64_46( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -5806,8 +5709,6 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_64_47( in00, in01, in02, in03, in04, 
in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -6024,8 +5925,6 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_64_48( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -6212,8 +6111,6 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_64_49( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -6422,8 +6319,6 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_64_50( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -6636,8 +6531,6 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_64_51( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -6846,8 +6739,6 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_64_52( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -7048,8 +6939,6 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_64_53( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -7249,8 +7138,6 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 
); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_64_54( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -7446,8 +7333,6 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_64_55( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -7631,8 +7516,6 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_64_56( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -7796,8 +7679,6 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_64_57( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -7965,8 +7846,6 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_64_58( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -8130,8 +8009,6 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_64_59( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -8283,8 +8160,6 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_64_60( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, 
in62, in63) \ do { \ @@ -8420,8 +8295,6 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_64_61( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -8548,8 +8421,6 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_64_62( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -8664,8 +8535,6 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_64_63( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -8760,8 +8629,6 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_128_65( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33, in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63, in64, in65 , \ in66, in67, in68, in69, in70, in71, in72, in73, in74, in75, in76, in77, in78, in79, in80, in81, in82, in83, in84, in85, in86, in87, in88, in89, in90, in91, in92, in93, in94, in95, in96, in97, in98, in99, in100, in101, in102, in103, in104, in105, in106, in107, in108, in109, in110, in111, in112, in113, in114, in115, in116, in117, in118, in119, in120, in121, in122, in123, in124, in125, in126, in127) \ do { \ @@ -9177,8 +9044,6 @@ in63 = GF_ADD( GF_MUL( scratch_127, 0 ), in63 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_128_66( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33, in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63, in64, in65 , \ in66, in67, in68, in69, in70, in71, in72, in73, in74, in75, in76, in77, in78, in79, in80, in81, in82, in83, in84, in85, in86, in87, in88, in89, in90, in91, in92, in93, in94, in95, in96, in97, in98, in99, in100, in101, in102, in103, in104, in105, in106, in107, in108, in109, in110, in111, in112, in113, in114, in115, in116, in117, in118, in119, in120, in121, in122, in123, in124, in125, in126, in127) \ do { \ @@ -9610,8 +9475,6 @@ in63 = GF_ADD( GF_MUL( scratch_127, 0 ), in63 ); \ } while( 0 ) - - #define 
FD_REEDSOL_PPT_IMPL_128_67( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33, in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63, in64, in65 , \ in66, in67, in68, in69, in70, in71, in72, in73, in74, in75, in76, in77, in78, in79, in80, in81, in82, in83, in84, in85, in86, in87, in88, in89, in90, in91, in92, in93, in94, in95, in96, in97, in98, in99, in100, in101, in102, in103, in104, in105, in106, in107, in108, in109, in110, in111, in112, in113, in114, in115, in116, in117, in118, in119, in120, in121, in122, in123, in124, in125, in126, in127) \ do { \ @@ -10051,8 +9914,6 @@ in63 = GF_ADD( GF_MUL( scratch_127, 0 ), in63 ); \ } while( 0 ) - - #define FD_REEDSOL_PPT_IMPL_128_68( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33, in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63, in64, in65 , \ in66, in67, in68, in69, in70, in71, in72, in73, in74, in75, in76, in77, in78, in79, in80, in81, in82, in83, in84, in85, in86, in87, in88, in89, in90, in91, in92, in93, in94, in95, in96, in97, in98, in99, in100, in101, in102, in103, in104, in105, in106, in107, in108, in109, in110, in111, in112, in113, in114, in115, in116, in117, in118, in119, in120, in121, in122, in123, in124, in125, in126, in127) \ do { \ @@ -10497,6 +10358,4 @@ in63 = GF_ADD( GF_MUL( scratch_127, 0 ), in63 ); \ } while( 0 ) - - #endif /* HEADER_fd_src_ballet_reedsol_fd_reedsol_ppt_h */ diff --git a/src/ballet/reedsol/fd_reedsol_private.h b/src/ballet/reedsol/fd_reedsol_private.h new file mode 100644 index 00000000000..b82ef685c69 --- /dev/null +++ b/src/ballet/reedsol/fd_reedsol_private.h @@ -0,0 +1,204 @@ +#ifndef HEADER_fd_src_ballet_reedsol_fd_reedsol_private_h +#define HEADER_fd_src_ballet_reedsol_fd_reedsol_private_h + +/* Contains function declarations for the interal encoding and recovery + functions. */ + +#include "fd_reedsol.h" + +/* FD_REEDSOL_ARITH_IMPL is used to select which implementation of + Galois Field arithmetic should be used. Supported implementations + include: + + 0 - unaccelerated + 1 - AVX accelerated + 2 - GFNI accelerated */ + +#ifndef FD_REEDSOL_ARITH_IMPL +#if FD_HAS_GFNI +#define FD_REEDSOL_ARITH_IMPL 2 +#elif FD_HAS_AVX +#define FD_REEDSOL_ARITH_IMPL 1 +#else +#define FD_REEDSOL_ARITH_IMPL 0 +#endif +#endif + +#if FD_REEDSOL_ARITH_IMPL==0 +#include "fd_reedsol_arith_none.h" +#elif FD_REEDSOL_ARITH_IMPL==1 +#include "fd_reedsol_arith_avx2.h" +#elif FD_REEDSOL_ARITH_IMPL==2 +#include "fd_reedsol_arith_gfni.h" +#else +#error "Unsupported FD_REEDSOL_ARITH_IMPL" +#endif + +/* FALLTHRU: Tells the compiler that falling through to the next case + of the switch statement is intentional and not a bug. When brutality + is turned on, this must be used. Clang an GCC differ on what + annotations they accept, but this works for both. */ +/* TODO: CONSIDER MOVING SOMETHING LIKE THIS TO UTIL_BASE.H? 
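+
+   Illustrative FALLTHRU usage (hypothetical sketch; remaining and
+   do_step are placeholders, not names in this patch):
+
+     switch( remaining ) {
+     case 2UL: do_step(); FALLTHRU
+     case 1UL: do_step(); break;
+     default:  break;
+     }
+
+   Note that the macro expansion below already ends in a semicolon, so
+   no additional ';' is written at the use site.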
*/ + +#define FALLTHRU __attribute__((fallthrough)); + +FD_PROTOTYPES_BEGIN + +/* fd_reedsol_private_encode_{n} requires that data_shred_cnt <= n */ + +void +fd_reedsol_private_encode_16( ulong shred_sz, + uchar const * const * data_shred, + ulong data_shred_cnt, + uchar * const * parity_shred, + ulong parity_shred_cnt ); + +void +fd_reedsol_private_encode_32( ulong shred_sz, + uchar const * const * data_shred, + ulong data_shred_cnt, + uchar * const * parity_shred, + ulong parity_shred_cnt ); + +void +fd_reedsol_private_encode_64( ulong shred_sz, + uchar const * const * data_shred, + ulong data_shred_cnt, + uchar * const * parity_shred, + ulong parity_shred_cnt ); + +void +fd_reedsol_private_encode_128( ulong shred_sz, + uchar const * const * data_shred, + ulong data_shred_cnt, + uchar * const * parity_shred, + ulong parity_shred_cnt ); + +#if FD_HAS_GFNI +void +fd_reedsol_private_encode_32_32( ulong shred_sz, + uchar const * const * data_shred, + uchar * const * parity_shred, + uchar * _scratch ); +#endif + +/* fd_reedsol_private_recover_var_{n}: Verifies the consistency + of the Reed-Solomon encoded data, and recovers any missing data. + At least data_shred_cnt of the first n shreds must be un-erased, + which implies data_shred_cnt <= n. + + Unlike the encode operations, the math doesn't care much whether a + shred is a data shred or parity shred for recover operations, hence + the function only has one shred array. The parity shreds come + immediately after the data shreds. + + For each value of i in [0, data_shred_cnt+parity_shred_cnt), erased[ + i ] must be 0 (if shred[ i ] contains valid data) or 1 if shred[ i ] + is an erasure (i.e. wasn't received, was corrupted, etc.). If + erased[ i ]==1, the contents of shred[ i ] are ignored on entry, and + upon return, shred[ i ][ j ] will be overwritten with the correct + data for j in [0, shred_sz). + + Note that since data_shred_cnt+parity_shred_cnt<=134, shred[ i ] and + erased[ i ] for i>=134 are completely ignored. + + Returns one of: + + FD_REEDSOL_SUCCESS if okay + + FD_REEDSOL_ERR_CORRUPT if the shreds are not consistent with having + come from a Reed-Solomon encoding of data_shred_cnt data shreds + + FD_REEDSOL_ERR_PARTIAL if there's not enough un-erased data to + recover data_shred_cnt data shreds + + TODO: Add a recover_private_first_{n} variant that imposes the + additional constraint that the first data_shred_cnt shreds must be + un-erased, is the case when no packets have been lost. Would be + slightly faster. */ + +int +fd_reedsol_private_recover_var_16( ulong shred_sz, + uchar * const * shred, + ulong data_shred_cnt, + ulong parity_shred_cnt, + uchar const * erased ); + +int +fd_reedsol_private_recover_var_32( ulong shred_sz, + uchar * const * shred, + ulong data_shred_cnt, + ulong parity_shred_cnt, + uchar const * erased ); + +int +fd_reedsol_private_recover_var_64( ulong shred_sz, + uchar * const * shred, + ulong data_shred_cnt, + ulong parity_shred_cnt, + uchar const * erased ); + +int +fd_reedsol_private_recover_var_128( ulong shred_sz, + uchar * const * shred, + ulong data_shred_cnt, + ulong parity_shred_cnt, + uchar const * erased ); + +int +fd_reedsol_private_recover_var_256( ulong shred_sz, + uchar * const * shred, + ulong data_shred_cnt, + ulong parity_shred_cnt, + uchar const * erased ); + +/* This below functions generate what: + + S. -J. Lin, T. Y. Al-Naffouri, Y. S. Han and W. -H. 
Chung, "Novel + Polynomial Basis With Fast Fourier Transform and Its Application to + Reed–Solomon Erasure Codes," in IEEE Transactions on Information + Theory, vol. 62, no. 11, pp. 6284-6299, Nov. 2016, doi: + 10.1109/TIT.2016.2608892. + + and: + + Didier, Frédéric. "Efficient erasure decoding of Reed-Solomon + codes." arXiv preprint arXiv:0901.1886 (2009). + + call Pi and 1/Pi'. For more information about Pi and Pi', see the + implementation or the papers referenced above. + + The main set of functions this file exposes is: + + void fd_reedsol_private_gen_pi_{N}( uchar const * is_erased, uchar * output ) + + for N in {16, 32, 64, 128, 256}. Since Pi is only needed for + elements that are not erased, Pi' is only needed for elements that + are erased, and it is computationally beneficial to compute them at + the same time, this function computes them both. + + is_erased and output must point to the first element of arrays + indexed [0, N). They must be aligned to 32 bytes. + + Upon return, output[i] stores Pi(i) if is_erased[i]==0 and 1/Pi'(i) + if is_erased[i]==1. It's undefined behavior for is_erased to contain + something other than 0 or 1. + + Pi and Pi' are both elements of GF(2^8) stored in their normal byte + representation. */ + +void fd_reedsol_private_gen_pi_16 ( uchar const * is_erased, uchar * output ); +void fd_reedsol_private_gen_pi_32 ( uchar const * is_erased, uchar * output ); +void fd_reedsol_private_gen_pi_64 ( uchar const * is_erased, uchar * output ); +void fd_reedsol_private_gen_pi_128( uchar const * is_erased, uchar * output ); +void fd_reedsol_private_gen_pi_256( uchar const * is_erased, uchar * output ); + +/* The following are the pre-computed values for common cases. + They're exposed in this header so that the values to multiply are + known at compile time to eliminate loads on the critical path. 
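+
+   Until such cases are added, callers go through the dynamic
+   fd_reedsol_private_gen_pi_{N} path documented above.  For example
+   (an illustrative sketch, not code from this patch), marking shred 3
+   of 32 as erased:
+
+     uchar is_erased[ 32 ] __attribute__((aligned(32))) = { 0 };
+     uchar pi       [ 32 ] __attribute__((aligned(32)));
+     is_erased[ 3 ] = 1;
+     fd_reedsol_private_gen_pi_32( is_erased, pi );
+
+   leaves pi[ i ]==Pi(i) for every i!=3 and pi[ 3 ]==1/Pi'(3), per the
+   contract above.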
*/ + +/* TODO: Decide on pre-computed cases and add them */ + +FD_PROTOTYPES_END + +#endif /* HEADER_fd_src_ballet_reedsol_fd_reedsol_private_h */ diff --git a/src/ballet/reedsol/fd_reedsol_recover_128.c b/src/ballet/reedsol/fd_reedsol_recover_128.c index 728bb443799..3945b00710c 100644 --- a/src/ballet/reedsol/fd_reedsol_recover_128.c +++ b/src/ballet/reedsol/fd_reedsol_recover_128.c @@ -1,23 +1,12 @@ -#include "../../util/fd_util.h" -#include "fd_reedsol.h" -#include "fd_reedsol_internal.h" -#if FD_HAS_GFNI -#include "fd_reedsol_arith_gfni.h" -#elif FD_HAS_AVX -#include "fd_reedsol_arith_avx2.h" -#else -#include "fd_reedsol_arith_none.h" -#endif -#include "fd_reedsol_fft.h" #include "fd_reedsol_ppt.h" #include "fd_reedsol_fderiv.h" -#include "fd_reedsol_pi.h" -int fd_reedsol_recover_var_128( ulong shred_sz, - uchar * const * shred, - ulong data_shred_cnt, - ulong parity_shred_cnt, - uchar const * erased ) { +int +fd_reedsol_private_recover_var_128( ulong shred_sz, + uchar * const * shred, + ulong data_shred_cnt, + ulong parity_shred_cnt, + uchar const * erased ) { uchar _erased[ 128 ] W_ATTR; uchar pi[ 128 ] W_ATTR; ulong shred_cnt = data_shred_cnt + parity_shred_cnt; @@ -27,9 +16,9 @@ int fd_reedsol_recover_var_128( ulong shred_sz, _erased[ i ] = !load_shred; loaded_cnt += (ulong)load_shred; } - if( FD_UNLIKELY( loaded_cnt {(r_offset, i_round+1, omega_)} and {(r_offset+2**i_round, i_round+1, omega_ )}") butterflies.append((1, r_offset+omega_, r_offset+2**i_round+omega_, ( i_round, omega_ , beta ), (r_offset, i_round, omega_), (r_offset, i_round, omega_ + 2**i_round), (r_offset, i_round+1, omega_), (r_offset+2**i_round, i_round+1, omega_ ) )) - butterflies.extend(op_ifft(h, beta, i_round+1, r_offset)) butterflies.extend(op_ifft(h, beta, i_round+1, r_offset+2**i_round)) return butterflies @@ -188,8 +177,6 @@ def op_ifft( h, beta, i_round, r_offset ): "inout0 = GF_ADD( inout0, GF_MUL( inout1, c ) );", ]) - - for N in (256, 128, 64, 32, 16, 8, 4): inputs = [f"in{j:02}" for j in range(N)] macro_lines = [ ] @@ -210,7 +197,6 @@ def op_ifft( h, beta, i_round, r_offset ): shift_specific = [ f'{(int(sbar[ c[0], c[1]^shift ])):3}' for c in consts_array ] print(f"#define FD_REEDSOL_IFFT_CONSTANTS_{N}_{shift:<2} " + ', '.join(shift_specific), file=outf) - for t, i0, i1, c, fi0, fi1, fo0, fo1 in butterflies: assert t==1 assert current_vars[i0] == fi0 @@ -220,7 +206,6 @@ def op_ifft( h, beta, i_round, r_offset ): current_vars[i1] = fo1 print_macro(f"FD_REEDSOL_IFFT_IMPL_{N}", [f"c_{j:02}" for j in range(len(const_to_cidx))] + inputs, macro_lines) - macro_lines = [ ] butterflies = op_fft(N, shift, 0, 0) @@ -237,7 +222,6 @@ def op_ifft( h, beta, i_round, r_offset ): shift_specific = [ f'{int(sbar[ c[0], (c[1]^shift)&0xFF ]):3}' for c in consts_array ] print(f"#define FD_REEDSOL_FFT_CONSTANTS_{N}_{shift:<2} " + ', '.join(shift_specific), file=outf) - for t, i0, i1, c, fi0, fi1, fo0, fo1 in butterflies: assert t==0 assert current_vars[i0] == fi0 @@ -247,5 +231,4 @@ def op_ifft( h, beta, i_round, r_offset ): current_vars[i1] = fo1 print_macro(f"FD_REEDSOL_FFT_IMPL_{N}", [f"c_{j:02}" for j in range(len(const_to_cidx))] + inputs, macro_lines) - print("#endif /* HEADER_fd_src_ballet_reedsol_fd_reedsol_fft_h */", file=outf) diff --git a/src/ballet/reedsol/generate_ppt.py b/src/ballet/reedsol/generate_ppt.py index 18fd08b2f06..f83e0aa2636 100644 --- a/src/ballet/reedsol/generate_ppt.py +++ b/src/ballet/reedsol/generate_ppt.py @@ -8,6 +8,8 @@ #ifndef HEADER_fd_src_ballet_reedsol_fd_reedsol_ppt_h #define 
HEADER_fd_src_ballet_reedsol_fd_reedsol_ppt_h +#include "fd_reedsol_fft.h" + /* This file implements the Principal Pivot Transform for the Reed Solomon FFT operator as described in: S. -J. Lin, A. Alloum and T. Al-Naffouri, "Principal pivot @@ -18,7 +20,6 @@ The main macro this file provides is FD_REEDSOL_GENERATE_PPT. The rest of this file is auto-generated implementation details. - When the number of data shreds we have is not a power of 2, the approach used in the 32-32 case doesn't apply. I found the paper extending it to the general case uninterpretable. So we use the @@ -82,12 +83,6 @@ arbitrary subset of them. This file only implements the specific case. */ -#include "fd_reedsol_fft.h" -#ifndef FD_REEDSOL_GF_ARITH_DEFINED -#error "You must include fd_reedsol_arith_gfni.h or fd_reedsol_arith_avx2.h before including this file" -#endif - - /* FD_REEDSOL_GENERATE_PPT: Inserts code to compute the principal pivot transform of size n (must be a power of 2, currently only 16 and 32 are emitted by the code generator) and when you have k known @@ -107,8 +102,6 @@ #define FD_REEDSOL_GENERATE_PPT(n, k, ...) FD_REEDSOL_PPT_IMPL_##n##_##k( __VA_ARGS__ ) - - """ outf = open('fd_reedsol_ppt.h', "wt") @@ -147,7 +140,7 @@ def m_fft( lg_h, beta ): omega_ = j*2**(i_round+1) idx = r + omega_ offset = 2**i_round - # print(f"Round {i_round} (offset {offset}), idx={idx} (paired with {idx+offset}): j = {j}") + # print(f"Round {i_round} (offset {offset}), idx={idx} (paired with {idx+offset}): j = {j}") s = GF(4) matrA[ idx, idx ] = 1 matrA[ idx, idx+offset ] = GF(sbar[i_round, omega_ + beta]) @@ -172,7 +165,7 @@ def m_ifft( lg_h, beta ): omega_ = j*2**(i_round+1) idx = r + omega_ offset = 2**i_round - # print(f"Round {i_round} (offset {offset}), idx={idx} (paired with {idx+offset}): j = {j}") + # print(f"Round {i_round} (offset {offset}), idx={idx} (paired with {idx+offset}): j = {j}") matrA[ idx+offset, idx ] = 1 matrA[ idx+offset, idx+offset ] = 1 matrA[ idx, idx ] = 1 @@ -203,7 +196,7 @@ def Bmatr(lg_sz, shift): def principal_pivot_transform_k_no_x(lg_sz, k, alpha_offset): n = 2**lg_sz - # alpha is [0, min(k - alpha_offset, n)) + # alpha is [0, min(k - alpha_offset, n)) if n>=4: if k-alpha_offset >= n: return [ ("IFFT", n, alpha_offset) ] @@ -220,7 +213,6 @@ def principal_pivot_transform_k_no_x(lg_sz, k, alpha_offset): matrix = GF(np.array([[GF(1)/f[0,0], f[0,1]/f[0,0]], [ f[1,0]/f[0,0], f[1,1]-f[1,0]*f[0,1]/f[0,0]]])) return [ ("MM22", alpha_offset, alpha_offset+1, matrix) ] - B = Bmatr(lg_sz, alpha_offset) Bupper = np.linalg.inv(B) Blower = GF(np.array([[GF(1)/B[0,0], B[0,1]/B[0,0]],[B[1,0]/B[0,0], B[1,1] - B[1,0]*B[0,1]/B[0,0]]])) @@ -240,8 +232,6 @@ def principal_pivot_transform_k_no_x(lg_sz, k, alpha_offset): else: operations.append( ("MM22", j+alpha_offset, j+alpha_offset+n2, B)) - - operations.extend( principal_pivot_transform_k_no_x(lg_sz-1, k, alpha_offset) ) # Fixup the part of J2 that needs U1 @@ -251,7 +241,6 @@ def principal_pivot_transform_k_no_x(lg_sz, k, alpha_offset): if in_alpha1 and not in_alpha2: operations.append( ("MULACC", j+n2+alpha_offset, j+alpha_offset, Blower[1,0]) ) - operations.extend( principal_pivot_transform_k_no_x(lg_sz-1, k, alpha_offset+n//2) ) for j in range(n2): @@ -290,7 +279,6 @@ def print_macro(macro_name, args, lines, indent=2): print(" "*indent + "} while( 0 )", file=outf) print("\n\n", file=outf) - print_macro("GF_MUL22", ["inout0", "inout1", "c00", "c01", "c10", "c11"], [ "gf_t temp = GF_ADD( GF_MUL( inout0, c00 ), GF_MUL( inout1, c01 ) );", "inout1 = GF_ADD( 
GF_MUL( inout0, c10 ), GF_MUL( inout1, c11 ) );", @@ -306,7 +294,6 @@ def print_macro(macro_name, args, lines, indent=2): scratch_to_declare = set() - for op in operations: if op[0] == "IFFT": n, shift = op[1:] @@ -341,7 +328,6 @@ def print_macro(macro_name, args, lines, indent=2): print_macro(f"FD_REEDSOL_PPT_IMPL_{N}_{k}", inputs, macro_lines) - if False: #debug first_bytes = GF([0]*1 + [1] +[0]*30) scratch_first_bytes = GF([0]*32) @@ -368,7 +354,4 @@ def print_macro(macro_name, args, lines, indent=2): dest, src_scratch, const = op[1:] first_bytes[dest] += scratch_first_bytes[src_scratch] * const - - - print("#endif /* HEADER_fd_src_ballet_reedsol_fd_reedsol_ppt_h */", file=outf) diff --git a/src/ballet/reedsol/generate_recover.py b/src/ballet/reedsol/generate_recover.py index 7085d573be8..ca16217ac98 100644 --- a/src/ballet/reedsol/generate_recover.py +++ b/src/ballet/reedsol/generate_recover.py @@ -1,4 +1,3 @@ - indent = 0 def cprint(string): global indent @@ -15,24 +14,12 @@ def cprint(string): def make_recover_var(n, max_shreds): global outf with open(f'fd_reedsol_recover_{n}.c', 'wt') as outf: - cprint('#include "../../util/fd_util.h"') - cprint('#include "fd_reedsol.h"') - cprint('#include "fd_reedsol_internal.h"') - cprint('#if FD_HAS_GFNI') - cprint('#include "fd_reedsol_arith_gfni.h"') - cprint('#elif FD_HAS_AVX') - cprint('#include "fd_reedsol_arith_avx2.h"') - cprint('#else') - cprint('#include "fd_reedsol_arith_none.h"') - cprint('#endif') - cprint('#include "fd_reedsol_fft.h"') cprint('#include "fd_reedsol_ppt.h"') cprint('#include "fd_reedsol_fderiv.h"') - cprint('#include "fd_reedsol_pi.h"') cprint('') - fn_name = f'int fd_reedsol_recover_var_{n}(' + fn_name = f'int fd_reedsol_private_recover_var_{n}(' cprint(fn_name + " ulong shred_sz,") cprint(" "*len(fn_name) + " uchar * const * shred,") cprint(" "*len(fn_name) + " ulong data_shred_cnt,") @@ -50,10 +37,10 @@ def make_recover_var(n, max_shreds): cprint(f'loaded_cnt += (ulong)load_shred;') cprint('}') - cprint(f'if( FD_UNLIKELY( loaded_cnt=shred_cnt, do nothing, which will keep the value of the") cprint(" shred if it existed in the variable. 
*/") - cprint("""#define STORE_COMPARE_RELOAD( n, var ) do{ \\ if( erased[ n ] ) gf_stu( shred[ n ] + shred_pos, var ); \\ else if( _erased[ n ] ) diff = GF_OR( diff, GF_ADD( var, gf_ldu( shred[ n ] + shred_pos ) ) ); \\ @@ -151,16 +136,14 @@ def make_recover_var(n, max_shreds): potential_shreds_remaining -= n chunk_cnt += 1 - cprint("if( FD_UNLIKELY( GF_ANY( diff ) ) ) return FD_REEDSOL_ERR_INCONSISTENT;") + cprint("if( FD_UNLIKELY( GF_ANY( diff ) ) ) return FD_REEDSOL_ERR_CORRUPT;") cprint('shred_pos += GF_WIDTH;') cprint('shred_pos = fd_ulong_if( ((shred_sz-GF_WIDTH) -#include "fd_reedsol.h" -#include "../../util/fd_util.h" - -#if FD_HAS_GFNI -#include "fd_reedsol_arith_gfni.h" -#elif FD_HAS_AVX -#include "fd_reedsol_arith_avx2.h" -#else -#include "fd_reedsol_arith_none.h" -#endif -#include "fd_reedsol_fft.h" #include "fd_reedsol_ppt.h" - -#include "fd_reedsol_pi.h" +#include FD_IMPORT_BINARY( fd_reedsol_generic_constants, "src/ballet/reedsol/constants/generic_constants.bin" ); static short const * log_tbl = (short const *)fd_reedsol_generic_constants; /* Indexed [0, 256) */ @@ -24,6 +11,10 @@ uchar data_shreds[ SHRED_SZ * FD_REEDSOL_DATA_SHREDS_MAX ]; uchar parity_shreds[ SHRED_SZ * FD_REEDSOL_PARITY_SHREDS_MAX ]; uchar recovered_shreds[ SHRED_SZ * FD_REEDSOL_PARITY_SHREDS_MAX ]; +FD_STATIC_ASSERT( FD_REEDSOL_SUCCESS == 0, unit_test ); +FD_STATIC_ASSERT( FD_REEDSOL_ERR_CORRUPT==-1, unit_test ); +FD_STATIC_ASSERT( FD_REEDSOL_ERR_PARTIAL==-2, unit_test ); + FD_STATIC_ASSERT( sizeof(fd_reedsol_t) == FD_REEDSOL_FOOTPRINT, reedsol_footprint ); uchar mem[ FD_REEDSOL_FOOTPRINT ] __attribute__((aligned(FD_REEDSOL_ALIGN))); @@ -81,7 +72,6 @@ void fd_reedsol_encode_ref( ulong shred_sz, long l_p = 255L - log_tbl[ top_matrix[ row ][ row ] ]; /* We've chosen row so that top_matrix[row][row] != 0, so 0p_cnt ) ) { FD_TEST( retval==FD_REEDSOL_ERR_INSUFFICIENT ); continue; } + if( FD_UNLIKELY( e_cnt>p_cnt ) ) { FD_TEST( retval==FD_REEDSOL_ERR_PARTIAL ); continue; } - FD_TEST( FD_REEDSOL_OK==retval ); + FD_TEST( FD_REEDSOL_SUCCESS==retval ); for( ulong i=0UL; i Date: Thu, 24 Aug 2023 20:59:31 +0000 Subject: [PATCH 5/6] fix bugs in gen_pi_noavx_generic --- src/ballet/reedsol/fd_reedsol_pi.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/src/ballet/reedsol/fd_reedsol_pi.c b/src/ballet/reedsol/fd_reedsol_pi.c index d1da559d5ca..281698f3859 100644 --- a/src/ballet/reedsol/fd_reedsol_pi.c +++ b/src/ballet/reedsol/fd_reedsol_pi.c @@ -109,7 +109,13 @@ ws_mod255( ws_t x ) { this works on the group Z/255Z (integers mod 255). Since 255 is not a prime, this is not a field, but the FD_REEDSOL_FWHT only needs addition, subtraction, and division by powers of 2 (which have inverses mod - 255), so it's not a problem. */ + 255), so it's not a problem. + + The typical FWHT multiplies by a factor of 1/sqrt(2) at each step. + To convert the unscaled version to the scaled version, divide the + result by sqrt(2)^lg(N). Since we often do two transforms, we need + to divide by N ( = (sqrt(2)^lg(N))^2 ). + */ #if FD_REEDSOL_PI_USE_SHORT @@ -351,7 +357,7 @@ exp_2( wb_t x ) { Although L~ for a smaller size is a subset of that for a larger size, because we also precompute the value of the FWHT, we store the variables separately. Perhaps a good compiler could - constant-propagate through the AVX instructions, but it's just 4 + constant-propagate through the AVX instructions, but it's just 5 values of N, so I prefer not to depend on that. 
*/ static const short fwht_l_twiddle_16 [ 16 ] = {0xca,0xa1,0x6a,0xa9,0x73,0xfc,0xe2,0x44,0x93,0x74,0x08,0x7f,0x96,0x8c,0x42,0xf2}; static const short fwht_l_twiddle_32 [ 32 ] = {0x24,0x8f,0xc2,0x7e,0x49,0x89,0x74,0xdc,0x4f,0x95,0x43,0xb4,0x09,0xba,0x03,0x83, @@ -395,7 +401,7 @@ gen_pi_noavx_generic( uchar const * is_erased, for( ulong i=0UL; i Date: Fri, 25 Aug 2023 02:58:54 -0500 Subject: [PATCH 6/6] refactor to improve compilation time --- src/ballet/reedsol/Local.mk | 26 +- src/ballet/reedsol/fd_reedsol_encode_128.c | 8 +- src/ballet/reedsol/fd_reedsol_encode_64.c | 63 +- src/ballet/reedsol/fd_reedsol_fft.h | 33 +- src/ballet/reedsol/fd_reedsol_ppt.h | 71 +- src/ballet/reedsol/fd_reedsol_recover_128.c | 5 +- src/ballet/reedsol/fd_reedsol_recover_256.c | 5 +- src/ballet/reedsol/generate_encode.py | 14 +- src/ballet/reedsol/generate_fft.py | 81 +- src/ballet/reedsol/generate_ppt.py | 61 +- src/ballet/reedsol/generate_recover.py | 16 +- src/ballet/reedsol/test_reedsol.c | 91 +- src/ballet/reedsol/wrapped_impl/Local.mk | 15 + .../wrapped_impl/fd_reedsol_fft_impl_128_0.c | 779 +++++++++ .../fd_reedsol_fft_impl_128_128.c | 779 +++++++++ .../wrapped_impl/fd_reedsol_fft_impl_256_0.c | 1547 +++++++++++++++++ .../wrapped_impl/fd_reedsol_fft_impl_64_0.c | 395 +++++ .../wrapped_impl/fd_reedsol_fft_impl_64_128.c | 395 +++++ .../wrapped_impl/fd_reedsol_fft_impl_64_64.c | 395 +++++ .../wrapped_impl/fd_reedsol_ppt_impl_17.c | 809 +++++++++ .../wrapped_impl/fd_reedsol_ppt_impl_25.c | 708 ++++++++ .../wrapped_impl/fd_reedsol_ppt_impl_33.c | 1380 +++++++++++++++ .../wrapped_impl/fd_reedsol_ppt_impl_40.c | 986 +++++++++++ .../wrapped_impl/fd_reedsol_ppt_impl_45.c | 986 +++++++++++ .../wrapped_impl/fd_reedsol_ppt_impl_50.c | 986 +++++++++++ .../wrapped_impl/fd_reedsol_ppt_impl_55.c | 986 +++++++++++ .../wrapped_impl/fd_reedsol_ppt_impl_60.c | 789 +++++++++ .../wrapped_impl/fd_reedsol_ppt_impl_65.c | 1168 +++++++++++++ 28 files changed, 13460 insertions(+), 117 deletions(-) create mode 100644 src/ballet/reedsol/wrapped_impl/Local.mk create mode 100644 src/ballet/reedsol/wrapped_impl/fd_reedsol_fft_impl_128_0.c create mode 100644 src/ballet/reedsol/wrapped_impl/fd_reedsol_fft_impl_128_128.c create mode 100644 src/ballet/reedsol/wrapped_impl/fd_reedsol_fft_impl_256_0.c create mode 100644 src/ballet/reedsol/wrapped_impl/fd_reedsol_fft_impl_64_0.c create mode 100644 src/ballet/reedsol/wrapped_impl/fd_reedsol_fft_impl_64_128.c create mode 100644 src/ballet/reedsol/wrapped_impl/fd_reedsol_fft_impl_64_64.c create mode 100644 src/ballet/reedsol/wrapped_impl/fd_reedsol_ppt_impl_17.c create mode 100644 src/ballet/reedsol/wrapped_impl/fd_reedsol_ppt_impl_25.c create mode 100644 src/ballet/reedsol/wrapped_impl/fd_reedsol_ppt_impl_33.c create mode 100644 src/ballet/reedsol/wrapped_impl/fd_reedsol_ppt_impl_40.c create mode 100644 src/ballet/reedsol/wrapped_impl/fd_reedsol_ppt_impl_45.c create mode 100644 src/ballet/reedsol/wrapped_impl/fd_reedsol_ppt_impl_50.c create mode 100644 src/ballet/reedsol/wrapped_impl/fd_reedsol_ppt_impl_55.c create mode 100644 src/ballet/reedsol/wrapped_impl/fd_reedsol_ppt_impl_60.c create mode 100644 src/ballet/reedsol/wrapped_impl/fd_reedsol_ppt_impl_65.c diff --git a/src/ballet/reedsol/Local.mk b/src/ballet/reedsol/Local.mk index 716efa0a4e6..fa06d194d46 100644 --- a/src/ballet/reedsol/Local.mk +++ b/src/ballet/reedsol/Local.mk @@ -1,16 +1,16 @@ $(call add-hdrs,fd_reedsol.h) ifdef FD_HAS_GFNI -$(call add-asms,fd_reedsol_gfni_32,fd_ballet) +$(call add-asms,fd_reedsol_gfni_32,fd_reedsol) endif 
-$(call add-objs,fd_reedsol,fd_ballet) -$(call add-objs,fd_reedsol_encode_16,fd_ballet) -$(call add-objs,fd_reedsol_encode_32,fd_ballet) -$(call add-objs,fd_reedsol_encode_64,fd_ballet) -$(call add-objs,fd_reedsol_encode_128,fd_ballet) -$(call add-objs,fd_reedsol_recover_16,fd_ballet) -$(call add-objs,fd_reedsol_recover_32,fd_ballet) -$(call add-objs,fd_reedsol_recover_64,fd_ballet) -$(call add-objs,fd_reedsol_recover_128,fd_ballet) -$(call add-objs,fd_reedsol_recover_256,fd_ballet) -$(call add-objs,fd_reedsol_pi,fd_ballet) -$(call make-unit-test,test_reedsol,test_reedsol,fd_ballet fd_util) +$(call add-objs,fd_reedsol,fd_reedsol) +$(call add-objs,fd_reedsol_encode_16,fd_reedsol) +$(call add-objs,fd_reedsol_encode_32,fd_reedsol) +$(call add-objs,fd_reedsol_encode_64,fd_reedsol) +$(call add-objs,fd_reedsol_encode_128,fd_reedsol) +$(call add-objs,fd_reedsol_recover_16,fd_reedsol) +$(call add-objs,fd_reedsol_recover_32,fd_reedsol) +$(call add-objs,fd_reedsol_recover_64,fd_reedsol) +$(call add-objs,fd_reedsol_recover_128,fd_reedsol) +$(call add-objs,fd_reedsol_recover_256,fd_reedsol) +$(call add-objs,fd_reedsol_pi,fd_reedsol) +$(call make-unit-test,test_reedsol,test_reedsol,fd_reedsol fd_util) diff --git a/src/ballet/reedsol/fd_reedsol_encode_128.c b/src/ballet/reedsol/fd_reedsol_encode_128.c index bb9605896a0..d4377be2fbf 100644 --- a/src/ballet/reedsol/fd_reedsol_encode_128.c +++ b/src/ballet/reedsol/fd_reedsol_encode_128.c @@ -62,11 +62,11 @@ fd_reedsol_private_encode_128( ulong shred_sz, case 65UL: in64 = gf_ldu( data_shred[ 64 ] + shred_pos ); } #define ALL_VARS in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33, in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63, in64, in65, in66, in67, in68, in69, in70, in71, in72, in73, in74, in75, in76, in77, in78, in79, in80, in81, in82, in83, in84, in85, in86, in87, in88, in89, in90, in91, in92, in93, in94, in95, in96, in97, in98, in99, in100, in101, in102, in103, in104, in105, in106, in107, in108, in109, in110, in111, in112, in113, in114, in115, in116, in117, in118, in119, in120, in121, in122, in123, in124, in125, in126, in127 + #define ALL_VARS_REF &in00, &in01, &in02, &in03, &in04, &in05, &in06, &in07, &in08, &in09, &in10, &in11, &in12, &in13, &in14, &in15, &in16, &in17, &in18, &in19, &in20, &in21, &in22, &in23, &in24, &in25, &in26, &in27, &in28, &in29, &in30, &in31, &in32, &in33, &in34, &in35, &in36, &in37, &in38, &in39, &in40, &in41, &in42, &in43, &in44, &in45, &in46, &in47, &in48, &in49, &in50, &in51, &in52, &in53, &in54, &in55, &in56, &in57, &in58, &in59, &in60, &in61, &in62, &in63, &in64, &in65, &in66, &in67, &in68, &in69, &in70, &in71, &in72, &in73, &in74, &in75, &in76, &in77, &in78, &in79, &in80, &in81, &in82, &in83, &in84, &in85, &in86, &in87, &in88, &in89, &in90, &in91, &in92, &in93, &in94, &in95, &in96, &in97, &in98, &in99, &in100, &in101, &in102, &in103, &in104, &in105, &in106, &in107, &in108, &in109, &in110, &in111, &in112, &in113, &in114, &in115, &in116, &in117, &in118, &in119, &in120, &in121, &in122, &in123, &in124, &in125, &in126, &in127 switch( data_shred_cnt ) { - case 128UL: FD_REEDSOL_GENERATE_IFFT( 128, 0, ALL_VARS ); break; - case 67UL: FD_REEDSOL_GENERATE_PPT( 128, 67, ALL_VARS ); break; - case 66UL: FD_REEDSOL_GENERATE_PPT( 128, 66, ALL_VARS ); 
break; - case 65UL: FD_REEDSOL_GENERATE_PPT( 128, 65, ALL_VARS ); break; + case 67UL: fd_reedsol_ppt_128_67( ALL_VARS_REF ); break; + case 66UL: fd_reedsol_ppt_128_66( ALL_VARS_REF ); break; + case 65UL: fd_reedsol_ppt_128_65( ALL_VARS_REF ); break; } /* That generated the first 128-data_shred_cnt parity shreds in the last 128-data_shred_cnt variables. We might only need diff --git a/src/ballet/reedsol/fd_reedsol_encode_64.c b/src/ballet/reedsol/fd_reedsol_encode_64.c index b46f7f3ffa4..b4f78a2f045 100644 --- a/src/ballet/reedsol/fd_reedsol_encode_64.c +++ b/src/ballet/reedsol/fd_reedsol_encode_64.c @@ -66,39 +66,40 @@ fd_reedsol_private_encode_64( ulong shred_sz, case 33UL: in32 = gf_ldu( data_shred[ 32 ] + shred_pos ); } #define ALL_VARS in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33, in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63 + #define ALL_VARS_REF &in00, &in01, &in02, &in03, &in04, &in05, &in06, &in07, &in08, &in09, &in10, &in11, &in12, &in13, &in14, &in15, &in16, &in17, &in18, &in19, &in20, &in21, &in22, &in23, &in24, &in25, &in26, &in27, &in28, &in29, &in30, &in31, &in32, &in33, &in34, &in35, &in36, &in37, &in38, &in39, &in40, &in41, &in42, &in43, &in44, &in45, &in46, &in47, &in48, &in49, &in50, &in51, &in52, &in53, &in54, &in55, &in56, &in57, &in58, &in59, &in60, &in61, &in62, &in63 switch( data_shred_cnt ) { case 64UL: FD_REEDSOL_GENERATE_IFFT( 64, 0, ALL_VARS ); break; - case 63UL: FD_REEDSOL_GENERATE_PPT( 64, 63, ALL_VARS ); break; - case 62UL: FD_REEDSOL_GENERATE_PPT( 64, 62, ALL_VARS ); break; - case 61UL: FD_REEDSOL_GENERATE_PPT( 64, 61, ALL_VARS ); break; - case 60UL: FD_REEDSOL_GENERATE_PPT( 64, 60, ALL_VARS ); break; - case 59UL: FD_REEDSOL_GENERATE_PPT( 64, 59, ALL_VARS ); break; - case 58UL: FD_REEDSOL_GENERATE_PPT( 64, 58, ALL_VARS ); break; - case 57UL: FD_REEDSOL_GENERATE_PPT( 64, 57, ALL_VARS ); break; - case 56UL: FD_REEDSOL_GENERATE_PPT( 64, 56, ALL_VARS ); break; - case 55UL: FD_REEDSOL_GENERATE_PPT( 64, 55, ALL_VARS ); break; - case 54UL: FD_REEDSOL_GENERATE_PPT( 64, 54, ALL_VARS ); break; - case 53UL: FD_REEDSOL_GENERATE_PPT( 64, 53, ALL_VARS ); break; - case 52UL: FD_REEDSOL_GENERATE_PPT( 64, 52, ALL_VARS ); break; - case 51UL: FD_REEDSOL_GENERATE_PPT( 64, 51, ALL_VARS ); break; - case 50UL: FD_REEDSOL_GENERATE_PPT( 64, 50, ALL_VARS ); break; - case 49UL: FD_REEDSOL_GENERATE_PPT( 64, 49, ALL_VARS ); break; - case 48UL: FD_REEDSOL_GENERATE_PPT( 64, 48, ALL_VARS ); break; - case 47UL: FD_REEDSOL_GENERATE_PPT( 64, 47, ALL_VARS ); break; - case 46UL: FD_REEDSOL_GENERATE_PPT( 64, 46, ALL_VARS ); break; - case 45UL: FD_REEDSOL_GENERATE_PPT( 64, 45, ALL_VARS ); break; - case 44UL: FD_REEDSOL_GENERATE_PPT( 64, 44, ALL_VARS ); break; - case 43UL: FD_REEDSOL_GENERATE_PPT( 64, 43, ALL_VARS ); break; - case 42UL: FD_REEDSOL_GENERATE_PPT( 64, 42, ALL_VARS ); break; - case 41UL: FD_REEDSOL_GENERATE_PPT( 64, 41, ALL_VARS ); break; - case 40UL: FD_REEDSOL_GENERATE_PPT( 64, 40, ALL_VARS ); break; - case 39UL: FD_REEDSOL_GENERATE_PPT( 64, 39, ALL_VARS ); break; - case 38UL: FD_REEDSOL_GENERATE_PPT( 64, 38, ALL_VARS ); break; - case 37UL: FD_REEDSOL_GENERATE_PPT( 64, 37, ALL_VARS ); break; - case 36UL: FD_REEDSOL_GENERATE_PPT( 64, 36, ALL_VARS ); break; - case 35UL: FD_REEDSOL_GENERATE_PPT( 64, 
35, ALL_VARS ); break; - case 34UL: FD_REEDSOL_GENERATE_PPT( 64, 34, ALL_VARS ); break; - case 33UL: FD_REEDSOL_GENERATE_PPT( 64, 33, ALL_VARS ); break; + case 63UL: fd_reedsol_ppt_64_63( ALL_VARS_REF ); break; + case 62UL: fd_reedsol_ppt_64_62( ALL_VARS_REF ); break; + case 61UL: fd_reedsol_ppt_64_61( ALL_VARS_REF ); break; + case 60UL: fd_reedsol_ppt_64_60( ALL_VARS_REF ); break; + case 59UL: fd_reedsol_ppt_64_59( ALL_VARS_REF ); break; + case 58UL: fd_reedsol_ppt_64_58( ALL_VARS_REF ); break; + case 57UL: fd_reedsol_ppt_64_57( ALL_VARS_REF ); break; + case 56UL: fd_reedsol_ppt_64_56( ALL_VARS_REF ); break; + case 55UL: fd_reedsol_ppt_64_55( ALL_VARS_REF ); break; + case 54UL: fd_reedsol_ppt_64_54( ALL_VARS_REF ); break; + case 53UL: fd_reedsol_ppt_64_53( ALL_VARS_REF ); break; + case 52UL: fd_reedsol_ppt_64_52( ALL_VARS_REF ); break; + case 51UL: fd_reedsol_ppt_64_51( ALL_VARS_REF ); break; + case 50UL: fd_reedsol_ppt_64_50( ALL_VARS_REF ); break; + case 49UL: fd_reedsol_ppt_64_49( ALL_VARS_REF ); break; + case 48UL: fd_reedsol_ppt_64_48( ALL_VARS_REF ); break; + case 47UL: fd_reedsol_ppt_64_47( ALL_VARS_REF ); break; + case 46UL: fd_reedsol_ppt_64_46( ALL_VARS_REF ); break; + case 45UL: fd_reedsol_ppt_64_45( ALL_VARS_REF ); break; + case 44UL: fd_reedsol_ppt_64_44( ALL_VARS_REF ); break; + case 43UL: fd_reedsol_ppt_64_43( ALL_VARS_REF ); break; + case 42UL: fd_reedsol_ppt_64_42( ALL_VARS_REF ); break; + case 41UL: fd_reedsol_ppt_64_41( ALL_VARS_REF ); break; + case 40UL: fd_reedsol_ppt_64_40( ALL_VARS_REF ); break; + case 39UL: fd_reedsol_ppt_64_39( ALL_VARS_REF ); break; + case 38UL: fd_reedsol_ppt_64_38( ALL_VARS_REF ); break; + case 37UL: fd_reedsol_ppt_64_37( ALL_VARS_REF ); break; + case 36UL: fd_reedsol_ppt_64_36( ALL_VARS_REF ); break; + case 35UL: fd_reedsol_ppt_64_35( ALL_VARS_REF ); break; + case 34UL: fd_reedsol_ppt_64_34( ALL_VARS_REF ); break; + case 33UL: fd_reedsol_ppt_64_33( ALL_VARS_REF ); break; } /* That generated the first 64-data_shred_cnt parity shreds in the last 64-data_shred_cnt variables. We might only need diff --git a/src/ballet/reedsol/fd_reedsol_fft.h b/src/ballet/reedsol/fd_reedsol_fft.h index 476b9f2ae32..5dd306e0771 100644 --- a/src/ballet/reedsol/fd_reedsol_fft.h +++ b/src/ballet/reedsol/fd_reedsol_fft.h @@ -16,9 +16,9 @@ implementation details. Like the normal FFT and IFFT, the operator implemented in this file - (and henceforward referred to as FFT and IFFT) tranforms between one - basis and another. Rather than tranformations of a signal between - the frequency domain and the time domain, these operators tranform a + (and henceforward referred to as FFT and IFFT) transforms between one + basis and another. Rather than transformations of a signal between + the frequency domain and the time domain, these operators transform a polynomial between domains we call the "evaluation basis" and the "coefficient basis". @@ -62,14 +62,14 @@ arithmetic). FD_REEDSOL_GENERATE_IFFT: Inserts code to transform n input values - from the evaluation basis to the coefficient basis, descrbing a + from the evaluation basis to the coefficient basis, describing a polynomial P(x) of degree no more than n such that P(b) = in0, P(b+1)=in1, ... P(b+n-1)=in_{n-1} (where this arithmetic on b is integer arithmetic, not GF(2^8) arithmetic). - For both macros, n must be a power of 2 (only 4, 8, 16, 32 are - emitted by the code generator at the moment), and b must be a - non-negative multiple of n no more than 32. 
Both b and n must be + For both macros, n must be a power of 2 (4, 8, 16, 32, 64, 128, and + 256 are emitted by the code generator at the moment), and b must be a + non-negative multiple of n no more than 134. Both b and n must be literal integer values. The remaining n arguments should be vector variables of type gf_t. @@ -85,6 +85,13 @@ #define FD_REEDSOL_GENERATE_FFT( n, b, ...) FD_REEDSOL_PRIVATE_EXPAND( FD_REEDSOL_FFT_IMPL_##n, FD_CONCAT4(FD_REEDSOL_FFT_CONSTANTS_, n, _, b), __VA_ARGS__ ) #define FD_REEDSOL_GENERATE_IFFT( n, b, ...) FD_REEDSOL_PRIVATE_EXPAND( FD_REEDSOL_IFFT_IMPL_##n, FD_CONCAT4(FD_REEDSOL_IFFT_CONSTANTS_, n, _, b), __VA_ARGS__ ) +/* For n>=64, this header also declares + void fd_reedsol_{fft,ifft}_n_b( gf_t *, ... ) + that takes n gf_t elements by reference. The arguments are used for + input and output, and it performs the same operation as the similarly + named macro, but this signature allows the function to be defined in + a different compilation unit to speed up compile times. */ + #define FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( inout0, inout1, c) \ do { \ inout0 = GF_ADD( inout0, GF_MUL( inout1, c ) ); \ @@ -1175,6 +1182,7 @@ FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in127, in255, c_254 ); \ } while( 0 ) +void fd_reedsol_ifft_256_0 ( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_FFT_CONSTANTS_256_0 0, 0, 133, 0, 218, 130, 88, 0, 71, 189, 250, 18, 85, 175, 232, 0, 11, 174, 165, 33, 42, 143, 132, 45, 38, 131, 136, 12, 7, 162, 169, 0, 22, 97, 119, 38, 48, 71, 81, 183, 161, 214, 192, 145, 135, 240, 230, 12, 26, 109, 123, 42, 60, 75, 93, 187, 173, 218, 204, 157, 139, 252, 234, 0, 6, 28, 26, 120, 126, 100, 98, 237, 235, 241, 247, 149, 147, 137, 143, 179, 181, 175, 169, 203, 205, 215, 209, 94, 88, 66, 68, 38, 32, 58, 60, 182, 176, 170, 172, 206, 
200, 210, 212, 91, 93, 71, 65, 35, 37, 63, 57, 5, 3, 25, 31, 125, 123, 97, 103, 232, 238, 244, 242, 144, 150, 140, 138, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 142, 144, 146, 148, 150, 152, 154, 156, 158, 160, 162, 164, 166, 168, 170, 172, 174, 176, 178, 180, 182, 184, 186, 188, 190, 192, 194, 196, 198, 200, 202, 204, 206, 208, 210, 212, 214, 216, 218, 220, 222, 224, 226, 228, 230, 232, 234, 236, 238, 240, 242, 244, 246, 248, 250, 252, 254 #define FD_REEDSOL_FFT_IMPL_256( c_00, c_01, c_02, c_03, c_04, c_05 , \ c_06, c_07, c_08, c_09, c_10, c_11, c_12, c_13, c_14, c_15, c_16 , \ @@ -2253,6 +2261,7 @@ FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in254, in255, c_254 ); \ } while( 0 ) +void fd_reedsol_fft_256_0 ( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_IFFT_CONSTANTS_128_0 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126, 0, 6, 28, 26, 120, 126, 100, 98, 237, 235, 241, 247, 149, 147, 137, 143, 179, 181, 175, 169, 203, 205, 215, 209, 94, 88, 66, 68, 38, 32, 58, 60, 0, 22, 97, 119, 38, 48, 71, 81, 183, 161, 214, 192, 145, 135, 240, 230, 0, 11, 174, 165, 33, 42, 143, 132, 0, 71, 189, 250, 0, 218, 0 #define FD_REEDSOL_IFFT_CONSTANTS_128_128 128, 130, 132, 134, 136, 138, 140, 142, 144, 146, 148, 150, 152, 154, 156, 158, 160, 162, 164, 166, 168, 170, 172, 174, 176, 178, 180, 182, 184, 186, 188, 190, 192, 
194, 196, 198, 200, 202, 204, 206, 208, 210, 212, 214, 216, 218, 220, 222, 224, 226, 228, 230, 232, 234, 236, 238, 240, 242, 244, 246, 248, 250, 252, 254, 182, 176, 170, 172, 206, 200, 210, 212, 91, 93, 71, 65, 35, 37, 63, 57, 5, 3, 25, 31, 125, 123, 97, 103, 232, 238, 244, 242, 144, 150, 140, 138, 12, 26, 109, 123, 42, 60, 75, 93, 187, 173, 218, 204, 157, 139, 252, 234, 45, 38, 131, 136, 12, 7, 162, 169, 18, 85, 175, 232, 130, 88, 133 #define FD_REEDSOL_IFFT_IMPL_128( c_00, c_01, c_02, c_03, c_04, c_05 , \ @@ -2731,6 +2740,8 @@ FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in63, in127, c_126 ); \ } while( 0 ) +void fd_reedsol_ifft_128_0 ( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); +void fd_reedsol_ifft_128_128( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_FFT_CONSTANTS_128_0 0, 0, 218, 0, 71, 189, 250, 0, 11, 174, 165, 33, 42, 143, 132, 0, 22, 97, 119, 38, 48, 71, 81, 183, 161, 214, 192, 145, 135, 240, 230, 0, 6, 28, 26, 120, 126, 100, 98, 237, 235, 241, 247, 149, 147, 137, 143, 179, 181, 175, 169, 203, 205, 215, 209, 94, 88, 66, 68, 38, 32, 58, 60, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126 #define FD_REEDSOL_FFT_CONSTANTS_128_128 133, 130, 88, 18, 85, 175, 232, 45, 38, 131, 136, 12, 7, 162, 169, 12, 26, 109, 123, 42, 60, 75, 93, 187, 173, 218, 204, 157, 139, 252, 234, 182, 176, 170, 172, 206, 200, 210, 212, 91, 93, 71, 65, 35, 37, 63, 57, 5, 3, 25, 31, 125, 123, 97, 103, 232, 238, 244, 242, 144, 150, 140, 138, 128, 130, 132, 134, 136, 138, 140, 142, 144, 146, 148, 150, 152, 154, 156, 158, 160, 162, 164, 166, 168, 170, 172, 174, 176, 178, 180, 182, 184, 186, 188, 190, 192, 194, 196, 198, 
200, 202, 204, 206, 208, 210, 212, 214, 216, 218, 220, 222, 224, 226, 228, 230, 232, 234, 236, 238, 240, 242, 244, 246, 248, 250, 252, 254 #define FD_REEDSOL_FFT_IMPL_128( c_00, c_01, c_02, c_03, c_04, c_05 , \ @@ -3209,6 +3220,8 @@ FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in126, in127, c_126 ); \ } while( 0 ) +void fd_reedsol_fft_128_0 ( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); +void fd_reedsol_fft_128_128( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_IFFT_CONSTANTS_64_0 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 0, 6, 28, 26, 120, 126, 100, 98, 237, 235, 241, 247, 149, 147, 137, 143, 0, 22, 97, 119, 38, 48, 71, 81, 0, 11, 174, 165, 0, 71, 0 #define FD_REEDSOL_IFFT_CONSTANTS_64_64 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126, 179, 181, 175, 169, 203, 205, 215, 209, 94, 88, 66, 68, 38, 32, 58, 60, 183, 161, 214, 192, 145, 135, 240, 230, 33, 42, 143, 132, 189, 250, 218 #define FD_REEDSOL_IFFT_CONSTANTS_64_128 128, 130, 132, 134, 136, 138, 140, 142, 144, 146, 148, 150, 152, 154, 156, 158, 160, 162, 164, 166, 168, 170, 172, 174, 176, 178, 180, 182, 184, 186, 188, 190, 182, 176, 170, 172, 206, 200, 210, 212, 91, 93, 71, 65, 35, 37, 63, 57, 12, 26, 109, 123, 42, 60, 75, 93, 45, 38, 131, 136, 18, 85, 130 @@ -3419,6 +3432,9 @@ FD_REEDSOL_PRIVATE_IFFT_BUTTERFLY( in31, in63, c_62 ); \ } while( 0 ) +void fd_reedsol_ifft_64_0 ( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, 
gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); +void fd_reedsol_ifft_64_64( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); +void fd_reedsol_ifft_64_128( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_FFT_CONSTANTS_64_0 0, 0, 71, 0, 11, 174, 165, 0, 22, 97, 119, 38, 48, 71, 81, 0, 6, 28, 26, 120, 126, 100, 98, 237, 235, 241, 247, 149, 147, 137, 143, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62 #define FD_REEDSOL_FFT_CONSTANTS_64_64 218, 189, 250, 33, 42, 143, 132, 183, 161, 214, 192, 145, 135, 240, 230, 179, 181, 175, 169, 203, 205, 215, 209, 94, 88, 66, 68, 38, 32, 58, 60, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126 #define FD_REEDSOL_FFT_CONSTANTS_64_128 130, 18, 85, 45, 38, 131, 136, 12, 26, 109, 123, 42, 60, 75, 93, 182, 176, 170, 172, 206, 200, 210, 212, 91, 93, 71, 65, 35, 37, 63, 57, 128, 130, 132, 134, 136, 138, 140, 142, 144, 146, 148, 150, 152, 154, 156, 158, 160, 162, 164, 166, 168, 170, 172, 174, 176, 178, 180, 182, 184, 186, 188, 190 @@ -3629,6 +3645,9 @@ FD_REEDSOL_PRIVATE_FFT_BUTTERFLY( in62, in63, c_62 ); \ } while( 0 ) +void fd_reedsol_fft_64_0 ( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); +void fd_reedsol_fft_64_64( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); +void fd_reedsol_fft_64_128( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, 
gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_IFFT_CONSTANTS_32_0 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 0, 6, 28, 26, 120, 126, 100, 98, 0, 22, 97, 119, 0, 11, 0 #define FD_REEDSOL_IFFT_CONSTANTS_32_32 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 237, 235, 241, 247, 149, 147, 137, 143, 38, 48, 71, 81, 174, 165, 71 #define FD_REEDSOL_IFFT_CONSTANTS_32_64 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 179, 181, 175, 169, 203, 205, 215, 209, 183, 161, 214, 192, 33, 42, 189 diff --git a/src/ballet/reedsol/fd_reedsol_ppt.h b/src/ballet/reedsol/fd_reedsol_ppt.h index c425987f54d..79c4679b048 100644 --- a/src/ballet/reedsol/fd_reedsol_ppt.h +++ b/src/ballet/reedsol/fd_reedsol_ppt.h @@ -77,18 +77,20 @@ arbitrary subset of them. This file only implements the specific case. */ - /* FD_REEDSOL_GENERATE_PPT: Inserts code to compute the principal - pivot transform of size n (must be a power of 2, currently only 16 - and 32 are emitted by the code generator) and when you have k known - elements of the evaluation domain (i.e. k data shreds). k must be in - [1,n-1]. The remaining n arguments should be vector variables of +/* FD_REEDSOL_GENERATE_PPT: Inserts code to compute the principal pivot + transform of size n (must be a power of 2, currently 16, 32, 64, and + 128 are emitted by the code generator) and when you have k known + elements of the evaluation domain (i.e. k data shreds). k must be + less than n, but the code generator adds the additional restrictions + that k<=67 and only the smallest n is chosen for each k. + Additionally, The remaining n arguments should be vector variables of type gf_t (which is a typedef for wb_t in the AVX case). These are used as input and output, since there's no other good way to return n vector values. As such, this macro is not robust. As explained above, the PPT computes the k non-zero elements of the coefficient domain, followed by the first n-k parity elements. If - the last n-k return values are repalced with zero, they can then be + the last n-k return values are replaced with zero, they can then be used with FD_REEDSOL_GENERATE_FFT and the appropriate shift to compute many more parity elements. The PPT is computed in a vectorized fashion, i.e. the PPT of the ith byte is computed and @@ -96,6 +98,13 @@ #define FD_REEDSOL_GENERATE_PPT(n, k, ...) FD_REEDSOL_PPT_IMPL_##n##_##k( __VA_ARGS__ ) +/* For n>=32, this header also declares + void fd_reedsol_ppt_n_k( gf_t *, ... ) + that takes n gf_t elements by reference. The arguments are used for + input and output, and it performs the same operation as the similarly + named macro, but this signature allows the function to be defined in + a different compilation unit to speed up compile times. 
*/ + #define GF_MUL22( inout0, inout1, c00, c01, c10, c11) \ do { \ gf_t temp = GF_ADD( GF_MUL( inout0, c00 ), GF_MUL( inout1, c01 ) ); \ @@ -895,6 +904,7 @@ in07 = GF_ADD( GF_MUL( scratch_15, 0 ), in07 ); \ } while( 0 ) +void fd_reedsol_ppt_32_17( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_32_17( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28 , \ in29, in30, in31) \ do { \ @@ -1009,6 +1019,7 @@ in15 = GF_ADD( GF_MUL( scratch_31, 0 ), in15 ); \ } while( 0 ) +void fd_reedsol_ppt_32_18( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_32_18( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28 , \ in29, in30, in31) \ do { \ @@ -1131,6 +1142,7 @@ in15 = GF_ADD( GF_MUL( scratch_31, 0 ), in15 ); \ } while( 0 ) +void fd_reedsol_ppt_32_19( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_32_19( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28 , \ in29, in30, in31) \ do { \ @@ -1253,6 +1265,7 @@ in15 = GF_ADD( GF_MUL( scratch_31, 0 ), in15 ); \ } while( 0 ) +void fd_reedsol_ppt_32_20( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_32_20( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28 , \ in29, in30, in31) \ do { \ @@ -1372,6 +1385,7 @@ in15 = GF_ADD( GF_MUL( scratch_31, 0 ), in15 ); \ } while( 0 ) +void fd_reedsol_ppt_32_21( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_32_21( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28 , \ in29, in30, in31) \ do { \ @@ -1494,6 +1508,7 @@ in15 = GF_ADD( GF_MUL( scratch_31, 0 ), in15 ); \ } while( 0 ) +void fd_reedsol_ppt_32_22( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_32_22( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, 
in25, in26, in27, in28 , \ in29, in30, in31) \ do { \ @@ -1616,6 +1631,7 @@ in15 = GF_ADD( GF_MUL( scratch_31, 0 ), in15 ); \ } while( 0 ) +void fd_reedsol_ppt_32_23( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_32_23( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28 , \ in29, in30, in31) \ do { \ @@ -1730,6 +1746,7 @@ in15 = GF_ADD( GF_MUL( scratch_31, 0 ), in15 ); \ } while( 0 ) +void fd_reedsol_ppt_32_24( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_32_24( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28 , \ in29, in30, in31) \ do { \ @@ -1827,6 +1844,7 @@ in15 = GF_ADD( GF_MUL( scratch_31, 0 ), in15 ); \ } while( 0 ) +void fd_reedsol_ppt_32_25( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_32_25( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28 , \ in29, in30, in31) \ do { \ @@ -1932,6 +1950,7 @@ in15 = GF_ADD( GF_MUL( scratch_31, 0 ), in15 ); \ } while( 0 ) +void fd_reedsol_ppt_32_26( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_32_26( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, \ in29, in30, in31) \ do { \ @@ -2037,6 +2056,7 @@ in15 = GF_ADD( GF_MUL( scratch_31, 0 ), in15 ); \ } while( 0 ) +void fd_reedsol_ppt_32_27( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_32_27( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, \ in25, in26, in27, in28, in29, in30, in31) \ do { \ @@ -2134,6 +2154,7 @@ in15 = GF_ADD( GF_MUL( scratch_31, 0 ), in15 ); \ } while( 0 ) +void fd_reedsol_ppt_32_28( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_32_28( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, \ in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31) \ do { \ @@ -2220,6 +2241,7 @@ in15 = 
GF_ADD( GF_MUL( scratch_31, 0 ), in15 ); \ } while( 0 ) +void fd_reedsol_ppt_32_29( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_32_29( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17 , \ in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31) \ do { \ @@ -2301,6 +2323,7 @@ in15 = GF_ADD( GF_MUL( scratch_31, 0 ), in15 ); \ } while( 0 ) +void fd_reedsol_ppt_32_30( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_32_30( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17 , \ in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31) \ do { \ @@ -2374,6 +2397,7 @@ in15 = GF_ADD( GF_MUL( scratch_31, 0 ), in15 ); \ } while( 0 ) +void fd_reedsol_ppt_32_31( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_32_31( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17 , \ in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31) \ do { \ @@ -2431,6 +2455,7 @@ in15 = GF_ADD( GF_MUL( scratch_31, 0 ), in15 ); \ } while( 0 ) +void fd_reedsol_ppt_64_33( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_64_33( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -2647,6 +2672,7 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) +void fd_reedsol_ppt_64_34( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_64_34( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, 
in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -2875,6 +2901,7 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) +void fd_reedsol_ppt_64_35( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_64_35( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -3107,6 +3134,7 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) +void fd_reedsol_ppt_64_36( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_64_36( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -3340,6 +3368,7 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) +void fd_reedsol_ppt_64_37( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_64_37( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -3580,6 +3609,7 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) +void fd_reedsol_ppt_64_38( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, 
gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_64_38( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -3824,6 +3854,7 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) +void fd_reedsol_ppt_64_39( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_64_39( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -4064,6 +4095,7 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) +void fd_reedsol_ppt_64_40( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_64_40( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -4292,6 +4324,7 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) +void fd_reedsol_ppt_64_41( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_64_41( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ 
-4532,6 +4565,7 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) +void fd_reedsol_ppt_64_42( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_64_42( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -4776,6 +4810,7 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) +void fd_reedsol_ppt_64_43( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_64_43( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -5016,6 +5051,7 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) +void fd_reedsol_ppt_64_44( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_64_44( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -5249,6 +5285,7 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) +void fd_reedsol_ppt_64_45( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define 
FD_REEDSOL_PPT_IMPL_64_45( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -5481,6 +5518,7 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) +void fd_reedsol_ppt_64_46( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_64_46( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -5709,6 +5747,7 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) +void fd_reedsol_ppt_64_47( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_64_47( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -5925,6 +5964,7 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) +void fd_reedsol_ppt_64_48( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_64_48( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -6111,6 +6151,7 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) +void fd_reedsol_ppt_64_49( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, 
gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_64_49( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -6319,6 +6360,7 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) +void fd_reedsol_ppt_64_50( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_64_50( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -6531,6 +6573,7 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) +void fd_reedsol_ppt_64_51( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_64_51( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -6739,6 +6782,7 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) +void fd_reedsol_ppt_64_52( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_64_52( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, 
in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -6939,6 +6983,7 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) +void fd_reedsol_ppt_64_53( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_64_53( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -7138,6 +7183,7 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) +void fd_reedsol_ppt_64_54( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_64_54( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -7333,6 +7379,7 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) +void fd_reedsol_ppt_64_55( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_64_55( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -7516,6 +7563,7 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) +void fd_reedsol_ppt_64_56( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, 
gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_64_56( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -7679,6 +7727,7 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) +void fd_reedsol_ppt_64_57( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_64_57( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -7846,6 +7895,7 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) +void fd_reedsol_ppt_64_58( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_64_58( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -8009,6 +8059,7 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) +void fd_reedsol_ppt_64_59( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_64_59( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, 
in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -8160,6 +8211,7 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) +void fd_reedsol_ppt_64_60( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_64_60( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -8295,6 +8347,7 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) +void fd_reedsol_ppt_64_61( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_64_61( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -8421,6 +8474,7 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) +void fd_reedsol_ppt_64_62( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_64_62( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -8535,6 +8589,7 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) +void fd_reedsol_ppt_64_63( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, 
gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_64_63( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33 , \ in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63) \ do { \ @@ -8629,6 +8684,7 @@ in31 = GF_ADD( GF_MUL( scratch_63, 0 ), in31 ); \ } while( 0 ) +void fd_reedsol_ppt_128_65( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_128_65( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33, in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63, in64, in65 , \ in66, in67, in68, in69, in70, in71, in72, in73, in74, in75, in76, in77, in78, in79, in80, in81, in82, in83, in84, in85, in86, in87, in88, in89, in90, in91, in92, in93, in94, in95, in96, in97, in98, in99, in100, in101, in102, in103, in104, in105, in106, in107, in108, in109, in110, in111, in112, in113, in114, in115, in116, in117, in118, in119, in120, in121, in122, in123, in124, in125, in126, in127) \ do { \ @@ -9044,6 +9100,7 @@ in63 = GF_ADD( GF_MUL( scratch_127, 0 ), in63 ); \ } while( 0 ) +void fd_reedsol_ppt_128_66( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_128_66( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, 
in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33, in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63, in64, in65 , \ in66, in67, in68, in69, in70, in71, in72, in73, in74, in75, in76, in77, in78, in79, in80, in81, in82, in83, in84, in85, in86, in87, in88, in89, in90, in91, in92, in93, in94, in95, in96, in97, in98, in99, in100, in101, in102, in103, in104, in105, in106, in107, in108, in109, in110, in111, in112, in113, in114, in115, in116, in117, in118, in119, in120, in121, in122, in123, in124, in125, in126, in127) \ do { \ @@ -9475,6 +9532,7 @@ in63 = GF_ADD( GF_MUL( scratch_127, 0 ), in63 ); \ } while( 0 ) +void fd_reedsol_ppt_128_67( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_128_67( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33, in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63, in64, in65 , \ in66, in67, in68, in69, in70, in71, in72, in73, in74, in75, in76, in77, in78, in79, in80, in81, in82, in83, in84, in85, in86, in87, in88, in89, in90, in91, in92, in93, in94, in95, in96, in97, in98, in99, in100, in101, in102, in103, in104, in105, in106, in107, in108, in109, in110, in111, in112, in113, in114, in115, in116, in117, in118, in119, in120, in121, in122, in123, in124, in125, in126, in127) \ do { \ @@ -9914,6 +9972,7 @@ in63 = GF_ADD( GF_MUL( scratch_127, 0 ), in63 ); \ } while( 0 ) +void fd_reedsol_ppt_128_68( gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, gf_t*, 
gf_t*, gf_t*, gf_t*, gf_t* ); #define FD_REEDSOL_PPT_IMPL_128_68( in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33, in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63, in64, in65 , \ in66, in67, in68, in69, in70, in71, in72, in73, in74, in75, in76, in77, in78, in79, in80, in81, in82, in83, in84, in85, in86, in87, in88, in89, in90, in91, in92, in93, in94, in95, in96, in97, in98, in99, in100, in101, in102, in103, in104, in105, in106, in107, in108, in109, in110, in111, in112, in113, in114, in115, in116, in117, in118, in119, in120, in121, in122, in123, in124, in125, in126, in127) \ do { \ diff --git a/src/ballet/reedsol/fd_reedsol_recover_128.c b/src/ballet/reedsol/fd_reedsol_recover_128.c index 3945b00710c..589ee18a093 100644 --- a/src/ballet/reedsol/fd_reedsol_recover_128.c +++ b/src/ballet/reedsol/fd_reedsol_recover_128.c @@ -288,12 +288,13 @@ fd_reedsol_private_recover_var_128( ulong shred_sz, in126 = GF_MUL_VAR( in126, pi[ 126 ] ); in127 = GF_MUL_VAR( in127, pi[ 127 ] ); #define ALL_VARS in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33, in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63, in64, in65, in66, in67, in68, in69, in70, in71, in72, in73, in74, in75, in76, in77, in78, in79, in80, in81, in82, in83, in84, in85, in86, in87, in88, in89, in90, in91, in92, in93, in94, in95, in96, in97, in98, in99, in100, in101, in102, in103, in104, in105, in106, in107, in108, in109, in110, in111, in112, in113, in114, in115, in116, in117, in118, in119, in120, in121, in122, in123, in124, in125, in126, in127 + #define ALL_VARS_REF &in00, &in01, &in02, &in03, &in04, &in05, &in06, &in07, &in08, &in09, &in10, &in11, &in12, &in13, &in14, &in15, &in16, &in17, &in18, &in19, &in20, &in21, &in22, &in23, &in24, &in25, &in26, &in27, &in28, &in29, &in30, &in31, &in32, &in33, &in34, &in35, &in36, &in37, &in38, &in39, &in40, &in41, &in42, &in43, &in44, &in45, &in46, &in47, &in48, &in49, &in50, &in51, &in52, &in53, &in54, &in55, &in56, &in57, &in58, &in59, &in60, &in61, &in62, &in63, &in64, &in65, &in66, &in67, &in68, &in69, &in70, &in71, &in72, &in73, &in74, &in75, &in76, &in77, &in78, &in79, &in80, &in81, &in82, &in83, &in84, &in85, &in86, &in87, &in88, &in89, &in90, &in91, &in92, &in93, &in94, &in95, &in96, &in97, &in98, &in99, &in100, &in101, &in102, &in103, &in104, &in105, &in106, &in107, &in108, &in109, &in110, &in111, &in112, &in113, &in114, &in115, &in116, &in117, &in118, &in119, &in120, &in121, &in122, &in123, &in124, &in125, &in126, &in127 - FD_REEDSOL_GENERATE_IFFT( 128, 0, ALL_VARS ); + fd_reedsol_ifft_128_0( ALL_VARS_REF ); FD_REEDSOL_GENERATE_FDERIV( 128, ALL_VARS ); - FD_REEDSOL_GENERATE_FFT( 128, 0, ALL_VARS ); + fd_reedsol_fft_128_0( ALL_VARS_REF ); /* Again, we only need to multiply the erased ones, since we don't use the value of the non-erased ones anymore, but I'll take diff --git a/src/ballet/reedsol/fd_reedsol_recover_256.c b/src/ballet/reedsol/fd_reedsol_recover_256.c index be5de057dd7..b8e6161aad6 100644 --- 
a/src/ballet/reedsol/fd_reedsol_recover_256.c +++ b/src/ballet/reedsol/fd_reedsol_recover_256.c @@ -422,12 +422,13 @@ fd_reedsol_private_recover_var_256( ulong shred_sz, in132 = GF_MUL_VAR( in132, pi[ 132 ] ); in133 = GF_MUL_VAR( in133, pi[ 133 ] ); #define ALL_VARS in00, in01, in02, in03, in04, in05, in06, in07, in08, in09, in10, in11, in12, in13, in14, in15, in16, in17, in18, in19, in20, in21, in22, in23, in24, in25, in26, in27, in28, in29, in30, in31, in32, in33, in34, in35, in36, in37, in38, in39, in40, in41, in42, in43, in44, in45, in46, in47, in48, in49, in50, in51, in52, in53, in54, in55, in56, in57, in58, in59, in60, in61, in62, in63, in64, in65, in66, in67, in68, in69, in70, in71, in72, in73, in74, in75, in76, in77, in78, in79, in80, in81, in82, in83, in84, in85, in86, in87, in88, in89, in90, in91, in92, in93, in94, in95, in96, in97, in98, in99, in100, in101, in102, in103, in104, in105, in106, in107, in108, in109, in110, in111, in112, in113, in114, in115, in116, in117, in118, in119, in120, in121, in122, in123, in124, in125, in126, in127, in128, in129, in130, in131, in132, in133, in134, in135, in136, in137, in138, in139, in140, in141, in142, in143, in144, in145, in146, in147, in148, in149, in150, in151, in152, in153, in154, in155, in156, in157, in158, in159, in160, in161, in162, in163, in164, in165, in166, in167, in168, in169, in170, in171, in172, in173, in174, in175, in176, in177, in178, in179, in180, in181, in182, in183, in184, in185, in186, in187, in188, in189, in190, in191, in192, in193, in194, in195, in196, in197, in198, in199, in200, in201, in202, in203, in204, in205, in206, in207, in208, in209, in210, in211, in212, in213, in214, in215, in216, in217, in218, in219, in220, in221, in222, in223, in224, in225, in226, in227, in228, in229, in230, in231, in232, in233, in234, in235, in236, in237, in238, in239, in240, in241, in242, in243, in244, in245, in246, in247, in248, in249, in250, in251, in252, in253, in254, in255 + #define ALL_VARS_REF &in00, &in01, &in02, &in03, &in04, &in05, &in06, &in07, &in08, &in09, &in10, &in11, &in12, &in13, &in14, &in15, &in16, &in17, &in18, &in19, &in20, &in21, &in22, &in23, &in24, &in25, &in26, &in27, &in28, &in29, &in30, &in31, &in32, &in33, &in34, &in35, &in36, &in37, &in38, &in39, &in40, &in41, &in42, &in43, &in44, &in45, &in46, &in47, &in48, &in49, &in50, &in51, &in52, &in53, &in54, &in55, &in56, &in57, &in58, &in59, &in60, &in61, &in62, &in63, &in64, &in65, &in66, &in67, &in68, &in69, &in70, &in71, &in72, &in73, &in74, &in75, &in76, &in77, &in78, &in79, &in80, &in81, &in82, &in83, &in84, &in85, &in86, &in87, &in88, &in89, &in90, &in91, &in92, &in93, &in94, &in95, &in96, &in97, &in98, &in99, &in100, &in101, &in102, &in103, &in104, &in105, &in106, &in107, &in108, &in109, &in110, &in111, &in112, &in113, &in114, &in115, &in116, &in117, &in118, &in119, &in120, &in121, &in122, &in123, &in124, &in125, &in126, &in127, &in128, &in129, &in130, &in131, &in132, &in133, &in134, &in135, &in136, &in137, &in138, &in139, &in140, &in141, &in142, &in143, &in144, &in145, &in146, &in147, &in148, &in149, &in150, &in151, &in152, &in153, &in154, &in155, &in156, &in157, &in158, &in159, &in160, &in161, &in162, &in163, &in164, &in165, &in166, &in167, &in168, &in169, &in170, &in171, &in172, &in173, &in174, &in175, &in176, &in177, &in178, &in179, &in180, &in181, &in182, &in183, &in184, &in185, &in186, &in187, &in188, &in189, &in190, &in191, &in192, &in193, &in194, &in195, &in196, &in197, &in198, &in199, &in200, &in201, &in202, &in203, &in204, &in205, &in206, &in207, &in208, 
&in209, &in210, &in211, &in212, &in213, &in214, &in215, &in216, &in217, &in218, &in219, &in220, &in221, &in222, &in223, &in224, &in225, &in226, &in227, &in228, &in229, &in230, &in231, &in232, &in233, &in234, &in235, &in236, &in237, &in238, &in239, &in240, &in241, &in242, &in243, &in244, &in245, &in246, &in247, &in248, &in249, &in250, &in251, &in252, &in253, &in254, &in255 - FD_REEDSOL_GENERATE_IFFT( 256, 0, ALL_VARS ); + fd_reedsol_ifft_256_0( ALL_VARS_REF ); FD_REEDSOL_GENERATE_FDERIV( 256, ALL_VARS ); - FD_REEDSOL_GENERATE_FFT( 256, 0, ALL_VARS ); + fd_reedsol_fft_256_0( ALL_VARS_REF ); /* Again, we only need to multiply the erased ones, since we don't use the value of the non-erased ones anymore, but I'll take diff --git a/src/ballet/reedsol/generate_encode.py b/src/ballet/reedsol/generate_encode.py index 896171e3cab..3f0da717aa9 100644 --- a/src/ballet/reedsol/generate_encode.py +++ b/src/ballet/reedsol/generate_encode.py @@ -13,7 +13,9 @@ def make_encode(min_data_shreds, max_data_shreds, max_parity_shreds): with open(f'fd_reedsol_encode_{n}.c', 'wt') as outf: cprint('#include "fd_reedsol_ppt.h"') - fn_name = f'void fd_reedsol_private_encode_{n}(' + cprint('') + cprint('void') + fn_name = f'fd_reedsol_private_encode_{n}(' cprint(fn_name + " ulong shred_sz,") cprint(" "*len(fn_name) + " uchar const * const * data_shred,") cprint(" "*len(fn_name) + " ulong data_shred_cnt,") @@ -36,10 +38,16 @@ def make_encode(min_data_shreds, max_data_shreds, max_parity_shreds): cprint("}") all_vars = [ f'in{k:02}' for k in range(n) ] cprint(f"#define ALL_VARS " + ", ".join(all_vars)) + if n>=64: + cprint(f"#define ALL_VARS_REF &" + ", &".join(all_vars)) cprint("switch( data_shred_cnt ) {") - cprint(f"case {n:2}UL: FD_REEDSOL_GENERATE_IFFT( {n:2}, {0:2}, ALL_VARS ); break;") + if n <= max_data_shreds: + cprint(f"case {n:2}UL: FD_REEDSOL_GENERATE_IFFT( {n:2}, {0:2}, ALL_VARS ); break;") for k in range(max_data_shreds-1, min_data_shreds-1, -1): - cprint(f"case {k:2}UL: FD_REEDSOL_GENERATE_PPT( {n:2}, {k:2}, ALL_VARS ); break;") + if n<64: + cprint(f"case {k:2}UL: FD_REEDSOL_GENERATE_PPT( {n:2}, {k:2}, ALL_VARS ); break;") + else: + cprint(f"case {k:2}UL: fd_reedsol_ppt_{n}_{k}( ALL_VARS_REF ); break;") cprint("}") cprint(f"/* That generated the first {n}-data_shred_cnt parity shreds in the") cprint(f" last {n}-data_shred_cnt variables. We might only need") diff --git a/src/ballet/reedsol/generate_fft.py b/src/ballet/reedsol/generate_fft.py index 9e3b2fccbb2..5375abc03ac 100644 --- a/src/ballet/reedsol/generate_fft.py +++ b/src/ballet/reedsol/generate_fft.py @@ -2,8 +2,7 @@ import numpy as np import numpy.linalg -header = """ -/* Note: This file is auto generated. */ +header = """/* Note: This file is auto generated. */ #ifndef HEADER_fd_src_ballet_reedsol_fd_reedsol_fft_h #define HEADER_fd_src_ballet_reedsol_fd_reedsol_fft_h @@ -21,9 +20,9 @@ implementation details. Like the normal FFT and IFFT, the operator implemented in this file - (and henceforward referred to as FFT and IFFT) tranforms between one - basis and another. Rather than tranformations of a signal between - the frequency domain and the time domain, these operators tranform a + (and henceforward referred to as FFT and IFFT) transforms between one + basis and another. Rather than transformations of a signal between + the frequency domain and the time domain, these operators transform a polynomial between domains we call the "evaluation basis" and the "coefficient basis". @@ -67,14 +66,14 @@ arithmetic). 
FD_REEDSOL_GENERATE_IFFT: Inserts code to transform n input values - from the evaluation basis to the coefficient basis, descrbing a + from the evaluation basis to the coefficient basis, describing a polynomial P(x) of degree no more than n such that P(b) = in0, P(b+1)=in1, ... P(b+n-1)=in_{n-1} (where this arithmetic on b is integer arithmetic, not GF(2^8) arithmetic). - For both macros, n must be a power of 2 (only 4, 8, 16, 32 are - emitted by the code generator at the moment), and b must be a - non-negative multiple of n no more than 32. Both b and n must be + For both macros, n must be a power of 2 (4, 8, 16, 32, 64, 128, and + 256 are emitted by the code generator at the moment), and b must be a + non-negative multiple of n no more than 134. Both b and n must be literal integer values. The remaining n arguments should be vector variables of type gf_t. @@ -90,6 +89,12 @@ #define FD_REEDSOL_GENERATE_FFT( n, b, ...) FD_REEDSOL_PRIVATE_EXPAND( FD_REEDSOL_FFT_IMPL_##n, FD_CONCAT4(FD_REEDSOL_FFT_CONSTANTS_, n, _, b), __VA_ARGS__ ) #define FD_REEDSOL_GENERATE_IFFT( n, b, ...) FD_REEDSOL_PRIVATE_EXPAND( FD_REEDSOL_IFFT_IMPL_##n, FD_CONCAT4(FD_REEDSOL_IFFT_CONSTANTS_, n, _, b), __VA_ARGS__ ) +/* For n>=64, this header also declares + void fd_reedsol_{fft,ifft}_n_b( gf_t *, ... ) + that takes n gf_t elements by reference. The arguments are used for + input and output, and it performs the same operation as the similarly + named macro, but this signature allows the function to be defined in + a different compilation unit to speed up compile times. */ """ outf = open('fd_reedsol_fft.h', "wt") @@ -134,7 +139,7 @@ def print_macro(macro_name, args, lines, indent=2): for line in lines: print(" "*(2*indent) + line + " "*(maxwidth-len(line)-1-2*indent) + "\\", file=outf) print(" "*indent + "} while( 0 )", file=outf) - print("\n\n", file=outf) + print("", file=outf) def op_fft( h, beta, i_round, r_offset ): # print(f"Calling a_fft( {h}, {beta}, {i_round}, {r_offset} )") @@ -206,6 +211,10 @@ def op_ifft( h, beta, i_round, r_offset ): current_vars[i1] = fo1 print_macro(f"FD_REEDSOL_IFFT_IMPL_{N}", [f"c_{j:02}" for j in range(len(const_to_cidx))] + inputs, macro_lines) + if N>=64: + for shift in range(0, 67*2, N): + print(f"void fd_reedsol_ifft_{N}_{shift:<2}( " + ', '.join(['gf_t*']*N) + " );", file=outf) + macro_lines = [ ] butterflies = op_fft(N, shift, 0, 0) @@ -231,4 +240,56 @@ def op_ifft( h, beta, i_round, r_offset ): current_vars[i1] = fo1 print_macro(f"FD_REEDSOL_FFT_IMPL_{N}", [f"c_{j:02}" for j in range(len(const_to_cidx))] + inputs, macro_lines) + if N>=64: + for shift in range(0, 67*2, N): + print(f"void fd_reedsol_fft_{N}_{shift:<2}( " + ', '.join(['gf_t*']*N) + " );", file=outf) + print("#endif /* HEADER_fd_src_ballet_reedsol_fd_reedsol_fft_h */", file=outf) + +for N in (256, 128, 64): + for shift in range(0, 67*2, N): + with open(f'wrapped_impl/fd_reedsol_fft_impl_{N}_{shift}.c', "wt") as outf: + print('#include "../fd_reedsol_fft.h"', file=outf) + print('\nvoid', file=outf) + fn_name = f"fd_reedsol_fft_{N}_{shift}( " + print(fn_name + "gf_t * _in00,", file=outf) + for l in range(1, N): + if l=32, this header also declares + void fd_reedsol_ppt_n_k( gf_t *, ... ) + that takes n gf_t elements by reference. The arguments are used for + input and output, and it performs the same operation as the similarly + named macro, but this signature allows the function to be defined in + a different compilation unit to speed up compile times. 
*/ """ outf = open('fd_reedsol_ppt.h', "wt") @@ -277,7 +284,7 @@ def print_macro(macro_name, args, lines, indent=2): for line in lines: print(" "*(2*indent) + line + " "*(maxwidth-len(line)-1-2*indent) + "\\", file=outf) print(" "*indent + "} while( 0 )", file=outf) - print("\n\n", file=outf) + print("", file=outf) print_macro("GF_MUL22", ["inout0", "inout1", "c00", "c01", "c10", "c11"], [ "gf_t temp = GF_ADD( GF_MUL( inout0, c00 ), GF_MUL( inout1, c01 ) );", @@ -326,6 +333,8 @@ def print_macro(macro_name, args, lines, indent=2): scratch_to_declare = scratch_to_declare[16:] macro_lines = scratch_lines + macro_lines + if N>=32: + print(f"void fd_reedsol_ppt_{N}_{k}( { ', '.join(['gf_t*']*N) } );", file=outf) print_macro(f"FD_REEDSOL_PPT_IMPL_{N}_{k}", inputs, macro_lines) if False: #debug @@ -355,3 +364,37 @@ def print_macro(macro_name, args, lines, indent=2): first_bytes[dest] += scratch_first_bytes[src_scratch] * const print("#endif /* HEADER_fd_src_ballet_reedsol_fd_reedsol_ppt_h */", file=outf) + + +# file 2..n +batches = (17, 25, 33, 40, 45, 50, 55, 60, 65, 68) +for j in range(len(batches)-1): + start = batches[j] + end = batches[j+1] # exclusive + with open(f'wrapped_impl/fd_reedsol_ppt_impl_{start}.c', "wt") as outf: + print('#include "../fd_reedsol_ppt.h"', file=outf) + for k in range(start, end): + N = 1<<(k-1).bit_length() # Round to next power of 2 + if k==N: + continue # Skip powers of 2 because we don't use PPT in those cases + print('\nvoid', file=outf) + fn_name = f"fd_reedsol_ppt_{N}_{k}( " + print(fn_name + "gf_t * _in00,", file=outf) + for l in range(1, N): + if l64: + cprint(f"#define ALL_VARS_REF &" + ", &".join(all_vars)) cprint('') - cprint(f'FD_REEDSOL_GENERATE_IFFT( {n}, 0, ALL_VARS );') + if n>64: + cprint(f'fd_reedsol_ifft_{n}_0( ALL_VARS_REF );') + else: + cprint(f'FD_REEDSOL_GENERATE_IFFT( {n}, 0, ALL_VARS );') cprint('') cprint(f'FD_REEDSOL_GENERATE_FDERIV( {n}, ALL_VARS );') cprint('') - cprint(f'FD_REEDSOL_GENERATE_FFT( {n}, 0, ALL_VARS );') + if n>64: + cprint(f'fd_reedsol_fft_{n}_0( ALL_VARS_REF );') + else: + cprint(f'FD_REEDSOL_GENERATE_FFT( {n}, 0, ALL_VARS );') cprint('') cprint("/* Again, we only need to multiply the erased ones, since we don't") diff --git a/src/ballet/reedsol/test_reedsol.c b/src/ballet/reedsol/test_reedsol.c index 3c7385237d7..e4eb6718ec7 100644 --- a/src/ballet/reedsol/test_reedsol.c +++ b/src/ballet/reedsol/test_reedsol.c @@ -262,7 +262,8 @@ test_linearity( linear_func_t to_test, #define LOAD_VAR(offset, binary) gf_t v##binary = gf_ldu( inputs[offset] ); #define STORE_VAR(offset, binary) gf_stu( outputs[offset], v##binary ); -#define VAR(offset, binary) v##binary +#define VAR( offset, binary) v##binary +#define REFVAR(offset, binary) &v##binary #define COMMA() , #define NO_SEP() @@ -292,6 +293,32 @@ wrapped_ifft_##N##_shift( linear_chunk_t * inputs, linear_chunk_t * outputs ) { REPEAT_##N(STORE_VAR, NO_SEP , 0, ) \ } +#define WRAP_FFT2(N) \ +static void \ +wrapped_fft_##N( linear_chunk_t * inputs, linear_chunk_t * outputs ) { \ + REPEAT_##N(LOAD_VAR, NO_SEP , 0, ) \ + fd_reedsol_fft_##N##_0( REPEAT_##N(REFVAR, COMMA, 0, ) ); \ + REPEAT_##N(STORE_VAR, NO_SEP , 0, ) \ +} \ +static void \ +wrapped_fft_##N##_shift( linear_chunk_t * inputs, linear_chunk_t * outputs ) { \ + REPEAT_##N(LOAD_VAR, NO_SEP , 0, ) \ + fd_reedsol_fft_##N##_##N( REPEAT_##N(REFVAR, COMMA, 0, ) ); \ + REPEAT_##N(STORE_VAR, NO_SEP , 0, ) \ +} \ +static void \ +wrapped_ifft_##N( linear_chunk_t * inputs, linear_chunk_t * outputs ) { \ + REPEAT_##N(LOAD_VAR, NO_SEP , 
0, ) \ + fd_reedsol_ifft_##N##_0( REPEAT_##N(REFVAR, COMMA, 0, ) ); \ + REPEAT_##N(STORE_VAR, NO_SEP , 0, ) \ +} \ +static void \ +wrapped_ifft_##N##_shift( linear_chunk_t * inputs, linear_chunk_t * outputs ) { \ + REPEAT_##N(LOAD_VAR, NO_SEP , 0, ) \ + fd_reedsol_ifft_##N##_##N( REPEAT_##N(REFVAR, COMMA, 0, ) ); \ + REPEAT_##N(STORE_VAR, NO_SEP , 0, ) \ +} + #define INVOKE(M, ...) M( __VA_ARGS__) #define WRAP_PPT(N, K) \ @@ -301,6 +328,13 @@ wrapped_ppt_##N##_## K ( linear_chunk_t * inputs, linear_chunk_t * outputs ) { \ FD_REEDSOL_GENERATE_PPT( N, K, REPEAT_##N(VAR, COMMA, 0, ) ); \ REPEAT_##N(STORE_VAR, NO_SEP , 0, ) \ } +#define WRAP_PPT2(N, K) \ +static void \ +wrapped_ppt_##N##_## K ( linear_chunk_t * inputs, linear_chunk_t * outputs ) { \ + INVOKE(REPEAT_##N, LOAD_VAR, NO_SEP , 0, ) \ + fd_reedsol_ppt_##N##_##K( REPEAT_##N(REFVAR, COMMA, 0, ) ); \ + REPEAT_##N(STORE_VAR, NO_SEP , 0, ) \ +} static ulong wrapped_data_shred_cnt, wrapped_parity_shred_cnt; static void @@ -313,28 +347,28 @@ wrapped_encode_generic( linear_chunk_t * inputs, linear_chunk_t * outputs ) { fd_reedsol_encode_fini( rs ); } -WRAP_FFT(4) WRAP_FFT(8) WRAP_FFT(16) WRAP_FFT(32) WRAP_FFT(64) WRAP_FFT(128) +WRAP_FFT(4) WRAP_FFT(8) WRAP_FFT(16) WRAP_FFT(32) WRAP_FFT2(64) WRAP_FFT2(128) WRAP_PPT(16, 1) WRAP_PPT(16, 2) WRAP_PPT(16, 3) WRAP_PPT(16, 4) WRAP_PPT(16, 5) WRAP_PPT(16, 6) WRAP_PPT(16, 7) WRAP_PPT(16, 8) WRAP_PPT(16, 9) WRAP_PPT(16, 10) WRAP_PPT(16, 11) WRAP_PPT(16, 12) WRAP_PPT(16, 13) WRAP_PPT(16, 14) WRAP_PPT(16, 15) -WRAP_PPT(32, 17) WRAP_PPT(32, 18) WRAP_PPT(32, 19) WRAP_PPT(32, 20) -WRAP_PPT(32, 21) WRAP_PPT(32, 22) WRAP_PPT(32, 23) WRAP_PPT(32, 24) -WRAP_PPT(32, 25) WRAP_PPT(32, 26) WRAP_PPT(32, 27) WRAP_PPT(32, 28) -WRAP_PPT(32, 29) WRAP_PPT(32, 30) WRAP_PPT(32, 31) +WRAP_PPT2(32, 17) WRAP_PPT2(32, 18) WRAP_PPT2(32, 19) WRAP_PPT2(32, 20) +WRAP_PPT2(32, 21) WRAP_PPT2(32, 22) WRAP_PPT2(32, 23) WRAP_PPT2(32, 24) +WRAP_PPT2(32, 25) WRAP_PPT2(32, 26) WRAP_PPT2(32, 27) WRAP_PPT2(32, 28) +WRAP_PPT2(32, 29) WRAP_PPT2(32, 30) WRAP_PPT2(32, 31) -WRAP_PPT(64, 33) WRAP_PPT(64, 34) WRAP_PPT(64, 35) WRAP_PPT(64, 36) -WRAP_PPT(64, 37) WRAP_PPT(64, 38) WRAP_PPT(64, 39) WRAP_PPT(64, 40) -WRAP_PPT(64, 41) WRAP_PPT(64, 42) WRAP_PPT(64, 43) WRAP_PPT(64, 44) -WRAP_PPT(64, 45) WRAP_PPT(64, 46) WRAP_PPT(64, 47) WRAP_PPT(64, 48) -WRAP_PPT(64, 49) WRAP_PPT(64, 50) WRAP_PPT(64, 51) WRAP_PPT(64, 52) -WRAP_PPT(64, 53) WRAP_PPT(64, 54) WRAP_PPT(64, 55) WRAP_PPT(64, 56) -WRAP_PPT(64, 57) WRAP_PPT(64, 58) WRAP_PPT(64, 59) WRAP_PPT(64, 60) -WRAP_PPT(64, 61) WRAP_PPT(64, 62) WRAP_PPT(64, 63) +WRAP_PPT2(64, 33) WRAP_PPT2(64, 34) WRAP_PPT2(64, 35) WRAP_PPT2(64, 36) +WRAP_PPT2(64, 37) WRAP_PPT2(64, 38) WRAP_PPT2(64, 39) WRAP_PPT2(64, 40) +WRAP_PPT2(64, 41) WRAP_PPT2(64, 42) WRAP_PPT2(64, 43) WRAP_PPT2(64, 44) +WRAP_PPT2(64, 45) WRAP_PPT2(64, 46) WRAP_PPT2(64, 47) WRAP_PPT2(64, 48) +WRAP_PPT2(64, 49) WRAP_PPT2(64, 50) WRAP_PPT2(64, 51) WRAP_PPT2(64, 52) +WRAP_PPT2(64, 53) WRAP_PPT2(64, 54) WRAP_PPT2(64, 55) WRAP_PPT2(64, 56) +WRAP_PPT2(64, 57) WRAP_PPT2(64, 58) WRAP_PPT2(64, 59) WRAP_PPT2(64, 60) +WRAP_PPT2(64, 61) WRAP_PPT2(64, 62) WRAP_PPT2(64, 63) -WRAP_PPT(128, 65) WRAP_PPT(128, 66) WRAP_PPT(128, 67) +WRAP_PPT2(128, 65) WRAP_PPT2(128, 66) WRAP_PPT2(128, 67) static void test_linearity_all( fd_rng_t * rng ) { @@ -414,7 +448,7 @@ test_linearity_all( fd_rng_t * rng ) { /* Reference implementations for s, S, and X as defined in fd_reedsol_fft.h */ static uchar -s_ref( int j, uchar x ) { /* j in [0, 5) */ +s_ref( int j, uchar x ) { /* j in [0, 
6) */ ulong mask = fd_ulong_mask_lsb( j ); ulong min_x = x & (~mask); ulong max_x = min_x + mask + 1UL; @@ -427,9 +461,9 @@ s_ref( int j, uchar x ) { /* j in [0, 5) */ static uchar S_ref( int j, uchar x ) { return gfmul( s_ref( j, x ), gfinv( s_ref( j, (uchar)(1<