@@ -68,7 +68,18 @@ private static unsafe OperationStatus DecodeFromUtf8(ReadOnlySpan<byte> utf8, Sp
6868
6969                if  ( maxSrcLength  >=  24 ) 
7070                { 
71-                     byte *  end  =  srcMax  -  45 ; 
71+                     byte *  end  =  srcMax  -  88 ; 
72+                     if  ( Vector512 . IsHardwareAccelerated  &&  Avx512Vbmi . IsSupported  &&  ( end  >=  src ) ) 
73+                     { 
74+                         Avx512Decode ( ref  src ,  ref  dest ,  end ,  maxSrcLength ,  destLength ,  srcBytes ,  destBytes ) ; 
75+ 
76+                         if  ( src  ==  srcEnd ) 
77+                         { 
78+                             goto  DoneExit ; 
79+                         } 
80+                     } 
81+ 
82+                     end  =  srcMax  -  45 ; 
7283                    if  ( Avx2 . IsSupported  &&  ( end  >=  src ) ) 
7384                    { 
7485                        Avx2Decode ( ref  src ,  ref  dest ,  end ,  maxSrcLength ,  destLength ,  srcBytes ,  destBytes ) ; 
@@ -616,6 +627,78 @@ private static OperationStatus DecodeWithWhiteSpaceFromUtf8InPlace(Span<byte> ut
616627            return  status ; 
617628        } 
618629
630+         [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ] 
631+         [ CompExactlyDependsOn ( typeof ( Avx512BW ) ) ] 
632+         [ CompExactlyDependsOn ( typeof ( Avx512Vbmi ) ) ] 
633+         private  static unsafe  void  Avx512Decode ( ref  byte *  srcBytes ,  ref  byte *  destBytes ,  byte *  srcEnd ,  int  sourceLength ,  int  destLength ,  byte *  srcStart ,  byte *  destStart ) 
634+         { 
635+             // Vectorized Base64 decode: each loop iteration loads 64 encoded bytes from src and advances dest by 48 decoded bytes.
636+             // Reference for VBMI implementation : https://github.com/WojciechMula/base64simd/tree/master/decode 
637+             // If we have AVX512 support, pick off 64 bytes at a time for as long as we can, but make sure that we quit
638+             // before seeing any == markers at the end of the string. Also, because every store writes a full 64-byte
639+             // vector while dest only advances by 48, ensure that there are at least 22 valid bytes of input data remaining
640+             // to close the 16-byte gap. 64 + 2 + 22 = 88 bytes; the visible caller passes srcEnd = srcMax - 88 accordingly.
641+             byte *  src  =  srcBytes ; 
642+             byte *  dest  =  destBytes ; 
643+ 
644+             // The JIT won't hoist these "constants", so help it 
645+             Vector512 < sbyte >  vbmiLookup0  =  Vector512 . Create ( 
646+                 0x80808080 ,  0x80808080 ,  0x80808080 ,  0x80808080 , 
647+                 0x80808080 ,  0x80808080 ,  0x80808080 ,  0x80808080 , 
648+                 0x80808080 ,  0x80808080 ,  0x3e808080 ,  0x3f808080 , 
649+                 0x37363534 ,  0x3b3a3938 ,  0x80803d3c ,  0x80808080 ) . AsSByte ( ) ; 
650+             Vector512 < sbyte >  vbmiLookup1  =  Vector512 . Create ( 
651+                 0x02010080 ,  0x06050403 ,  0x0a090807 ,  0x0e0d0c0b , 
652+                 0x1211100f ,  0x16151413 ,  0x80191817 ,  0x80808080 , 
653+                 0x1c1b1a80 ,  0x201f1e1d ,  0x24232221 ,  0x28272625 , 
654+                 0x2c2b2a29 ,  0x302f2e2d ,  0x80333231 ,  0x80808080 ) . AsSByte ( ) ; 
655+             Vector512 < byte >  vbmiPackedLanesControl  =  Vector512 . Create ( 
656+                 0x06000102 ,  0x090a0405 ,  0x0c0d0e08 ,  0x16101112 , 
657+                 0x191a1415 ,  0x1c1d1e18 ,  0x26202122 ,  0x292a2425 , 
658+                 0x2c2d2e28 ,  0x36303132 ,  0x393a3435 ,  0x3c3d3e38 , 
659+                 0x00000000 ,  0x00000000 ,  0x00000000 ,  0x00000000 ) . AsByte ( ) ; 
660+             // Multipliers for the two MultiplyAddAdjacent steps below: x0x40 / x1 per byte pair, then x0x1000 / x1 per 16-bit pair.
661+             Vector512 < sbyte >  mergeConstant0  =  Vector512 . Create ( 0x01400140 ) . AsSByte ( ) ; 
662+             Vector512 < short >  mergeConstant1  =  Vector512 . Create ( 0x00011000 ) . AsInt16 ( ) ; 
663+ 
664+             // This algorithm requires AVX512VBMI support. 
665+             // Vbmi was first introduced in CannonLake and is available from IceLake on. 
666+             do 
667+             { 
668+                 AssertRead < Vector512 < sbyte > > ( src ,  srcStart ,  sourceLength ) ; 
669+                 Vector512 < sbyte >  str  =  Vector512 . Load ( src ) . AsSByte ( ) ; 
670+ 
671+                 // Step 1: Translate encoded Base64 input to their original indices 
672+                 // This step also checks for invalid inputs and exits: the lookup tables hold 0x80 in the slots that are not Base64 characters, and OR-ing the result with the input also flags input bytes whose own high bit is set.
673+                 // After this, we have indices which are verified to have upper 2 bits set to 0 in each byte. 
674+                 // origIndex      = [...|00dddddd|00cccccc|00bbbbbb|00aaaaaa] 
675+                 Vector512 < sbyte >  origIndex  =  Avx512Vbmi . PermuteVar64x8x2 ( vbmiLookup0 ,  str ,  vbmiLookup1 ) ; 
676+                 Vector512 < sbyte >  errorVec  =  ( origIndex . AsInt32 ( )  |  str . AsInt32 ( ) ) . AsSByte ( ) ; 
677+                 if  ( errorVec . ExtractMostSignificantBits ( )  !=  0 ) 
678+                 { 
679+                     break ;  // nothing from this 64-byte group is consumed; src and dest still point at its start
680+                 } 
681+ 
682+                 // Step 2: Now we need to reshuffle bits to remove the 0 bits. 
683+                 // multiAdd1: [...|0000cccc|ccdddddd|0000aaaa|aabbbbbb] 
684+                 Vector512 < short >  multiAdd1  =  Avx512BW . MultiplyAddAdjacent ( origIndex . AsByte ( ) ,  mergeConstant0 ) ; 
685+                 // multiAdd2: [...|00000000|aaaaaabb|bbbbcccc|ccdddddd] 
686+                 Vector512 < int >  multiAdd2  =  Avx512BW . MultiplyAddAdjacent ( multiAdd1 ,  mergeConstant1 ) ; 
687+ 
688+                 // Step 3: Pack 48 bytes (the three payload bytes of each 32-bit element, moved to the front of the vector) 
689+                 str  =  Avx512Vbmi . PermuteVar64x8 ( multiAdd2 . AsByte ( ) ,  vbmiPackedLanesControl ) . AsSByte ( ) ; 
690+                 // The store writes all 64 bytes but dest advances by 48, so the next store overwrites the 16 filler bytes. NOTE(review): the filler comes from control index 0 (a copy of source byte 0), not guaranteed zeroes - confirm.
691+                 AssertWrite < Vector512 < sbyte > > ( dest ,  destStart ,  destLength ) ; 
692+                 str . Store ( ( sbyte * ) dest ) ; 
693+                 src  +=  64 ; 
694+                 dest  +=  48 ; 
695+             } 
696+             while  ( src  <=  srcEnd ) ; 
697+             // Publish the advanced positions back through the ref parameters.
698+             srcBytes  =  src ; 
699+             destBytes  =  dest ; 
700+         } 
701+ 
619702        [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ] 
620703        [ CompExactlyDependsOn ( typeof ( Avx2 ) ) ] 
621704        private  static unsafe  void  Avx2Decode ( ref  byte *  srcBytes ,  ref  byte *  destBytes ,  byte *  srcEnd ,  int  sourceLength ,  int  destLength ,  byte *  srcStart ,  byte *  destStart ) 
0 commit comments