From 52ad5acd1fe5dc4ecc830828b68077d70e05e757 Mon Sep 17 00:00:00 2001 From: bbartels Date: Wed, 17 Jun 2020 01:23:36 +0100 Subject: [PATCH 01/18] Vectorized String.Split() --- .../src/System/String.Manipulation.cs | 79 +++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs index 2fe2781e2b71b8..469dcd723cce6f 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs @@ -6,6 +6,9 @@ using System.Diagnostics; using System.Globalization; using System.Numerics; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; using System.Text; using Internal.Runtime.CompilerServices; @@ -1522,6 +1525,13 @@ private void MakeSeparatorList(ReadOnlySpan separators, ref ValueListBuild // Special-case the common cases of 1, 2, and 3 separators, with manual comparisons against each separator. case 1: sep0 = separators[0]; + + if (Avx2.IsSupported && 16 <= Length) + { + MakeSeparatorListVectorized(ref sepListBuilder, sep0); + return; + } + for (int i = 0; i < Length; i++) { if (this[i] == sep0) @@ -1533,6 +1543,13 @@ private void MakeSeparatorList(ReadOnlySpan separators, ref ValueListBuild case 2: sep0 = separators[0]; sep1 = separators[1]; + + if (Avx2.IsSupported && 16 <= Length) + { + MakeSeparatorListVectorized(ref sepListBuilder, sep0, sep1); + return; + } + for (int i = 0; i < Length; i++) { char c = this[i]; @@ -1546,6 +1563,13 @@ private void MakeSeparatorList(ReadOnlySpan separators, ref ValueListBuild sep0 = separators[0]; sep1 = separators[1]; sep2 = separators[2]; + + if (Avx2.IsSupported && 16 <= Length) + { + MakeSeparatorListVectorized(ref sepListBuilder, sep0, sep1, sep2); + return; + } + for (int i = 0; i < Length; i++) { char c = this[i]; @@ -1579,6 +1603,61 @@ private void MakeSeparatorList(ReadOnlySpan separators, ref ValueListBuild } } + private void MakeSeparatorListVectorized(ref ValueListBuilder sepListBuilder, char c, char? c2 = null, char? c3 = null) + { + Vector256 shuffleConstant = Vector256.Create(0x02, 0x06, 0x0A, 0x0E, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x02, 0x06, 0x0A, 0x0E, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + + Vector256 v1 = Vector256.Create(c); + Vector256? v2 = c2 is char sep2 ? Vector256.Create(sep2) : (Vector256?)null; + Vector256? v3 = c3 is char sep3 ? Vector256.Create(sep3) : (Vector256?)null; + + ref char c0 = ref MemoryMarshal.GetReference(this.AsSpan()); + int cond = Length - (Length % Vector256.Count); + int i = 0; + + for (; i < cond; i += Vector256.Count) + { + ref char ri = ref Unsafe.Add(ref c0, i); + Vector256 charVector = Unsafe.As>(ref ri); + Vector256 cmp = Avx2.CompareEqual(charVector, v1); + + if (v2 is Vector256 vecSep2) + { + cmp = Avx2.Or(Avx2.CompareEqual(charVector, vecSep2), cmp); + } + + if (v3 is Vector256 vecSep3) + { + cmp = Avx2.Or(Avx2.CompareEqual(charVector, vecSep3), cmp); + } + + if (Avx.TestZ(cmp, cmp)) { continue; } + + Vector256 mask = Avx2.ShiftLeftLogical(cmp.AsUInt64(), 4).AsByte(); + mask = Avx2.Shuffle(mask, shuffleConstant); + + Vector128 res = Sse2.Or(Avx2.ExtractVector128(mask, 0), Avx2.ExtractVector128(mask, 1)); + ulong extractedBits = Bmi2.X64.ParallelBitExtract(0xFEDCBA9876543210, Sse2.X64.ConvertToUInt64(res.AsUInt64())); + + while (true) + { + sepListBuilder.Append(((byte)(extractedBits & 0xF)) + i); + extractedBits >>= 4; + if (extractedBits == 0) { break; } + } + } + + for (; i < Length; i++) + { + char curr = this[i]; + if (curr == c || (c2 != null && curr == c2) || (c3 != null && curr == c3)) + { + sepListBuilder.Append(i); + } + } + } + /// /// Uses ValueListBuilder to create list that holds indexes of separators in string. /// From 140c2ce8182974e674f14d07a73b06c8b67d10b5 Mon Sep 17 00:00:00 2001 From: bbartels Date: Wed, 17 Jun 2020 02:01:03 +0100 Subject: [PATCH 02/18] Fixed variable name --- .../System.Private.CoreLib/src/System/String.Manipulation.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs index 38272332720678..8e958e99454a53 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs @@ -1669,8 +1669,8 @@ private void MakeSeparatorListVectorized(ref ValueListBuilder sepListBuilde for (; i < cond; i += Vector256.Count) { - ref char ri = ref Unsafe.Add(ref c0, i); - Vector256 charVector = Unsafe.As>(ref ri); + ref char ci = ref Unsafe.Add(ref c0, i); + Vector256 charVector = Unsafe.As>(ref ci); Vector256 cmp = Avx2.CompareEqual(charVector, v1); if (v2 is Vector256 vecSep2) From 58918a16d55a5563e9ca04c7c790930a1bfed7f4 Mon Sep 17 00:00:00 2001 From: Benjamin Bartels Date: Wed, 17 Jun 2020 11:17:26 +0100 Subject: [PATCH 03/18] Update src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Günther Foidl --- .../System.Private.CoreLib/src/System/String.Manipulation.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs index 8e958e99454a53..f276b109841a35 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs @@ -1688,7 +1688,7 @@ private void MakeSeparatorListVectorized(ref ValueListBuilder sepListBuilde Vector256 mask = Avx2.ShiftLeftLogical(cmp.AsUInt64(), 4).AsByte(); mask = Avx2.Shuffle(mask, shuffleConstant); - Vector128 res = Sse2.Or(Avx2.ExtractVector128(mask, 0), Avx2.ExtractVector128(mask, 1)); + Vector128 res = Sse2.Or(mask.GetLower(), mask.GetUpper()); ulong extractedBits = Bmi2.X64.ParallelBitExtract(0xFEDCBA9876543210, Sse2.X64.ConvertToUInt64(res.AsUInt64())); while (true) From be469ffb343d7cad98abde710ac168ba8863c18a Mon Sep 17 00:00:00 2001 From: Benjamin Bartels Date: Wed, 17 Jun 2020 11:24:42 +0100 Subject: [PATCH 04/18] Update src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Günther Foidl --- .../System.Private.CoreLib/src/System/String.Manipulation.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs index f276b109841a35..ec948b966ea80a 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs @@ -1656,8 +1656,8 @@ private void MakeSeparatorList(ReadOnlySpan separators, ref ValueListBuild private void MakeSeparatorListVectorized(ref ValueListBuilder sepListBuilder, char c, char? c2 = null, char? c3 = null) { - Vector256 shuffleConstant = Vector256.Create(0x02, 0x06, 0x0A, 0x0E, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x02, 0x06, 0x0A, 0x0E, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + ReadOnlySpan shuffleConstantData = new byte[] { 0x02, 0x06, 0x0A, 0x0E, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x02, 0x06, 0x0A, 0x0E, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; + Vector256 shuffleConstant = Unsafe.ReadUnaligned>(ref MemoryMarshal.GetReference(shuffleConstantData)); Vector256 v1 = Vector256.Create(c); Vector256? v2 = c2 is char sep2 ? Vector256.Create(sep2) : (Vector256?)null; From 44db429d690b0978b84a3d5610414246ab9ceb5d Mon Sep 17 00:00:00 2001 From: bbartels Date: Wed, 17 Jun 2020 19:34:53 +0100 Subject: [PATCH 05/18] Applied Review Feedback --- .../src/System/String.Manipulation.cs | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs index ec948b966ea80a..a000dd8303b220 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs @@ -1656,7 +1656,12 @@ private void MakeSeparatorList(ReadOnlySpan separators, ref ValueListBuild private void MakeSeparatorListVectorized(ref ValueListBuilder sepListBuilder, char c, char? c2 = null, char? c3 = null) { - ReadOnlySpan shuffleConstantData = new byte[] { 0x02, 0x06, 0x0A, 0x0E, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x02, 0x06, 0x0A, 0x0E, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; + // Constant that defines indices of characters within an AVX-Register + const ulong indicesConstant = 0xFEDCBA9876543210; + ReadOnlySpan shuffleConstantData = new byte[] { + 0x02, 0x06, 0x0A, 0x0E, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x02, 0x06, 0x0A, 0x0E, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF + }; Vector256 shuffleConstant = Unsafe.ReadUnaligned>(ref MemoryMarshal.GetReference(shuffleConstantData)); Vector256 v1 = Vector256.Create(c); @@ -1669,8 +1674,7 @@ private void MakeSeparatorListVectorized(ref ValueListBuilder sepListBuilde for (; i < cond; i += Vector256.Count) { - ref char ci = ref Unsafe.Add(ref c0, i); - Vector256 charVector = Unsafe.As>(ref ci); + Vector256 charVector = ReadVector(ref c0, i); Vector256 cmp = Avx2.CompareEqual(charVector, v1); if (v2 is Vector256 vecSep2) @@ -1689,7 +1693,7 @@ private void MakeSeparatorListVectorized(ref ValueListBuilder sepListBuilde mask = Avx2.Shuffle(mask, shuffleConstant); Vector128 res = Sse2.Or(mask.GetLower(), mask.GetUpper()); - ulong extractedBits = Bmi2.X64.ParallelBitExtract(0xFEDCBA9876543210, Sse2.X64.ConvertToUInt64(res.AsUInt64())); + ulong extractedBits = Bmi2.X64.ParallelBitExtract(indicesConstant, Sse2.X64.ConvertToUInt64(res.AsUInt64())); while (true) { @@ -1707,6 +1711,13 @@ private void MakeSeparatorListVectorized(ref ValueListBuilder sepListBuilde sepListBuilder.Append(i); } } + + static Vector256 ReadVector(ref char c0, int offset) + { + ref char ci = ref Unsafe.Add(ref c0, offset); + ref byte b = ref Unsafe.As(ref ci); + return Unsafe.ReadUnaligned>(ref b); + } } /// From 995719bdebea4f2d085c07052995cdb8e7789d29 Mon Sep 17 00:00:00 2001 From: Benjamin Bartels Date: Thu, 18 Jun 2020 10:06:55 +0100 Subject: [PATCH 06/18] Update src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Günther Foidl --- .../System.Private.CoreLib/src/System/String.Manipulation.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs index a000dd8303b220..d96577f0c85cb6 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs @@ -1714,7 +1714,7 @@ private void MakeSeparatorListVectorized(ref ValueListBuilder sepListBuilde static Vector256 ReadVector(ref char c0, int offset) { - ref char ci = ref Unsafe.Add(ref c0, offset); + ref char ci = ref Unsafe.Add(ref c0, (IntPtr)(uint)offset); ref byte b = ref Unsafe.As(ref ci); return Unsafe.ReadUnaligned>(ref b); } From 65cba0ca5ccc2bb55fa8044cb7326e2e0b6a455a Mon Sep 17 00:00:00 2001 From: bbartels Date: Thu, 18 Jun 2020 10:09:09 +0100 Subject: [PATCH 07/18] Applied Review Feedback --- .../src/System/String.Manipulation.cs | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs index a000dd8303b220..a8783ea7c7ed0f 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs @@ -1577,7 +1577,7 @@ private void MakeSeparatorList(ReadOnlySpan separators, ref ValueListBuild case 1: sep0 = separators[0]; - if (Avx2.IsSupported && 16 <= Length) + if (Avx2.IsSupported && Length >= 16) { MakeSeparatorListVectorized(ref sepListBuilder, sep0); return; @@ -1595,7 +1595,7 @@ private void MakeSeparatorList(ReadOnlySpan separators, ref ValueListBuild sep0 = separators[0]; sep1 = separators[1]; - if (Avx2.IsSupported && 16 <= Length) + if (Avx2.IsSupported && Length >= 16) { MakeSeparatorListVectorized(ref sepListBuilder, sep0, sep1); return; @@ -1615,7 +1615,7 @@ private void MakeSeparatorList(ReadOnlySpan separators, ref ValueListBuild sep1 = separators[1]; sep2 = separators[2]; - if (Avx2.IsSupported && 16 <= Length) + if (Avx2.IsSupported && Length >= 16) { MakeSeparatorListVectorized(ref sepListBuilder, sep0, sep1, sep2); return; @@ -1658,11 +1658,9 @@ private void MakeSeparatorListVectorized(ref ValueListBuilder sepListBuilde { // Constant that defines indices of characters within an AVX-Register const ulong indicesConstant = 0xFEDCBA9876543210; - ReadOnlySpan shuffleConstantData = new byte[] { - 0x02, 0x06, 0x0A, 0x0E, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x02, 0x06, 0x0A, 0x0E, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF - }; - Vector256 shuffleConstant = Unsafe.ReadUnaligned>(ref MemoryMarshal.GetReference(shuffleConstantData)); + // Constant that allows for the truncation of 16-bit (FFFF/0000) values within a register to 4-bit (F/0) + Vector256 shuffleConstant = Vector256.Create(0x02, 0x06, 0x0A, 0x0E, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xFF, 0x02, 0x06, 0x0A, 0x0E, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); Vector256 v1 = Vector256.Create(c); Vector256? v2 = c2 is char sep2 ? Vector256.Create(sep2) : (Vector256?)null; From 3d2a6457df402448df6e5ae570d689fafb31dd26 Mon Sep 17 00:00:00 2001 From: bbartels Date: Thu, 18 Jun 2020 10:30:07 +0100 Subject: [PATCH 08/18] Built branchless version with help of @gfoidl --- .../src/System/String.Manipulation.cs | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs index 8191b4ab683e31..84689aac0f076c 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs @@ -1663,8 +1663,8 @@ private void MakeSeparatorListVectorized(ref ValueListBuilder sepListBuilde 0xFF, 0xFF, 0xFF, 0xFF, 0x02, 0x06, 0x0A, 0x0E, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); Vector256 v1 = Vector256.Create(c); - Vector256? v2 = c2 is char sep2 ? Vector256.Create(sep2) : (Vector256?)null; - Vector256? v3 = c3 is char sep3 ? Vector256.Create(sep3) : (Vector256?)null; + Vector256 v2 = c2 is char sep2 ? Vector256.Create(sep2) : v1; + Vector256 v3 = c3 is char sep3 ? Vector256.Create(sep3) : v2; ref char c0 = ref MemoryMarshal.GetReference(this.AsSpan()); int cond = Length - (Length % Vector256.Count); @@ -1675,15 +1675,8 @@ private void MakeSeparatorListVectorized(ref ValueListBuilder sepListBuilde Vector256 charVector = ReadVector(ref c0, i); Vector256 cmp = Avx2.CompareEqual(charVector, v1); - if (v2 is Vector256 vecSep2) - { - cmp = Avx2.Or(Avx2.CompareEqual(charVector, vecSep2), cmp); - } - - if (v3 is Vector256 vecSep3) - { - cmp = Avx2.Or(Avx2.CompareEqual(charVector, vecSep3), cmp); - } + cmp = Avx2.Or(Avx2.CompareEqual(charVector, v2), cmp); + cmp = Avx2.Or(Avx2.CompareEqual(charVector, v3), cmp); if (Avx.TestZ(cmp, cmp)) { continue; } From d9a86407e7c7fc335bd6cf49bb906bcf05668d22 Mon Sep 17 00:00:00 2001 From: Benjamin Bartels Date: Thu, 18 Jun 2020 11:54:14 +0100 Subject: [PATCH 09/18] Update src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Günther Foidl --- .../System.Private.CoreLib/src/System/String.Manipulation.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs index 84689aac0f076c..d8a33a413c6d34 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs @@ -1667,7 +1667,7 @@ private void MakeSeparatorListVectorized(ref ValueListBuilder sepListBuilde Vector256 v3 = c3 is char sep3 ? Vector256.Create(sep3) : v2; ref char c0 = ref MemoryMarshal.GetReference(this.AsSpan()); - int cond = Length - (Length % Vector256.Count); + int cond = Length & -Vector256.Count; int i = 0; for (; i < cond; i += Vector256.Count) From 2b0b8f8d63b8a3e5af5d2d3e2160c17efc245561 Mon Sep 17 00:00:00 2001 From: bbartels Date: Thu, 18 Jun 2020 11:59:17 +0100 Subject: [PATCH 10/18] Removed nullable separator parameters --- .../src/System/String.Manipulation.cs | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs index d8a33a413c6d34..4ce25342361e62 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs @@ -1579,7 +1579,7 @@ private void MakeSeparatorList(ReadOnlySpan separators, ref ValueListBuild if (Avx2.IsSupported && Length >= 16) { - MakeSeparatorListVectorized(ref sepListBuilder, sep0); + MakeSeparatorListVectorized(ref sepListBuilder, sep0, sep0, sep0); return; } @@ -1597,7 +1597,7 @@ private void MakeSeparatorList(ReadOnlySpan separators, ref ValueListBuild if (Avx2.IsSupported && Length >= 16) { - MakeSeparatorListVectorized(ref sepListBuilder, sep0, sep1); + MakeSeparatorListVectorized(ref sepListBuilder, sep0, sep1, sep1); return; } @@ -1654,10 +1654,8 @@ private void MakeSeparatorList(ReadOnlySpan separators, ref ValueListBuild } } - private void MakeSeparatorListVectorized(ref ValueListBuilder sepListBuilder, char c, char? c2 = null, char? c3 = null) + private void MakeSeparatorListVectorized(ref ValueListBuilder sepListBuilder, char c, char c2, char c3) { - // Constant that defines indices of characters within an AVX-Register - const ulong indicesConstant = 0xFEDCBA9876543210; // Constant that allows for the truncation of 16-bit (FFFF/0000) values within a register to 4-bit (F/0) Vector256 shuffleConstant = Vector256.Create(0x02, 0x06, 0x0A, 0x0E, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x02, 0x06, 0x0A, 0x0E, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); @@ -1684,6 +1682,9 @@ private void MakeSeparatorListVectorized(ref ValueListBuilder sepListBuilde mask = Avx2.Shuffle(mask, shuffleConstant); Vector128 res = Sse2.Or(mask.GetLower(), mask.GetUpper()); + + // Constant that defines indices of characters within an AVX-Register + const ulong indicesConstant = 0xFEDCBA9876543210; ulong extractedBits = Bmi2.X64.ParallelBitExtract(indicesConstant, Sse2.X64.ConvertToUInt64(res.AsUInt64())); while (true) @@ -1696,8 +1697,8 @@ private void MakeSeparatorListVectorized(ref ValueListBuilder sepListBuilde for (; i < Length; i++) { - char curr = this[i]; - if (curr == c || (c2 != null && curr == c2) || (c3 != null && curr == c3)) + char curr = Unsafe.Add(ref c0, (IntPtr)(uint)i); + if (curr == c || curr == c2 || curr == c3) { sepListBuilder.Append(i); } From cc4ae1fac35442296ec20a75176f3e766129d18e Mon Sep 17 00:00:00 2001 From: bbartels Date: Thu, 18 Jun 2020 12:22:14 +0100 Subject: [PATCH 11/18] Refactored MakeSeparatorList --- .../src/System/String.Manipulation.cs | 111 ++++++------------ 1 file changed, 35 insertions(+), 76 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs index 4ce25342361e62..82a238dfb25497 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs @@ -1558,99 +1558,58 @@ private string[] SplitWithPostProcessing(ReadOnlySpan sepList, ReadOnlySpan /// to store indexes private void MakeSeparatorList(ReadOnlySpan separators, ref ValueListBuilder sepListBuilder) { - char sep0, sep1, sep2; - - switch (separators.Length) + if (separators.Length == 0) { - // Special-case no separators to mean any whitespace is a separator. - case 0: - for (int i = 0; i < Length; i++) - { - if (char.IsWhiteSpace(this[i])) - { - sepListBuilder.Append(i); - } - } - break; - - // Special-case the common cases of 1, 2, and 3 separators, with manual comparisons against each separator. - case 1: - sep0 = separators[0]; - - if (Avx2.IsSupported && Length >= 16) + for (int i = 0; i < Length; i++) + { + if (char.IsWhiteSpace(this[i])) { - MakeSeparatorListVectorized(ref sepListBuilder, sep0, sep0, sep0); - return; + sepListBuilder.Append(i); } + } + } - for (int i = 0; i < Length; i++) - { - if (this[i] == sep0) - { - sepListBuilder.Append(i); - } - } - break; - case 2: - sep0 = separators[0]; - sep1 = separators[1]; + else if (separators.Length <= 3) + { + char sep0, sep1, sep2; + sep0 = separators[0]; + sep1 = separators.Length > 1 ? separators[1] : sep0; + sep2 = separators.Length > 2 ? separators[2] : sep1; - if (Avx2.IsSupported && Length >= 16) - { - MakeSeparatorListVectorized(ref sepListBuilder, sep0, sep1, sep1); - return; - } + if (Length >= 16 && Avx2.IsSupported) + { + MakeSeparatorListVectorized(ref sepListBuilder, sep0, sep1, sep2); + return; + } - for (int i = 0; i < Length; i++) + for (int i = 0; i < Length; i++) + { + char c = this[i]; + if (c == sep0 || c == sep1 || c == sep2) { - char c = this[i]; - if (c == sep0 || c == sep1) - { - sepListBuilder.Append(i); - } + sepListBuilder.Append(i); } - break; - case 3: - sep0 = separators[0]; - sep1 = separators[1]; - sep2 = separators[2]; + } + } - if (Avx2.IsSupported && Length >= 16) - { - MakeSeparatorListVectorized(ref sepListBuilder, sep0, sep1, sep2); - return; - } + else + { + unsafe + { + ProbabilisticMap map = default; + uint* charMap = (uint*)↦ + InitializeProbabilisticMap(charMap, separators); for (int i = 0; i < Length; i++) { char c = this[i]; - if (c == sep0 || c == sep1 || c == sep2) + if (IsCharBitSet(charMap, (byte)c) && IsCharBitSet(charMap, (byte)(c >> 8)) && + separators.Contains(c)) { sepListBuilder.Append(i); } } - break; - - // Handle > 3 separators with a probabilistic map, ala IndexOfAny. - // This optimizes for chars being unlikely to match a separator. - default: - unsafe - { - ProbabilisticMap map = default; - uint* charMap = (uint*)↦ - InitializeProbabilisticMap(charMap, separators); - - for (int i = 0; i < Length; i++) - { - char c = this[i]; - if (IsCharBitSet(charMap, (byte)c) && IsCharBitSet(charMap, (byte)(c >> 8)) && - separators.Contains(c)) - { - sepListBuilder.Append(i); - } - } - } - break; + } } } From 11c968bf4f812c4faeb5cfb0ca873f75ad351db8 Mon Sep 17 00:00:00 2001 From: bbartels Date: Thu, 18 Jun 2020 12:23:55 +0100 Subject: [PATCH 12/18] Fixed mistakenly removed comments --- .../System.Private.CoreLib/src/System/String.Manipulation.cs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs index 82a238dfb25497..19e69627af5774 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs @@ -1558,6 +1558,7 @@ private string[] SplitWithPostProcessing(ReadOnlySpan sepList, ReadOnlySpan /// to store indexes private void MakeSeparatorList(ReadOnlySpan separators, ref ValueListBuilder sepListBuilder) { + // Special-case no separators to mean any whitespace is a separator. if (separators.Length == 0) { for (int i = 0; i < Length; i++) @@ -1569,6 +1570,7 @@ private void MakeSeparatorList(ReadOnlySpan separators, ref ValueListBuild } } + // Special-case the common cases of 1, 2, and 3 separators, with manual comparisons against each separator. else if (separators.Length <= 3) { char sep0, sep1, sep2; @@ -1592,6 +1594,8 @@ private void MakeSeparatorList(ReadOnlySpan separators, ref ValueListBuild } } + // Handle > 3 separators with a probabilistic map, ala IndexOfAny. + // This optimizes for chars being unlikely to match a separator. else { unsafe From 707871aec5e1a642fa42f58045896f109ccbbfc9 Mon Sep 17 00:00:00 2001 From: bbartels Date: Fri, 19 Jun 2020 16:41:49 +0100 Subject: [PATCH 13/18] Removed dependency on BMI2 PEXT instruction --- .../src/System/String.Manipulation.cs | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs index 19e69627af5774..f27e852a8a62aa 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs @@ -1624,8 +1624,8 @@ private void MakeSeparatorListVectorized(ref ValueListBuilder sepListBuilde 0xFF, 0xFF, 0xFF, 0xFF, 0x02, 0x06, 0x0A, 0x0E, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); Vector256 v1 = Vector256.Create(c); - Vector256 v2 = c2 is char sep2 ? Vector256.Create(sep2) : v1; - Vector256 v3 = c3 is char sep3 ? Vector256.Create(sep3) : v2; + Vector256 v2 = Vector256.Create(c2); + Vector256 v3 = Vector256.Create(c3); ref char c0 = ref MemoryMarshal.GetReference(this.AsSpan()); int cond = Length & -Vector256.Count; @@ -1645,16 +1645,15 @@ private void MakeSeparatorListVectorized(ref ValueListBuilder sepListBuilde mask = Avx2.Shuffle(mask, shuffleConstant); Vector128 res = Sse2.Or(mask.GetLower(), mask.GetUpper()); + ulong extractedBits = Sse2.X64.ConvertToUInt64(res.AsUInt64()); - // Constant that defines indices of characters within an AVX-Register - const ulong indicesConstant = 0xFEDCBA9876543210; - ulong extractedBits = Bmi2.X64.ParallelBitExtract(indicesConstant, Sse2.X64.ConvertToUInt64(res.AsUInt64())); - - while (true) + for (int idx = i; idx < Vector.Count; idx++) { - sepListBuilder.Append(((byte)(extractedBits & 0xF)) + i); + if ((extractedBits & 0xF) != 0) + { + sepListBuilder.Append(idx); + } extractedBits >>= 4; - if (extractedBits == 0) { break; } } } From 19e57f02da610f3154f746058e1117b029d1db9f Mon Sep 17 00:00:00 2001 From: bbartels Date: Fri, 19 Jun 2020 17:17:35 +0100 Subject: [PATCH 14/18] Fixed mistaken use of Vector.Count --- .../System.Private.CoreLib/src/System/String.Manipulation.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs index f27e852a8a62aa..b94149a90f6e6a 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs @@ -1647,7 +1647,7 @@ private void MakeSeparatorListVectorized(ref ValueListBuilder sepListBuilde Vector128 res = Sse2.Or(mask.GetLower(), mask.GetUpper()); ulong extractedBits = Sse2.X64.ConvertToUInt64(res.AsUInt64()); - for (int idx = i; idx < Vector.Count; idx++) + for (int idx = i; idx < Vector256.Count; idx++) { if ((extractedBits & 0xF) != 0) { From 82329c645c5dc40b8b05b476abd8b66cc59a25be Mon Sep 17 00:00:00 2001 From: bbartels Date: Wed, 24 Jun 2020 17:08:26 +0100 Subject: [PATCH 15/18] Lowered string.Split() vectorization dependency from Avx2 to SSE41 --- .../src/System/String.Manipulation.cs | 55 +++++++++++-------- 1 file changed, 33 insertions(+), 22 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs index b94149a90f6e6a..6e3e4fbb79cd8e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs @@ -1578,7 +1578,7 @@ private void MakeSeparatorList(ReadOnlySpan separators, ref ValueListBuild sep1 = separators.Length > 1 ? separators[1] : sep0; sep2 = separators.Length > 2 ? separators[2] : sep1; - if (Length >= 16 && Avx2.IsSupported) + if (Length >= 16 && Sse41.IsSupported) { MakeSeparatorListVectorized(ref sepListBuilder, sep0, sep1, sep2); return; @@ -1620,40 +1620,51 @@ private void MakeSeparatorList(ReadOnlySpan separators, ref ValueListBuild private void MakeSeparatorListVectorized(ref ValueListBuilder sepListBuilder, char c, char c2, char c3) { // Constant that allows for the truncation of 16-bit (FFFF/0000) values within a register to 4-bit (F/0) - Vector256 shuffleConstant = Vector256.Create(0x02, 0x06, 0x0A, 0x0E, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, - 0xFF, 0xFF, 0xFF, 0xFF, 0x02, 0x06, 0x0A, 0x0E, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + Vector128 shuffleConstant = Vector128.Create(0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); - Vector256 v1 = Vector256.Create(c); - Vector256 v2 = Vector256.Create(c2); - Vector256 v3 = Vector256.Create(c3); + Vector128 v1 = Vector128.Create(c); + Vector128 v2 = Vector128.Create(c2); + Vector128 v3 = Vector128.Create(c3); ref char c0 = ref MemoryMarshal.GetReference(this.AsSpan()); - int cond = Length & -Vector256.Count; + int cond = Length & -Vector128.Count; int i = 0; - for (; i < cond; i += Vector256.Count) + for (; i < cond; i += Vector128.Count) { - Vector256 charVector = ReadVector(ref c0, i); - Vector256 cmp = Avx2.CompareEqual(charVector, v1); + Vector128 charVector = ReadVector(ref c0, i); + Vector128 cmp = Sse2.CompareEqual(charVector, v1); - cmp = Avx2.Or(Avx2.CompareEqual(charVector, v2), cmp); - cmp = Avx2.Or(Avx2.CompareEqual(charVector, v3), cmp); + cmp = Sse2.Or(Sse2.CompareEqual(charVector, v2), cmp); + cmp = Sse2.Or(Sse2.CompareEqual(charVector, v3), cmp); - if (Avx.TestZ(cmp, cmp)) { continue; } + if (Sse41.TestZ(cmp, cmp)) { continue; } - Vector256 mask = Avx2.ShiftLeftLogical(cmp.AsUInt64(), 4).AsByte(); - mask = Avx2.Shuffle(mask, shuffleConstant); + Vector128 mask = Sse2.ShiftRightLogical(cmp.AsUInt64(), 4).AsByte(); + mask = Ssse3.Shuffle(mask, shuffleConstant); - Vector128 res = Sse2.Or(mask.GetLower(), mask.GetUpper()); - ulong extractedBits = Sse2.X64.ConvertToUInt64(res.AsUInt64()); + uint lowBits = Sse2.ConvertToUInt32(mask.AsUInt32()); + mask = Sse2.ShiftRightLogical(mask.AsUInt64(), 16).AsByte(); + uint highBits = Sse2.ConvertToUInt32(mask.AsUInt32()); - for (int idx = i; idx < Vector256.Count; idx++) + for (int idx = i; lowBits != 0; idx++) { - if ((extractedBits & 0xF) != 0) + if ((lowBits & 0xF) != 0) { sepListBuilder.Append(idx); } - extractedBits >>= 4; + + lowBits >>= 8; + } + + for (int idx = i + 4; highBits != 0; idx++) + { + if ((highBits & 0xF) != 0) + { + sepListBuilder.Append(idx); + } + + highBits >>= 8; } } @@ -1666,11 +1677,11 @@ private void MakeSeparatorListVectorized(ref ValueListBuilder sepListBuilde } } - static Vector256 ReadVector(ref char c0, int offset) + static Vector128 ReadVector(ref char c0, int offset) { ref char ci = ref Unsafe.Add(ref c0, (IntPtr)(uint)offset); ref byte b = ref Unsafe.As(ref ci); - return Unsafe.ReadUnaligned>(ref b); + return Unsafe.ReadUnaligned>(ref b); } } From aa7454ab2466fd0dbd5169486f43f2e9a2a5b883 Mon Sep 17 00:00:00 2001 From: bbartels Date: Mon, 25 Jan 2021 21:20:49 +0000 Subject: [PATCH 16/18] Added Sse.IsSupported check --- .../src/System/String.Manipulation.cs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs index b079e5e6137af1..413a5ea81c3d54 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs @@ -1557,6 +1557,13 @@ private void MakeSeparatorList(ReadOnlySpan separators, ref ValueListBuild private void MakeSeparatorListVectorized(ref ValueListBuilder sepListBuilder, char c, char c2, char c3) { + // Redundant test so we won't prejit remainder of this method + // on platforms without SSE. + if (!Sse.IsSupported) + { + throw new PlatformNotSupportedException(); + } + // Constant that allows for the truncation of 16-bit (FFFF/0000) values within a register to 4-bit (F/0) Vector128 shuffleConstant = Vector128.Create(0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); From e0f83379e29286a6d967f4c1e1aee1c181a63f6b Mon Sep 17 00:00:00 2001 From: Benjamin Bartels Date: Mon, 22 Mar 2021 19:10:17 +0000 Subject: [PATCH 17/18] Updated IsSupported check to match highest used ISA --- .../System.Private.CoreLib/src/System/String.Manipulation.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs index 413a5ea81c3d54..aef94cdcfb2951 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs @@ -1559,7 +1559,7 @@ private void MakeSeparatorListVectorized(ref ValueListBuilder sepListBuilde { // Redundant test so we won't prejit remainder of this method // on platforms without SSE. - if (!Sse.IsSupported) + if (!Sse41.IsSupported) { throw new PlatformNotSupportedException(); } From d55cf9257bc5893ab7d42735aa51e17d7fb5c818 Mon Sep 17 00:00:00 2001 From: bbartels Date: Fri, 26 Mar 2021 18:03:02 +0000 Subject: [PATCH 18/18] Fixed possible cause for failing tests --- .../System.Private.CoreLib/src/System/String.Manipulation.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs index 9109bd98efba6a..9ea10350e952b7 100644 --- a/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs +++ b/src/libraries/System.Private.CoreLib/src/System/String.Manipulation.cs @@ -1586,7 +1586,7 @@ private void MakeSeparatorListVectorized(ref ValueListBuilder sepListBuilde mask = Ssse3.Shuffle(mask, shuffleConstant); uint lowBits = Sse2.ConvertToUInt32(mask.AsUInt32()); - mask = Sse2.ShiftRightLogical(mask.AsUInt64(), 16).AsByte(); + mask = Sse2.ShiftRightLogical(mask.AsUInt64(), 32).AsByte(); uint highBits = Sse2.ConvertToUInt32(mask.AsUInt32()); for (int idx = i; lowBits != 0; idx++)