From 34fd97beb82ee6d15012d6dc9052af90871e2265 Mon Sep 17 00:00:00 2001 From: xtqqczze <45661989+xtqqczze@users.noreply.github.com> Date: Thu, 10 Aug 2023 16:08:08 +0100 Subject: [PATCH 1/3] Add `WidenLatin1ToUtf16_MisalignedAddress` --- .../src/System/Text/Latin1Utility.cs | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Latin1Utility.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Latin1Utility.cs index e8cd792243e314..f0bc7de8f3ed5c 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Latin1Utility.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Latin1Utility.cs @@ -945,6 +945,12 @@ private static unsafe nuint NarrowUtf16ToLatin1_Sse2(char* pUtf16Buffer, byte* p /// public static unsafe void WidenLatin1ToUtf16(byte* pLatin1Buffer, char* pUtf16Buffer, nuint elementCount) { + if (((nuint)pUtf16Buffer & 1) != 0) + { + // Input isn't char aligned, we won't be able to vectorize. + return WidenLatin1ToUtf16_MisalignedAddress(pLatin1Buffer, pUtf16Buffer, elementCount); + } + // If SSE2 is supported, use those specific intrinsics instead of the generic vectorized // code below. This has two benefits: (a) we can take advantage of specific instructions like // punpcklbw which we know are optimized, and (b) we can avoid downclocking the processor while @@ -1106,5 +1112,19 @@ private static unsafe void WidenLatin1ToUtf16_Fallback(byte* pLatin1Buffer, char currentOffset++; } } + + private static unsafe void WidenLatin1ToUtf16_MisalignedAddress(byte* pLatin1Buffer, char* pUtf16Buffer, nuint elementCount) + { + if (elementCount != 0) + { + do + { + Unsafe.WriteUnaligned(pUtf16Buffer, (char)*pLatin1Buffer); + pUtf16Buffer++; + pLatin1Buffer++; + } + while (--elementCount != 0); + } + } } } From 83119c31df826c87878960317ae8d3852a821b85 Mon Sep 17 00:00:00 2001 From: xtqqczze <45661989+xtqqczze@users.noreply.github.com> Date: Thu, 10 Aug 2023 16:53:51 +0100 Subject: [PATCH 2/3] fix `SA1028` --- .../System.Private.CoreLib/src/System/Text/Latin1Utility.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Latin1Utility.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Latin1Utility.cs index f0bc7de8f3ed5c..9c292c78be079e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Latin1Utility.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Latin1Utility.cs @@ -945,12 +945,12 @@ private static unsafe nuint NarrowUtf16ToLatin1_Sse2(char* pUtf16Buffer, byte* p /// public static unsafe void WidenLatin1ToUtf16(byte* pLatin1Buffer, char* pUtf16Buffer, nuint elementCount) { - if (((nuint)pUtf16Buffer & 1) != 0) + if (((nuint)pUtf16Buffer & 1) != 0) { // Input isn't char aligned, we won't be able to vectorize. return WidenLatin1ToUtf16_MisalignedAddress(pLatin1Buffer, pUtf16Buffer, elementCount); } - + // If SSE2 is supported, use those specific intrinsics instead of the generic vectorized // code below. This has two benefits: (a) we can take advantage of specific instructions like // punpcklbw which we know are optimized, and (b) we can avoid downclocking the processor while From 4218f4c5cdcd863f7e094966a50c3c4f306bd4d6 Mon Sep 17 00:00:00 2001 From: xtqqczze <45661989+xtqqczze@users.noreply.github.com> Date: Thu, 10 Aug 2023 16:55:19 +0100 Subject: [PATCH 3/3] fix `CS0127` --- .../System.Private.CoreLib/src/System/Text/Latin1Utility.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Latin1Utility.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Latin1Utility.cs index 9c292c78be079e..c66e39d690bcf0 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Latin1Utility.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Latin1Utility.cs @@ -948,7 +948,8 @@ public static unsafe void WidenLatin1ToUtf16(byte* pLatin1Buffer, char* pUtf16Bu if (((nuint)pUtf16Buffer & 1) != 0) { // Input isn't char aligned, we won't be able to vectorize. - return WidenLatin1ToUtf16_MisalignedAddress(pLatin1Buffer, pUtf16Buffer, elementCount); + WidenLatin1ToUtf16_MisalignedAddress(pLatin1Buffer, pUtf16Buffer, elementCount); + return; } // If SSE2 is supported, use those specific intrinsics instead of the generic vectorized