From 34fd97beb82ee6d15012d6dc9052af90871e2265 Mon Sep 17 00:00:00 2001
From: xtqqczze <45661989+xtqqczze@users.noreply.github.com>
Date: Thu, 10 Aug 2023 16:08:08 +0100
Subject: [PATCH 1/3] Add `WidenLatin1ToUtf16_MisalignedAddress`

---
 .../src/System/Text/Latin1Utility.cs          | 20 +++++++++++++++++++
 1 file changed, 20 insertions(+)
diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Latin1Utility.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Latin1Utility.cs
index e8cd792243e314..f0bc7de8f3ed5c 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Text/Latin1Utility.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Text/Latin1Utility.cs
@@ -945,6 +945,12 @@ private static unsafe nuint NarrowUtf16ToLatin1_Sse2(char* pUtf16Buffer, byte* p
         /// </summary>
         public static unsafe void WidenLatin1ToUtf16(byte* pLatin1Buffer, char* pUtf16Buffer, nuint elementCount)
         {
+            if (((nuint)pUtf16Buffer & 1) != 0) 
+            {
+                // Input isn't char aligned, we won't be able to vectorize.
+                return WidenLatin1ToUtf16_MisalignedAddress(pLatin1Buffer, pUtf16Buffer, elementCount);
+            }
+            
             // If SSE2 is supported, use those specific intrinsics instead of the generic vectorized
             // code below. This has two benefits: (a) we can take advantage of specific instructions like
             // punpcklbw which we know are optimized, and (b) we can avoid downclocking the processor while
@@ -1106,5 +1112,19 @@ private static unsafe void WidenLatin1ToUtf16_Fallback(byte* pLatin1Buffer, char
                 currentOffset++;
             }
         }
+
+        private static unsafe void WidenLatin1ToUtf16_MisalignedAddress(byte* pLatin1Buffer, char* pUtf16Buffer, nuint elementCount)
+        { // Scalar path: widens elementCount Latin-1 bytes to UTF-16 chars one element at a time, using unaligned stores.
+            if (elementCount != 0) // The do/while below always runs its body once, so skip it entirely for empty input.
+            {
+                do
+                {
+                    Unsafe.WriteUnaligned(pUtf16Buffer, (char)*pLatin1Buffer); // Zero-extend one byte to a char; the store does not assume a 2-byte-aligned destination.
+                    pUtf16Buffer++; // Advances by one char (2 bytes), so an odd destination address stays odd on every iteration.
+                    pLatin1Buffer++;
+                }
+                while (--elementCount != 0); // Pre-decrement: stop once the remaining element count reaches zero.
+            }
+        }
     }
 }

From 83119c31df826c87878960317ae8d3852a821b85 Mon Sep 17 00:00:00 2001
From: xtqqczze <45661989+xtqqczze@users.noreply.github.com>
Date: Thu, 10 Aug 2023 16:53:51 +0100
Subject: [PATCH 2/3] fix `SA1028`

---
 .../System.Private.CoreLib/src/System/Text/Latin1Utility.cs   | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Latin1Utility.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Latin1Utility.cs
index f0bc7de8f3ed5c..9c292c78be079e 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Text/Latin1Utility.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Text/Latin1Utility.cs
@@ -945,12 +945,12 @@ private static unsafe nuint NarrowUtf16ToLatin1_Sse2(char* pUtf16Buffer, byte* p
         /// </summary>
         public static unsafe void WidenLatin1ToUtf16(byte* pLatin1Buffer, char* pUtf16Buffer, nuint elementCount)
         {
-            if (((nuint)pUtf16Buffer & 1) != 0) 
+            if (((nuint)pUtf16Buffer & 1) != 0)
             {
                 // Input isn't char aligned, we won't be able to vectorize.
                 return WidenLatin1ToUtf16_MisalignedAddress(pLatin1Buffer, pUtf16Buffer, elementCount);
             }
-            
+
             // If SSE2 is supported, use those specific intrinsics instead of the generic vectorized
             // code below. This has two benefits: (a) we can take advantage of specific instructions like
             // punpcklbw which we know are optimized, and (b) we can avoid downclocking the processor while

From 4218f4c5cdcd863f7e094966a50c3c4f306bd4d6 Mon Sep 17 00:00:00 2001
From: xtqqczze <45661989+xtqqczze@users.noreply.github.com>
Date: Thu, 10 Aug 2023 16:55:19 +0100
Subject: [PATCH 3/3] fix `CS0127`

---
 .../System.Private.CoreLib/src/System/Text/Latin1Utility.cs    | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Latin1Utility.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Latin1Utility.cs
index 9c292c78be079e..c66e39d690bcf0 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Text/Latin1Utility.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Text/Latin1Utility.cs
@@ -948,7 +948,8 @@ public static unsafe void WidenLatin1ToUtf16(byte* pLatin1Buffer, char* pUtf16Bu
             if (((nuint)pUtf16Buffer & 1) != 0)
             {
                 // Input isn't char aligned, we won't be able to vectorize.
-                return WidenLatin1ToUtf16_MisalignedAddress(pLatin1Buffer, pUtf16Buffer, elementCount);
+                WidenLatin1ToUtf16_MisalignedAddress(pLatin1Buffer, pUtf16Buffer, elementCount);
+                return;
             }
 
             // If SSE2 is supported, use those specific intrinsics instead of the generic vectorized