Skip to content

Commit fa5d12e

Browse files
authored
[browser][non-icu] HybridGlobalization checking for prefix/suffix (#85093)
* Implementation. * HG does not belong to legacy code. * No need to create new instance when existing one is exported. * TextEncoder's behavior varies between hosts. * Nit * Cutting prevents us from using IgnoreSymbols. * Fixed asserts. * Fix. * Match platform with behavior. * Missing changes to prev commit.
1 parent 7c61b98 commit fa5d12e

File tree

14 files changed

+357
-81
lines changed

14 files changed

+357
-81
lines changed

docs/design/features/hybrid-globalization.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -181,3 +181,20 @@ hiraganaBig.localeCompare(katakanaSmall, "en-US", { sensitivity: "base" }) // 0;
181181
`IgnoreKanaType | IgnoreWidth | IgnoreSymbols | IgnoreNonSpace`
182182

183183
`IgnoreKanaType | IgnoreWidth | IgnoreSymbols | IgnoreNonSpace | IgnoreCase`
184+
185+
186+
**String starts with / ends with**
187+
188+
Affected public APIs:
189+
- CompareInfo.IsPrefix
190+
- CompareInfo.IsSuffix
191+
- String.StartsWith
192+
- String.EndsWith
193+
194+
The Web API does not expose locale-sensitive `startsWith`/`endsWith` functions. As a workaround, both strings are normalized and weightless characters are removed. The resulting strings are then cut to the same length and compared. In addition to the compare-option limitations described under **String comparison**, this approach has limitations of its own. Because the strings are normalized before they are cut, the match length cannot be calculated on the original strings. Methods that calculate this information throw `PlatformNotSupportedException`:
195+
196+
- [CompareInfo.IsPrefix](https://learn.microsoft.com/en-us/dotnet/api/system.globalization.compareinfo.isprefix?view=net-8.0#system-globalization-compareinfo-isprefix(system-readonlyspan((system-char))-system-readonlyspan((system-char))-system-globalization-compareoptions-system-int32@))
197+
- [CompareInfo.IsSuffix](https://learn.microsoft.com/en-us/dotnet/api/system.globalization.compareinfo.issuffix?view=net-8.0#system-globalization-compareinfo-issuffix(system-readonlyspan((system-char))-system-readonlyspan((system-char))-system-globalization-compareoptions-system-int32@))
198+
199+
- `IgnoreSymbols`
200+
Only comparisons that do not skip character types are allowed. For example, `IgnoreSymbols` skips symbol characters in comparison/indexing, which is incompatible with cutting the strings to the same length. All `CompareOptions` combinations that include `IgnoreSymbols` throw `PlatformNotSupportedException`.

src/libraries/Common/src/Interop/Browser/Interop.CompareInfo.cs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,11 @@ internal static unsafe partial class JsGlobalization
99
{
1010
[MethodImplAttribute(MethodImplOptions.InternalCall)]
1111
internal static extern unsafe int CompareString(out string exceptionMessage, in string culture, char* str1, int str1Len, char* str2, int str2Len, global::System.Globalization.CompareOptions options);
12+
13+
[MethodImplAttribute(MethodImplOptions.InternalCall)]
14+
internal static extern unsafe bool StartsWith(out string exceptionMessage, in string culture, char* str1, int str1Len, char* str2, int str2Len, global::System.Globalization.CompareOptions options);
15+
16+
[MethodImplAttribute(MethodImplOptions.InternalCall)]
17+
internal static extern unsafe bool EndsWith(out string exceptionMessage, in string culture, char* str1, int str1Len, char* str2, int str2Len, global::System.Globalization.CompareOptions options);
1218
}
1319
}

src/libraries/System.Globalization/tests/CompareInfo/CompareInfoTests.IsPrefix.cs

Lines changed: 40 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,8 @@
77

88
namespace System.Globalization.Tests
99
{
10-
public class CompareInfoIsPrefixTests
10+
public class CompareInfoIsPrefixTests : CompareInfoTestsBase
1111
{
12-
private static CompareInfo s_invariantCompare = CultureInfo.InvariantCulture.CompareInfo;
13-
private static CompareInfo s_germanCompare = new CultureInfo("de-DE").CompareInfo;
14-
private static CompareInfo s_hungarianCompare = new CultureInfo("hu-HU").CompareInfo;
15-
private static CompareInfo s_turkishCompare = new CultureInfo("tr-TR").CompareInfo;
16-
private static CompareInfo s_frenchCompare = new CultureInfo("fr-FR").CompareInfo;
17-
1812
public static IEnumerable<object[]> IsPrefix_TestData()
1913
{
2014
// Empty strings
@@ -31,7 +25,8 @@ public static IEnumerable<object[]> IsPrefix_TestData()
3125
yield return new object[] { s_invariantCompare, "dzsdzsfoobar", "ddzsf", CompareOptions.Ordinal, false, 0 };
3226
yield return new object[] { s_hungarianCompare, "dzsdzsfoobar", "ddzsf", CompareOptions.Ordinal, false, 0 };
3327
yield return new object[] { s_invariantCompare, "dz", "d", CompareOptions.None, true, 1 };
34-
yield return new object[] { s_hungarianCompare, "dz", "d", CompareOptions.None, false, 0 };
28+
if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
29+
yield return new object[] { s_hungarianCompare, "dz", "d", CompareOptions.None, false, 0 };
3530
yield return new object[] { s_hungarianCompare, "dz", "d", CompareOptions.Ordinal, true, 1 };
3631

3732
// Turkish
@@ -56,7 +51,7 @@ public static IEnumerable<object[]> IsPrefix_TestData()
5651
yield return new object[] { s_invariantCompare, "\u00C0nimal", "a\u0300", CompareOptions.Ordinal, false, 0 };
5752
yield return new object[] { s_invariantCompare, "\u00C0nimal", "a\u0300", CompareOptions.OrdinalIgnoreCase, false, 0 };
5853
yield return new object[] { s_invariantCompare, "FooBar", "Foo\u0400Bar", CompareOptions.Ordinal, false, 0 };
59-
yield return new object[] { s_invariantCompare, "FooBA\u0300R", "FooB\u00C0R", CompareOptions.IgnoreNonSpace, true, 7 };
54+
yield return new object[] { s_invariantCompare, "FooBA\u0300R", "FooB\u00C0R", supportedIgnoreNonSpaceOption, true, 7 };
6055
yield return new object[] { s_invariantCompare, "o\u0308", "o", CompareOptions.None, false, 0 };
6156
yield return new object[] { s_invariantCompare, "o\u0308", "o", CompareOptions.Ordinal, true, 1 };
6257
yield return new object[] { s_invariantCompare, "o\u0000\u0308", "o", CompareOptions.None, true, 1 };
@@ -76,42 +71,57 @@ public static IEnumerable<object[]> IsPrefix_TestData()
7671
yield return new object[] { s_invariantCompare, "\uD800\uD800", "\uD800\uD800", CompareOptions.None, true, 2 };
7772

7873
// Ignore symbols
79-
yield return new object[] { s_invariantCompare, "Test's can be interesting", "Tests", CompareOptions.IgnoreSymbols, true, 6 };
80-
yield return new object[] { s_invariantCompare, "Test's can be interesting", "Tests", CompareOptions.None, false, 0 };
74+
if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
75+
{
76+
yield return new object[] { s_invariantCompare, "Test's can be interesting", "Tests", CompareOptions.IgnoreSymbols, true, 6 };
77+
yield return new object[] { s_invariantCompare, "Test's can be interesting", "Tests", CompareOptions.None, false, 0 };
78+
}
8179

8280
// Platform differences
83-
bool useNls = PlatformDetection.IsNlsGlobalization;
84-
if (useNls)
81+
// In HybridGlobalization on Browser we use TextEncoder, which is not supported on V8; the manual decoding used there instead behaves like NLS.
82+
bool behavesLikeNls = PlatformDetection.IsNlsGlobalization ||
83+
(PlatformDetection.IsHybridGlobalizationOnBrowser && !PlatformDetection.IsBrowserDomSupportedOrNodeJS);
84+
if (behavesLikeNls)
8585
{
86-
yield return new object[] { s_hungarianCompare, "dzsdzsfoobar", "ddzsf", CompareOptions.None, true, 7 };
87-
yield return new object[] { s_invariantCompare, "''Tests", "Tests", CompareOptions.IgnoreSymbols, true, 7 };
88-
yield return new object[] { s_frenchCompare, "\u0153", "oe", CompareOptions.None, true, 1 };
86+
if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
87+
{
88+
yield return new object[] { s_hungarianCompare, "dzsdzsfoobar", "ddzsf", CompareOptions.None, true, 7 };
89+
yield return new object[] { s_invariantCompare, "''Tests", "Tests", CompareOptions.IgnoreSymbols, true, 7 };
90+
yield return new object[] { s_frenchCompare, "\u0153", "oe", CompareOptions.None, true, 1 };
91+
}
8992
yield return new object[] { s_invariantCompare, "\uD800\uDC00", "\uD800", CompareOptions.None, true, 1 };
9093
yield return new object[] { s_invariantCompare, "\uD800\uDC00", "\uD800", CompareOptions.IgnoreCase, true, 1 };
9194
}
9295
else
9396
{
9497
yield return new object[] { s_hungarianCompare, "dzsdzsfoobar", "ddzsf", CompareOptions.None, false, 0 };
95-
yield return new object[] { s_invariantCompare, "''Tests", "Tests", CompareOptions.IgnoreSymbols, false, 0 };
98+
if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
99+
yield return new object[] { s_invariantCompare, "''Tests", "Tests", CompareOptions.IgnoreSymbols, false, 0 };
96100
yield return new object[] { s_frenchCompare, "\u0153", "oe", CompareOptions.None, false, 0 };
97101
yield return new object[] { s_invariantCompare, "\uD800\uDC00", "\uD800", CompareOptions.None, false, 0 };
98102
yield return new object[] { s_invariantCompare, "\uD800\uDC00", "\uD800", CompareOptions.IgnoreCase, false, 0 };
99103
}
100104

101105
// ICU bugs
102106
// UInt16 overflow: https://unicode-org.atlassian.net/browse/ICU-20832 fixed in https://github.com/unicode-org/icu/pull/840 (ICU 65)
103-
if (useNls || PlatformDetection.ICUVersion.Major >= 65)
107+
if (PlatformDetection.IsNlsGlobalization || PlatformDetection.ICUVersion.Major >= 65)
104108
{
105109
yield return new object[] { s_frenchCompare, "b", new string('a', UInt16.MaxValue + 1), CompareOptions.None, false, 0 };
106110
}
107111

108112
// Prefixes where matched length does not equal value string length
109-
yield return new object[] { s_invariantCompare, "dzxyz", "\u01F3", CompareOptions.IgnoreNonSpace, true, 2 };
110-
yield return new object[] { s_invariantCompare, "\u01F3xyz", "dz", CompareOptions.IgnoreNonSpace, true, 1 };
111-
yield return new object[] { s_germanCompare, "Strasse xyz", "stra\u00DFe", CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace, true, 7 };
112-
yield return new object[] { s_germanCompare, "Strasse xyz", "xtra\u00DFe", CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace, false, 0 };
113-
yield return new object[] { s_germanCompare, "stra\u00DFe xyz", "Strasse", CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace, true, 6 };
114-
yield return new object[] { s_germanCompare, "stra\u00DFe xyz", "Xtrasse", CompareOptions.IgnoreCase | CompareOptions.IgnoreNonSpace, false, 0 };
113+
if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
114+
{
115+
yield return new object[] { s_invariantCompare, "dzxyz", "\u01F3", supportedIgnoreNonSpaceOption, true, 2 };
116+
yield return new object[] { s_invariantCompare, "\u01F3xyz", "dz", supportedIgnoreNonSpaceOption, true, 1 };
117+
}
118+
if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
119+
{
120+
yield return new object[] { s_germanCompare, "Strasse xyz", "stra\u00DFe", supportedIgnoreCaseIgnoreNonSpaceOptions, true, 7 };
121+
yield return new object[] { s_germanCompare, "stra\u00DFe xyz", "Strasse", supportedIgnoreCaseIgnoreNonSpaceOptions, true, 6 };
122+
}
123+
yield return new object[] { s_germanCompare, "Strasse xyz", "xtra\u00DFe", supportedIgnoreCaseIgnoreNonSpaceOptions, false, 0 };
124+
yield return new object[] { s_germanCompare, "stra\u00DFe xyz", "Xtrasse", supportedIgnoreCaseIgnoreNonSpaceOptions, false, 0 };
115125
}
116126

117127
[Theory]
@@ -140,8 +150,11 @@ public void IsPrefix(CompareInfo compareInfo, string source, string value, Compa
140150
valueBoundedMemory.MakeReadonly();
141151

142152
Assert.Equal(expected, compareInfo.IsPrefix(sourceBoundedMemory.Span, valueBoundedMemory.Span, options));
143-
Assert.Equal(expected, compareInfo.IsPrefix(sourceBoundedMemory.Span, valueBoundedMemory.Span, options, out int actualMatchLength));
144-
Assert.Equal(expectedMatchLength, actualMatchLength);
153+
if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
154+
{
155+
Assert.Equal(expected, compareInfo.IsPrefix(sourceBoundedMemory.Span, valueBoundedMemory.Span, options, out int actualMatchLength));
156+
Assert.Equal(expectedMatchLength, actualMatchLength);
157+
}
145158
}
146159

147160
[Fact]
@@ -150,7 +163,7 @@ public void IsPrefix_UnassignedUnicode()
150163
bool result = PlatformDetection.IsNlsGlobalization ? true : false;
151164
int expectedMatchLength = (result) ? 6 : 0;
152165
IsPrefix(s_invariantCompare, "FooBar", "Foo\uFFFFBar", CompareOptions.None, result, expectedMatchLength);
153-
IsPrefix(s_invariantCompare, "FooBar", "Foo\uFFFFBar", CompareOptions.IgnoreNonSpace, result, expectedMatchLength);
166+
IsPrefix(s_invariantCompare, "FooBar", "Foo\uFFFFBar", supportedIgnoreNonSpaceOption, result, expectedMatchLength);
154167
}
155168

156169
[Fact]

0 commit comments

Comments
 (0)