Skip to content

Commit 3750ac5

Browse files
authored
[browser] HybridGlobalization correct HashCode ranges of skipped unicodes (#102912)
* Add full tests. Behavior is correct. * Feedback.
1 parent 9fca0c3 commit 3750ac5

File tree

2 files changed

+38
-94
lines changed

2 files changed

+38
-94
lines changed

src/libraries/System.Private.CoreLib/src/System/Globalization/CompareInfo.WebAssembly.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ private unsafe int JsIndexOfCore(ReadOnlySpan<char> source, ReadOnlySpan<char> t
117117
}
118118
}
119119

120-
// there are chars that are ignored by ICU hashing algorithm but not ignored by invariant hashing
120+
// there are chars that are considered equal by HybridGlobalization but do not have equal hashes when binary hashed
121121
// Control: 1105 (out of 1105)
122122
// Format: 697 (out of 731)
123123
// OtherPunctuation: 6919 (out of 7004)

src/libraries/System.Runtime/tests/System.Globalization.Tests/CompareInfo/CompareInfoTests.HashCode.cs

Lines changed: 37 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -12,108 +12,52 @@ namespace System.Globalization.Tests
1212
{
1313
public class CompareInfoHashCodeTests : CompareInfoTestsBase
1414
{
15-
public static IEnumerable<object[]> HashCodeLocalized_TestData()
16-
{
17-
yield return new object[] { s_invariantCompare, "foo", "Foo", CompareOptions.IgnoreCase };
18-
yield return new object[] { s_invariantCompare, "igloo", "\u0130GLOO", CompareOptions.IgnoreCase };
19-
yield return new object[] { s_invariantCompare, "igloo", "\u0130GLOO", CompareOptions.None };
20-
yield return new object[] { s_invariantCompare, "igloo", "IGLOO", CompareOptions.IgnoreCase };
21-
yield return new object[] { new CultureInfo("pl-PL").CompareInfo, "igloo", "\u0130GLOO", CompareOptions.IgnoreCase };
22-
yield return new object[] { new CultureInfo("pl-PL").CompareInfo, "igloo", "IGLOO", CompareOptions.IgnoreCase };
23-
yield return new object[] { new CultureInfo("tr-TR").CompareInfo, "igloo", "\u0130GLOO", CompareOptions.IgnoreCase };
24-
yield return new object[] { new CultureInfo("tr-TR").CompareInfo, "igloo", "IGLOO", CompareOptions.IgnoreCase };
25-
26-
if (!PlatformDetection.IsHybridGlobalizationOnBrowser)
27-
{
28-
if (PlatformDetection.IsNotHybridGlobalizationOnApplePlatform)
29-
yield return new object[] { new CultureInfo("en-GB").CompareInfo, "100", "100!", CompareOptions.IgnoreSymbols }; // HG: equal: True, hashCodesEqual: False
30-
yield return new object[] { new CultureInfo("ja-JP").CompareInfo, "\u30A2", "\u3042", CompareOptions.IgnoreKanaType }; // HG: equal: True, hashCodesEqual: False
31-
yield return new object[] { new CultureInfo("en-GB").CompareInfo, "caf\u00E9", "cafe\u0301", CompareOptions.IgnoreNonSpace | CompareOptions.IgnoreKanaType }; // HG: equal: True, hashCodesEqual: False
32-
}
33-
}
3415

35-
[Theory]
36-
[MemberData(nameof(HashCodeLocalized_TestData))]
37-
public void HashCodeLocalized(CompareInfo cmpInfo, string str1, string str2, CompareOptions options)
16+
[OuterLoop]
17+
[ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsIcuGlobalization))]
18+
[ActiveIssue("https://github.com/dotnet/runtime/issues/95338", typeof(PlatformDetection), nameof(PlatformDetection.IsHybridGlobalizationOnApplePlatform))]
19+
public void CheckHashingInLineWithEqual()
3820
{
39-
bool areEqual = cmpInfo.Compare(str1, str2, options) == 0;
40-
var hashCode1 = cmpInfo.GetHashCode(str1, options);
41-
var hashCode2 = cmpInfo.GetHashCode(str2, options);
42-
bool areHashCodesEqual = hashCode1 == hashCode2;
43-
44-
if (areEqual)
45-
{
46-
Assert.True(areHashCodesEqual);
47-
}
48-
else
21+
int additionalCollisions = 0;
22+
CultureInfo[] cultures = CultureInfo.GetCultures(CultureTypes.AllCultures);
23+
foreach (CultureInfo culture in cultures)
4924
{
50-
Assert.False(areHashCodesEqual);
51-
}
52-
53-
// implication of the above behavior:
54-
StringComparer stringComparer = new CustomComparer(cmpInfo, options);
55-
TryAddToCustomDictionary(stringComparer, str1, str2, areHashCodesEqual);
56-
}
25+
// Japanese does not have "None" compare option, it always ignores Kana
26+
// HashCode is not available for options different than None or IgnoreCase
27+
if (culture.Name.Split('-')[0] == "ja")
28+
continue;
5729

58-
private void TryAddToCustomDictionary(StringComparer comparer, string str1, string str2, bool shouldFail)
59-
{
60-
Dictionary<string, int> customDictionary = new Dictionary<string, int>(comparer);
61-
customDictionary.Add(str1, 0);
62-
try
63-
{
64-
customDictionary.Add(str2, 1);
65-
Assert.False(shouldFail);
66-
}
67-
catch (ArgumentException ex)
68-
{
69-
Assert.True(shouldFail);
70-
Assert.Contains("An item with the same key has already been added.", ex.Message);
30+
for (int i = 0; i <= 65535; i++)
31+
CheckChar(i, culture);
7132
}
72-
catch (Exception ex)
33+
34+
void CheckChar(int charCode, CultureInfo culture)
7335
{
74-
Assert.Fail($"Unexpected exception thrown: {ex}");
36+
var cmpInfo = culture.CompareInfo;
37+
char character = (char)charCode;
38+
string str1 = $"a{character}b";
39+
string str2 = "ab";
40+
CompareOptions options = CompareOptions.None;
41+
var hashCode1 = cmpInfo.GetHashCode(str1, options);
42+
var hashCode2 = cmpInfo.GetHashCode(str2, options);
43+
bool areHashCodesEqual = hashCode1 == hashCode2;
44+
StringComparer stringComparer = new CustomComparer(cmpInfo, options);
45+
// if equal => same, then expect hash => same
46+
if (stringComparer.Compare(str1, str2) == 0)
47+
{
48+
Assert.True(areHashCodesEqual, $"Expected equal hashes for equal strings. The check failed for culture {culture.Name}, character: {character}, code: {charCode}.");
49+
}
50+
// if equal => diff, then expect hash => diff
51+
else
52+
{
53+
if (areHashCodesEqual)
54+
{
55+
additionalCollisions++; // this should be smallest possible, 11541466
56+
}
57+
}
7558
}
7659
}
7760

78-
public static IEnumerable<object[]> CheckHashingOfSkippedChars_TestData()
79-
{
80-
// one char from each ignored category that is skipped on ICU
81-
yield return new object[] { '\u0008', s_invariantCompare }; // Control: BACKSPACE
82-
yield return new object[] { '\u200B', s_invariantCompare }; // Format: ZERO WIDTH SPACE
83-
yield return new object[] { '\u180A', s_invariantCompare }; // OtherPunctuation: MONGOLIAN NIRUGU
84-
yield return new object[] { '\uFE73', s_invariantCompare }; // OtherLetter: THAI CHARACTER PAIYANNOI
85-
yield return new object[] { '\u0F3E', s_invariantCompare }; // SpacingCombiningMark: "TIBETAN MARK GTER YIG MGO UM RTAGS GNYIS
86-
yield return new object[] { '\u0640', s_invariantCompare }; // ModifierLetter: ARABIC TATWEEL
87-
yield return new object[] { '\u0488', s_invariantCompare }; // EnclosingMark: COMBINING CYRILLIC HUNDRED THOUSANDS SIGN
88-
yield return new object[] { '\u034F', s_invariantCompare }; // NonSpacingMark: DIAERESIS
89-
CompareInfo thaiCmpInfo = new CultureInfo("th-TH").CompareInfo;
90-
yield return new object[] { '\u0020', thaiCmpInfo }; // SpaceSeparator: SPACE
91-
yield return new object[] { '\u0028', thaiCmpInfo }; // OpenPunctuation: LEFT PARENTHESIS
92-
yield return new object[] { '\u007D', thaiCmpInfo }; // ClosePunctuation: RIGHT PARENTHESIS
93-
yield return new object[] { '\u2013', thaiCmpInfo }; // DashPunctuation: EN DASH
94-
yield return new object[] { '\u005F', thaiCmpInfo }; // ConnectorPunctuation: LOW LINE
95-
yield return new object[] { '\u2018', thaiCmpInfo }; // InitialQuotePunctuation: LEFT SINGLE QUOTATION MARK
96-
yield return new object[] { '\u2019', thaiCmpInfo }; // FinalQuotePunctuation: RIGHT SINGLE QUOTATION MARK
97-
yield return new object[] { '\u2028', thaiCmpInfo }; // LineSeparator: LINE SEPARATOR
98-
yield return new object[] { '\u2029', thaiCmpInfo }; // ParagraphSeparator: PARAGRAPH SEPARATOR
99-
}
100-
101-
[ConditionalTheory(typeof(PlatformDetection), nameof(PlatformDetection.IsIcuGlobalization))]
102-
[MemberData(nameof(CheckHashingOfSkippedChars_TestData))]
103-
[ActiveIssue("https://github.com/dotnet/runtime/issues/95338", typeof(PlatformDetection), nameof(PlatformDetection.IsHybridGlobalizationOnApplePlatform))]
104-
public void CheckHashingOfSkippedChars(char character, CompareInfo cmpInfo)
105-
{
106-
string str1 = $"a{character}b";
107-
string str2 = "ab";
108-
CompareOptions options = CompareOptions.None;
109-
var hashCode1 = cmpInfo.GetHashCode(str1, options);
110-
var hashCode2 = cmpInfo.GetHashCode(str2, options);
111-
bool areHashCodesEqual = hashCode1 == hashCode2;
112-
Assert.True(areHashCodesEqual);
113-
StringComparer stringComparer = new CustomComparer(cmpInfo, options);
114-
TryAddToCustomDictionary(stringComparer, str1, str2, areHashCodesEqual);
115-
}
116-
11761
public static IEnumerable<object[]> GetHashCodeTestData => new[]
11862
{
11963
new object[] { "abc", CompareOptions.OrdinalIgnoreCase, "ABC", CompareOptions.OrdinalIgnoreCase, true },

0 commit comments

Comments
 (0)