Skip to content

Commit 0cf7145

Browse files
committed
feature #52198 [String] New locale aware casing methods (bram123)
This PR was squashed before being merged into the 7.1 branch.

Discussion
----------

[String] New locale aware casing methods

| Q             | A
| ------------- | ---
| Branch?       | 7.1
| Bug fix?      | no
| New feature?  | yes <!-- please update src/**/CHANGELOG.md files -->
| Deprecations? | no
| Tickets       | Fix #52161
| License       | MIT

Adds new localeUpper/localeLower/localeTitle methods to the AbstractUnicodeString class, to change the string case according to locale-specific case mappings.

Code examples:

```php
$string = new UnicodeString('άδικος');
echo $string->upper(), PHP_EOL; // ΆΔΙΚΟΣ
echo $string->localeUpper('el'), PHP_EOL; // ΑΔΙΚΟΣ

$string = new UnicodeString('ijssel');
echo $string->title(), PHP_EOL; // Ijssel
echo $string->localeTitle('nl'), PHP_EOL; // IJssel

$string = new UnicodeString('İSTANBUL');
echo $string->lower(), PHP_EOL; // i̇stanbul (LATIN SMALL LETTER I COMBINING DOT ABOVE)
echo $string->localeLower('tr'), PHP_EOL; // istanbul (LATIN SMALL LETTER I)
```

Commits
-------

7f4ed5c6720 [String] New locale aware casing methods
2 parents c26cedd + 2172dad commit 0cf7145

File tree

3 files changed

+193
-0
lines changed

3 files changed

+193
-0
lines changed

AbstractUnicodeString.php

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,21 @@ public function lower(): static
220220
return $str;
221221
}
222222

223+
/**
 * Returns a lowercased copy of the string using the case mappings of the given locale.
 *
 * When no transliterator can be obtained for the locale, the result of
 * lower() is returned instead.
 *
 * @param string $locale In the format language_region (e.g. tr_TR)
 */
public function localeLower(string $locale): static
{
    $transliterator = $this->getLocaleTransliterator($locale, 'Lower');

    if (null === $transliterator) {
        // No locale-specific rule available: use the default lowercasing
        return $this->lower();
    }

    $lowered = clone $this;
    $lowered->string = $transliterator->transliterate($lowered->string);

    return $lowered;
}
237+
223238
public function match(string $regexp, int $flags = 0, int $offset = 0): array
224239
{
225240
$match = ((\PREG_PATTERN_ORDER | \PREG_SET_ORDER) & $flags) ? 'preg_match_all' : 'preg_match';
@@ -363,6 +378,21 @@ public function title(bool $allWords = false): static
363378
return $str;
364379
}
365380

381+
/**
 * Returns a titlecased copy of the string using the case mappings of the given locale.
 *
 * When no transliterator can be obtained for the locale, the result of
 * title() is returned instead.
 *
 * @param string $locale In the format language_region (e.g. tr_TR)
 */
public function localeTitle(string $locale): static
{
    $transliterator = $this->getLocaleTransliterator($locale, 'Title');

    if (null === $transliterator) {
        // No locale-specific rule available: use the default titlecasing
        return $this->title();
    }

    $titled = clone $this;
    $titled->string = $transliterator->transliterate($titled->string);

    return $titled;
}
395+
366396
public function trim(string $chars = " \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}"): static
367397
{
368398
if (" \t\n\r\0\x0B\x0C\u{A0}\u{FEFF}" !== $chars && !preg_match('//u', $chars)) {
@@ -450,6 +480,21 @@ public function upper(): static
450480
return $str;
451481
}
452482

483+
/**
 * Returns an uppercased copy of the string using the case mappings of the given locale.
 *
 * When no transliterator can be obtained for the locale, the result of
 * upper() is returned instead.
 *
 * @param string $locale In the format language_region (e.g. tr_TR)
 */
public function localeUpper(string $locale): static
{
    $transliterator = $this->getLocaleTransliterator($locale, 'Upper');

    if (null === $transliterator) {
        // No locale-specific rule available: use the default uppercasing
        return $this->upper();
    }

    $uppered = clone $this;
    $uppered->string = $transliterator->transliterate($uppered->string);

    return $uppered;
}
497+
453498
public function width(bool $ignoreAnsiDecoration = true): int
454499
{
455500
$width = 0;
@@ -587,4 +632,33 @@ private function wcswidth(string $string): int
587632

588633
return $width;
589634
}
635+
636+
/**
 * Returns the transliterator for the given locale and casing operation, or null when none is available.
 *
 * The rule is built as "<locale>-<id>" (e.g. "nl_BE-Title"). When no
 * transliterator can be created for it and the locale contains an
 * underscore, the part before the underscore is tried as parent locale
 * (e.g. "nl-Title"). Every outcome, including null, is stored in
 * self::$transliterators so each rule is only resolved once.
 *
 * @param string $locale In the format language_region (e.g. tr_TR)
 * @param string $id     Casing operation appended to the locale; callers in this class pass "Lower", "Upper" or "Title"
 */
private function getLocaleTransliterator(string $locale, string $id): ?\Transliterator
{
    // Without the intl extension the Transliterator class does not exist and
    // calling it would be a fatal error; returning null lets the public
    // locale*() methods use their default-casing fallback instead
    if (!class_exists(\Transliterator::class)) {
        return null;
    }

    $rule = $locale.'-'.$id;

    // array_key_exists() rather than isset(): a cached null is a valid "not available" result
    if (\array_key_exists($rule, self::$transliterators)) {
        return self::$transliterators[$rule];
    }

    if (null !== $transliterator = self::$transliterators[$rule] = \Transliterator::create($rule)) {
        return $transliterator;
    }

    // Try to find a parent locale (nl_BE -> nl)
    if (false === $i = strpos($locale, '_')) {
        return null;
    }

    // Replace everything from the underscore onwards with "-<id>" (nl_BE -> nl-Title)
    $parentRule = substr_replace($locale, '-'.$id, $i);

    // Parent locale was already cached, return and store as current locale
    if (\array_key_exists($parentRule, self::$transliterators)) {
        return self::$transliterators[$rule] = self::$transliterators[$parentRule];
    }

    // Create transliterator based on parent locale and cache the result on both initial and parent locale values
    $transliterator = \Transliterator::create($parentRule);

    return self::$transliterators[$rule] = self::$transliterators[$parentRule] = $transliterator;
}
590664
}

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
CHANGELOG
22
=========
33

4+
7.1
5+
---
6+
7+
* Add `localeLower()`, `localeUpper()`, `localeTitle()` methods to `AbstractUnicodeString`
8+
49
6.2
510
---
611

Tests/AbstractUnicodeTestCase.php

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,48 @@ public function testAsciiClosureRule()
5050
$this->assertSame('Dieser Wert sollte grOEsser oder gleich', (string) $s->ascii([$rule]));
5151
}
5252

53+
/**
 * Checks that localeLower() produces the expected string and returns a new instance.
 *
 * @dataProvider provideLocaleLower
 *
 * @requires extension intl
 */
public function testLocaleLower(string $locale, string $expected, string $origin)
{
    $source = static::createFromString($origin);
    $instance = $source->localeLower($locale);

    // Compare against the receiver itself: a freshly created object is never
    // the same instance as the result, which would make the assertion vacuous
    $this->assertNotSame($source, $instance);
    $this->assertEquals(static::createFromString($expected), $instance);
    $this->assertSame($expected, (string) $instance);
}
66+
67+
/**
 * Checks that localeUpper() produces the expected string and returns a new instance.
 *
 * @dataProvider provideLocaleUpper
 *
 * @requires extension intl
 */
public function testLocaleUpper(string $locale, string $expected, string $origin)
{
    $source = static::createFromString($origin);
    $instance = $source->localeUpper($locale);

    // Compare against the receiver itself: a freshly created object is never
    // the same instance as the result, which would make the assertion vacuous
    $this->assertNotSame($source, $instance);
    $this->assertEquals(static::createFromString($expected), $instance);
    $this->assertSame($expected, (string) $instance);
}
80+
81+
/**
 * Checks that localeTitle() produces the expected string and returns a new instance.
 *
 * @dataProvider provideLocaleTitle
 *
 * @requires extension intl
 */
public function testLocaleTitle(string $locale, string $expected, string $origin)
{
    $source = static::createFromString($origin);
    $instance = $source->localeTitle($locale);

    // Compare against the receiver itself: a freshly created object is never
    // the same instance as the result, which would make the assertion vacuous
    $this->assertNotSame($source, $instance);
    $this->assertEquals(static::createFromString($expected), $instance);
    $this->assertSame($expected, (string) $instance);
}
94+
5395
public function provideCreateFromCodePoint(): array
5496
{
5597
return [
@@ -291,6 +333,78 @@ public static function provideLower(): array
291333
);
292334
}
293335

336+
/**
 * Data sets for testLocaleLower(), each in the form [locale, expected, origin].
 */
public static function provideLocaleLower(): array
{
    return [
        // Lithuanian
        // Introduce an explicit dot above when lowercasing capital I's and J's
        // whenever there are more accents above.
        // LATIN CAPITAL LETTER I WITH OGONEK -> LATIN SMALL LETTER I WITH OGONEK
        ['lt', 'į', 'Į'],
        // LATIN CAPITAL LETTER I WITH GRAVE -> LATIN SMALL LETTER I COMBINING DOT ABOVE COMBINING GRAVE ACCENT
        ['lt', 'i̇̀', 'Ì'],
        // LATIN CAPITAL LETTER I WITH ACUTE -> LATIN SMALL LETTER I COMBINING DOT ABOVE COMBINING ACUTE ACCENT
        ['lt', 'i̇́', 'Í'],
        // LATIN CAPITAL LETTER I WITH TILDE -> LATIN SMALL LETTER I COMBINING DOT ABOVE COMBINING TILDE
        ['lt', 'i̇̃', 'Ĩ'],

        // Turkish and Azeri
        // When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into 'i'.
        // LATIN CAPITAL LETTER I WITH DOT ABOVE -> LATIN SMALL LETTER I
        ['tr', 'i', 'İ'],
        ['tr_TR', 'i', 'İ'],
        ['az', 'i', 'İ'],

        // Default casing rules
        // LATIN CAPITAL LETTER I WITH DOT ABOVE -> LATIN SMALL LETTER I COMBINING DOT ABOVE
        // The expected value is written with an explicit escape so the
        // invisible combining mark (U+0307) cannot silently get lost.
        ['en_US', "i\u{307}", 'İ'],
        ['en', "i\u{307}", 'İ'],
    ];
}
364+
365+
/**
 * Data sets for testLocaleUpper(), each in the form [locale, expected, origin].
 */
public static function provideLocaleUpper(): array
{
    // Turkish and Azeri: when uppercasing, i turns into a dotted capital I
    // LATIN SMALL LETTER I -> LATIN CAPITAL LETTER I WITH DOT ABOVE
    $turkic = [
        ['tr', 'İ', 'i'],
        ['tr_TR', 'İ', 'i'],
        ['az', 'İ', 'i'],
    ];

    // Greek: accents are removed when uppercasing
    // GREEK SMALL LETTER ALPHA WITH TONOS -> GREEK CAPITAL LETTER ALPHA
    $greek = [
        ['el', 'Α', 'ά'],
        ['el_GR', 'Α', 'ά'],
    ];

    // Default casing rules
    // GREEK SMALL LETTER ALPHA WITH TONOS -> GREEK CAPITAL LETTER ALPHA WITH TONOS
    $default = [
        ['en_US', 'Ά', 'ά'],
        ['en', 'Ά', 'ά'],
    ];

    return array_merge($turkic, $greek, $default);
}
387+
388+
/**
 * Data sets for testLocaleTitle(), each in the form [locale, expected, origin].
 */
public static function provideLocaleTitle(): array
{
    // Greek: titlecasing words should keep the accents on the first letter
    $greek = [
        ['el', 'Άδικος', 'άδικος'],
        ['el_GR', 'Άδικος', 'άδικος'],
        ['en', 'Άδικος', 'άδικος'],
    ];

    // Dutch: title casing should treat 'ij' as one character
    $dutch = [
        ['nl_NL', 'IJssel', 'ijssel'],
        ['nl_BE', 'IJssel', 'ijssel'],
        ['nl', 'IJssel', 'ijssel'],
    ];

    // Default casing rules
    $default = [
        ['en', 'Ijssel', 'ijssel'],
    ];

    return array_merge($greek, $dutch, $default);
}
407+
294408
public static function provideUpper(): array
295409
{
296410
return array_merge(

0 commit comments

Comments
 (0)