Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ New library features
Standard library changes
------------------------

* `islowercase` and `isuppercase` are now compliant with the Unicode lower/uppercase categories ([#38574]).

#### Package Manager

Expand Down
17 changes: 6 additions & 11 deletions base/strings/unicode.jl
Original file line number Diff line number Diff line change
Expand Up @@ -280,9 +280,8 @@ isassigned(c) = UTF8PROC_CATEGORY_CN < category_code(c) <= UTF8PROC_CATEGORY_CO
"""
islowercase(c::AbstractChar) -> Bool

Tests whether a character is a lowercase letter (according to the Unicode
standard's `Lowercase` derived property). This is not the same as membership in
Unicode category Ll, Letter: Lowercase.

See also: [`isuppercase`](@ref).

Expand All @@ -298,16 +297,15 @@ julia> islowercase('❤')
false
```
"""
function islowercase(c::AbstractChar)
    # Malformed characters are never lowercase; reject them before the
    # character is converted to a code point for the C call.
    ismalformed(c) && return false
    # utf8proc answers the query for the code point; its C int result is
    # converted to a Julia Bool.
    return Bool(ccall(:utf8proc_islower, Cint, (UInt32,), UInt32(c)))
end

# true for characters with the Unicode `Uppercase` derived property (titlecase is not uppercase)

"""
isuppercase(c::AbstractChar) -> Bool

Tests whether a character is an uppercase letter (according to the Unicode
standard's `Uppercase` derived property). Characters in Unicode category Lt,
Letter: Titlecase, are not classified as uppercase.

See also: [`islowercase`](@ref).

Expand All @@ -323,10 +321,7 @@ julia> isuppercase('❤')
false
```
"""
function isuppercase(c::AbstractChar)
    # Malformed characters are never uppercase; reject them before the
    # character is converted to a code point for the C call.
    ismalformed(c) && return false
    # utf8proc answers the query for the code point; its C int result is
    # converted to a Julia Bool.
    return Bool(ccall(:utf8proc_isupper, Cint, (UInt32,), UInt32(c)))
end

"""
iscased(c::AbstractChar) -> Bool
Expand Down
11 changes: 7 additions & 4 deletions stdlib/Unicode/test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -93,25 +93,28 @@ end
@testset "#5939 uft8proc character predicates" begin
alower=['a', 'd', 'j', 'y', 'z']
ulower=['α', 'β', 'γ', 'δ', 'ф', 'я']
for c in vcat(alower,ulower)
for c in vcat(alower,ulower,['ª'])
@test islowercase(c) == true
@test isuppercase(c) == false
@test isdigit(c) == false
@test isnumeric(c) == false
end

aupper=['A', 'D', 'J', 'Y', 'Z']
uupper= ['Δ', 'Γ', 'Π', 'Ψ', 'Dž', 'Ж', 'Д']
uupper= ['Δ', 'Γ', 'Π', 'Ψ', 'Ж', 'Д']

for c in vcat(aupper,uupper)
for c in vcat(aupper,uupper,['Ⓐ'])
@test islowercase(c) == false
@test isuppercase(c) == true
@test isdigit(c) == false
@test isnumeric(c) == false
end

@test !isuppercase('Dž') # titlecase is not uppercase
@test Base.Unicode.iscased('Dž') # but is "cased"

nocase=['א','ﺵ']
alphas=vcat(alower,ulower,aupper,uupper,nocase)
alphas=vcat(alower,ulower,aupper,uupper,nocase,['Dž'])

for c in alphas
@test isletter(c) == true
Expand Down