Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ Standard library changes
* The Julia REPL now supports bracketed paste on Windows, which should significantly speed up pasting large code blocks into the REPL ([#59825])
* The REPL now provides syntax highlighting for input as you type. See the REPL docs for more info about customization.
* The REPL now supports automatic insertion of closing brackets, parentheses, and quotes. See the REPL docs for more info about customization.
* History searching has been rewritten to use a new interactive modal dialogue, using a fzf-like style.
* The display of `AbstractChar`s in the main REPL mode now includes LaTeX input information like what is shown in help mode ([#58181]).
* Display of repeated frames and cycles in stack traces has been improved by bracketing them in the trace and treating them consistently ([#55841]).

Expand Down
16 changes: 9 additions & 7 deletions base/regex.jl
Original file line number Diff line number Diff line change
Expand Up @@ -649,17 +649,17 @@ replace_err(repl) = error("Bad replacement string: $repl")
# Copy capture group number `group` of the current match in `re.match_data`
# directly into the buffer of `io`, advancing `io.ptr` and `io.size`.
#
# Returns the number of bytes written (`0` when the group did not participate
# in the match), so that callers can accumulate a running byte total.
function _write_capture(io::IO, group::Int, str, r, re::RegexAndMatchData)
    len = PCRE.substring_length_bynumber(re.match_data, group)
    # in the case of an optional group that doesn't match, len == 0
    len == 0 && return len
    # NOTE(review): one extra byte is reserved and passed to PCRE below,
    # presumably for a trailing terminator that is not counted in `len` — confirm.
    ensureroom(io, len+1)
    PCRE.substring_copy_bynumber(re.match_data, group,
                                 pointer(io.data, io.ptr), len+1)
    io.ptr += len
    io.size = max(io.size, io.ptr - 1)
    return len
end
# Fallback used when the pattern is not a `Regex`: only group 0 (the whole
# match, i.e. `str[r]`) is meaningful, any other group number is an error.
#
# Returns the number of bytes written to `io`, matching the `Regex` method so
# that callers can accumulate a running byte total.
function _write_capture(io::IO, group::Int, str, r, re)
    group == 0 || replace_err("pattern is not a Regex")
    return write(io, SubString(str, r))
end


Expand All @@ -673,12 +673,13 @@ function _replace(io, repl_s::SubstitutionString, str, r, re)
repl = unescape_string(repl_s.string, KEEP_ESC)
i = firstindex(repl)
e = lastindex(repl)
nb = 0
while i <= e
if repl[i] == SUB_CHAR
next_i = nextind(repl, i)
next_i > e && replace_err(repl)
if repl[next_i] == SUB_CHAR
write(io, SUB_CHAR)
nb += write(io, SUB_CHAR)
i = nextind(repl, next_i)
elseif isdigit(repl[next_i])
group = parse(Int, repl[next_i])
Expand All @@ -691,7 +692,7 @@ function _replace(io, repl_s::SubstitutionString, str, r, re)
break
end
end
_write_capture(io, group, str, r, re)
nb += _write_capture(io, group, str, r, re)
elseif repl[next_i] == GROUP_CHAR
i = nextind(repl, next_i)
if i > e || repl[i] != LBRACKET
Expand All @@ -713,16 +714,17 @@ function _replace(io, repl_s::SubstitutionString, str, r, re)
else
group = -1
end
_write_capture(io, group, str, r, re)
nb += _write_capture(io, group, str, r, re)
i = nextind(repl, i)
else
replace_err(repl)
end
else
write(io, repl[i])
nb += write(io, repl[i])
i = nextind(repl, i)
end
end
nb
end

struct RegexMatchIterator{S <: AbstractString}
Expand Down
169 changes: 154 additions & 15 deletions base/strings/annotated_io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -163,18 +163,18 @@ This is implemented so that one can say write an `AnnotatedString` to an
`AnnotatedIOBuffer` one character at a time without needlessly producing a
new annotation for each character.
"""
function _insert_annotations!(io::AnnotatedIOBuffer, annotations::Vector{RegionAnnotation}, offset::Int = position(io))
function _insert_annotations!(annots::Vector{RegionAnnotation}, newannots::Vector{RegionAnnotation}, offset::Int = 0)
run = 0
if !isempty(io.annotations) && last(last(io.annotations).region) == offset
for i in reverse(axes(annotations, 1))
annot = annotations[i]
if !isempty(annots) && last(last(annots).region) == offset
for i in reverse(axes(newannots, 1))
annot = newannots[i]
first(annot.region) == 1 || continue
i <= length(io.annotations) || continue
if annot.label == last(io.annotations).label && annot.value == last(io.annotations).value
i <= length(annots) || continue
if annot.label == last(annots).label && annot.value == last(annots).value
valid_run = true
for runlen in 1:i
new = annotations[begin+runlen-1]
old = io.annotations[end-i+runlen]
new = newannots[begin+runlen-1]
old = annots[end-i+runlen]
if last(old.region) != offset || first(new.region) != 1 || old.label != new.label || old.value != new.value
valid_run = false
break
Expand All @@ -188,18 +188,157 @@ function _insert_annotations!(io::AnnotatedIOBuffer, annotations::Vector{RegionA
end
end
for runindex in 0:run-1
old_index = lastindex(io.annotations) - run + 1 + runindex
old = io.annotations[old_index]
new = annotations[begin+runindex]
io.annotations[old_index] = setindex(old, first(old.region):last(new.region)+offset, :region)
old_index = lastindex(annots) - run + 1 + runindex
old = annots[old_index]
new = newannots[begin+runindex]
extannot = (region = first(old.region):last(new.region)+offset,
label = old.label,
value = old.value)
annots[old_index] = extannot
end
for index in run+1:lastindex(annotations)
annot = annotations[index]
for index in run+1:lastindex(newannots)
annot = newannots[index]
start, stop = first(annot.region), last(annot.region)
push!(io.annotations, setindex(annotations[index], start+offset:stop+offset, :region))
# REVIEW: For some reason, construction of `newannot`
# can be a significant contributor to the overall runtime
# of this function. For instance, executing:
#
# replace(AnnotatedIOBuffer(), S"apple",
# 'e' => S"{red:x}", 'p' => S"{green:y}")
#
# results in 3 calls to `_insert_annotations!`. It takes
# ~570ns in total, compared to ~200ns if we push `annot`
# instead of `newannot`. Commenting out the `_insert_annotations!`
# line reduces the runtime to ~170ns, from which we can infer
# that constructing `newannot` is somehow responsible for
# a ~30ns -> ~400ns (~13x) increase in runtime!!
# This also comes with a marginal increase in allocations
# (compared to the commented out version) of 2 -> 14 (250b -> 720b).
#
# This seems quite strange, but I haven't dug into the generated
# LLVM or ASM code. If anybody reading this is interested in checking
# this out, that would be brilliant 🙏.
#
# What I have done is found that "direct tuple reconstruction"
# (as below) is several times faster than using `setindex`.
newannot = (region = start+offset:stop+offset,
label = annot.label,
value = annot.value)
push!(annots, newannot)
end
end

# Convenience method for an `AnnotatedIOBuffer`: merge `newannots` into the
# buffer's own annotation list, with `offset` defaulting to the buffer's
# current position.
function _insert_annotations!(io::AnnotatedIOBuffer,
                              newannots::Vector{RegionAnnotation},
                              offset::Int = position(io))
    return _insert_annotations!(io.annotations, newannots, offset)
end

# String replacement

# REVIEW: For some reason the `Core.kwcall` indirection seems to cause a
# substantial slowdown here. If we remove `; count` from the signature
# and run the sample code above in `_insert_annotations!`, the runtime
# drops from ~4400ns to ~580ns (~7x faster). I cannot guess why this is.
# Write `str` to `out` with the `pat_f` replacements applied (at most `count`
# of them), keeping `out.annotations` in step with the rewritten text:
#
# - annotations of `str` lying wholly inside a replaced span are dropped,
# - annotations lying between replacements are shifted to their new position,
# - annotations straddling replacements are split into the surviving pieces,
# - annotations carried by an annotated replacement value are inserted at the
#   position where that replacement was written.
#
# Returns `out`.
function replace(out::AnnotatedIOBuffer, str::AnnotatedString, pat_f::Pair...; count = typemax(Int))
    # Nothing to replace: copy `str` across unchanged.
    if count == 0 || isempty(pat_f)
        write(out, str)
        return out
    end
    e1, patterns, replacers, repspans, notfound = _replace_init(str.string, pat_f, count)
    # `_replace_init` reported no match: clean up the patterns and copy `str` across.
    if notfound
        foreach(_free_pat_replacer, patterns)
        write(out, str)
        return out
    end
    # Modelled after `Base.annotated_chartransform`, but needing
    # to handle a bit more complexity.
    # Remember whether we started at the end of `out`; if not, existing
    # annotations over the written range are cleared further down.
    isappending = eof(out)
    # Annotations contributed by annotated replacement values, collected
    # separately and appended to `out.annotations` at the very end.
    newannots = empty(out.annotations)
    bytepos = bytestart = firstindex(str.string)
    # Each entry pairs a replaced span of `str.string` (`region`) with
    # `offset`, the amount to add to a source index following that span to
    # obtain the matching position in `out`. The list is seeded with a
    # sentinel just before the start of the string whose offset is the
    # current position of `out`.
    replacements = [(region = (bytestart - 1):(bytestart - 1), offset = position(out))]
    nrep = 1
    while nrep <= count
        # The replacement text itself is written to the underlying `out.io`.
        repspans, ridx, xspan, newbytes, bytepos = @inline _replace_once(
            out.io, str.string, bytestart, e1, patterns, replacers, repspans, count, nrep, bytepos)
        # NOTE(review): a span starting at or beyond `e1` appears to signal
        # "no further match" — confirm against `_replace_once`.
        first(xspan) >= e1 && break
        nrep += 1
        # NOTE: When the replaced pattern ends with a multi-codeunit character,
        # `xspan` only covers up to the start of that character. However,
        # for us to correctly account for the changes to the string we need
        # the /entire/ span of codeunits that were replaced.
        if !isempty(xspan) && codeunit(str.string, last(xspan)) > 0x80
            xspan = first(xspan):nextind(str.string, last(xspan))-1
        end
        # `drift` is the shift in effect before this replacement; the new
        # entry's offset additionally accounts for the change in byte length
        # (`newbytes` written in place of `length(xspan)` source bytes).
        drift = last(replacements).offset
        thisrep = (region = xspan, offset = drift + newbytes - length(xspan))
        # Offset at which the replacement's own annotations are inserted:
        # the source index just before the replaced span, shifted by `drift`.
        destoff = first(xspan) - 1 + drift
        push!(replacements, thisrep)
        replacement = replacers[ridx]
        # Unannotated replacement values contribute no annotations.
        _isannotated(replacement) || continue
        annots = annotations(replacement)
        # Normalise to region annotations: annotations without a region are
        # given one spanning all `newbytes` bytes that were written.
        annots′ = if eltype(annots) == Annotation # When it's a char not a string
            region = 1:newbytes
            [@NamedTuple{region::UnitRange{Int}, label::Symbol, value}((region, label, value))
             for (; label, value) in annots]
        else
            annots
        end::Vector{RegionAnnotation}
        _insert_annotations!(newannots, annots′, destoff)
    end
    # Closing sentinel: an empty region at `e1` carrying the final offset, so
    # that the searches below always find a following entry.
    push!(replacements, (region = e1:(e1-1), offset = last(replacements).offset))
    foreach(_free_pat_replacer, patterns)
    # Copy the remainder of the source string, starting at `bytepos`.
    write(out.io, SubString(str.string, bytepos))
    # NOTE: To enable more efficient annotation clearing,
    # we make use of the fact that `_replace_once` picks
    # replacements ordered by their match start position.
    # This means that the start of `.region`s in
    # `replacements` is monotonically increasing.
    isappending || _clear_annotations_in_region!(out.annotations, first(replacements).offset:position(out))
    # Carry each annotation of `str` over to `out`, adjusted for the replacements.
    for (; region, label, value) in str.annotations
        start, stop = first(region), last(region)
        # Last replacement whose span starts at or before `start` ...
        prioridx = searchsortedlast(
            replacements, (region = start:start, offset = 0),
            by = r -> first(r.region))
        # ... and first replacement whose span starts at or after `stop`.
        postidx = searchsortedfirst(
            replacements, (region = stop:stop, offset = 0),
            by = r -> first(r.region))
        priorrep, postrep = replacements[prioridx], replacements[postidx]
        if prioridx == postidx && start >= first(priorrep.region) && stop <= last(priorrep.region)
            # Region contained within a replacement
            continue
        elseif postidx - prioridx <= 1 && start > last(priorrep.region) && stop < first(postrep.region)
            # Lies between replacements
            shiftregion = (start + priorrep.offset):(stop + priorrep.offset)
            shiftann = (region = shiftregion, label, value)
            push!(out.annotations, shiftann)
        else
            # Split between replacements
            # Walk the neighbouring replacements and emit the part of the
            # annotation falling in each gap between consecutive spans,
            # shifted by the offset of the replacement preceding that gap.
            prevrep = replacements[max(begin, prioridx - 1)]
            for rep in @view replacements[max(begin, prioridx - 1):min(end, postidx + 1)]
                gap = max(start, last(prevrep.region)+1):min(stop, first(rep.region)-1)
                if !isempty(gap)
                    shiftregion = (first(gap) + prevrep.offset):(last(gap) + prevrep.offset)
                    shiftann = (; region = shiftregion, label, value)
                    push!(out.annotations, shiftann)
                end
                prevrep = rep
            end
        end
    end
    # Finally add the annotations that came from the replacement values.
    append!(out.annotations, newannots)
    out
end

# Method for any other `IO` destination: only the underlying `str.string` is
# passed on to the replacement, the annotations of `str` are not used.
function replace(out::IO, str::AnnotatedString, pat_f::Pair...; count=typemax(Int))
    return replace(out, str.string, pat_f...; count=count)
end

# Return an `AnnotatedString` equal to `str` with the `pat_f` replacements
# applied (at most `count` of them). The work is done by writing into a fresh
# `AnnotatedIOBuffer` and reading the result back.
#
# When there is nothing to do (no patterns, or `count` is zero) `str` itself
# is returned.
function replace(str::AnnotatedString, pat_f::Pair...; count=typemax(Int))
    # Parentheses are required: `&&` binds tighter than `||`, so without them
    # the early return only fired for `iszero(count)` and never for an empty
    # `pat_f`.
    (isempty(pat_f) || iszero(count)) && return str
    out = AnnotatedIOBuffer()
    replace(out, str, pat_f...; count)
    return read(seekstart(out), AnnotatedString)
end

# Printing

function printstyled end

# NOTE: This is an interim solution to the invalidations caused
Expand Down
7 changes: 7 additions & 0 deletions base/strings/unicode.jl
Original file line number Diff line number Diff line change
Expand Up @@ -639,6 +639,7 @@ julia> uppercase("Julia")
"""
uppercase(s::AbstractString) = map(uppercase, s)
uppercase(s::AnnotatedString) = annotated_chartransform(uppercase, s)
# Substrings of annotated strings are first converted with the
# `AnnotatedString` constructor, then handled by the `AnnotatedString` method.
function uppercase(s::SubString{<:AnnotatedString})
    return uppercase(AnnotatedString(s))
end

"""
lowercase(s::AbstractString)
Expand All @@ -655,6 +656,7 @@ julia> lowercase("STRINGS AND THINGS")
"""
lowercase(s::AbstractString) = map(lowercase, s)
lowercase(s::AnnotatedString) = annotated_chartransform(lowercase, s)
# Substrings of annotated strings are first converted with the
# `AnnotatedString` constructor, then handled by the `AnnotatedString` method.
function lowercase(s::SubString{<:AnnotatedString})
    return lowercase(AnnotatedString(s))
end

"""
titlecase(s::AbstractString; [wordsep::Function], strict::Bool=true)::String
Expand Down Expand Up @@ -720,6 +722,9 @@ function titlecase(s::AnnotatedString; wordsep::Function = !isletter, strict::Bo
end
end

# Substrings of annotated strings are first converted with the
# `AnnotatedString` constructor, then handled by the `AnnotatedString` method;
# both keyword arguments are forwarded unchanged.
function titlecase(s::SubString{<:AnnotatedString}; wordsep::Function = !isletter, strict::Bool=true)
    return titlecase(AnnotatedString(s); wordsep, strict)
end

"""
uppercasefirst(s::AbstractString)::String

Expand Down Expand Up @@ -754,6 +759,7 @@ function uppercasefirst(s::AnnotatedString)
end
end
end
# Substrings of annotated strings are first converted with the
# `AnnotatedString` constructor, then handled by the `AnnotatedString` method.
function uppercasefirst(s::SubString{<:AnnotatedString})
    return uppercasefirst(AnnotatedString(s))
end

"""
lowercasefirst(s::AbstractString)
Expand Down Expand Up @@ -787,6 +793,7 @@ function lowercasefirst(s::AnnotatedString)
end
end
end
# Substrings of annotated strings are first converted with the
# `AnnotatedString` constructor, then handled by the `AnnotatedString` method.
function lowercasefirst(s::SubString{<:AnnotatedString})
    return lowercasefirst(AnnotatedString(s))
end

############################################################################
# iterators for grapheme segmentation
Expand Down
Loading