@@ -163,18 +163,18 @@ This is implemented so that one can say write an `AnnotatedString` to an
163163`AnnotatedIOBuffer` one character at a time without needlessly producing a
164164new annotation for each character.
165165"""
166- function _insert_annotations! (io :: AnnotatedIOBuffer , annotations :: Vector{RegionAnnotation} , offset:: Int = position (io) )
166+ function _insert_annotations! (annots :: Vector{RegionAnnotation} , newannots :: Vector{RegionAnnotation} , offset:: Int = 0 )
167167 run = 0
168- if ! isempty (io . annotations ) && last (last (io . annotations ). region) == offset
169- for i in reverse (axes (annotations , 1 ))
170- annot = annotations [i]
168+ if ! isempty (annots ) && last (last (annots ). region) == offset
169+ for i in reverse (axes (newannots , 1 ))
170+ annot = newannots [i]
171171 first (annot. region) == 1 || continue
172- i <= length (io . annotations ) || continue
173- if annot. label == last (io . annotations ). label && annot. value == last (io . annotations ). value
172+ i <= length (annots ) || continue
173+ if annot. label == last (annots ). label && annot. value == last (annots ). value
174174 valid_run = true
175175 for runlen in 1 : i
176- new = annotations [begin + runlen- 1 ]
177- old = io . annotations [end - i+ runlen]
176+ new = newannots [begin + runlen- 1 ]
177+ old = annots [end - i+ runlen]
178178 if last (old. region) != offset || first (new. region) != 1 || old. label != new. label || old. value != new. value
179179 valid_run = false
180180 break
@@ -188,18 +188,157 @@ function _insert_annotations!(io::AnnotatedIOBuffer, annotations::Vector{RegionA
188188 end
189189 end
190190 for runindex in 0 : run- 1
191- old_index = lastindex (io. annotations) - run + 1 + runindex
192- old = io. annotations[old_index]
193- new = annotations[begin + runindex]
194- io. annotations[old_index] = setindex (old, first (old. region): last (new. region)+ offset, :region )
191+ old_index = lastindex (annots) - run + 1 + runindex
192+ old = annots[old_index]
193+ new = newannots[begin + runindex]
194+ extannot = (region = first (old. region): last (new. region)+ offset,
195+ label = old. label,
196+ value = old. value)
197+ annots[old_index] = extannot
195198 end
196- for index in run+ 1 : lastindex (annotations )
197- annot = annotations [index]
199+ for index in run+ 1 : lastindex (newannots )
200+ annot = newannots [index]
198201 start, stop = first (annot. region), last (annot. region)
199- push! (io. annotations, setindex (annotations[index], start+ offset: stop+ offset, :region ))
202+ # REVIEW: For some reason, construction of `newannot`
203+ # can be a significant contributor to the overall runtime
204+ # of this function. For instance, executing:
205+ #
206+ # replace(AnnotatedIOBuffer(), S"apple",
207+ # 'e' => S"{red:x}", 'p' => S"{green:y}")
208+ #
209+ # results in 3 calls to `_insert_annotations!`. It takes
210+ # ~570ns in total, compared to ~200ns if we push `annot`
211+ # instead of `newannot`. Commenting out the `_insert_annotations!`
212+ # line reduces the runtime to ~170ns, from which we can infer
213+ # that constructing `newannot` is somehow responsible for
214+ # a ~30ns -> ~400ns (~13x) increase in runtime!!
215+ # This also comes with a marginal increase in allocations
216+ # (compared to the commented out version) of 2 -> 14 (250b -> 720b).
217+ #
218+ # This seems quite strange, but I haven't dug into the generated
219+ # LLVM or ASM code. If anybody reading this is interested in checking
220+ # this out, that would be brilliant 🙏.
221+ #
222+ # What I have done is found that "direct tuple reconstruction"
223+ # (as below) is several times faster than using `setindex`.
224+ newannot = (region = start+ offset: stop+ offset,
225+ label = annot. label,
226+ value = annot. value)
227+ push! (annots, newannot)
200228 end
201229end
202230
# Convenience method for `AnnotatedIOBuffer`: forwards to the vector-based
# `_insert_annotations!` using the buffer's own annotation list. When `offset`
# is not given, the buffer's current position is used.
function _insert_annotations!(io::AnnotatedIOBuffer,
                              newannots::Vector{RegionAnnotation},
                              offset::Int = position(io))
    return _insert_annotations!(io.annotations, newannots, offset)
end
233+
234+ # String replacement
235+
236+ # REVIEW: For some reason the `Core.kwcall` indirection seems to cause a
237+ # substantial slowdown here. If we remove `; count` from the signature
238+ # and run the sample code above in `_insert_annotations!`, the runtime
239+ # drops from ~4400ns to ~580ns (~7x faster). I cannot guess why this is.
# Write `str` to `out` with the replacements described by `pat_f` applied
# (at most `count` of them), and return `out`.
#
# Annotation handling:
# - annotations carried by an annotated replacement value are collected in
#   `newannots` (shifted to their destination) and appended at the end;
# - annotations of `str` lying entirely inside a replaced span are dropped;
# - annotations of `str` lying between replacements are shifted by the byte
#   offset accumulated up to that point;
# - annotations of `str` straddling one or more replacements are split into
#   the unreplaced gaps they cover.
#
# If `count` is zero, no patterns are given, or `_replace_init` reports that
# nothing matches, `str` is written to `out` unchanged.
function replace(out::AnnotatedIOBuffer, str::AnnotatedString, pat_f::Pair...; count = typemax(Int))
    if count == 0 || isempty(pat_f)
        write(out, str)
        return out
    end
    e1, patterns, replacers, repspans, notfound = _replace_init(str.string, pat_f, count)
    if notfound
        foreach(_free_pat_replacer, patterns)
        write(out, str)
        return out
    end
    # Modelled after `Base.annotated_chartransform`, but needing
    # to handle a bit more complexity.
    isappending = eof(out)
    newannots = empty(out.annotations)
    bytepos = bytestart = firstindex(str.string)
    # Each entry records a replaced span of `str.string` together with the
    # offset to add to source positions that follow it. The first entry is
    # an empty sentinel span placed just before the start of the string.
    replacements = [(region = (bytestart - 1):(bytestart - 1), offset = position(out))]
    nrep = 1
    while nrep <= count
        repspans, ridx, xspan, newbytes, bytepos = @inline _replace_once(
            out.io, str.string, bytestart, e1, patterns, replacers, repspans, count, nrep, bytepos)
        first(xspan) >= e1 && break
        nrep += 1
        # NOTE: When the replaced pattern ends with a multi-codeunit character,
        # `xspan` only covers up to the start of that character. However,
        # for us to correctly account for the changes to the string we need
        # the /entire/ span of codeunits that were replaced.
        if !isempty(xspan) && codeunit(str.string, last(xspan)) > 0x80
            xspan = first(xspan):(nextind(str.string, last(xspan)) - 1)
        end
        drift = last(replacements).offset
        thisrep = (region = xspan, offset = drift + newbytes - length(xspan))
        destoff = first(xspan) - 1 + drift
        push!(replacements, thisrep)
        replacement = replacers[ridx]
        _isannotated(replacement) || continue
        annots = annotations(replacement)
        annots′ = if eltype(annots) == Annotation # When it's a char not a string
            region = 1:newbytes
            # Build the elements with `RegionAnnotation` itself (rather than a
            # spelled-out `UnitRange{Int64}` named tuple type) so the element
            # type always matches the assertion below, whatever `Int` is.
            [RegionAnnotation((region, label, value)) for (; label, value) in annots]
        else
            annots
        end::Vector{RegionAnnotation}
        _insert_annotations!(newannots, annots′, destoff)
    end
    # Closing sentinel: an empty span at the end position `e1`.
    push!(replacements, (region = e1:(e1 - 1), offset = last(replacements).offset))
    foreach(_free_pat_replacer, patterns)
    # Emit whatever remains of the source string after the last replacement.
    write(out.io, SubString(str.string, bytepos))
    # NOTE: To enable more efficient annotation clearing,
    # we make use of the fact that `_replace_once` picks
    # replacements ordered by their match start position.
    # This means that the start of `.region`s in
    # `replacements` is monotonically increasing.
    isappending || _clear_annotations_in_region!(out.annotations, first(replacements).offset:position(out))
    for (; region, label, value) in str.annotations
        start, stop = first(region), last(region)
        prioridx = searchsortedlast(
            replacements, (region = start:start, offset = 0),
            by = r -> first(r.region))
        postidx = searchsortedfirst(
            replacements, (region = stop:stop, offset = 0),
            by = r -> first(r.region))
        priorrep, postrep = replacements[prioridx], replacements[postidx]
        if prioridx == postidx && start >= first(priorrep.region) && stop <= last(priorrep.region)
            # Region contained within a replacement
            continue
        elseif postidx - prioridx <= 1 && start > last(priorrep.region) && stop < first(postrep.region)
            # Lies between replacements
            shiftregion = (start + priorrep.offset):(stop + priorrep.offset)
            shiftann = (; region = shiftregion, label, value)
            push!(out.annotations, shiftann)
        else
            # Split between replacements
            prevrep = replacements[max(begin, prioridx - 1)]
            for rep in @view replacements[max(begin, prioridx - 1):min(end, postidx + 1)]
                # The part of the annotation that falls in the unreplaced
                # stretch between `prevrep` and `rep` (possibly empty).
                gap = max(start, last(prevrep.region) + 1):min(stop, first(rep.region) - 1)
                if !isempty(gap)
                    shiftregion = (first(gap) + prevrep.offset):(last(gap) + prevrep.offset)
                    shiftann = (; region = shiftregion, label, value)
                    push!(out.annotations, shiftann)
                end
                prevrep = rep
            end
        end
    end
    append!(out.annotations, newannots)
    return out
end
329+
# Fallback for an arbitrary `IO` destination: only the underlying plain
# string of `str` is passed on to the `replace` method for `out`.
function replace(out::IO, str::AnnotatedString, pat_f::Pair...; count = typemax(Int))
    plain = str.string
    return replace(out, plain, pat_f...; count = count)
end
332+
# Return an `AnnotatedString` with the `pat_f` replacements applied to `str`
# (at most `count` of them).
#
# When there is nothing to do (no patterns, or `count` is zero), `str` itself
# is returned without allocating a buffer. Otherwise the work is delegated to
# the `AnnotatedIOBuffer` method and the result is read back from the buffer.
function replace(str::AnnotatedString, pat_f::Pair...; count = typemax(Int))
    # `&&` binds tighter than `||`, so the parentheses are required for the
    # early return to apply to *both* conditions.
    (isempty(pat_f) || iszero(count)) && return str
    out = AnnotatedIOBuffer()
    replace(out, str, pat_f...; count)
    return read(seekstart(out), AnnotatedString)
end
339+
340+ # Printing
341+
203342function printstyled end
204343
205344# NOTE: This is an interim solution to the invalidations caused
0 commit comments