@@ -144,6 +144,133 @@ enum TextWidthBasis {
144144 longestLine,
145145}
146146
/// A [TextBoundary] subclass for locating word breaks.
///
/// The underlying implementation uses [UAX #29](https://unicode.org/reports/tr29/)
/// defined default word boundaries.
///
/// The default word break rules can be tailored to meet the requirements of
/// different use cases. For instance, the default rule set keeps horizontal
/// whitespaces together as a single word, which may not make sense in a
/// word-counting context -- "hello    world" counts as 3 words instead of 2.
/// An example is the [moveByWordBoundary] variant, which is a tailored
/// word-break locator that more closely matches the default behavior of most
/// platforms and editors when it comes to handling text editing keyboard
/// shortcuts that move or delete word by word.
class WordBoundary extends TextBoundary {
  /// Creates a [WordBoundary] with the text and layout information.
  WordBoundary._(this._text, this._paragraph);

  final InlineSpan _text;
  final ui.Paragraph _paragraph;

  // Matches a single space separator or punctuation character. Compiled once
  // and shared by every instance, because the predicate that uses it runs for
  // every candidate word break visited during keyboard navigation.
  static final RegExp _spaceSeparatorOrPunctuation = RegExp(r'[\p{Space_Separator}\p{Punctuation}]', unicode: true);

  /// Returns the word boundary reported by the underlying paragraph for
  /// `position`. Negative positions are clamped to 0 before the lookup.
  @override
  TextRange getTextBoundaryAt(int position) => _paragraph.getWordBoundary(TextPosition(offset: max(position, 0)));

  // Combines two UTF-16 code units (high surrogate + low surrogate) into a
  // single code point that represents a supplementary character.
  //
  // Both arguments are checked with asserts only; callers are expected to pass
  // a well-formed surrogate pair.
  static int _codePointFromSurrogates(int highSurrogate, int lowSurrogate) {
    assert(
      TextPainter._isHighSurrogate(highSurrogate),
      'U+${highSurrogate.toRadixString(16).toUpperCase().padLeft(4, "0")} is not a high surrogate.',
    );
    assert(
      TextPainter._isLowSurrogate(lowSurrogate),
      'U+${lowSurrogate.toRadixString(16).toUpperCase().padLeft(4, "0")} is not a low surrogate.',
    );
    // code point = 0x10000 + ((high - 0xD800) << 10) + (low - 0xDC00); the
    // constant part of that expression is folded into `base`.
    const int base = 0x010000 - (0xD800 << 10) - 0xDC00;
    return (highSurrogate << 10) + lowSurrogate + base;
  }

  // Returns the code point that the code unit at `index` belongs to, or null
  // if there is no code unit at `index`.
  //
  // The Runes class does not provide random access with a code unit offset.
  //
  // `index` may point at either half of a surrogate pair. An unpaired
  // surrogate (malformed UTF-16, for example a high surrogate that ends the
  // text) is returned as-is instead of being combined with a missing or
  // mismatched neighbor.
  int? _codePointAt(int index) {
    final int? codeUnitAtIndex = _text.codeUnitAt(index);
    if (codeUnitAtIndex == null) {
      return null;
    }
    switch (codeUnitAtIndex & 0xFC00) {
      case 0xD800:
        final int? following = _text.codeUnitAt(index + 1);
        if (following != null && TextPainter._isLowSurrogate(following)) {
          return _codePointFromSurrogates(codeUnitAtIndex, following);
        }
        return codeUnitAtIndex;
      case 0xDC00:
        final int? preceding = index > 0 ? _text.codeUnitAt(index - 1) : null;
        if (preceding != null && TextPainter._isHighSurrogate(preceding)) {
          return _codePointFromSurrogates(preceding, codeUnitAtIndex);
        }
        return codeUnitAtIndex;
      default:
        return codeUnitAtIndex;
    }
  }

  // Whether `codePoint` is one of the characters that UAX#29 rules WB3a and
  // WB3b always break before and after: CR, LF, and the characters with the
  // Newline word-break property (VT, FF, NEL, LS, PS).
  static bool _isNewline(int codePoint) {
    switch (codePoint) {
      case 0x000A: // LF
      case 0x000B: // VT
      case 0x000C: // FF
      case 0x000D: // CR
      case 0x0085: // NEL
      case 0x2028: // LINE SEPARATOR
      case 0x2029: // PARAGRAPH SEPARATOR
        return true;
      default:
        return false;
    }
  }

  // The predicate handed to [_UntilTextBoundary] by [moveByWordBoundary].
  //
  // Returns true if the word break at `offset` should be kept as a stopping
  // point, and false if the search should skip it and continue in the
  // direction given by `forward`. A break is skipped when the character right
  // before it (in the search direction) is a space separator or a punctuation
  // character, unless one of the UAX#29 hard break rules applies.
  bool _skipSpacesAndPunctuations(int offset, bool forward) {
    // Use code point since some punctuations are supplementary characters.
    // "inner" here refers to the code unit that's before the break in the
    // search direction (`forward`).
    final int? innerCodePoint = _codePointAt(forward ? offset - 1 : offset);
    final int? outerCodeUnit = _text.codeUnitAt(forward ? offset : offset - 1);

    // Make sure the hard break rules in UAX#29 take precedence over the ones we
    // add below. Luckily there're only 4 hard break rules for word breaks, and
    // dictionary based breaking does not introduce new hard breaks:
    // https://unicode-org.github.io/icu/userguide/boundaryanalysis/break-rules.html#word-dictionaries
    //
    // WB1 & WB2: always break at the start or the end of the text.
    if (innerCodePoint == null || outerCodeUnit == null) {
      return true;
    }
    // WB3a & WB3b: always break before and after newlines. Every newline
    // character is in the BMP, so testing the outer code unit is sufficient.
    if (_isNewline(innerCodePoint) || _isNewline(outerCodeUnit)) {
      return true;
    }
    return !_spaceSeparatorOrPunctuation.hasMatch(String.fromCharCode(innerCodePoint));
  }

  /// Returns a [TextBoundary] suitable for handling keyboard navigation
  /// commands that change the current selection word by word.
  ///
  /// This [TextBoundary] is used by text widgets in the flutter framework to
  /// provide default implementation for text editing shortcuts, for example,
  /// "delete to the previous word".
  ///
  /// The implementation applies the same set of rules [WordBoundary] uses,
  /// except that word breaks that end on a space separator or a punctuation
  /// will be skipped, to match the behavior of most platforms. Additional rules
  /// may be added in the future to better match platform behaviors.
  late final TextBoundary moveByWordBoundary = _UntilTextBoundary(this, _skipSpacesAndPunctuations);
}
247+
// A [TextBoundary] that wraps another [TextBoundary] and keeps searching past
// the wrapped boundary's results until `_predicate` accepts one of them.
//
// The predicate is called with a candidate offset and the search direction
// (true when searching forward, false when searching backward). Returning true
// accepts the offset; returning false makes the search continue past it.
class _UntilTextBoundary extends TextBoundary {
  const _UntilTextBoundary(this._textBoundary, this._predicate);

  final UntilPredicate _predicate;
  final TextBoundary _textBoundary;

  // Searches backward from `position`. Returns null when `position` is
  // negative or when the wrapped boundary runs out of candidates.
  //
  // Implemented as a loop rather than recursion so that a long run of skipped
  // boundaries (e.g. many consecutive punctuation characters) cannot exhaust
  // the call stack.
  @override
  int? getLeadingTextBoundaryAt(int position) {
    int searchFrom = position;
    while (searchFrom >= 0) {
      final int? offset = _textBoundary.getLeadingTextBoundaryAt(searchFrom);
      if (offset == null || _predicate(offset, false)) {
        return offset;
      }
      // Rejected: resume the search just before the rejected boundary.
      searchFrom = offset - 1;
    }
    return null;
  }

  // Searches forward from `position` (negative positions are clamped to 0).
  // Returns null when the wrapped boundary runs out of candidates.
  @override
  int? getTrailingTextBoundaryAt(int position) {
    int searchFrom = max(position, 0);
    while (true) {
      final int? offset = _textBoundary.getTrailingTextBoundaryAt(searchFrom);
      if (offset == null || _predicate(offset, true)) {
        return offset;
      }
      if (offset <= searchFrom) {
        // The wrapped boundary made no forward progress, so asking again would
        // yield the same rejected offset forever. Stop at the last candidate.
        return offset;
      }
      searchFrom = offset;
    }
  }
}
273+
147274/// This is used to cache and pass the computed metrics regarding the
148275/// caret's size and position. This is preferred due to the expensive
149276/// nature of the calculation.
@@ -750,7 +877,7 @@ class TextPainter {
750877
751878 // Creates a ui.Paragraph using the current configurations in this class and
752879 // assign it to _paragraph.
753- void _createParagraph () {
880+ ui. Paragraph _createParagraph () {
754881 assert (_paragraph == null || _rebuildParagraphForPaint);
755882 final InlineSpan ? text = this .text;
756883 if (text == null ) {
@@ -763,8 +890,9 @@ class TextPainter {
763890 _debugMarkNeedsLayoutCallStack = null ;
764891 return true ;
765892 }());
766- _paragraph = builder.build ();
893+ final ui. Paragraph paragraph = _paragraph = builder.build ();
767894 _rebuildParagraphForPaint = false ;
895+ return paragraph;
768896 }
769897
770898 void _layoutParagraph (double minWidth, double maxWidth) {
@@ -861,13 +989,18 @@ class TextPainter {
861989 canvas.drawParagraph (_paragraph! , offset);
862990 }
863991
// Whether the given UTF-16 code unit is a high (first) surrogate, i.e. its top
// six bits are 110110 (0xD800-0xDBFF). The value must be a UTF-16 code unit,
// meaning it must be in the range 0x0000-0xFFFF.
//
// See also:
//   * https://en.wikipedia.org/wiki/UTF-16#Code_points_from_U+010000_to_U+10FFFF
static bool _isHighSurrogate(int value) => (value & 0xFC00) == 0xD800;

// Whether the given UTF-16 code unit is a low (second) surrogate, i.e. its top
// six bits are 110111 (0xDC00-0xDFFF).
static bool _isLowSurrogate(int value) => (value & 0xFC00) == 0xDC00;
8721005
8731006 // Checks if the glyph is either [Unicode.RLM] or [Unicode.LRM]. These values take
@@ -886,7 +1019,7 @@ class TextPainter {
8861019 return null ;
8871020 }
8881021 // TODO(goderbauer): doesn't handle extended grapheme clusters with more than one Unicode scalar value (https://github.com/flutter/flutter/issues/13404).
889- return _isUtf16Surrogate (nextCodeUnit) ? offset + 2 : offset + 1 ;
1022+ return _isHighSurrogate (nextCodeUnit) ? offset + 2 : offset + 1 ;
8901023 }
8911024
8921025 /// Returns the closest offset before `offset` at which the input cursor can
@@ -897,7 +1030,7 @@ class TextPainter {
8971030 return null ;
8981031 }
8991032 // TODO(goderbauer): doesn't handle extended grapheme clusters with more than one Unicode scalar value (https://github.com/flutter/flutter/issues/13404).
900- return _isUtf16Surrogate (prevCodeUnit) ? offset - 2 : offset - 1 ;
1033+ return _isLowSurrogate (prevCodeUnit) ? offset - 2 : offset - 1 ;
9011034 }
9021035
9031036 // Unicode value for a zero width joiner character.
@@ -916,7 +1049,7 @@ class TextPainter {
9161049 const int NEWLINE_CODE_UNIT = 10 ;
9171050
9181051 // Check for multi-code-unit glyphs such as emojis or zero width joiner.
919- final bool needsSearch = _isUtf16Surrogate (prevCodeUnit) || _text! .codeUnitAt (offset) == _zwjUtf16 || _isUnicodeDirectionality (prevCodeUnit);
1052+ final bool needsSearch = _isHighSurrogate (prevCodeUnit) || _isLowSurrogate (prevCodeUnit) || _text! .codeUnitAt (offset) == _zwjUtf16 || _isUnicodeDirectionality (prevCodeUnit);
9201053 int graphemeClusterLength = needsSearch ? 2 : 1 ;
9211054 List <TextBox > boxes = < TextBox > [];
9221055 while (boxes.isEmpty) {
@@ -966,7 +1099,7 @@ class TextPainter {
9661099 final int nextCodeUnit = plainText.codeUnitAt (min (offset, plainTextLength - 1 ));
9671100
9681101 // Check for multi-code-unit glyphs such as emojis or zero width joiner
969- final bool needsSearch = _isUtf16Surrogate (nextCodeUnit) || nextCodeUnit == _zwjUtf16 || _isUnicodeDirectionality (nextCodeUnit);
1102+ final bool needsSearch = _isHighSurrogate (nextCodeUnit) || _isLowSurrogate (nextCodeUnit) || nextCodeUnit == _zwjUtf16 || _isUnicodeDirectionality (nextCodeUnit);
9701103 int graphemeClusterLength = needsSearch ? 2 : 1 ;
9711104 List <TextBox > boxes = < TextBox > [];
9721105 while (boxes.isEmpty) {
@@ -1141,6 +1274,18 @@ class TextPainter {
11411274 return _paragraph! .getWordBoundary (position);
11421275 }
11431276
/// {@template flutter.painting.TextPainter.wordBoundaries}
/// Returns a [TextBoundary] that can be used to perform word boundary analysis
/// on the current [text].
///
/// This [TextBoundary] uses word boundary rules defined in [Unicode Standard
/// Annex #29](http://www.unicode.org/reports/tr29/#Word_Boundaries).
/// {@endtemplate}
///
/// Currently word boundary analysis can only be performed after [layout]
/// has been called.
WordBoundary get wordBoundaries {
  // Both values are required to be non-null here; the null checks throw
  // otherwise, text first and then the paragraph.
  final InlineSpan span = text!;
  final ui.Paragraph paragraph = _paragraph!;
  return WordBoundary._(span, paragraph);
}
1288+
11441289 /// Returns the text range of the line at the given offset.
11451290 ///
11461291 /// The newline (if any) is not returned as part of the range.
0 commit comments