diff --git a/lib/web_ui/lib/src/engine/dom.dart b/lib/web_ui/lib/src/engine/dom.dart index 69df856fd3a42..0975d736212e4 100644 --- a/lib/web_ui/lib/src/engine/dom.dart +++ b/lib/web_ui/lib/src/engine/dom.dart @@ -66,6 +66,9 @@ extension DomWindowExtension on DomWindow { /// The Trusted Types API (when available). /// See: https://developer.mozilla.org/en-US/docs/Web/API/Trusted_Types_API external DomTrustedTypePolicyFactory? get trustedTypes; + + // ignore: non_constant_identifier_names + external DomIntl get Intl; } typedef DomRequestAnimationFrameCallback = void Function(num highResTime); @@ -1659,3 +1662,42 @@ class _DomListWrapper extends Iterable { /// `toList` on the `Iterable`. Iterable createDomListWrapper(_DomList list) => _DomListWrapper._(list).cast(); + +@JS() +@staticInterop +class DomIntl {} + +extension DomIntlExtension on DomIntl { + /// This is a V8-only API for segmenting text. + /// + /// See: https://code.google.com/archive/p/v8-i18n/wikis/BreakIterator.wiki + external Object? get v8BreakIterator; +} + + +@JS() +@staticInterop +class DomV8BreakIterator {} + +extension DomV8BreakIteratorExtension on DomV8BreakIterator { + external void adoptText(String text); + external int first(); + external int next(); + external int current(); + external String breakType(); +} + +DomV8BreakIterator createV8BreakIterator() { + final Object? v8BreakIterator = domWindow.Intl.v8BreakIterator; + if (v8BreakIterator == null) { + throw UnimplementedError('v8BreakIterator is not supported.'); + } + + return js_util.callConstructor( + v8BreakIterator, + [ + js_util.getProperty(domWindow, 'undefined'), + js_util.jsify(const {'type': 'line'}), + ], + ); +} diff --git a/lib/web_ui/lib/src/engine/text/line_breaker.dart b/lib/web_ui/lib/src/engine/text/line_breaker.dart index 938e1ba3a38a8..5c30f81ce95c9 100644 --- a/lib/web_ui/lib/src/engine/text/line_breaker.dart +++ b/lib/web_ui/lib/src/engine/text/line_breaker.dart @@ -2,10 +2,25 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. +import '../dom.dart'; import 'fragmenter.dart'; import 'line_break_properties.dart'; import 'unicode_range.dart'; +const Set _kNewlines = { + 0x000A, // LF + 0x000B, // BK + 0x000C, // BK + 0x000D, // CR + 0x0085, // NL + 0x2028, // BK + 0x2029, // BK +}; +const Set _kSpaces = { + 0x0020, // SP + 0x200B, // ZW +}; + /// Various types of line breaks as defined by the Unicode spec. enum LineBreakType { /// Indicates that a line break is possible but not mandatory. @@ -25,8 +40,21 @@ enum LineBreakType { } /// Splits [text] into fragments based on line breaks. -class LineBreakFragmenter extends TextFragmenter { - const LineBreakFragmenter(super.text); +abstract class LineBreakFragmenter extends TextFragmenter { + factory LineBreakFragmenter(String text) { + if (domWindow.Intl.v8BreakIterator != null) { + return V8LineBreakFragmenter(text); + } + return FWLineBreakFragmenter(text); + } + + @override + List fragment(); +} + +/// Flutter web's custom implementation of [LineBreakFragmenter]. +class FWLineBreakFragmenter extends TextFragmenter implements LineBreakFragmenter { + FWLineBreakFragmenter(super.text); @override List fragment() { @@ -34,6 +62,85 @@ class LineBreakFragmenter extends TextFragmenter { } } +/// An implementation of [LineBreakFragmenter] that uses V8's +/// `v8BreakIterator` API to find line breaks in the given [text]. +class V8LineBreakFragmenter extends TextFragmenter implements LineBreakFragmenter { + V8LineBreakFragmenter(super.text) + : assert(domWindow.Intl.v8BreakIterator != null); + + @override + List fragment() { + final List breaks = []; + int fragmentStart = 0; + + final DomV8BreakIterator iterator = createV8BreakIterator(); + + iterator.adoptText(text); + iterator.first(); + while (iterator.next() != -1) { + final LineBreakType type = _getBreakType(iterator); + + final int fragmentEnd = iterator.current(); + int trailingNewlines = 0; + int trailingSpaces = 0; + + // Calculate trailing newlines and spaces. + for (int i = fragmentStart; i < fragmentEnd; i++) { + final int codeUnit = text.codeUnitAt(i); + if (_kNewlines.contains(codeUnit)) { + trailingNewlines++; + trailingSpaces++; + } else if (_kSpaces.contains(codeUnit)) { + trailingSpaces++; + } else { + // Always break after a sequence of spaces. + if (trailingSpaces > 0) { + breaks.add(LineBreakFragment( + fragmentStart, + i, + LineBreakType.opportunity, + trailingNewlines: trailingNewlines, + trailingSpaces: trailingSpaces, + )); + fragmentStart = i; + trailingNewlines = 0; + trailingSpaces = 0; + } + } + } + + breaks.add(LineBreakFragment( + fragmentStart, + fragmentEnd, + type, + trailingNewlines: trailingNewlines, + trailingSpaces: trailingSpaces, + )); + fragmentStart = fragmentEnd; + } + + if (breaks.isEmpty || breaks.last.type == LineBreakType.mandatory) { + breaks.add(LineBreakFragment(text.length, text.length, LineBreakType.endOfText, trailingNewlines: 0, trailingSpaces: 0)); + } + + return breaks; + } + + /// Gets break type from v8BreakIterator. + LineBreakType _getBreakType(DomV8BreakIterator iterator) { + final int fragmentEnd = iterator.current(); + + // I don't know why v8BreakIterator uses the type "none" to mean "soft break". + if (iterator.breakType() != 'none') { + return LineBreakType.mandatory; + } + if (fragmentEnd == text.length) { + return LineBreakType.endOfText; + } + return LineBreakType.opportunity; + } +} + class LineBreakFragment extends TextFragment { const LineBreakFragment(super.start, super.end, this.type, { required this.trailingNewlines, diff --git a/lib/web_ui/test/text/line_breaker_test.dart b/lib/web_ui/test/text/line_breaker_test.dart index 9f2a684c6210f..479920271d4ea 100644 --- a/lib/web_ui/test/text/line_breaker_test.dart +++ b/lib/web_ui/test/text/line_breaker_test.dart @@ -17,7 +17,16 @@ void main() { } void testMain() { - group('$LineBreakFragmenter', () { + groupForEachFragmenter(({required bool isV8}) { + List split(String text) { + final LineBreakFragmenter fragmenter = + isV8 ? V8LineBreakFragmenter(text) : FWLineBreakFragmenter(text); + return [ + for (final LineBreakFragment fragment in fragmenter.fragment()) + Line.fromLineBreakFragment(text, fragment) + ]; + } + test('empty string', () { expect(split(''), [ Line('', endOfText), @@ -316,13 +325,15 @@ void testMain() { }); test('comprehensive test', () { - final List testCollection = - parseRawTestData(rawLineBreakTestData); + final List testCollection = parseRawTestData(rawLineBreakTestData, isV8: isV8); for (int t = 0; t < testCollection.length; t++) { final TestCase testCase = testCollection[t]; final String text = testCase.toText(); - final List fragments = LineBreakFragmenter(text).fragment(); + final LineBreakFragmenter fragmenter = isV8 + ? V8LineBreakFragmenter(text) + : FWLineBreakFragmenter(text); + final List fragments = fragmenter.fragment(); // `f` is the index in the `fragments` list. int f = 0; @@ -401,6 +412,23 @@ void testMain() { }); } +typedef CreateLineBreakFragmenter = LineBreakFragmenter Function(String text); +typedef GroupBody = void Function({required bool isV8}); + +void groupForEachFragmenter(GroupBody callback) { + group( + '$FWLineBreakFragmenter', + () => callback(isV8: false), + ); + + if (domWindow.Intl.v8BreakIterator != null) { + group( + '$V8LineBreakFragmenter', + () => callback(isV8: true), + ); + } +} + /// Holds information about how a line was split from a string. class Line { Line(this.text, this.breakType, {this.nl = 0, this.sp = 0}); @@ -447,10 +475,3 @@ class Line { return '"$escapedText" ($breakType, nl: $nl, sp: $sp)'; } } - -List split(String text) { - return [ - for (final LineBreakFragment fragment in LineBreakFragmenter(text).fragment()) - Line.fromLineBreakFragment(text, fragment) - ]; -} diff --git a/lib/web_ui/test/text/line_breaker_test_helper.dart b/lib/web_ui/test/text/line_breaker_test_helper.dart index 8093bca91fb44..8f92e11c77a00 100644 --- a/lib/web_ui/test/text/line_breaker_test_helper.dart +++ b/lib/web_ui/test/text/line_breaker_test_helper.dart @@ -3,11 +3,11 @@ // found in the LICENSE file. /// Parses raw test data into a list of [TestCase] objects. -List parseRawTestData(String rawTestData) { +List parseRawTestData(String rawTestData, {required bool isV8}) { return rawTestData .split('\n') .where(isValidTestCase) - .map(_checkReplacement) + .map((String line) => _checkReplacement(line, isV8: isV8)) .map(_parse) .toList(); } @@ -16,7 +16,7 @@ bool isValidTestCase(String line) { return line.startsWith('×'); } -String _checkReplacement(String line) { +String _checkReplacement(String line, {required bool isV8}) { String replacement = line; // Special cases for rules LB8, LB11, LB13, LB14, LB15, LB16, LB17 to allow @@ -28,38 +28,91 @@ String _checkReplacement(String line) { .replaceAllMapped(spacesRegex, (Match m) => 'SPACE (SP) ÷ [${m.group(1)}.'); } - // Some test cases contradict rule LB25, so we are fixing them with the few - // regexes below. - - final RegExp lb25Regex1 = RegExp(r'\((CP_CP30|CL)\)(.*?) ÷ \[999\.0\] (PERCENT|DOLLAR)'); - if (replacement.contains(lb25Regex1)) { - replacement = replacement - .replaceAll(' ÷ 0024', ' × 0024') // DOLLAR SIGN (PR) - .replaceAll(' ÷ 0025', ' × 0025') // PERCENT SIGN (PO) - .replaceAllMapped( - lb25Regex1, - (Match m) => '(${m.group(1)})${m.group(2)} × [999.0] ${m.group(3)}', - ); - } - final RegExp lb25Regex2 = RegExp(r'\((IS|SY)\)(.*?) ÷ \[999\.0\] (DIGIT)'); - if (replacement.contains(lb25Regex2)) { - replacement = replacement - .replaceAll(' ÷ 0030', ' × 0030') // DIGIT ZERO (NU) - .replaceAllMapped( - lb25Regex2, - (Match m) => '(${m.group(1)})${m.group(2)} × [999.0] ${m.group(3)}', - ); + if (!isV8) { + // Some test cases contradict rule LB25, so we are fixing them with the few + // regexes below. + + final RegExp lb25Regex1 = RegExp(r'\((CP_CP30|CL)\)(.*?) ÷ \[999\.0\] (PERCENT|DOLLAR)'); + if (replacement.contains(lb25Regex1)) { + replacement = replacement + .replaceAll(' ÷ 0024', ' × 0024') // DOLLAR SIGN (PR) + .replaceAll(' ÷ 0025', ' × 0025') // PERCENT SIGN (PO) + .replaceAllMapped( + lb25Regex1, + (Match m) => '(${m.group(1)})${m.group(2)} × [999.0] ${m.group(3)}', + ); + } + final RegExp lb25Regex2 = RegExp(r'\((IS|SY)\)(.*?) ÷ \[999\.0\] (DIGIT)'); + if (replacement.contains(lb25Regex2)) { + replacement = replacement + .replaceAll(' ÷ 0030', ' × 0030') // DIGIT ZERO (NU) + .replaceAllMapped( + lb25Regex2, + (Match m) => '(${m.group(1)})${m.group(2)} × [999.0] ${m.group(3)}', + ); + } + final RegExp lb25Regex3 = RegExp(r'\((PR|PO)\)(.*?) ÷ \[999\.0\] (LEFT)'); + if (replacement.contains(lb25Regex3)) { + replacement = replacement + .replaceAll(' ÷ 0028', ' × 0028') // LEFT PARENTHESIS (OP_OP30) + .replaceAll(' ÷ 007B', ' × 007B') // LEFT CURLY BRACKET (OP_OP30) + .replaceAll(' ÷ 2329', ' × 2329') // LEFT-POINTING ANGLE BRACKET (OP) + .replaceAllMapped( + lb25Regex3, + (Match m) => '(${m.group(1)})${m.group(2)} × [999.0] ${m.group(3)}', + ); + } } - final RegExp lb25Regex3 = RegExp(r'\((PR|PO)\)(.*?) ÷ \[999\.0\] (LEFT)'); - if (replacement.contains(lb25Regex3)) { - replacement = replacement - .replaceAll(' ÷ 0028', ' × 0028') // LEFT PARENTHESIS (OP_OP30) - .replaceAll(' ÷ 007B', ' × 007B') // LEFT CURLY BRACKET (OP_OP30) - .replaceAll(' ÷ 2329', ' × 2329') // LEFT-POINTING ANGLE BRACKET (OP) - .replaceAllMapped( - lb25Regex3, - (Match m) => '(${m.group(1)})${m.group(2)} × [999.0] ${m.group(3)}', - ); + + if (isV8) { + // v8BreakIterator deviates from the spec around Hiragana and Katakana + // letters. + + final RegExp hiragana21Regex = RegExp(r' × \[21\.03\] (HIRAGANA LETTER|KATAKANA LETTER|KATAKANA-HIRAGANA)'); + if (replacement.contains(hiragana21Regex) && !replacement.contains('(BB)') && !replacement.contains('(PR)')) { + replacement = replacement + .replaceAll(' × 3041', ' ÷ 3041') // HIRAGANA LETTER (CJ) + .replaceAll(' × 30E5', ' ÷ 30E5') // KATAKANA LETTER (CJ) + .replaceAll(' × 30FC', ' ÷ 30FC') // KATAKANA-HIRAGANA PROLONGED SOUND MARK (CJ) + .replaceAllMapped( + hiragana21Regex, + (Match m) => ' ÷ [21.03] ${m.group(1)}', + ); + } + if (replacement.contains(' × [16.0] HIRAGANA LETTER')) { + replacement = replacement + .replaceAll(' × 3041', ' ÷ 3041') // HIRAGANA LETTER (CJ) + .replaceAll( + ' × [16.0] HIRAGANA LETTER', + ' ÷ [16.0] HIRAGANA LETTER', + ); + } + final RegExp hiraganaPercentRegex = RegExp(r'HIRAGANA .*? ÷ \[999\.0\] PERCENT'); + if (replacement.contains(hiraganaPercentRegex)) { + replacement = replacement + .replaceAll(' ÷ 0025', ' × 0025') // PERCENT SIGN (PO) + .replaceAll( + ' ÷ [999.0] PERCENT', + ' × [999.0] PERCENT', + ); + } + + // v8BreakIterator also deviates from the spec around hyphens, commas and + // full stops. + + final RegExp hyphenRegex = RegExp(r'\((HY|IS)\)(.*?) ÷ \[999\.0\] (DIGIT|NUMBER|SECTION|THAI|)'); + if (replacement.contains(hyphenRegex)) { + replacement = replacement + .replaceAll(' ÷ 0030', ' × 0030') // DIGIT ZERO (NU) + .replaceAll(' ÷ 0023', ' × 0023') // NUMBER SIGN (AL) + .replaceAll(' ÷ 00A7', ' × 00A7') // SECTION SIGN (AI_AL) + .replaceAll(' ÷ 0E01', ' × 0E01') // THAI CHARACTER KO KAI (SA_AL) + .replaceAll(' ÷ 50005', ' × 50005') // (XX_AL) + .replaceAllMapped( + hyphenRegex, + (Match m) => '(${m.group(1)})${m.group(2)} × [999.0] ${m.group(3)}', + ); + } } return replacement;