diff --git a/javascript/i18n/phonenumbers/phonenumberutil.js b/javascript/i18n/phonenumbers/phonenumberutil.js index 4445e9ab69..a7488cc101 100644 --- a/javascript/i18n/phonenumbers/phonenumberutil.js +++ b/javascript/i18n/phonenumbers/phonenumberutil.js @@ -744,46 +744,119 @@ i18n.phonenumbers.PhoneNumberUtil.DEFAULT_EXTN_PREFIX_ = ' ext. '; /** - * Pattern to capture digits used in an extension. - * Places a maximum length of '7' for an extension. + * Helper method for constructing regular expressions for parsing. Creates + * an expression that captures up to maxLength digits. * - * @const - * @type {string} + * @return {string} RegEx pattern to capture extension digits. * @private */ -i18n.phonenumbers.PhoneNumberUtil.CAPTURING_EXTN_DIGITS_ = - '([' + i18n.phonenumbers.PhoneNumberUtil.VALID_DIGITS_ + ']{1,7})'; - +i18n.phonenumbers.PhoneNumberUtil.extnDigits_ = + function(maxLength) { + return ('([' + i18n.phonenumbers.PhoneNumberUtil.VALID_DIGITS_ + ']' + + '{1,' + maxLength + '})'); +}; /** - * Regexp of all possible ways to write extensions, for use when parsing. This - * will be run as a case-insensitive regexp match. Wide character versions are - * also provided after each ASCII version. There are three regular expressions - * here. The first covers RFC 3966 format, where the extension is added using - * ';ext='. The second more generic one starts with optional white space and - * ends with an optional full stop (.), followed by zero or more spaces/tabs - * /commas and then the numbers themselves. The other one covers the special - * case of American numbers where the extension is written with a hash at the - * end, such as '- 503#'. Note that the only capturing groups should be around - * the digits that you want to capture as part of the extension, or else parsing - * will fail! We allow two options for representing the accented o - the - * character itself, and one in the unicode decomposed form with the combining - * acute accent. 
+ * Helper initialiser method to create the regular-expression pattern to match + * extensions. * - * @const - * @type {string} + * @return {string} RegEx pattern to capture extensions. * @private */ -i18n.phonenumbers.PhoneNumberUtil.EXTN_PATTERNS_FOR_PARSING_ = - i18n.phonenumbers.PhoneNumberUtil.RFC3966_EXTN_PREFIX_ + - i18n.phonenumbers.PhoneNumberUtil.CAPTURING_EXTN_DIGITS_ + '|' + - '[ \u00A0\\t,]*' + - '(?:e?xt(?:ensi(?:o\u0301?|\u00F3))?n?|\uFF45?\uFF58\uFF54\uFF4E?|' + - '\u0434\u043E\u0431|' + - '[;,x\uFF58#\uFF03~\uFF5E]|int|anexo|\uFF49\uFF4E\uFF54)' + - '[:\\.\uFF0E]?[ \u00A0\\t,-]*' + - i18n.phonenumbers.PhoneNumberUtil.CAPTURING_EXTN_DIGITS_ + '#?|' + - '[- ]+([' + i18n.phonenumbers.PhoneNumberUtil.VALID_DIGITS_ + ']{1,5})#'; +i18n.phonenumbers.PhoneNumberUtil.createExtnPattern_ = + function() { + // We cap the maximum length of an extension based on the ambiguity of the way + // the extension is prefixed. As per ITU, the officially allowed length for + // extensions is actually 40, but we don't support this since we haven't seen real + // examples and this introduces many false interpretations as the extension labels + // are not standardized. + /** @type {string} */ + var extLimitAfterExplicitLabel = '20'; + /** @type {string} */ + var extLimitAfterLikelyLabel = '15'; + /** @type {string} */ + var extLimitAfterAmbiguousChar = '9'; + /** @type {string} */ + var extLimitWhenNotSure = '6'; + + /** @type {string} */ + var possibleSeparatorsBetweenNumberAndExtLabel = "[ \u00A0\\t,]*"; + // Optional full stop (.) or colon, followed by zero or more spaces/tabs/commas. + /** @type {string} */ + var possibleCharsAfterExtLabel = "[:\\.\uFF0E]?[ \u00A0\\t,-]*"; + /** @type {string} */ + var optionalExtnSuffix = "#?"; + + // Here the extension is called out in a more explicit way, i.e. by mentioning obvious + // patterns like "ext.". 
+ /** @type {string} */ + var explicitExtLabels = + "(?:e?xt(?:ensi(?:o\u0301?|\u00F3))?n?|\uFF45?\uFF58\uFF54\uFF4E?|\u0434\u043E\u0431|anexo)"; + // One-character symbols that can be used to indicate an extension, and less + // commonly used or more ambiguous extension labels. + /** @type {string} */ + var ambiguousExtLabels = "(?:[x\uFF58#\uFF03~\uFF5E]|int|\uFF49\uFF4E\uFF54)"; + // When extension is not separated clearly. + /** @type {string} */ + var ambiguousSeparator = "[- ]+"; + // This is the same as possibleSeparatorsBetweenNumberAndExtLabel, but not matching + // comma as extension label may have it. + /** @type {string} */ + var possibleSeparatorsNumberExtLabelNoComma = "[ \u00A0\\t]*"; + // ",," is commonly used for auto dialling the extension when connected. First + // comma is matched through possibleSeparatorsBetweenNumberAndExtLabel, so we do + // not repeat it here. Semi-colon works in iPhone and Android also to pop up a + // button with the extension number following. 
+ /** @type {string} */ + var autoDiallingAndExtLabelsFound = "(?:,{2}|;)"; + + /** @type {string} */ + var rfcExtn = i18n.phonenumbers.PhoneNumberUtil.RFC3966_EXTN_PREFIX_ + + i18n.phonenumbers.PhoneNumberUtil.extnDigits_(extLimitAfterExplicitLabel); + /** @type {string} */ + var explicitExtn = possibleSeparatorsBetweenNumberAndExtLabel + explicitExtLabels + + possibleCharsAfterExtLabel + + i18n.phonenumbers.PhoneNumberUtil.extnDigits_(extLimitAfterExplicitLabel) + + optionalExtnSuffix; + /** @type {string} */ + var ambiguousExtn = possibleSeparatorsBetweenNumberAndExtLabel + ambiguousExtLabels + + possibleCharsAfterExtLabel + + i18n.phonenumbers.PhoneNumberUtil.extnDigits_(extLimitAfterAmbiguousChar) + + optionalExtnSuffix; + /** @type {string} */ + var americanStyleExtnWithSuffix = ambiguousSeparator + + i18n.phonenumbers.PhoneNumberUtil.extnDigits_(extLimitWhenNotSure) + "#"; + + /** @type {string} */ + var autoDiallingExtn = possibleSeparatorsNumberExtLabelNoComma + + autoDiallingAndExtLabelsFound + possibleCharsAfterExtLabel + + i18n.phonenumbers.PhoneNumberUtil.extnDigits_(extLimitAfterLikelyLabel) + + optionalExtnSuffix; + /** @type {string} */ + var onlyCommasExtn = possibleSeparatorsNumberExtLabelNoComma + + "(?:,)+" + possibleCharsAfterExtLabel + + i18n.phonenumbers.PhoneNumberUtil.extnDigits_(extLimitAfterAmbiguousChar) + + optionalExtnSuffix; + + // The first regular expression covers RFC 3966 format, where the extension is added + // using ";ext=". The second more generic where extension is mentioned with explicit + // labels like "ext:". In both the above cases we allow more numbers in extension than + // any other extension labels. The third one captures when single character extension + // labels or less commonly used labels are used. In such cases we capture fewer + // extension digits in order to reduce the chance of falsely interpreting two + // numbers beside each other as a number + extension. 
The fourth one covers the + // special case of American numbers where the extension is written with a hash + // at the end, such as "- 503#". The fifth one is exclusively for extension + // autodialling formats which are used when dialling and in this case we accept longer + // extensions. The last one is more liberal on the number of commas that acts as + // extension labels, so we have a strict cap on the number of digits in such extensions. + return rfcExtn + "|" + + explicitExtn + "|" + + ambiguousExtn + "|" + + americanStyleExtnWithSuffix + "|" + + autoDiallingExtn + "|" + + onlyCommasExtn; +}; /** @@ -796,7 +869,7 @@ i18n.phonenumbers.PhoneNumberUtil.EXTN_PATTERNS_FOR_PARSING_ = */ i18n.phonenumbers.PhoneNumberUtil.EXTN_PATTERN_ = new RegExp('(?:' + - i18n.phonenumbers.PhoneNumberUtil.EXTN_PATTERNS_FOR_PARSING_ + + i18n.phonenumbers.PhoneNumberUtil.createExtnPattern_() + ')$', 'i'); @@ -814,7 +887,7 @@ i18n.phonenumbers.PhoneNumberUtil.VALID_PHONE_NUMBER_PATTERN_ = i18n.phonenumbers.PhoneNumberUtil.MIN_LENGTH_PHONE_NUMBER_PATTERN_ + '$|' + '^' + i18n.phonenumbers.PhoneNumberUtil.VALID_PHONE_NUMBER_ + - '(?:' + i18n.phonenumbers.PhoneNumberUtil.EXTN_PATTERNS_FOR_PARSING_ + + '(?:' + i18n.phonenumbers.PhoneNumberUtil.createExtnPattern_() + ')?' + '$', 'i'); diff --git a/javascript/i18n/phonenumbers/phonenumberutil_test.js b/javascript/i18n/phonenumbers/phonenumberutil_test.js index daf4632c3c..39e5110ffc 100644 --- a/javascript/i18n/phonenumbers/phonenumberutil_test.js +++ b/javascript/i18n/phonenumbers/phonenumberutil_test.js @@ -3552,6 +3552,151 @@ function testParseExtensions() { phoneUtil.parse('+1 (645) 123 1234 ext. 910#', RegionCode.US))); } +function testParseHandlesLongExtensionsWithExplicitLabels() { + // Test lower and upper limits of extension lengths for each type of label. 
+ /** @type {!i18n.phonenumbers.PhoneNumber} */ + var nzNumber = new i18n.phonenumbers.PhoneNumber(); + nzNumber.setCountryCode(64); + nzNumber.setNationalNumber(33316005); + + // Firstly, when in RFC format: PhoneNumberUtil.extLimitAfterExplicitLabel + nzNumber.setExtension('0'); + assertTrue(nzNumber.equals( + phoneUtil.parse('tel:+6433316005;ext=0', RegionCode.NZ))); + nzNumber.setExtension('01234567890123456789'); + assertTrue(nzNumber.equals( + phoneUtil.parse('tel:+6433316005;ext=01234567890123456789', RegionCode.NZ))); + // Extension too long. + try { + phoneUtil.parse('tel:+6433316005;ext=012345678901234567890', RegionCode.NZ); + fail( + 'This should not parse as length of extension is higher than allowed: ' + + 'tel:+6433316005;ext=012345678901234567890'); + } catch (e) { + // Expected this exception. + assertEquals( + 'Wrong error type stored in exception.', + i18n.phonenumbers.Error.NOT_A_NUMBER, e.message); + } + + // Explicit extension label: PhoneNumberUtil.extLimitAfterExplicitLabel + nzNumber.setExtension('1'); + assertTrue(nzNumber.equals( + phoneUtil.parse('03 3316005ext:1', RegionCode.NZ))); + nzNumber.setExtension('12345678901234567890'); + assertTrue(nzNumber.equals( + phoneUtil.parse('03 3316005 xtn:12345678901234567890', RegionCode.NZ))); + assertTrue(nzNumber.equals( + phoneUtil.parse('03 3316005 extension\t12345678901234567890', RegionCode.NZ))); + assertTrue(nzNumber.equals( + phoneUtil.parse('03 3316005 xtensio:12345678901234567890', RegionCode.NZ))); + assertTrue(nzNumber.equals( + phoneUtil.parse('03 3316005 xtensión, 12345678901234567890#', RegionCode.NZ))); + assertTrue(nzNumber.equals( + phoneUtil.parse('03 3316005extension.12345678901234567890', RegionCode.NZ))); + assertTrue(nzNumber.equals( + phoneUtil.parse('03 3316005 доб:12345678901234567890', RegionCode.NZ))); + // Extension too long. 
+ try { + phoneUtil.parse('03 3316005 extension 123456789012345678901', RegionCode.NZ); + fail( + 'This should not parse as length of extension is higher than allowed: ' + + '03 3316005 extension 123456789012345678901'); + } catch (e) { + // Expected this exception. + assertEquals( + 'Wrong error type stored in exception.', + i18n.phonenumbers.Error.TOO_LONG, e.message); + } +} + +function testParseHandlesLongExtensionsWithAutoDiallingLabels() { + // Lastly, cases of auto-dialling and other standard extension labels, + // PhoneNumberUtil.extLimitAfterLikelyLabel + var usNumberUserInput = new i18n.phonenumbers.PhoneNumber(); + usNumberUserInput.setCountryCode(1); + usNumberUserInput.setNationalNumber(2679000000); + usNumberUserInput.setExtension('123456789012345'); + assertTrue(usNumberUserInput.equals( + phoneUtil.parse('+12679000000,,123456789012345#', RegionCode.US))); + assertTrue(usNumberUserInput.equals( + phoneUtil.parse('+12679000000;123456789012345#', RegionCode.US))); + var ukNumberUserInput = new i18n.phonenumbers.PhoneNumber(); + ukNumberUserInput.setCountryCode(44); + ukNumberUserInput.setNationalNumber(2034000000); + ukNumberUserInput.setExtension('123456789'); + assertTrue(ukNumberUserInput.equals( + phoneUtil.parse('+442034000000,,123456789#', RegionCode.GB))); + // Extension too long. + try { + phoneUtil.parse('+12679000000,,1234567890123456#', RegionCode.US); + fail( + 'This should not parse as length of extension is higher than allowed: ' + + '+12679000000,,1234567890123456#'); + } catch (e) { + // Expected this exception. 
+ assertEquals( + 'Wrong error type stored in exception.', + i18n.phonenumbers.Error.NOT_A_NUMBER, e.message); + } +} + +function testParseHandlesShortExtensionsWithAmbiguousChar() { + var nzNumber = new i18n.phonenumbers.PhoneNumber(); + nzNumber.setCountryCode(64); + nzNumber.setNationalNumber(33316005); + + // Secondly, for single and non-standard cases: + // PhoneNumberUtil.extLimitAfterAmbiguousChar + nzNumber.setExtension("123456789"); + assertTrue(nzNumber.equals( + phoneUtil.parse('03 3316005 x 123456789', RegionCode.NZ))); + assertTrue(nzNumber.equals( + phoneUtil.parse('03 3316005 x. 123456789', RegionCode.NZ))); + assertTrue(nzNumber.equals( + phoneUtil.parse('03 3316005 #123456789#', RegionCode.NZ))); + assertTrue(nzNumber.equals( + phoneUtil.parse('03 3316005 ~ 123456789', RegionCode.NZ))); + // Extension too long. + try { + phoneUtil.parse("03 3316005 ~ 1234567890", RegionCode.NZ); + fail( + "This should not parse as length of extension is higher than allowed: " + + "03 3316005 ~ 1234567890"); + } catch (e) { + // Expected this exception. + assertEquals( + 'Wrong error type stored in exception.', + i18n.phonenumbers.Error.TOO_LONG, e.message); + } +} + +function testParseHandlesShortExtensionsWhenNotSureOfLabel() { + // Thirdly, when no explicit extension label is present, but denoted by a trailing #: + // PhoneNumberUtil.extLimitWhenNotSure + var usNumber = new i18n.phonenumbers.PhoneNumber(); + usNumber.setCountryCode(1); + usNumber.setNationalNumber(1234567890); + usNumber.setExtension('666666'); + assertTrue(usNumber.equals( + phoneUtil.parse('+1123-456-7890 666666#', RegionCode.US))); + usNumber.setExtension('6'); + assertTrue(usNumber.equals( + phoneUtil.parse('+11234567890-6#', RegionCode.US))); + // Extension too long. + try { + phoneUtil.parse('+1123-456-7890 7777777#', RegionCode.US); + fail( + 'This should not parse as length of extension is higher than allowed: ' + + '+1123-456-7890 7777777#'); + } catch (e) { + // Expected this exception. 
+ assertEquals( + 'Wrong error type stored in exception.', + i18n.phonenumbers.Error.NOT_A_NUMBER, e.message); + } +} + function testParseAndKeepRaw() { var CCS = i18n.phonenumbers.PhoneNumber.CountryCodeSource; /** @type {!i18n.phonenumbers.PhoneNumber} */