// Patch summary (commentary only; everything from the "diff --git" header onward is unchanged).
//
// __tests__/URL-test.js (new file):
//   Wraps URL_REGEX_WITH_REQUIRED_PROTOCOL and URL_REGEX in ^...$ with the 'i' flag and checks that
//   http/https/ftp URLs and ports 12 and 65535 are accepted, while ports 02 and 65536 are rejected.
//   The required-protocol suite additionally rejects a bare 'google.com' and 'smtp://google.com'.
//
// lib/Url.js:
//   - ALLOWED_PORTS replaces the previous \d{2,4} port pattern with an alternation covering
//     1-65535 with no leading zero (1-9999 | 10000-59999 | 60000-64999 | 65000-65499 |
//     65500-65529 | 65530-65535).
//   - URL_PROTOCOL_REGEX matches http, https, ftp or ftps followed by "://".
//   - URL_REGEX_WITH_REQUIRED_PROTOCOL is URL_REGEX with the first occurrence of the literal text
//     `${URL_PROTOCOL_REGEX}?` replaced by URL_PROTOCOL_REGEX, i.e. the trailing "?" is dropped.
//   - URL_REGEX_WITH_REQUIRED_PROTOCOL and URL_PROTOCOL_REGEX are added to the exports.
//
// NOTE(review): URL_PROTOCOL_REGEX contains an inner capturing group (ht|f) and ALLOWED_PORTS is
//   itself a capturing group, whereas the removed line had a single protocol group and an
//   uncaptured port. Numbered groups after the protocol in URL_WEBSITE_REGEX / URL_REGEX therefore
//   shift - confirm no caller depends on the old group indexes.
// NOTE(review): ALLOWED_PORTS has no trailing digit boundary. The 02 / 65536 rejections in the
//   tests rely on the ^...$ anchors; unanchored, the first alternative can match a prefix such as
//   "6553" of "65536" - verify against unanchored callers.
// NOTE(review): String.prototype.replace with a string pattern is a silent no-op when the
//   substring is absent; if URL_WEBSITE_REGEX ever stops starting with `${URL_PROTOCOL_REGEX}?`,
//   the required-protocol variant would silently equal URL_REGEX.
diff --git a/__tests__/URL-test.js b/__tests__/URL-test.js new file mode 100644 index 00000000..820f91f5 --- /dev/null +++ b/__tests__/URL-test.js @@ -0,0 +1,39 @@ +import {URL_REGEX_WITH_REQUIRED_PROTOCOL, URL_REGEX} from '../lib/Url'; + +describe('Mandatory protocol for URL', () => { + it('correctly tests valid urls', () => { + const regexToTest = new RegExp(`^${URL_REGEX_WITH_REQUIRED_PROTOCOL}$`, 'i'); + expect(regexToTest.test('https://google.com/')).toBeTruthy(); + expect(regexToTest.test('http://google.com/')).toBeTruthy(); + expect(regexToTest.test('ftp://google.com/')).toBeTruthy(); + expect(regexToTest.test('https://we.are.expensify.com/how-we-got-here')).toBeTruthy(); + expect(regexToTest.test('https://google.com:12')).toBeTruthy(); + expect(regexToTest.test('https://google.com:65535')).toBeTruthy(); + expect(regexToTest.test('https://google.com:65535/path/my')).toBeTruthy(); + }); + it('correctly tests invalid urls', () => { + const regexToTest = new RegExp(`^${URL_REGEX_WITH_REQUIRED_PROTOCOL}$`, 'i'); + expect(regexToTest.test('google.com')).toBeFalsy(); + expect(regexToTest.test('https://google.com:02')).toBeFalsy(); + expect(regexToTest.test('https://google.com:65536')).toBeFalsy(); + expect(regexToTest.test('smtp://google.com')).toBeFalsy(); + }); +}); + +describe('Optional protocol for URL', () => { + it('correctly tests valid urls', () => { + const regexToTest = new RegExp(`^${URL_REGEX}$`, 'i'); + expect(regexToTest.test('google.com/')).toBeTruthy(); + expect(regexToTest.test('https://google.com/')).toBeTruthy(); + expect(regexToTest.test('ftp://google.com/')).toBeTruthy(); + expect(regexToTest.test('we.are.expensify.com/how-we-got-here')).toBeTruthy(); + expect(regexToTest.test('google.com:12')).toBeTruthy(); + expect(regexToTest.test('google.com:65535')).toBeTruthy(); + expect(regexToTest.test('google.com:65535/path/my')).toBeTruthy(); + }); + it('correctly tests invalid urls', () => { + const regexToTest = new RegExp(`^${URL_REGEX}$`, 'i'); + 
expect(regexToTest.test('google.com:02')).toBeFalsy(); + expect(regexToTest.test('google.com:65536')).toBeFalsy(); + }); +}); diff --git a/lib/Url.js b/lib/Url.js index c2d5f97e..71e0034f 100644 --- a/lib/Url.js +++ b/lib/Url.js @@ -1,16 +1,22 @@ import TLD_REGEX from './tlds'; -const URL_WEBSITE_REGEX = `(https?:\\/\\/)?((?:www\\.)?[a-z0-9](?:[-a-z0-9]*[a-z0-9])?\\.)+(?:${TLD_REGEX})(?:\\:\\d{2,4}|\\b|(?=_))`; +const ALLOWED_PORTS = '([1-9][0-9]{0,3}|[1-5][0-9]{4}|6[0-4][0-9]{3}|65[0-4][0-9]{2}|655[0-2][0-9]|6553[0-5])'; +const URL_PROTOCOL_REGEX = '((ht|f)tps?:\\/\\/)'; +const URL_WEBSITE_REGEX = `${URL_PROTOCOL_REGEX}?((?:www\\.)?[a-z0-9](?:[-a-z0-9]*[a-z0-9])?\\.)+(?:${TLD_REGEX})(?:\\:${ALLOWED_PORTS}|\\b|(?=_))`; const addEscapedChar = reg => `(?:${reg}|&(?:amp|quot|#x27);)`; const URL_PATH_REGEX = `(?:${addEscapedChar('[.,=(+$!*]')}?\\/${addEscapedChar('[-\\w$@.+!*:(),=%~]')}*${addEscapedChar('[-\\w~@:%)]')}|\\/)*`; const URL_PARAM_REGEX = `(?:\\?${addEscapedChar('[-\\w$@.+!*()\\/,=%{}:;\\[\\]\\|_]')}*)?`; const URL_FRAGMENT_REGEX = `(?:#${addEscapedChar('[-\\w$@.+!*()[\\],=%;\\/:~]')}*)?`; const URL_REGEX = `(${URL_WEBSITE_REGEX}${URL_PATH_REGEX}(?:${URL_PARAM_REGEX}|${URL_FRAGMENT_REGEX})*)`; +const URL_REGEX_WITH_REQUIRED_PROTOCOL = URL_REGEX.replace(`${URL_PROTOCOL_REGEX}?`, URL_PROTOCOL_REGEX); + export { URL_WEBSITE_REGEX, URL_PATH_REGEX, URL_PARAM_REGEX, URL_FRAGMENT_REGEX, - URL_REGEX + URL_REGEX, + URL_REGEX_WITH_REQUIRED_PROTOCOL, + URL_PROTOCOL_REGEX, };