Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: decodeURL() #97

Merged
merged 4 commits into from
Sep 20, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,21 @@ const sha1 = createSha1Hash();
});
```

### decodeURL(str)

Decode a [percent-encoded](https://en.wikipedia.org/wiki/Percent-encoding) URL or path. An alternative to the native [`decodeURI()`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/decodeURI) function, with the added ability to decode a [punycoded](https://en.wikipedia.org/wiki/Punycode) domain.

``` js
decodeURL('http://foo.com/b%C3%A1r')
// http://foo.com/bár

decodeURL('http://xn--br-mia.com/baz')
// http://bár.com/baz

decodeURL('/foo/b%C3%A1r/')
// /foo/bár/
```

### encodeURL(str)

Encode a URL or path into a [safe format](https://en.wikipedia.org/wiki/Percent-encoding). The domain is encoded into [punycode](https://en.wikipedia.org/wiki/Punycode) when necessary.
Expand Down
38 changes: 38 additions & 0 deletions lib/decode_url.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
'use strict';

const { parse, format } = require('url');
const { toUnicode } = require('./punycode');

/**
 * Percent-decode a string with `decodeURI()` without ever throwing.
 * @private
 * @param {String} str The (possibly percent-encoded) string.
 * @returns {String} The decoded string, or `str` unchanged when
 * `decodeURI()` rejects it (e.g. a malformed escape sequence).
 */
const safeDecodeURI = (str) => {
  let decoded = str;
  try {
    decoded = decodeURI(str);
  } catch (err) {
    // Malformed input: fall through and hand the original value back.
  }
  return decoded;
};

/**
 * Decode a percent-encoded URL or path into a human-readable form.
 *
 * Input that carries a protocol is split with `url.parse()`: the hostname is
 * converted from punycode to Unicode, the pathname, query string and hash are
 * percent-decoded, and the pieces are re-assembled with `url.format()`.
 * Anything else (a path, a bare anchor, ...) is percent-decoded as a whole.
 *
 * @param {String} str URL or path to decode.
 * @returns {String} The decoded URL or path.
 */
const decodeURL = (str) => {
  const parsed = parse(str);

  // No protocol: treat the input as a plain path.
  if (!parsed.protocol) {
    return safeDecodeURI(str);
  }

  const obj = {
    auth: parsed.auth,
    protocol: parsed.protocol,
    // `format()` only adds `//` by itself for a few well-known protocols;
    // carry the flag over so that e.g. `git://host/x` keeps its slashes.
    slashes: parsed.slashes,
    port: parsed.port
  };

  // Convert the bare hostname rather than `host`, so the port is never fed
  // to the punycode decoder. The hostname is `null` for host-less protocols
  // such as `javascript:`.
  if (typeof parsed.hostname === 'string') {
    obj.hostname = toUnicode(parsed.hostname);
  }

  // The pathname is `null` for URLs like `mailto:user@foo.com`;
  // `decodeURI(null)` would turn that into the literal text "null".
  if (typeof parsed.pathname === 'string') {
    obj.pathname = safeDecodeURI(parsed.pathname);
  }

  if (parsed.hash) {
    obj.hash = safeDecodeURI(parsed.hash);
  }

  if (parsed.search) {
    obj.search = safeDecodeURI(parsed.search);
  }

  return format(obj);
};

module.exports = decodeURL;
1 change: 1 addition & 0 deletions lib/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ exports.CacheStream = require('./cache_stream');
exports.camelCaseKeys = require('./camel_case_keys');
exports.Color = require('./color');
exports.createSha1Hash = hash.createSha1Hash;
exports.decodeURL = require('./decode_url');
exports.encodeURL = require('./encode_url');
exports.escapeDiacritic = require('./escape_diacritic');
exports.escapeHTML = require('./escape_html');
Expand Down
237 changes: 237 additions & 0 deletions lib/punycode.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,237 @@
'use strict';

/* !
* punycode 2.1.1
* Licensed MIT (c) 2014-2019 Mathias Bynens <https://mathiasbynens.be/>
* https://github.com/bestiejs/punycode.js
*
* Only punycode.toUnicode(input) is implemented
*/

/** Highest positive signed 32-bit float value */
const maxInt = 2147483647; // aka. 0x7FFFFFFF or 2^31-1

/** Bootstring parameters */
const base = 36;
const tMin = 1;
const tMax = 26;
const skew = 38;
const damp = 700;
const initialBias = 72;
const initialN = 128; // 0x80
const delimiter = '-'; // '\x2D'

/** Regular expressions */
const regexPunycode = /^xn--/;
const regexSeparators = /[\x2E\u3002\uFF0E\uFF61]/g; // RFC 3490 separators

/** Error messages */
const errors = {
'overflow': 'Overflow: input needs wider integers to process',
'not-basic': 'Illegal input >= 0x80 (not a basic code point)',
'invalid-input': 'Invalid input'
};

/** Convenience shortcuts */
const { floor } = Math;
const baseMinusTMin = base - tMin;

/* --------------------------------------------------------------------------*/

/**
 * Raise the error registered under `type` in the `errors` table.
 * @private
 * @param {String} type The error type (a key of `errors`).
 * @throws {RangeError} Always; the message is `errors[type]`.
 */
function error(type) {
  const message = errors[type];
  throw new RangeError(message);
}

/**
 * Minimal `Array#map` replacement: builds a new array by calling `fn` with
 * each item (and nothing else). Items are visited from last to first.
 * @private
 * @param {Array} array The array to iterate over.
 * @param {Function} fn The function that gets called for every array item.
 * @returns {Array} A new array of the values returned by `fn`, in the same
 * order as `array`.
 */
function map(array, fn) {
  const mapped = [];
  for (let index = array.length - 1; index >= 0; index--) {
    mapped[index] = fn(array[index]);
  }
  return mapped;
}

/**
 * Apply `fn` to every label of a dot-separated name and join the results
 * back together with '.'. All separators matched by `regexSeparators` are
 * normalized to '.' first.
 * @private
 * @param {String} string The domain name to process.
 * @param {Function} fn The function that gets called for every label.
 * @returns {String} The labels returned by `fn`, joined with '.'.
 */
const mapDomain = (string, fn) => {
  // Replace-then-split instead of `split(regex)`, kept for IE8 compatibility.
  // See https://github.com/bestiejs/punycode.js/issues/17.
  const normalized = string.replace(regexSeparators, '\x2E');
  return map(normalized.split('.'), fn).join('.');
};

/**
 * Converts a basic code point into a digit/integer.
 *
 * Mapping: `0`-`9` -> 26..35, `A`-`Z` -> 0..25, `a`-`z` -> 0..25.
 * Every range is checked on both ends: with an upper bound only, code points
 * below `0x30` (such as `-`) or between the ranges (such as `:`) would be
 * mistaken for digits and produce bogus, even negative, values.
 * @private
 * @param {Number} codePoint The basic numeric code point value.
 * @returns {Number} The numeric value of a basic code point (for use in
 * representing integers) in the range `0` to `base - 1`, or `base` if
 * the code point does not represent a value.
 */
const basicToDigit = (codePoint) => {
  // '0'..'9'
  if (codePoint >= 0x30 && codePoint < 0x3A) {
    return 26 + (codePoint - 0x30);
  }
  // 'A'..'Z'
  if (codePoint >= 0x41 && codePoint < 0x5B) {
    return codePoint - 0x41;
  }
  // 'a'..'z'
  if (codePoint >= 0x61 && codePoint < 0x7B) {
    return codePoint - 0x61;
  }
  return base;
};

/**
 * Bias adaptation function as per section 3.4 of RFC 3492.
 * https://tools.ietf.org/html/rfc3492#section-3.4
 * @private
 * @param {Number} delta The delta that was just decoded.
 * @param {Number} numPoints Divisor applied to the scaled delta.
 * @param {Boolean} firstTime When truthy the delta is divided by `damp`,
 * otherwise it is halved.
 * @returns {Number} The new bias.
 */
function adapt(delta, numPoints, firstTime) {
  const threshold = (baseMinusTMin * tMax) >> 1;

  let scaled = firstTime ? floor(delta / damp) : delta >> 1;
  scaled += floor(scaled / numPoints);

  let k = 0;
  while (scaled > threshold) {
    scaled = floor(scaled / baseMinusTMin);
    k += base;
  }

  return floor(k + (((baseMinusTMin + 1) * scaled) / (scaled + skew)));
}

/**
* Converts a Punycode string of ASCII-only symbols to a string of Unicode
* symbols.
*
* The part of `input` before the last delimiter is copied verbatim as basic
* code points; the remainder is read as a sequence of variable-length
* integers, each of which determines one code point and the position at
* which it is inserted into the output.
* @memberOf punycode
* @param {String} input The Punycode string of ASCII-only symbols.
* @returns {String} The resulting string of Unicode symbols.
* @throws {RangeError} Raised through `error()` when a copied character is
* >= 0x80 ('not-basic'), when the input ends in the middle of an integer
* ('invalid-input'), or when a digit or intermediate value fails one of the
* overflow checks ('overflow').
*/
const decode = (input) => {
// Don't use UCS-2.
// `output` collects numeric code points; it is turned into a string only at
// the very end.
const output = [];
const inputLength = input.length;
let i = 0;
let n = initialN;
let bias = initialBias;

// Handle the basic code points: let `basic` be the number of input code
// points before the last delimiter, or `0` if there is none, then copy
// the first basic code points to the output.

let basic = input.lastIndexOf(delimiter);
if (basic < 0) {
basic = 0;
}

for (let j = 0; j < basic; ++j) {
// if it's not a basic code point
if (input.charCodeAt(j) >= 0x80) {
error('not-basic');
}
output.push(input.charCodeAt(j));
}

// Main decoding loop: start just after the last delimiter if any basic code
// points were copied; start at the beginning otherwise.

for (let index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) {

// `index` is the index of the next character to be consumed.
// Decode a generalized variable-length integer into `delta`,
// which gets added to `i`. The overflow checking is easier
// if we increase `i` as we go, then subtract off its starting
// value at the end to obtain `delta`.
let oldi = i;
// `w` is the weight of the current digit, `k` its position (a multiple of `base`).
for (let w = 1, k = base; /* no condition */; k += base) {

// Ran out of input in the middle of an integer.
if (index >= inputLength) {
error('invalid-input');
}

const digit = basicToDigit(input.charCodeAt(index++));

// NOTE(review): a character that is not a digit at all (`digit >= base`)
// is reported with the 'overflow' message as well, not 'invalid-input'.
if (digit >= base || digit > floor((maxInt - i) / w)) {
error('overflow');
}

i += digit * w;

// Threshold for this digit position: `k - bias` clamped to [tMin, tMax].
let t;
if (k <= bias) t = tMin;
else if (k >= bias + tMax) t = tMax;
else t = k - bias;

// A digit below the threshold terminates the integer.
if (digit < t) {
break;
}

const baseMinusT = base - t;
if (w > floor(maxInt / baseMinusT)) {
error('overflow');
}

w *= baseMinusT;

}

// `out` is the output length after the upcoming insertion.
const out = output.length + 1;
// `oldi === 0` is passed as `firstTime` to `adapt()`.
bias = adapt(i - oldi, out, oldi === 0);

// `i` was supposed to wrap around from `out` to `0`,
// incrementing `n` each time, so we'll fix that now:
if (floor(i / out) > maxInt - n) {
error('overflow');
}

n += floor(i / out);
i %= out;

// Insert `n` at position `i` of the output.
// The post-increment moves `i` past the code point that was just inserted.
output.splice(i++, 0, n);

}

return String.fromCodePoint(...output);
};

/**
 * Converts a Punycode string representing a domain name to Unicode. Only the
 * labels that start with `xn--` are converted; every other label is returned
 * untouched, so it doesn't matter if you call it on a string that has
 * already been converted to Unicode.
 * @memberOf punycode
 * @param {String} input The Punycoded domain name to convert to Unicode.
 * @returns {String} The Unicode representation of the given Punycode
 * string.
 */
const toUnicode = (input) => mapDomain(input, (label) => {
  if (!regexPunycode.test(label)) {
    return label;
  }
  // Drop the 4-character `xn--` prefix and lower-case the rest before decoding.
  const encoded = label.slice(4).toLowerCase();
  return decode(encoded);
});

module.exports = { toUnicode: toUnicode };
87 changes: 87 additions & 0 deletions test/decode_url.spec.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
'use strict';

require('chai').should();

/**
 * Specs for `decodeURL()`: absolute URLs (auth, port, query, hash, IDN host)
 * and plain paths/anchors. Already-decoded input must come back unchanged.
 */
describe('decodeURL', () => {
  const decodeURL = require('../lib/decode_url');

  it('regular', () => {
    const content = 'http://foo.com/';
    decodeURL(content).should.eql(content);
  });

  it('auth', () => {
    // `user:password@` credentials must pass through untouched.
    const content = 'http://user:pass@foo.com/';
    decodeURL(content).should.eql(content);
  });

  it('port', () => {
    const content = 'http://foo.com:80/';
    decodeURL(content).should.eql(content);
  });

  it('space', () => {
    const content = 'http://foo.com/bar%20baz';
    decodeURL(content).should.eql('http://foo.com/bar baz');
  });

  it('unicode', () => {
    const content = 'http://foo.com/b%C3%A1r';
    decodeURL(content).should.eql('http://foo.com/bár');
  });

  it('decode once', () => {
    const content = 'http://fóo.com/bár';
    decodeURL(content).should.eql(content);
  });

  it('hash', () => {
    const content = 'http://foo.com/b%C3%A1r#b%C3%A0z';
    decodeURL(content).should.eql('http://foo.com/bár#bàz');
  });

  it('query', () => {
    const content = 'http://foo.com/bar?q%C3%BAery=b%C3%A1z';
    decodeURL(content).should.eql('http://foo.com/bar?qúery=báz');
  });

  it('multiple queries', () => {
    const content = 'http://foo.com/bar?query1=a%C3%A1a&query2=a%C3%A0a';
    decodeURL(content).should.eql('http://foo.com/bar?query1=aáa&query2=aàa');
  });

  it('hash and query', () => {
    const content = 'http://foo.com/bar?query=b%C3%A1z#f%C3%B3o';
    decodeURL(content).should.eql('http://foo.com/bar?query=báz#fóo');
  });

  it('idn', () => {
    const content = 'http://xn--br-mia.com/baz';
    decodeURL(content).should.eql('http://bár.com/baz');
  });

  it('path', () => {
    const content = '/foo/bar/';
    decodeURL(content).should.eql(content);
  });

  it('path with space', () => {
    const content = '/foo%20bar/baz/';
    decodeURL(content).should.eql('/foo bar/baz/');
  });

  it('path with unicode', () => {
    const content = '/foo/b%C3%A1r/';
    decodeURL(content).should.eql('/foo/bár/');
  });

  it('decode path once', () => {
    const content = '/foo/bár /';
    decodeURL(content).should.eql(content);
  });

  it('anchor with unicode', () => {
    const content = '#f%C3%B3o-b%C3%A1r';
    decodeURL(content).should.eql('#fóo-bár');
  });
});