Skip to content

Commit

Permalink
Merge pull request #97 from curbengh/decode-url
Browse files Browse the repository at this point in the history
feat: decodeURL()
  • Loading branch information
curbengh authored Sep 20, 2019
2 parents a4a0b37 + 8758e6b commit e463a12
Show file tree
Hide file tree
Showing 5 changed files with 378 additions and 0 deletions.
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,21 @@ const sha1 = createSha1Hash();
});
```

### decodeURL(str)

Decode an [encoded](https://en.wikipedia.org/wiki/Percent-encoding) URL or path. An alternative to the native [`decodeURI()`](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/decodeURI) function, with the added ability to decode a [punycoded](https://en.wikipedia.org/wiki/Punycode) domain.

``` js
decodeURL('http://foo.com/b%C3%A1r')
// http://foo.com/bár

decodeURL('http://xn--br-mia.com/baz')
// http://bár.com/baz

decodeURL('/foo/b%C3%A1r/')
// /foo/bár/
```

### encodeURL(str)

Encode URL or path into a [safe format](https://en.wikipedia.org/wiki/Percent-encoding). Domain is encoded into [punycode](https://en.wikipedia.org/wiki/Punycode) when necessary.
Expand Down
38 changes: 38 additions & 0 deletions lib/decode_url.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
'use strict';

const { parse, format } = require('url');
const { toUnicode } = require('./punycode');

/**
 * Best-effort wrapper around the native `decodeURI()`.
 * @param {String} str The (possibly percent-encoded) string.
 * @returns {String} The decoded string, or `str` itself when `decodeURI()`
 * throws (e.g. on a malformed escape sequence).
 */
const safeDecodeURI = (str) => {
  let decoded = str;
  try {
    decoded = decodeURI(str);
  } catch (err) {
    // Decoding failed: fall through and hand the input back untouched.
  }
  return decoded;
};

/**
 * Decode a percent-encoded URL or path into a human-readable form.
 *
 * Input with a protocol is parsed, its punycoded host name is converted to
 * Unicode, its pathname / search / hash are percent-decoded, and the pieces
 * are re-assembled with `url.format()`. Input without a protocol (a bare
 * path or anchor) is simply percent-decoded.
 *
 * @param {String} str URL or path to decode.
 * @returns {String} The decoded URL or path. Parts that cannot be decoded
 * (malformed escape sequence, malformed punycode) are kept as they were.
 */
const decodeURL = (str) => {
  const parsed = parse(str);

  // No protocol: treat the input as a plain path / fragment.
  if (!parsed.protocol) {
    return safeDecodeURI(str);
  }

  const obj = {
    auth: parsed.auth,
    protocol: parsed.protocol,
    // format() only re-adds '//' on its own for a few well-known protocols;
    // carrying `slashes` over keeps it for the others (git://, ssh://, ...).
    slashes: parsed.slashes
  };

  // Hostless URLs (e.g. `javascript:`) have nothing to convert.
  if (parsed.hostname) {
    let hostname = parsed.hostname;
    try {
      // Convert the bare host name only: `parsed.host` also carries the
      // port, which would end up inside the last label handed to punycode.
      hostname = toUnicode(hostname);
    } catch (err) {
      // Malformed punycode is reported as a RangeError; keep the host name
      // as-is in that case, mirroring safeDecodeURI(). Anything else is a
      // genuine failure and must surface.
      if (!(err instanceof RangeError)) {
        throw err;
      }
    }
    obj.hostname = hostname;
    obj.port = parsed.port;
  }

  // Decode only the parts that exist: decodeURI(null) would yield the
  // string "null" and leak into the formatted URL.
  for (const key of ['pathname', 'search', 'hash']) {
    if (parsed[key]) {
      obj[key] = safeDecodeURI(parsed[key]);
    }
  }

  return format(obj);
};

module.exports = decodeURL;
1 change: 1 addition & 0 deletions lib/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ exports.CacheStream = require('./cache_stream');
exports.camelCaseKeys = require('./camel_case_keys');
exports.Color = require('./color');
exports.createSha1Hash = hash.createSha1Hash;
exports.decodeURL = require('./decode_url');
exports.encodeURL = require('./encode_url');
exports.escapeDiacritic = require('./escape_diacritic');
exports.escapeHTML = require('./escape_html');
Expand Down
237 changes: 237 additions & 0 deletions lib/punycode.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,237 @@
'use strict';

/* !
* punycode 2.1.1
* Licensed MIT (c) 2014-2019 Mathias Bynens <https://mathiasbynens.be/>
* https://github.com/bestiejs/punycode.js
*
* Only punycode.toUnicode(input) is implemented
*/

/** Highest positive signed 32-bit float value */
const maxInt = 2147483647; // aka. 0x7FFFFFFF or 2^31-1

/** Bootstring parameters */
const base = 36;
const tMin = 1;
const tMax = 26;
const skew = 38;
const damp = 700;
const initialBias = 72;
const initialN = 128; // 0x80
const delimiter = '-'; // '\x2D'

/** Regular expressions */
const regexPunycode = /^xn--/;
const regexSeparators = /[\x2E\u3002\uFF0E\uFF61]/g; // RFC 3490 separators

/** Error messages */
const errors = {
'overflow': 'Overflow: input needs wider integers to process',
'not-basic': 'Illegal input >= 0x80 (not a basic code point)',
'invalid-input': 'Invalid input'
};

/** Convenience shortcuts */
const { floor } = Math;
const baseMinusTMin = base - tMin;

/* --------------------------------------------------------------------------*/

/**
 * Raises the error registered under `type` in the `errors` table.
 * @private
 * @param {String} type Key of the message in `errors`.
 * @returns {Error} Never returns: always throws a `RangeError` carrying the
 * matching message.
 */
const error = (type) => {
  const message = errors[type];
  throw new RangeError(message);
};

/**
 * Minimal `Array#map` stand-in: calls `fn` with each item (and nothing
 * else) and collects the results at the same index.
 * @private
 * @param {Array} array The array to iterate over.
 * @param {Function} fn The function that gets called for every array item.
 * @returns {Array} A new array of values returned by `fn`.
 */
const map = (array, fn) => {
  const mapped = [];
  // Walk from the last item down to the first.
  for (let idx = array.length - 1; idx >= 0; idx--) {
    mapped[idx] = fn(array[idx]);
  }
  return mapped;
};

/**
 * A simple `Array#map`-like wrapper to work with domain name strings or email
 * addresses.
 *
 * For an email address only the domain part (everything after the last `@`)
 * is processed; the local part and the `@` are copied through unchanged.
 * @private
 * @param {String} string The domain name or email address.
 * @param {Function} fn The function that gets called for every label of the
 * domain.
 * @returns {String} A new string made of the labels returned by `fn`, joined
 * with `.`.
 */
const mapDomain = (string, fn) => {
  // Everything up to and including the last '@' is kept verbatim. When there
  // is no '@', `at` is -1 and the prefix is the empty string.
  const at = string.lastIndexOf('@');
  const prefix = string.slice(0, at + 1);

  // Normalise every separator to '.' first, then split on the plain string.
  // Avoid `split(regex)` for IE8 compatibility. See https://github.com/bestiejs/punycode.js/issues/17.
  const domain = string.slice(at + 1).replace(regexSeparators, '\x2E');
  const labels = domain.split('.');

  return prefix + map(labels, fn).join('.');
};

/**
 * Converts a basic code point into a digit/integer.
 * @see `digitToBasic()`
 * @private
 * @param {Number} codePoint The basic numeric code point value.
 * @returns {Number} The numeric value of a basic code point (for use in
 * representing integers) in the range `0` to `base - 1`, or `base` if
 * the code point does not represent a value.
 */
const basicToDigit = (codePoint) => {
  // Each range needs BOTH bounds: with only an upper bound, code points
  // below the range (e.g. '-', ':' or '@') produce a bogus or negative digit
  // instead of being rejected.
  if (codePoint >= 0x30 && codePoint < 0x3A) {
    // '0'..'9' map to 26..35
    return 26 + (codePoint - 0x30);
  }
  if (codePoint >= 0x41 && codePoint < 0x5B) {
    // 'A'..'Z' map to 0..25
    return codePoint - 0x41;
  }
  if (codePoint >= 0x61 && codePoint < 0x7B) {
    // 'a'..'z' map to 0..25
    return codePoint - 0x61;
  }
  return base;
};

/**
 * Bias adaptation function as per section 3.4 of RFC 3492.
 * https://tools.ietf.org/html/rfc3492#section-3.4
 * @private
 * @param {Number} delta The delta that was just decoded.
 * @param {Number} numPoints Divisor applied to the scaled delta.
 * @param {Boolean} firstTime Whether to scale by `damp` (truthy) or to halve
 * the delta (falsy).
 * @returns {Number} The new bias.
 */
const adapt = (delta, numPoints, firstTime) => {
  let scaled = firstTime ? floor(delta / damp) : delta >> 1;
  scaled += floor(scaled / numPoints);

  // Loop-invariant cut-off below which no further division is needed.
  const threshold = (baseMinusTMin * tMax) >> 1;

  let k = 0;
  while (scaled > threshold) {
    scaled = floor(scaled / baseMinusTMin);
    k += base;
  }

  return floor(k + ((baseMinusTMin + 1) * scaled / (scaled + skew)));
};

/**
 * Converts a Punycode string of ASCII-only symbols to a string of Unicode
 * symbols.
 * @memberOf punycode
 * @param {String} input The Punycode string of ASCII-only symbols.
 * @returns {String} The resulting string of Unicode symbols.
 * @throws {RangeError} Via `error()`: `'not-basic'` when the literal part
 * contains a code point >= 0x80, `'invalid-input'` when the encoded part is
 * truncated or contains a character that is not a digit, `'overflow'` when
 * a value would not fit in `maxInt`.
 */
const decode = (input) => {
  // Don't use UCS-2: code points are collected as numbers and turned into a
  // string in one go at the end.
  const output = [];
  const inputLength = input.length;
  let i = 0;
  let n = initialN;
  let bias = initialBias;

  // Handle the basic code points: let `basic` be the number of input code
  // points before the last delimiter, or `0` if there is none, then copy
  // the first basic code points to the output.

  let basic = input.lastIndexOf(delimiter);
  if (basic < 0) {
    basic = 0;
  }

  for (let j = 0; j < basic; ++j) {
    // if it's not a basic code point
    if (input.charCodeAt(j) >= 0x80) {
      error('not-basic');
    }
    output.push(input.charCodeAt(j));
  }

  // Main decoding loop: start just after the last delimiter if any basic code
  // points were copied; start at the beginning otherwise.

  for (let index = basic > 0 ? basic + 1 : 0; index < inputLength; /* no final expression */) {

    // `index` is the index of the next character to be consumed.
    // Decode a generalized variable-length integer into `delta`,
    // which gets added to `i`. The overflow checking is easier
    // if we increase `i` as we go, then subtract off its starting
    // value at the end to obtain `delta`.
    const oldi = i;
    for (let w = 1, k = base; /* no condition */; k += base) {

      // Ran out of input in the middle of an integer.
      if (index >= inputLength) {
        error('invalid-input');
      }

      const digit = basicToDigit(input.charCodeAt(index++));

      // `basicToDigit()` answers `base` for "not a digit": that is bad
      // input, not an arithmetic overflow, so report it as such.
      if (digit >= base) {
        error('invalid-input');
      }
      if (digit > floor((maxInt - i) / w)) {
        error('overflow');
      }

      i += digit * w;

      // Threshold for this position, clamped to [tMin, tMax].
      let t;
      if (k <= bias) t = tMin;
      else if (k >= bias + tMax) t = tMax;
      else t = k - bias;

      // A digit below the threshold terminates the integer.
      if (digit < t) {
        break;
      }

      const baseMinusT = base - t;
      if (w > floor(maxInt / baseMinusT)) {
        error('overflow');
      }

      w *= baseMinusT;

    }

    const out = output.length + 1;
    bias = adapt(i - oldi, out, oldi === 0);

    // `i` was supposed to wrap around from `out` to `0`,
    // incrementing `n` each time, so we'll fix that now:
    if (floor(i / out) > maxInt - n) {
      error('overflow');
    }

    n += floor(i / out);
    i %= out;

    // Insert `n` at position `i` of the output.
    output.splice(i++, 0, n);

  }

  return String.fromCodePoint(...output);
};

/**
 * Converts the Punycoded labels of a domain name to Unicode. Splitting the
 * input into labels is delegated to `mapDomain()`. Only labels starting
 * with `xn--` are decoded; every other label is returned as-is, so calling
 * this on a string that has already been converted is harmless.
 * @memberOf punycode
 * @param {String} input The Punycoded string to convert to Unicode.
 * @returns {String} The Unicode representation of the given Punycode
 * string.
 */
const toUnicode = (input) => mapDomain(input, (label) => {
  if (!regexPunycode.test(label)) {
    return label;
  }
  // Drop the 4-character `xn--` prefix and lowercase the rest before decoding.
  const encoded = label.slice(4).toLowerCase();
  return decode(encoded);
});

module.exports = { toUnicode: toUnicode };
87 changes: 87 additions & 0 deletions test/decode_url.spec.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
'use strict';

require('chai').should();

// Behavioural spec for decodeURL(): full URLs first, then bare paths/anchors.
describe('decodeURL', () => {
  const decodeURL = require('../lib/decode_url');

  it('regular', () => {
    const content = 'http://foo.com/';
    decodeURL(content).should.eql(content);
  });

  it('auth', () => {
    // Credentials in the authority must survive the parse/format round trip.
    const content = 'http://user:pass@foo.com/';
    decodeURL(content).should.eql(content);
  });

  it('port', () => {
    const content = 'http://foo.com:80/';
    decodeURL(content).should.eql(content);
  });

  it('space', () => {
    const content = 'http://foo.com/bar%20baz';
    decodeURL(content).should.eql('http://foo.com/bar baz');
  });

  it('unicode', () => {
    const content = 'http://foo.com/b%C3%A1r';
    decodeURL(content).should.eql('http://foo.com/bár');
  });

  it('decode once', () => {
    // Already-decoded input must come back unchanged.
    const content = 'http://fóo.com/bár';
    decodeURL(content).should.eql(content);
  });

  it('hash', () => {
    const content = 'http://foo.com/b%C3%A1r#b%C3%A0z';
    decodeURL(content).should.eql('http://foo.com/bár#bàz');
  });

  it('query', () => {
    const content = 'http://foo.com/bar?q%C3%BAery=b%C3%A1z';
    decodeURL(content).should.eql('http://foo.com/bar?qúery=báz');
  });

  it('multiple queries', () => {
    const content = 'http://foo.com/bar?query1=a%C3%A1a&query2=a%C3%A0a';
    decodeURL(content).should.eql('http://foo.com/bar?query1=aáa&query2=aàa');
  });

  it('hash and query', () => {
    const content = 'http://foo.com/bar?query=b%C3%A1z#f%C3%B3o';
    decodeURL(content).should.eql('http://foo.com/bar?query=báz#fóo');
  });

  it('idn', () => {
    // Punycoded host name is converted back to Unicode.
    const content = 'http://xn--br-mia.com/baz';
    decodeURL(content).should.eql('http://bár.com/baz');
  });

  it('path', () => {
    const content = '/foo/bar/';
    decodeURL(content).should.eql(content);
  });

  it('path with space', () => {
    const content = '/foo%20bar/baz/';
    decodeURL(content).should.eql('/foo bar/baz/');
  });

  it('path with unicode', () => {
    const content = '/foo/b%C3%A1r/';
    decodeURL(content).should.eql('/foo/bár/');
  });

  it('decode path once', () => {
    const content = '/foo/bár /';
    decodeURL(content).should.eql(content);
  });

  it('anchor with unicode', () => {
    const content = '#f%C3%B3o-b%C3%A1r';
    decodeURL(content).should.eql('#fóo-bár');
  });
});

0 comments on commit e463a12

Please sign in to comment.