Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revised implementation #77

Merged
merged 7 commits into from
Jul 17, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 23 additions & 1 deletion src/__tests__/index.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ describe("sanitizeUrl", () => {
});

it("does not alter https URLs with alphanumeric characters", () => {
expect(sanitizeUrl("https://example.com")).toBe("https://example.com");
expect(sanitizeUrl("https://example.com")).toBe("https://example.com/");
});

it("does not alter https URLs with ports with alphanumeric characters", () => {
Expand Down Expand Up @@ -147,6 +147,28 @@ describe("sanitizeUrl", () => {
});
});

it("backslash prefixed attack vectors", () => {
  // Each payload hides a javascript: URL behind one leading escape-sequence
  // character; every one of them must be replaced with BLANK_URL.
  const payloads = [
    "\fjavascript:alert()",
    "\vjavascript:alert()",
    "\tjavascript:alert()",
    "\njavascript:alert()",
    "\rjavascript:alert()",
    "\u0000javascript:alert()",
    "\u0001javascript:alert()",
  ];

  for (const payload of payloads) {
    expect(sanitizeUrl(payload)).toBe(BLANK_URL);
  }
});

it("reverses backslashes", () => {
  // Every backslash in the input is expected to come back as a forward slash.
  expect(sanitizeUrl("\\j\\av\\a\\s\\cript:alert()")).toBe(
    "/j/av/a/s/cript:alert()",
  );
});

describe("invalid protocols", () => {
describe.each(["javascript", "data", "vbscript"])("%s", (protocol) => {
it(`replaces ${protocol} urls with ${BLANK_URL}`, () => {
Expand Down
40 changes: 34 additions & 6 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,25 @@ import {
htmlEntitiesRegex,
invalidProtocolRegex,
relativeFirstCharacters,
urlSchemeRegex,
whitespaceEscapeCharsRegex,
urlSchemeRegex,
} from "./constants";

/**
 * Reports whether the first character of `url` appears in
 * `relativeFirstCharacters`.
 *
 * @param url - The URL string to inspect.
 * @returns `true` when the leading character is found in
 * `relativeFirstCharacters`, otherwise `false`.
 */
function isRelativeUrlWithoutProtocol(url: string): boolean {
  const leadingChar = url[0];
  const position = relativeFirstCharacters.indexOf(leadingChar);
  return position !== -1;
}

// adapted from https://stackoverflow.com/a/29824550/2601552
/**
 * Removes everything matched by `ctrlCharactersRegex`, then replaces each
 * `htmlEntitiesRegex` match with the character produced by
 * `String.fromCharCode` for the match's first capture group.
 *
 * @param str - The text to decode.
 * @returns The text with those characters removed and entities replaced.
 */
function decodeHtmlCharacters(str: string) {
  return str
    .replace(ctrlCharactersRegex, "")
    .replace(htmlEntitiesRegex, (_entity, code) => String.fromCharCode(code));
}

/**
 * Reports whether `url` can be parsed as an absolute URL.
 *
 * Uses the `URL` constructor inside a try/catch rather than the newer static
 * `URL.canParse`, which is missing on older Node releases and older browsers
 * and would throw a `TypeError` there instead of returning a boolean.
 *
 * @param url - The candidate URL string.
 * @returns `true` when `new URL(url)` succeeds, `false` when it throws.
 */
function isValidUrl(url: string): boolean {
  try {
    // Only the success or failure of parsing matters; the instance is discarded.
    void new URL(url);
    return true;
  } catch {
    return false;
  }
}

function decodeURI(uri: string): string {
try {
return decodeURIComponent(uri);
Expand All @@ -36,8 +39,9 @@ export function sanitizeUrl(url?: string): string {
if (!url) {
return BLANK_URL;
}

let charsToDecode;
let decodedUrl = decodeURI(url);
let decodedUrl = decodeURI(url.trim());

do {
decodedUrl = decodeHtmlCharacters(decodedUrl)
Expand All @@ -54,7 +58,9 @@ export function sanitizeUrl(url?: string): string {
decodedUrl.match(htmlCtrlEntityRegex) ||
decodedUrl.match(whitespaceEscapeCharsRegex);
} while (charsToDecode && charsToDecode.length > 0);

const sanitizedUrl = decodedUrl;

if (!sanitizedUrl) {
return BLANK_URL;
}
Expand All @@ -63,17 +69,39 @@ export function sanitizeUrl(url?: string): string {
return sanitizedUrl;
}

const urlSchemeParseResults = sanitizedUrl.match(urlSchemeRegex);
// Remove any leading whitespace before checking the URL scheme
const trimmedUrl = sanitizedUrl.trimStart();
const urlSchemeParseResults = trimmedUrl.match(urlSchemeRegex);

if (!urlSchemeParseResults) {
return sanitizedUrl;
}

const urlScheme = urlSchemeParseResults[0];
const urlScheme = urlSchemeParseResults[0].toLowerCase().trim();

if (invalidProtocolRegex.test(urlScheme)) {
return BLANK_URL;
}

return sanitizedUrl;
const backSanitized = trimmedUrl.replace(/\\/g, "/");

// Handle special cases for mailto: and custom deep-link protocols
if (urlScheme === "mailto:" || urlScheme.includes("://")) {
return backSanitized;
}

// For http and https URLs, perform additional validation
if (urlScheme === "http:" || urlScheme === "https:") {
if (!isValidUrl(backSanitized)) {
return BLANK_URL;
}

const url = new URL(backSanitized);
url.protocol = url.protocol.toLowerCase();
url.hostname = url.hostname.toLowerCase();

return url.toString();
}

return backSanitized;
}
Loading