Skip to content

Commit

Permalink
AG-36282 Add 'transform' option with 'base64decode' value for 'href-s…
Browse files Browse the repository at this point in the history
…anitizer' scriptlet. #455
  • Loading branch information
jellizaveta committed Oct 11, 2024
1 parent 90eaf3d commit c81eeb6
Show file tree
Hide file tree
Showing 3 changed files with 167 additions and 2 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ The format is based on [Keep a Changelog], and this project adheres to [Semantic
### Added

- `prevent-canvas` scriptlet [#451]
- `transform` option with `base64decode` value for `href-sanitizer` scriptlet [#455]

### Changed

Expand All @@ -23,6 +24,7 @@ The format is based on [Keep a Changelog], and this project adheres to [Semantic
[Unreleased]: https://github.com/AdguardTeam/Scriptlets/compare/v1.12.1...HEAD
[#451]: https://github.com/AdguardTeam/Scriptlets/issues/451
[#415]: https://github.com/AdguardTeam/Scriptlets/issues/415
[#455]: https://github.com/AdguardTeam/Scriptlets/issues/455

## [v1.12.1] - 2024-09-20

Expand Down
123 changes: 121 additions & 2 deletions src/scriptlets/href-sanitizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import {
* ### Syntax
*
* ```text
* example.org#%#//scriptlet('href-sanitizer', selector[, attribute])
* example.org#%#//scriptlet('href-sanitizer', selector[, attribute[, transform]])
* ```
*
* - `selector` — required, a CSS selector to match the elements to be sanitized,
Expand All @@ -31,6 +31,8 @@ import {
* - `text` — use the text content of the matched element,
* - `[attribute-name]` copy the value from attribute `attribute-name` on the same element,
* - `?parameter` copy the value from URL parameter `parameter` of the same element's `href` attribute.
* - `transform` — optional, defaults to an empty string (no transformation); possible values:
* - `base64decode` — decode the base64 string found in the value of the specified attribute.
*
* ### Examples
*
Expand Down Expand Up @@ -88,19 +90,40 @@ import {
* </div>
* ```
*
* 4. Decode the base64 string from the specified attribute:
*
* ```adblock
* example.org#%#//scriptlet('href-sanitizer', 'a[href*="foo.com"]', '[href]', 'base64decode')
* ```
*
* ```html
* <!-- before -->
* <div>
* <a href="http://www.foo.com/out/?aHR0cDovL2V4YW1wbGUuY29tLz92PTEyMw=="></a>
* </div>
*
* <!-- after -->
* <div>
* <a href="http://example.com/?v=123"></a>
* </div>
* ```
*
* @added v1.10.25.
*/

export function hrefSanitizer(
source: Source,
selector: string,
attribute = 'text',
transform = '',
) {
if (!selector) {
logMessage(source, 'Selector is required.');
return;
}

const BASE64_TRANSFORM_MARKER = 'base64decode';

// Regular expression to find not valid characters at the beginning and at the end of the string,
// \x21-\x7e is a range that includes the ASCII characters from ! (hex 21) to ~ (hex 7E).
// This range covers numbers, English letters, and common symbols.
Expand All @@ -109,6 +132,8 @@ export function hrefSanitizer(
// so it has to be removed, otherwise it will not work properly.
const regexpNotValidAtStart = /^[^\x21-\x7e\p{Letter}]+/u;
const regexpNotValidAtEnd = /[^\x21-\x7e\p{Letter}]+$/u;
// Regular expression to match base64 strings.
const base64regexp = /(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)/;

/**
* Extracts text from an element based on the specified attribute.
Expand Down Expand Up @@ -144,6 +169,21 @@ export function hrefSanitizer(
return '';
};

/**
 * Checks whether a string can be parsed as a URL.
 *
 * @param url - Candidate string to check.
 * @returns `true` when the `URL` constructor accepts the string,
 * `false` when the constructor throws.
 */
const isValidURL = (url: string): boolean => {
    let isParsable = true;
    try {
        // The constructor is used only for its validation side effect:
        // it throws when the input is not a well-formed URL.
        new URL(url);
    } catch {
        isParsable = false;
    }
    return isParsable;
};

/**
* Validates a URL, if valid return URL,
* otherwise return null.
Expand Down Expand Up @@ -177,6 +217,71 @@ export function hrefSanitizer(
return element.nodeName.toLowerCase() === 'a' && element.hasAttribute('href');
};

/**
 * Walks an object depth-first, in own-property order, and returns the first
 * string value that is a valid URL.
 *
 * @param obj - The object (possibly with nested objects or arrays) to inspect.
 * @returns The first valid URL found, or `null` when there is none.
 */
const findURLinObject = (obj: Record<string, unknown>): string | null => {
    /**
     * Resolves a single property value to a URL, descending into nested objects.
     *
     * @param candidate - Property value to inspect.
     * @returns The URL found in the value, or `null`.
     */
    const urlFromValue = (candidate: unknown): string | null => {
        if (typeof candidate === 'string') {
            return isValidURL(candidate) ? candidate : null;
        }
        if (typeof candidate === 'object' && candidate !== null) {
            return findURLinObject(candidate as Record<string, unknown>);
        }
        // numbers, booleans, null, etc. cannot contain a URL
        return null;
    };

    for (const propName in obj) {
        // skip properties inherited through the prototype chain
        if (Object.prototype.hasOwnProperty.call(obj, propName)) {
            const foundUrl = urlFromValue(obj[propName]);
            if (foundUrl) {
                return foundUrl;
            }
        }
    }
    return null;
};

/**
 * Finds the first base64-looking substring of the given text.
 *
 * @param text - The string to search.
 * @returns The first substring matched by `base64regexp`,
 * or `null` when the text contains no such substring.
 */
const getBase64Part = (text: string): string | null => {
    const found = base64regexp.exec(text);
    // index 0 of the match holds the whole matched substring
    return found === null ? null : found[0];
};

/**
 * Decodes a base64 string and tries to find a valid URL in it.
 * If the decoded string is a valid URL, it is returned.
 * If the decoded string is JSON, it is parsed and the first valid URL found in it is returned.
 * In every other case — the input cannot be decoded, the decoded text is neither
 * a URL nor JSON, or the JSON holds no URL — `null` is returned; this function never throws.
 *
 * @param text - The base64 string to decode.
 * @returns The valid URL found in the decoded string, or `null` if none is found.
 */
const decodeBase64Part = (text: string): string | null => {
    let decoded: string;
    try {
        // `atob` throws when the input is not correctly encoded base64
        decoded = atob(text);
    } catch {
        return null;
    }

    // if the decoded string is a valid URL, return it
    if (isValidURL(decoded)) {
        return decoded;
    }

    // otherwise the decoded string may be JSON with a URL somewhere inside
    let parsed: unknown;
    try {
        // `JSON.parse` throws a SyntaxError for non-JSON text, e.g. arbitrary decoded bytes
        parsed = JSON.parse(decoded);
    } catch {
        return null;
    }

    // JSON primitives and `null` have no properties that could hold a URL
    if (typeof parsed !== 'object' || parsed === null) {
        return null;
    }

    return findURLinObject(parsed as Record<string, unknown>);
};

/**
* Sanitizes the href attribute of elements matching the given selector.
*
Expand All @@ -196,7 +301,21 @@ export function hrefSanitizer(
if (!isSanitizableAnchor(elem)) {
return;
}
const newHref = extractNewHref(elem, attribute);
let newHref = extractNewHref(elem, attribute);
// if transform is base64decode, decode the base64 string
if (transform === BASE64_TRANSFORM_MARKER) {
const base64String = getBase64Part(newHref);
if (!base64String) {
logMessage(source, `Failed to extract base64 content from ${newHref}.`);
return;
}
const decodedBase64Part = decodeBase64Part(base64String);
if (!decodedBase64Part) {
logMessage(source, `Failed to decode base64 from "${newHref}"`);
return;
}
newHref = decodedBase64Part;
}
const newValidHref = getValidURL(newHref);
if (!newValidHref) {
logMessage(source, `Invalid URL: ${newHref}`);
Expand Down
44 changes: 44 additions & 0 deletions tests/scriptlets/href-sanitizer.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@ import { runScriptlet, clearGlobalProps } from '../helpers';
const { test, module } = QUnit;
const name = 'href-sanitizer';

/**
* Create link with href attribute and optional text and additional attribute
* @param {string} href - link href
* @param {string} text - link text
* @param {string} attributeName - additional attribute name
* @param {string} attributeValue - additional attribute value
* @returns {HTMLAnchorElement} - created link element
*/
const createElem = (href, text, attributeName, attributeValue) => {
const a = document.createElement('a');
a.setAttribute('href', href);
Expand Down Expand Up @@ -56,6 +64,42 @@ test('Checking if alias name works', (assert) => {
assert.strictEqual(codeByAdgParams, codeByUboParams, 'ubo name - ok');
});

// The real target is stored base64-encoded in the `data-href` attribute;
// the scriptlet is expected to decode it and write the result into `href`.
test('Santize href - base64 where link decoded as a string in data-href attribute', (assert) => {
    const expectedHref = 'https://example.org/';
    // arguments: href, text, additional attribute name, additional attribute value
    const elem = createElem('https://google.com/', expectedHref, 'data-href', 'aHR0cHM6Ly9leGFtcGxlLm9yZy8=');
    // properly terminated attribute selector (closing quote and bracket included)
    const selector = 'a[href^="https://google.com/"]';

    const scriptletArgs = [selector, '[data-href]', 'base64decode'];
    runScriptlet(name, scriptletArgs);

    assert.strictEqual(elem.getAttribute('href'), expectedHref, 'href has been sanitized');
    assert.strictEqual(window.hit, 'FIRED');
});

// The base64 payload is embedded directly in the `href` value and holds a plain URL.
test('Santize href - base64 where link decoded as a string in href attribute', (assert) => {
    const encodedLink = 'http://www.foo.com/out/?aHR0cDovL2V4YW1wbGUuY29tLz92PTEyMw==';
    const decodedLink = 'http://example.com/?v=123';
    const anchor = createElem(encodedLink);

    // arguments: selector, attribute to read, transform
    runScriptlet(name, ['a[href*="out/?"]', '[href]', 'base64decode']);

    assert.strictEqual(anchor.getAttribute('href'), decodedLink, 'href has been sanitized and base64 was decoded');
    assert.strictEqual(window.hit, 'FIRED');
});

// The base64 payload is embedded in the `href` value; per the test title,
// it decodes to an object that contains the link.
test('Santize href - base64 where link decoded in object in href attribute', (assert) => {
    const encodedLink = 'http://www.foo.com/out/?eyJsIjoiaHR0cDovL2V4YW1wbGUuY29tLz92PTM0NjgiLCJjIjoxfQ==';
    const decodedLink = 'http://example.com/?v=3468';
    const anchor = createElem(encodedLink);

    // arguments: selector, attribute to read, transform
    runScriptlet(name, ['a[href*="out/?"]', '[href]', 'base64decode']);

    assert.strictEqual(anchor.getAttribute('href'), decodedLink, 'href has been sanitized and base64 was decoded');
    assert.strictEqual(window.hit, 'FIRED');
});

test('Santize href - text content', (assert) => {
const expectedHref = 'https://example.org/';
const elem = createElem('https://example.com/foo?redirect=https%3A%2F%2Fexample.org%2F', expectedHref);
Expand Down

0 comments on commit c81eeb6

Please sign in to comment.