Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix incorrect processing of titles with question marks and anchor parameters #807

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 33 additions & 41 deletions service-worker.js
Original file line number Diff line number Diff line change
Expand Up @@ -187,14 +187,19 @@ let fetchCaptureEnabled = false;
/**
* Intercept selected Fetch requests from the browser window
*/
self.addEventListener('fetch', function (event) {
self.addEventListener('fetch', function (event) {
// Only cache GET requests
if (event.request.method !== "GET") return;
// Remove any querystring before requesting from the cache
var rqUrl = event.request.url.replace(/\?[^?]+$/i, '');
var rqUrl = event.request.url;
var urlObject = new URL(rqUrl);
// Test the URL with parameters removed
var strippedUrl = urlObject.pathname;
// Select cache depending on request format
var cache = /\.zim\//i.test(rqUrl) ? ASSETS_CACHE : APP_CACHE;
var cache = /\.zim\//i.test(strippedUrl) ? ASSETS_CACHE : APP_CACHE;
if (cache === ASSETS_CACHE && !fetchCaptureEnabled) return;
// For APP_CACHE assets, we should ignore any querystring (whereas it should be conserved for ZIM assets,
// especially .js assets, where it may be significant). Anchor targets are irreleveant in this context.
if (cache === APP_CACHE) rqUrl = strippedUrl;
event.respondWith(
// First see if the content is in the cache
fromCache(cache, rqUrl).then(function (response) {
Expand All @@ -203,24 +208,24 @@ let fetchCaptureEnabled = false;
}, function () {
// The response was not found in the cache so we look for it in the ZIM
// and add it to the cache if it is an asset type (css or js)
if (cache === ASSETS_CACHE && regexpZIMUrlWithNamespace.test(rqUrl)) {
return fetchRequestFromZIM(event).then(function (response) {
if (cache === ASSETS_CACHE && regexpZIMUrlWithNamespace.test(strippedUrl)) {
return fetchUrlFromZIM(urlObject).then(function (response) {
// Add css or js assets to ASSETS_CACHE (or update their cache entries) unless the URL schema is not supported
if (regexpCachedContentTypes.test(response.headers.get('Content-Type')) &&
!regexpExcludedURLSchema.test(event.request.url)) {
event.waitUntil(updateCache(ASSETS_CACHE, event.request, response.clone()));
event.waitUntil(updateCache(ASSETS_CACHE, rqUrl, response.clone()));
}
return response;
}).catch(function (msgPortData, title) {
console.error('Invalid message received from app.js for ' + title, msgPortData);
}).catch(function (msgPortData) {
console.error('Invalid message received from app.js for ' + strippedUrl, msgPortData);
return msgPortData;
});
} else {
// It's not an asset, or it doesn't match a ZIM URL pattern, so we should fetch it with Fetch API
return fetch(event.request).then(function (response) {
// If request was successful, add or update it in the cache, but be careful not to cache the ZIM archive itself!
if (!regexpExcludedURLSchema.test(rqUrl) && !/\.zim\w{0,2}$/i.test(rqUrl)) {
event.waitUntil(updateCache(APP_CACHE, event.request, response.clone()));
if (!regexpExcludedURLSchema.test(event.request.url) && !/\.zim\w{0,2}$/i.test(strippedUrl)) {
event.waitUntil(updateCache(APP_CACHE, rqUrl, response.clone()));
}
return response;
}).catch(function (error) {
Expand Down Expand Up @@ -271,25 +276,22 @@ let fetchCaptureEnabled = false;
});

/**
* Handles fetch events that need to be extracted from the ZIM
* Handles URLs that need to be extracted from the ZIM archive
*
* @param {Event} fetchEvent The fetch event to be processed
* @param {URL} urlObject The URL object to be processed for extraction from the ZIM
* @returns {Promise<Response>} A Promise for the Response, or rejects with the invalid message port data
*/
function fetchRequestFromZIM(fetchEvent) {
function fetchUrlFromZIM(urlObject) {
return new Promise(function (resolve, reject) {
var nameSpace;
var title;
var titleWithNameSpace;
var regexpResult = regexpZIMUrlWithNamespace.exec(fetchEvent.request.url);
var prefix = regexpResult[1];
nameSpace = regexpResult[2];
title = regexpResult[3];
// Note that titles may contain bare question marks or hashes, so we must use only the pathname without any URL parameters.
// Be sure that you haven't encoded any querystring along with the URL.
var barePathname = decodeURIComponent(urlObject.pathname);
var partsOfZIMUrl = regexpZIMUrlWithNamespace.exec(barePathname);
var prefix = partsOfZIMUrl[1];
var nameSpace = partsOfZIMUrl[2];
var title = partsOfZIMUrl[3];

// We need to remove the potential parameters in the URL
title = removeUrlParameters(decodeURIComponent(title));

titleWithNameSpace = nameSpace + '/' + title;
var titleWithNameSpace = nameSpace + '/' + title;

// Let's instantiate a new messageChannel, to allow app.js to give us the content
var messageChannel = new MessageChannel();
Expand Down Expand Up @@ -333,15 +335,6 @@ function fetchRequestFromZIM(fetchEvent) {
});
}

/**
* Removes parameters and anchors from a URL
* @param {type} url The URL to be processed
* @returns {String} The same URL without its parameters and anchors
*/
function removeUrlParameters(url) {
return url.replace(/([^?#]+)[?#].*$/, '$1');
}

/**
* Looks up a Request in a cache and returns a Promise for the matched Response
* @param {String} cache The name of the cache to look in
Expand All @@ -350,12 +343,10 @@ function removeUrlParameters(url) {
*/
function fromCache(cache, requestUrl) {
// Prevents use of Cache API if user has disabled it
if (!useAppCache && cache === APP_CACHE || !useAssetsCache && cache === ASSETS_CACHE) return Promise.reject('disabled');
if (!(useAppCache && cache === APP_CACHE || useAssetsCache && cache === ASSETS_CACHE)) return Promise.reject('disabled');
return caches.open(cache).then(function (cacheObj) {
return cacheObj.match(requestUrl).then(function (matching) {
if (!matching || matching.status === 404) {
return Promise.reject('no-match');
}
if (!matching || matching.status === 404) return Promise.reject('no-match');
console.debug('[SW] Supplying ' + requestUrl + ' from ' + cache + '...');
return matching;
});
Expand All @@ -365,15 +356,16 @@ function fromCache(cache, requestUrl) {
/**
* Stores or updates in a cache the given Request/Response pair
* @param {String} cache The name of the cache to open
* @param {Request} request The original Request object
* @param {Request|String} request The original Request object or the URL string requested
* @param {Response} response The Response received from the server/ZIM
* @returns {Promise} A Promise for the update action
*/
function updateCache(cache, request, response) {
// Prevents use of Cache API if user has disabled it
if (!useAppCache && cache === APP_CACHE || !useAssetsCache && cache === ASSETS_CACHE) return Promise.resolve();
if (!response.ok || !(useAppCache && cache === APP_CACHE || useAssetsCache && cache === ASSETS_CACHE))
return Promise.resolve();
return caches.open(cache).then(function (cacheObj) {
console.debug('[SW] Adding ' + request.url + ' to ' + cache + '...');
console.debug('[SW] Adding ' + (request.url || request) + ' to ' + cache + '...');
return cacheObj.put(request, response);
});
}
Expand Down
19 changes: 11 additions & 8 deletions tests/tests.js
Original file line number Diff line number Diff line change
Expand Up @@ -118,14 +118,17 @@ define(['jquery', 'zimArchive', 'zimDirEntry', 'util', 'uiUtil', 'utf8'],
assert.equal(util.allCaseFirstLetters(testString6, "full").indexOf("ΚΑΛΆ ΝΕΡΆ ΜΑΓΝΗΣΊΑ ŽIŽEK") >= 0, true, "All Unicode letters should be uppercase");
});
QUnit.test("check removal of parameters in URL", function(assert) {
var testUrl1 = "A/question.html";
var testUrl2 = "A/question.html?param1=toto&param2=titi";
var testUrl3 = "A/question.html?param1=toto&param2=titi#anchor";
var testUrl4 = "A/question.html#anchor";
assert.equal(uiUtil.removeUrlParameters(testUrl1), testUrl1);
assert.equal(uiUtil.removeUrlParameters(testUrl2), testUrl1);
assert.equal(uiUtil.removeUrlParameters(testUrl3), testUrl1);
assert.equal(uiUtil.removeUrlParameters(testUrl4), testUrl1);
var baseUrl = "A/Che cosa è l'amore?.html";
var testUrls = [
"A/Che%20cosa%20%C3%A8%20l'amore%3F.html?param1=toto&param2=titi",
"A/Che%20cosa%20%C3%A8%20l'amore%3F.html?param1=toto&param2=titi#anchor",
"A/Che%20cosa%20%C3%A8%20l'amore%3F.html#anchor"
];
testUrls.forEach(function (testUrl) {
assert.equal(decodeURIComponent(
uiUtil.removeUrlParameters(testUrl)
), baseUrl);
});
});

QUnit.module("ZIM initialisation");
Expand Down
Loading