diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index d73d23225725f..539ea687553f1 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -31,9 +31,10 @@ import { import { buildGetDocumentParams, CMAP_URL, - createTemporaryNodeServer, DefaultFileReaderFactory, + getCrossOriginHostname, TEST_PDFS_PATH, + TestPdfsServer, } from "./test_utils.js"; import { DefaultCanvasFactory, @@ -67,27 +68,17 @@ describe("api", function () { buildGetDocumentParams(tracemonkeyFileName); let CanvasFactory; - let tempServer = null; - beforeAll(function () { + beforeAll(async function () { CanvasFactory = new DefaultCanvasFactory({}); - if (isNodeJS) { - tempServer = createTemporaryNodeServer(); - } + await TestPdfsServer.ensureStarted(); }); - afterAll(function () { + afterAll(async function () { CanvasFactory = null; - if (isNodeJS) { - // Close the server from accepting new connections after all test - // finishes. - const { server } = tempServer; - server.close(); - - tempServer = null; - } + await TestPdfsServer.ensureStopped(); }); function waitSome(callback) { @@ -148,9 +139,7 @@ describe("api", function () { }); it("creates pdf doc from URL-object", async function () { - const urlObj = isNodeJS - ? new URL(`http://127.0.0.1:${tempServer.port}/${basicApiFileName}`) - : new URL(TEST_PDFS_PATH + basicApiFileName, window.location); + const urlObj = TestPdfsServer.resolveURL(basicApiFileName); const loadingTask = getDocument(urlObj); expect(loadingTask instanceof PDFDocumentLoadingTask).toEqual(true); @@ -2989,17 +2978,14 @@ describe("api", function () { let loadingTask; function _checkCanLoad(expectSuccess, filename, options) { if (isNodeJS) { + // We can simulate cross-origin requests, but since Node.js does not + // enforce the Same Origin Policy, requests are expected to be allowed + // independently of withCredentials. pending("Cannot simulate cross-origin requests in Node.js"); } const params = buildGetDocumentParams(filename, options); const url = new URL(params.url); - if (url.hostname === "localhost") { - url.hostname = "127.0.0.1"; - } else if (params.url.hostname === "127.0.0.1") { - url.hostname = "localhost"; - } else { - pending("Can only run cross-origin test on localhost!"); - } + url.hostname = getCrossOriginHostname(url.hostname); params.url = url.href; loadingTask = getDocument(params); return loadingTask.promise diff --git a/test/unit/common_pdfstream_tests.js b/test/unit/common_pdfstream_tests.js new file mode 100644 index 0000000000000..b96620df9f5c2 --- /dev/null +++ b/test/unit/common_pdfstream_tests.js @@ -0,0 +1,90 @@ +/* Copyright 2024 Mozilla Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { AbortException, isNodeJS } from "../../src/shared/util.js"; +import { getCrossOriginHostname, TestPdfsServer } from "./test_utils.js"; + +// Common tests to verify behavior across implementations of the IPDFStream +// interface: +// - PDFNetworkStream by network_spec.js +// - PDFFetchStream by fetch_stream_spec.js +async function testCrossOriginRedirects({ + PDFStreamClass, + redirectIfRange, + testRangeReader, +}) { + const basicApiUrl = TestPdfsServer.resolveURL("basicapi.pdf").href; + const basicApiFileLength = 105779; + + const rangeSize = 32768; + const stream = new PDFStreamClass({ + url: getCrossOriginUrlWithRedirects(basicApiUrl, redirectIfRange), + length: basicApiFileLength, + rangeChunkSize: rangeSize, + disableStream: true, + disableRange: false, + }); + + const fullReader = stream.getFullReader(); + + await fullReader.headersReady; + // Sanity check: We can only test range requests if supported: + expect(fullReader.isRangeSupported).toEqual(true); + // ^ When range requests are supported (and streaming is disabled), the full + // initial request is aborted and we do not need to call fullReader.cancel(). + + const rangeReader = stream.getRangeReader( + basicApiFileLength - rangeSize, + basicApiFileLength + ); + + try { + await testRangeReader(rangeReader); + } finally { + rangeReader.cancel(new AbortException("Don't need rangeReader")); + } +} + +/** + * @param {string} testserverUrl - A URL handled that supports CORS and + * redirects (see crossOriginHandler and redirectHandler in webserver.mjs). + * @param {boolean} redirectIfRange - Whether Range requests should be + * redirected to a different origin compared to the initial request. + * @returns {string} A URL that will be redirected by the server. + */ +function getCrossOriginUrlWithRedirects(testserverUrl, redirectIfRange) { + const url = new URL(testserverUrl); + if (!isNodeJS) { + // The responses are going to be cross-origin. In Node.js, fetch() allows + // cross-origin requests for any request, but in browser environments we + // need to enable CORS. + // This option depends on crossOriginHandler in webserver.mjs. + url.searchParams.set("cors", "withoutCredentials"); + } + + // This redirect options depend on redirectHandler in webserver.mjs. + + // We will change the host to a cross-origin domain so that the initial + // request will be cross-origin. Set "redirectToHost" to the original host + // to force a cross-origin redirect (relative to the initial URL). + url.searchParams.set("redirectToHost", url.hostname); + url.hostname = getCrossOriginHostname(url.hostname); + if (redirectIfRange) { + url.searchParams.set("redirectIfRange", "1"); + } + return url.href; +} + +export { testCrossOriginRedirects }; diff --git a/test/unit/fetch_stream_spec.js b/test/unit/fetch_stream_spec.js index 3fbe1efc10ef4..181818a594a77 100644 --- a/test/unit/fetch_stream_spec.js +++ b/test/unit/fetch_stream_spec.js @@ -13,35 +13,23 @@ * limitations under the License. */ -import { AbortException, isNodeJS } from "../../src/shared/util.js"; -import { createTemporaryNodeServer } from "./test_utils.js"; +import { AbortException } from "../../src/shared/util.js"; import { PDFFetchStream } from "../../src/display/fetch_stream.js"; +import { testCrossOriginRedirects } from "./common_pdfstream_tests.js"; +import { TestPdfsServer } from "./test_utils.js"; describe("fetch_stream", function () { - let tempServer = null; - function getPdfUrl() { - return isNodeJS - ? `http://127.0.0.1:${tempServer.port}/tracemonkey.pdf` - : new URL("../pdfs/tracemonkey.pdf", window.location).href; + return TestPdfsServer.resolveURL("tracemonkey.pdf").href; } const pdfLength = 1016315; - beforeAll(function () { - if (isNodeJS) { - tempServer = createTemporaryNodeServer(); - } + beforeAll(async function () { + await TestPdfsServer.ensureStarted(); }); - afterAll(function () { - if (isNodeJS) { - // Close the server from accepting new connections after all test - // finishes. - const { server } = tempServer; - server.close(); - - tempServer = null; - } + afterAll(async function () { + await TestPdfsServer.ensureStopped(); }); it("read with streaming", async function () { @@ -129,4 +117,33 @@ describe("fetch_stream", function () { expect(result1.value).toEqual(rangeSize); expect(result2.value).toEqual(tailSize); }); + + describe("Redirects", function () { + it("redirects allowed if all responses are same-origin", async function () { + await testCrossOriginRedirects({ + PDFStreamClass: PDFFetchStream, + redirectIfRange: false, + async testRangeReader(rangeReader) { + await expectAsync(rangeReader.read()).toBeResolved(); + }, + }); + }); + + it("redirects blocked if any response is cross-origin", async function () { + await testCrossOriginRedirects({ + PDFStreamClass: PDFFetchStream, + redirectIfRange: true, + async testRangeReader(rangeReader) { + // When read (sync), error should be reported. + await expectAsync(rangeReader.read()).toBeRejectedWithError( + /^Expected range response-origin "http:.*" to match "http:.*"\.$/ + ); + // When read again (async), error should be consistent. + await expectAsync(rangeReader.read()).toBeRejectedWithError( + /^Expected range response-origin "http:.*" to match "http:.*"\.$/ + ); + }, + }); + }); + }); }); diff --git a/test/unit/network_spec.js b/test/unit/network_spec.js index 9a55b4771ff0e..367e525601ed9 100644 --- a/test/unit/network_spec.js +++ b/test/unit/network_spec.js @@ -15,6 +15,8 @@ import { AbortException } from "../../src/shared/util.js"; import { PDFNetworkStream } from "../../src/display/network.js"; +import { testCrossOriginRedirects } from "./common_pdfstream_tests.js"; +import { TestPdfsServer } from "./test_utils.js"; describe("network", function () { const pdf1 = new URL("../pdfs/tracemonkey.pdf", window.location).href; @@ -115,4 +117,41 @@ describe("network", function () { expect(isRangeSupported).toEqual(true); expect(fullReaderCancelled).toEqual(true); }); + + describe("Redirects", function () { + beforeAll(async function () { + await TestPdfsServer.ensureStarted(); + }); + + afterAll(async function () { + await TestPdfsServer.ensureStopped(); + }); + + it("redirects allowed if all responses are same-origin", async function () { + await testCrossOriginRedirects({ + PDFStreamClass: PDFNetworkStream, + redirectIfRange: false, + async testRangeReader(rangeReader) { + await expectAsync(rangeReader.read()).toBeResolved(); + }, + }); + }); + + it("redirects blocked if any response is cross-origin", async function () { + await testCrossOriginRedirects({ + PDFStreamClass: PDFNetworkStream, + redirectIfRange: true, + async testRangeReader(rangeReader) { + // When read (sync), error should be reported. + await expectAsync(rangeReader.read()).toBeRejectedWithError( + /^Expected range response-origin "http:.*" to match "http:.*"\.$/ + ); + // When read again (async), error should be consistent. + await expectAsync(rangeReader.read()).toBeRejectedWithError( + /^Expected range response-origin "http:.*" to match "http:.*"\.$/ + ); + }, + }); + }); + }); }); diff --git a/test/unit/test_utils.js b/test/unit/test_utils.js index db2fdc2fde90a..4361ebac68205 100644 --- a/test/unit/test_utils.js +++ b/test/unit/test_utils.js @@ -51,6 +51,22 @@ function buildGetDocumentParams(filename, options) { return params; } +function getCrossOriginHostname(hostname) { + if (hostname === "localhost") { + // Note: This does not work if localhost is listening on IPv6 only. + // As a work-around, visit the IPv6 version at: + // http://[::1]:8888/test/unit/unit_test.html?spec=Cross-origin + return "127.0.0.1"; + } + + if (hostname === "127.0.0.1" || hostname === "[::1]") { + return "localhost"; + } + + // FQDN are cross-origin and browsers usually resolve them to the same server. + return hostname.endsWith(".") ? hostname.slice(0, -1) : hostname + "."; +} + class XRefMock { constructor(array) { this._map = Object.create(null); @@ -122,73 +138,103 @@ function createIdFactory(pageIndex) { return page._localIdFactory; } -function createTemporaryNodeServer() { - assert(isNodeJS, "Should only be used in Node.js environments."); - - const fs = process.getBuiltinModule("fs"), - http = process.getBuiltinModule("http"); - function isAcceptablePath(requestUrl) { - try { - // Reject unnormalized paths, to protect against path traversal attacks. - const url = new URL(requestUrl, "https://localhost/"); - return url.pathname === requestUrl; - } catch { - return false; +// Some tests rely on special behavior from webserver.mjs. When loaded in the +// browser, the page is already served from WebServer. When running from +// Node.js, that is not the case. This helper starts the WebServer if needed, +// and offers a mechanism to resolve the URL in a uniform way. +class TestPdfsServer { + static #webServer; + + static #startCount = 0; + + static #startPromise; + + static async ensureStarted() { + if (this.#startCount++) { + // Already started before. E.g. from another beforeAll call. + return this.#startPromise; + } + if (!isNodeJS) { + // In web browsers, tests are presumably served by webserver.mjs. + return undefined; } + + this.#startPromise = this.#startServer().finally(() => { + this.#startPromise = null; + }); + return this.#startPromise; + } + + static async #startServer() { + // WebServer from webserver.mjs is imported dynamically instead of + // statically because we do not need it when running from the browser. + let WebServer; + if (import.meta.url.endsWith("/lib-legacy/test/unit/test_utils.js")) { + // When "gulp unittestcli" is used to run tests, the tests are run from + // pdf.js/build/lib-legacy/test/ instead of directly from pdf.js/test/. + // eslint-disable-next-line import/no-unresolved + ({ WebServer } = await import("../../../../test/webserver.mjs")); + } else { + ({ WebServer } = await import("../webserver.mjs")); + } + this.#webServer = new WebServer({ + host: "127.0.0.1", + root: TEST_PDFS_PATH, + }); + await new Promise(resolve => { + this.#webServer.start(resolve); + }); + } + + static async ensureStopped() { + assert(this.#startCount > 0, "ensureStarted() should be called first"); + assert(!this.#startPromise, "ensureStarted() should have resolved"); + if (--this.#startCount) { + // Keep server alive as long as there is an ensureStarted() that was not + // followed by an ensureStopped() call. + // This could happen if ensureStarted() was called again before + // ensureStopped() was called from afterAll(). + return; + } + if (!isNodeJS) { + // Web browsers cannot stop the server. + return; + } + + await new Promise(resolve => { + this.#webServer.stop(resolve); + this.#webServer = null; + }); + } + + /** + * @param {string} path - path to file within test/unit/pdf/ (TEST_PDFS_PATH). + * @returns {URL} + */ + static resolveURL(path) { + assert(this.#startCount > 0, "ensureStarted() should be called first"); + assert(!this.#startPromise, "ensureStarted() should have resolved"); + + if (isNodeJS) { + // Note: TestPdfsServer.ensureStarted() should be called first. + return new URL(path, `http://127.0.0.1:${this.#webServer.port}/`); + } + // When "gulp server" is used, our URL looks like + // http://localhost:8888/test/unit/unit_test.html + // The PDFs are served from: + // http://localhost:8888/test/pdfs/ + return new URL(TEST_PDFS_PATH + path, window.location); } - // Create http server to serve pdf data for tests. - const server = http - .createServer((request, response) => { - if (!isAcceptablePath(request.url)) { - response.writeHead(400); - response.end("Invalid path"); - return; - } - const filePath = process.cwd() + "/test/pdfs" + request.url; - fs.promises.lstat(filePath).then( - stat => { - if (!request.headers.range) { - const contentLength = stat.size; - const stream = fs.createReadStream(filePath); - response.writeHead(200, { - "Content-Type": "application/pdf", - "Content-Length": contentLength, - "Accept-Ranges": "bytes", - }); - stream.pipe(response); - } else { - const [start, end] = request.headers.range - .split("=")[1] - .split("-") - .map(x => Number(x)); - const stream = fs.createReadStream(filePath, { start, end }); - response.writeHead(206, { - "Content-Type": "application/pdf", - }); - stream.pipe(response); - } - }, - error => { - response.writeHead(404); - response.end(`File ${request.url} not found!`); - } - ); - }) - .listen(0); /* Listen on a random free port */ - - return { - server, - port: server.address().port, - }; } export { buildGetDocumentParams, CMAP_URL, createIdFactory, - createTemporaryNodeServer, DefaultFileReaderFactory, + getCrossOriginHostname, STANDARD_FONT_DATA_URL, TEST_PDFS_PATH, + TestPdfsServer, XRefMock, }; diff --git a/test/webserver.mjs b/test/webserver.mjs index e0295f3a2865e..fffd8d72aa5ec 100644 --- a/test/webserver.mjs +++ b/test/webserver.mjs @@ -52,7 +52,7 @@ class WebServer { this.cacheExpirationTime = cacheExpirationTime || 0; this.disableRangeRequests = false; this.hooks = { - GET: [crossOriginHandler], + GET: [crossOriginHandler, redirectHandler], POST: [], }; } @@ -308,6 +308,11 @@ class WebServer { } #serveFileRange(response, fileURL, fileSize, start, end) { + if (end > fileSize || start > end) { + response.writeHead(416); + response.end(); + return; + } const stream = fs.createReadStream(fileURL, { flags: "rs", start, @@ -336,18 +341,65 @@ class WebServer { } // This supports the "Cross-origin" test in test/unit/api_spec.js -// It is here instead of test.js so that when the test will still complete as +// and "Redirects" in test/unit/network_spec.js and +// test/unit/fetch_stream_spec.js via test/unit/common_pdfstream_tests.js. +// It is here instead of test.mjs so that when the test will still complete as // expected if the user does "gulp server" and then visits // http://localhost:8888/test/unit/unit_test.html?spec=Cross-origin function crossOriginHandler(url, request, response) { if (url.pathname === "/test/pdfs/basicapi.pdf") { + if (!url.searchParams.has("cors") || !request.headers.origin) { + return; + } + response.setHeader("Access-Control-Allow-Origin", request.headers.origin); if (url.searchParams.get("cors") === "withCredentials") { - response.setHeader("Access-Control-Allow-Origin", request.headers.origin); response.setHeader("Access-Control-Allow-Credentials", "true"); - } else if (url.searchParams.get("cors") === "withoutCredentials") { - response.setHeader("Access-Control-Allow-Origin", request.headers.origin); + } // withoutCredentials does not include Access-Control-Allow-Credentials. + response.setHeader( + "Access-Control-Expose-Headers", + "Accept-Ranges,Content-Range" + ); + response.setHeader("Vary", "Origin"); + } +} + +// This supports the "Redirects" test in test/unit/network_spec.js and +// test/unit/fetch_stream_spec.js via test/unit/common_pdfstream_tests.js. +// It is here instead of test.mjs so that when the test will still complete as +// expected if the user does "gulp server" and then visits +// http://localhost:8888/test/unit/unit_test.html?spec=Redirects +function redirectHandler(url, request, response) { + const redirectToHost = url.searchParams.get("redirectToHost"); + if (redirectToHost) { + // Chrome may serve byte range requests directly from the cache, potentially + // from a full request or a different range, without involving the server. + // To prevent this from happening, make sure that the response is never + // cached, so that Range requests are never served from the browser cache. + response.setHeader("Cache-Control", "no-store,max-age=0"); + + if (url.searchParams.get("redirectIfRange") && !request.headers.range) { + return false; + } + try { + const newURL = new URL(url); + newURL.hostname = redirectToHost; + // Delete test-only query parameters to avoid infinite redirects. + newURL.searchParams.delete("redirectToHost"); + newURL.searchParams.delete("redirectIfRange"); + if (newURL.hostname !== redirectToHost) { + throw new Error(`Invalid hostname: ${redirectToHost}`); + } + response.setHeader("Location", newURL.href); + } catch { + response.writeHead(500); + response.end(); + return true; } + response.writeHead(302); + response.end(); + return true; } + return false; } export { WebServer };