-
-
Notifications
You must be signed in to change notification settings - Fork 93
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Page-reuse concurrency + Browser Repair + Screencaster Cleanup Improvements (#157)
* new window: use cdp instead of window.open
* new window tweaks: add reuseCount, use browser.target() instead of opening a new blank page
* rename NewWindowPage -> ReuseWindowConcurrency, move to windowconcur.js; potential fix for #156
* browser repair:
  - when using window-concurrency, attempt to repair / relaunch browser if cdp errors occur
  - mark pages as failed and don't reuse if page error or cdp errors occur
  - screencaster: clear previous targets if screencasting when repairing browser
* bump version to 0.7.0-beta.3
- Loading branch information
Showing
6 changed files
with
163 additions
and
88 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
{ | ||
"name": "browsertrix-crawler", | ||
"version": "0.7.0-beta.2", | ||
"version": "0.7.0-beta.3", | ||
"main": "browsertrix-crawler", | ||
"repository": "https://github.com/webrecorder/browsertrix-crawler", | ||
"author": "Ilya Kreymer <[email protected]>, Webrecorder Software", | ||
|
@@ -17,7 +17,7 @@ | |
"minio": "7.0.26", | ||
"node-fetch": "^2.6.1", | ||
"puppeteer-cluster": "github:ikreymer/puppeteer-cluster#async-job-queue", | ||
"puppeteer-core": "16.1.0", | ||
"puppeteer-core": "^16.1.1", | ||
"request": "^2.88.2", | ||
"sitemapper": "^3.1.2", | ||
"uuid": "8.3.2", | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
const SingleBrowserImplementation = require("puppeteer-cluster/dist/concurrency/SingleBrowserImplementation").default; | ||
|
||
|
||
// =========================================================================== | ||
/**
 * Concurrency implementation that opens each job in its own browser window
 * and keeps finished windows in a pool so they can be reused by later jobs.
 *
 * Windows are created over a browser-level CDP session
 * (`Target.createTarget` with `newWindow: true`). Every pooled entry is tagged
 * with the id of the CDP session that created it, so entries left over from a
 * previous browser instance can be recognized and discarded.
 */
class ReuseWindowConcurrency extends SingleBrowserImplementation {
  /**
   * Initializes the base implementation, then (re)creates all per-browser
   * state. Also called from repair(), so everything tied to the previous
   * browser instance is reset here.
   */
  async init() {
    await super.init();

    // targetId -> Target for windows opened by getNewPage() but not yet claimed
    this.pendingTargets = new Map();
    // unique marker URL, used to recognize the targets this instance opened
    this.startPage = "about:blank?_browsertrix" + Math.random().toString(36).slice(2);

    // pool of {page, count, id} entries available for reuse
    this.pages = [];
    // a page is closed instead of pooled once its use count exceeds this value
    this.reuseCount = 25;

    // Keep a screencaster registered before a repair: init() runs again on
    // every repair, and resetting it here would stop later repairs from
    // clearing the screencaster's targets.
    this.screencaster = this.screencaster || null;

    const mainTarget = this.browser.target();

    this.cdp = await mainTarget.createCDPSession();
    this.sessionId = this.cdp.id();

    this.browser.on("targetcreated", (target) => {
      if (target.url() === this.startPage) {
        this.pendingTargets.set(target._targetId, target);
      }
    });
  }

  /**
   * Registers the screencaster whose targets are cleared when the browser is
   * repaired.
   *
   * @param {object} screencaster - object exposing `endAllTargets()`
   */
  setScreencaster(screencaster) {
    this.screencaster = screencaster;
  }

  /**
   * Closes the current browser (best effort) and re-initializes this
   * implementation. If a repair is already running, or pages are still open,
   * the call waits until the repair in progress / next repair has finished.
   *
   * NOTE(review): `openInstances`, `repairing`, `repairRequested` and
   * `waitingForRepairResolvers` are not defined in this class - presumably
   * they are maintained by the base class; confirm.
   */
  async repair() {
    if (this.openInstances !== 0 || this.repairing) {
      // already repairing or there are still pages open? wait for start/finish
      await new Promise(resolve => this.waitingForRepairResolvers.push(resolve));
      return;
    }

    this.repairing = true;
    console.debug("Starting repair");

    if (this.screencaster) {
      this.screencaster.endAllTargets();
    }

    try {
      // will probably fail, but just in case the repair was not necessary
      await this.browser.close();
    } catch (e) {
      console.debug("Unable to close browser.");
    }

    try {
      await this.init();
    } catch (err) {
      console.debug("Unable to restart chrome.");
    }

    this.repairRequested = false;
    this.repairing = false;
    // wake up every caller that was waiting for this repair
    this.waitingForRepairResolvers.forEach(resolve => resolve());
    this.waitingForRepairResolvers = [];
  }

  /**
   * Opens a new browser window on the marker start page and returns it as a
   * fresh pool entry. Retries until a window could be created; if the CDP
   * command fails, a browser repair is attempted before retrying.
   *
   * @returns {Promise<{page: object, count: number, id: *}>} the new page,
   *   a use count of 0 and the id of the CDP session that created it
   */
  async getNewPage() {
    while (true) {
      let targetId;
      try {
        const res = await this.cdp.send("Target.createTarget", {url: this.startPage, newWindow: true});
        targetId = res.targetId;
      } catch (e) {
        console.warn(e);
        await this.repair();
        // no target was created, retry (against the repaired browser)
        continue;
      }

      const target = this.pendingTargets.get(targetId);
      // this shouldn't really happen, but just in case somehow ended up w/o a target, try again
      if (!target) {
        continue;
      }

      this.pendingTargets.delete(targetId);

      return {page: await target.page(), count: 0, id: this.sessionId};
    }
  }

  /**
   * Returns a page for the next job: the oldest pooled entry if it was
   * created by the current CDP session, otherwise a newly opened window.
   *
   * @returns {Promise<{page: object, count: number, id: *}>}
   */
  async createResources() {
    if (this.pages.length) {
      const res = this.pages.shift();
      if (res.id === this.sessionId) {
        return res;
      }
      // page is using stale session (eg. from crashed/previous browser instance), don't attempt to reuse
    }
    return await this.getNewPage();
  }

  /**
   * Takes a page back after a job. Pages marked as failed, and pages that
   * have exceeded the reuse limit, are closed; all others return to the pool.
   *
   * @param {{page: object, count: number, id: *}} resources - entry obtained
   *   from createResources()
   */
  async freeResources(resources) {
    // if marked as failed, don't try to reuse
    if (resources.page.__failed) {
      await resources.page.close();
      // must stop here: the closed page may neither be pooled nor closed twice
      return;
    }

    if (++resources.count > this.reuseCount) {
      await resources.page.close();
    } else {
      this.pages.push(resources);
    }
  }
}
|
||
module.exports = { ReuseWindowConcurrency }; | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters