forked from Fannovel16/fancaps-scraper
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathepisode.js
45 lines (41 loc) · 1.71 KB
/
episode.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
const { JSDOM } = require("jsdom")
const { getImageId } = require("./image")
const axios = require("./createAxios")()
/**
 * Downloads one episode page and builds the CDN URL of every screencap
 * listed on it.
 *
 * @param {string} episodePageUrl - Address of the episode page to download.
 * @returns {Promise<string[]>} One
 *   `https://cdni.fancaps.net/file/fancaps-tvimages/<id>.jpg` URL per
 *   `img.imageFade` element found; empty when the page lists none.
 */
async function getCurrPageImageUrls(episodePageUrl) {
    const response = await axios(episodePageUrl)
    const dom = new JSDOM(response.data)
    const doc = dom.window.document

    // Find the "Episode Screencaps" heading via XPath, searching from the
    // post area. The result type 0 is XPathResult.ANY_TYPE in the DOM standard.
    const postAreaEl = doc.querySelector(".single_post_area")
    const headingMatches = doc.evaluate(
        `//h3[contains(text(),"Episode Screencaps")]`,
        postAreaEl,
        null,
        0,
        0
    )
    const headingEl = headingMatches.iterateNext()

    // The thumbnails sit in the second sibling after the heading's grandparent.
    const headingWrapperEl = headingEl.parentElement.parentElement
    const imagesContainerEl = headingWrapperEl.nextElementSibling.nextElementSibling

    // Only the id extracted from each thumbnail's src goes into the result URL.
    const thumbnailEls = imagesContainerEl.querySelectorAll("img.imageFade")
    return Array.from(
        thumbnailEls,
        (thumbnailEl) => `https://cdni.fancaps.net/file/fancaps-tvimages/${getImageId(thumbnailEl.src)}.jpg`
    )
}
// Number of page requests getEpisodeDataset starts concurrently in each batch
// while it walks an episode's paginated listing.
const GET_EPISODE_PROMISE_AMOUNT = 20
/**
 * Collects the screencap URLs of one episode by walking its paginated listing.
 *
 * Pages are requested in batches of GET_EPISODE_PROMISE_AMOUNT concurrent
 * requests, starting at page 1. Pagination stops after the first batch in
 * which at least one page yields no images; empty pages are discarded.
 *
 * @param {object} episode - Episode descriptor.
 * @param {*} episode.episodeTitle - Copied unchanged into the result.
 * @param {string|URL} episode.episodeUrl - Episode address; anything the
 *   `URL` constructor accepts. The caller's value is never mutated.
 * @param {object} [options] - Optional settings; may be omitted entirely.
 * @param {number} [options.skipNLastPages=2] - How many of the last non-empty
 *   pages to drop from the result. Zero, negative or non-numeric values drop
 *   nothing.
 * @param {*} [options.seriesTitle] - Copied unchanged into the result.
 * @returns {Promise<{seriesTitle: *, episodeTitle: *, episodeUrl: URL, imageUrls: string[]}>}
 *   `episodeUrl` is a `URL` object with the `page` query parameter removed;
 *   `imageUrls` is the flattened, page-ordered list of the kept pages.
 * @throws Rejects with whatever error a page request rejects with
 *   (`Promise.all` fails fast), or with a TypeError for an invalid URL.
 */
async function getEpisodeDataset({ episodeTitle, episodeUrl }, { skipNLastPages = 2, seriesTitle } = {}) {
    // Private URL object: lets us rewrite the `page` parameter freely without
    // reassigning the parameter or touching a URL instance owned by the caller.
    const pageUrl = new URL(episodeUrl)
    let pagesOfImageUrls = []

    for (let firstPage = 1; ; firstPage += GET_EPISODE_PROMISE_AMOUNT) {
        const pendingPages = []
        for (let offset = 0; offset < GET_EPISODE_PROMISE_AMOUNT; offset++) {
            pageUrl.searchParams.set("page", String(firstPage + offset))
            // Serialize right away: pageUrl is mutated again on the next pass.
            pendingPages.push(getCurrPageImageUrls(pageUrl.toString()))
        }

        const batch = await Promise.all(pendingPages)
        pagesOfImageUrls.push(...batch.filter(pageImageUrls => pageImageUrls.length > 0))

        // An empty page means we ran past the end of the episode.
        if (batch.some(pageImageUrls => pageImageUrls.length === 0)) break
    }

    // Only a positive count may reach slice(): a negative one would flip
    // `slice(0, -n)` into "keep the first |n| pages" instead of skipping.
    if (skipNLastPages > 0) pagesOfImageUrls = pagesOfImageUrls.slice(0, -skipNLastPages)

    pageUrl.searchParams.delete("page")
    return {
        seriesTitle,
        episodeTitle,
        episodeUrl: pageUrl,
        imageUrls: pagesOfImageUrls.flat()
    }
}
// getEpisodeDataset is the only name exported from this module.
module.exports = { getEpisodeDataset }