-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscheduler.py
executable file
·166 lines (113 loc) · 4.57 KB
/
scheduler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
from feedgen.feed import FeedGenerator
from bs4 import BeautifulSoup
from urllib.request import urlopen, Request
from urllib.parse import urljoin, urlparse
from datetime import datetime
from flask import make_response, request
from urllib.error import HTTPError
import feedparser
USER_AGENT = "Mozilla/5.0 (Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0"
class SlugError(Exception):
    """Raised when a Comic Rocket slug does not resolve to a readable comic."""
class LastPageError(Exception):
    """Raised when a requested entry number is past the final available page."""
class Scheduler:
    """Base class that builds an Atom feed drip-feeding archive entries on a schedule.

    Subclasses override ``create_entry(num)`` (one entry number per call) and
    ensure ``self.title`` is set before ``response()`` is used; the base
    ``create_entry(url, title)`` is the helper subclasses delegate to.
    """

    # Feed title; filled in by subclasses (possibly lazily from page content).
    title = None

    def __init__(self, args, homepage):
        """Build the feed skeleton and emit the entries currently due.

        args     -- Flask request args (query-string parameters, ``.get`` with
                    a ``default`` keyword — i.e. a werkzeug MultiDict)
        homepage -- canonical URL used as the feed id and link

        Raises ValueError for missing or out-of-range parameters.
        """
        self.feed = FeedGenerator()
        self.feed.id(homepage)
        self.feed.link(href=homepage)
        self.query = args.get("query")
        self.iframe = args.get("iframe") == "on"
        self.pages = int(args.get("per-day", default="1"))
        if self.pages < 1 or self.pages > 10:
            raise ValueError("<code>per-day</code> must have value between 1 and 10")
        # Validate presence explicitly: int(None)/strptime(None, ...) would
        # otherwise raise an unhelpful TypeError instead of this class's
        # ValueError convention.
        if args.get("start") is None:
            raise ValueError("<code>start</code> must be set.")
        start = int(args.get("start"))
        if start < 1:
            raise ValueError("Number of first entry must be positive.")
        if args.get("date") is None:
            raise ValueError("<code>date</code> must be set.")
        start_date = datetime.strptime(args.get("date"), "%Y-%m-%d")
        # `pages` entries are released per day, counting start_date itself.
        diff = (datetime.now() - start_date).days
        current = start + (diff + 1) * self.pages - 1
        # Keep only the most recent ~3 days' worth so the feed stays short,
        # but never reach back before the first entry.
        oldest = max(start, current - 3 * self.pages)
        self.range = range(oldest, current + 1)
        for num in self.range:
            # Subclass hook: maps an entry number to a concrete feed entry.
            self.create_entry(num)

    def create_entry(self, url, title):
        """Append one feed entry for `url`; content shape depends on options.

        iframe on  -> embed the page itself;
        query set  -> inline the images matched by the CSS selector;
        otherwise  -> a plain link.
        """
        entry = self.feed.add_entry()
        entry.id(url)
        entry.title(title)
        entry.link(href=url)
        if self.iframe:
            content = f'<iframe width="100%" height="1000px" src="{url}"/>'
        elif self.query:
            # Fixed: was `self.query != ""`, which sent None to .select()
            # (TypeError) when the query parameter was absent; an absent
            # query now falls through to the plain-link branch.
            req = Request(url, headers={"User-Agent": USER_AGENT})
            page = BeautifulSoup(urlopen(req), features="lxml")
            content = "".join(
                f'<img src="{urljoin(url, img["src"])}"/>'
                for img in page.body.select(self.query)
            )
        else:
            # Fixed: closing tag was the malformed `</>`.
            content = f'<a href="{url}">{title}</a>'
        entry.content(content=content, type="html")

    def response(self):
        """Finalize the feed title and wrap the Atom XML in a Flask response."""
        self.feed.title(self.title)
        response = make_response(self.feed.atom_str())
        response.headers.set("Content-Type", "application/rss+xml")
        return response
class NumberedScheduler(Scheduler):
    """Scheduler for sites whose pages follow a numbered URL template.

    `scheme` is a format string with a `{}` placeholder for the entry number,
    e.g. ``https://example.com/page/{}``.
    """

    def __init__(self, args):
        """Read `scheme` and `title` params and delegate feed building.

        Raises ValueError when `title` is missing or empty.
        """
        self.scheme = args.get("scheme")
        self.title = args.get("title")
        if self.title is None or self.title == "":
            raise ValueError("title must be set.")
        parsed_scheme = urlparse(self.scheme)
        # Derive the site homepage from the URL template.
        # Fixed: the separator was "//" (yielding e.g. "https//example.com",
        # an invalid URL); a scheme is joined to its authority with "://".
        homepage = parsed_scheme.scheme + "://" + parsed_scheme.netloc
        super().__init__(args, homepage)

    def create_entry(self, num):
        """Create the feed entry for page number `num` from the URL template."""
        url = self.scheme.format(num)
        title = self.title + " #" + str(num)
        super().create_entry(url, title)
class FeedScheduler(Scheduler):
    """Re-publishes the five newest entries of an existing upstream feed."""

    def __init__(self, args):
        """Fetch the feed at the `url` param and mirror its latest entries.

        Bypasses the base-class date arithmetic entirely: the upstream feed
        already determines which entries exist.
        """
        self.feed = FeedGenerator()
        self.url = args.get("url")
        self.iframe = args.get("iframe") == "on"
        self.query = args.get("query")
        parsed = feedparser.parse(self.url)
        upstream = parsed.feed
        self.title = upstream.title
        self.feed.id(upstream.link)
        self.feed.link(href=upstream.link)
        for item in parsed.entries[:5]:
            self.create_entry(item)

    def create_entry(self, entry):
        """Republish a single upstream entry through the base-class helper."""
        super().create_entry(entry.link, entry.title)
class ComicRocketScheduler(Scheduler):
    """Scheduler backed by Comic Rocket's per-comic reader pages."""

    def __init__(self, args):
        """Build the feed for the comic identified by the `slug` param.

        Raises ValueError when `slug` is missing or empty. When the schedule
        has run past the comic's final page, a terminal marker entry is
        appended instead of failing.
        """
        self.slug = args.get("slug")
        if not self.slug:
            raise ValueError("<code>slug</code> must be set")
        homepage = f"https://www.comic-rocket.com/explore/{self.slug}/"
        try:
            super().__init__(args, homepage)
        except LastPageError:
            # Past the last available page: publish an "end of feed" marker.
            sentinel = self.feed.add_entry()
            sentinel.id(homepage)
            sentinel.title("End of the feed")
            sentinel.link(href=homepage)

    def create_entry(self, num):
        """Resolve reader page `num` to the embedded comic URL and add it.

        Raises SlugError when the reader page 404s (bad slug) and
        LastPageError when the page carries no embedded comic frame.
        """
        reader_url = f"https://www.comic-rocket.com/read/{self.slug}/{num}"
        req = Request(reader_url, headers={"User-Agent": USER_AGENT})
        try:
            page = BeautifulSoup(urlopen(req), features="lxml")
        except HTTPError:
            raise SlugError
        frames = page.body.select("#serialpagebody iframe")
        if not frames:
            raise LastPageError
        comic_url = frames[0]["src"]
        if self.title is None:
            # Reader page titles look like "<Comic Name> - Comic Rocket".
            self.title = page.title.string.split(" - ")[0]
        super().create_entry(comic_url, self.title + " #" + str(num))