-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfetch_feeds.py
executable file
·99 lines (87 loc) · 3.51 KB
/
fetch_feeds.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/env python
import sqlite3
import datetime
import argparse
import traceback
import feedparser
import socket
import setup_db
def convert_timestamp(val):
    """Parse a ``YYYY-MM-DD HH:MM:SS[.fraction]`` timestamp into a datetime.

    Args:
        val: The timestamp as ``str`` or ``bytes``. The date and time parts
            must be separated by a single space; the fractional-seconds
            part is optional.

    Returns:
        A naive ``datetime.datetime``. The fraction is right-padded with
        zeros or truncated to six digits, i.e. microsecond precision.

    Raises:
        ValueError: If ``val`` does not have the expected layout.
    """
    # Work on text throughout: decode once at the boundary so the same code
    # handles both bytes and str input. (Calling .decode() on the fraction
    # itself fails for str input on Python 3.)
    if isinstance(val, bytes):
        val = val.decode()
    datepart, timepart = val.split(" ")
    year, month, day = map(int, datepart.split("-"))
    timepart_full = timepart.split(".")
    hours, minutes, seconds = map(int, timepart_full[0].split(":"))
    if len(timepart_full) == 2:
        # '{:0<6.6}' left-aligns, pads with '0' to width 6 and truncates to
        # 6 characters: "5" -> "500000", "1234567" -> "123456".
        microseconds = int('{:0<6.6}'.format(timepart_full[1]))
    else:
        microseconds = 0
    return datetime.datetime(year, month, day, hours, minutes, seconds,
                             microseconds)
def sanity_feed_item(feed_id, feed_item):
    """Normalise one parsed feed entry into a row tuple for ``feed_items``.

    Missing optional fields are filled in so every entry yields a complete
    row: the title and the id both fall back to the entry's link.

    Args:
        feed_id: Identifier of the feed the entry belongs to.
        feed_item: A parsed entry supporting ``key in item`` and attribute
            access. It must provide ``link``.

    Returns:
        A tuple ``(feed_id, title, link, id, updated, False)`` where
        ``updated`` is a naive ``datetime.datetime``.

    Raises:
        AttributeError: If the entry has no ``link``.
    """
    link = feed_item.link
    title = feed_item.title if 'title' in feed_item else link
    item_id = feed_item.id if 'id' in feed_item else link

    # Take the first timestamp that is both present and actually parsed.
    # A key can be present with a value of None, so presence alone is not
    # enough for any of the three candidates.
    updated = None
    for key in ('updated_parsed', 'created_parsed', 'published_parsed'):
        if key in feed_item:
            parsed = getattr(feed_item, key)
            if parsed is not None:
                # Only the first six fields (year .. second) are used.
                updated = datetime.datetime(*parsed[:6])
                break
    if updated is None:
        # No usable timestamp on the entry: fall back to the current time.
        updated = datetime.datetime.now()

    return (feed_id, title, link, item_id, updated, False)
def update_feed(c, feed_id, name, feed_uri):
    """Fetch one feed and insert all of its entries into ``feed_items``.

    Args:
        c: Database cursor used for the inserts.
        feed_id: Identifier stored with every inserted entry.
        name: Feed name, used only in progress and error messages.
        feed_uri: Location handed to ``feedparser.parse``.

    Returns:
        The object returned by ``feedparser.parse``.

    Raises:
        RuntimeError: If the parsed feed contains no entries.
    """
    print(u"updating {} ({})".format(name, feed_id))
    parsed = feedparser.parse(feed_uri)
    if not parsed.entries:
        raise RuntimeError(u"No Entries for feed {}".format(name))
    # Build every row up front so a malformed entry aborts the update
    # before anything is written.
    rows = [sanity_feed_item(feed_id, entry) for entry in parsed.entries]
    c.executemany("INSERT INTO feed_items VALUES(NULL,?,?,?,?,?,?)", rows)
    print(u"done updating")
    return parsed
def update_all(args):
    """Refresh every active feed that is due and record the outcome.

    Opens ``feeds.db``, selects either the single feed named by
    ``args.only_feed`` or all feeds, skips inactive feeds and feeds fetched
    less than ``args.since`` minutes ago, and updates the rest. For each
    attempted feed a ``feed_status`` row is written: the fetch time and the
    parser's ``bozo`` flag on success, or the formatted traceback on failure.
    All changes are committed once at the end.

    Args:
        args: Parsed command-line namespace providing ``only_feed`` and
            ``since`` (minutes).
    """
    db = sqlite3.connect("feeds.db")
    try:
        db.row_factory = sqlite3.Row
        c = db.cursor()
        c.execute("PRAGMA foreign_keys = ON")
        if args.only_feed:
            c.execute("SELECT * FROM feeds WHERE id = ?", (args.only_feed,))
        else:
            c.execute("SELECT * FROM feeds")
        # fetchall() materialises the feed list, so the cursor can be reused
        # for the per-feed queries inside the loop.
        for row in c.fetchall():
            if row['active'] == 0:
                continue
            c.execute("SELECT * FROM feed_status WHERE feed = ?", (row['id'],))
            r = c.fetchone()
            if r and r['last_fetch'] is not None:
                date = convert_timestamp(r['last_fetch'])
                if datetime.datetime.now() - date < datetime.timedelta(minutes=args.since):
                    # Fetched recently enough; not due yet.
                    continue
            try:
                f = update_feed(c, row['id'], row['name'], row['feed_uri'])
                c.execute("INSERT OR REPLACE INTO feed_status VALUES(NULL, ?, ?, ?, NULL)",
                          (row['id'], datetime.datetime.now(), f.bozo))
            except Exception:
                # One failing feed must not stop the others, so record the
                # failure and carry on. Deliberately not a bare ``except``:
                # KeyboardInterrupt and SystemExit should abort the run
                # instead of being stored as a feed error.
                error_string = traceback.format_exc()
                print(error_string)
                c.execute("INSERT OR REPLACE INTO feed_status VALUES(NULL, ?, NULL, NULL, ?)",
                          (row['id'], error_string))
        db.commit()
        c.close()
    finally:
        # Always release the database handle, including when the run is
        # aborted part-way through.
        db.close()
# Command-line entry point: parse options, apply the network timeout,
# optionally create the tables, then refresh the feeds.
parser = argparse.ArgumentParser(description="fetch feeds into database")
parser.add_argument(
    "--since",
    default=30,
    type=int,
    help="how out of date fetched feeds need to be before they are fetched",
)
parser.add_argument("--only-feed", help="fetch this feed only")
parser.add_argument(
    "--no-create",
    help="Don't create any tables",
    action="store_true",
)
parser.add_argument(
    "--timeout",
    type=int,
    default=30,
    help='fetch timeout in seconds (0 = none)',
)
args = parser.parse_args()

# A timeout of 0 means "no timeout", which the socket module spells as None.
socket.setdefaulttimeout(args.timeout or None)

if not args.no_create:
    setup_db.create_tables()

update_all(args)