Skip to content

Commit

Permalink
[twitter] add option to extract TwitPic embeds (#579)
Browse files: browse the repository at this point in the history
  • Loading branch information
mikf committed Jan 18, 2020
1 parent 254f7c3 commit 25d5ec4
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 1 deletion.
9 changes: 9 additions & 0 deletions docs/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1067,6 +1067,15 @@ Description Extract images from retweets.
=========== =====


extractor.twitter.twitpic
-------------------------
=========== =====
Type ``bool``
Default ``false``
Description Extract images from `TwitPic <https://twitpic.com/>`__ embeds.
=========== =====


extractor.twitter.videos
------------------------
=========== =====
Expand Down
1 change: 1 addition & 0 deletions docs/gallery-dl.conf
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@
{
"content": false,
"retweets": true,
"twitpic": false,
"videos": false
},
"vsco":
Expand Down
29 changes: 28 additions & 1 deletion gallery_dl/extractor/twitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ def __init__(self, match):
self._user_dict = None
self.logged_in = False
self.retweets = self.config("retweets", True)
self.twitpic = self.config("twitpic", False)
self.content = self.config("content", False)
self.videos = self.config("videos", False)

Expand Down Expand Up @@ -79,6 +80,26 @@ def items(self):
urls = [url + size for size in self.sizes]
yield Message.Urllist, urls, data

if self.twitpic and "//twitpic.com/" in tweet:
urls = [
url for url in text.extract_iter(
tweet, 'data-expanded-url="', '"')
if "//twitpic.com/" in url
]

if "num" not in data:
if urls:
yield Message.Directory, data
data["num"] = 0

for data["num"], url in enumerate(urls, data["num"]+1):
response = self.request(url, fatal=False)
if response.status_code >= 400:
continue
url = text.extract(
response.text, 'name="twitter:image" value="', '"')[0]
yield Message.Url, url, text.nameext_from_url(url, data)

def metadata(self):
"""Return general metadata"""
# This implementation supplies no metadata: it always returns an empty dict.
return {}
Expand Down Expand Up @@ -230,7 +251,7 @@ def _tweets_from_api(self, url, max_position=None):
for tweet in text.extract_iter(
data["items_html"], '<div class="tweet ', '\n</li>'):
yield tweet

if data.get("min_position") is None:
if data["has_more_items"] and "min_position" not in data:
pass
Expand Down Expand Up @@ -348,6 +369,12 @@ class TwitterTweetExtractor(TwitterExtractor):
"count": 4,
"keyword": "0c627af2b8cdccc7e0da8fd221155c4a4a3141a8",
}),
# TwitPic embeds (#579)
("https://twitter.com/i/web/status/112900228289540096", {
"options": (("twitpic", True),),
"pattern": r"https://\w+.cloudfront.net/photos/large/\d+.jpg",
"count": 3,
}),
)

def __init__(self, match):
Expand Down

0 comments on commit 25d5ec4

Please sign in to comment.