Skip to content

Commit

Permalink
[twitter] improve results for regular user URLs
Browse files Browse the repository at this point in the history
- continuation of 3346f58
- use media timeline results (or tweet timeline if retweets are enabled)
  plus search results starting from the last tweet id of the first
  timeline, similar to how Twitter Media Downloader operates
- the old behavior can still be forced by appending '/tweets' to a user URL
  (https://twitter.com/USER/tweets), the same way '/media' works,
  although there should never be a need to do that
  • Loading branch information
mikf committed May 23, 2022
1 parent 6ad39f2 commit 915dba8
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 6 deletions.
1 change: 1 addition & 0 deletions docs/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2366,6 +2366,7 @@ Description
Special values:

* ``"timeline"``: ``https://twitter.com/i/user/{rest_id}``
* ``"tweets"``: ``https://twitter.com/id:{rest_id}/tweets``
* ``"media"``: ``https://twitter.com/id:{rest_id}/media``

Note: To allow gallery-dl to follow custom URL formats, set the blacklist__
Expand Down
34 changes: 28 additions & 6 deletions gallery_dl/extractor/twitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,9 @@ def _users_result(self, users):
elif userfmt == "media":
cls = TwitterMediaExtractor
fmt = (self.root + "/id:{rest_id}/media").format_map
elif userfmt == "tweets":
cls = TwitterTweetsExtractor
fmt = (self.root + "/id:{rest_id}/tweets").format_map
else:
cls = None
fmt = userfmt.format_map
Expand Down Expand Up @@ -383,7 +386,7 @@ def _login_impl(self, username, password):


class TwitterTimelineExtractor(TwitterExtractor):
"""Extractor for Tweets from a user's timeline"""
"""Extractor for a Twitter user timeline"""
subcategory = "timeline"
pattern = (BASE_PATTERN + r"/(?!search)(?:([^/?#]+)/?(?:$|[?#])"
r"|i(?:/user/|ntent/user\?user_id=)(\d+))")
Expand All @@ -409,18 +412,18 @@ def __init__(self, match):
self.user = "id:" + user_id

def tweets(self):
if not self.config("strategy"):
return self._tweets_twMediaDownloader()
return self.api.user_tweets(self.user)
tweets = (self.api.user_tweets(self.user) if self.retweets else
self.api.user_media(self.user))

def _tweets_twMediaDownloader(self):
# yield initial batch of (media) tweets
tweet = None
for tweet in self.api.user_media(self.user):
for tweet in tweets:
yield tweet

if tweet is None:
return

# get username
if not self.user.startswith("id:"):
username = self.user
elif "core" in tweet:
Expand All @@ -429,16 +432,35 @@ def _tweets_twMediaDownloader(self):
else:
username = tweet["user"]["screen_name"]

# get tweet data
if "legacy" in tweet:
tweet = tweet["legacy"]

# yield search results starting from last tweet id
yield from self.api.search_adaptive(
"from:{} include:retweets include:nativeretweets max_id:{} "
"filter:images OR card_name:animated_gif OR filter:native_video"
.format(username, tweet["id_str"])
)


class TwitterTweetsExtractor(TwitterExtractor):
    """Extractor for Tweets from a user's Tweets timeline"""
    subcategory = "tweets"
    # Matches '<user>/tweets' URLs. The '(?!search)' lookahead rejects a
    # first path segment beginning with 'search'; the trailing '(?!\w)'
    # rejects longer words that merely start with 'tweets'.
    # BASE_PATTERN is defined elsewhere in this module (presumably the
    # scheme/host prefix -- confirm there).
    pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/tweets(?!\w)"
    # Sample URLs for this extractor: a plain user URL, a mobile-host URL
    # with a fragment, and an 'id:<number>' user URL.
    # NOTE(review): the "range"/"url" options on the first entry are
    # presumably interpreted by the project's test runner -- confirm.
    test = (
        ("https://twitter.com/supernaturepics/tweets", {
            "range": "1-40",
            "url": "c570ac1aae38ed1463be726cc46f31cac3d82a40",
        }),
        ("https://mobile.twitter.com/supernaturepics/tweets#t"),
        ("https://www.twitter.com/id:2976459548/tweets"),
    )

    def tweets(self):
        """Return the result of the API's user_tweets() call for self.user"""
        return self.api.user_tweets(self.user)


class TwitterRepliesExtractor(TwitterExtractor):
"""Extractor for Tweets from a user's timeline including replies"""
subcategory = "replies"
Expand Down
1 change: 1 addition & 0 deletions scripts/supportedsites.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@
},
"twitter": {
"media": "Media Timelines",
"tweets": "",
"replies": "",
"list-members": "List Members",
},
Expand Down

0 comments on commit 915dba8

Please sign in to comment.