From 915dba8345d3d457a80f08fb34d0409b00829444 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?=
Date: Mon, 23 May 2022 18:23:21 +0200
Subject: [PATCH] [twitter] improve results for regular user URLs

- continuation of 3346f58a
- use media timeline results (or tweet timeline if retweets are enabled)
  plus search results starting from the last tweet id of the first
  timeline, similar to how Twitter Media Downloader operates
- the old behavior can be forced by appending '/tweets' to a user URL,
  like with '/media' (https://twitter.com/USER/tweets)
  although there should be no need to ever do that
---
 docs/configuration.rst          |  1 +
 gallery_dl/extractor/twitter.py | 34 +++++++++++++++++++++++++++------
 scripts/supportedsites.py       |  1 +
 3 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/docs/configuration.rst b/docs/configuration.rst
index ff9ed02b1f..4dc89eb3bf 100644
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -2366,6 +2366,7 @@ Description
     Special values:
 
     * ``"timeline"``: ``https://twitter.com/i/user/{rest_id}``
+    * ``"tweets"``: ``https://twitter.com/id:{rest_id}/tweets``
     * ``"media"``: ``https://twitter.com/id:{rest_id}/media``
 
     Note: To allow gallery-dl to follow custom URL formats, set the blacklist__
diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py
index cc5fb1a3f9..164a59f8f2 100644
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -323,6 +323,9 @@ def _users_result(self, users):
         elif userfmt == "media":
             cls = TwitterMediaExtractor
             fmt = (self.root + "/id:{rest_id}/media").format_map
+        elif userfmt == "tweets":
+            cls = TwitterTweetsExtractor
+            fmt = (self.root + "/id:{rest_id}/tweets").format_map
         else:
             cls = None
             fmt = userfmt.format_map
@@ -383,7 +386,7 @@ def _login_impl(self, username, password):
 
 
 class TwitterTimelineExtractor(TwitterExtractor):
-    """Extractor for Tweets from a user's timeline"""
+    """Extractor for a Twitter user timeline"""
     subcategory = "timeline"
     pattern = (BASE_PATTERN + r"/(?!search)(?:([^/?#]+)/?(?:$|[?#])"
                r"|i(?:/user/|ntent/user\?user_id=)(\d+))")
@@ -409,18 +412,18 @@ def __init__(self, match):
             self.user = "id:" + user_id
 
     def tweets(self):
-        if not self.config("strategy"):
-            return self._tweets_twMediaDownloader()
-        return self.api.user_tweets(self.user)
+        tweets = (self.api.user_tweets(self.user) if self.retweets else
+                  self.api.user_media(self.user))
 
-    def _tweets_twMediaDownloader(self):
+        # yield initial batch of (media) tweets
         tweet = None
-        for tweet in self.api.user_media(self.user):
+        for tweet in tweets:
             yield tweet
 
         if tweet is None:
             return
 
+        # get username
         if not self.user.startswith("id:"):
             username = self.user
         elif "core" in tweet:
@@ -429,9 +432,11 @@ def _tweets_twMediaDownloader(self):
         else:
             username = tweet["user"]["screen_name"]
 
+        # get tweet data
         if "legacy" in tweet:
             tweet = tweet["legacy"]
 
+        # yield search results starting from last tweet id
         yield from self.api.search_adaptive(
             "from:{} include:retweets include:nativeretweets max_id:{} "
             "filter:images OR card_name:animated_gif OR filter:native_video"
@@ -439,6 +444,23 @@ def _tweets_twMediaDownloader(self):
         )
 
 
+class TwitterTweetsExtractor(TwitterExtractor):
+    """Extractor for Tweets from a user's Tweets timeline"""
+    subcategory = "tweets"
+    pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/tweets(?!\w)"
+    test = (
+        ("https://twitter.com/supernaturepics/tweets", {
+            "range": "1-40",
+            "url": "c570ac1aae38ed1463be726cc46f31cac3d82a40",
+        }),
+        ("https://mobile.twitter.com/supernaturepics/tweets#t"),
+        ("https://www.twitter.com/id:2976459548/tweets"),
+    )
+
+    def tweets(self):
+        return self.api.user_tweets(self.user)
+
+
 class TwitterRepliesExtractor(TwitterExtractor):
     """Extractor for Tweets from a user's timeline including replies"""
     subcategory = "replies"
diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py
index 84d0866743..3557871dbc 100755
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -208,6 +208,7 @@
     },
     "twitter": {
         "media": "Media Timelines",
+        "tweets": "",
         "replies": "",
         "list-members": "List Members",
     },