Skip to content

Commit

Permalink
[architizer] add 'firm' extractor (#1369)
Browse files Browse the repository at this point in the history
  • Loading branch information
mikf committed Mar 19, 2021
1 parent 2045236 commit 58b9363
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 2 deletions.
2 changes: 1 addition & 1 deletion docs/supportedsites.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ Consider all sites to be NSFW unless otherwise known.
<tr>
<td>Architizer</td>
<td>https://architizer.com/</td>
<td>Projects</td>
<td>Firms, Projects</td>
<td></td>
</tr>
<tr>
Expand Down
29 changes: 28 additions & 1 deletion gallery_dl/extractor/architizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

"""Extractors for https://architizer.com/"""

from .common import GalleryExtractor
from .common import GalleryExtractor, Extractor, Message
from .. import text


Expand Down Expand Up @@ -72,3 +72,30 @@ def images(self, page):
for url in text.extract_iter(
page, "property='og:image:secure_url' content='", "?")
]


class ArchitizerFirmExtractor(Extractor):
    """Extractor for all projects of a firm"""
    category = "architizer"
    subcategory = "firm"
    root = "https://architizer.com"
    # Group 1 captures the firm's path segment following "/firms/".
    pattern = r"(?:https?://)?architizer\.com/firms/([^/?#]+)"
    test = ("https://architizer.com/firms/olson-kundig/", {
        "pattern": ArchitizerProjectExtractor.pattern,
        "count": ">= 90",
    })

    def __init__(self, match):
        """Store the firm identifier captured by 'pattern'."""
        Extractor.__init__(self, match)
        self.firm = match.group(1)

    def items(self):
        """Yield one Message.Queue entry per project link on the firm page.

        The firm page is requested once; every '/projects/...' link found
        in it is handed to ArchitizerProjectExtractor via the '_extractor'
        key of the accompanying metadata dict.
        """
        firm_url = "{}/firms/{}/?requesting_merlin=pages".format(
            self.root, self.firm)
        page = self.request(firm_url).text
        data = {"_extractor": ArchitizerProjectExtractor}

        for project in text.extract_iter(page, '<a href="/projects/', '"'):
            # Links under "/projects/q/" are skipped
            # NOTE(review): presumably search/query links rather than
            # actual project pages -- confirm against the site markup.
            if not project.startswith("q/"):
                url = "{}/projects/{}".format(self.root, project)
                yield Message.Queue, url, data

0 comments on commit 58b9363

Please sign in to comment.