Skip to content

Commit

Permalink
util.parse_mf2: set metaformats on main mf2 item
Browse files Browse the repository at this point in the history
either representative h-card or h-entry, if available
  • Loading branch information
snarfed committed Jul 25, 2024
1 parent 3019ba2 commit ac77c73
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 19 deletions.
24 changes: 24 additions & 0 deletions tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -1718,3 +1718,27 @@ def test_parse_mf2_metaformats_hcard_photo_fallback(self):
</html>
""", url='http://xyz', metaformats=True),
ignore=['debug', 'rels', 'rel-urls'])

def test_parse_mf2_metaformats_hentry_inside_hfeed(self):
    """Metaformats photo is set on an h-entry that is nested in an h-feed.

    The page's only top-level mf2 item is an h-feed with no properties; the
    og:image value is expected to show up as ``photo`` on the child h-entry.
    """
    html = """\
<html>
<head><meta property="og:image" content="http://pic" /></head>
<body>
<div class="h-feed"><div class="h-entry">foo</div></div>
</body>
</html>
"""
    expected_entry = {
        'type': ['h-entry'],
        'properties': {
            'name': ['foo'],
            'photo': ['http://pic'],
        },
    }
    expected = {
        'items': [{
            'type': ['h-feed'],
            'properties': {},
            'children': [expected_entry],
        }],
    }

    # Non-root URL path, so the page is handled as a post rather than a
    # profile/home page.
    parsed = util.parse_mf2(html, url='http://xyz/post', metaformats=True)

    self.assert_equals(expected, parsed, ignore=['debug', 'rels', 'rel-urls'])

45 changes: 26 additions & 19 deletions util.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from cachetools import cached, TTLCache
from domain2idna import domain2idna
from flask import abort
import mf2util

try:
import ujson
Expand Down Expand Up @@ -226,28 +227,28 @@
# Mapping from HTML <meta> metadata to mf2 property names.
#
# Each entry looks like (meta attribute name, value of that attribute,
# mf2 property) -- e.g. <meta property="og:image" ...> maps to mf2 `photo`.
# Presumably consumed by parse_metaformats; confirm against that function.
#
# NOTE(review): every tuple below appears twice, once written with double
# quotes and once with single quotes. The two spellings are equal values, so
# this looks like old and new revisions of the same lines interleaved --
# confirm which set is intended and drop the other.
METAFORMAT_TO_MF2 = [
# in priority order, descending
# OGP
("property", "article:author", "url"),
("property", "article:published_time", "published"),
("property", "article:modified_time", "updated"),
("property", "og:audio", "audio"),
("property", "og:description", "summary"),
("property", "og:image", "photo"),
("property", "og:title", "name"),
("property", "og:video", "video"),
('property', 'article:author', 'url'),
('property', 'article:published_time', 'published'),
('property', 'article:modified_time', 'updated'),
('property', 'og:audio', 'audio'),
('property', 'og:description', 'summary'),
('property', 'og:image', 'photo'),
('property', 'og:title', 'name'),
('property', 'og:video', 'video'),
# Twitter
("name", "twitter:title", "name"),
("name", "twitter:description", "summary"),
("name", "twitter:image", "photo"),
('name', 'twitter:title', 'name'),
('name', 'twitter:description', 'summary'),
('name', 'twitter:image', 'photo'),
# HTML standard meta names
# https://developer.mozilla.org/en-US/docs/Web/HTML/Element/meta/name
("name", "description", "summary"),
('name', 'description', 'summary'),
]
# Set of metaformat names, all of which also appear as the middle element of
# tuples in METAFORMAT_TO_MF2. Presumably these are the ones whose content is
# a URL -- confirm against the code that reads this set.
#
# NOTE(review): each name is listed twice (double- and single-quoted). The
# duplicates collapse in a set literal, so the resulting set has five members.
METAFORMAT_URL_PROPERTIES = {
"article:author",
"og:audio",
"og:image",
"og:video",
"twitter:image",
'article:author',
'og:audio',
'og:image',
'og:video',
'twitter:image',
}


Expand Down Expand Up @@ -2094,16 +2095,22 @@ def parse_mf2(input, url=None, id=None, metaformats=None):
return None

mf2 = mf2py.parse(url=url, doc=input)
if urlparse(url).path in ('', '/'):
type = 'h-card'
mf2_item = mf2util.representative_hcard(mf2, mf2.get('url') or url)
else:
type = 'h-entry'
mf2_item = mf2util.find_first_entry(mf2, ['h-entry'])

mf2_item = mf2['items'][0] if mf2['items'] else None
if not mf2_item and mf2['items']:
mf2_item = mf2['items'][0]

mf2_hcard = None
for item in mf2['items']:
if 'h-card' in item.get('type', []):
mf2_hcard = item

if metaformats and url:
type = 'h-card' if urlparse(url).path in ('', '/') else 'h-entry'
if meta_item := parse_metaformats(input, url, type=type):
if mf2_item:
# if mf2 item doesn't have a photo, fall back to metaformats
Expand Down

0 comments on commit ac77c73

Please sign in to comment.