Skip to content

Commit

Permalink
Add MDN annotations to spec output
Browse files Browse the repository at this point in the history
This change adds annotations in the margin of the spec output next to
any items for which there’s a corresponding MDN article somewhere under
https://developer.mozilla.org/en-US/docs/Web that has a Specifications
section with a link to that item's URL.

The mechanism for inserting the annotations relies on data in a JSON
file containing a mapping of spec ID-attribute values to MDN article
pathnames. The change adds a copy of that file to the repo, along with a
makefile for updating/regenerating the JSON file.

Depends on whatwg/wattsi#89
Fixes #180
  • Loading branch information
sideshowbarker committed Sep 23, 2018
1 parent 1d9fa63 commit 5125d6b
Show file tree
Hide file tree
Showing 5 changed files with 5,645 additions and 2 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@
.temp/
html/
output/
mdn/.id-list
mdn/developer.mozilla.org/
4 changes: 2 additions & 2 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ cd "$(dirname "$0")"
DIR=$(pwd)

# The latest required version of Wattsi. Update this if you change how ./build.sh invokes Wattsi.
WATTSI_LATEST=70
WATTSI_LATEST=74

# Shared state variables throughout this script
LOCAL_WATTSI=true
Expand Down Expand Up @@ -535,7 +535,7 @@ function runWattsi {
if $QUIET; then
WATTSI_ARGS+=( --quiet )
fi
WATTSI_ARGS+=( "$1" "$HTML_SHA" "$2" "$BUILD_TYPE" "$HTML_CACHE/caniuse.json" "$HIGHLIGHT_SERVER_URL" )
WATTSI_ARGS+=( "$1" "$HTML_SHA" "$2" "$BUILD_TYPE" "$HTML_CACHE/caniuse.json" "$DIR/mdn/id-map.json" "$HIGHLIGHT_SERVER_URL" )
if hash wattsi 2>/dev/null; then
if [[ "$(wattsi --version | cut -d' ' -f2)" -lt "$WATTSI_LATEST" ]]; then
echo
Expand Down
102 changes: 102 additions & 0 deletions mdn/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# External tools used by the recipes below. Each can be overridden on the
# command line, e.g. `make RG=/opt/bin/rg`.
GREP := grep
JQ := jq
PERL := perl
RG := rg
WGET := wget

# Regular expression (used with both rg and perl) matching a table-cell link
# whose href points into html.spec.whatwg.org; the single capture group is the
# URL fragment, i.e. the text following the "#". The "\#" is make's way of
# spelling a literal "#" inside a variable value.
URL_REGEX := \s+<td>(?:<bdi>)?<a.*href=".+html.spec.whatwg.org[^\#]+\#([^"]*).+

# CSS pseudo-classes whose MDN articles are fetched one by one (by the
# developer.mozilla.org rule) in addition to the mirrored HTML and API docs.
PSEUDO_CLASSES := active checked default defined disabled enabled focus \
  hover in-range indeterminate invalid link dir optional out-of-range \
  read-only read-write required target valid visited

# Build the JSON map of spec IDs to MDN articles.
#
# For every ID listed in .id-list, search the mirrored MDN tree for articles
# that link to that ID in html.spec.whatwg.org, and emit
#   "<id>": [ <output of getMNDArticleDetails.py for each article>, ... ]
# The generated text is then post-processed:
#   * the trailing .html" of article paths is reduced to "
#   * the developer.mozilla.org/en-US/docs/Web/ path prefix is removed
#   * the backslash escapes that the .id-list rule put in front of "(", ")"
#     and "*" (needed because each ID is used as a regex here) are removed
#   * jq re-parses and pretty-prints the result, so malformed JSON is an error.
#
# The result is written to $@.tmp and only renamed to $@ when the whole
# pipeline succeeded. Redirecting straight into $@ would leave a truncated
# file behind when jq rejects its input, and make would then consider that
# file up to date.
id-map.json: .id-list
	(echo "{"; COMMA=""; for ID in $$(cat $<); \
	  do echo "$$COMMA\"$$ID\": [" ; COMMA=","; ARRAY_COMMA=""; \
	  for article in \
	    $$($(RG) -l "\s+<td>(?:<bdi>)?<a.*href=\".+html.spec.whatwg.org[^#]+#$$ID\"" \
	    developer.mozilla.org/en-US/docs/Web | sort); do \
	    articledata=$$(./getMNDArticleDetails.py $$article); \
	    echo "$$ARRAY_COMMA$$articledata"; ARRAY_COMMA=","; done; \
	  echo "]"; done; echo "}") \
	  | $(PERL) -pe 's/\.html"/"/' \
	  | $(PERL) -pe 's/developer\.mozilla\.org\/en-US\/docs\/Web\///' \
	  | $(PERL) -pe 's/\x{5c}\x{28}/\(/g' \
	  | $(PERL) -pe 's/\x{5c}\x{29}/\)/g' \
	  | $(PERL) -pe 's/\x{5c}\x{2a}/\*/g' \
	  | $(JQ) . > $@.tmp \
	  || { rm -f $@.tmp; exit 1; }
	mv -f $@.tmp $@

# Collect the list of html.spec.whatwg.org fragment IDs that the mirrored MDN
# articles link to, one per line, sorted and de-duplicated. "(", ")" and "*"
# are backslash-escaped so that each ID can later be used inside a regular
# expression.
#
# The list is assembled in $@.tmp and only renamed to $@ after it has passed
# the check below. If the unchecked list were written directly to $@, a
# failed check would still leave $@ on disk, newer than its prerequisite, and
# the next `make` run would treat it as up to date and skip the check.
.id-list: developer.mozilla.org
	$(RG) -t html --no-filename '$(URL_REGEX)' $< \
	  | $(PERL) -pe 's/$(URL_REGEX)/$$1/' | sort | uniq \
	  | $(PERL) -pe 's/\(/\\(/g' \
	  | $(PERL) -pe 's/\)/\\)/g' \
	  | $(PERL) -pe 's/\*/\\*/g' \
	  | sort | uniq > $@.tmp
# Fail if the ID list contains any generated "back" IDs (anything matching
# \w:\w other than the-lang-and-xml:lang-attributes). The offending IDs are
# printed by grep. These need to be changed in the corresponding MDN article
# to some other non-back-IDs.
	if $(GREP) -v "the-lang-and-xml:lang-attributes" $@.tmp \
	  | $(GREP) "\w:\w"; then \
	  echo "$@: generated \"back\" IDs found (listed above)" >&2; \
	  rm -f $@.tmp; exit 1; fi
	mv -f $@.tmp $@

# Helpers for turning a readable, whitespace-separated list into the
# comma-separated form wget expects.
mdn_comma := ,
mdn_empty :=
mdn_space := $(mdn_empty) $(mdn_empty)

# The two lists below are deliberately built in make variables rather than
# written as multi-line single-quoted strings in the recipe: make hands
# backslash-newline pairs in a recipe to the shell unchanged, and the shell
# keeps them literally inside single quotes, so every list entry that follows
# a line break would start with a backslash and a newline and match nothing.

# Directories wget must not descend into.
MDN_EXCLUDE_DIRS := $(subst $(mdn_space),$(mdn_comma),$(strip \
  /en-US/docs/Archive \
  /en-US/docs/Mozilla \
  /en-US/docs/Web/API/Bluetooth* \
  /en-US/docs/Web/API/Device* \
  /en-US/docs/Web/API/EXT* \
  /en-US/docs/Web/API/Gamepad* \
  /en-US/docs/Web/API/IDB* \
  /en-US/docs/Web/API/MediaKey* \
  /en-US/docs/Web/API/MediaStream* \
  /en-US/docs/Web/API/MediaTrack* \
  /en-US/docs/Web/API/OES* \
  /en-US/docs/Web/API/Payment* \
  /en-US/docs/Web/API/Performance* \
  /en-US/docs/Web/API/Presentation* \
  /en-US/docs/Web/API/RTC* \
  /en-US/docs/Web/API/Readable* \
  /en-US/docs/Web/API/SVG* \
  /en-US/docs/Web/API/ServiceWorker* \
  /en-US/docs/Web/API/Speech* \
  /en-US/docs/Web/API/WebVR* \
  /en-US/docs/Web/API/VR* \
  /en-US/docs/Web/API/WEBGL* \
  /en-US/docs/Web/API/WebGL*))

# File-name patterns wget must not save. "$$" yields the literal "$" that
# appears in names such as "...$edit".
MDN_REJECT := $(subst $(mdn_space),$(mdn_comma),$(strip \
  *$$edit *$$history *$$json *$$locales \
  *Bluetooth* \
  *Device* \
  *EXT* \
  *Gamepad* \
  *IDB* \
  *MediaKey* \
  *MediaStream* \
  *MediaTrack* \
  *Moz* \
  *Ms* \
  *OES* \
  *Performance* \
  *Presentation* \
  *RTC* \
  *Readable* \
  *SVG* \
  *ServiceWorker* \
  *Speech* \
  *VR* \
  *WEBGL* \
  *WebGL*))

# Mirror the HTML and API parts of MDN's /en-US/docs/Web tree into the
# developer.mozilla.org/ directory, then add the CSS pseudo-class articles
# named in PSEUDO_CLASSES plus the Pseudo-classes overview page.
developer.mozilla.org:
# The exit status of the mirror run is ignored on purpose (leading "-").
	-$(WGET) --mirror --adjust-extension --trust-server-names \
	  --include-directories=/en-US/docs/Web/HTML,/en-US/docs/Web/API \
	  --exclude-directories='$(MDN_EXCLUDE_DIRS)' \
	  --reject '$(MDN_REJECT)' \
	  https://developer.mozilla.org/en-US/docs/Web
# Plain mkdir (no -p) still fails when the mirror produced no
# $@/en-US/docs/Web directory at all, but an already existing CSS directory
# (forced re-run) is tolerated.
	[ -d $@/en-US/docs/Web/CSS ] || mkdir $@/en-US/docs/Web/CSS
# Each pseudo-class article is downloaded into the current directory and then
# moved into place.
	for PSEUDO_CLASS in $(PSEUDO_CLASSES); do \
	  $(WGET) --adjust-extension \
	    https://developer.mozilla.org/en-US/docs/Web/CSS/:$$PSEUDO_CLASS; \
	  mv :$$PSEUDO_CLASS.html $@/en-US/docs/Web/CSS/; \
	  done
	$(WGET) --adjust-extension \
	  https://developer.mozilla.org/en-US/docs/Web/CSS/Pseudo-classes
	mv Pseudo-classes.html $@/en-US/docs/Web/CSS/

# Remove the intermediate ID list and the downloaded MDN mirror.
# id-map.json is not removed by this target.
# Declared phony so that a file or directory named "clean" can never make
# this target look up to date.
.PHONY: clean
clean:
	rm -f .id-list
	rm -rf developer.mozilla.org
25 changes: 25 additions & 0 deletions mdn/getMNDArticleDetails.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/usr/bin/env python2
import sys
from lxml.html import parse


def normalize(string):
    """Return `string` as UTF-8 bytes suitable for a double-quoted string.

    The text is encoded as UTF-8, double quotes are backslash-escaped,
    UTF-8 non-breaking spaces become plain spaces, and newline, carriage
    return and tab characters are each replaced by a space (raw control
    characters are not allowed inside JSON strings). Leading and trailing
    whitespace is then stripped.

    NOTE(review): backslashes in the input are passed through unescaped.
    """
    data = string.encode('utf-8')
    data = data.replace(b'"', b'\\"')
    # b'\xc2\xa0' is the UTF-8 encoding of U+00A0 (no-break space).
    data = data.replace(b'\xc2\xa0', b' ')
    for control in (b'\n', b'\r', b'\t'):
        data = data.replace(control, b' ')
    return data.strip()

# Path of the MDN article (an HTML file) to describe, taken from the command
# line.
mdnpath = sys.argv[1]
doc = parse(mdnpath)

# Title: the document's <title> text, cut at the first " - " and then at the
# first ": ".
raw_title = ''.join(doc.xpath('/html/head/title/text()'))
title = normalize(raw_title.split(" - ")[0].split(": ")[0])

# Summary sources in order of preference: the class=seoSummary element, then
# the class=summary element, then the first <p> of the article. The first
# source that yields non-empty text wins; if none does, the summary is empty.
summary_xpaths = (
    '//*[@class="seoSummary"]//text()',
    '//*[@class="summary"]//text()',
    '//article/p[string-length(text()) > 0][1]//text()',
)
summary = ''
for summary_xpath in summary_xpaths:
    summary = normalize(''.join(doc.xpath(summary_xpath)))
    if summary != '':
        break

# Emit one array literal: ["<path>","<title>","<summary>"].
print('["' + mdnpath + '","' + title + '","' + summary + '"]')
Loading

0 comments on commit 5125d6b

Please sign in to comment.