Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update test suite and fix broken tests #36

Merged
merged 3 commits into from
Dec 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,11 @@
*/
package ch.digitalfondue.jfiveparse;

import static ch.digitalfondue.jfiveparse.TreeConstructor.CHARACTER;
import static ch.digitalfondue.jfiveparse.TreeConstructor.COMMENT;
import static ch.digitalfondue.jfiveparse.TreeConstructor.DOCTYPE;
import static ch.digitalfondue.jfiveparse.TreeConstructor.END_TAG;
import static ch.digitalfondue.jfiveparse.TreeConstructor.EOF;
import static ch.digitalfondue.jfiveparse.TreeConstructor.START_TAG;
import static ch.digitalfondue.jfiveparse.TreeConstructor.genericRCDataParsing;
import static ch.digitalfondue.jfiveparse.TreeConstructor.genericRawTextElementParsing;
import java.util.List;
import java.util.Locale;
import java.util.Set;

import static ch.digitalfondue.jfiveparse.TreeConstructor.*;

class TreeConstructorAftersBeforeInitialInHead {

Expand Down Expand Up @@ -160,25 +157,25 @@ static void afterAfterFrameset(byte tokenType, String tagName, TreeConstructor t
/**
 * Dispatches a token on its type and forwards it to the matching "head" handler.
 * <ul>
 * <li>CHARACTER: {@code handleCharacterHead}</li>
 * <li>COMMENT: {@code treeConstructor.insertComment()}</li>
 * <li>DOCTYPE: a parse error is emitted and the token is otherwise ignored</li>
 * <li>EOF: {@code anythingElseHead}</li>
 * <li>END_TAG: {@code handleEndTagHead}</li>
 * <li>START_TAG: {@code handleStartTagHead}</li>
 * </ul>
 * The switch has no default branch, so any other token type is a no-op.
 *
 * @param tokenType       the type of the token being processed
 * @param tagName         the tag name, passed on to the start/end tag handlers
 * @param treeConstructor the tree constructor the handlers operate on
 */
// NOTE(review): every case label below is listed twice; this looks like the removed and
// added sides of an indentation-only diff hunk rendered together - confirm against the real file.
static void beforeHead(byte tokenType, String tagName, TreeConstructor treeConstructor) {

switch (tokenType) {
case CHARACTER:
handleCharacterHead(treeConstructor);
break;
case COMMENT:
treeConstructor.insertComment();
break;
case DOCTYPE:
treeConstructor.emitParseError();
// ignore
break;
case EOF:
anythingElseHead(treeConstructor);
break;
case END_TAG:
handleEndTagHead(tagName, treeConstructor);
break;
case START_TAG:
handleStartTagHead(tokenType, tagName, treeConstructor);
break;
case CHARACTER:
handleCharacterHead(treeConstructor);
break;
case COMMENT:
treeConstructor.insertComment();
break;
case DOCTYPE:
treeConstructor.emitParseError();
// ignore
break;
case EOF:
anythingElseHead(treeConstructor);
break;
case END_TAG:
handleEndTagHead(tagName, treeConstructor);
break;
case START_TAG:
handleStartTagHead(tokenType, tagName, treeConstructor);
break;
}
}

Expand Down Expand Up @@ -222,24 +219,24 @@ private static void anythingElseHead(TreeConstructor treeConstructor) {
/**
 * Dispatches a token on its type and forwards it to the matching "html" handler.
 * <ul>
 * <li>CHARACTER: {@code handleCharacterHtml}</li>
 * <li>COMMENT: {@code treeConstructor.insertCommentToDocument()}</li>
 * <li>DOCTYPE: a parse error is emitted and nothing else is done</li>
 * <li>EOF: {@code anythingElseHtml}</li>
 * <li>END_TAG: {@code handleEndTagHtml}</li>
 * <li>START_TAG: {@code handleStartTagHtml}</li>
 * </ul>
 * The switch has no default branch, so any other token type is a no-op.
 *
 * @param tokenType       the type of the token being processed
 * @param tagName         the tag name, passed on to the start/end tag handlers
 * @param treeConstructor the tree constructor the handlers operate on
 */
// NOTE(review): every case label below is listed twice; this looks like the removed and
// added sides of an indentation-only diff hunk rendered together - confirm against the real file.
static void beforeHtml(byte tokenType, String tagName, TreeConstructor treeConstructor) {

switch (tokenType) {
case CHARACTER:
handleCharacterHtml(treeConstructor);
break;
case COMMENT:
treeConstructor.insertCommentToDocument();
break;
case DOCTYPE:
treeConstructor.emitParseError();
break;
case EOF:
anythingElseHtml(treeConstructor);
break;
case END_TAG:
handleEndTagHtml(tagName, treeConstructor);
break;
case START_TAG:
handleStartTagHtml(tagName, treeConstructor);
break;
case CHARACTER:
handleCharacterHtml(treeConstructor);
break;
case COMMENT:
treeConstructor.insertCommentToDocument();
break;
case DOCTYPE:
treeConstructor.emitParseError();
break;
case EOF:
anythingElseHtml(treeConstructor);
break;
case END_TAG:
handleEndTagHtml(tagName, treeConstructor);
break;
case START_TAG:
handleStartTagHtml(tagName, treeConstructor);
break;
}
}

Expand Down Expand Up @@ -284,32 +281,128 @@ private static void anythingElseHtml(TreeConstructor treeConstructor) {
/**
 * Dispatches a token on its type:
 * <ul>
 * <li>CHARACTER: {@code handleCharacters}</li>
 * <li>COMMENT: {@code treeConstructor.insertCommentToDocument()}</li>
 * <li>DOCTYPE: {@code handleDoctype}</li>
 * <li>EOF, END_TAG, START_TAG: {@code initialOthers}; the per-case calls for EOF and
 * END_TAG are commented out, so those labels fall through to the START_TAG body</li>
 * </ul>
 * The switch has no default branch, so any other token type is a no-op.
 *
 * @param tokenType       the type of the token being processed
 * @param treeConstructor the tree constructor the handlers operate on
 */
// NOTE(review): the case labels below are listed twice and interleaved; this looks like the
// removed and added sides of an indentation-only diff hunk rendered together - confirm
// against the real file.
static void initial(byte tokenType, TreeConstructor treeConstructor) {

switch (tokenType) {
case CHARACTER:
handleCharacters(treeConstructor);
break;
case COMMENT:
treeConstructor.insertCommentToDocument();
break;
case DOCTYPE:
handleDoctype(treeConstructor);
break;
case EOF:
case CHARACTER:
handleCharacters(treeConstructor);
break;
case COMMENT:
treeConstructor.insertCommentToDocument();
break;
case DOCTYPE:
handleDoctype(treeConstructor);
break;
case EOF:
/*initialOthers(treeConstructor);
break;*/
case END_TAG:
case END_TAG:
/*initialOthers(treeConstructor);
break;*/
case START_TAG:
initialOthers(treeConstructor);
break;
case START_TAG:
initialOthers(treeConstructor);
break;
}
}

/**
 * Public identifier prefixes that make {@code quirksType} report quirks mode (2).
 * <p>
 * {@code quirksType} lowercases the doctype's public identifier with {@link Locale#ROOT}
 * and tests it with {@code startsWith} against each entry, which is why every entry here
 * is written in lowercase.
 */
private static final List<String> PUBLIC_ID_PREFIXES = List.of(
"+//silmaril//dtd html pro v0r11 19970101//",
"-//as//dtd html 3.0 aswedit + extensions//",
"-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
"-//ietf//dtd html 2.0 level 1//",
"-//ietf//dtd html 2.0 level 2//",
"-//ietf//dtd html 2.0 strict level 1//",
"-//ietf//dtd html 2.0 strict level 2//",
"-//ietf//dtd html 2.0 strict//",
"-//ietf//dtd html 2.0//",
"-//ietf//dtd html 2.1e//",
"-//ietf//dtd html 3.0//",
"-//ietf//dtd html 3.2 final//",
"-//ietf//dtd html 3.2//",
"-//ietf//dtd html 3//",
"-//ietf//dtd html level 0//",
"-//ietf//dtd html level 1//",
"-//ietf//dtd html level 2//",
"-//ietf//dtd html level 3//",
"-//ietf//dtd html strict level 0//",
"-//ietf//dtd html strict level 1//",
"-//ietf//dtd html strict level 2//",
"-//ietf//dtd html strict level 3//",
"-//ietf//dtd html strict//",
"-//ietf//dtd html//",
"-//metrius//dtd metrius presentational//",
"-//microsoft//dtd internet explorer 2.0 html strict//",
"-//microsoft//dtd internet explorer 2.0 html//",
"-//microsoft//dtd internet explorer 2.0 tables//",
"-//microsoft//dtd internet explorer 3.0 html strict//",
"-//microsoft//dtd internet explorer 3.0 html//",
"-//microsoft//dtd internet explorer 3.0 tables//",
"-//netscape comm. corp.//dtd html//",
"-//netscape comm. corp.//dtd strict html//",
"-//o'reilly and associates//dtd html 2.0//",
"-//o'reilly and associates//dtd html extended 1.0//",
"-//o'reilly and associates//dtd html extended relaxed 1.0//",
"-//sq//dtd html 2.0 hotmetal + extensions//",
"-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
"-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
"-//spyglass//dtd html 2.0 extended//",
"-//sun microsystems corp.//dtd hotjava html//",
"-//sun microsystems corp.//dtd hotjava strict html//",
"-//w3c//dtd html 3 1995-03-24//",
"-//w3c//dtd html 3.2 draft//", "-//w3c//dtd html 3.2 final//",
"-//w3c//dtd html 3.2//",
"-//w3c//dtd html 3.2s draft//",
"-//w3c//dtd html 4.0 frameset//",
"-//w3c//dtd html 4.0 transitional//",
"-//w3c//dtd html experimental 19960712//",
"-//w3c//dtd html experimental 970421//",
"-//w3c//dtd w3 html//",
"-//w3o//dtd w3 html 3.0//",
"-//webtechs//dtd mozilla html 2.0//",
"-//webtechs//dtd mozilla html//");

/**
 * Classifies a DOCTYPE by the rendering mode it selects.
 * <p>
 * The public and system identifiers are lowercased with {@link Locale#ROOT} before any
 * comparison. A missing ({@code null}) public identifier never matches a public-identifier
 * rule; a {@code null} or empty system identifier is treated as missing.
 * <p>
 * Only the quirks-mode rules are implemented here: the limited-quirks rules and the
 * iframe srcdoc exception are skipped, so this method currently returns either 0 or 2.
 *
 * @param documentType the doctype node to classify
 * @return 0 = no-quirks-mode, 1 = limited-quirks mode, 2 = quirks-mode
 */
private static byte quirksType(DocumentType documentType) {
    if (!"html".equals(documentType.getName())) {
        return 2;
    }
    var publicId = documentType.getPublicId();
    if (publicId != null) {
        publicId = publicId.toLowerCase(Locale.ROOT);
    }
    var systemId = documentType.getSystemId();
    if (systemId != null) {
        systemId = systemId.toLowerCase(Locale.ROOT);
    }

    if ("http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd".equals(systemId)) {
        return 2;
    }

    // Every remaining rule inspects the public identifier. Guard once: the unmodifiable
    // Set returned by Set.of rejects contains(null) with a NullPointerException, and
    // startsWith cannot be invoked on a null reference.
    if (publicId != null) {
        if (Set.of("-//w3o//dtd w3 html strict 3.0//en//", "-/w3c/dtd html 4.0 transitional/en", "html").contains(publicId)) {
            return 2;
        }
        for (var prefix : PUBLIC_ID_PREFIXES) {
            if (publicId.startsWith(prefix)) {
                return 2;
            }
        }
        boolean systemIdMissing = systemId == null || systemId.isEmpty();
        if (systemIdMissing && (
                publicId.startsWith("-//w3c//dtd html 4.01 frameset//")
                        || publicId.startsWith("-//w3c//dtd html 4.01 transitional//"))) {
            return 2;
        }
    }
    // we skip the iframe srcdoc section
    return 0;
}

/**
 * Handles a DOCTYPE token: builds the doctype node, attaches it to the document,
 * turns on quirks mode when {@code quirksType} reports 2, and then switches the
 * insertion mode to {@code BEFORE_HTML}.
 * <p>
 * see https://html.spec.whatwg.org/#the-initial-insertion-mode "A DOCTYPE token"
 *
 * @param treeConstructor the tree constructor that supplies the doctype and the document
 */
private static void handleDoctype(TreeConstructor treeConstructor) {
    final DocumentType documentType = treeConstructor.buildDocumentType();
    final Document document = treeConstructor.getDocument();

    // Attach the node first, then record it as the document's doctype.
    document.appendChild(documentType);
    document.setDoctype(documentType);

    final boolean quirks = quirksType(documentType) == 2;
    if (quirks) {
        treeConstructor.setQuirksMode(true);
    }

    treeConstructor.setInsertionMode(TreeConstructionInsertionMode.BEFORE_HTML);
}

Expand Down Expand Up @@ -354,7 +447,7 @@ static void inHead(byte tokenType, String tagName, TreeConstructor treeConstruct
genericRCDataParsing(treeConstructor);
} else if (tokenType == START_TAG && (//
("noscript".equals(tagName) && treeConstructor.isScriptingFlag()) || //
("noframes".equals(tagName) || "style".equals(tagName)))) {
("noframes".equals(tagName) || "style".equals(tagName)))) {
genericRawTextElementParsing(treeConstructor);
} else if (Common.isStartTagNamed(tokenType, "noscript", tagName) && !treeConstructor.isScriptingFlag()) {
treeConstructor.insertHtmlElementToken();
Expand Down Expand Up @@ -443,7 +536,7 @@ static void inHeadNoScript(byte tokenType, String tagName, TreeConstructor treeC
}

private static void generateImpliedEndTagThoroughly(TreeConstructor treeConstructor) {
for (;;) {
for (; ; ) {
Element current = treeConstructor.getCurrentNode();
if (Node.NAMESPACE_HTML.equals(current.getNamespaceURI()) && Common.isImpliedTagsThoroughly(current.getNodeName())) {
treeConstructor.popCurrentNode();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,17 @@ static void inSelect(byte tokenType, String tagName, TreeConstructor treeConstru
treeConstructor.popCurrentNode();
}
treeConstructor.insertHtmlElementToken();
} else if (Common.isStartTagNamed(tokenType, "hr", tagName)) {
// see https://github.com/html5lib/html5lib-tests/commit/55aa183097fa52bb1328cd93633be6f88159d4b8
if (Common.isHtmlNS(treeConstructor.getCurrentNode(), "option")) {
treeConstructor.popCurrentNode();
}
if (Common.isHtmlNS(treeConstructor.getCurrentNode(), "optgroup")) {
treeConstructor.popCurrentNode();
}
treeConstructor.insertHtmlElementToken();
treeConstructor.popCurrentNode();
treeConstructor.ackSelfClosingTagIfSet();
} else if (Common.isEndTagNamed(tokenType, "optgroup", tagName)) {

if (Common.isHtmlNS(treeConstructor.getCurrentNode(), "option")
Expand Down
2 changes: 1 addition & 1 deletion src/test/resources/html5lib-tests
Submodule html5lib-tests updated 37 files
+76 −0 .github/workflows/downstream.yml
+25 −0 .github/workflows/lint.yml
+79 −0 .gitignore
+6 −0 lint
+0 −0 lint_lib/__init__.py
+24 −0 lint_lib/_vendor-patches/funcparserlib.patch
+0 −0 lint_lib/_vendor/__init__.py
+18 −0 lint_lib/_vendor/funcparserlib/LICENSE
+0 −0 lint_lib/_vendor/funcparserlib/__init__.py
+211 −0 lint_lib/_vendor/funcparserlib/lexer.py
+34 −0 lint_lib/_vendor/funcparserlib/lexer.pyi
+872 −0 lint_lib/_vendor/funcparserlib/parser.py
+83 −0 lint_lib/_vendor/funcparserlib/parser.pyi
+0 −0 lint_lib/_vendor/funcparserlib/py.typed
+72 −0 lint_lib/_vendor/funcparserlib/util.py
+7 −0 lint_lib/_vendor/funcparserlib/util.pyi
+1 −0 lint_lib/_vendor/vendor.txt
+280 −0 lint_lib/lint.py
+177 −0 lint_lib/parser.py
+7 −0 pyproject.toml
+2 −2 serializer/core.test
+1 −1 tokenizer/test2.test
+0 −8 tree-construction/foreign-fragment.dat
+53 −0 tree-construction/quirks01.dat
+0 −13 tree-construction/scriptdata01.dat
+36 −0 tree-construction/tables01.dat
+63 −17 tree-construction/template.dat
+0 −31 tree-construction/tests1.dat
+0 −55 tree-construction/tests19.dat
+10 −0 tree-construction/tests2.dat
+0 −16 tree-construction/tests20.dat
+0 −27 tree-construction/tests21.dat
+16 −0 tree-construction/tests4.dat
+36 −0 tree-construction/tests7.dat
+0 −44 tree-construction/tests_innerHTML_1.dat
+26 −0 tree-construction/webkit01.dat
+218 −0 tree-construction/webkit02.dat
Loading