Skip to content

Commit

Permalink
Ensure stack pops are HTML namespaced
Browse files Browse the repository at this point in the history
  • Loading branch information
jhy committed Oct 12, 2023
1 parent aab1dc1 commit 26cfb42
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 10 deletions.
41 changes: 33 additions & 8 deletions src/main/java/org/jsoup/parser/HtmlTreeBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import static org.jsoup.internal.StringUtil.inSorted;
import static org.jsoup.parser.HtmlTreeBuilderState.Constants.InTableFoster;
import static org.jsoup.parser.HtmlTreeBuilderState.ForeignContent;
import static org.jsoup.parser.Parser.NamespaceHtml;

/**
* HTML Tree Builder; creates a DOM from Tokens.
Expand Down Expand Up @@ -186,7 +187,7 @@ boolean shouldDispatchToCurrentInsertionMode(Token token) {
final String ns = el.tag().namespace();

// If the adjusted current node is an element in the HTML namespace
if (Parser.NamespaceHtml.equals(ns))
if (NamespaceHtml.equals(ns))
return true;

// If the adjusted current node is a MathML text integration point and the token is a start tag whose tag name is neither "mglyph" nor "malignmark"
Expand Down Expand Up @@ -471,6 +472,7 @@ boolean onStack(Element el) {
return onStack(stack, el);
}

/**
 * Checks if there is an HTML element with the given name on the stack.
 * Delegates the lookup to {@link #getFromStack(String)}.
 *
 * @param elName the element name to look for
 * @return {@code true} if the lookup found an element, {@code false} if it returned {@code null}
 */
boolean onStack(String elName) {
    final Element found = getFromStack(elName);
    return found != null;
}
Expand All @@ -488,13 +490,14 @@ private static boolean onStack(ArrayList<Element> queue, Element element) {
return false;
}

/** Gets the nearest (lowest) HTML element with the given name from the stack. */
@Nullable
Element getFromStack(String elName) {
final int bottom = stack.size() - 1;
final int upper = bottom >= maxQueueDepth ? bottom - maxQueueDepth : 0;
for (int pos = bottom; pos >= upper; pos--) {
Element next = stack.get(pos);
if (next.normalName().equals(elName)) {
if (next.normalName().equals(elName) && NamespaceHtml.equals(next.tag().namespace())) {
return next;
}
}
Expand All @@ -512,8 +515,24 @@ boolean removeFromStack(Element el) {
return false;
}

/**
 * Pops elements off the end of the stack, one at a time, until an element in the HTML namespace
 * whose normal name equals the given name has been removed. Elements that do not match
 * (by name or by namespace) are removed along the way.
 *
 * @param elName the normal name of the HTML element to close
 * @return the matching element that was removed, or {@code null} if the whole stack was popped
 *         without finding a match
 */
@Nullable
Element popStackToClose(String elName) {
    int idx = stack.size();
    while (--idx >= 0) {
        // remove(int) hands back the element that was at idx, so no separate get() is needed
        final Element popped = stack.remove(idx);
        final boolean matches = popped.normalName().equals(elName)
            && NamespaceHtml.equals(popped.tag().namespace());
        if (!matches)
            continue;

        // only report the close when it was driven by an actual end tag token
        if (currentToken instanceof Token.EndTag)
            onNodeClosed(popped, currentToken);
        return popped;
    }
    return null;
}

/** Pops the stack until an element with the supplied name is removed, irrespective of namespace. */
@Nullable
Element popStackToCloseAnyNamespace(String elName) {
for (int pos = stack.size() -1; pos >= 0; pos--) {
Element el = stack.get(pos);
stack.remove(pos);
Expand All @@ -526,12 +545,12 @@ Element popStackToClose(String elName) {
return null;
}

// elnames is sorted, comes from Constants
void popStackToClose(String... elNames) {
/** Pops the stack until one of the given HTML elements is removed. */
void popStackToClose(String... elNames) { // elnames is sorted, comes from Constants
for (int pos = stack.size() -1; pos >= 0; pos--) {
Element el = stack.get(pos);
stack.remove(pos);
if (inSorted(el.normalName(), elNames)) {
if (inSorted(el.normalName(), elNames) && NamespaceHtml.equals(el.tag().namespace())) {
if (currentToken instanceof Token.EndTag)
onNodeClosed(el, currentToken);
break;
Expand All @@ -551,10 +570,12 @@ void clearStackToTableRowContext() {
clearStackToContext("tr", "template");
}

/** Removes elements from the stack until one of the supplied HTML elements is removed. */
private void clearStackToContext(String... nodeNames) {
for (int pos = stack.size() -1; pos >= 0; pos--) {
Element next = stack.get(pos);
if (StringUtil.in(next.normalName(), nodeNames) || next.normalName().equals("html"))
if (NamespaceHtml.equals(next.tag().namespace()) &&
(StringUtil.in(next.normalName(), nodeNames) || next.normalName().equals("html")))
break;
else
stack.remove(pos);
Expand Down Expand Up @@ -612,6 +633,9 @@ boolean resetInsertionMode() {
node = contextElement;
}
String name = node != null ? node.normalName() : "";
if (!NamespaceHtml.equals(node.tag().namespace()))
continue; // only looking for HTML elements here

switch (name) {
case "select":
transition(HtmlTreeBuilderState.InSelect);
Expand Down Expand Up @@ -820,12 +844,13 @@ void generateImpliedEndTags() {
}

/**
Pops elements off the stack according to the implied end tag rules
Pops HTML elements off the stack according to the implied end tag rules
@param thorough if we are thorough (includes table elements etc) or not
*/
void generateImpliedEndTags(boolean thorough) {
final String[] search = thorough ? TagThoroughSearchEndTags : TagSearchEndTags;
while (inSorted(currentElement().normalName(), search)) {
while (NamespaceHtml.equals(currentElement().tag().namespace())
&& inSorted(currentElement().normalName(), search)) {
pop();
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ boolean process(Token t, HtmlTreeBuilder tb) {
tb.error(this);
} else {
tb.generateImpliedEndTags(true);
if (!name.equals(tb.currentElement().normalName())) tb.error(this);
if (!tb.currentElementIs(name)) tb.error(this);
tb.popStackToClose(name);
tb.clearFormattingElementsToLastMarker();
tb.popTemplateMode();
Expand Down Expand Up @@ -1796,7 +1796,7 @@ else if (HtmlTreeBuilderState.isWhitespace(c))
tb.error(this);
while (i != 0) {
if (el.normalName().equals(end.normalName)) {
tb.popStackToClose(el.normalName());
tb.popStackToCloseAnyNamespace(el.normalName());
return true;
}
i--;
Expand Down
Binary file added src/test/resources/fuzztests/63202.html.gz
Binary file not shown.

0 comments on commit 26cfb42

Please sign in to comment.