diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php
index 8f9e7b93e5c73..0c17712b68aee 100644
--- a/src/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -1174,7 +1174,10 @@ public function has_class( $wanted_class ) {
*/
public function set_bookmark( $name ) {
// It only makes sense to set a bookmark if the parser has paused on a concrete token.
- if ( self::STATE_INCOMPLETE === $this->parser_state ) {
+ if (
+ self::STATE_COMPLETE === $this->parser_state ||
+ self::STATE_INCOMPLETE === $this->parser_state
+ ) {
return false;
}
@@ -1555,12 +1558,12 @@ private function parse_next_tag() {
}
/*
- *
+ * `` because there's no inside content.
+ /*
+ * @todo When implementing `set_modifiable_text()` ensure that updates to this token
+ * don't break the syntax for short comments, e.g. `<!--->`. Unlike other comment
+ * and bogus comment syntax, these leave no clear insertion point for text and
+ * they need to be modified specially in order to contain text. E.g. to store
+ * `?` as the modifiable text, the `<!--->` needs to become `<!--?-->`, which
+ * involves inserting an additional `-` into the token after the modifiable text.
+ */
$this->parser_state = self::STATE_COMMENT;
$this->token_length = $closer_at + $span_of_dashes + 1 - $this->token_starts_at;
$this->text_starts_at = $this->token_starts_at + 4;
@@ -1628,7 +1638,7 @@ private function parse_next_tag() {
}
/*
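- * <!DOCTYPE transitions to DOCTYPE state – skip to the nearest >
+ * `<!DOCTYPE` transitions to DOCTYPE state – skip to the nearest >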
* These are ASCII-case-insensitive.
* https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
*/
@@ -1726,7 +1736,7 @@ private function parse_next_tag() {
}
/*
- * <? transitions to a bogus comment state – skip to the nearest >
+ * `<?` transitions to a bogus comment state – skip to the nearest >
* See https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
*/
if ( '?' === $html[ $at + 1 ] ) {
@@ -1789,6 +1799,9 @@ private function parse_next_tag() {
* If a non-alpha starts the tag name in a tag closer it's a comment.
* Find the first `>`, which closes the comment.
*
+ * This parser classifies these particular comments as special "funky comments"
+ * which are made available for further processing.
+ *
* See https://html.spec.whatwg.org/#parse-error-invalid-first-character-of-tag-name
*/
if ( $this->is_closing_tag ) {
@@ -2576,6 +2589,7 @@ public function is_tag_closer() {
* - `#cdata-section` when matched on a CDATA node.
* - `#processing-instruction` when matched on a processing instruction.
* - `#comment` when matched on a comment.
+ * - `#doctype` when matched on a DOCTYPE declaration.
* - `#presumptuous-tag` when matched on an empty tag closer.
* - `#funky-comment` when matched on a funky comment.
*
@@ -2667,20 +2681,25 @@ public function get_token_name() {
* @return string
*/
public function get_modifiable_text() {
- $at = $this->text_starts_at;
- $length = $this->text_length;
- $text = substr( $this->html, $at, $length );
+ if ( null === $this->text_starts_at ) {
+ return '';
+ }
+
+ $text = substr( $this->html, $this->text_starts_at, $this->text_length );
if (
self::STATE_CDATA_NODE === $this->parser_state ||
- self::STATE_PI_NODE === $this->parser_state
+ self::STATE_COMMENT === $this->parser_state ||
+ self::STATE_DOCTYPE === $this->parser_state ||
+ self::STATE_PI_NODE === $this->parser_state ||
+ self::STATE_FUNKY_COMMENT === $this->parser_state
) {
return $text;
}
- $text = html_entity_decode( $text, ENT_QUOTES | ENT_HTML5 | ENT_SUBSTITUTE );
+ $decoded = html_entity_decode( $text, ENT_QUOTES | ENT_HTML5 | ENT_SUBSTITUTE );
- if ( empty( $text ) ) {
+ if ( empty( $decoded ) ) {
return '';
}
@@ -2694,14 +2713,14 @@ public function get_modifiable_text() {
switch ( $this->get_tag() ) {
case 'PRE':
case 'TEXTAREA':
- if ( "\n" === $text[0] ) {
- return substr( $text, 1 );
+ if ( "\n" === $decoded[0] ) {
+ return substr( $decoded, 1 );
}
break;
}
}
- return $text;
+ return $decoded;
}
/**
@@ -3286,7 +3305,8 @@ private function matches() {
const STATE_DOCTYPE = 'STATE_DOCTYPE';
/**
- * Indicates that the parser has found an empty tag closer.
+ * Indicates that the parser has found an empty tag closer `</>`.
+ *
* Note that in HTML there are no empty tag closers, and they
* are ignored. Nonetheless, the Tag Processor still
* recognizes them as they appear in the HTML stream.
@@ -3305,8 +3325,14 @@ private function matches() {
* Indicates that the parser has found a "funky comment"
* and it's possible to read and modify its modifiable text.
*
+ * Example:
+ *
+ *     </%url>
+ *     </{"wp-bit":"query/post-author"}>
+ *     </2>
+ *
* Funky comments are tag closers with invalid tag names. Note
- * that in HTML these are treated as HTML comments. Nonetheless,
+ * that in HTML these are turned into bogus comments. Nonetheless,
* the Tag Processor recognizes them in a stream of HTML and
* exposes them for inspection and modification.
*