diff --git a/src/wp-includes/html-api/class-wp-html-tag-processor.php b/src/wp-includes/html-api/class-wp-html-tag-processor.php index 8f9e7b93e5c73..0c17712b68aee 100644 --- a/src/wp-includes/html-api/class-wp-html-tag-processor.php +++ b/src/wp-includes/html-api/class-wp-html-tag-processor.php @@ -1174,7 +1174,10 @@ public function has_class( $wanted_class ) { */ public function set_bookmark( $name ) { // It only makes sense to set a bookmark if the parser has paused on a concrete token. - if ( self::STATE_INCOMPLETE === $this->parser_state ) { + if ( + self::STATE_COMPLETE === $this->parser_state || + self::STATE_INCOMPLETE === $this->parser_state + ) { return false; } @@ -1555,12 +1558,12 @@ private function parse_next_tag() { } /* - * + * `` because there's no inside content. + /* + * @todo When implementing `set_modifiable_text()` ensure that updates to this token + * don't break the syntax for short comments, e.g. ``. Unlike other comment + * and bogus comment syntax, these leave no clear insertion point for text and + * they need to be modified specially in order to contain text. E.g. to store + * `?` as the modifiable text, the `` needs to become ``, which + * involves inserting an additional `-` into the token after the modifiable text. + */ $this->parser_state = self::STATE_COMMENT; $this->token_length = $closer_at + $span_of_dashes + 1 - $this->token_starts_at; $this->text_starts_at = $this->token_starts_at + 4; @@ -1628,7 +1638,7 @@ private function parse_next_tag() { } /* - * + * ` * These are ASCII-case-insensitive. * https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state */ @@ -1726,7 +1736,7 @@ private function parse_next_tag() { } /* - * + * ` * See https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state */ if ( '?' === $html[ $at + 1 ] ) { @@ -1789,6 +1799,9 @@ private function parse_next_tag() { * If a non-alpha starts the tag name in a tag closer it's a comment. * Find the first `>`, which closes the comment. 
* + * This parser classifies these particular comments as special "funky comments" + * which are made available for further processing. + * * See https://html.spec.whatwg.org/#parse-error-invalid-first-character-of-tag-name */ if ( $this->is_closing_tag ) { @@ -2576,6 +2589,7 @@ public function is_tag_closer() { * - `#cdata-section` when matched on a CDATA node. * - `#processing-instruction` when matched on a processing instruction. * - `#comment` when matched on a comment. + * - `#doctype` when matched on a DOCTYPE declaration. * - `#presumptuous-tag` when matched on an empty tag closer. * - `#funky-comment` when matched on a funky comment. * @@ -2667,20 +2681,25 @@ public function get_token_name() { * @return string */ public function get_modifiable_text() { - $at = $this->text_starts_at; - $length = $this->text_length; - $text = substr( $this->html, $at, $length ); + if ( null === $this->text_starts_at ) { + return ''; + } + + $text = substr( $this->html, $this->text_starts_at, $this->text_length ); if ( self::STATE_CDATA_NODE === $this->parser_state || - self::STATE_PI_NODE === $this->parser_state + self::STATE_COMMENT === $this->parser_state || + self::STATE_DOCTYPE === $this->parser_state || + self::STATE_PI_NODE === $this->parser_state || + self::STATE_FUNKY_COMMENT === $this->parser_state ) { return $text; } - $text = html_entity_decode( $text, ENT_QUOTES | ENT_HTML5 | ENT_SUBSTITUTE ); + $decoded = html_entity_decode( $text, ENT_QUOTES | ENT_HTML5 | ENT_SUBSTITUTE ); - if ( empty( $text ) ) { + if ( empty( $decoded ) ) { return ''; } @@ -2694,14 +2713,14 @@ public function get_modifiable_text() { switch ( $this->get_tag() ) { case 'PRE': case 'TEXTAREA': - if ( "\n" === $text[0] ) { - return substr( $text, 1 ); + if ( "\n" === $decoded[0] ) { + return substr( $decoded, 1 ); } break; } } - return $text; + return $decoded; } /** @@ -3286,7 +3305,8 @@ private function matches() { const STATE_DOCTYPE = 'STATE_DOCTYPE'; /** - * Indicates that the parser has 
found an empty tag closer. + * Indicates that the parser has found an empty tag closer `</>`. + * * Note that in HTML there are no empty tag closers, and they * are ignored. Nonetheless, the Tag Processor still * recognizes them as they appear in the HTML stream. @@ -3305,8 +3325,14 @@ private function matches() { * Indicates that the parser has found a "funky comment" * and it's possible to read and modify its modifiable text. * + * Example: + * + * + * + * + * * Funky comments are tag closers with invalid tag names. Note - * that in HTML these are treated as HTML comments. Nonetheless, + * that in HTML these are turned into bogus comments. Nonetheless, * the Tag Processor recognizes them in a stream of HTML and * exposes them for inspection and modification. *