diff --git a/packages/playground/data-liberation/bootstrap.php b/packages/playground/data-liberation/bootstrap.php index 1184403f7b..b08856e838 100644 --- a/packages/playground/data-liberation/bootstrap.php +++ b/packages/playground/data-liberation/bootstrap.php @@ -61,6 +61,7 @@ require_once __DIR__ . '/src/entity-readers/WP_Entity_Reader.php'; require_once __DIR__ . '/src/entity-readers/WP_HTML_Entity_Reader.php'; +require_once __DIR__ . '/src/entity-readers/WP_EPub_Entity_Reader.php'; require_once __DIR__ . '/src/entity-readers/WP_WXR_Entity_Reader.php'; require_once __DIR__ . '/src/entity-readers/WP_Directory_Tree_Entity_Reader.php'; diff --git a/packages/playground/data-liberation/src/block-markup/WP_HTML_To_Blocks.php b/packages/playground/data-liberation/src/block-markup/WP_HTML_To_Blocks.php index 329e75bc39..6aceeb9c53 100644 --- a/packages/playground/data-liberation/src/block-markup/WP_HTML_To_Blocks.php +++ b/packages/playground/data-liberation/src/block-markup/WP_HTML_To_Blocks.php @@ -28,31 +28,29 @@ class WP_HTML_To_Blocks implements WP_Block_Markup_Converter { private $state = self::STATE_READY; private $block_stack = array(); - private $html; + private $markup_processor; private $ignore_text = false; private $in_ephemeral_paragraph = false; private $block_markup = ''; private $metadata = array(); + private $last_error = null; - public function __construct( $html ) { - $this->html = WP_HTML_Processor::create_fragment( $html ); + public function __construct( $markup_processor ) { + $this->markup_processor = $markup_processor; } - /** - * @inheritDoc - */ public function convert() { if ( self::STATE_READY !== $this->state ) { return false; } - while ( $this->html->next_token() ) { - switch ( $this->html->get_token_type() ) { + while ( $this->markup_processor->next_token() ) { + switch ( $this->markup_processor->get_token_type() ) { case '#text': if ( $this->ignore_text ) { break; } - $this->append_html( htmlspecialchars( $this->html->get_modifiable_text() ) ); + $this->append_rich_text( htmlspecialchars( $this->markup_processor->get_modifiable_text() ) ); break; case '#tag': $this->handle_tag(); @@ -60,13 +58,16 @@ public function convert() { } } + if ( $this->markup_processor->get_last_error() ) { + $this->last_error = $this->markup_processor->get_last_error(); + return false; + } + $this->close_ephemeral_paragraph(); + return true; } - /** - * @inheritDoc - */ public function get_first_meta_value( $key ) { if ( ! array_key_exists( $key, $this->metadata ) ) { return null; @@ -74,231 +75,204 @@ public function get_first_meta_value( $key ) { return $this->metadata[ $key ][0]; } - /** - * @inheritDoc - */ public function get_all_metadata() { return $this->metadata; } - /** - * @inheritDoc - */ public function get_block_markup() { return $this->block_markup; } - /** - * Converts the currently matched HTML tag to block markup - * or metadata. - */ private function handle_tag() { - $html = $this->html; - $tag = $html->get_tag(); + $html = $this->markup_processor; + $tag = strtoupper( $html->get_tag() ); $tag_lowercase = strtolower( $tag ); - $is_opener = ! $html->is_tag_closer() && $html->expects_closer(); - $is_closer = $html->is_tag_closer(); - $is_void_tag = ! $html->expects_closer(); - $prefix = ( - $is_void_tag ? '' : ( - $is_closer ? '-' : '+' - ) - ); - $event = $prefix . $tag; - switch ( $event ) { - case 'META': - $key = $html->get_attribute( 'name' ); - $value = $html->get_attribute( 'content' ); - if ( ! array_key_exists( $key, $this->metadata ) ) { - $this->metadata[ $key ] = array(); - } - $this->metadata[ $key ][] = $value; - break; - case 'IMG': - $template = new \WP_HTML_Tag_Processor( '' ); - $template->next_tag(); - foreach ( array( 'alt', 'title', 'src' ) as $attr ) { - if ( $html->get_attribute( $attr ) ) { - $template->set_attribute( $attr, $html->get_attribute( $attr ) ); + $is_void_tag = ! $html->expects_closer() && ! $html->is_tag_closer(); + if ( $is_void_tag ) { + switch ( $tag ) { + case 'META': + $key = $html->get_attribute( 'name' ); + $value = $html->get_attribute( 'content' ); + if ( ! array_key_exists( $key, $this->metadata ) ) { + $this->metadata[ $key ] = array(); } - } - $this->append_html( $template->get_updated_html() ); - break; - case 'INPUT': - // Insert the input tag as HTML blocks. - $this->push_block( 'html' ); - $template = new \WP_HTML_Tag_Processor( '' ); - $template->next_tag(); - $attrs = $this->html->get_attribute_names_with_prefix( '' ); - foreach ( $attrs as $attr ) { - $template->set_attribute( $attr, $this->html->get_attribute( $attr ) ); - } - $this->append_html( htmlspecialchars( $template->get_updated_html() ) ); - $this->pop_block(); - break; - case 'HR': - $this->push_block( 'separator' ); - $this->block_markup .= '
element? Let's convert it into a formatting element.
- * - A block element? Let's convert it into a block.
- */
- if ( $this->is_at_inline_code_element() ) {
- $this->append_html( '<' . $tag_lowercase . '>' );
- } else {
- $this->push_block( 'code' );
- $this->block_markup .= '<' . $tag_lowercase . ' class="wp-block-code">';
- }
- break;
- case '-CODE':
- $this->block_markup .= '' . $tag_lowercase . '>';
- if ( ! $this->is_at_inline_code_element() ) {
+ // Block elements
+ case 'UL':
+ case 'OL':
+ $this->block_markup .= '
'; - break; - case '-P': - $this->block_markup .= '
'; - $this->pop_block(); - break; + break; - case '+H1': - case '+H2': - case '+H3': - case '+H4': - case '+H5': - case '+H6': - $this->push_block( - 'heading', - array( - 'level' => (int) $tag[1] ? (int) $tag[1] : 1, - ) - ); - $this->block_markup .= 'Last week, WordPress 6.8 was released.
HTML; - $reader = new WP_HTML_Entity_Reader( $html, 1 ); + $reader = new WP_HTML_Entity_Reader( new WP_HTML_Processor( $html ), 1 ); $entities = []; while ( $reader->next_entity() ) { $data = $reader->get_entity()->get_data(); diff --git a/packages/playground/data-liberation/tests/WPHTMLToBlocksTests.php b/packages/playground/data-liberation/tests/WPHTMLToBlocksTests.php index 41d6ba8ae8..d3daef742e 100644 --- a/packages/playground/data-liberation/tests/WPHTMLToBlocksTests.php +++ b/packages/playground/data-liberation/tests/WPHTMLToBlocksTests.php @@ -16,7 +16,7 @@ public function test_metadata_extraction() {Last week, WordPress 6.8 was released. This release includes a new default theme, a new block editor experience, and a new block library. It also includes a new block editor experience, and a new block library.
HTML; - $converter = new WP_HTML_To_Blocks( $html ); + $converter = new WP_HTML_To_Blocks( new WP_HTML_Processor( $html ) ); $converter->convert( $html ); $metadata = $converter->get_all_metadata(); $expected_metadata = [ @@ -35,7 +35,7 @@ public function test_metadata_extraction() { * @dataProvider provider_test_conversion */ public function test_html_to_blocks_conversion( $html, $expected ) { - $converter = new WP_HTML_To_Blocks( $html ); + $converter = new WP_HTML_To_Blocks( new WP_HTML_Processor( $html ) ); $converter->convert( $html ); $blocks = $converter->get_block_markup(); @@ -89,16 +89,12 @@ public function provider_test_conversion() { ], 'Formatted text' => [ 'html' => 'Bold and Italic
', - 'expected' => "Bold and Italic
" + 'expected' => "Bold and Italic
" ], 'A blockquote' => [ 'html' => 'A simple blockquote', 'expected' => "
A simple blockquote" ], - 'A an tag' => [ - 'html' => '', - 'expected' => "<input type="text" value="A simple input"> " - ], 'A table' => [ 'html' => <<