Skip to content

Commit

Permalink
Issue #7 - switch to simple_html_dom.php
Browse files Browse the repository at this point in the history
  • Loading branch information
bobbingwide committed Nov 26, 2020
1 parent ee0e9c7 commit 3624b1e
Show file tree
Hide file tree
Showing 2 changed files with 108 additions and 115 deletions.
70 changes: 67 additions & 3 deletions class-stringer.php
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,81 @@

class Stringer {

private $notNeededTags = [];

/**
 * Prepare the stringer for use.
 *
 * Loads simple_html_dom.php unless its str_get_html() helper has already
 * been defined, then builds the lookup of tags whose inner text is skipped.
 */
function __construct() {
	if ( false === function_exists( 'str_get_html' ) ) {
		require_once 'simple_html_dom.php';
	}

	$this->setUpNotNeeded();
}

/**
 * Walk the HTML of a parsed block and print its node tree.
 *
 * @param array $block Parsed block; its 'innerHTML' entry is the markup to inspect.
 * @return void
 */
function get_strings( $block ) {
	$content = isset( $block['innerHTML'] ) ? $block['innerHTML'] : '';

	// str_get_html() returns false rather than a DOM object when the markup is
	// empty or exceeds its size limit; there is nothing to walk in that case.
	$html = str_get_html( $content );
	if ( ! $html ) {
		return;
	}

	$this->recurse( $html->root );
}

/**
 * Build the lookup of tags whose inner text should not be printed.
 *
 * The tag names become array keys (via array_flip) so that membership can be
 * tested with isset().
 */
function setUpNotNeeded() {
	$this->notNeededTags = array_flip( [ 'ul', 'ol' ] );
}

/**
 * Decide whether the inner text of a tag should be printed.
 *
 * @param string $tag Tag name to look up.
 * @return bool False when the tag is a key of $this->notNeededTags, true otherwise.
 */
function isInnertextNeeded( $tag ) {
	return ! isset( $this->notNeededTags[ $tag ] );
}

function get_strings( $content ) {
$html = str_get_html( $content );
print_r( $html );
/**
 * Print a node and then, recursively, each of its children.
 *
 * For every node this echoes a newline, one space per nesting level, the tag,
 * the node type, the trimmed inner text (skipped for the root node and for
 * tags reported as not needed by isInnertextNeeded()), and the values returned
 * by getAllAttributes() joined with spaces. A closing newline is echoed once
 * the children have been processed.
 *
 * @param object $node Node to print; read via ->tag, ->nodetype, ->children,
 *                     ->innertext() and ->getAllAttributes().
 * @return void
 */
function recurse( $node ) {
	// Nesting depth shared across the recursive calls; restored before returning.
	static $nest = 0;
	$nest++;

	echo PHP_EOL, str_repeat( ' ', $nest ), $node->tag, ' ', $node->nodetype, ' ';

	if ( HDOM_TYPE_ROOT !== $node->nodetype && $this->isInnertextNeeded( $node->tag ) ) {
		echo trim( $node->innertext() );
	}

	echo implode( ' ', $node->getAllAttributes() );

	if ( count( $node->children ) ) {
		foreach ( $node->children as $child ) {
			$this->recurse( $child );
		}
	}

	$nest--;
	echo PHP_EOL;
}

}
153 changes: 41 additions & 112 deletions html2pot.php
Original file line number Diff line number Diff line change
@@ -1,36 +1,55 @@
<?php

/**
* Convert .html files to .pot files
/** @copyright (C) Copyright Bobbing Wide 2020
* @package oik-i18n
*
* oik batch routine to Internationalize / localize HTML templates and template parts
*
* Syntax:
* `
* cd [path]/wp-content/plugins/oik-i18n
* oikwp html2pot.php type filename.html locale
*
* where:
* type is template / part
* filename.html is the file name
* locale is the target locale: en_GB, bb_BB, fr_FR
*
* Output will be written to a languages folder
*/

$html = file_get_contents( 'test.html');
//print_r( $html );
/**
* Stage 1. Find all the translatable strings in an .html file
* Stage 2. Extract from all the theme's .html files to a .pot file
* Stage 3. Translate into local language
* Stage 4. Reparse, apply the target language and save in the new locale.
*
* This is the very beginning of Stage 1.
*
*/

/*
// Let's grab all image tags from the HTML.
$dom_doc = new DOMDocument();
$filename = 'test.html';

// The @ is not enough to suppress errors when dealing with libxml,
// we have to tell it directly how we want to handle errors.
libxml_use_internal_errors( true );
$dom_doc->loadHTML( );
libxml_use_internal_errors( false );
$html = file_get_contents( $filename);
print_r( $html );

$image_tags = $dom_doc->getElementsByTagName( 'img' );
if ( 0 === strlen( $html) ) {
echo "Invalid file: " . $filename;

// For each image Tag, make sure it can be added to the $images array, and add it.
foreach ( $image_tags as $image_tag ) {
$img_src = $image_tag->getAttribute( 'src' );
gob();
}

if ( empty( $img_src ) ) {
continue;
}
*/
require_once 'class-stringer.php';

/**
* Use Gutenberg to parse the content into individual blocks.
* I've got a block recreation routine in oik-clone.
*/
$parser = new WP_Block_Parser();
$blocks = $parser->parse( $html);
//print_r( $blocks );
$blocks = $parser->parse( $html );

print_r( $blocks );

$stringer = new Stringer();
$count = 0;
foreach ( $blocks as $block) {
Expand All @@ -41,93 +60,3 @@
//print_r( $block );

}

/**
 * Debug helper that loads a block's innerHTML into a DOMDocument and prints
 * the resulting node tree.
 */
class Stringer {
	/** @var DOMDocument|null Document instance reused for every block. */
	private $dom_doc = null;

	function __construct() {
		$this->dom_doc = new DOMDocument();
	}

	/**
	 * Print the text content and node tree of a block's inner HTML.
	 *
	 * @param array $block Parsed block; its 'innerHTML' entry is the markup to load.
	 * @return void
	 */
	function get_strings( $block ) {
		$inner_html = isset( $block['innerHTML'] ) ? (string) $block['innerHTML'] : '';

		// loadHTML() rejects an empty string, and the reused document would
		// otherwise still hold the previous block's content.
		if ( '' === $inner_html ) {
			return;
		}

		// Collect libxml parse errors internally instead of emitting them as
		// PHP warnings, then restore the caller's setting.
		$previous = libxml_use_internal_errors( true );
		$loaded   = $this->dom_doc->loadHTML( $inner_html );
		libxml_clear_errors();
		libxml_use_internal_errors( $previous );

		if ( ! $loaded ) {
			return;
		}

		echo $this->dom_doc->textContent;
		echo PHP_EOL;
		$this->showDOMNode( $this->dom_doc );
		echo PHP_EOL;
	}

	/**
	 * Recursively print the children of a DOM node.
	 *
	 * Nodes with children are printed as "PN:<name>: PT:<type>" and then
	 * descended into. Leaf nodes with a non-blank value are printed with
	 * their name, value, type and (for text nodes) whole text.
	 *
	 * @param DOMNode $domNode Node whose children are printed.
	 * @return void
	 */
	function showDOMNode( DOMNode $domNode ) {
		// Nesting depth shared across the recursive calls.
		static $nested = 0;
		$nested++;

		foreach ( $domNode->childNodes as $node ) {
			if ( $node->hasChildNodes() ) {
				echo PHP_EOL;
				echo str_repeat( ' ', $nested );
				echo 'PN:' . $node->nodeName . ': ';
				echo 'PT:' . $node->nodeType;
				$this->showDOMNode( $node );
				continue;
			}

			// nodeValue is null for some node types (e.g. the doctype).
			$value = trim( (string) $node->nodeValue );

			// Compare against '' rather than using empty(), which would also
			// discard a text node whose content is "0".
			if ( '' === $value ) {
				continue;
			}

			echo PHP_EOL;
			echo str_repeat( ' ', $nested );
			echo 'N:' . $node->nodeName . ': ';
			echo 'V:' . $node->nodeValue;
			echo 'T:' . $node->nodeType;
			// wholeText only exists on text nodes; comments and other leaves have none.
			echo 'W:' . ( $node instanceof DOMText ? $node->wholeText : '' );
		}

		$nested--;
	}
}

/*
* DOMText Object
(
[wholeText] =>
[data] =>
[length] => 2
[nodeName] => #text
[nodeValue] =>
[nodeType] => 3
[parentNode] => (object value omitted)
[childNodes] =>
[firstChild] =>
[lastChild] =>
[previousSibling] => (object value omitted)
[nextSibling] =>
[attributes] =>
[ownerDocument] => (object value omitted)
[namespaceURI] =>
[prefix] =>
[localName] =>
[baseURI] =>
[textContent] =>
)
*/

0 comments on commit 3624b1e

Please sign in to comment.