Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add parsing of html image in base64 format #1382

Merged
merged 10 commits into from
May 30, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ v0.15.0 (?? ??? 2018)
- Add support for table indent (tblInd) @Trainmaster #1343
- Added parsing of internal links in HTML reader @lalop #1336
- Several improvements to charts @JAEK-S #1332
- Add parsing of html image in base64 format @jgpATs2w #1382

### Fixed
- Fix reading of docx default style - @troosan #1238
Expand Down
10 changes: 10 additions & 0 deletions samples/resources/Sample_30_ReadHTML.html
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,15 @@ <h1>Adding element via HTML</h1>
<ul><li>Item 1</li><li>Item 2</li><ul><li>Item 2.1</li><li>Item 2.1</li></ul></ul>
<p>Ordered (numbered) list:</p>
<ol><li>Item 1</li><li>Item 2</li></ol>

<p style="line-height:2">Double height</p>

<h2>Includes images</h2>
<img src="https://phpword.readthedocs.io/en/latest/_images/phpword.png" alt=""/>

<img src="https://localhost/gev/desarrollo/actividades/pruebas_14/5b064503587f7.jpeg" name="Imagen 12" align="bottom" width="208" height="183" border="0"/>
<img src="http://localhost/gev/desarrollo/actividades/pruebas_14/5b064503589db.png" name="Imagen 13" align="bottom" width="143" height="202" border="0"/>
<img src="http://localhost/gev/desarrollo/actividades/pruebas_14/5b0645035aac8.jpeg" name="Imagen 14" align="bottom" width="194" height="188" border="0"/>

</body>
</html>
Empty file modified samples/results/.gitignore
100644 → 100755
Empty file.
56 changes: 54 additions & 2 deletions src/PhpWord/Shared/Html.php
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
use PhpOffice\PhpWord\Element\AbstractContainer;
use PhpOffice\PhpWord\Element\Row;
use PhpOffice\PhpWord\Element\Table;
use PhpOffice\PhpWord\Settings;
use PhpOffice\PhpWord\SimpleType\Jc;
use PhpOffice\PhpWord\SimpleType\NumberFormat;

Expand All @@ -32,6 +33,7 @@ class Html
{
private static $listIndex = 0;
private static $xpath;
private static $options;

/**
* Add HTML parts.
Expand All @@ -44,13 +46,17 @@ class Html
* @param string $html The code to parse
* @param bool $fullHTML If it's a full HTML, no need to add 'body' tag
* @param bool $preserveWhiteSpace If false, the whitespaces between nodes will be removed
* @param array $options:
* + IMG_SRC_SEARCH: optional to speed up images loading from remote url when files can be found locally
* + IMG_SRC_REPLACE: optional to speed up images loading from remote url when files can be found locally
*/
public static function addHtml($element, $html, $fullHTML = false, $preserveWhiteSpace = true)
public static function addHtml($element, $html, $fullHTML = false, $preserveWhiteSpace = true, $options = null)
{
/*
* @todo parse $stylesheet for default styles. Should result in an array based on id, class and element,
* which could be applied when such an element occurs in the parseNode function.
*/
self::$options = $options;

// Preprocess: remove all line ends, decode HTML entity,
// fix ampersand and angle brackets and add body tag for HTML fragments
Expand Down Expand Up @@ -141,6 +147,7 @@ protected static function parseNode($node, $element, $styles = array(), $data =
'sup' => array('Property', null, null, $styles, null, 'superScript', true),
'sub' => array('Property', null, null, $styles, null, 'subScript', true),
'span' => array('Span', $node, null, $styles, null, null, null),
'font' => array('Span', $node, null, $styles, null, null, null),
'table' => array('Table', $node, $element, $styles, null, null, null),
'tr' => array('Row', $node, $element, $styles, null, null, null),
'td' => array('Cell', $node, $element, $styles, null, null, null),
Expand Down Expand Up @@ -648,7 +655,52 @@ private static function parseImage($node, $element)
break;
}
}
$newElement = $element->addImage($src, $style);
$originSrc = $src;
if (strpos($src, 'data:image') !== false) {
$tmpDir = Settings::getTempDir() . '/';

$match = array();
preg_match('/data:image\/(\w+);base64,(.+)/', $src, $match);

$src = $imgFile = $tmpDir . uniqid() . '.' . $match[1];

$ifp = fopen($imgFile, 'wb');

if ($ifp !== false) {
fwrite($ifp, base64_decode($match[2]));
fclose($ifp);
}
}
$src = urldecode($src);

if (!is_file($src)
&& !is_null(self::$options)
&& isset(self::$options['IMG_SRC_SEARCH'])
&& isset(self::$options['IMG_SRC_REPLACE'])) {
$src = str_replace(self::$options['IMG_SRC_SEARCH'], self::$options['IMG_SRC_REPLACE'], $src);
}

if (!is_file($src)) {
if ($imgBlob = @file_get_contents($src)) {
$tmpDir = Settings::getTempDir() . '/';
$match = array();
preg_match('/.+\.(\w+)$/', $src, $match);
$src = $tmpDir . uniqid() . '.' . $match[1];

$ifp = fopen($src, 'wb');

if ($ifp !== false) {
fwrite($ifp, $imgBlob);
fclose($ifp);
}
}
}

if (is_file($src)) {
$newElement = $element->addImage($src, $style);
} else {
throw new \Exception("Could not load image $originSrc");
}

return $newElement;
}
Expand Down
87 changes: 87 additions & 0 deletions tests/PhpWord/Shared/HtmlTest.php

Large diffs are not rendered by default.