From b04e9986a3f8dca69cbaeb433d9488d07f3bc710 Mon Sep 17 00:00:00 2001 From: Dan Fickle Date: Thu, 23 Jun 2016 15:20:30 +1000 Subject: [PATCH] For #26 and #21 - Issues relating to character substitution and width The dangers of copy and pasting code! Our string width and string replacement routines had drifted out of sync. --- README.md | 4 ++- .../java/com/openhtmltopdf/util/OpenUtil.java | 26 +++++++++++++++++++ .../pdfboxout/PdfBoxOutputDevice.java | 6 ++++- .../pdfboxout/PdfBoxTextRenderer.java | 23 +++++++++++++--- 4 files changed, 54 insertions(+), 5 deletions(-) create mode 100644 openhtmltopdf-core/src/main/java/com/openhtmltopdf/util/OpenUtil.java diff --git a/README.md b/README.md index 3cb8be3bd..363a44d98 100644 --- a/README.md +++ b/README.md @@ -262,7 +262,9 @@ CHANGELOG head - 0.0.1-RC4-SNAPSHOT ======== - ++ [Silently discard control characters, etc at the rendering stage](https://github.com/danfickle/openhtmltopdf/issues/21#issuecomment-227850449) Thanks @scoldwell ++ [Fixed incorrect spacing when characters are replaced](https://github.com/danfickle/openhtmltopdf/issues/26) Thanks @scoldwell + 0.0.1-RC3 ======== + [Experimental and unstable SVG support - early prototype](https://github.com/danfickle/openhtmltopdf/issues/23) diff --git a/openhtmltopdf-core/src/main/java/com/openhtmltopdf/util/OpenUtil.java b/openhtmltopdf-core/src/main/java/com/openhtmltopdf/util/OpenUtil.java new file mode 100644 index 000000000..2f3842eb6 --- /dev/null +++ b/openhtmltopdf-core/src/main/java/com/openhtmltopdf/util/OpenUtil.java @@ -0,0 +1,26 @@ +package com.openhtmltopdf.util; + +public class OpenUtil { + + private OpenUtil() {} + + /** + * Checks if a code point is printable, i.e. it is neither an ISO control character nor in the CONTROL, FORMAT, UNASSIGNED, PRIVATE_USE or SURROGATE general category. If false, it can be safely discarded at the + * rendering stage, else it should be replaced with the replacement character, + * if a suitable glyph cannot be found. 
+ * @param codePoint + * @return whether codePoint is printable + */ + public static boolean isCodePointPrintable(int codePoint) { + if (Character.isISOControl(codePoint)) + return false; + + int category = Character.getType(codePoint); + + return !(category == Character.CONTROL || + category == Character.FORMAT || + category == Character.UNASSIGNED || + category == Character.PRIVATE_USE || + category == Character.SURROGATE); + } +} diff --git a/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxOutputDevice.java b/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxOutputDevice.java index 90b93360b..174e29a8a 100644 --- a/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxOutputDevice.java +++ b/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxOutputDevice.java @@ -91,6 +91,7 @@ import com.openhtmltopdf.render.PageBox; import com.openhtmltopdf.render.RenderingContext; import com.openhtmltopdf.util.Configuration; +import com.openhtmltopdf.util.OpenUtil; import com.openhtmltopdf.util.XRLog; public class PdfBoxOutputDevice extends AbstractOutputDevice implements OutputDevice { @@ -432,9 +433,12 @@ else if (replace.fontDescription != current.des) { sb = new StringBuilder(); } - if (Character.isSpaceChar(unicode)) { + if (Character.isSpaceChar(unicode) || Character.isWhitespace(unicode)) { sb.append(' '); } + else if (!OpenUtil.isCodePointPrintable(unicode)) { + // Not printable: silently discard, append nothing + } else { sb.append(replace.replacement); } diff --git a/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxTextRenderer.java b/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxTextRenderer.java index fad3e341d..7d23b0771 100644 --- a/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxTextRenderer.java +++ b/openhtmltopdf-pdfbox/src/main/java/com/openhtmltopdf/pdfboxout/PdfBoxTextRenderer.java @@ -35,6 +35,8 @@ import com.openhtmltopdf.render.FSFontMetrics; import 
com.openhtmltopdf.render.JustificationInfo; import com.openhtmltopdf.util.Configuration; +import com.openhtmltopdf.util.OpenUtil; +import com.openhtmltopdf.util.XRLog; public class PdfBoxTextRenderer implements TextRenderer { private static float TEXT_MEASURING_DELTA = 0.01f; @@ -178,7 +180,14 @@ private float getStringWidthSlow(FSFont bf, String str) { ReplacementChar replace = getReplacementChar(bf); List fonts = ((PdfBoxFSFont) bf).getFontDescription(); float strWidthResult = 0; - + float strWidthSpace = 0; + + try { + strWidthSpace = fonts.get(0).getFont().getStringWidth(" "); + } catch (Exception e) { + XRLog.general("Font doesn't contain a space character!"); + } + for (int i = 0; i < str.length(); ) { int unicode = str.codePointAt(i); i += Character.charCount(unicode); @@ -209,8 +218,16 @@ private float getStringWidthSlow(FSFont bf, String str) { } if (!gotWidth) { - // We still don't have the character after all that. So use replacement character. - strWidthResult += replace.width; + + if (Character.isSpaceChar(unicode) || Character.isWhitespace(unicode)) { + strWidthResult += strWidthSpace; + } + else if (!OpenUtil.isCodePointPrintable(unicode)) { + // Not printable: silently discarded, so it adds no width + } else { + // We still don't have the character after all that. So use replacement character. + strWidthResult += replace.width; + } } }