From d5621e5a20a1764925fd52de285fde7eb32670ce Mon Sep 17 00:00:00 2001 From: nguyenq Date: Sat, 3 Aug 2024 21:56:21 -0500 Subject: [PATCH] Implement getOSD method --- pom.xml | 2 +- .../net/sourceforge/tess4j/ITesseract.java | 14 ++++ .../net/sourceforge/tess4j/OSDResult.java | 83 +++++++++++++++++++ .../net/sourceforge/tess4j/Tesseract.java | 60 ++++++++++++++ .../net/sourceforge/tess4j/Tesseract1.java | 67 ++++++++++++++- src/main/resources/versionchanges.txt | 5 +- .../net/sourceforge/tess4j/TessAPI1Test.java | 20 ++--- .../net/sourceforge/tess4j/TessAPITest.java | 31 ++++--- .../sourceforge/tess4j/Tesseract1Test.java | 31 +++++-- .../net/sourceforge/tess4j/TesseractTest.java | 27 ++++-- 10 files changed, 295 insertions(+), 45 deletions(-) create mode 100644 src/main/java/net/sourceforge/tess4j/OSDResult.java diff --git a/pom.xml b/pom.xml index 671d7e1..aef79c2 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 net.sourceforge.tess4j tess4j - 5.12.1-SNAPSHOT + 5.13.0-SNAPSHOT jar Tess4J - Tesseract for Java diff --git a/src/main/java/net/sourceforge/tess4j/ITesseract.java b/src/main/java/net/sourceforge/tess4j/ITesseract.java index 18b5096..e15e4d2 100644 --- a/src/main/java/net/sourceforge/tess4j/ITesseract.java +++ b/src/main/java/net/sourceforge/tess4j/ITesseract.java @@ -377,4 +377,18 @@ public enum RenderedFormat { * @return list of Word */ List getWords(List biList, int pageIteratorLevel); + + /** + * Gets the detected orientation of the input image and apparent script (alphabet). + * @param imageFile an image file + * @return image orientation and script name + */ + OSDResult getOSD(File imageFile); + + /** + * Gets the detected orientation of the input image and apparent script (alphabet). 
+ * @param bi a buffered image + * @return image orientation and script name + */ + OSDResult getOSD(BufferedImage bi); } diff --git a/src/main/java/net/sourceforge/tess4j/OSDResult.java b/src/main/java/net/sourceforge/tess4j/OSDResult.java new file mode 100644 index 0000000..98cc8b2 --- /dev/null +++ b/src/main/java/net/sourceforge/tess4j/OSDResult.java @@ -0,0 +1,83 @@ +/** + * Copyright @ 2024 Quan Nguyen + * + * Licensed under the Apache License, Version 2.0 (the "License"); you may not + * use this file except in compliance with the License. You may obtain a copy of + * the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. + */ +package net.sourceforge.tess4j; + +/** + * Encapsulates Tesseract Orientation and Script Detection (OSD) results: the detected rotation, the script name and a confidence value for each. All fields are final. + */ +public class OSDResult { + + private final int orientDeg; + private final float orientConf; + private final String scriptName; + private final float scriptConf; + + /** + * Creates an empty result: 0 degrees, an empty script name and zero confidences. + */ + public OSDResult() { + this(0, 0, "", 0); + } + + /** + * Creates a result from the given detection values. 
+ * + * @param orientDeg the detected clockwise rotation of the input image in degrees (0, 90, 180, 270) + * @param orientConf confidence in the orientation (15.0 is reasonably confident) + * @param scriptName the name of the script + * @param scriptConf confidence level in the script + */ + public OSDResult(int orientDeg, float orientConf, String scriptName, float scriptConf) { + this.orientDeg = orientDeg; + this.orientConf = orientConf; + this.scriptName = scriptName; + this.scriptConf = scriptConf; + } + + /** + * + * @return the detected clockwise rotation of the input image in degrees + */ + public int getOrientDeg() { + return orientDeg; + } + + /** + * @return the confidence in the detected orientation + */ + public float getOrientConf() { + return orientConf; + } + + /** + * @return the name of the detected script + */ + public String getScriptName() { + return scriptName; + } + + /** + * @return the confidence level in the detected script + */ + public float getScriptConf() { + return scriptConf; + } + + @Override + public String toString() { + return String.format("Orientation: %d degrees, confidence: %f; script name: %s, confidence: %f", getOrientDeg(), getOrientConf(), getScriptName(), getScriptConf()); + } +} \ No newline at end of file diff --git a/src/main/java/net/sourceforge/tess4j/Tesseract.java b/src/main/java/net/sourceforge/tess4j/Tesseract.java index a69fd1d..1394fbe 100644 --- a/src/main/java/net/sourceforge/tess4j/Tesseract.java +++ b/src/main/java/net/sourceforge/tess4j/Tesseract.java @@ -22,6 +22,7 @@ import java.awt.image.*; import java.io.*; import java.nio.ByteBuffer; +import java.nio.FloatBuffer; import java.nio.IntBuffer; import java.util.*; import javax.imageio.IIOImage; @@ -1067,6 +1068,65 @@ public List createDocumentsWithResults(String[] filenames, String[] o return results; } + /** + * Gets the detected orientation of the input imageFile and apparent script + * (alphabet). 
+ * + * @param imageFile an image file + * @return image orientation and script name; an empty {@link OSDResult} if the file cannot be read or detection fails + */ + @Override + public OSDResult getOSD(File imageFile) { + try { + // if PDF, convert to multi-page TIFF + imageFile = ImageIOHelper.getImageFile(imageFile); + BufferedImage bi = ImageIO.read(imageFile); + return getOSD(bi); + } catch (Exception e) { + logger.warn(e.getMessage(), e); + } + + return new OSDResult(); + } + + /** + * Gets the detected orientation of the input image and apparent script + * (alphabet). + * + * @param bi a buffered image + * @return image orientation and script name; an empty {@link OSDResult} if detection does not succeed + */ + @Override + public OSDResult getOSD(BufferedImage bi) { + init(); + setVariables(); + + try { + api.TessBaseAPIInit3(handle, datapath, "osd"); + setImage(bi); + + IntBuffer orient_degB = IntBuffer.allocate(1); + FloatBuffer orient_confB = FloatBuffer.allocate(1); + PointerByReference script_nameB = new PointerByReference(); + FloatBuffer script_confB = FloatBuffer.allocate(1); + + int result = api.TessBaseAPIDetectOrientationScript(handle, orient_degB, orient_confB, script_nameB, script_confB); + if (result == TRUE) { + int orient_deg = orient_degB.get(); + float orient_conf = orient_confB.get(); + String script_name = script_nameB.getValue().getString(0); + float script_conf = script_confB.get(); + return new OSDResult(orient_deg, orient_conf, script_name, script_conf); + } + } catch (IOException ioe) { + logger.warn(ioe.getMessage(), ioe); + } finally { + dispose(); + } + + return new OSDResult(); + } + /** * Gets result words at specified page iterator level from a recognized * page. 
diff --git a/src/main/java/net/sourceforge/tess4j/Tesseract1.java b/src/main/java/net/sourceforge/tess4j/Tesseract1.java index f6919dd..ffebd6a 100644 --- a/src/main/java/net/sourceforge/tess4j/Tesseract1.java +++ b/src/main/java/net/sourceforge/tess4j/Tesseract1.java @@ -22,6 +22,7 @@ import java.awt.image.*; import java.io.*; import java.nio.ByteBuffer; +import java.nio.FloatBuffer; import java.nio.IntBuffer; import java.util.*; import javax.imageio.IIOImage; @@ -34,6 +35,7 @@ import net.sourceforge.lept4j.Leptonica1; import net.sourceforge.lept4j.Pix; import net.sourceforge.lept4j.util.LeptUtils; +import static net.sourceforge.tess4j.ITessAPI.TRUE; import net.sourceforge.tess4j.util.ImageIOHelper; import net.sourceforge.tess4j.util.LoggHelper; @@ -333,8 +335,8 @@ public String doOCR(List imageList, String filename, Rectangle rect) t * @param imageList a list of IIOImage objects * @param filename input file name. Needed only for training and reading a * UNLV zone file. - * @param roiss list of list of the bounding rectangles defines the regions of the - * images to be recognized. A rectangle of zero dimension or + * @param roiss list of list of the bounding rectangles defines the regions + * of the images to be recognized. A rectangle of zero dimension or * null indicates the whole image. * @return the recognized text * @throws TesseractException @@ -661,7 +663,7 @@ private TessResultRenderer createRenderers(String outputbase, List createDocumentsWithResults(String[] filenames, String[] o return results; } + /** + * Gets the detected orientation of the input imageFile and apparent script + * (alphabet). 
+ * + * @param imageFile an image file + * @return image orientation and script name; an empty {@link OSDResult} if the file cannot be read or detection fails + */ + @Override + public OSDResult getOSD(File imageFile) { + try { + // if PDF, convert to multi-page TIFF + imageFile = ImageIOHelper.getImageFile(imageFile); + BufferedImage bi = ImageIO.read(imageFile); + return getOSD(bi); + } catch (Exception e) { + logger.warn(e.getMessage(), e); + } + + return new OSDResult(); + } + + /** + * Gets the detected orientation of the input image and apparent script + * (alphabet). + * + * @param bi a buffered image + * @return image orientation and script name; an empty {@link OSDResult} if detection does not succeed + */ + @Override + public OSDResult getOSD(BufferedImage bi) { + init(); + setVariables(); + + try { + TessBaseAPIInit3(handle, datapath, "osd"); + setImage(bi); + + IntBuffer orient_degB = IntBuffer.allocate(1); + FloatBuffer orient_confB = FloatBuffer.allocate(1); + PointerByReference script_nameB = new PointerByReference(); + FloatBuffer script_confB = FloatBuffer.allocate(1); + + int result = TessBaseAPIDetectOrientationScript(handle, orient_degB, orient_confB, script_nameB, script_confB); + if (result == TRUE) { + int orient_deg = orient_degB.get(); + float orient_conf = orient_confB.get(); + String script_name = script_nameB.getValue().getString(0); + float script_conf = script_confB.get(); + return new OSDResult(orient_deg, orient_conf, script_name, script_conf); + } + } catch (IOException ioe) { + logger.warn(ioe.getMessage(), ioe); + } finally { + dispose(); + } + + return new OSDResult(); + } + /** * Gets result words at specified page iterator level from a recognized * page. 
diff --git a/src/main/resources/versionchanges.txt b/src/main/resources/versionchanges.txt index 125049b..e548660 100644 --- a/src/main/resources/versionchanges.txt +++ b/src/main/resources/versionchanges.txt @@ -251,4 +251,7 @@ Version 5.11.0 (7 March 2024) - Update lept4j-1.19.1 Version 5.12.0 (24 June 2024) -- Upgrade to Tesseract 5.4.1 \ No newline at end of file +- Upgrade to Tesseract 5.4.1 + +Version 5.13.0 (3 August 2024) +- Add a convenient method to get OSD data \ No newline at end of file diff --git a/src/test/java/net/sourceforge/tess4j/TessAPI1Test.java b/src/test/java/net/sourceforge/tess4j/TessAPI1Test.java index df91bff..15616be 100644 --- a/src/test/java/net/sourceforge/tess4j/TessAPI1Test.java +++ b/src/test/java/net/sourceforge/tess4j/TessAPI1Test.java @@ -485,17 +485,15 @@ public void testOSD() throws Exception { int actualResult = TessAPI1.TessBaseAPIGetPageSegMode(handle); logger.info("PSM: " + Utils.getConstantName(actualResult, TessPageSegMode.class)); TessAPI1.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl); - int success = TessAPI1.TessBaseAPIRecognize(handle, null); - if (success == 0) { - TessAPI1.TessPageIterator pi = TessAPI1.TessBaseAPIAnalyseLayout(handle); - TessAPI1.TessPageIteratorOrientation(pi, orientation, direction, order, deskew_angle); - logger.info(String.format( - "Orientation: %s\nWritingDirection: %s\nTextlineOrder: %s\nDeskew angle: %.4f\n", - Utils.getConstantName(orientation.get(), TessOrientation.class), - Utils.getConstantName(direction.get(), TessWritingDirection.class), - Utils.getConstantName(order.get(), TessTextlineOrder.class), - deskew_angle.get())); - } + TessAPI1.TessPageIterator pi = TessAPI1.TessBaseAPIAnalyseLayout(handle); + TessAPI1.TessPageIteratorOrientation(pi, orientation, direction, order, deskew_angle); + logger.info(String.format( + "Orientation: %s\nWritingDirection: %s\nTextlineOrder: %s\nDeskew angle: %.4f\n", + Utils.getConstantName(orientation.get(), 
TessOrientation.class), + Utils.getConstantName(direction.get(), TessWritingDirection.class), + Utils.getConstantName(order.get(), TessTextlineOrder.class), + deskew_angle.get())); + assertEquals(expResult, actualResult); } diff --git a/src/test/java/net/sourceforge/tess4j/TessAPITest.java b/src/test/java/net/sourceforge/tess4j/TessAPITest.java index 3c415ed..e5dde98 100644 --- a/src/test/java/net/sourceforge/tess4j/TessAPITest.java +++ b/src/test/java/net/sourceforge/tess4j/TessAPITest.java @@ -49,7 +49,6 @@ import org.junit.jupiter.api.*; import static org.junit.jupiter.api.Assertions.*; - public class TessAPITest { private static final Logger logger = LoggerFactory.getLogger(new LoggHelper().toString()); @@ -350,7 +349,7 @@ public void testTessBaseAPIGetHOCRText() throws Exception { api.TessDeleteText(utf8Text); assertTrue(result.contains("
")); - + // WordStr Box output textPtr = api.TessBaseAPIGetWordStrBoxText(handle, page_number); result = textPtr.getString(0); api.TessDeleteText(textPtr); assertTrue(result.contains("WordStr")); - + // TSV output textPtr = api.TessBaseAPIGetTsvText(handle, page_number); result = textPtr.getString(0); @@ -489,20 +488,18 @@ public void testOSD() throws Exception { int actualResult = api.TessBaseAPIGetPageSegMode(handle); logger.info("PSM: " + Utils.getConstantName(actualResult, TessPageSegMode.class)); api.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl); - int success = api.TessBaseAPIRecognize(handle, null); - if (success == 0) { - TessPageIterator pi = api.TessBaseAPIAnalyseLayout(handle); - api.TessPageIteratorOrientation(pi, orientation, direction, order, deskew_angle); - logger.info(String.format( - "Orientation: %s\nWritingDirection: %s\nTextlineOrder: %s\nDeskew angle: %.4f\n", - Utils.getConstantName(orientation.get(), TessOrientation.class), - Utils.getConstantName(direction.get(), TessWritingDirection.class), - Utils.getConstantName(order.get(), TessTextlineOrder.class), - deskew_angle.get())); - } + TessPageIterator pi = api.TessBaseAPIAnalyseLayout(handle); + api.TessPageIteratorOrientation(pi, orientation, direction, order, deskew_angle); + logger.info(String.format( + "Orientation: %s\nWritingDirection: %s\nTextlineOrder: %s\nDeskew angle: %.4f\n", + Utils.getConstantName(orientation.get(), TessOrientation.class), + Utils.getConstantName(direction.get(), TessWritingDirection.class), + Utils.getConstantName(order.get(), TessTextlineOrder.class), + deskew_angle.get())); + assertEquals(expResult, actualResult); } - + /** * Test of TessBaseAPIGetGradient method, of class TessAPI. 
* @@ -599,7 +596,7 @@ public void testResultIterator() throws Exception { } while (api.TessPageIteratorNext(pi, level) == TRUE); // api.TessPageIteratorDelete(pi); api.TessResultIteratorDelete(ri); - + assertTrue(true); } diff --git a/src/test/java/net/sourceforge/tess4j/Tesseract1Test.java b/src/test/java/net/sourceforge/tess4j/Tesseract1Test.java index 44bdc43..e0c3a4d 100644 --- a/src/test/java/net/sourceforge/tess4j/Tesseract1Test.java +++ b/src/test/java/net/sourceforge/tess4j/Tesseract1Test.java @@ -154,7 +154,7 @@ public void testDoOCR_File_Rectangles() throws Exception { logger.info(result); assertEquals(expResult, result); } - + /** * Test of doOCR method, of class Tesseract1. * @@ -171,7 +171,7 @@ public void testDoOCR_File_Rectangles1() throws Exception { logger.info(result); assertEquals(expResult, result); } - + /** * Test of doOCR method, of class Tesseract1. * @@ -293,7 +293,7 @@ public void testCreateDocuments() throws Exception { File imageFile2 = new File(this.testResourcesDataPath, "eurotext.png"); String outputbase1 = "target/test-classes/test-results/docrenderer1-1"; String outputbase2 = "target/test-classes/test-results/docrenderer1-2"; - List formats = new ArrayList(Arrays.asList(RenderedFormat.HOCR, RenderedFormat.PDF, RenderedFormat.TEXT)); + List formats = new ArrayList<>(Arrays.asList(RenderedFormat.HOCR, RenderedFormat.PDF, RenderedFormat.TEXT)); instance.createDocuments(new String[]{imageFile1.getPath(), imageFile2.getPath()}, new String[]{outputbase1, outputbase2}, formats); assertTrue(new File(outputbase1 + ".pdf").exists()); } @@ -321,7 +321,7 @@ public void testGetWords() throws Exception { logger.info(word.toString()); } - List text = new ArrayList(); + List text = new ArrayList<>(); for (Word word : result.subList(0, expResults.length)) { text.add(word.getText().trim()); } @@ -347,7 +347,7 @@ public void testGetSegmentedRegions() throws Exception { logger.info(String.format("Box[%d]: x=%d, y=%d, w=%d, h=%d", i, rect.x, rect.y, 
rect.width, rect.height)); } - assertTrue(result.size() > 0); + assertTrue(!result.isEmpty()); } /** @@ -362,7 +362,7 @@ public void testCreateDocumentsWithResults() throws Exception { File imageFile2 = new File(this.testResourcesDataPath, "multipage-pdf.pdf"); String outputbase1 = "target/test-classes/test-results/docrenderer1-3"; String outputbase2 = "target/test-classes/test-results/docrenderer1-4"; - List formats = new ArrayList(Arrays.asList(RenderedFormat.HOCR, RenderedFormat.PDF, RenderedFormat.TEXT)); + List formats = new ArrayList<>(Arrays.asList(RenderedFormat.HOCR, RenderedFormat.PDF, RenderedFormat.TEXT)); instance.setVariable(ITesseract.DOCUMENT_TITLE, "My document"); List results = instance.createDocumentsWithResults(new String[]{imageFile1.getPath(), imageFile2.getPath()}, new String[]{outputbase1, outputbase2}, formats, TessPageIteratorLevel.RIL_WORD); assertTrue(new File(outputbase1 + ".pdf").exists()); @@ -383,11 +383,28 @@ public void testCreateDocumentsWithResults1() throws Exception { File imageFile = new File(this.testResourcesDataPath, "eurotext.tif"); BufferedImage bi = ImageIO.read(imageFile); String outputbase = "target/test-classes/test-results/docrenderer1-5"; - List formats = new ArrayList(Arrays.asList(RenderedFormat.HOCR, RenderedFormat.PDF, RenderedFormat.TEXT)); + List formats = new ArrayList<>(Arrays.asList(RenderedFormat.HOCR, RenderedFormat.PDF, RenderedFormat.TEXT)); instance.setVariable(ITesseract.DOCUMENT_TITLE, "My document"); OCRResult result = instance.createDocumentsWithResults(bi, imageFile.getPath(), outputbase, formats, TessPageIteratorLevel.RIL_WORD); assertTrue(new File(outputbase + ".pdf").exists()); assertTrue(result.getConfidence() > 0); assertEquals(66, result.getWords().size()); } + + /** + * Test of getOSD method, of class Tesseract1. 
+ * + * @throws java.lang.Exception + */ + @Test + public void testGetOSD() throws Exception { + logger.info("getOSD"); + File imageFile = new File(this.testResourcesDataPath, "eurotext90.png"); + OSDResult result = instance.getOSD(imageFile); + logger.info(result.toString()); + assertEquals(90, result.getOrientDeg()); + assertTrue(result.getOrientConf() > 0); + assertEquals("Latin", result.getScriptName()); + assertTrue(result.getScriptConf() > 0); + } } diff --git a/src/test/java/net/sourceforge/tess4j/TesseractTest.java b/src/test/java/net/sourceforge/tess4j/TesseractTest.java index a7f756c..07e914a 100644 --- a/src/test/java/net/sourceforge/tess4j/TesseractTest.java +++ b/src/test/java/net/sourceforge/tess4j/TesseractTest.java @@ -294,7 +294,7 @@ public void testCreateDocuments() throws Exception { File imageFile2 = new File(this.testResourcesDataPath, "eurotext.png"); String outputbase1 = "target/test-classes/test-results/docrenderer-1"; String outputbase2 = "target/test-classes/test-results/docrenderer-2"; - List formats = new ArrayList(Arrays.asList(RenderedFormat.HOCR, RenderedFormat.PDF, RenderedFormat.TEXT)); + List formats = new ArrayList<>(Arrays.asList(RenderedFormat.HOCR, RenderedFormat.PDF, RenderedFormat.TEXT)); instance.createDocuments(new String[]{imageFile1.getPath(), imageFile2.getPath()}, new String[]{outputbase1, outputbase2}, formats); assertTrue(new File(outputbase1 + ".pdf").exists()); } @@ -322,7 +322,7 @@ public void testGetWords() throws Exception { logger.info(word.toString()); } - List text = new ArrayList(); + List text = new ArrayList<>(); for (Word word : result.subList(0, expResults.length)) { text.add(word.getText().trim()); } @@ -348,7 +348,7 @@ public void testGetSegmentedRegions() throws Exception { logger.info(String.format("Box[%d]: x=%d, y=%d, w=%d, h=%d", i, rect.x, rect.y, rect.width, rect.height)); } - assertTrue(result.size() > 0); + assertTrue(!result.isEmpty()); } /** @@ -363,7 +363,7 @@ public void testCreateDocumentsWithResults() 
throws Exception { File imageFile2 = new File(this.testResourcesDataPath, "multipage-pdf.pdf"); String outputbase1 = "target/test-classes/test-results/docrenderer-3"; String outputbase2 = "target/test-classes/test-results/docrenderer-4"; - List formats = new ArrayList(Arrays.asList(RenderedFormat.HOCR, RenderedFormat.PDF, RenderedFormat.TEXT)); + List formats = new ArrayList<>(Arrays.asList(RenderedFormat.HOCR, RenderedFormat.PDF, RenderedFormat.TEXT)); instance.setVariable(ITesseract.DOCUMENT_TITLE, "My document"); List results = instance.createDocumentsWithResults(new String[]{imageFile1.getPath(), imageFile2.getPath()}, new String[]{outputbase1, outputbase2}, formats, TessPageIteratorLevel.RIL_WORD); assertTrue(new File(outputbase1 + ".pdf").exists()); @@ -383,11 +383,28 @@ public void testCreateDocumentsWithResults1() throws Exception { File imageFile = new File(this.testResourcesDataPath, "eurotext.tif"); BufferedImage bi = ImageIO.read(imageFile); String outputbase = "target/test-classes/test-results/docrenderer-5"; - List formats = new ArrayList(Arrays.asList(RenderedFormat.HOCR, RenderedFormat.PDF, RenderedFormat.TEXT)); + List formats = new ArrayList<>(Arrays.asList(RenderedFormat.HOCR, RenderedFormat.PDF, RenderedFormat.TEXT)); instance.setVariable(ITesseract.DOCUMENT_TITLE, "My document"); OCRResult result = instance.createDocumentsWithResults(bi, imageFile.getPath(), outputbase, formats, TessPageIteratorLevel.RIL_WORD); assertTrue(new File(outputbase + ".pdf").exists()); assertTrue(result.getConfidence() > 0); assertEquals(66, result.getWords().size()); } + + /** + * Test of getOSD method, of class Tesseract. 
+ * + * @throws java.lang.Exception + */ + @Test + public void testGetOSD() throws Exception { + logger.info("getOSD"); + File imageFile = new File(this.testResourcesDataPath, "eurotext90.png"); + OSDResult result = instance.getOSD(imageFile); + logger.info(result.toString()); + assertEquals(90, result.getOrientDeg()); + assertTrue(result.getOrientConf() > 0); + assertEquals("Latin", result.getScriptName()); + assertTrue(result.getScriptConf() > 0); + } }