Skip to content

Commit

Permalink
Implement getOSD method
Browse files Browse the repository at this point in the history
  • Loading branch information
nguyenq committed Aug 4, 2024
1 parent 81c880c commit d5621e5
Show file tree
Hide file tree
Showing 10 changed files with 295 additions and 45 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>net.sourceforge.tess4j</groupId>
<artifactId>tess4j</artifactId>
<version>5.12.1-SNAPSHOT</version>
<version>5.13.0-SNAPSHOT</version>
<packaging>jar</packaging>

<name>Tess4J - Tesseract for Java</name>
Expand Down
14 changes: 14 additions & 0 deletions src/main/java/net/sourceforge/tess4j/ITesseract.java
Original file line number Diff line number Diff line change
Expand Up @@ -377,4 +377,18 @@ public enum RenderedFormat {
* @return list of <code>Word</code>
*/
List<Word> getWords(List<BufferedImage> biList, int pageIteratorLevel);

/**
 * Gets the detected orientation of the input image and apparent script
 * (alphabet).
 *
 * @param imageFile an image file
 * @return an <code>OSDResult</code> carrying the image orientation and
 * script name
 */
OSDResult getOSD(File imageFile);

/**
 * Gets the detected orientation of the input image and apparent script
 * (alphabet).
 *
 * @param bi a buffered image
 * @return an <code>OSDResult</code> carrying the image orientation and
 * script name
 */
OSDResult getOSD(BufferedImage bi);
}
83 changes: 83 additions & 0 deletions src/main/java/net/sourceforge/tess4j/OSDResult.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/**
* Copyright @ 2024 Quan Nguyen
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package net.sourceforge.tess4j;

/**
 * Immutable holder for the outcome of Tesseract Orientation and Script
 * Detection (OSD): the detected rotation, the detected script name, and a
 * confidence value for each.
 */
public class OSDResult {

    private final int orientDeg;
    private final float orientConf;
    private final String scriptName;
    private final float scriptConf;

    /**
     * Creates an empty result: zero degrees, zero confidences and an empty
     * script name.
     */
    public OSDResult() {
        this(0, 0.0f, "", 0.0f);
    }

    /**
     * Creates a result from the given detection values.
     *
     * @param orientDeg the detected clockwise rotation of the input image in
     * degrees (0, 90, 180, 270)
     * @param orientConf confidence in the orientation (15.0 is reasonably
     * confident)
     * @param scriptName the name of the script
     * @param scriptConf confidence level in the script
     */
    public OSDResult(int orientDeg, float orientConf, String scriptName, float scriptConf) {
        this.orientDeg = orientDeg;
        this.orientConf = orientConf;
        this.scriptName = scriptName;
        this.scriptConf = scriptConf;
    }

    /**
     * @return the detected rotation in degrees
     */
    public int getOrientDeg() {
        return this.orientDeg;
    }

    /**
     * @return the confidence in the detected orientation
     */
    public float getOrientConf() {
        return this.orientConf;
    }

    /**
     * @return the name of the detected script
     */
    public String getScriptName() {
        return this.scriptName;
    }

    /**
     * @return the confidence in the detected script
     */
    public float getScriptConf() {
        return this.scriptConf;
    }

    /**
     * Renders all four values on a single line.
     *
     * @return a human-readable summary of this result
     */
    @Override
    public String toString() {
        final String template = "Orientation: %d degrees, confidence: %f; script name: %s, confidence: %f";
        // go through the getters so that subclass overrides are reflected
        return String.format(template, getOrientDeg(), getOrientConf(), getScriptName(), getScriptConf());
    }
}
60 changes: 60 additions & 0 deletions src/main/java/net/sourceforge/tess4j/Tesseract.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import java.awt.image.*;
import java.io.*;
import java.nio.ByteBuffer;
import java.nio.FloatBuffer;
import java.nio.IntBuffer;
import java.util.*;
import javax.imageio.IIOImage;
Expand Down Expand Up @@ -1067,6 +1068,65 @@ public List<OCRResult> createDocumentsWithResults(String[] filenames, String[] o
return results;
}

/**
 * Gets the detected orientation of the input image file and apparent script
 * (alphabet).
 * <p>
 * Any exception raised while loading the file or running detection is
 * logged at warn level and an empty <code>OSDResult</code> is returned
 * instead.
 *
 * @param imageFile an image file
 * @return image orientation and script name; an empty
 * <code>OSDResult</code> if an exception occurred
 */
@Override
public OSDResult getOSD(File imageFile) {
    try {
        // if PDF, convert to multi-page TIFF
        File workingFile = ImageIOHelper.getImageFile(imageFile);

        final BufferedImage bi;
        // ImageIO.read(InputStream) leaves the stream open, so it is closed
        // here to avoid leaking a file handle on every call
        try (InputStream in = new FileInputStream(workingFile)) {
            bi = ImageIO.read(in);
        }
        return getOSD(bi);
    } catch (Exception e) {
        logger.warn(e.getMessage(), e);
    }

    return new OSDResult();
}

/**
 * Gets the detected orientation of the input image and apparent script
 * (alphabet).
 * <p>
 * The engine is initialized with the <code>osd</code> language data before
 * detection. If that initialization fails, if detection does not succeed,
 * or if an <code>IOException</code> occurs while setting the image, an
 * empty <code>OSDResult</code> is returned. The engine is disposed in all
 * cases.
 *
 * @param bi a buffered image
 * @return image orientation and script name; an empty
 * <code>OSDResult</code> on failure
 */
@Override
public OSDResult getOSD(BufferedImage bi) {
    init();
    setVariables();

    try {
        // the Tesseract C API reports 0 on successful initialization;
        // bail out early instead of running detection without the osd model
        if (api.TessBaseAPIInit3(handle, datapath, "osd") != 0) {
            logger.warn("Unable to initialize Tesseract with the osd language data.");
            return new OSDResult();
        }
        setImage(bi);

        // single-element buffers act as out-parameters for the native call
        IntBuffer orientDegBuf = IntBuffer.allocate(1);
        FloatBuffer orientConfBuf = FloatBuffer.allocate(1);
        PointerByReference scriptNameRef = new PointerByReference();
        FloatBuffer scriptConfBuf = FloatBuffer.allocate(1);

        int result = api.TessBaseAPIDetectOrientationScript(handle, orientDegBuf, orientConfBuf, scriptNameRef, scriptConfBuf);
        if (result == TRUE) {
            int orientDeg = orientDegBuf.get();
            float orientConf = orientConfBuf.get();
            // guard against a script-name pointer that was never filled in;
            // an NPE here would escape the IOException-only catch below
            String scriptName = scriptNameRef.getValue() != null ? scriptNameRef.getValue().getString(0) : "";
            float scriptConf = scriptConfBuf.get();
            return new OSDResult(orientDeg, orientConf, scriptName, scriptConf);
        }
    } catch (IOException ioe) {
        logger.warn(ioe.getMessage(), ioe);
    } finally {
        dispose();
    }

    return new OSDResult();
}

/**
* Gets result words at specified page iterator level from a recognized
* page.
Expand Down
67 changes: 64 additions & 3 deletions src/main/java/net/sourceforge/tess4j/Tesseract1.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import java.awt.image.*;
import java.io.*;
import java.nio.ByteBuffer;
import java.nio.FloatBuffer;
import java.nio.IntBuffer;
import java.util.*;
import javax.imageio.IIOImage;
Expand All @@ -34,6 +35,7 @@
import net.sourceforge.lept4j.Leptonica1;
import net.sourceforge.lept4j.Pix;
import net.sourceforge.lept4j.util.LeptUtils;
import static net.sourceforge.tess4j.ITessAPI.TRUE;

import net.sourceforge.tess4j.util.ImageIOHelper;
import net.sourceforge.tess4j.util.LoggHelper;
Expand Down Expand Up @@ -333,8 +335,8 @@ public String doOCR(List<IIOImage> imageList, String filename, Rectangle rect) t
* @param imageList a list of <code>IIOImage</code> objects
* @param filename input file name. Needed only for training and reading a
* UNLV zone file.
* @param roiss list of list of the bounding rectangles defines the regions of the
* images to be recognized. A rectangle of zero dimension or
* @param roiss a list of lists of bounding rectangles defining the regions
* of the images to be recognized. A rectangle of zero dimension or
* <code>null</code> indicates the whole image.
* @return the recognized text
* @throws TesseractException
Expand Down Expand Up @@ -661,7 +663,7 @@ private TessResultRenderer createRenderers(String outputbase, List<RenderedForma
} else {
TessResultRendererInsert(renderer, TessPAGERendererCreate(outputbase));
}
break;
break;
case TSV:
if (renderer == null) {
renderer = TessTsvRendererCreate(outputbase);
Expand Down Expand Up @@ -1048,6 +1050,65 @@ public List<OCRResult> createDocumentsWithResults(String[] filenames, String[] o
return results;
}

/**
 * Gets the detected orientation of the input image file and apparent script
 * (alphabet).
 * <p>
 * Any exception raised while loading the file or running detection is
 * logged at warn level and an empty <code>OSDResult</code> is returned
 * instead.
 *
 * @param imageFile an image file
 * @return image orientation and script name; an empty
 * <code>OSDResult</code> if an exception occurred
 */
@Override
public OSDResult getOSD(File imageFile) {
    try {
        // if PDF, convert to multi-page TIFF
        File workingFile = ImageIOHelper.getImageFile(imageFile);

        final BufferedImage bi;
        // ImageIO.read(InputStream) leaves the stream open, so it is closed
        // here to avoid leaking a file handle on every call
        try (InputStream in = new FileInputStream(workingFile)) {
            bi = ImageIO.read(in);
        }
        return getOSD(bi);
    } catch (Exception e) {
        logger.warn(e.getMessage(), e);
    }

    return new OSDResult();
}

/**
 * Gets the detected orientation of the input image and apparent script
 * (alphabet).
 * <p>
 * The engine is initialized with the <code>osd</code> language data before
 * detection. If that initialization fails, if detection does not succeed,
 * or if an <code>IOException</code> occurs while setting the image, an
 * empty <code>OSDResult</code> is returned. The engine is disposed in all
 * cases.
 *
 * @param bi a buffered image
 * @return image orientation and script name; an empty
 * <code>OSDResult</code> on failure
 */
@Override
public OSDResult getOSD(BufferedImage bi) {
    init();
    setVariables();

    try {
        // the Tesseract C API reports 0 on successful initialization;
        // bail out early instead of running detection without the osd model
        if (TessBaseAPIInit3(handle, datapath, "osd") != 0) {
            logger.warn("Unable to initialize Tesseract with the osd language data.");
            return new OSDResult();
        }
        setImage(bi);

        // single-element buffers act as out-parameters for the native call
        IntBuffer orientDegBuf = IntBuffer.allocate(1);
        FloatBuffer orientConfBuf = FloatBuffer.allocate(1);
        PointerByReference scriptNameRef = new PointerByReference();
        FloatBuffer scriptConfBuf = FloatBuffer.allocate(1);

        int result = TessBaseAPIDetectOrientationScript(handle, orientDegBuf, orientConfBuf, scriptNameRef, scriptConfBuf);
        if (result == TRUE) {
            int orientDeg = orientDegBuf.get();
            float orientConf = orientConfBuf.get();
            // guard against a script-name pointer that was never filled in;
            // an NPE here would escape the IOException-only catch below
            String scriptName = scriptNameRef.getValue() != null ? scriptNameRef.getValue().getString(0) : "";
            float scriptConf = scriptConfBuf.get();
            return new OSDResult(orientDeg, orientConf, scriptName, scriptConf);
        }
    } catch (IOException ioe) {
        logger.warn(ioe.getMessage(), ioe);
    } finally {
        dispose();
    }

    return new OSDResult();
}

/**
* Gets result words at specified page iterator level from a recognized
* page.
Expand Down
5 changes: 4 additions & 1 deletion src/main/resources/versionchanges.txt
Original file line number Diff line number Diff line change
Expand Up @@ -251,4 +251,7 @@ Version 5.11.0 (7 March 2024)
- Update lept4j-1.19.1

Version 5.12.0 (24 June 2024)
- Upgrade to Tesseract 5.4.1
- Upgrade to Tesseract 5.4.1

Version 5.13.0 (3 August 2024)
- Add a convenience method to get OSD data
20 changes: 9 additions & 11 deletions src/test/java/net/sourceforge/tess4j/TessAPI1Test.java
Original file line number Diff line number Diff line change
Expand Up @@ -485,17 +485,15 @@ public void testOSD() throws Exception {
int actualResult = TessAPI1.TessBaseAPIGetPageSegMode(handle);
logger.info("PSM: " + Utils.getConstantName(actualResult, TessPageSegMode.class));
TessAPI1.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);
int success = TessAPI1.TessBaseAPIRecognize(handle, null);
if (success == 0) {
TessAPI1.TessPageIterator pi = TessAPI1.TessBaseAPIAnalyseLayout(handle);
TessAPI1.TessPageIteratorOrientation(pi, orientation, direction, order, deskew_angle);
logger.info(String.format(
"Orientation: %s\nWritingDirection: %s\nTextlineOrder: %s\nDeskew angle: %.4f\n",
Utils.getConstantName(orientation.get(), TessOrientation.class),
Utils.getConstantName(direction.get(), TessWritingDirection.class),
Utils.getConstantName(order.get(), TessTextlineOrder.class),
deskew_angle.get()));
}
TessAPI1.TessPageIterator pi = TessAPI1.TessBaseAPIAnalyseLayout(handle);
TessAPI1.TessPageIteratorOrientation(pi, orientation, direction, order, deskew_angle);
logger.info(String.format(
"Orientation: %s\nWritingDirection: %s\nTextlineOrder: %s\nDeskew angle: %.4f\n",
Utils.getConstantName(orientation.get(), TessOrientation.class),
Utils.getConstantName(direction.get(), TessWritingDirection.class),
Utils.getConstantName(order.get(), TessTextlineOrder.class),
deskew_angle.get()));

assertEquals(expResult, actualResult);
}

Expand Down
31 changes: 14 additions & 17 deletions src/test/java/net/sourceforge/tess4j/TessAPITest.java
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@
import org.junit.jupiter.api.*;
import static org.junit.jupiter.api.Assertions.*;


public class TessAPITest {

private static final Logger logger = LoggerFactory.getLogger(new LoggHelper().toString());
Expand Down Expand Up @@ -350,7 +349,7 @@ public void testTessBaseAPIGetHOCRText() throws Exception {
api.TessDeleteText(utf8Text);
assertTrue(result.contains("<div class='ocr_page'"));
}

/**
* Test of TessBaseAPIGetAltoText method, of class TessAPI.
*
Expand All @@ -374,13 +373,13 @@ public void testTessBaseAPIGetAltoText() throws Exception {
String result = textPtr.getString(0);
api.TessDeleteText(textPtr);
assertTrue(result.contains("<Page WIDTH=\"1024\" HEIGHT=\"800\" PHYSICAL_IMG_NR=\"0\" ID=\"page_0\">"));

// WordStr Box output
textPtr = api.TessBaseAPIGetWordStrBoxText(handle, page_number);
result = textPtr.getString(0);
api.TessDeleteText(textPtr);
assertTrue(result.contains("WordStr"));

// TSV output
textPtr = api.TessBaseAPIGetTsvText(handle, page_number);
result = textPtr.getString(0);
Expand Down Expand Up @@ -489,20 +488,18 @@ public void testOSD() throws Exception {
int actualResult = api.TessBaseAPIGetPageSegMode(handle);
logger.info("PSM: " + Utils.getConstantName(actualResult, TessPageSegMode.class));
api.TessBaseAPISetImage(handle, buf, image.getWidth(), image.getHeight(), bytespp, bytespl);
int success = api.TessBaseAPIRecognize(handle, null);
if (success == 0) {
TessPageIterator pi = api.TessBaseAPIAnalyseLayout(handle);
api.TessPageIteratorOrientation(pi, orientation, direction, order, deskew_angle);
logger.info(String.format(
"Orientation: %s\nWritingDirection: %s\nTextlineOrder: %s\nDeskew angle: %.4f\n",
Utils.getConstantName(orientation.get(), TessOrientation.class),
Utils.getConstantName(direction.get(), TessWritingDirection.class),
Utils.getConstantName(order.get(), TessTextlineOrder.class),
deskew_angle.get()));
}
TessPageIterator pi = api.TessBaseAPIAnalyseLayout(handle);
api.TessPageIteratorOrientation(pi, orientation, direction, order, deskew_angle);
logger.info(String.format(
"Orientation: %s\nWritingDirection: %s\nTextlineOrder: %s\nDeskew angle: %.4f\n",
Utils.getConstantName(orientation.get(), TessOrientation.class),
Utils.getConstantName(direction.get(), TessWritingDirection.class),
Utils.getConstantName(order.get(), TessTextlineOrder.class),
deskew_angle.get()));

assertEquals(expResult, actualResult);
}

/**
* Test of TessBaseAPIGetGradient method, of class TessAPI.
*
Expand Down Expand Up @@ -599,7 +596,7 @@ public void testResultIterator() throws Exception {
} while (api.TessPageIteratorNext(pi, level) == TRUE);
// api.TessPageIteratorDelete(pi);
api.TessResultIteratorDelete(ri);

assertTrue(true);
}

Expand Down
Loading

0 comments on commit d5621e5

Please sign in to comment.