specialSpaces) {
+ this.specialSpaces = new HashSet<>(specialSpaces);
+ return this;
+ }
+
+ /**
+ * Sets the characters requiring custom trained spaces.
+ *
+ * @param specialSpaces The characters requiring separate training for their trailing spaces.
+ * @return The current {@link OCROptions} object
+ */
+ public OCROptions setSpecialSpaces(char... specialSpaces) {
+ this.specialSpaces = IntStream.range(0, specialSpaces.length)
+ .mapToObj(x -> specialSpaces[x])
+ .collect(Collectors.toSet());
+ return this;
+ }
+
+ /**
+ * Gets the value set by {@link OCROptions#setMaxPercentDiffToMerge(double)}
+ *
+ * @return The value set by {@link OCROptions#setMaxPercentDiffToMerge(double)}
+ */
+ public double getMaxPercentDiffToMerge() {
+ return maxPercentDiffToMerge;
+ }
+
+ /**
+ * Sets the maximum percentage difference a line must be in order to merge in the very first phase of training. This
+ * is primarily for when underscores are below a line, and will need to be X% smaller than the line to merge.
+ *
+ * This value is by default 0.5
+ *
+ * @param maxPercentDiffToMerge The percentage to set
+ * @return The current {@link OCROptions} object
+ */
+ public OCROptions setMaxPercentDiffToMerge(double maxPercentDiffToMerge) {
+ this.maxPercentDiffToMerge = maxPercentDiffToMerge;
+ return this;
+ }
+
+ /**
+ * Gets the amount the width/height ratio should be multiplied across all of a character's potential matches, to
+ * increase its effect compared to the actual section similarity.
+ *
+ * @return The weight of the width/height ratio
+ */
+ public double getSizeRatioWeight() {
+ return sizeRatioWeight;
+ }
+
+ /**
+ * Sets the amount the width/height ratio should be multiplied across all of a character's potential matches, to
+ * increase its effect compared to the actual section similarity.
+ *
+ * @param sizeRatioWeight The weight of the width/height ratio
+ * @return The current {@link OCROptions} object
+ */
+ public OCROptions setSizeRatioWeight(double sizeRatioWeight) {
+ this.sizeRatioWeight = sizeRatioWeight;
+ return this;
+ }
+}
diff --git a/src/main/java/com/uddernetworks/newocr/train/TrainGenerator.java b/src/main/java/com/uddernetworks/newocr/train/TrainGenerator.java
index 2286106..3ada2cf 100644
--- a/src/main/java/com/uddernetworks/newocr/train/TrainGenerator.java
+++ b/src/main/java/com/uddernetworks/newocr/train/TrainGenerator.java
@@ -1,68 +1,22 @@
package com.uddernetworks.newocr.train;
-import javax.imageio.ImageIO;
-import java.awt.*;
-import java.awt.image.BufferedImage;
import java.io.File;
-import java.io.IOException;
-
-public class TrainGenerator {
-
- private static String trainString = "!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghjiklmnopqrstuvwxyz{|}~W W";
- public static final int UPPER_FONT_BOUND = 90;
- public static final int LOWER_FONT_BOUND = 20;
-
- public static void main(String[] args) {
- BufferedImage image = new BufferedImage(1500, 500, BufferedImage.TYPE_INT_ARGB);
- Graphics2D graphics = image.createGraphics();
-
- RenderingHints rht = new RenderingHints(RenderingHints.KEY_TEXT_ANTIALIASING, RenderingHints.VALUE_TEXT_ANTIALIAS_ON);
- graphics.setRenderingHints(rht);
-
- Font font = new Font("Verdana", Font.PLAIN, 92);
- graphics.setFont(font);
-
- int newHeight = 11;
-
- int size2 = UPPER_FONT_BOUND;
- for (int i = 0; i < UPPER_FONT_BOUND - LOWER_FONT_BOUND; i++) {
- newHeight += size2 + 11;
- size2--;
- }
-
- image = new BufferedImage(graphics.getFontMetrics().stringWidth(trainString) + 50, newHeight, BufferedImage.TYPE_INT_ARGB);
- for (int y = 0; y < image.getHeight(); y++) {
- for (int x = 0; x < image.getWidth(); x++) {
- image.setRGB(x, y, Color.WHITE.getRGB());
- }
- }
-
- graphics = image.createGraphics();
-
- RenderingHints rh = new RenderingHints(RenderingHints.KEY_TEXT_ANTIALIASING, RenderingHints.VALUE_TEXT_ANTIALIAS_ON);
- graphics.setRenderingHints(rh);
-
- int size = UPPER_FONT_BOUND;
- int offset = UPPER_FONT_BOUND;
- for (int i = 0; i < UPPER_FONT_BOUND - LOWER_FONT_BOUND; i++) {
- drawLine(graphics, trainString, offset, size);
- offset += size + 10;
- size--;
- }
-
- try {
- ImageIO.write(image, "png", new File("training.png"));
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
-
- private static void drawLine(Graphics2D drawTo, String line, int yOffset, int size) {
- Font font = new Font("Verdana", Font.PLAIN, size);
- drawTo.setFont(font);
- drawTo.setPaint(Color.BLACK);
-
- drawTo.drawString(line, 10, yOffset);
- }
+public interface TrainGenerator {
+
+ /**
+ * Generates an image that can be used while training the OCR using default options of font bounds 90-30, and a font
+ * family of Comic Sans MS.
+ *
+ * @param file The file to write to
+ */
+ void generateTrainingImage(File file);
+
+ /**
+ * Generates an image that can be used while training the OCR using the given options.
+ *
+ * @param file The file to write to
+ * @param options The options used during image generation
+ */
+ void generateTrainingImage(File file, TrainGeneratorOptions options);
}
diff --git a/src/main/java/com/uddernetworks/newocr/train/TrainGeneratorOptions.java b/src/main/java/com/uddernetworks/newocr/train/TrainGeneratorOptions.java
new file mode 100644
index 0000000..ff1e569
--- /dev/null
+++ b/src/main/java/com/uddernetworks/newocr/train/TrainGeneratorOptions.java
@@ -0,0 +1,70 @@
+package com.uddernetworks.newocr.train;
+
+/**
+ * Defines options for the actual generation of the image to train on.
+ */
+public class TrainGeneratorOptions {
+ private int maxFontSize = 90;
+ private int minFontSize = 30;
+ private String fontFamily = "";
+
+ /**
+ * Gets the maximum font size to generate up to in points.
+ *
+ * @return The maximum font size
+ */
+ public int getMaxFontSize() {
+ return maxFontSize;
+ }
+
+ /**
+ * Sets the maximum font size to generate up to in points.
+ *
+ * @param maxFontSize The maximum font size
+ * @return The current {@link TrainGeneratorOptions}
+ */
+ public TrainGeneratorOptions setMaxFontSize(int maxFontSize) {
+ this.maxFontSize = maxFontSize;
+ return this;
+ }
+
+ /**
+ * Gets the minimum font size to generate down to in points.
+ *
+ * @return The minimum font size
+ */
+ public int getMinFontSize() {
+ return minFontSize;
+ }
+
+ /**
+ * Sets the minimum font size to generate down to in points.
+ *
+ * @param minFontSize The minimum font size
+ * @return The current {@link TrainGeneratorOptions}
+ */
+ public TrainGeneratorOptions setMinFontSize(int minFontSize) {
+ this.minFontSize = minFontSize;
+ return this;
+ }
+
+ /**
+ * Gets the system font family used during training image generation.
+ *
+ * @return The font family
+ */
+ public String getFontFamily() {
+ return fontFamily;
+ }
+
+ /**
+ * Sets the system font family used during training image generation.
+ *
+ * @param fontFamily The font family to set
+ * @return The current {@link TrainGeneratorOptions}
+ */
+ public TrainGeneratorOptions setFontFamily(String fontFamily) {
+ this.fontFamily = fontFamily;
+ return this;
+ }
+}
diff --git a/src/main/java/com/uddernetworks/newocr/train/UntrainedDatabaseException.java b/src/main/java/com/uddernetworks/newocr/train/UntrainedDatabaseException.java
new file mode 100644
index 0000000..78d7455
--- /dev/null
+++ b/src/main/java/com/uddernetworks/newocr/train/UntrainedDatabaseException.java
@@ -0,0 +1,10 @@
+package com.uddernetworks.newocr.train;
+
+import com.uddernetworks.newocr.database.DatabaseManager;
+
+public class UntrainedDatabaseException extends RuntimeException {
+
+ public UntrainedDatabaseException(DatabaseManager databaseManager) {
+ super("The given database " + databaseManager.getName() + " has not been trained yet.");
+ }
+}
diff --git a/src/main/java/com/uddernetworks/newocr/utils/CharacterGettingUtils.java b/src/main/java/com/uddernetworks/newocr/utils/CharacterGettingUtils.java
deleted file mode 100644
index 8f8af6b..0000000
--- a/src/main/java/com/uddernetworks/newocr/utils/CharacterGettingUtils.java
+++ /dev/null
@@ -1,223 +0,0 @@
-package com.uddernetworks.newocr.utils;
-
-import com.uddernetworks.newocr.CombineMethod;
-import com.uddernetworks.newocr.LetterMeta;
-import com.uddernetworks.newocr.character.SearchCharacter;
-import java.util.Arrays;
-import java.util.List;
-import java.util.Optional;
-
-public class CharacterGettingUtils {
-
- /**
- * Gets the base of a character with a dot on top of it and combines it with the found character.
- * @param dotCharacter The dot character to search from
- * @param coordinates The coordinates used by the dotCharacter currently
- * @param searchCharacters The SearchCharacter list to check for the base
- * @return If a successful combination was made
- */
- public static boolean doDotStuff(SearchCharacter dotCharacter, List coordinates, List searchCharacters) {
- if (!dotCharacter.isProbablyDot()) {
- return false;
- }
-
- Optional baseCharacterOptional = getBaseOfDot(searchCharacters, dotCharacter);
-
- baseCharacterOptional.ifPresent(baseCharacter -> {
- combine(baseCharacter, dotCharacter, coordinates, CombineMethod.DOT, LetterMeta.DOT_ABOVE);
- baseCharacter.setHasDot(true);
- dotCharacter.setHasDot(true);
- });
-
- return baseCharacterOptional.isPresent();
- }
-
- /**
- * Gets the base of the percent and adds the given circle/dot to it.
- * @param percentDotCharacter The dot character of the percentage
- * @param coordinates The coordinates used by the percentDotCharacter currently
- * @param searchCharacters The SearchCharacter list to check for the base
- * @return If a successful combination was made
- */
- public static boolean doPercentStuff(SearchCharacter percentDotCharacter, List coordinates, List searchCharacters) {
- if (!percentDotCharacter.isProbablyCircleOfPercent()) return false;
- Optional baseCharacterOptional = getBaseForPercent(searchCharacters, percentDotCharacter);
- baseCharacterOptional.ifPresent(baseCharacter -> {
- combine(baseCharacter, percentDotCharacter, coordinates, CombineMethod.PERCENTAGE_CIRCLE, LetterMeta.PERCENT);
- baseCharacter.setHasDot(true);
- percentDotCharacter.setHasDot(true);
- });
-
- return baseCharacterOptional.isPresent();
- }
-
- /**
- * Gets the left apostrophe and adds the given left apostrophe with it.
- * @param rightApostrophe The apostrophe on the right side
- * @param coordinates The coordinates used by the rightApostrophe currently
- * @param searchCharacters The SearchCharacter list to check for the base
- * @return If a successful combination was made
- */
- public static boolean doApostropheStuff(SearchCharacter rightApostrophe, List coordinates, List searchCharacters) {
- if (!rightApostrophe.isProbablyApostraphe()) {
- return false;
- }
-
- Optional leftApostropheOptional = getLeftApostrophe(searchCharacters, rightApostrophe);
-
- leftApostropheOptional.ifPresent(leftApostrophe -> {
- combine(leftApostrophe, rightApostrophe, coordinates, CombineMethod.APOSTROPHE, LetterMeta.QUOTE);
- leftApostrophe.setHasDot(true);
- rightApostrophe.setHasDot(true);
- });
-
- return leftApostropheOptional.isPresent();
- }
-
- /**
- * Combines a given {@link SearchCharacter} with another using one of several methods.
- * @param baseCharacter The {@link SearchCharacter} that will be added to
- * @param adding The {@link SearchCharacter} that will be added to the baseCharacter
- * @param coordinates The coordinates used by the `adding` parameter
- * @param combineMethod The method to be used when combining the characters. {@link CombineMethod#DOT} and {@link CombineMethod#COLON} do the same thing
- * @param letterMeta The {@link LetterMeta} to add to the base character
- */
- public static void combine(SearchCharacter baseCharacter, SearchCharacter adding, List coordinates, CombineMethod combineMethod, LetterMeta letterMeta) {
- int minX = Math.min(baseCharacter.getX(), adding.getX());
- int minY = Math.min(baseCharacter.getY(), adding.getY());
- int maxX = Math.max(baseCharacter.getX() + baseCharacter.getWidth(), adding.getX() + adding.getWidth());
- int maxY = Math.max(baseCharacter.getY() + baseCharacter.getHeight(), adding.getY() + adding.getHeight());
-
- baseCharacter.setWidth(maxX - minX);
- baseCharacter.setHeight(maxY - minY);
- baseCharacter.setX(minX);
- baseCharacter.setY(minY);
- baseCharacter.setLetterMeta(letterMeta);
-
- switch (combineMethod) {
- case DOT:
- case COLON:
- maxX = baseCharacter.getX() + baseCharacter.getWidth();
- maxY = baseCharacter.getY() + baseCharacter.getHeight();
- baseCharacter.setHeight(maxY - adding.getY());
- baseCharacter.setY(adding.getY());
-
- int dotMaxX = adding.getX() + adding.getWidth();
-
- if (dotMaxX > maxX) {
- baseCharacter.setWidth(dotMaxX - baseCharacter.getX());
- }
-
- baseCharacter.addDot(coordinates);
- break;
- case PERCENTAGE_CIRCLE:
- baseCharacter.addPercentageCircle(coordinates, OCRUtils.isWithin(adding.getY(), baseCharacter.getY(), (double) baseCharacter.getHeight() / 10D));
- break;
- case APOSTROPHE:
- baseCharacter.addPercentageCircle(coordinates, false);
- break;
- }
-
- coordinates.clear();
- }
-
- /**
- * Gets the base of character like i and j from a dot character
- * @param characters The list of {@link SearchCharacter}s to search from
- * @param dotCharacter The dot character to search from
- * @return The {@link SearchCharacter} base Optional
- */
- public static Optional getBaseOfDot(List characters, SearchCharacter dotCharacter) {
- return characters.parallelStream()
- .filter(character -> !character.equals(dotCharacter))
- .filter(character -> !character.hasDot())
- .filter(character -> character.isInBounds(dotCharacter.getX() + (dotCharacter.getWidth() / 2), character.getY() + 4))
- .filter(character -> character.getHeight() > dotCharacter.getHeight() * 5)
- .filter(baseCharacter -> {
- int below = dotCharacter.getY() + dotCharacter.getHeight() + 1;
-
- return OCRUtils.checkDifference(below, baseCharacter.getY(), dotCharacter.getHeight() + 2);
- })
- .findFirst();
- }
-
- /**
- * Gets the dot of a character like ! and ? from a base character
- * @param characters The list of {@link SearchCharacter}s to search from
- * @param baseCharacter The base character to search from
- * @return The {@link SearchCharacter} dot Optional
- */
- public static Optional getDotUnderLetter(List characters, SearchCharacter baseCharacter) {
- return characters.parallelStream()
- .filter(character -> !character.equals(baseCharacter))
- .filter(character -> !character.hasDot())
- .filter(SearchCharacter::isProbablyDot)
- .filter(character -> baseCharacter.isInBounds(character.getX() + (character.getWidth() / 2), baseCharacter.getY() + 4))
- .filter(character -> baseCharacter.getHeight() > character.getHeight() * 2)
- .filter(dotCharacter -> {
- int below = dotCharacter.getY() - dotCharacter.getHeight();
- int mod = dotCharacter.getHeight();
- return OCRUtils.checkDifference(below, baseCharacter.getY() + baseCharacter.getHeight(), mod + 2);
- })
- .findFirst();
- }
-
- /**
- * Gets the bottom dot of a character like : and ; from its top dot
- * @param characters The list of {@link SearchCharacter}s to search from
- * @param topDot The bottom dot to search from
- * @return The {@link SearchCharacter} dot Optional
- */
- public static Optional getBottomColon(List characters, SearchCharacter topDot) {
- return characters.stream()
- .filter(character -> !character.equals(topDot))
- .filter(character -> !character.hasDot())
- .filter(character -> topDot.isInXBounds(character.getX() + (character.getWidth() / 2)))
- .filter(character -> {
- double ratio = (double) topDot.getHeight() / (double) character.getHeight();
- if (character.getWidth() * 2 < topDot.getWidth()) return false;
- return (ratio >= 0.25 && ratio <= 0.5) || (topDot.getHeight() == character.getHeight() && topDot.getWidth() == character.getWidth());
- })
- .filter(bottomCharacter -> {
- double mult = ((double) bottomCharacter.getWidth() / (double) bottomCharacter.getHeight() > 3 && Arrays.deepEquals(bottomCharacter.getValues(), topDot.getValues())) ? 5 : 5;
- int mod = (int) (topDot.getHeight() * mult);
-
- return OCRUtils.checkDifference(bottomCharacter.getY(), topDot.getY() + topDot.getHeight(), mod + 1);
- })
- .findFirst();
- }
-
- /**
- * Gets the left apostrophe from the given left apostrophe
- * @param characters The list of {@link SearchCharacter}s to search from
- * @param rightApostrophe The right apostrophe to search from
- * @return The {@link SearchCharacter} dot Optional
- */
- public static Optional getLeftApostrophe(List characters, SearchCharacter rightApostrophe) {
- return characters.parallelStream()
- .filter(SearchCharacter::isProbablyApostraphe)
- .filter(character -> character.getY() == rightApostrophe.getY())
- .filter(character -> {
- boolean[][] values = character.getValues();
- boolean[][] values2 = rightApostrophe.getValues();
- if (values.length != values2.length || values[0].length != values2[0].length) return false;
-
- double diff = OCRUtils.getDifferencesFrom2D(values, values2);
- return diff <= 0.05; // If it's at least 5% similar
- })
- .filter(character -> OCRUtils.isWithin(character.getX() + character.getWidth(), rightApostrophe.getX(), rightApostrophe.getWidth() - 1D, ((double) rightApostrophe.getWidth() * 1.1D) + 4D))
- .findFirst();
- }
-
- /**
- * Gets the base character for the given percent circle/dot character
- * @param characters The list of {@link SearchCharacter}s to search from
- * @param circleOfPercent The circle/dot of the percent to search from
- * @return The {@link SearchCharacter} dot Optional
- */
- public static Optional getBaseForPercent(List characters, SearchCharacter circleOfPercent) {
- return characters.parallelStream()
- .filter(searchCharacter -> searchCharacter.isOverlaping(circleOfPercent))
- .findFirst();
- }
-}
diff --git a/src/main/java/com/uddernetworks/newocr/utils/ConversionUtils.java b/src/main/java/com/uddernetworks/newocr/utils/ConversionUtils.java
index 0269722..5f2b085 100644
--- a/src/main/java/com/uddernetworks/newocr/utils/ConversionUtils.java
+++ b/src/main/java/com/uddernetworks/newocr/utils/ConversionUtils.java
@@ -1,5 +1,8 @@
package com.uddernetworks.newocr.utils;
+/**
+ * Provides simple conversions of units used by the OCR and programs using the library.
+ */
public class ConversionUtils {
/**
@@ -9,7 +12,7 @@ public class ConversionUtils {
* @return The point value of the pixel
*/
public static int pixelToPoint(int pixel) {
- return (int) Math.round(((double) pixel) / (4D/3D));
+ return (int) Math.round(((double) pixel) / (4D / 3D));
}
/**
@@ -19,7 +22,7 @@ public static int pixelToPoint(int pixel) {
* @return The pixel value of the point
*/
public static int pointToPixel(int point) {
- return (int) Math.round(((double) point) * (4D/3D));
+ return (int) Math.round(((double) point) * (4D / 3D));
}
}
diff --git a/src/main/java/com/uddernetworks/newocr/utils/IntPair.java b/src/main/java/com/uddernetworks/newocr/utils/IntPair.java
index 8d82c42..30c4816 100644
--- a/src/main/java/com/uddernetworks/newocr/utils/IntPair.java
+++ b/src/main/java/com/uddernetworks/newocr/utils/IntPair.java
@@ -9,17 +9,17 @@
* @version January 12, 2019
*/
public final class IntPair {
-
+
/**
* The key of this {@link IntPair}.
*/
private int key;
-
+
/**
* The value of this {@link IntPair}.
*/
private int value;
-
+
/**
* Creates a new {@link IntPair} with the specified key and value.
*
@@ -30,7 +30,7 @@ public IntPair(int key, int value) {
this.key = key;
this.value = value;
}
-
+
/**
* {@inheritDoc}
*/
@@ -39,12 +39,12 @@ public boolean equals(Object o) {
if (!(o instanceof IntPair)) {
return false;
}
-
+
var pair = (IntPair) o;
-
+
return key == pair.key && value == pair.value;
}
-
+
/**
* {@inheritDoc}
*/
@@ -52,7 +52,15 @@ public boolean equals(Object o) {
public int hashCode() {
return Objects.hash(key, value);
}
-
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public String toString() {
+ return "IntPair[key = " + key + ", value = " + value + "]";
+ }
+
/**
* Gets this {@link IntPair}'s key.
*
@@ -61,7 +69,16 @@ public int hashCode() {
public int getKey() {
return key;
}
-
+
+ /**
+ * Sets this {@link IntPair}'s key.
+ *
+ * @param key The key as an {@code int}.
+ */
+ public void setKey(int key) {
+ this.key = key;
+ }
+
/**
* Gets this {@link IntPair}'s value.
*
@@ -70,5 +87,14 @@ public int getKey() {
public int getValue() {
return value;
}
-
+
+ /**
+ * Sets this {@link IntPair}'s value.
+ *
+ * @param value The value as an {@code int}.
+ */
+ public void setValue(int value) {
+ this.value = value;
+ }
+
}
diff --git a/src/main/java/com/uddernetworks/newocr/utils/OCRUtils.java b/src/main/java/com/uddernetworks/newocr/utils/OCRUtils.java
index bf35c6c..b518f69 100644
--- a/src/main/java/com/uddernetworks/newocr/utils/OCRUtils.java
+++ b/src/main/java/com/uddernetworks/newocr/utils/OCRUtils.java
@@ -1,29 +1,22 @@
package com.uddernetworks.newocr.utils;
-import com.uddernetworks.newocr.character.SearchCharacter;
-import it.unimi.dsi.fastutil.ints.IntArrayList;
-import it.unimi.dsi.fastutil.ints.IntList;
-import java.awt.Color;
-import java.awt.Graphics;
+import javax.imageio.ImageIO;
+import javax.swing.*;
+import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
-import java.util.LinkedList;
-import java.util.List;
+import java.util.Arrays;
import java.util.Optional;
+import java.util.OptionalDouble;
import java.util.stream.Collectors;
-import java.util.stream.Stream;
-import javax.imageio.ImageIO;
-import javax.swing.ImageIcon;
/**
* Some various utility methods used by the OCR that may assist others using the library.
*/
public class OCRUtils {
- public static final IntPair ZERO_PLACEHOLDER = new IntPair(0, 0);
-
/**
* An ImageIO.read() replacement, which in tests can be up to 15x faster. This has shown to significantly improve
* the OCR's performance both in training and actual usage.
@@ -45,6 +38,24 @@ public static BufferedImage readImage(File input) {
return bufferedImage;
}
+ /**
+ * Removes the leading whitespace common to all lines of the input (lines are split on newlines). For example, if the
+ * OCR adds an extra 2 spaces before every line of text, those 2 spaces are removed from each line.
+ *
+ * @param string The input string
+ * @return The input string trimmed properly
+ */
+ public static String removeLeadingSpaces(String string) {
+ var split = string.split("\n");
+ var commonSpaces = Arrays.stream(split).mapToInt(OCRUtils::countLeadingSpaces).min().orElse(0);
+ if (commonSpaces == 0) return string;
+ return Arrays.stream(split).map(line -> line.substring(commonSpaces)).collect(Collectors.joining("\n"));
+ }
+
+ private static int countLeadingSpaces(String input) {
+ return input.length() - input.stripLeading().length();
+ }
+
/*
* Advanced/Convenient Comparisons
*/
@@ -56,8 +67,8 @@ public static BufferedImage readImage(File input) {
* @param two The second number
* @return The difference
*/
- public static double getDiff(double one, double two) {
- return Math.max(one, two) - Math.min(one, two);
+ public static double diff(double one, double two) {
+ return Math.abs(one - two);
}
/**
@@ -67,54 +78,8 @@ public static double getDiff(double one, double two) {
* @param two The second number
* @return The difference
*/
- public static int getDiff(int one, int two) {
- return Math.max(one, two) - Math.min(one, two);
- }
-
- /**
- * Gets if two ints are within a given double.
- *
- * @param one Bound 1
- * @param two Bound 2
- * @param within The number
- * @return If one and two are within `within`
- */
- public static boolean isWithin(int one, int two, double within) {
- double diff = Math.max((double) one, (double) two) - Math.min((double) one, (double) two);
- return diff <= within;
- }
-
- /**
- * Gets if the difference of the two given ints are between both of the two doubles given.
- *
- * @param one The first number
- * @param two The second number
- * @param lowerBound The lower bound to check
- * @param upperBound The upper bound to check
- * @return If the difference of the two given ints are between both of the two doubles given
- */
- public static boolean isWithin(int one, int two, double lowerBound, double upperBound) {
- double diff = Math.max((double) one, (double) two) - Math.min((double) one, (double) two);
- return diff <= upperBound && lowerBound <= diff;
- }
-
- /**
- * Gets the percentage difference of two different 2D boolean arrays.
- *
- * @param input1 The first 2D array
- * @param input2 The second 2D array
- * @return The percentage difference <= 1
- */
- public static double getDifferencesFrom2D(boolean[][] input1, boolean[][] input2) {
- if (input1.length != input2.length) return 1D;
- double result = 0;
- for (int x = 0; x < input1.length; x++) {
- for (int y = 0; y < input1[0].length; y++) {
- if (input1[x][y] != input2[x][y]) result++;
- }
- }
-
- return result / ((double) input1.length * (double) input1[0].length);
+ public static int diff(int one, int two) {
+ return Math.abs(one - two);
}
/**
@@ -124,34 +89,19 @@ public static double getDifferencesFrom2D(boolean[][] input1, boolean[][] input2
* @param input2 The second array
* @return An array with the same length as the inputs containing the difference of both arrays' respective values
*/
- public static Optional getDifferencesFrom(double[] input1, double[] input2) {
- if (input1.length != input2.length) return Optional.empty();
- double[] ret = new double[input1.length];
+ public static OptionalDouble getDifferencesFrom(double[] input1, double[] input2) {
+ if (input1 == null || input2 == null || input1.length != input2.length) return OptionalDouble.empty();
+ var res = 0D;
for (int i = 0; i < input1.length; i++) {
- double one = input1[i];
- double two = input2[i];
-
- ret[i] = Math.max(one, two) - Math.min(one, two);
+ res += Math.pow(input1[i] - input2[i], 2);
}
- return Optional.of(ret);
- }
-
- /**
- * Gets if a given number is within two bounds. The same as {@link #isWithin(double, double, double)} but with ints.
- *
- * @param lowerBound The lower bound to check
- * @param upperBound The upper bound to check
- * @param value The value to check
- * @return If the two values are within the given bounds
- */
- public static boolean isWithin(int lowerBound, int upperBound, int value) {
- return lowerBound <= value && value <= upperBound;
+ return OptionalDouble.of(res);
}
/**
- * Gets if a given number is within two bounds. The same as {@link #isWithin(int, int, double)} but with doubles.
+ * Gets if a given number is within two bounds.
*
* @param lowerBound The lower bound to check
* @param upperBound The upper bound to check
@@ -162,18 +112,6 @@ public static boolean isWithin(double lowerBound, double upperBound, double valu
return lowerBound <= value && value <= upperBound;
}
- /**
- * Gets if the difference or two doubles is less than or equal to another given double.
- *
- * @param num1 The first number
- * @param num2 The second number
- * @param amount The inclusive amount the difference can be
- * @return If the difference is less than or equal to the `amount`
- */
- public static boolean checkDifference(double num1, double num2, double amount) {
- return Math.max(num1, num2) - Math.min(num1, num2) <= amount;
- }
-
/*
* Image-related methods
*/
@@ -224,65 +162,21 @@ public static boolean isRowPopulated(boolean[][] values, int y) {
}
/**
- * Gets all the characters between the two Y values (The line bounds) form the {@link SearchCharacter} list.
- *
- * @param topY The top Y value of the line
- * @param bottomY The bottom Y value of the line
- * @param searchCharacters The {@link SearchCharacter} list to check from
- * @return The {@link SearchCharacter} objects between the given Y values
- */
- public static List findCharactersAtLine(int topY, int bottomY, List searchCharacters) {
- return searchCharacters
- .stream()
- .sorted()
- .filter(searchCharacter -> OCRUtils.isWithin(topY, bottomY, searchCharacter.getY()))
- .collect(Collectors.toCollection(LinkedList::new));
- }
-
- /**
- * Sets all pixels from input to temp. When running in the program if the System property `newocr.rewrite` is set to
- * true, it will write the image to stop any weird image decoding issues
+ * Binarizes the input image, making all pixels either black or white with an alpha of 255
*
- * @param temp The empty image with the same size as the input that will be written to
- * @param input The input that will be read from
+ * @param input The input image to be filtered
+ * @return The filtered image
*/
- public static void rewriteImage(BufferedImage temp, BufferedImage input) {
- for (int y = 0; y < temp.getHeight(); y++) {
- for (int x = 0; x < temp.getWidth(); x++) {
- temp.setRGB(x, y, input.getRGB(x, y));
- }
- }
- }
+ public static Optional filter(BufferedImage input) {
+ var result = new BufferedImage(input.getWidth(), input.getHeight(), BufferedImage.TYPE_INT_ARGB);
- /**
- * Gets if a {@link SearchCharacter} is fully black for things like . or the sections of =
- *
- * @param searchCharacter The input {@link SearchCharacter} to check
- * @return If the input is all black
- */
- public static boolean isAllBlack(SearchCharacter searchCharacter) {
- // TODO: Replace with a difference check with threshold and/or a circular check for other fonts
- for (boolean[] row : searchCharacter.getValues()) {
- for (boolean bool : row) {
- if (!bool) return false;
+ for (int y = 0; y < input.getHeight(); y++) {
+ for (int x = 0; x < input.getWidth(); x++) {
+ result.setRGB(x, y, isBlack(input, x, y) ? Color.BLACK.getRGB() : Color.WHITE.getRGB());
}
}
- return true;
- }
-
- /**
- * Binarizes the input image, making all pixels wither black or white with an alpha of 255
- *
- * @param bufferedImage The input image to be mutated
- */
- public static void filter(BufferedImage bufferedImage) {
- for (int y = 0; y < bufferedImage.getHeight(); y++) {
- for (int x = 0; x < bufferedImage.getWidth(); x++) {
- Color writeColor = isBlack(bufferedImage, x, y) ? new Color(0, 0, 0, 255) : new Color(255, 255, 255, 255);
- bufferedImage.setRGB(x, y, writeColor.getRGB());
- }
- }
+ return Optional.of(result);
}
/**
@@ -296,200 +190,12 @@ public static void filter(BufferedImage bufferedImage) {
public static boolean isBlack(BufferedImage image, int x, int y) {
try {
Color pixel = new Color(image.getRGB(x, y));
- return (pixel.getRed() + pixel.getGreen() + pixel.getBlue()) / 3 < 255 * 0.75;
+ return (pixel.getRed() + pixel.getGreen() + pixel.getBlue()) / 3D < 255 * 0.75;
} catch (ArrayIndexOutOfBoundsException e) {
return true;
}
}
- /*
- * Getting array sections
- */
-
- /**
- * Splits a grid of values in half horizontally
- *
- * @param values The grid to split
- * @return A stream of 2 halves, top and bottom
- */
- public static Stream getHorizontalHalf(boolean[][] values) {
- int topHeight = values.length / 2;
- int bottomHeight = values.length - topHeight;
-
- boolean[][] topHalf = new boolean[topHeight][];
- boolean[][] bottomHalf = new boolean[bottomHeight][];
-
- for (int y = 0; y < values.length; y++) {
- if (y < topHeight) {
- topHalf[y] = values[y];
- } else {
- bottomHalf[y - topHeight] = values[y];
- }
- }
-
- return Stream.of(topHalf, bottomHalf).sequential();
- }
-
- /**
- * Splits a grid of values in thirds horizontally
- *
- * @param values The grid to split
- * @return A stream of 3 thirds: top, middle, and bottom
- */
- public static Stream getHorizontalThird(boolean[][] values) {
- int topHeight = values.length / 3;
- int middleHeight = values.length - topHeight * 2;
- int bottomHeight = topHeight;
-
- boolean[][] topThird = new boolean[topHeight][];
- boolean[][] middleThird = new boolean[middleHeight][];
- boolean[][] bottomThird = new boolean[bottomHeight][];
-
- for (int y = 0; y < values.length; y++) {
- if (y < topHeight) {
- topThird[y] = values[y];
- } else if (y < topHeight + middleHeight) {
- middleThird[y - topHeight] = values[y];
- } else {
- bottomThird[y - topHeight - middleHeight] = values[y];
- }
- }
-
- return Stream.of(topThird, middleThird, bottomThird).sequential();
- }
-
- /**
- * Splits a grid of values in half vertically
- *
- * @param values The grid to split
- * @return A stream of 2 halves, left and right
- */
- public static Stream getVerticalHalf(boolean[][] values) {
- if (values.length == 0) return Stream.of(null, null);
- int leftHeight = values[0].length / 2;
- int rightHeight = values[0].length - leftHeight;
-
- boolean[][] leftHalf = new boolean[values.length][];
- boolean[][] rightHalf = new boolean[values.length][];
-
- for (int i = 0; i < values.length; i++) {
- leftHalf[i] = new boolean[leftHeight];
- rightHalf[i] = new boolean[rightHeight];
- }
-
- for (int y = 0; y < values.length; y++) {
- for (int x = 0; x < values[0].length; x++) {
- if (x < leftHeight) {
- leftHalf[y][x] = values[y][x];
- } else {
- rightHalf[y][x - leftHeight] = values[y][x];
- }
- }
- }
-
- return Stream.of(leftHalf, rightHalf).sequential();
- }
-
- /**
- * Splits a grid of values in thirds vertically
- *
- * @param values The grid to split
- * @return A stream of 3 thirds: left, middle, and right
- */
- public static Stream getVerticalThird(boolean[][] values) {
- if (values.length == 0) return Stream.of(ZERO_PLACEHOLDER, ZERO_PLACEHOLDER, ZERO_PLACEHOLDER);
- int leftHeight = values[0].length / 3;
- int middleHeight = values[0].length - leftHeight * 2;
-
- int leftSize = 0, leftTrue = 0;
- int middleSize = 0, middleTrue = 0;
- int rightSize = 0, rightTrue = 0;
-
- for (boolean[] line : values) {
- for (int x = 0; x < values[0].length; x++) {
- if (x < leftHeight) {
- if (line[x]) {
- leftTrue++;
- }
-
- leftSize++;
- } else if (x < middleHeight + leftHeight) {
- if (line[x]){
- middleTrue++;
- }
-
- middleSize++;
- } else {
- if (line[x]) {
- rightTrue++;
- }
-
- rightSize++;
- }
- }
- }
-
- return Stream.of(new IntPair(leftTrue, leftSize), new IntPair(middleTrue, middleSize), new IntPair(rightTrue, rightSize));
- }
-
- /**
- * Splits a grid of values in half diagonally. The diagonal line will be going from the top left to bototm right if
- * `increasing` is `true`, and top left to bottom right if it is `false`.
- *
- * @param values The grid to split into halves diagonally
- * @param increasing The line's slope will be positive when `true`, and negative when `false`.
- * @return A List of 2 halves
- */
- public static List getDiagonal(boolean[][] values, boolean increasing) {
- int topSize = 0;
- int topTrue = 0;
- int bottomSize = 0;
- int bottomTrue = 0;
-
- if (values != null) {
- double slope = (double) values.length / (double) values[0].length;
-
- IntList yPositions = new IntArrayList();
-
- for (int x = 0; x < values[0].length; x++) {
- double y = slope * x;
-
- if (increasing) {
- y = values.length - y;
- }
-
- yPositions.add((int) y);
- }
-
- for (int x = 0; x < values[0].length; x++) {
- int yPos = yPositions.get(x);
-
- for (int y = 0; y < values.length; y++) {
- if (y < yPos) {
- if (values[y][x]) {
- bottomTrue++;
- }
-
- bottomSize++;
- } else {
- if (values[y][x]) {
- topTrue++;
- }
-
- topSize++;
- }
- }
- }
- }
-
- List ret = new LinkedList<>();
-
- ret.add(new IntPair(topTrue, topSize));
- ret.add(new IntPair(bottomTrue, bottomSize));
-
- return ret;
- }
-
/*
* For debugging
*/
@@ -558,7 +264,7 @@ public static void printOut(boolean[][] values) {
System.out.print(bool ? "#" : " ");
}
- System.out.println("");
+ System.out.println();
}
}
}
diff --git a/src/main/java/com/uddernetworks/newocr/utils/SegmentationUtils.java b/src/main/java/com/uddernetworks/newocr/utils/SegmentationUtils.java
new file mode 100644
index 0000000..dfa4360
--- /dev/null
+++ b/src/main/java/com/uddernetworks/newocr/utils/SegmentationUtils.java
@@ -0,0 +1,200 @@
+package com.uddernetworks.newocr.utils;
+
+import it.unimi.dsi.fastutil.ints.IntArrayList;
+import it.unimi.dsi.fastutil.ints.IntList;
+
+import java.util.LinkedList;
+import java.util.List;
+import java.util.stream.Stream;
+
+/**
+ * A utility class for methods to create segments separating character images for training and scanning.
+ */
+public class SegmentationUtils {
+
+ public static final IntPair ZERO_PLACEHOLDER = new IntPair(0, 0);
+
+ /**
+ * Splits a grid of values in half horizontally
+ *
+ * @param values The grid to split
+ * @return A stream of 2 halves, top and bottom
+ */
+ public static Stream getHorizontalHalf(boolean[][] values) {
+ int topHeight = values.length / 2;
+ int bottomHeight = values.length - topHeight;
+
+ boolean[][] topHalf = new boolean[topHeight][];
+ boolean[][] bottomHalf = new boolean[bottomHeight][];
+
+ for (int y = 0; y < values.length; y++) {
+ if (y < topHeight) {
+ topHalf[y] = values[y];
+ } else {
+ bottomHalf[y - topHeight] = values[y];
+ }
+ }
+
+ return Stream.of(topHalf, bottomHalf).sequential();
+ }
+
+ /**
+ * Splits a grid of values in thirds horizontally
+ *
+ * @param values The grid to split
+ * @return A stream of 3 thirds: top, middle, and bottom
+ */
+ public static Stream getHorizontalThird(boolean[][] values) {
+ int topHeight = values.length / 3;
+ int middleHeight = values.length - topHeight * 2;
+
+ boolean[][] topThird = new boolean[topHeight][];
+ boolean[][] middleThird = new boolean[middleHeight][];
+ boolean[][] bottomThird = new boolean[topHeight][];
+
+ for (int y = 0; y < values.length; y++) {
+ if (y < topHeight) {
+ topThird[y] = values[y];
+ } else if (y < topHeight + middleHeight) {
+ middleThird[y - topHeight] = values[y];
+ } else {
+ bottomThird[y - topHeight - middleHeight] = values[y];
+ }
+ }
+
+ return Stream.of(topThird, middleThird, bottomThird).sequential();
+ }
+
+ /**
+ * Splits a grid of values in half vertically
+ *
+ * @param values The grid to split
+ * @return A stream of 2 halves, left and right
+ */
+ public static Stream getVerticalHalf(boolean[][] values) {
+ if (values.length == 0) return Stream.of(null, null);
+ int leftHeight = values[0].length / 2;
+ int rightHeight = values[0].length - leftHeight;
+
+ boolean[][] leftHalf = new boolean[values.length][];
+ boolean[][] rightHalf = new boolean[values.length][];
+
+ for (int i = 0; i < values.length; i++) {
+ leftHalf[i] = new boolean[leftHeight];
+ rightHalf[i] = new boolean[rightHeight];
+ }
+
+ for (int y = 0; y < values.length; y++) {
+ for (int x = 0; x < values[0].length; x++) {
+ if (x < leftHeight) {
+ leftHalf[y][x] = values[y][x];
+ } else {
+ rightHalf[y][x - leftHeight] = values[y][x];
+ }
+ }
+ }
+
+ return Stream.of(leftHalf, rightHalf).sequential();
+ }
+
+ /**
+ * Splits a grid of values in thirds vertically
+ *
+ * @param values The grid to split
+ * @return A stream of 3 thirds: left, middle, and right
+ */
+ public static Stream getVerticalThird(boolean[][] values) {
+ if (values.length == 0) return Stream.of(ZERO_PLACEHOLDER, ZERO_PLACEHOLDER, ZERO_PLACEHOLDER);
+ int leftHeight = values[0].length / 3;
+ int middleHeight = values[0].length - leftHeight * 2;
+
+ int leftSize = 0, leftTrue = 0;
+ int middleSize = 0, middleTrue = 0;
+ int rightSize = 0, rightTrue = 0;
+
+ for (boolean[] line : values) {
+ for (int x = 0; x < values[0].length; x++) {
+ if (x < leftHeight) {
+ if (line[x]) {
+ leftTrue++;
+ }
+
+ leftSize++;
+ } else if (x < middleHeight + leftHeight) {
+ if (line[x]) {
+ middleTrue++;
+ }
+
+ middleSize++;
+ } else {
+ if (line[x]) {
+ rightTrue++;
+ }
+
+ rightSize++;
+ }
+ }
+ }
+
+ return Stream.of(new IntPair(leftTrue, leftSize), new IntPair(middleTrue, middleSize), new IntPair(rightTrue, rightSize));
+ }
+
+ /**
+ * Splits a grid of values in half diagonally. The diagonal line will be going from the bottom left to top right if
+ * `increasing` is `true`, and top left to bottom right if it is `false`.
+ *
+ * @param values The grid to split into halves diagonally
+ * @param increasing The line's slope will be positive when `true`, and negative when `false`.
+ * @return A List of 2 halves
+ */
+ public static List getDiagonal(boolean[][] values, boolean increasing) {
+ int topSize = 0;
+ int topTrue = 0;
+ int bottomSize = 0;
+ int bottomTrue = 0;
+
+ if (values != null) {
+ double slope = (double) values.length / (double) values[0].length;
+
+ IntList yPositions = new IntArrayList();
+
+ for (int x = 0; x < values[0].length; x++) {
+ double y = slope * x;
+
+ if (increasing) {
+ y = values.length - y;
+ }
+
+ yPositions.add((int) y);
+ }
+
+ for (int x = 0; x < values[0].length; x++) {
+ int yPos = yPositions.getInt(x);
+
+ for (int y = 0; y < values.length; y++) {
+ if (y < yPos) {
+ if (values[y][x]) {
+ bottomTrue++;
+ }
+
+ bottomSize++;
+ } else {
+ if (values[y][x]) {
+ topTrue++;
+ }
+
+ topSize++;
+ }
+ }
+ }
+ }
+
+ List ret = new LinkedList<>();
+
+ ret.add(new IntPair(topTrue, topSize));
+ ret.add(new IntPair(bottomTrue, bottomSize));
+
+ return ret;
+ }
+
+}
diff --git a/src/main/java/module-info.java b/src/main/java/module-info.java
index 2a2daa8..810594b 100644
--- a/src/main/java/module-info.java
+++ b/src/main/java/module-info.java
@@ -1,13 +1,22 @@
module NewOCR {
requires java.desktop;
requires java.sql;
-
+
requires com.zaxxer.hikari;
requires it.unimi.dsi.fastutil;
-
+ requires slf4j.api;
+ requires typesafe.config;
+
exports com.uddernetworks.newocr;
exports com.uddernetworks.newocr.character;
+ exports com.uddernetworks.newocr.configuration;
exports com.uddernetworks.newocr.database;
+ exports com.uddernetworks.newocr.detection;
+ exports com.uddernetworks.newocr.recognition;
+ exports com.uddernetworks.newocr.recognition.mergence;
+ exports com.uddernetworks.newocr.recognition.mergence.rules;
+ exports com.uddernetworks.newocr.recognition.similarity;
+ exports com.uddernetworks.newocr.recognition.similarity.rules;
exports com.uddernetworks.newocr.train;
exports com.uddernetworks.newocr.utils;
}
\ No newline at end of file
diff --git a/src/main/resources/addAverageData.sql b/src/main/resources/addAverageData.sql
new file mode 100644
index 0000000..e2c5b9a
--- /dev/null
+++ b/src/main/resources/addAverageData.sql
@@ -0,0 +1 @@
+INSERT INTO data VALUES(?, ?);
\ No newline at end of file
diff --git a/src/main/resources/addCustomSpace.sql b/src/main/resources/addCustomSpace.sql
new file mode 100644
index 0000000..0ec67eb
--- /dev/null
+++ b/src/main/resources/addCustomSpace.sql
@@ -0,0 +1 @@
+INSERT INTO customSpaces VALUES(?, ?) ON DUPLICATE KEY UPDATE value = value;
\ No newline at end of file
diff --git a/src/main/resources/addLetterSegment.sql b/src/main/resources/addLetterSegment.sql
index e88a054..5e000cb 100644
--- a/src/main/resources/addLetterSegment.sql
+++ b/src/main/resources/addLetterSegment.sql
@@ -1 +1 @@
-INSERT INTO sectionData VALUES (?, ?, ?, ?, ?);
\ No newline at end of file
+INSERT INTO sectionData VALUES (?, ?, ?, ?);
\ No newline at end of file
diff --git a/src/main/resources/addLetterSize.sql b/src/main/resources/addLetterSize.sql
deleted file mode 100644
index a267ea4..0000000
--- a/src/main/resources/addLetterSize.sql
+++ /dev/null
@@ -1 +0,0 @@
-INSERT INTO sizing VALUES (?, ?, ?) ON DUPLICATE KEY UPDATE letter = letter;
\ No newline at end of file
diff --git a/src/main/resources/booleanProperties.sql b/src/main/resources/booleanProperties.sql
new file mode 100644
index 0000000..1325cb2
--- /dev/null
+++ b/src/main/resources/booleanProperties.sql
@@ -0,0 +1,4 @@
+CREATE TABLE IF NOT EXISTS boolean_properties (
+ name VARCHAR(64),
+ value INT
+);
\ No newline at end of file
diff --git a/src/main/resources/clearLetterSegments.sql b/src/main/resources/clearLetterSegments.sql
index 413cf01..3d3a3bf 100644
--- a/src/main/resources/clearLetterSegments.sql
+++ b/src/main/resources/clearLetterSegments.sql
@@ -1 +1 @@
-DELETE FROM %s WHERE letter = ? AND minFontSize = ? AND maxFontSize = ?;
\ No newline at end of file
+DELETE FROM %s WHERE letter = ?;
\ No newline at end of file
diff --git a/src/main/resources/createLetterEntry.sql b/src/main/resources/createLetterEntry.sql
index d3f123f..3aa44f5 100644
--- a/src/main/resources/createLetterEntry.sql
+++ b/src/main/resources/createLetterEntry.sql
@@ -1 +1 @@
-INSERT INTO letters VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?);
\ No newline at end of file
+INSERT INTO letters VALUES (?, ?, ?, ?, ?, ?, ?);
\ No newline at end of file
diff --git a/src/main/resources/customSpaces.sql b/src/main/resources/customSpaces.sql
new file mode 100644
index 0000000..3d0680e
--- /dev/null
+++ b/src/main/resources/customSpaces.sql
@@ -0,0 +1,4 @@
+CREATE TABLE IF NOT EXISTS customSpaces (
+ letter INT UNIQUE,
+ value DOUBLE
+);
\ No newline at end of file
diff --git a/src/main/resources/data.sql b/src/main/resources/data.sql
new file mode 100644
index 0000000..2646a61
--- /dev/null
+++ b/src/main/resources/data.sql
@@ -0,0 +1,4 @@
+CREATE TABLE IF NOT EXISTS data (
+ name VARCHAR(64),
+ value DOUBLE
+);
\ No newline at end of file
diff --git a/src/main/resources/fonts/Calibri.conf b/src/main/resources/fonts/Calibri.conf
new file mode 100644
index 0000000..6e0099b
--- /dev/null
+++ b/src/main/resources/fonts/Calibri.conf
@@ -0,0 +1,11 @@
+include "Default.conf"
+language {
+ properties {
+ system-name: "Calibri"
+ }
+ options {
+ special-spaces: ["`", "|", "{", "}", "!"]
+ max-percent-diff-to-merge: 0.5
+ size-ratio-weight: 4
+ }
+}
\ No newline at end of file
diff --git a/src/main/resources/fonts/ComicSans.conf b/src/main/resources/fonts/ComicSans.conf
new file mode 100644
index 0000000..cd62e40
--- /dev/null
+++ b/src/main/resources/fonts/ComicSans.conf
@@ -0,0 +1,18 @@
+include "Default.conf"
+language {
+ properties {
+ system-name: "Comic Sans MS"
+ friendly-name: "Comic Sans"
+ }
+ options {
+ special-spaces: ["`"]
+ max-percent-diff-to-merge: 0.5
+ size-ratio-weight: 5.5
+ }
+ similarities {
+ vertical-line {
+ name: vertical-line
+ letters: [APOSTROPHE, QUOTE_LEFT, QUOTE_RIGHT, PIPE, l, i, EXCLAMATION, PERIOD, COLON_TOP, COLON_BOTTOM, EXCLAMATION_DOT, SEMICOLON_TOP, i_DOT, j_DOT, QUESTION_MARK_BOTTOM]
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/main/resources/fonts/Consolas.conf b/src/main/resources/fonts/Consolas.conf
new file mode 100644
index 0000000..5bbd2c9
--- /dev/null
+++ b/src/main/resources/fonts/Consolas.conf
@@ -0,0 +1,11 @@
+include "Default.conf"
+language {
+ properties {
+ system-name: "Consolas"
+ }
+ options {
+ special-spaces: ["`"]
+ max-percent-diff-to-merge: 0.5
+ size-ratio-weight: 4
+ }
+}
\ No newline at end of file
diff --git a/src/main/resources/fonts/CourierNew.conf b/src/main/resources/fonts/CourierNew.conf
new file mode 100644
index 0000000..0ba8160
--- /dev/null
+++ b/src/main/resources/fonts/CourierNew.conf
@@ -0,0 +1,17 @@
+include "Default.conf"
+language {
+ properties {
+ system-name: "Courier New"
+ }
+ options {
+ special-spaces: ["`", "'", "{", "|", "}"]
+ max-percent-diff-to-merge: 0.5
+ size-ratio-weight: 4
+ }
+ similarities {
+ percent-base {
+ name: percent-base
+ letters: [PERCENT_BASE, FORWARD_SLASH, TILDE]
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/main/resources/fonts/Default.conf b/src/main/resources/fonts/Default.conf
new file mode 100644
index 0000000..8b51e25
--- /dev/null
+++ b/src/main/resources/fonts/Default.conf
@@ -0,0 +1,42 @@
+language {
+ properties {
+ system-name: null
+ friendly-name: ${language.properties.system-name}
+ }
+ options {
+ special-spaces: ["`"]
+ max-percent-diff-to-merge: 0.5
+ size-ratio-weight: 4
+ }
+ similarities {
+ dot {
+ name: dot
+ letters: [PERIOD, COLON_TOP, COLON_BOTTOM, EXCLAMATION_DOT, SEMICOLON_TOP, i_DOT, j_DOT, QUESTION_MARK_BOTTOM]
+ }
+ horizontal-line {
+ name: horizontal-line
+ letters: [MINUS, EQUALS_BOTTOM, EQUALS_TOP, UNDERSCORE]
+ }
+ percent-dot {
+ name: percent-dot
+ letters: [PERCENT_LDOT, PERCENT_RDOT, o]
+ }
+ percent-base {
+ name: percent-base
+ letters: [PERCENT_BASE, FORWARD_SLASH]
+ }
+ vertical-line {
+ name: vertical-line
+ letters: [APOSTROPHE, QUOTE_LEFT, QUOTE_RIGHT, PIPE, l, i, EXCLAMATION]
+ }
+ }
+ mergence {
+ rules: [
+ com.uddernetworks.newocr.recognition.mergence.rules.ApostropheMergeRule,
+ com.uddernetworks.newocr.recognition.mergence.rules.EqualVerticalMergeRule,
+ com.uddernetworks.newocr.recognition.mergence.rules.OverDotMergeRule,
+ com.uddernetworks.newocr.recognition.mergence.rules.PercentMergeRule,
+ com.uddernetworks.newocr.recognition.mergence.rules.UnderDotMergeRule,
+ ]
+ }
+}
\ No newline at end of file
diff --git a/src/main/resources/fonts/Monospaced.plain.conf b/src/main/resources/fonts/Monospaced.plain.conf
new file mode 100644
index 0000000..6a407fa
--- /dev/null
+++ b/src/main/resources/fonts/Monospaced.plain.conf
@@ -0,0 +1,18 @@
+include "Default.conf"
+language {
+ properties {
+ system-name: "Monospaced.plain"
+ friendly-name: "Monospaced"
+ }
+ options {
+ special-spaces: ["`", "'", "|", "{", "}"]
+ max-percent-diff-to-merge: 0.5
+ size-ratio-weight: 4
+ }
+ similarities {
+ percent-base {
+ name: percent-base
+ letters: [PERCENT_BASE, FORWARD_SLASH, TILDE]
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/main/resources/fonts/Verdana.conf b/src/main/resources/fonts/Verdana.conf
new file mode 100644
index 0000000..94b752d
--- /dev/null
+++ b/src/main/resources/fonts/Verdana.conf
@@ -0,0 +1,6 @@
+include "Default.conf"
+language {
+ properties {
+ system-name: "Verdana"
+ }
+}
\ No newline at end of file
diff --git a/src/main/resources/getAverageData.sql b/src/main/resources/getAverageData.sql
new file mode 100644
index 0000000..9dc5cdb
--- /dev/null
+++ b/src/main/resources/getAverageData.sql
@@ -0,0 +1 @@
+SELECT AVG(value) FROM data WHERE name = ?;
\ No newline at end of file
diff --git a/src/main/resources/getBooleanProperty.sql b/src/main/resources/getBooleanProperty.sql
new file mode 100644
index 0000000..7deb65e
--- /dev/null
+++ b/src/main/resources/getBooleanProperty.sql
@@ -0,0 +1 @@
+SELECT value FROM boolean_properties WHERE name = ? LIMIT 1;
\ No newline at end of file
diff --git a/src/main/resources/getCustomSpace.sql b/src/main/resources/getCustomSpace.sql
new file mode 100644
index 0000000..c310945
--- /dev/null
+++ b/src/main/resources/getCustomSpace.sql
@@ -0,0 +1 @@
+SELECT value FROM customSpaces WHERE letter = ?;
\ No newline at end of file
diff --git a/src/main/resources/getLetterEntry.sql b/src/main/resources/getLetterEntry.sql
index baaa9dc..e8e731e 100644
--- a/src/main/resources/getLetterEntry.sql
+++ b/src/main/resources/getLetterEntry.sql
@@ -1 +1 @@
-SELECT avgWidth, avgHeight, minFontSize, maxFontSize, minCenter, maxCenter, hasDot, letterMeta FROM letters WHERE letter = ? AND minFontSize = ? AND maxFontSize = ? ORDER BY letter;
\ No newline at end of file
+SELECT avgWidth, avgHeight, minCenter, maxCenter FROM letters WHERE letter = ? AND modifier = ? ORDER BY letter;
\ No newline at end of file
diff --git a/src/main/resources/getLetterSize.sql b/src/main/resources/getLetterSize.sql
deleted file mode 100644
index 499eddc..0000000
--- a/src/main/resources/getLetterSize.sql
+++ /dev/null
@@ -1 +0,0 @@
-SELECT size FROM sizing WHERE letter = ? ORDER BY ABS(height - /* inputHeight */ ?);
\ No newline at end of file
diff --git a/src/main/resources/getSpaceEntry.sql b/src/main/resources/getSpaceEntry.sql
index 483b188..2e80938 100644
--- a/src/main/resources/getSpaceEntry.sql
+++ b/src/main/resources/getSpaceEntry.sql
@@ -1 +1 @@
-SELECT avgWidth, avgHeight, minFontSize, maxFontSize, minCenter, maxCenter FROM letters WHERE isSpace = TRUE ORDER BY letter;
\ No newline at end of file
+SELECT avgWidth, avgHeight, minCenter, maxCenter FROM letters WHERE isSpace = TRUE ORDER BY letter;
\ No newline at end of file
diff --git a/src/main/resources/letters.sql b/src/main/resources/letters.sql
index c15a282..367b357 100644
--- a/src/main/resources/letters.sql
+++ b/src/main/resources/letters.sql
@@ -1,13 +1,10 @@
CREATE TABLE IF NOT EXISTS letters (
letter INTEGER, -- The letter the data set is for
+ modifier INTEGER, -- The modifier number of the letter. E.g. different parts of a "
avgWidth DOUBLE, -- The average width of all tested character images of this letter
avgHeight DOUBLE, -- The average height of all tested character images of this letter
- minFontSize INTEGER, -- The minimum font size this data set was trained on
- maxFontSize INTEGER, -- The maximum font size this data set was trained on
minCenter DOUBLE,
maxCenter DOUBLE,
- hasDot BOOLEAN,
- letterMeta INTEGER,
isSpace BOOLEAN,
- UNIQUE(letter, minFontSize, maxFontSize)
+ UNIQUE(letter, modifier)
);
\ No newline at end of file
diff --git a/src/main/resources/sectionData.sql b/src/main/resources/sectionData.sql
index b050c1f..a80c8cb 100644
--- a/src/main/resources/sectionData.sql
+++ b/src/main/resources/sectionData.sql
@@ -1,7 +1,6 @@
CREATE TABLE IF NOT EXISTS sectionData (
letter INTEGER,
- minFontSize INTEGER,
- maxFontSize INTEGER,
+ modifier INTEGER,
sectionIndex INTEGER,
data DOUBLE
);
\ No newline at end of file
diff --git a/src/main/resources/selectAllSegments.sql b/src/main/resources/selectAllSegments.sql
index acacf5d..e9313b6 100644
--- a/src/main/resources/selectAllSegments.sql
+++ b/src/main/resources/selectAllSegments.sql
@@ -1 +1 @@
-SELECT CHAR(letter) AS letter, sectionIndex, data FROM sectionData WHERE minFontSize = ? AND maxFontSize = ? ORDER BY letter, sectionIndex;
\ No newline at end of file
+SELECT CHAR(letter) AS letter, modifier, sectionIndex, data FROM sectionData ORDER BY letter, modifier, sectionIndex;
\ No newline at end of file
diff --git a/src/main/resources/selectSegments.sql b/src/main/resources/selectSegments.sql
deleted file mode 100644
index 0e96ffd..0000000
--- a/src/main/resources/selectSegments.sql
+++ /dev/null
@@ -1 +0,0 @@
-SELECT sectionIndex, data FROM sectionData WHERE letter = ?;
\ No newline at end of file
diff --git a/src/main/resources/setBooleanProperty.sql b/src/main/resources/setBooleanProperty.sql
new file mode 100644
index 0000000..882ebcf
--- /dev/null
+++ b/src/main/resources/setBooleanProperty.sql
@@ -0,0 +1 @@
+INSERT INTO boolean_properties VALUES(?, ?);
\ No newline at end of file
diff --git a/src/main/resources/sizing.sql b/src/main/resources/sizing.sql
deleted file mode 100644
index a2b6d50..0000000
--- a/src/main/resources/sizing.sql
+++ /dev/null
@@ -1,6 +0,0 @@
-CREATE TABLE IF NOT EXISTS sizing (
- letter INTEGER,
- size INTEGER,
- height INTEGER,
- UNIQUE(letter, height)
-);
\ No newline at end of file
diff --git a/src/test/java/com/uddernetworks/newocr/FontTestNameGenerator.java b/src/test/java/com/uddernetworks/newocr/FontTestNameGenerator.java
new file mode 100644
index 0000000..aefa8b2
--- /dev/null
+++ b/src/test/java/com/uddernetworks/newocr/FontTestNameGenerator.java
@@ -0,0 +1,34 @@
+package com.uddernetworks.newocr;
+
+import org.junit.jupiter.api.DisplayNameGenerator;
+
+import java.lang.reflect.Method;
+
+public class FontTestNameGenerator implements DisplayNameGenerator {
+
+ @Override
+ public String generateDisplayNameForClass(Class> testClass) {
+ var className = testClass.getSimpleName();
+ if (!className.startsWith("Font")) return className;
+ return className.substring(4);
+ }
+
+ @Override
+ public String generateDisplayNameForNestedClass(Class> nestedClass) {
+ return generateDisplayNameForClass(nestedClass);
+ }
+
+ @Override
+ public String generateDisplayNameForMethod(Class> testClass, Method testMethod) {
+ var className = testClass.getSimpleName();
+ var methodName = testMethod.getName();
+ var defaultName = className + "#" + methodName;
+ if (!className.startsWith("Font")) return defaultName;
+
+ if (methodName.equals("accuracyTest")) {
+ return className.substring(4) + " Accuracy";
+ }
+
+ return defaultName;
+ }
+}
diff --git a/src/test/java/com/uddernetworks/newocr/OCRHandleTest.java b/src/test/java/com/uddernetworks/newocr/OCRHandleTest.java
deleted file mode 100644
index 6d8a561..0000000
--- a/src/test/java/com/uddernetworks/newocr/OCRHandleTest.java
+++ /dev/null
@@ -1,72 +0,0 @@
-package com.uddernetworks.newocr;
-
-import com.uddernetworks.newocr.character.ImageLetter;
-import com.uddernetworks.newocr.database.DatabaseManager;
-import com.uddernetworks.newocr.database.OCRDatabaseManager;
-import it.unimi.dsi.fastutil.doubles.DoubleArrayList;
-import java.io.File;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.Future;
-import org.apache.commons.math3.stat.correlation.PearsonsCorrelation;
-import org.junit.Before;
-import org.junit.Test;
-
-
-import static org.junit.Assert.assertTrue;
-
-public class OCRHandleTest {
-
- private final double ACCURACY = -0.9; // What the accuracy threshold of all tests should (Max of 1)
-
- private DatabaseManager databaseManager;
- private OCRHandle ocrHandle;
- private ScannedImage trainImage;
-
- @Before
- public void setUp() throws Exception {
- System.out.println("Setting up database...");
- this.databaseManager = new OCRDatabaseManager(new File("database" + File.separator + "ocr_db"));
- this.ocrHandle = new OCRHandle(this.databaseManager);
-
- System.out.println("Scanning training image...");
- this.trainImage = this.ocrHandle.scanImage(new File("src\\test\\resources\\size\\training.png"));
- }
-
- @Test
- public void characterSizeRecognizer() throws ExecutionException, InterruptedException {
- int characterDepth = 20;
-
- var def = new DoubleArrayList();
- var gen = new DoubleArrayList();
-
- for (int i = 0; i < this.trainImage.getLineCount() * characterDepth; i++) {
- def.add(i);
- gen.add(0D);
- }
-
- for (int i = 0; i < this.trainImage.getLineCount(); i++) {
- for (int i1 = 0; i1 < characterDepth; i1++) {
- ImageLetter firstOfLine = this.trainImage.getLine(i).get(i1);
-
- if (firstOfLine.getLetter() == ' ') { // Ignore spaces (Not found in the database)
- def.set(i * characterDepth + i1, -2D); // The -2 values will be ignored later
- gen.set(i * characterDepth + i1, -2D);
- continue;
- }
-
- Future fontSize = this.ocrHandle.getFontSize(firstOfLine);
- gen.set(i * characterDepth + i1, (double) fontSize.get());
- }
- }
-
- double[] defArray = def.stream().mapToDouble(Double::doubleValue).filter(val -> val != -2D).toArray();
- double[] genArray = gen.stream().mapToDouble(Double::doubleValue).filter(val -> val != -2).toArray();
-
- double coeff = Math.abs(new PearsonsCorrelation().correlation(defArray, genArray));
-
- System.out.println("Accuracy is " + coeff);
-
- assertTrue(coeff >= ACCURACY);
- }
-
-}
diff --git a/src/test/java/com/uddernetworks/newocr/fonts/Accuracy.java b/src/test/java/com/uddernetworks/newocr/fonts/Accuracy.java
new file mode 100644
index 0000000..643addf
--- /dev/null
+++ b/src/test/java/com/uddernetworks/newocr/fonts/Accuracy.java
@@ -0,0 +1,109 @@
+package com.uddernetworks.newocr.fonts;
+
+import com.uddernetworks.newocr.ScannedImage;
+import com.uddernetworks.newocr.configuration.ConfigReflectionCacher;
+import com.uddernetworks.newocr.configuration.FontConfiguration;
+import com.uddernetworks.newocr.configuration.HOCONFontConfiguration;
+import com.uddernetworks.newocr.database.DatabaseManager;
+import com.uddernetworks.newocr.database.OCRDatabaseManager;
+import com.uddernetworks.newocr.recognition.OCRScan;
+import com.uddernetworks.newocr.recognition.OCRTrain;
+import com.uddernetworks.newocr.recognition.mergence.DefaultMergenceManager;
+import com.uddernetworks.newocr.recognition.similarity.DefaultSimilarityManager;
+import com.uddernetworks.newocr.recognition.similarity.SimilarityManager;
+import com.uddernetworks.newocr.train.ComputerTrainGenerator;
+import com.uddernetworks.newocr.train.OCROptions;
+import com.uddernetworks.newocr.train.TrainGeneratorOptions;
+import org.bitbucket.cowwoc.diffmatchpatch.DiffMatchPatch;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.IOException;
+
+import static org.bitbucket.cowwoc.diffmatchpatch.DiffMatchPatch.Operation.DELETE;
+import static org.bitbucket.cowwoc.diffmatchpatch.DiffMatchPatch.Operation.EQUAL;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+public class Accuracy {
+
+ private static Logger LOGGER = LoggerFactory.getLogger(Accuracy.class);
+ private static final double MINIMUM_SUCCESS_RATE = 98; // Requires at least a 98% success rate
+ private static final boolean TRIM_SPACES = true; // If whitespace around each scanned line should be trimmed, to take into account input image padding
+
+ public static ScannedImage generate(String fontFamily, String configFileName) throws IOException {
+ var strippedName = fontFamily.replaceAll("[^a-zA-Z\\d\\s:]", "_");
+ var databaseManager = new OCRDatabaseManager(new File("src\\test\\resources\\database\\ocr_db_" + strippedName));
+ var similarityManager = new DefaultSimilarityManager();
+
+ var fontConfiguration = new HOCONFontConfiguration(configFileName, new ConfigReflectionCacher());
+ var options = fontConfiguration.fetchOptions();
+ fontConfiguration.fetchAndApplySimilarities(similarityManager);
+
+ return generate(fontFamily, options, similarityManager, databaseManager, fontConfiguration);
+ }
+
+ public static ScannedImage generate(String fontFamily, OCROptions options, SimilarityManager similarityManager, DatabaseManager databaseManager, FontConfiguration fontConfiguration) {
+ LOGGER.info("Setting up database...");
+
+ var readingImage = new File("src\\test\\resources\\training_" + fontFamily.replaceAll("[^a-zA-Z\\d\\s:]", "_") + ".png");
+
+ var mergenceManager = new DefaultMergenceManager(databaseManager, similarityManager);
+ var ocrTrain = new OCRTrain(databaseManager, options);
+
+ LOGGER.info("Generating image for {}", fontFamily);
+ new ComputerTrainGenerator().generateTrainingImage(readingImage, new TrainGeneratorOptions()
+ .setFontFamily(fontFamily));
+
+ LOGGER.info("Starting training for {}...", fontFamily);
+
+ var start = System.currentTimeMillis();
+ ocrTrain.trainImage(readingImage);
+
+ LOGGER.info("Finished training in {}ms", System.currentTimeMillis() - start);
+
+ // Some kind of delay is needed here; what causes this still has to be investigated. Check the repo for any
+ // issues on the matter before reporting.
+ try {
+ Thread.sleep(3000);
+ } catch (InterruptedException e) {}
+
+ fontConfiguration.fetchAndApplyMergeRules(mergenceManager);
+ var ocrScan = new OCRScan(databaseManager, options, similarityManager, mergenceManager);
+
+ LOGGER.info("Scanning training image...");
+
+ return ocrScan.scanImage(readingImage);
+ }
+
+ public void accuracyTest(ScannedImage trainImage) {
+ var scannedString = trainImage.getPrettyString();
+ var diffMatchPath = new DiffMatchPatch();
+ var lines = scannedString.split("\n");
+ var differences = 0;
+ for (String line : lines) {
+ line = TRIM_SPACES ? line.trim() : line;
+ var difference = diffMatchPath.diffMain(line, OCRScan.RAW_STRING);
+ final int[] insert = {0};
+ final int[] delete = {0};
+ difference.stream().filter(diff -> diff.operation != EQUAL)
+ .forEach(diff -> {
+ if (diff.operation == DELETE) {
+ delete[0] += diff.text.length();
+ } else {
+ insert[0] += diff.text.length();
+ }
+ });
+ differences += Math.max(insert[0], delete[0]);
+
+ System.out.println(line);
+ }
+
+ var totalChars = lines.length * OCRScan.RAW_STRING.length();
+ var accuracy = (Math.round((1 - (double) differences / (double) totalChars) * 100_00D) / 100D);
+ LOGGER.info("{} errors out of {} at a {}% success rate", differences, totalChars, accuracy);
+
+ assertTrue(accuracy >= MINIMUM_SUCCESS_RATE); // We're looking for at *least* a MINIMUM_SUCCESS_RATE (98%) success rate
+ }
+
+}
diff --git a/src/test/java/com/uddernetworks/newocr/fonts/FontCalibri.java b/src/test/java/com/uddernetworks/newocr/fonts/FontCalibri.java
new file mode 100644
index 0000000..0b38d33
--- /dev/null
+++ b/src/test/java/com/uddernetworks/newocr/fonts/FontCalibri.java
@@ -0,0 +1,24 @@
+package com.uddernetworks.newocr.fonts;
+
+import com.uddernetworks.newocr.FontTestNameGenerator;
+import com.uddernetworks.newocr.ScannedImage;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.DisplayNameGeneration;
+import org.junit.jupiter.api.Test;
+
+/** Accuracy test for the "Calibri" font family; the check itself lives in {@link Accuracy}. */
+@DisplayNameGeneration(FontTestNameGenerator.class)
+public class FontCalibri extends Accuracy {
+    /** Result of {@code generate("Calibri", "fonts/Calibri")}, assigned once by {@link #setUp()}. */
+    private static ScannedImage calibriScan;
+
+    @BeforeAll
+    public static void setUp() throws Exception {
+        calibriScan = Accuracy.generate("Calibri", "fonts/Calibri");
+    }
+
+    @Test
+    public void accuracyTest() {
+        super.accuracyTest(calibriScan);
+    }
+}
diff --git a/src/test/java/com/uddernetworks/newocr/fonts/FontComicSansMS.java b/src/test/java/com/uddernetworks/newocr/fonts/FontComicSansMS.java
new file mode 100644
index 0000000..0164a32
--- /dev/null
+++ b/src/test/java/com/uddernetworks/newocr/fonts/FontComicSansMS.java
@@ -0,0 +1,24 @@
+package com.uddernetworks.newocr.fonts;
+
+import com.uddernetworks.newocr.FontTestNameGenerator;
+import com.uddernetworks.newocr.ScannedImage;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.DisplayNameGeneration;
+import org.junit.jupiter.api.Test;
+
+/** Accuracy test for the "Comic Sans MS" font family; the check itself lives in {@link Accuracy}. */
+@DisplayNameGeneration(FontTestNameGenerator.class)
+public class FontComicSansMS extends Accuracy {
+    /** Result of {@code generate("Comic Sans MS", "fonts/ComicSans")}, assigned once by {@link #setUp()}. */
+    private static ScannedImage comicSansScan;
+
+    @BeforeAll
+    public static void setUp() throws Exception {
+        comicSansScan = Accuracy.generate("Comic Sans MS", "fonts/ComicSans");
+    }
+
+    @Test
+    public void accuracyTest() {
+        super.accuracyTest(comicSansScan);
+    }
+}
diff --git a/src/test/java/com/uddernetworks/newocr/fonts/FontConsolas.java b/src/test/java/com/uddernetworks/newocr/fonts/FontConsolas.java
new file mode 100644
index 0000000..8a9718d
--- /dev/null
+++ b/src/test/java/com/uddernetworks/newocr/fonts/FontConsolas.java
@@ -0,0 +1,24 @@
+package com.uddernetworks.newocr.fonts;
+
+import com.uddernetworks.newocr.FontTestNameGenerator;
+import com.uddernetworks.newocr.ScannedImage;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.DisplayNameGeneration;
+import org.junit.jupiter.api.Test;
+
+/** Accuracy test for the "Consolas" font family; the check itself lives in {@link Accuracy}. */
+@DisplayNameGeneration(FontTestNameGenerator.class)
+public class FontConsolas extends Accuracy {
+    /** Result of {@code generate("Consolas", "fonts/Consolas")}, assigned once by {@link #setUp()}. */
+    private static ScannedImage consolasScan;
+
+    @BeforeAll
+    public static void setUp() throws Exception {
+        consolasScan = Accuracy.generate("Consolas", "fonts/Consolas");
+    }
+
+    @Test
+    public void accuracyTest() {
+        super.accuracyTest(consolasScan);
+    }
+}
diff --git a/src/test/java/com/uddernetworks/newocr/fonts/FontCourierNew.java b/src/test/java/com/uddernetworks/newocr/fonts/FontCourierNew.java
new file mode 100644
index 0000000..4ff9770
--- /dev/null
+++ b/src/test/java/com/uddernetworks/newocr/fonts/FontCourierNew.java
@@ -0,0 +1,24 @@
+package com.uddernetworks.newocr.fonts;
+
+import com.uddernetworks.newocr.FontTestNameGenerator;
+import com.uddernetworks.newocr.ScannedImage;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.DisplayNameGeneration;
+import org.junit.jupiter.api.Test;
+
+/** Accuracy test for the "Courier New" font family; the check itself lives in {@link Accuracy}. */
+@DisplayNameGeneration(FontTestNameGenerator.class)
+public class FontCourierNew extends Accuracy {
+    /** Result of {@code generate("Courier New", "fonts/CourierNew")}, assigned once by {@link #setUp()}. */
+    private static ScannedImage courierNewScan;
+
+    @BeforeAll
+    public static void setUp() throws Exception {
+        courierNewScan = Accuracy.generate("Courier New", "fonts/CourierNew");
+    }
+
+    @Test
+    public void accuracyTest() {
+        super.accuracyTest(courierNewScan);
+    }
+}
diff --git a/src/test/java/com/uddernetworks/newocr/fonts/FontMonospaced.java b/src/test/java/com/uddernetworks/newocr/fonts/FontMonospaced.java
new file mode 100644
index 0000000..7697cbd
--- /dev/null
+++ b/src/test/java/com/uddernetworks/newocr/fonts/FontMonospaced.java
@@ -0,0 +1,24 @@
+package com.uddernetworks.newocr.fonts;
+
+import com.uddernetworks.newocr.FontTestNameGenerator;
+import com.uddernetworks.newocr.ScannedImage;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.DisplayNameGeneration;
+import org.junit.jupiter.api.Test;
+
+/** Accuracy test for the "Monospaced.plain" font family; the check itself lives in {@link Accuracy}. */
+@DisplayNameGeneration(FontTestNameGenerator.class)
+public class FontMonospaced extends Accuracy {
+    /** Result of {@code generate("Monospaced.plain", "fonts/Monospaced.plain")}, assigned by {@link #setUp()}. */
+    private static ScannedImage monospacedScan;
+
+    @BeforeAll
+    public static void setUp() throws Exception {
+        monospacedScan = Accuracy.generate("Monospaced.plain", "fonts/Monospaced.plain");
+    }
+
+    @Test
+    public void accuracyTest() {
+        super.accuracyTest(monospacedScan);
+    }
+}
diff --git a/src/test/java/com/uddernetworks/newocr/fonts/FontVerdana.java b/src/test/java/com/uddernetworks/newocr/fonts/FontVerdana.java
new file mode 100644
index 0000000..92770ed
--- /dev/null
+++ b/src/test/java/com/uddernetworks/newocr/fonts/FontVerdana.java
@@ -0,0 +1,24 @@
+package com.uddernetworks.newocr.fonts;
+
+import com.uddernetworks.newocr.FontTestNameGenerator;
+import com.uddernetworks.newocr.ScannedImage;
+import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.DisplayNameGeneration;
+import org.junit.jupiter.api.Test;
+
+/** Accuracy test for the "Verdana" font family; the check itself lives in {@link Accuracy}. */
+@DisplayNameGeneration(FontTestNameGenerator.class)
+public class FontVerdana extends Accuracy {
+    /** Result of {@code generate("Verdana", "fonts/Verdana")}, assigned once by {@link #setUp()}. */
+    private static ScannedImage verdanaScan;
+
+    @BeforeAll
+    public static void setUp() throws Exception {
+        verdanaScan = Accuracy.generate("Verdana", "fonts/Verdana");
+    }
+
+    @Test
+    public void accuracyTest() {
+        super.accuracyTest(verdanaScan);
+    }
+}
diff --git a/src/test/resources/log4j.xml b/src/test/resources/log4j.xml
new file mode 100644
index 0000000..6e547a0
--- /dev/null
+++ b/src/test/resources/log4j.xml
@@ -0,0 +1,14 @@
+
+
+
+