diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..373dd6e --- /dev/null +++ b/.gitignore @@ -0,0 +1,20 @@ +# Gradle files +.gradle/ +gradle/ +gradlew +gradlew.bat + +# IDE Generated Files +.idea/ +build/ +out/ +**.ppf + +# Training temp files +**/resources/training_**.png +database/ + +# Misc. +**.class +training.png +training_**.png \ No newline at end of file diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..ffa3f7d --- /dev/null +++ b/.travis.yml @@ -0,0 +1,27 @@ +os: windows +language: shell +filter_secrets: false +cache: false +before_install: + - choco install jdk11 -params 'installdir=c:\\newocr\\jdk' -y + - wget http://services.gradle.org/distributions/gradle-5.3-bin.zip + - unzip -qq gradle-5.3-bin.zip -d /c/newocr/gradle + - export GRADLE_HOME=/c/newocr/gradle/gradle-5.3 + - export JAVA_HOME=/c/newocr/jdk + - export PATH=$GRADLE_HOME/bin:$PATH + - export PATH=$JAVA_HOME/bin:$PATH + - set TERM=dumb + - gradle -version +script: + - gradle clean install cleanTest test --exclude-task signArchives --no-daemon +after_success: + - wget https://raw.githubusercontent.com/DiscordHooks/travis-ci-discord-webhook/master/send.sh + - bash send.sh success $WEBHOOK_URL +after_failure: + - wget https://raw.githubusercontent.com/DiscordHooks/travis-ci-discord-webhook/master/send.sh + - bash send.sh failure $WEBHOOK_URL +deploy: + provider: script + script: bash scripts/deploy.sh + on: + branch: master \ No newline at end of file diff --git a/Fonts.md b/Fonts.md new file mode 100644 index 0000000..6095da0 --- /dev/null +++ b/Fonts.md @@ -0,0 +1,30 @@ +# Fonts + +NewOCR can train almost any arbitrary font from a training file in the correct format (Some may require configuration file modification), though there are some fonts that are already configured and trained to work. Fonts can be added/removed from this list as long as they work and pass all tests. 
Any OCR changes are tested against these fonts, so the more fonts, the fewer problems the OCR will have in the end. + +## Why is [font] not supported? + +There are several reasons a font may not be supported. At the current NewOCR version, fonts must contain all characters in the training image, and have **no kerning.** Kerning is the biggest reason some fonts are not supported. Another smaller reason some fonts aren't supported is due to letters looking similar to each other. An example is in the font Arial, and the characters I, L, and |. They all look identical other than a height change in one of them, which makes it impossible for the OCR to know what is going on without context (Soon to be supported, hopefully). + +## Supported Fonts + +Just because a font is not on this list does **not** mean it will not work! These are just the fonts that the OCR is tested against; if you have a font that works, then make a PR and add its config to the repo and add it here! + ++ Comic Sans MS ++ Monospaced ++ Verdana ++ Calibri ++ Consolas ++ Courier New +## Unsupported Fonts + ++ Arial **Reason: Kerning/Similar characters** ++ Terminal **Reason: Kerning** ++ Lucida Console **Reason: Kerning** (Need to double-check) ++ Javanese Text ++ Ebrima ++ Montserrat **Reason: Kerning** (Around [\\]) ++ OCR-A **Reason: Conjoined quotes** hmmm... ironic ++ Myanmar Text **Reason: Kerning** ++ Bahnschrift Light Condensed **Reason: vertical lines misrecognition** ++ Ink Free **Reason: Kerning** \ No newline at end of file diff --git a/HWTest.png b/HWTest.png deleted file mode 100644 index e73ed5c..0000000 Binary files a/HWTest.png and /dev/null differ diff --git a/README.md b/README.md index 154282b..481a7d7 100644 --- a/README.md +++ b/README.md @@ -1,91 +1,30 @@ - - NewOCR and MS Paint IDE's Discord server - +
+ + Maven Central + + + NewOCR and MS Paint IDE's Discord server + + + Travis (.org) branch + +
# NewOCR -NewOCR is an OCR library made to suit [MS Paint IDE](https://github.com/RubbaBoy/MSPaintIDE)'s needs, though can be used in any project, as nothing is made specific to the IDE. The OCR can be trained with many fonts, though is geared towards fonts like **Verdana** and similar fonts. Other fonts _may_ require some tweaking of the character detector, but the main detection will work with no matter how different the characters are from Verdana (Hell you could modify it to work with emojis). +NewOCR is an OCR library made to suit [MS Paint IDE](https://github.com/MSPaintIDE/MSPaintIDE)'s needs, though it can be used in any project, as nothing is made specific to the IDE. The OCR can be trained with many fonts, though it is geared towards fonts like **Verdana** and similar fonts. Other fonts _may_ require some tweaking of the character detector, but the main detection will work no matter how different the characters are from Verdana (You could even modify it to work with emojis). -## How it works -### Summary -NewOCR uses a super sketchy method of detecting characters, which in short breaks up each character into different subsections, then gets the percentage of filled in pixels each section contains, and puts them into an array. It then gets the closest matching array, which is decided as the closest pixel. +Currently, NewOCR is being tested against the following fonts: -### Sectioning -Each letter is broken up into 16 sections. These aren't pixel-based, but percentage based. This allows them to be created on all sized letters with the same proportions. +- Comic Sans MS +- Monospaced +- Verdana +- Calibri +- Consolas +- Courier New -First, the letter is horizontally broken up into top and bottom sections. Then, each of those two sections are broken up vertically into another two sections. The remaining sections are broken up into diagonal sections, with their diagonals angling towards the center of the character. 
A visual of what the sections look like and their index of the value array (Will be used later) can be found here: -![Section examples 1](/images/E1.png) +Though you can train the OCR on many, many other fonts. For more information on fonts used and how they are chosen, see the [fonts page](Fonts.md). -After that process has occurred, the second sectioning process starts. This one is more simple, in that it first horizontally separates it into thirds, then those sections into vertical thirds. The sections and their indices look like the following: -![Section examples 2](/images/E2.png) +To get started with using NewOCR or get a detailed description of how every piece of the OCR works from start to finish, you can visit the wiki here: [https://wiki.newocr.dev/](https://wiki.newocr.dev/) -### Applying the sections - -After the sections and their indices have been established, the system gets the percentage the pixels are black (Rather than white, as it's effectively binary image). Applied to our sections, this is what the values for sections of the letter **E** would look like (Depending on the size, these values may vary from your results): -![Section values 1](/images/Eval1.png) -![Section values 2](/images/Eval2.png) - -With the indices applied, the value array would be: -``` -[0.86, 0.51, 0.46, 0.48, 0.46, 0.67, 0.43, 0.09, 0.77, 0.37, 0.37, 0.77, 0.36, 0.36, 0.77, 0.37, 0.37] -``` - -These values are then compared to the averaged out trained characters' data, and the closest match is given. Other things that affect its similarity to the trained database character are the width/height ratio, which helps distinguish characters like `_` and `-`. Some type meta can also be attached to the database character, but still has the percentage values stored. 
These meta values are things like if it had to append chunks of pixels together in such a way it has to be a percentage sign, if it appended pixels to the top of a base character (`!`, `i`, `j`), to a bottom of a character (`!`), and some others. The enum containing these values may be found here: [LetterMeta.java](/src/main/java/com/uddernetworks/newocr/LetterMeta.java). - -### Training -A vital part in the OCR is its training. Though many OCRs require training for their Neural Networks, NewOCR uses a simple, fast method of training involving essentially averaging values form charcaters. - -The OCR starts off with a generated image of all the characters it can take advantage of through the [TrainGenerator](/src/main/java/com/uddernetworks/newocr/TrainGenerator.java) class, taking up fonts from an upper to lower bound. The system gets the character bounds for every character, then incrementally goes through the characters, putting the segmented percentages described above into a database, after averaging all the font sizes together. This is also done with the width and height of the character, for increased accuracy. The accuracy of the character segmentation is crucial in this step, as if one character is detected as say 2, it will throw off the entire line, resulting in a useless training data set. - -With scaling fonts to smaller sizes where they get deformed by their pixelation, their percentages may be significantly different than the higher resolution variants. To circumvent this, the database is broken up into different sections of font bounds, e.g. from font size 0-12 values will be places together, 13-20, and 20+ will be grouped together. The bounds' values and count may be changed in the program. - -Example of a training image: -![Training image](/images/training.png) - -## Using It -NewOCR is on Central, so it's insanely easy to get on both Maven and Gradle. 
- -Gradle: -```Groovy -compile 'com.uddernetworks.newocr:NewOCR:1.2.1' -``` - -Maven: -```XML - - com.uddernetworks.newocr - NewOCR - 1.2.1 - -``` - -### Creating the training image -The OCR needs an image to base all its font data off of, so a training image is required. The class `TrainGenerator.java` has the ability to create such images, and you can just change `UPPER_FONT_BOUND` and `LOWER_FONT_BOUND` to the maximum and minimum fonts to be created in the image. After running the program, you should have an image similar to the one displayed above in [Training](#Training). - -Currently the font `Verdana` is the only font tested to work with the character recogniser, though if character detection was modified/improved (Planned for the future) it could easily detect many more fonts with high accuracy. - -### Setting up the database -To use NewOCR, a MySQL database is required. This is to store all the section data of each character. To run by the example usage in `Main.java`, you will need to put the database's URL, username, and password as the program arguments in their respective orders. An example of this would be: -```java -jar NewOCR-1.2.1.java "jdbc:mysql://127.0.0.1:3306/OCR" "my_user" "my_pass"``` -You will _not_ be required to run any queries manually once you have created a table for the OCR; the program will do that for you. - -Before you do anything with detecting characters you must train the OCR. It does not use any Neural Networks as shown in the explanation above, but it needs to register how the font works. In order to get this working in `Main.java`, make sure in the main method you have `new Main().run(args)` uncommented, and that more down the file that `new File("training.png")` and `new File("HWTest.png")` points to valid paths, the first one being the training image as described above, and then your input image. When you run the program, type `yes` when it asks if you want to train, and then wait a minute or so. 
When the program exits, you should be able to run it again, answer `no` to that question, and after a few seconds it should give its output. - -### System properties used -NewOCR uses a few system properties for some extra options for debugging and other things. Here is a list of them (More may be added in the future): -- **newocr.rewrite** [Boolean] - Rewrites the image to a new BufferedImage before it's scanned. This could fix some weird encoding issues happening in the past -- **newocr.error** [Boolean] - If the system should output certain problems it thinks may have occurred (NOT stacktraces, those are always shown) -- **newocr.debug** [Boolean] - If the system should display some certain debug messages used in the program - -## Resources -The following papers were used as inspiration, ideas, knowledge gathering, whatever it may be towards the advancement of this OCR. I could have forgotten a few research papers, I read a lot of them. They might just be stuff I thought was really cool related to the subject, I'm generalizing this description to hell so I won't have to change it later. 
- -- https://www.researchgate.net/publication/260405352_OPTICAL_CHARACTER_RECOGNITION_OCR_SYSTEM_FOR_MULTIFONT_ENGLISH_TEXTS_USING_DCT_WAVELET_TRANSFORM -- https://core.ac.uk/download/pdf/20643247.pdf -- https://www.researchgate.net/publication/321761298_Generalized_Haar-like_filters_for_document_analysis_application_to_word_spotting_and_text_extraction_from_comics -- https://pdfs.semanticscholar.org/c8b7/804abc030ee93eff2f5baa306b8b95361c57.pdf -- http://www.frc.ri.cmu.edu/~akeipour/downloads/Conferences/ICIT13.pdf -- https://support.dce.felk.cvut.cz/mediawiki/images/2/24/Bp_2017_troller_milan.pdf -- http://www.cs.toronto.edu/~scottl/research/msc_thesis.pdf -- https://www.researchgate.net/publication/258651794_Novel_Approach_for_Baseline_Detection_and_Text_Line_Segmentation -- https://www.researchgate.net/publication/2954700_Neural_and_fuzzy_methods_in_handwriting_recognition +To view javadocs on the project, you can go here: [https://docs.newocr.dev/](https://docs.newocr.dev/) \ No newline at end of file diff --git a/all.png b/all.png deleted file mode 100644 index f183bc3..0000000 Binary files a/all.png and /dev/null differ diff --git a/alphabet48.png b/alphabet48.png deleted file mode 100644 index 64901bb..0000000 Binary files a/alphabet48.png and /dev/null differ diff --git a/alphabet72.png b/alphabet72.png deleted file mode 100644 index 48fe88c..0000000 Binary files a/alphabet72.png and /dev/null differ diff --git a/binariazed.png b/binariazed.png deleted file mode 100644 index 03081f9..0000000 Binary files a/binariazed.png and /dev/null differ diff --git a/build.gradle b/build.gradle index 8224ed6..9642006 100644 --- a/build.gradle +++ b/build.gradle @@ -23,7 +23,7 @@ apply plugin: 'io.codearte.nexus-staging' group 'com.uddernetworks.newocr' archivesBaseName = "NewOCR" -version '1.2.1' +version '2.0.0-SNAPSHOT' sourceCompatibility = 11 @@ -32,8 +32,13 @@ repositories { } dependencies { - testCompile group: 'junit', name: 'junit', version: '4.12' + 
testImplementation('org.junit.jupiter:junit-jupiter:5.4.2') testCompile group: 'org.apache.commons', name: 'commons-math3', version: '3.6.1' + testCompile group: 'org.slf4j', name: 'slf4j-log4j12', version: '1.7.25' + + compile group: 'org.slf4j', name: 'slf4j-api', version: '1.7.25' + + compile group: 'org.bitbucket.cowwoc', name: 'diff-match-patch', version: '1.2' compile group: 'com.zaxxer', name: 'HikariCP', version: '2.7.8' compile group: 'mysql', name: 'mysql-connector-java', version: '5.1.6' @@ -43,12 +48,26 @@ dependencies { // https://mvnrepository.com/artifact/it.unimi.dsi/fastutil compile group: 'it.unimi.dsi', name: 'fastutil', version: '8.2.2' + + compile group: 'com.typesafe', name: 'config', version: '1.3.3' } ext.moduleName = 'NewOCR' +tasks.withType(Test) { + maxParallelForks = 1 +} + +test { + minHeapSize = "1024m" + maxHeapSize = "61446m" + forkEvery = 1 +} + javadoc { options.addStringOption('-module-path', classpath.asPath) + source = sourceSets.main.allJava + classpath = configurations.compile } compileJava { @@ -61,6 +80,13 @@ compileJava { } } +test { + useJUnitPlatform() + testLogging { + events "passed", "skipped", "failed" + } +} + nexusStaging { if (project.hasProperty("ossrhUser") && project.hasProperty("ossrhPassword")) { username = ossrhUser @@ -86,59 +112,62 @@ artifacts { archives javadocJar, sourcesJar } -allprojects { - apply plugin: 'signing' - apply plugin: 'maven' +apply plugin: 'signing' +apply plugin: 'maven' - // Signature of artifacts - signing { - sign configurations.archives - } +// Signature of artifacts +signing { + sign configurations.archives +} - // OSSRH publication - uploadArchives { - repositories { - mavenDeployer { - // POM signature - beforeDeployment { MavenDeployment deployment -> signing.signPom(deployment) } - - if (project.hasProperty("ossrhUser") && project.hasProperty("ossrhPassword")) { - // Target repository - repository(url: "https://oss.sonatype.org/service/local/staging/deploy/maven2/") { - 
authentication(userName: ossrhUser, password: ossrhPassword) - } +// -Prelease uploadArchives closeAndPromoteRepository -x demo:uploadArchives + +// OSSRH publication +uploadArchives { + repositories { + mavenDeployer { + // POM signature + beforeDeployment { MavenDeployment deployment -> signing.signPom(deployment) } + + if (project.hasProperty("ossrhUser") && project.hasProperty("ossrhPassword")) { + // Target repository + String repo = version.toString().endsWith("-SNAPSHOT") ? + "https://oss.sonatype.org/content/repositories/snapshots" : + "https://oss.sonatype.org/service/local/staging/deploy/maven2" + println "Using repository ${repo}" + repository(url: repo) { + authentication(userName: ossrhUser, password: ossrhPassword) } + } - pom.project { - name 'NewOCR' - description 'NewOCR is a library for simple but efficient OCR detection in pure Java.' - packaging 'jar' - url 'https://github.com/RubbaBoy/NewOCR' + pom.project { + name 'NewOCR' + description 'NewOCR is a library for simple but efficient OCR detection in pure Java.' 
+ packaging 'jar' + url 'https://github.com/MSPaintIDE/NewOCR' - scm { - connection 'scm:git:https://github.com/RubbaBoy/NewOCR.git' - developerConnection 'scm:git:git@github.com:RubbaBoy/NewOCR.git' - url 'https://github.com/RubbaBoy/NewOCR.git' - } + scm { + connection 'scm:git:https://github.com/MSPaintIDE/NewOCR.git' + developerConnection 'scm:git:git@github.com:MSPaintIDE/NewOCR.git' + url 'https://github.com/MSPaintIDE/NewOCR.git' + } - licenses { - license { - name 'The MIT License (MIT)' - url 'http://opensource.org/licenses/MIT' - distribution 'repo' - } + licenses { + license { + name 'The MIT License (MIT)' + url 'http://opensource.org/licenses/MIT' + distribution 'repo' } + } - developers { - developer { - id = 'RubbaBoy' - name = 'Adam Yarris' - email = 'adamyarris@gmail.com' - } + developers { + developer { + id = 'RubbaBoy' + name = 'Adam Yarris' + email = 'adamyarris@gmail.com' } } } } } - } diff --git a/demo/build.gradle b/demo/build.gradle new file mode 100644 index 0000000..1bf81f8 --- /dev/null +++ b/demo/build.gradle @@ -0,0 +1,13 @@ +apply plugin: 'java' + +repositories { + mavenLocal() + mavenCentral() +} + +dependencies { + compile project(':') + + compile group: 'org.slf4j', name: 'slf4j-api', version: '1.7.25' + compile group: 'org.slf4j', name: 'slf4j-log4j12', version: '1.7.25' +} \ No newline at end of file diff --git a/demo/src/main/java/com/uddernetworks/newocr/demo/ScanDemo.java b/demo/src/main/java/com/uddernetworks/newocr/demo/ScanDemo.java new file mode 100644 index 0000000..2808382 --- /dev/null +++ b/demo/src/main/java/com/uddernetworks/newocr/demo/ScanDemo.java @@ -0,0 +1,51 @@ +package com.uddernetworks.newocr.demo; + +import com.uddernetworks.newocr.ScannedImage; +import com.uddernetworks.newocr.configuration.ConfigReflectionCacher; +import com.uddernetworks.newocr.configuration.HOCONFontConfiguration; +import com.uddernetworks.newocr.database.OCRDatabaseManager; +import com.uddernetworks.newocr.recognition.OCRScan; +import 
com.uddernetworks.newocr.recognition.mergence.DefaultMergenceManager; +import com.uddernetworks.newocr.recognition.similarity.DefaultSimilarityManager; +import com.uddernetworks.newocr.utils.OCRUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.util.concurrent.TimeUnit; + +public class ScanDemo { + + private static Logger LOGGER = LoggerFactory.getLogger(ScanDemo.class); + + public static void main(String[] args) throws IOException { + var databaseManager = new OCRDatabaseManager(new File("database\\ocr_db_traindemo")); + var similarityManager = new DefaultSimilarityManager(); + var mergenceManager = new DefaultMergenceManager(databaseManager, similarityManager); + + if (!databaseManager.isTrainedSync()) { + LOGGER.error("The database has not been trained yet! Please run com.uddernetworks.newocr.demo.TrainDemo to train it and try again."); + databaseManager.shutdown(TimeUnit.SECONDS, 1L); + return; + } + + var fontConfiguration = new HOCONFontConfiguration("fonts/ComicSans", new ConfigReflectionCacher(), similarityManager, mergenceManager); + var ocrScan = new OCRScan(databaseManager, fontConfiguration.fetchOptions(), similarityManager); + + LOGGER.info("Starting training..."); + + var start = System.currentTimeMillis(); + + ScannedImage scannedImage = ocrScan.scanImage(new File("demo\\src\\main\\resources\\code.png")); + + LOGGER.info("Got:\n" + OCRUtils.removeLeadingSpaces(scannedImage.getPrettyString())); + + LOGGER.info("Finished scanning in " + (System.currentTimeMillis() - start) + "ms"); + + // HSQLDB freaks out and kills the database file after writing if it doesn't have some kind of delay + // before killing the threads. 
+ databaseManager.shutdown(TimeUnit.SECONDS, 1L); + } + +} diff --git a/demo/src/main/java/com/uddernetworks/newocr/demo/TrainDemo.java b/demo/src/main/java/com/uddernetworks/newocr/demo/TrainDemo.java new file mode 100644 index 0000000..89686e3 --- /dev/null +++ b/demo/src/main/java/com/uddernetworks/newocr/demo/TrainDemo.java @@ -0,0 +1,46 @@ +package com.uddernetworks.newocr.demo; + +import com.uddernetworks.newocr.configuration.ConfigReflectionCacher; +import com.uddernetworks.newocr.configuration.HOCONFontConfiguration; +import com.uddernetworks.newocr.database.OCRDatabaseManager; +import com.uddernetworks.newocr.recognition.OCRTrain; +import com.uddernetworks.newocr.recognition.mergence.DefaultMergenceManager; +import com.uddernetworks.newocr.recognition.similarity.DefaultSimilarityManager; +import com.uddernetworks.newocr.train.ComputerTrainGenerator; +import com.uddernetworks.newocr.train.TrainGeneratorOptions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.util.concurrent.TimeUnit; + +public class TrainDemo { + + private static Logger LOGGER = LoggerFactory.getLogger(TrainDemo.class); + + public static void main(String[] args) throws IOException { + var databaseManager = new OCRDatabaseManager(new File("database\\ocr_db_traindemo")); + var similarityManager = new DefaultSimilarityManager(); + var mergenceManager = new DefaultMergenceManager(databaseManager, similarityManager); + + var fontConfiguration = new HOCONFontConfiguration("fonts/ComicSans", new ConfigReflectionCacher(), similarityManager, mergenceManager); + var ocrTrain = new OCRTrain(databaseManager, fontConfiguration.fetchOptions()); + + var trainImage = new File("demo\\src\\main\\resources\\training.png"); + + new ComputerTrainGenerator().generateTrainingImage(trainImage, new TrainGeneratorOptions().setFontFamily("Comic Sans MS")); + + LOGGER.info("Starting training..."); + + var start = System.currentTimeMillis(); + 
ocrTrain.trainImage(trainImage); + + LOGGER.info("Finished training in " + (System.currentTimeMillis() - start) + "ms"); + + // HSQLDB freaks out and kills the database file after writing if it doesn't have some kind of delay + // before killing the threads. + databaseManager.shutdown(TimeUnit.SECONDS, 1L); + } + +} diff --git a/demo/src/main/java/com/uddernetworks/newocr/demo/TrainImageGeneration.java b/demo/src/main/java/com/uddernetworks/newocr/demo/TrainImageGeneration.java new file mode 100644 index 0000000..bcb7466 --- /dev/null +++ b/demo/src/main/java/com/uddernetworks/newocr/demo/TrainImageGeneration.java @@ -0,0 +1,27 @@ +package com.uddernetworks.newocr.demo; + +import com.uddernetworks.newocr.train.ComputerTrainGenerator; +import com.uddernetworks.newocr.train.TrainGeneratorOptions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; + +public class TrainImageGeneration { + + private static Logger LOGGER = LoggerFactory.getLogger(TrainImageGeneration.class); + + public static void main(String[] args) { + LOGGER.info("Generating a training image with font bounds of 90-30, and a font family of Monospaced.plain"); + + var start = System.currentTimeMillis(); + + new ComputerTrainGenerator().generateTrainingImage(new File("training_mono.png"), new TrainGeneratorOptions() + .setFontFamily("Monospaced.plain") + .setMaxFontSize(90) + .setMinFontSize(30)); + + LOGGER.info("Finished in " + (System.currentTimeMillis() - start) + "ms"); + } + +} diff --git a/demo/src/main/resources/code.png b/demo/src/main/resources/code.png new file mode 100644 index 0000000..8f03c8e Binary files /dev/null and b/demo/src/main/resources/code.png differ diff --git a/demo/src/main/resources/log4j.xml b/demo/src/main/resources/log4j.xml new file mode 100644 index 0000000..6e547a0 --- /dev/null +++ b/demo/src/main/resources/log4j.xml @@ -0,0 +1,14 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/demo/src/main/resources/scan.png 
b/demo/src/main/resources/scan.png new file mode 100644 index 0000000..2dc7601 Binary files /dev/null and b/demo/src/main/resources/scan.png differ diff --git a/images/E1.png b/images/E1.png deleted file mode 100644 index 8381df5..0000000 Binary files a/images/E1.png and /dev/null differ diff --git a/images/E2.png b/images/E2.png deleted file mode 100644 index 6c57078..0000000 Binary files a/images/E2.png and /dev/null differ diff --git a/images/Eval1.png b/images/Eval1.png deleted file mode 100644 index 2c88905..0000000 Binary files a/images/Eval1.png and /dev/null differ diff --git a/images/Eval2.png b/images/Eval2.png deleted file mode 100644 index eb581ca..0000000 Binary files a/images/Eval2.png and /dev/null differ diff --git a/images/training.png b/images/training.png deleted file mode 100644 index 3025d85..0000000 Binary files a/images/training.png and /dev/null differ diff --git a/outputNEW.png b/outputNEW.png deleted file mode 100644 index f3e887b..0000000 Binary files a/outputNEW.png and /dev/null differ diff --git a/scripts/deploy.sh b/scripts/deploy.sh new file mode 100644 index 0000000..1507e88 --- /dev/null +++ b/scripts/deploy.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# See https://medium.com/@nthgergo/publishing-gh-pages-with-travis-ci-53a8270e87db + +if [ "$TRAVIS_PULL_REQUEST" != "false" ]; then exit 0; fi + +set -o errexit + +# config +git config --global user.email "nobody@ms-paint-i.de" +git config --global user.name "Travis CI" +git config core.autocrlf true + +mkdir pages +cd pages + +git clone https://github.com/MSPaintIDE/NewOCR-javadocs . +rm -rf * +cd ../ + +gradle javadoc --no-daemon +cd build/docs/ +mv javadoc/* ../../pages +cd ../../pages +echo "docs.newocr.dev" > CNAME + +# deploy +git add . 
&> /dev/null +git commit -m "Update docs from https://github.com/MSPaintIDE/NewOCR" &> /dev/null +git push --quiet "https://${GITHUB_TOKEN}@github.com/MSPaintIDE/NewOCR-javadocs.git" master:master &> /dev/null \ No newline at end of file diff --git a/settings.gradle b/settings.gradle index e02bd22..a8d5782 100644 --- a/settings.gradle +++ b/settings.gradle @@ -1,2 +1,3 @@ rootProject.name = 'NewOCR' +include 'demo' \ No newline at end of file diff --git a/src/main/java/com/uddernetworks/newocr/CombineMethod.java b/src/main/java/com/uddernetworks/newocr/CombineMethod.java deleted file mode 100644 index 19301bb..0000000 --- a/src/main/java/com/uddernetworks/newocr/CombineMethod.java +++ /dev/null @@ -1,18 +0,0 @@ -package com.uddernetworks.newocr; - -import com.uddernetworks.newocr.character.SearchCharacter; -import com.uddernetworks.newocr.utils.CharacterGettingUtils; - -/** - * Different ways of combining two {@link SearchCharacter}s. - * - * @see CharacterGettingUtils - */ -public enum CombineMethod { - - DOT, - COLON, - PERCENTAGE_CIRCLE, - APOSTROPHE - -} \ No newline at end of file diff --git a/src/main/java/com/uddernetworks/newocr/FontBounds.java b/src/main/java/com/uddernetworks/newocr/FontBounds.java deleted file mode 100644 index 591f35f..0000000 --- a/src/main/java/com/uddernetworks/newocr/FontBounds.java +++ /dev/null @@ -1,56 +0,0 @@ -package com.uddernetworks.newocr; - -/** - * An object storing the data for a set of bounds (Both upper and lower) - */ -public class FontBounds { - - private int minFont; - private int maxFont; - - /** - * Creates a FontBounds object from the given bounds. - * - * @param minFont The upper limit of allowed font size - * @param maxFont The lower limit of allowed font size - */ - public FontBounds(int minFont, int maxFont) { - this.minFont = minFont; - this.maxFont = maxFont; - } - - /** - * Gets the minimum allowed font size. 
- * - * @return The minimum allowed font size - */ - public int getMinFont() { - return minFont; - } - - /** - * Gets the maximum allowed font size. - * - * @return The maximum allowed font size - */ - public int getMaxFont() { - return maxFont; - } - - /** - * Gets if the given size is contained or included in the font bounds. This method is simple but is good for a - * visual comparison. - * - * @param font The font size to compare - * @return If the font is between or in the borders - */ - public boolean isInbetween(int font) { - return minFont <= font && font <= maxFont; - } - - @Override - public String toString() { - return "FontBounds[" + this.minFont + " - " + this.maxFont + "]"; - } - -} \ No newline at end of file diff --git a/src/main/java/com/uddernetworks/newocr/LetterMeta.java b/src/main/java/com/uddernetworks/newocr/LetterMeta.java deleted file mode 100644 index 5e52f9a..0000000 --- a/src/main/java/com/uddernetworks/newocr/LetterMeta.java +++ /dev/null @@ -1,51 +0,0 @@ -package com.uddernetworks.newocr; - -import it.unimi.dsi.fastutil.ints.Int2ObjectMap; -import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap; -import java.util.Arrays; -import java.util.Optional; - -/** - * Meta for letters that is inserted into the database to help distinguish characters. - */ -public enum LetterMeta { - - NONE(0), - EVEN_DOTS(1), // : = - DOT_UNDER(2), // ! ? - DOT_ABOVE(3), // ; - PERCENT(4), // % - QUOTE(5); // " - - private static final Int2ObjectMap LETTER_META = new Int2ObjectOpenHashMap<>(values().length); - - static { - Arrays.stream(values()).forEach(value -> LETTER_META.put(value.id, value)); - } - - private int id; - - LetterMeta(int id) { - this.id = id; - } - - /** - * Gets the LetterMeta's ID. - * - * @return The ID - */ - public int getID() { - return id; - } - - /** - * Gets a {@link LetterMeta} from the given ID. 
- * - * @param id The ID to get - * @return The {@link LetterMeta} with the same ID as the one given - */ - public static Optional fromID(int id) { - return Optional.ofNullable(LETTER_META.get(id)); - } - -} diff --git a/src/main/java/com/uddernetworks/newocr/Main.java b/src/main/java/com/uddernetworks/newocr/Main.java deleted file mode 100644 index c510de8..0000000 --- a/src/main/java/com/uddernetworks/newocr/Main.java +++ /dev/null @@ -1,80 +0,0 @@ -package com.uddernetworks.newocr; - -import com.uddernetworks.newocr.character.SearchCharacter; -import com.uddernetworks.newocr.database.OCRDatabaseManager; -import com.uddernetworks.newocr.utils.IntPair; -import com.uddernetworks.newocr.utils.OCRUtils; - -import java.awt.image.BufferedImage; -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Scanner; -import java.util.concurrent.TimeUnit; -import java.util.stream.Collectors; - -public class Main { - - public static void main(String[] args) throws IOException, InterruptedException { // alphabet48 - new Main().run(args); - //new Main().getSections(args); - } - - private void run(String[] args) throws IOException, InterruptedException { - var databaseManager = new OCRDatabaseManager(new File("database" + File.separator + "ocr_db")); - var scanner = new Scanner(System.in); - var ocrHandle = new OCRHandle(databaseManager); - - System.setProperty("newocr.debug", "true"); - - System.out.println("Do you want to train? 
(y)es/no"); - - var inputLine = scanner.nextLine(); - - if ("yes".equalsIgnoreCase(inputLine) || "y".equalsIgnoreCase(inputLine)) { - System.out.println("Generating features..."); - var start = System.currentTimeMillis(); - ocrHandle.trainImage(new File("training.png")); - System.out.println("Finished training in " + (System.currentTimeMillis() - start) + "ms"); - // HSQLDB freaks out and kills the database file after writing if it doesn't have some kind of delay - // before killing the threads. - TimeUnit.SECONDS.sleep(1L); - databaseManager.shutdown(); - return; - } - - var scannedImage = ocrHandle.scanImage(new File("HWTest.png")); - - System.out.println("Got:\n" + scannedImage.getPrettyString()); - - databaseManager.shutdown(); - } - - private void getSections(String[] args) throws IOException { - BufferedImage input = OCRUtils.readImage(new File("E.png")); - boolean[][] values = OCRUtils.createGrid(input); - - OCRUtils.toGrid(input, values); - - SearchImage searchImage = new SearchImage(values); - - List coordinates = new ArrayList<>(); - - for (int y = input.getHeight(); 0 <= --y; ) { - for (int x = 0; x < input.getWidth(); x++) { - if (searchImage.getValue(x, y)) { - searchImage.scanFrom(x, y, coordinates); - break; - } - } - } - - SearchCharacter searchCharacter = new SearchCharacter(coordinates); - searchCharacter.applySections(); - - System.out.println(searchCharacter.getSegments()); - System.out.println(searchCharacter.getSegments().stream().map(entry -> (double) entry.getKey() / (double) entry.getValue()).collect(Collectors.toList())); - } - -} diff --git a/src/main/java/com/uddernetworks/newocr/OCRHandle.java b/src/main/java/com/uddernetworks/newocr/OCRHandle.java deleted file mode 100644 index 17a2f9e..0000000 --- a/src/main/java/com/uddernetworks/newocr/OCRHandle.java +++ /dev/null @@ -1,723 +0,0 @@ -package com.uddernetworks.newocr; - -import com.uddernetworks.newocr.character.ImageLetter; -import com.uddernetworks.newocr.character.SearchCharacter; 
-import com.uddernetworks.newocr.database.DatabaseCharacter; -import com.uddernetworks.newocr.database.DatabaseManager; -import com.uddernetworks.newocr.train.TrainGenerator; -import com.uddernetworks.newocr.train.TrainedCharacterData; -import com.uddernetworks.newocr.utils.IntPair; -import com.uddernetworks.newocr.utils.OCRUtils; -import it.unimi.dsi.fastutil.ints.Int2ObjectLinkedOpenHashMap; -import it.unimi.dsi.fastutil.objects.Object2DoubleMap; -import it.unimi.dsi.fastutil.objects.Object2DoubleOpenHashMap; - -import javax.imageio.ImageIO; -import java.awt.image.BufferedImage; -import java.io.File; -import java.io.IOException; -import java.util.*; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.Future; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.stream.Collectors; -import java.util.stream.IntStream; - -import static com.uddernetworks.newocr.utils.CharacterGettingUtils.*; - -public class OCRHandle { - - private static final FontBounds[] FONT_BOUNDS = { - new FontBounds(0, 12), - new FontBounds(13, 20), - new FontBounds(21, 30), - new FontBounds(31, 100), - }; - - private static String trainString = "!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghjiklmnopqrstuvwxyz{|}~W W"; - - private DatabaseManager databaseManager; - - public OCRHandle(DatabaseManager databaseManager) { - this.databaseManager = databaseManager; - ImageIO.setUseCache(false); - } - - /** - * Scans the input image and returns a {@link ScannedImage} containing all the characters and their info. 
- * - * @param file The input image to be scanned - * @return A {@link ScannedImage} containing all scanned character data - */ - public ScannedImage scanImage(File file) { - var start = System.currentTimeMillis(); - - // Preparing image - var input = OCRUtils.readImage(file); - var values = OCRUtils.createGrid(input); - var searchCharacters = new ArrayList(); - - if (Boolean.getBoolean("newocr.rewrite")) { - var temp = new BufferedImage(input.getWidth(), input.getHeight(), BufferedImage.TYPE_INT_ARGB); - OCRUtils.rewriteImage(temp, input); - input = temp; - } - - OCRUtils.filter(input); - OCRUtils.toGrid(input, values); - - var searchImage = new SearchImage(values); - - var coordinates = new ArrayList(); - - // Goes through coordinates of image and adds any connecting pixels to `coordinates` - - for (int y = input.getHeight(); 0 <= --y;) { - for (int x = 0; x < input.getWidth(); x++) { - getLetterFrom(searchImage, x, y, coordinates, searchCharacters); - } - } - - Map> searchLines = new HashMap<>(); - - // Puts all found characters into font size groupings - - searchCharacters.forEach(searchCharacter -> { - FontBounds bounds = matchNearestFontSize(searchCharacter.getHeight()); - searchLines.putIfAbsent(bounds, new ArrayList<>()); - searchLines.get(bounds).add(searchCharacter); - }); - - // Gets all needed character data from the database based on the currently used font sizes - - searchLines.keySet().parallelStream().forEach(fontBounds -> { - try { - databaseManager.getAllCharacterSegments(fontBounds).get(); - } catch (InterruptedException | ExecutionException e) { - e.printStackTrace(); - } - }); - - // Key = Entry centers are ABSOLUTE - Map> lines = new LinkedHashMap<>(); - - // Gets the closest matching character (According to the database values) using OCRHandle#getCharacterFor(SearchCharacter), - // then it orders them by their X values, and then sorts the ImageLetters so certain ones go first, allowing the - // characters to go to the correct lines - - 
searchLines.values() - .stream() - .flatMap(List::stream) - .map(this::getCharacterFor) - .filter(Optional::isPresent) - .map(Optional::get) - .sorted(Comparator.comparingInt(ImageLetter::getX)) - .sorted((o1, o2) -> { - char cha = o1.getLetter(); - char cha2 = o2.getLetter(); - - if (cha == cha2) return 0; - if (cha == ',' ^ cha2 == ',') return cha2 == ',' ? 1 : -1; - if (cha == '.' ^ cha2 == '.') return cha2 == '.' ? 1 : -1; - if (cha == '_' ^ cha2 == '_') return cha2 == '_' ? 1 : -1; - if (cha == '`' ^ cha2 == '`') return cha2 == '`' ? 1 : -1; - if (cha == '\'' ^ cha2 == '\'') return cha2 == '\'' ? 1 : -1; - if (cha == '"' ^ cha2 == '"') return cha2 == '"' ? 1 : -1; - if (cha == '*' ^ cha2 == '*') return cha2 == '*' ? 1 : -1; - return -1; - }) - .forEach(imageLetter -> { - double maxCenter = imageLetter.getDatabaseCharacter().getMaxCenter(); - double minCenter = imageLetter.getDatabaseCharacter().getMinCenter(); - boolean subtract = maxCenter < 0 && imageLetter.getDatabaseCharacter().getMinCenter() < 0; - double centerDiff = subtract ? 
- maxCenter + minCenter : - maxCenter - minCenter; - // The tolerance of how far away a character can be from the line's center for it to be included - double tolerance = (int) Math.round(Math.max(Math.abs(centerDiff / 2 * 1.1), 2D)); - - int exactMin = (int) Math.round(imageLetter.getY() + minCenter); - int exactMax = (int) Math.round(imageLetter.getY() + maxCenter); - - int exactTolerantMin = (int) Math.max(exactMin - tolerance, 0); - int exactTolerantMax = (int) (exactMax + tolerance); - - int potentialY = (int) Math.round(imageLetter.getY() + centerDiff); - - // Gets the nearest line and its Y value, if any - var tempp = lines.keySet() - .stream() - .filter(centers -> { - int x1 = centers.getKey(); - int y1 = centers.getValue(); - int x2 = exactTolerantMin; - int y2 = exactTolerantMax; - return Math.max(y1, y2) - Math.min(x1, x2) < (y1 - x1) + (y2 - x2); - }) - .min(Comparator.comparing(centers -> { - double min = centers.getKey(); - double max = centers.getValue(); - double centerBeginningY = ((max - min) / 2) + min; - return OCRUtils.getDiff(centerBeginningY, potentialY); - })); - - var center = tempp.orElseGet(() -> { - var pair = new IntPair(exactTolerantMin, exactTolerantMax); // Included tolerance - lines.put(pair, new LinkedList<>()); - return pair; - }); - - double ratio = imageLetter.getDatabaseCharacter().getAvgWidth() / imageLetter.getDatabaseCharacter().getAvgHeight(); - double diff = Math.max(ratio, imageLetter.getRatio()) - Math.min(ratio, imageLetter.getRatio()); - - // This is signaled when the difference of the ratios are a value that is probably incorrect. - // If the ratio is very different, it should be looked into, as it could be from faulty detection. 
- if (diff > 0.2D) { - error("Questionable ratio diff of " + diff + " on letter: " + imageLetter.getLetter() + " at (" + imageLetter.getX() + ", " + imageLetter.getY() + ")"); - } - - lines.get(center).add(imageLetter); - }); - - // End ordering - var sortedLines = new Int2ObjectLinkedOpenHashMap>(); - - // Sorts the characters again based on their X value in their respective lines. This must be done again because - // the two different lists (firstList and secondList) will have caused a mixup of X positions from normal - // characters, and the ones in secondList - - lines.keySet() - .stream() - .map(entry -> new AbstractMap.SimpleEntry<>(entry, (int) Math.round(((double) entry.getValue() - (double) entry.getKey()) / 2D + entry.getKey()))) - .sorted(Comparator.comparingInt(AbstractMap.SimpleEntry::getValue)) - .forEach(nestedEntry -> { - var linesEntry = nestedEntry.getKey(); - int y = nestedEntry.getValue(); - - List databaseCharacters = lines.get(linesEntry); - - if (databaseCharacters.isEmpty()) { - return; - } - - databaseCharacters.sort(Comparator.comparingInt(ImageLetter::getX)); - sortedLines.put(y, databaseCharacters); - }); - - // Inserts all the spaces in the line. This is based on the first character of the line's height, and will be - // derived from that font size. - sortedLines.values().forEach(line -> line.addAll(getSpacesFor(line, line.stream().mapToInt(ImageLetter::getHeight).max().getAsInt()))); - - // Sorts the lines again based on X values, to move spaces from the back to their proper locations in the line. 
- - ScannedImage scannedImage = new ScannedImage(file, input); - - sortedLines.keySet().stream().sorted().forEach(y -> { - List line = sortedLines.get(y); - scannedImage.addLine(y, line.stream().sorted(Comparator.comparingInt(ImageLetter::getX)).collect(Collectors.toList())); - }); - - debug("Finished in " + (System.currentTimeMillis() - start) + "ms"); - return scannedImage; - } - - private boolean[][] clone2DArray(boolean[][] input) { - boolean[][] clone = new boolean[input.length][input[0].length]; - - for (int y = 0; y < input.length; y++) { - clone[y] = Arrays.copyOf(input[y], input[y].length); - } - - return clone; - } - - /** - * Scans the input image and creates training data based off of it. It must be an input image created from - * {@link TrainGenerator} or something of a similar format. - * - * @param file The input image to be trained from - * @throws IOException - */ - public void trainImage(File file) throws IOException { - Map> trainedCharacterDataList = new HashMap<>(); - - List.of(FONT_BOUNDS).forEach(fontBounds -> trainedCharacterDataList.put(fontBounds, new ArrayList<>())); - - // Preparing image - - var input = OCRUtils.readImage(file); - var values = OCRUtils.createGrid(input); - var searchCharacters = new ArrayList(); - - if (Boolean.getBoolean("newocr.rewrite")) { - var temp = new BufferedImage(input.getWidth(), input.getHeight(), BufferedImage.TYPE_INT_ARGB); - OCRUtils.rewriteImage(temp, input); - input = temp; - } - - OCRUtils.filter(input); - OCRUtils.toGrid(input, values); - - var valuesClone = clone2DArray(values); - - var searchImage = new SearchImage(values); - - var coordinates = new ArrayList(); - - // Goes through coordinates of image and adds any connecting pixels to `coordinates` - for (int y = input.getHeight(); 0 <= --y; ) { - for (int x = 0; x < input.getWidth(); x++) { - getLetterFrom(searchImage, x, y, coordinates, searchCharacters); - } - } - - trainedCharacterDataList.values().forEach(dataList -> { - IntStream.range('!', 
'~' + 1).forEach(letter -> dataList.add(new TrainedCharacterData((char) letter))); - dataList.add(new TrainedCharacterData(' ')); - }); - - Collections.sort(searchCharacters); - - // Pair (Absolute coordinates) - // Gets the top and bottom line bounds of every line - var lineBounds = getLineBoundsForTesting(valuesClone); - - var searchCharactersCopy = new ArrayList<>(searchCharacters); - - int startingSize = 90; - - // Goes through each line found - for (var lineBound : lineBounds) { - int lineHeight = lineBound.getValue() - lineBound.getKey(); - - // Gets all characters found at the line bounds from the searchCharacters (Collected from the double for loops) - var line = OCRUtils.findCharactersAtLine(lineBound.getKey(), lineBound.getValue(), searchCharacters); - - if (!line.isEmpty()) { - AtomicInteger letterIndex = new AtomicInteger(); - AtomicInteger beforeSpaceX = new AtomicInteger(); - - line.forEach(searchCharacter -> { - // Gets the next character it knows it will be - char current = searchCharacter.getKnownChar() == ' ' ? 
' ' : trainString.charAt(letterIndex.getAndIncrement()); - - // TODO: Improve and cache these following 3 variables - var currentFontBoundsOptional = trainedCharacterDataList.keySet() - .stream() - .filter(fontBounds -> fontBounds.isInbetween(searchCharacter.getHeight())) - .findFirst(); - - if (currentFontBoundsOptional.isEmpty()) { - return; - } - - var trainedSearchCharacterOptional = trainedCharacterDataList.get(currentFontBoundsOptional.get()) - .stream() - .filter(trainedCharacterData -> trainedCharacterData.getValue() == current) - .findFirst(); - - var spaceTrainedCharacterOptional = trainedCharacterDataList.get(currentFontBoundsOptional.get()) - .stream() - .filter(trainedCharacterData -> trainedCharacterData.getValue() == ' ') - .findFirst(); - - // If the current character is the FIRST `W`, sets beforeSpaceX to the current far right coordinate - // of the space (X + width), and go up another character (Skipping the space in trainString) - if (letterIndex.get() == trainString.length() - 2) { - beforeSpaceX.set(searchCharacter.getX() + searchCharacter.getWidth()); - letterIndex.incrementAndGet(); - return; - - // If it's the last character, add the space based on beforeSpaceX and the current X, (Getting the - // width of the space) and reset the line - } else if (letterIndex.get() == trainString.length()) { - spaceTrainedCharacterOptional.ifPresent(trainedCharacterData -> trainedCharacterData.recalculateTo(searchCharacter.getX() - beforeSpaceX.get(), lineHeight)); - letterIndex.set(0); - return; - } else { - searchCharacter.setKnownChar(current); - } - - trainedSearchCharacterOptional.ifPresent(trainedSearchCharacter -> { - // Adds the current segment values of the current searchCharacter to the trainedSearchCharacter - trainedSearchCharacter.recalculateTo(searchCharacter); - - double halfOfLineHeight = ((double) lineBound.getValue() - (double) lineBound.getKey()) / 2; - double middleToTopChar = (double) searchCharacter.getY() - (double) 
lineBound.getKey(); - double topOfLetterToCenter = halfOfLineHeight - middleToTopChar; - - // Sets the current center to be calculated, along with any meta it may have - trainedSearchCharacter.recalculateCenter(topOfLetterToCenter); // This NOW gets offset from top of - trainedSearchCharacter.setHasDot(searchCharacter.hasDot()); - trainedSearchCharacter.setLetterMeta(searchCharacter.getLetterMeta()); - }); - - // Resets the current letter - if (letterIndex.get() >= trainString.length()) { - letterIndex.set(0); - } - }); - - databaseManager.addLetterSize(startingSize, line); - - // Removes any used letters from the line in searchCharacters, so none will be duplicated and to - // increase performance. - searchCharacters.removeAll(line); - } - - startingSize--; - } - - searchCharacters = searchCharactersCopy; - - debug(searchCharacters.size() + " characters found"); - - debug("Writing data to database..."); - long start = System.currentTimeMillis(); - - debug("trainedCharacterDataList = " + trainedCharacterDataList); - - // Inserts all character data into the database after recalculating the - trainedCharacterDataList.forEach((fontBounds, databaseTrainedCharacters) -> databaseTrainedCharacters.forEach(databaseTrainedCharacter -> { - try { - if (databaseTrainedCharacter.isEmpty()) { - return; - } - - databaseTrainedCharacter.finishRecalculations(); - - char letter = databaseTrainedCharacter.getValue(); - - CompletableFuture.runAsync(() -> databaseManager.clearLetterSegments(letter, fontBounds.getMinFont(), fontBounds.getMaxFont())) - .thenRunAsync(() -> databaseManager.createLetterEntry(letter, databaseTrainedCharacter.getWidthAverage(), databaseTrainedCharacter.getHeightAverage(), fontBounds.getMinFont(), fontBounds.getMaxFont(), databaseTrainedCharacter.getMinCenter(), databaseTrainedCharacter.getMaxCenter(), databaseTrainedCharacter.hasDot(), databaseTrainedCharacter.getLetterMeta(), letter == ' ')) - .thenRunAsync(() -> { - if (letter != ' ') { - 
databaseManager.addLetterSegments(letter, fontBounds.getMinFont(), fontBounds.getMaxFont(), databaseTrainedCharacter.getSegmentPercentages()); - } - }); - } catch (Exception e) { - e.printStackTrace(); - } - })); - - debug("Finished writing to database in " + (System.currentTimeMillis() - start) + "ms"); - } - - /** - * Gets and inserts all the spaces of the current line based on the font size given (The first character of the line - * by default). This method adds the spaces to the end of the line currently, so a resort is needed. - * - * @param line The line to add spaces to - * @param fontSize The font size to base the space widths off of - * @return A copy of the input {@link ImageLetter} List, but with spaces appended to the end - */ - private List getSpacesFor(List line, int fontSize) { - var ret = new ArrayList(); - - try { - var fontBounds = matchNearestFontSize(fontSize); - var data = databaseManager.getAllCharacterSegments(fontBounds).get(); - - // Gets the space DatabaseCharacter used for the current font size from the database - var spaceOptional = data.stream().filter(databaseCharacter -> databaseCharacter.getLetter() == ' ').findFirst(); - - if (spaceOptional.isEmpty()) { - error("No space found for current font size: " + fontSize); - return line; - } - - var space = spaceOptional.get(); - - ImageLetter prev = null; - - for (var searchCharacter : line) { - int leftX = prev == null ? 0 : prev.getX() + prev.getWidth() + 1; - int rightX = searchCharacter.getX(); - - var gap = rightX - leftX; // The space between the current character and the last character - var ratio = space.getAvgWidth() / space.getAvgHeight(); // The ratio of the space DatabaseCharacter - var usedWidth = ratio * fontSize; // The width of the space for this specific fot size - - var noRoundDownSpace = "!"; // Might be more in the future, that's why it's not testing equality of an inline string - - int spaces = noRoundDownSpace.contains(searchCharacter.getLetter() + "") ? 
(int) Math.floor(gap / usedWidth) : spaceRound(gap / usedWidth); - - for (int i = 0; i < spaces; i++) { - ret.add(new ImageLetter(space, (int) (leftX + (usedWidth * i)), searchCharacter.getY(), (int) usedWidth, fontSize, ratio)); - } - - prev = searchCharacter; - } - } catch (ExecutionException | InterruptedException e) { - e.printStackTrace(); - } - - return ret; - } - - /** - * Gets the full space character count for the blank gap divided by the space width. This is calculated by getting - * the amount of times the space can fit in evenly (x % 1) and if the remaining value is within 0.2 of 1, it is - * considered a space. - * - * @param input The amount of spaces that fit in the gap (gap / spaceWidth) - * @return The amount of spaces that is found as a whole number - */ - private int spaceRound(double input) { - int known = (int) Math.floor(input); - double extra = input % 1; - known += OCRUtils.checkDifference(extra, 1, 0.2D) ? 1 : 0; - return known; - } - - /** - * Puts all touching black characters together and adds them to `coordinates`. This is the method where most incorrect - * detections will result from. 
- * - * @param searchImage The SearchImage to read from - * @param x The X coordinate to start at - * @param y The Y coordinate to start at - * @param coordinates The mutable list of coordinate values that will be added to when a new black pixel is found - * @param searchCharacters The mutable list of SearchCharacters that will be added to when a group of pixels is found - * @return If it count a group of pixels - */ - public static boolean getLetterFrom(SearchImage searchImage, int x, int y, List coordinates, List searchCharacters) { - searchImage.scanFrom(x, y, coordinates); - - if (coordinates.isEmpty()) { - return false; - } - - var searchCharacter = new SearchCharacter(coordinates); - - // Gets any grouping of pixels with this character being added to, whereas the ones below will have pixels added - // to it - if (doDotStuff(searchCharacter, coordinates, searchCharacters)) { - return true; - } - - if (doPercentStuff(searchCharacter, coordinates, searchCharacters)) { - return true; - } - - if (doApostropheStuff(searchCharacter, coordinates, searchCharacters)) { - return true; - } - - // Adds groupings of pixels found where they are connected. An example of this is the letter i or j, things like - // ! or %, etc. - var possibleDot = getBaseForPercent(searchCharacters, /* Is the circle > */ searchCharacter); - - if (possibleDot.isPresent()) { - combine(possibleDot.get(), searchCharacter, coordinates, CombineMethod.PERCENTAGE_CIRCLE, LetterMeta.PERCENT); - searchCharacters.remove(searchCharacter); - return true; - } - - if (searchCharacter.isProbablyDot() && !searchCharacter.hasDot()) { - possibleDot = getBaseOfDot(searchCharacters, searchCharacter); - - if (possibleDot.isPresent()) { - combine(possibleDot.get(), searchCharacter, coordinates, CombineMethod.DOT, LetterMeta.DOT_ABOVE); - searchCharacters.remove(searchCharacter); - return true; - } - } - - // For ! or ? 
- possibleDot = getDotUnderLetter(searchCharacters, searchCharacter); - - if (possibleDot.isPresent()) { - combine(possibleDot.get(), searchCharacter, coordinates, CombineMethod.DOT, LetterMeta.DOT_UNDER); - searchCharacters.remove(searchCharacter); - return true; - } - - if (searchCharacter.isProbablyColon() && OCRUtils.isAllBlack(searchCharacter) && !searchCharacter.hasDot()) { - possibleDot = getBottomColon(searchCharacters, searchCharacter); - - if (possibleDot.isPresent()) { - combine(possibleDot.get(), searchCharacter, coordinates, CombineMethod.COLON, LetterMeta.EVEN_DOTS); - searchCharacters.remove(searchCharacter); - return true; - } - } - - if (searchCharacter.isProbablyApostraphe()) { - var leftApostropheOptional = getLeftApostrophe(searchCharacters, searchCharacter); - - leftApostropheOptional.ifPresent(leftApostrophe -> { - combine(leftApostrophe, searchCharacter, coordinates, CombineMethod.APOSTROPHE, LetterMeta.QUOTE); - leftApostrophe.setHasDot(true); - searchCharacter.setHasDot(true); - searchCharacters.remove(searchCharacter); - }); - - if (leftApostropheOptional.isPresent()) { - return true; - } - } - - searchCharacter.applySections(); - searchCharacter.analyzeSlices(); - - searchCharacters.add(searchCharacter); - coordinates.clear(); - return false; - } - - /** - * Gets the estimated font size based on the given letter's character and dimensions from the stored values in the - * database after training. 
- * - * @param imageLetter The {@link ImageLetter} to check the size of - * @return The estimated font size in Points - */ - public Future getFontSize(ImageLetter imageLetter) { - return this.databaseManager.getLetterSize(imageLetter.getLetter(), imageLetter.getHeight()); - } - - /** - * Gets the nearest {@link FontBounds} object for the exact font size (Height) given - * - * @param fontSize The exact font size (Height) - * @return The nearest matching {@link FontBounds} object - */ - private FontBounds matchNearestFontSize(int fontSize) { - return Arrays.stream(FONT_BOUNDS).filter(fontBounds -> fontBounds.isInbetween(fontSize)).findFirst().get(); - } - - /** - * Actually matches the {@link SearchCharacter} object to a real character from the database. - * - * @param searchCharacter The input {@link SearchCharacter} to match to - * @return The {@link ImageLetter} object with the {@link DatabaseCharacter} inside it containing the found character - */ - private Optional getCharacterFor(SearchCharacter searchCharacter) { - Object2DoubleMap diffs = new Object2DoubleOpenHashMap<>(); // The lower value the better - - try { - // All the possible DatabaseCharacters that `searchCharacter` can be from the database - List data = new ArrayList<>(databaseManager.getAllCharacterSegments(matchNearestFontSize(searchCharacter.getHeight())).get()); - - data.stream() - .filter(character -> character.hasDot() == searchCharacter.hasDot()) - .filter(character -> character.getLetterMeta() == searchCharacter.getLetterMeta()) - .forEach(character -> - OCRUtils.getDifferencesFrom(searchCharacter.getSegmentPercentages(), character.getData()).ifPresent(charDifference -> - Arrays.stream(charDifference).average().ifPresent(value -> { - // Gets the difference of the database character and searchCharacter (Lower is better) - diffs.put(new ImageLetter(character, searchCharacter.getX(), searchCharacter.getY(), searchCharacter.getWidth(), searchCharacter.getHeight(), ((double) 
searchCharacter.getWidth()) / ((double) searchCharacter.getHeight()), searchCharacter.getSegments()), value); - }))); - } catch (InterruptedException | ExecutionException e) { - e.printStackTrace(); - } - - // TODO: The following code can definitely be improved - var entries = diffs.object2DoubleEntrySet().stream().sorted(Comparator.comparingDouble(Object2DoubleMap.Entry::getDoubleValue)).collect(Collectors.toList()); - - // If there's no characters found, don't continue - if (entries.isEmpty()) { - return Optional.empty(); - } - - var firstEntry = entries.get(0); // The most similar character - - double allowedDouble = firstEntry.getValue() * 0.1D; - - entries.removeIf(value -> OCRUtils.getDiff(value.getValue(), firstEntry.getValue()) > allowedDouble); // Removes any character without the - - // same difference (Most often a similarity of 0) - double searchRatio = (double) searchCharacter.getWidth() / (double) searchCharacter.getHeight(); - - // Sorts the equally matching characters by their width to height ratios, the first being most similar - entries.sort(Comparator.comparingDouble(entry -> OCRUtils.getDiff(searchRatio, entry.getKey().getDatabaseCharacter().getAvgWidth() / entry.getKey().getDatabaseCharacter().getAvgHeight()))); - - ImageLetter first = entries.get(0).getKey(); - first.setValues(searchCharacter.getValues()); - return Optional.of(first); - } - - /** - * Gets the top and bottom line bounds found from the value 2D array. This is used for getting characters for - * training data. - * - * @param values The 2D array of values derived from the image to check from - * @return A list of the absolute top and bottom line values - */ - public static List getLineBoundsForTesting(boolean[][] values) { - // Pair - List lines = new ArrayList<>(); - - int height = 0; - - for (int y = 0; y < values.length; y++) { - // If there's something on the line, add to their height of it. 
- if (OCRUtils.isRowPopulated(values, y)) { - height++; - } else if (height > 0) { // If the row has nothing on it and the line is populated, add it to the values - int heightUntil = 0; - int finalSpace = -1; - - // Seeing if the gap under the character is <= the height of the above piece. This is mainly for seeing - // if the dot on an 'i' (And other similar characters) is <= is above the rest of the character the same - // amount as its height (Making it a proper 'i' in Verdana and other fonts) - for (int i = 0; i < height; i++) { - if (y + i >= values.length) { - finalSpace = 0; - break; - } - - if (OCRUtils.isRowPopulated(values, y + i)) { - if (finalSpace == -1) { - finalSpace = heightUntil; - } - } else { - heightUntil++; - } - } - - if (finalSpace > 0) { - if (height == finalSpace) { - y += finalSpace; - height += finalSpace; - } else { - lines.add(new IntPair(y - height, y)); - height = 0; - } - } else { - lines.add(new IntPair(y - height, y)); - height = 0; - } - } else { - if (height == 0) continue; - lines.add(new IntPair(y - height, y)); - height = 0; - } - } - - return lines; - } - - /** - * Prints out an error message if the System property `newocr.error` is `true`. - * - * @param string The error to potentially print out - */ - private void error(String string) { - if (Boolean.getBoolean("newocr.error")) { - System.err.println(string); - } - } - - /** - * Prints out a debug message if the System property `newocr.debug` is `true`. 
- * - * @param string The string to potentially print out - */ - private void debug(String string) { - if (Boolean.getBoolean("newocr.debug")) System.out.println(string); - } - -} diff --git a/src/main/java/com/uddernetworks/newocr/ScannedImage.java b/src/main/java/com/uddernetworks/newocr/ScannedImage.java index 5bf5036..fed3a23 100644 --- a/src/main/java/com/uddernetworks/newocr/ScannedImage.java +++ b/src/main/java/com/uddernetworks/newocr/ScannedImage.java @@ -8,7 +8,6 @@ import java.io.File; import java.util.List; import java.util.Optional; -import java.util.concurrent.ExecutionException; /** * An object to store line data for {@link ImageLetter}s on a scanned image. @@ -28,6 +27,7 @@ public ScannedImage(File originalFile, BufferedImage originalImage) { /** * Gets the string of a scanned image + * * @return The string of a scanned image */ public String getPrettyString() { @@ -43,39 +43,17 @@ public String getPrettyString() { return stringBuilder.toString(); } - /** - * Gets the first font size found in points. - * - * @param ocrHandle The OCRHandle used - * @return The font size in points - */ - public int getFirstFontSize(OCRHandle ocrHandle) { - try { - for (var imageLetters : grid.values()) { - for (var imageLetter : imageLetters) { - int size = ocrHandle.getFontSize(imageLetter).get(); - - if (size != -1) { - return size; - } - } - } - } catch (InterruptedException | ExecutionException e) { - e.printStackTrace(); - } - - return 0; - } - /** * Gets the letter at the given index of the actual {@link ScannedImage#getPrettyString()} position, meaning * newlines are not returned. 
* * @param index The character index - * @return The ImageLetter at the given position + * @return The ImageLetter at the given position, if found */ - public ImageLetter letterAt(int index) { - List last = getGridLineAtIndex(0).get(); + public Optional letterAt(int index) { + var firstLineOptional = getGridLineAtIndex(0); + if (firstLineOptional.isEmpty()) return Optional.empty(); + List last = firstLineOptional.get(); var i = 0; while (last.size() + 1 <= index) { @@ -85,7 +63,7 @@ public ImageLetter letterAt(int index) { last = nextLine.get(); } - return last.get(index); + return Optional.ofNullable(last.size() <= index ? null : last.get(index)); } /** @@ -120,7 +98,7 @@ public Int2ObjectMap> getGrid() { /** * Adds a line containing {@link ImageLetter}s. * - * @param y The exact Y position of the line + * @param y The exact Y position of the line * @param databaseCharacterList A list of {@link ImageLetter}s as the line */ public void addLine(int y, List databaseCharacterList) { diff --git a/src/main/java/com/uddernetworks/newocr/character/Character.java b/src/main/java/com/uddernetworks/newocr/character/Character.java new file mode 100644 index 0000000..30a8b82 --- /dev/null +++ b/src/main/java/com/uddernetworks/newocr/character/Character.java @@ -0,0 +1,223 @@ +package com.uddernetworks.newocr.character; + +import java.util.Objects; + +/** + * The superclass of all characters. + */ +public abstract class Character implements Comparable { + + // Coordinate data + int x; + int y; + int width; + int height; + + // Letter data + char letter; + int modifier; + + Character() {} + + Character(char letter) { + this(letter, 0); + } + + Character(char letter, int modifier) { + this.letter = letter; + this.modifier = modifier; + } + + + /** + * Gets the X coordinate of the current {@link Character}. + * + * @return The X coordinate + */ + public int getX() { + return x; + } + + /** + * Sets the X coordinate of the current {@link Character}. 
+ * + * @param x The X coordinate to set + */ + public void setX(int x) { + this.x = x; + } + + /** + * Gets the Y coordinate of the current {@link Character}. + * + * @return The Y coordinate + */ + public int getY() { + return y; + } + + /** + * Sets the Y coordinate of the current {@link Character}. + * + * @param y The Y coordinate to set + */ + public void setY(int y) { + this.y = y; + } + + /** + * Gets the width of the current {@link Character}. + * + * @return The width + */ + public int getWidth() { + return width; + } + + /** + * Sets the width of the current {@link Character}. + * + * @param width The width to set + */ + public void setWidth(int width) { + this.width = width; + } + + /** + * Gets the height of the current {@link Character}. + * + * @return The height + */ + public int getHeight() { + return height; + } + + /** + * Sets the height of the current {@link Character}. + * + * @param height The height to set + */ + public void setHeight(int height) { + this.height = height; + } + + /** + * Gets the char letter of the current {@link Character}. + * + * @return The letter (Will default to '0' if not found for whatever reason) + */ + public char getLetter() { + return letter; + } + + /** + * Sets the char letter to the current {@link Character}. + * + * @param letter The letter to set + */ + public void setLetter(char letter) { + this.letter = letter; + } + + /** + * Gets the modifier for the character. + * + * @return The character's modifier + */ + public int getModifier() { + return modifier; + } + + /** + * Sets the modifier for the character. + * + * @param modifier The modifier to set + */ + public void setModifier(int modifier) { + this.modifier = modifier; + } + + /** + * Gets if another {@link Character} is overlapping the current {@link Character} at all in the X axis. 
+ * + * @param searchCharacter The {@link Character} to check for overlapping + * @return If the given {@link Character} is overlapping the current {@link Character} + */ + public boolean isOverlappingX(Character searchCharacter) { + // Thanks https://nedbatchelder.com/blog/201310/range_overlap_in_two_compares.html :) + return getX() + getWidth() > searchCharacter.getX() && searchCharacter.getX() + searchCharacter.getWidth() > getX(); + } + + /** + * Gets if another {@link Character} is overlapping the current {@link Character} at all in the Y axis. + * + * @param searchCharacter The {@link Character} to check for overlapping + * @return If the given {@link Character} is overlapping the current {@link Character} + */ + public boolean isOverlappingY(Character searchCharacter) { + // Thanks https://nedbatchelder.com/blog/201310/range_overlap_in_two_compares.html :) + return getY() + getHeight() > searchCharacter.getY() && searchCharacter.getY() + searchCharacter.getHeight() > getY(); + } + + /** + * Gets if the given coordinate is within the bounds of this character. + * + * @param x The X coordinate to check + * @param y The Y coordinate to check + * @return If the coordinate is within this character + */ + public boolean isInBounds(int x, int y) { + return x <= this.x + this.width + && x >= this.x + && y <= this.y + this.height + && y >= this.y; + } + + /** + * Gets if the given Y position is within the Y bounds of the current character. + * + * @param y The Y position to check + * @return If the given Y position is within the Y bounds of the current character + */ + public boolean isInYBounds(int y) { + return y <= this.y + this.height + && y >= this.y; + } + + /** + * Gets if the given X position is within the X bounds of the current character. 
+ * + * @param x The X position to check + * @return If the given X position is within the X bounds of the current character + */ + public boolean isInXBounds(int x) { + return x <= this.x + this.width + && x >= this.x; + } + + @Override + public String toString() { + return String.valueOf(this.letter); + } + + @Override + public int compareTo(Character searchCharacter) { + return x - searchCharacter.x; + } + + @Override + public int hashCode() { + return Objects.hash(this.letter, this.x, this.y, this.width, this.height); + } + + @Override + public boolean equals(Object obj) { + if (!(obj instanceof Character)) return false; + var character = (Character) obj; + return character.letter == this.letter + && character.x == this.x + && character.y == this.y + && character.width == this.width + && character.height == this.height; + } +} diff --git a/src/main/java/com/uddernetworks/newocr/character/CoordinateCharacter.java b/src/main/java/com/uddernetworks/newocr/character/CoordinateCharacter.java new file mode 100644 index 0000000..108a09d --- /dev/null +++ b/src/main/java/com/uddernetworks/newocr/character/CoordinateCharacter.java @@ -0,0 +1,125 @@ +package com.uddernetworks.newocr.character; + +import com.uddernetworks.newocr.utils.IntPair; + +import java.util.List; +import java.util.Objects; + +/** + * The superclass for characters containing data from the input image. + */ +public abstract class CoordinateCharacter extends Character { + + List coordinates; + boolean[][] values; + int amountOfMerges = 0; + + /** + * Merges the given {@link CoordinateCharacter} with the current one, possibly changing width, height, X and Y + * values, along with combining the current and given {@link CoordinateCharacter}'s coordinates and values + * (Accessible via {@link CoordinateCharacter#getCoordinates()} and {@link CoordinateCharacter#getValues()} + * respectively). 
+ * + * @param character The {@link CoordinateCharacter} to merge into the current one + */ + public void merge(CoordinateCharacter character) { + this.amountOfMerges++; + this.coordinates.addAll(character.coordinates); + int maxX = Integer.MIN_VALUE, minX = Integer.MAX_VALUE; + int maxY = Integer.MIN_VALUE, minY = Integer.MAX_VALUE; + + for (var pair : this.coordinates) { + int key = pair.getKey(), value = pair.getValue(); + + if (key > maxX) { + maxX = key; + } + + if (key < minX) { + minX = key; + } + + if (value > maxY) { + maxY = value; + } + + if (value < minY) { + minY = value; + } + } + + this.x = minX; + this.y = minY; + + this.width = maxX - minX; + this.height = maxY - minY; + + values = new boolean[this.height + 1][]; + + for (int i = 0; i < values.length; i++) { + values[i] = new boolean[width + 1]; + } + + this.coordinates.forEach(pair -> values[pair.getValue() - this.y][pair.getKey() - this.x] = true); + } + + /** + * Gets the coordinates of the character. + * + * @return The coordinates + */ + public List getCoordinates() { + return coordinates; + } + + /** + * Gets the black (true) and white (false) pixels of the scanned character. + * + * @return The grid of black or white values + */ + public boolean[][] getValues() { + return values; + } + + /** + * Sets the black (true) and white (false) pixels of the scanned character. + * + * @param values The grid of black or white values. Will return `null` for spaces + */ + public void setValues(boolean[][] values) { + this.values = values; + } + + /** + * Gets how many times the current {@link ImageLetter} has been merged via + * {@link CoordinateCharacter#merge(CoordinateCharacter)} with another {@link ImageLetter}. This value is added + * every time {@link CoordinateCharacter#merge(CoordinateCharacter)} is invoked, and adds the amount of merges the + * argument of that method to the current merge value, as well as + * incrementing normally. 
+ * + * @return The amount of merge operations affecting the current {@link ImageLetter} + */ + public int getAmountOfMerges() { + return amountOfMerges; + } + + @Override + public int hashCode() { + return Objects.hash(this.letter, this.x, this.y, this.width, this.height, this.coordinates, this.values, this.amountOfMerges); + } + + @Override + public boolean equals(Object obj) { + if (!(obj instanceof CoordinateCharacter)) return false; + var character = (CoordinateCharacter) obj; + return character.letter == this.letter + && character.x == this.x + && character.y == this.y + && character.width == this.width + && character.height == this.height + && character.coordinates == this.coordinates + && character.values == this.values + && character.amountOfMerges == this.amountOfMerges; + } + +} diff --git a/src/main/java/com/uddernetworks/newocr/character/DatabaseCharacter.java b/src/main/java/com/uddernetworks/newocr/character/DatabaseCharacter.java new file mode 100644 index 0000000..8c1659b --- /dev/null +++ b/src/main/java/com/uddernetworks/newocr/character/DatabaseCharacter.java @@ -0,0 +1,113 @@ +package com.uddernetworks.newocr.character; + +/** + * Used for storage of data to go into or come from the database. + */ +public class DatabaseCharacter extends Character { + + private double[] data = new double[17]; + private double avgWidth; + private double avgHeight; + private double minCenter; // Pixels from the top + private double maxCenter; // Pixels from the top + + /** + * Creates a DatabaseCharacter from the given character with a modifier of 0. + * + * @param letter The letter of the object + */ + public DatabaseCharacter(char letter) { + super(letter); + } + + /** + * Creates a DatabaseCharacter from the given character. 
+ * + * @param letter The letter of the object + * @param modifier The modifier of the character + */ + public DatabaseCharacter(char letter, int modifier) { + super(letter, modifier); + } + + /** + * Gets the 17 sectioned percentages for the stored character. + * + * @return An array of percentages <= 1 with a length of 17 + */ + public double[] getData() { + return data; + } + + /** + * Sets a percentage value to the data. + * + * @param index The index of the data to set + * @param data The percentage of filled in pixels found + */ + public void addDataPoint(int index, double data) { + this.data[index] = data; + } + + /** + * Sets multiple used data points. + * + * @param avgWidth The average width across all used characters in the font sizes + * @param avgHeight The average height across all used characters in the font sizes + * @param minCenter The minimum relative center value in the training set for this character and font size + * @param maxCenter The maximum relative center value in the training set for this character and font size + */ + public void setData(double avgWidth, double avgHeight, double minCenter, double maxCenter) { + this.avgWidth = avgWidth; + this.avgHeight = avgHeight; + this.minCenter = minCenter; + this.maxCenter = maxCenter; + } + + /** + * Gets the average width of the character. + * + * @return The average width of the character + */ + public double getAvgWidth() { + return avgWidth; + } + + /** + * Gets the average height of the character. + * + * @return The average height of the character + */ + public double getAvgHeight() { + return avgHeight; + } + + /** + * Gets the minimum relative center value from the top of the character found in the training set for this font size. 
+ * + * @return The minimum relative center value from the top of the character found in the training set for this font size + */ + public double getMinCenter() { + return minCenter; + } + + /** + * Gets the maximum relative center value from the top of the character found in the training set for this font size. + * + * @return The maximum relative center value from the top of the character found in the training set for this font size + */ + public double getMaxCenter() { + return maxCenter; + } + + @Override + public boolean equals(Object obj) { + if (obj instanceof DatabaseCharacter) { + var characterObj = ((DatabaseCharacter) obj); + return characterObj.letter == this.letter && characterObj.modifier == modifier; + } + + return false; + } + +} diff --git a/src/main/java/com/uddernetworks/newocr/character/ImageLetter.java b/src/main/java/com/uddernetworks/newocr/character/ImageLetter.java index 610acf6..4a62997 100644 --- a/src/main/java/com/uddernetworks/newocr/character/ImageLetter.java +++ b/src/main/java/com/uddernetworks/newocr/character/ImageLetter.java @@ -1,134 +1,110 @@ package com.uddernetworks.newocr.character; -import com.uddernetworks.newocr.database.DatabaseCharacter; import com.uddernetworks.newocr.utils.IntPair; +import it.unimi.dsi.fastutil.objects.Object2DoubleMap; +import java.util.ArrayList; import java.util.List; +import java.util.Objects; import java.util.Optional; /** * An object to contain data from characters directly scanned from an image. 
*/ -public class ImageLetter { +public class ImageLetter extends CoordinateCharacter { - private DatabaseCharacter databaseCharacter; - private int x; - private int y; - private int width; - private int height; + private double averageWidth; + private double averageHeight; private double ratio; - private boolean[][] values; - private List segments; private Object data; + private double maxCenter; + private double minCenter; + + private List> closestMatches = new ArrayList<>(); /** * Creates an ImageLetter from collected data. - * @param databaseCharacter The {@link DatabaseCharacter} that is decided to be related to this character - * @param x The X coordinate of this character - * @param y The Y coordinate of this character - * @param width The width of this character - * @param height The height of this character - * @param ratio The width/height ratio of this character + * + * @param letter The letter value + * @param modifier The modifier of the letter + * @param x The X coordinate of this character + * @param y The Y coordinate of this character + * @param width The width of this character + * @param height The height of this character + * @param averageWidth The average width of the character + * @param averageHeight The average height of the character + * @param ratio The width/height ratio of this character */ - public ImageLetter(DatabaseCharacter databaseCharacter, int x, int y, int width, int height, double ratio) { - this(databaseCharacter, x, y, width, height, ratio, null); + public ImageLetter(char letter, int modifier, int x, int y, int width, int height, double averageWidth, double averageHeight, double ratio) { + this(letter, modifier, x, y, width, height, averageWidth, averageHeight, ratio, null); } /** * Creates an ImageLetter from collected data. 
- * @param databaseCharacter The {@link DatabaseCharacter} that is decided to be related to this character - * @param x The X coordinate of this character - * @param y The Y coordinate of this character - * @param width The width of this character - * @param height The height of this character - * @param ratio The width/height ratio of this character - * @param segments The data segments of this character (In form of [Black, Total]) - */ - public ImageLetter(DatabaseCharacter databaseCharacter, int x, int y, int width, int height, double ratio, List segments) { - this.databaseCharacter = databaseCharacter; + * + * @param letter The letter value + * @param modifier The modifier of the letter + * @param x The X coordinate of this character + * @param y The Y coordinate of this character + * @param width The width of this character + * @param height The height of this character + * @param averageWidth The average width of the character + * @param averageHeight The average height of the character + * @param ratio The width/height ratio of this character + * @param coordinates The data coordinates of this character (In form of [Black, Total]) + */ + public ImageLetter(char letter, int modifier, int x, int y, int width, int height, double averageWidth, double averageHeight, double ratio, List coordinates) { + this.letter = letter; + this.modifier = modifier; this.x = x; this.y = y; this.width = width; this.height = height; + this.averageWidth = averageWidth; + this.averageHeight = averageHeight; this.ratio = ratio; - this.segments = segments; - } - - /** - * Gets the {@link DatabaseCharacter} found. - * @return The {@link DatabaseCharacter} found - */ - public DatabaseCharacter getDatabaseCharacter() { - return databaseCharacter; + this.coordinates = coordinates; } /** - * Gets the X coordinate of this character. - * @return The X coordinate of this character + * Gets the average width of this character's trained data. 
+ * + * @return The average width of the character */ - public int getX() { - return x; + public double getAverageWidth() { + return averageWidth; } /** - * Sets the X coordinate of this character. - * @param x The X coordinate of this character + * Sets the average width of this character's trained data. + * + * @param averageWidth The average width of the character */ - public void setX(int x) { - this.x = x; - } - - - /** - * Gets the Y coordinate of this character. - * @return The Y coordinate of this character - */ - public int getY() { - return y; + public void setAverageWidth(double averageWidth) { + this.averageWidth = averageWidth; } /** - * Sets the Y coordinate of this character. - * @param y The Y coordinate of this character + * Gets the average height of this character's trained data. + * + * @return The average height of the character */ - public void setY(int y) { - this.y = y; + public double getAverageHeight() { + return averageHeight; } /** - * Gets the width of this character. - * @return The width of this character + * Sets the average height of this character's trained data. + * + * @param averageHeight The average height of the character */ - public int getWidth() { - return width; - } - - /** - * Sets the width of this character - * @param width The width of this character - */ - public void setWidth(int width) { - this.width = width; - } - - /** - * Gets the height of this character. - * @return The height of this character - */ - public int getHeight() { - return height; - } - - /** - * Sets the height of this character. - * @param height The height of this character - */ - public void setHeight(int height) { - this.height = height; + public void setAverageHeight(double averageHeight) { + this.averageHeight = averageHeight; } /** * Gets the width/height ratio of this character. 
+ * * @return The width/height ratio of this character */ public double getRatio() { @@ -137,31 +113,19 @@ public double getRatio() { /** * Sets the width/height ratio of this character. + * * @param ratio The width/height ratio of this character */ public void setRatio(double ratio) { this.ratio = ratio; } - /** - * Gets the data segments of this character in form of [Black, Total] - * @return The data segments of this character - */ - public List getSegments() { - return segments; - } - - /** - * Gets the character value found for this character. - * @return The character value found for this character - */ - public char getLetter() { - return this.databaseCharacter.getLetter(); - } - /** * Gets any data set to the {@link ImageLetter} object, useful for storing any needed data about the character to be * used in the future. + * + * @param clazz The class type of the data, only used for getting the returning type + * @param The type of the data * @return Data set to the character */ public Optional getData(Class clazz) { @@ -171,6 +135,7 @@ public Optional getData(Class clazz) { /** * Gets the raw data Object set to the {@link ImageLetter} object, useful for storing any needed data about the * character to be used in the future. + * * @return Data set to the character */ public Optional getData() { @@ -180,6 +145,7 @@ public Optional getData() { /** * Sets any data to the {@link ImageLetter} object, useful for storing any needed data about the character to be * used in the future. + * * @param data The data to be set */ public void setData(Object data) { @@ -187,23 +153,100 @@ public void setData(Object data) { } /** - * Gets the black (true) and white (false) pixels of the scanned character. - * @return The grid of black or white values + * Gets the minimum relative center value from the top of the character found in the training set for this font size. 
+ * + * @return The minimum relative center value from the top of the character found in the training set for this font size + */ + public double getMinCenter() { + return minCenter; + } + + /** + * Sets the minimum relative center value from the top of the character found in the training set for this font size. + * + * @param minCenter The minimum relative center value from the top of the character found in the training set for this font size + */ + public void setMinCenter(double minCenter) { + this.minCenter = minCenter; + } + + /** + * Gets the maximum relative center value from the top of the character found in the training set for this font size. + * + * @return The maximum relative center value from the top of the character found in the training set for this font size + */ + public double getMaxCenter() { + return maxCenter; + } + + /** + * Sets the maximum relative center value from the top of the character found in the training set for this font size. + * + * @param maxCenter The maximum relative center value from the top of the character found in the training set for this font size */ - public boolean[][] getValues() { - return values; + public void setMaxCenter(double maxCenter) { + this.maxCenter = maxCenter; + } + + public List> getClosestMatches() { + return closestMatches; + } + + public void setClosestMatches(List> closestMatches) { + this.closestMatches = closestMatches; + } + + public void setNextClosest() { + copyProperties(this.closestMatches.remove(0).getKey()); } /** - * Sets the black (true) and white (false) pixels of the scanned character. - * @param values The grid of black or white values. Will return `null` for spaces + * Copies the properties from the given {@link ImageLetter} to the current one. 
+ * + * @param imageLetter The {@link ImageLetter} to copy data from */ - public void setValues(boolean[][] values) { - this.values = values; + public void copyProperties(ImageLetter imageLetter) { + this.letter = imageLetter.letter; + this.modifier = imageLetter.modifier; + this.x = imageLetter.x; + this.y = imageLetter.y; + this.width = imageLetter.width; + this.height = imageLetter.height; + this.averageWidth = imageLetter.averageWidth; + this.averageHeight = imageLetter.averageHeight; + this.ratio = imageLetter.ratio; + this.values = imageLetter.values; + this.coordinates = imageLetter.coordinates; + this.data = imageLetter.data; + this.maxCenter = imageLetter.maxCenter; + this.minCenter = imageLetter.minCenter; + this.amountOfMerges = imageLetter.amountOfMerges; } @Override public String toString() { - return String.valueOf(getLetter()); + return getLetter() + "_" + getModifier(); + } + + @Override + public int hashCode() { + return Objects.hash(this.letter, this.x, this.y, this.width, this.height, this.averageWidth, this.averageHeight, this.ratio, this.data, this.maxCenter, this.minCenter); + } + + @Override + public boolean equals(Object obj) { + if (!(obj instanceof ImageLetter)) return false; + var character = (ImageLetter) obj; + return character.letter == this.letter + && character.x == this.x + && character.y == this.y + && character.width == this.width + && character.height == this.height + && character.averageWidth == this.averageWidth + && character.averageHeight == this.averageHeight + && character.ratio == this.ratio + && character.data == this.data + && character.maxCenter == this.maxCenter + && character.minCenter == this.minCenter; } } diff --git a/src/main/java/com/uddernetworks/newocr/character/SearchCharacter.java b/src/main/java/com/uddernetworks/newocr/character/SearchCharacter.java index ef27c9f..f23e944 100644 --- a/src/main/java/com/uddernetworks/newocr/character/SearchCharacter.java +++ 
b/src/main/java/com/uddernetworks/newocr/character/SearchCharacter.java @@ -1,274 +1,75 @@ package com.uddernetworks.newocr.character; -import com.uddernetworks.newocr.LetterMeta; import com.uddernetworks.newocr.utils.IntPair; -import com.uddernetworks.newocr.utils.OCRUtils; -import java.util.LinkedList; -import java.util.List; +import com.uddernetworks.newocr.utils.SegmentationUtils; + +import java.util.*; import java.util.concurrent.atomic.AtomicInteger; /** * An object meant to store characters directly scanned from an image and that is being searched for/mutated. */ -public class SearchCharacter implements Comparable { +public class SearchCharacter extends CoordinateCharacter { - private char knownChar = '?'; - private boolean[][] values; - private int x; - private int y; - private int width; - private int height; - private boolean hasDot; - private LetterMeta letterMeta = LetterMeta.NONE; private List segments = new LinkedList<>(); - private double[] segmentPercentages = new double[8 + 9]; // Percentage <= 1 // FIrst 8 are the normal ones, last 9 are for the grid created + private double[] segmentPercentages = new double[8 + 9]; // Percentage <= 1 // First 8 are the normal ones, last 9 are for the grid created + private Map trainingMeta = new HashMap<>(); + private double centerOffset = 0; /** * Creates a SearchCharacter from a list of coordinates used by the character. + * * @param coordinates Coordinates used by the character */ public SearchCharacter(List coordinates) { + this(coordinates, 0, 0); + } + + /** + * Creates a SearchCharacter from a list of coordinates used by the character. 
+ * + * @param coordinates Coordinates used by the character + * @param xOffset The X offset of the coordinates + * @param yOffset The Y offset of the coordinates + */ + public SearchCharacter(List coordinates, int xOffset, int yOffset) { + this.coordinates = coordinates; int maxX = Integer.MIN_VALUE, minX = Integer.MAX_VALUE; int maxY = Integer.MIN_VALUE, minY = Integer.MAX_VALUE; - + for (var pair : coordinates) { int key = pair.getKey(), value = pair.getValue(); - + if (key > maxX) { maxX = key; } - + if (key < minX) { minX = key; } - + if (value > maxY) { maxY = value; } - + if (value < minY) { minY = value; } } - this.x = minX; - this.y = minY; - - this.width = maxX - minX; - this.height = maxY - minY; - - values = new boolean[this.height + 1][]; - - for (int i = 0; i < values.length; i++) { - values[i] = new boolean[width + 1]; - } - - coordinates.forEach(pair -> values[pair.getValue() - this.y][pair.getKey() - this.x] = true); - } - - /** - * Gets if the character is probably a dot. - * @return If the character is probably a dot - */ - public boolean isProbablyDot() { - int diff = Math.max(width, height) - Math.min(width, height); - return diff <= 3; - } - - /** - * Gets if the character is probably a circle of a percent. - * @return If the character is probably a circle of a percent - */ - public boolean isProbablyCircleOfPercent() { - double ratio = (double) width + 1 / (double) height + 1; - return ratio <= 0.9 && ratio >= 0.7; - } + this.x = minX + xOffset; + this.y = minY + yOffset; - /** - * Gets if the character is probably an apostrophe. - * @return If the character is probably an apostrophe - */ - public boolean isProbablyApostraphe() { - double ratio = (double) width / (double) height; - return (ratio <= 0.375 && ratio >= 0.166) || (width == 1 && (height == 4 || height == 5)); - } + this.width = maxX - minX + 1; + this.height = maxY - minY + 1; - /** - * Gets if the character is probably a colon. 
- * @return If the character is probably a colon - */ - public boolean isProbablyColon() { - double ratio = (Math.min(this.width, this.height) + 1D) / (Math.max(this.width, this.height) + 1D); - return (ratio <= 1D && ratio >= 0.7D) - || (height * 4 < width) - || ((width == 3 && height == 3) - || (width == 2 && height == 3) - || (width == 2 && height == 2) - || (width == 1 || height == 2)); - } + values = new boolean[this.height][]; - /** - * Adds coordinates to the character. - * @param dotCoordinates The coordinates to add - */ - public void addDot(List dotCoordinates) { - boolean[][] values = new boolean[this.height + 1][]; - for (int i = 0; i < values.length; i++) { - values[i] = new boolean[width + 1]; - } - - int yOffset = this.height - this.values.length + 1; - - for (int y = 0; y < this.values.length; y++) { - System.arraycopy(this.values[y], 0, values[y + yOffset], 0, this.values[0].length); - } - - dotCoordinates.forEach(entry -> values[entry.getValue() - this.y][entry.getKey() - this.x] = true); - - this.values = values; - this.hasDot = true; - } - - /** - * Adds a set of coordinates to the character to the current character (Assuming the current character is a percent). - * @param dotCoordinates The coordinates to add from the circle of the percent - * @param left If the percentage circle is on the left or right - */ - public void addPercentageCircle(List dotCoordinates, boolean left) { - boolean[][] values = new boolean[this.height + 1][]; - for (int i = 0; i < values.length; i++) values[i] = new boolean[width + 1]; - - int yOffset = this.height - this.values.length + 1; - - int offset = left ? 
Math.abs(width + 1 - this.values[0].length) : 0; - - for (int y = 0; y < this.values.length; y++) { - System.arraycopy(this.values[y], 0, values[y + yOffset], offset, this.values[0].length); + values[i] = new boolean[width]; } - dotCoordinates.forEach(entry -> values[entry.getValue() - this.y][entry.getKey() - this.x] = true); - - this.values = values; - this.hasDot = true; - } - - /** - * Gets the raw grid of boolean values from the image of this character. - * @return The raw grid of boolean values from the image of this character - */ - public boolean[][] getValues() { - return values; - } - - /** - * Gets the X position of the character. - * @return The X position of ths character - */ - public int getX() { - return x; - } - - /** - * Gets the Y position of the character. - * @return The Y position of ths character - */ - public int getY() { - return y; - } - - /** - * Gets the width of the character. - * @return The width of ths character - */ - public int getWidth() { - return width; - } - - /** - * Gets the height of the character. - * @return The height of ths character - */ - public int getHeight() { - return height; - } - - /** - * Sets the X position of the character. - * @param x the X position to set - */ - public void setX(int x) { - this.x = x; - } - - /** - * Sets the Y position of the character. - * @param y the Y position to set - */ - public void setY(int y) { - this.y = y; - } - - /** - * Sets the width of the character. - * @param width The width of the character - */ - public void setWidth(int width) { - this.width = width; - } - - /** - * Sets the height of the character. - * @param height The height of the character - */ - public void setHeight(int height) { - this.height = height; - } - - /** - * Gets if the given coordinate is within the bounds of this character. 
- * @param x The X coordinate to check - * @param y The Y coordinate to check - * @return If the coordinate is within this character - */ - public boolean isInBounds(int x, int y) { - return x <= this.x + this.width - && x >= this.x - && y <= this.y + this.height - && y >= this.y; - } - - /** - * Gets if another {@link SearchCharacter} is overlapping the current {@link SearchCharacter} at all. - * @param searchCharacter The {@link SearchCharacter} to check for overlapping - * @return If the given {@link SearchCharacter} is overlapping the current {@link SearchCharacter} - */ - public boolean isOverlaping(SearchCharacter searchCharacter) { - if (isInBounds(searchCharacter.getX(), searchCharacter.getY())) return true; - if (isInBounds(searchCharacter.getX(), searchCharacter.getY() + searchCharacter.getHeight())) return true; - if (isInBounds(searchCharacter.getX() + searchCharacter.getWidth(), searchCharacter.getY())) return true; - if (isInBounds(searchCharacter.getX() + searchCharacter.getWidth(), searchCharacter.getY() + searchCharacter.getHeight())) return true; - return false; - } - - /** - * Gets if the given Y position is within the Y bounds of the current character. - * @param y The Y position to check - * @return If the given Y position is within the Y bounds of the current character - */ - public boolean isInYBounds(int y) { - return y <= this.y + this.height - && y >= this.y; - } - - /** - * Gets if the given Y position is within the X bounds of the current character. 
- * @param x The Y position to check - * @return If the given Y position is within the X bounds of the current character - */ - public boolean isInXBounds(int x) { - return x <= this.x + this.width - && x >= this.x; + coordinates.forEach(pair -> values[pair.getValue() - this.y + yOffset][pair.getKey() - this.x + xOffset] = true); } /** @@ -276,16 +77,16 @@ public boolean isInXBounds(int x) { */ public void applySections() { AtomicInteger index = new AtomicInteger(); - OCRUtils.getHorizontalHalf(this.values) - .flatMap(OCRUtils::getVerticalHalf) - .forEach(section -> OCRUtils.getDiagonal(section, index.get() == 1 || index.getAndIncrement() == 2).forEach(this::addSegment)); + SegmentationUtils.getHorizontalHalf(this.values) + .flatMap(SegmentationUtils::getVerticalHalf) + .forEach(section -> SegmentationUtils.getDiagonal(section, index.get() == 1 || index.getAndIncrement() == 2).forEach(this::addSegment)); - OCRUtils.getHorizontalThird(this.values).forEach(values -> - OCRUtils.getVerticalThird(values).forEach(this::addSegment)); + SegmentationUtils.getHorizontalThird(this.values).forEach(values -> + SegmentationUtils.getVerticalThird(values).forEach(this::addSegment)); } /** - * Performs calculations for the sections added by {@link #addSegment(IntPair)}, getting their <= 1 percentages + * Performs calculations for the sections added by {@link #addSegment(IntPair)}, getting their <= 1 percentages * accessible from {@link #getSegmentPercentages()}. This must be invoked after {@link #applySections()}. */ public void analyzeSlices() { @@ -303,6 +104,7 @@ public void analyzeSlices() { /** * Adds a data segment to be calculated in the future. The segments may be fetched via {@link #getSegments()}. 
+ * * @param entry The data segment in the format of [total black, size of segment] */ public void addSegment(IntPair entry) { @@ -312,6 +114,7 @@ public void addSegment(IntPair entry) { /** * Gets the raw segments added via {@link #addSegment(IntPair)} where the Entry format is * [total black, size of segment]. + * * @return The raw segments */ public List getSegments() { @@ -319,8 +122,9 @@ public List getSegments() { } /** - * Gets the raw segment percentages all <= 1. This will return an empty array until {@link #applySections()} and + * Gets the raw segment percentages all <= 1. This will return an empty array until {@link #applySections()} and * {@link #analyzeSlices()} have been invoked. + * * @return The raw array of segment percentages with a length of 17 */ public double[] getSegmentPercentages() { @@ -328,69 +132,63 @@ public double[] getSegmentPercentages() { } /** - * Gets the known character of this object. If it has not been fount yet, it will return `?`. - * @return The known character + * Gets the training meta with the given name. This contains data such as separation of the dots of an i, data on + * the holes of a %, etc. + * + * @param name The name of the training data + * @return The value of the training data */ - public char getKnownChar() { - return knownChar; + public OptionalDouble getTrainingMeta(String name) { + return this.trainingMeta.containsKey(name) ? OptionalDouble.of(this.trainingMeta.get(name)) : OptionalDouble.empty(); } /** - * Sets the known character. - * @param knownChar The know character + * Sets the training data with a given name. + * + * @param name The name of the data + * @param data The data to set */ - public void setKnownChar(char knownChar) { - this.knownChar = knownChar; + public void setTrainingMeta(String name, double data) { + this.trainingMeta.put(name, data); } /** - * Gets If this character has a dot. 
- * @return If this character has a dot + * Gets the amount away a character is from the center of the line. This isn't useful for detecting single + * characters. + * + * @return The offset of the character */ - public boolean hasDot() { - return this.hasDot; + public double getCenterOffset() { + return centerOffset; } /** - * Sets if this character has a dot in it. - * @param hasDot If this character has a dot + * Sets the amount away a character is from the center of the line. This isn't useful for detecting single + * characters. + * + * @param centerOffset The offset of the character to set */ - public void setHasDot(boolean hasDot) { - this.hasDot = hasDot; + public void setCenterOffset(double centerOffset) { + this.centerOffset = centerOffset; } @Override - public int compareTo(SearchCharacter searchCharacter) { - return x - searchCharacter.x; + public int hashCode() { + return Objects.hash(this.letter, this.x, this.y, this.width, this.height, this.segments, this.segmentPercentages, this.trainingMeta, this.centerOffset); } @Override - public String toString() { - return String.valueOf(knownChar); - } - - /** - * Gets the raw 2D array of values of the character. - * @return The raw 2D array of values of the character - */ - public boolean[][] getData() { - return values; - } - - /** - * Gets the {@link LetterMeta} of the current character. - * @return The {@link LetterMeta} of the current character - */ - public LetterMeta getLetterMeta() { - return letterMeta; - } - - /** - * Sets the {@link LetterMeta} for the current character. 
- * @param letterMeta The {@link LetterMeta} for the current character - */ - public void setLetterMeta(LetterMeta letterMeta) { - this.letterMeta = letterMeta; + public boolean equals(Object obj) { + if (!(obj instanceof SearchCharacter)) return false; + var character = (SearchCharacter) obj; + return character.letter == this.letter + && character.x == this.x + && character.y == this.y + && character.width == this.width + && character.height == this.height + && character.segments == this.segments + && character.segmentPercentages == this.segmentPercentages + && character.trainingMeta == this.trainingMeta + && character.centerOffset == this.centerOffset; } - } diff --git a/src/main/java/com/uddernetworks/newocr/train/TrainedCharacterData.java b/src/main/java/com/uddernetworks/newocr/character/TrainedCharacterData.java similarity index 61% rename from src/main/java/com/uddernetworks/newocr/train/TrainedCharacterData.java rename to src/main/java/com/uddernetworks/newocr/character/TrainedCharacterData.java index 82ba315..0d9591d 100644 --- a/src/main/java/com/uddernetworks/newocr/train/TrainedCharacterData.java +++ b/src/main/java/com/uddernetworks/newocr/character/TrainedCharacterData.java @@ -1,20 +1,14 @@ -package com.uddernetworks.newocr.train; - -import com.uddernetworks.newocr.LetterMeta; -import com.uddernetworks.newocr.character.SearchCharacter; -import it.unimi.dsi.fastutil.doubles.DoubleArrayList; -import it.unimi.dsi.fastutil.doubles.DoubleList; -import java.util.ArrayList; -import java.util.List; +package com.uddernetworks.newocr.character; + +import java.util.LinkedList; +import java.util.Objects; import java.util.OptionalDouble; /** * This is an object meant for storing the data for characters in the training stage. 
*/ -public class TrainedCharacterData { +public class TrainedCharacterData extends Character { - private char value; - private boolean hasDot; private double widthAverage; private double heightAverage; private double[] segmentPercentages; @@ -22,32 +16,32 @@ public class TrainedCharacterData { private double maxCenter = -1; private double sizeRatio = -1; // Width / Height private boolean empty = true; - private LetterMeta letterMeta = LetterMeta.NONE; - private List recalculatingList = new ArrayList<>(); - private DoubleList recalculatingWidths = new DoubleArrayList(); - private DoubleList recalculatingHeights = new DoubleArrayList(); + private LinkedList recalculatingList = new LinkedList<>(); + private LinkedList recalculatingWidths = new LinkedList<>(); + private LinkedList recalculatingHeights = new LinkedList<>(); /** - * Creates a {@link TrainedCharacterData} from a character value. + * Creates a {@link TrainedCharacterData} from a character letter with a modifier of 0. * - * @param value The known character value + * @param letter The known character letter */ - public TrainedCharacterData(char value) { - this.value = value; + public TrainedCharacterData(char letter) { + super(letter); } /** - * Gets the assigned character value. + * Creates a {@link TrainedCharacterData} from a character value with a given modifier value. * - * @return The assigned character value + * @param letter The known character value + * @param modifier The modifier for the character */ - public char getValue() { - return value; + public TrainedCharacterData(char letter, int modifier) { + super(letter, modifier); } /** * Gets the calculated array of the percentages for each section. The array is always 16 elements with everything - * being <= 1. + * being <= 1. * * @return An array of percentages */ @@ -56,37 +50,65 @@ public double[] getSegmentPercentages() { } /** - * Gets if the trained character has a dot. + * Gets the width/height size ratio. 
* - * @return If the trained character has a dot + * @return The width/height size ratio */ - public boolean hasDot() { - return hasDot; + public double getSizeRatio() { + return sizeRatio; } /** - * Sets if the trained character has a dot. + * Gets the average width for everything trained with this object. * - * @param hasDot If the trained character has a dot + * @return The average width. Will return 0 if {@link #finishRecalculations()} has not been invoked. */ - public void setHasDot(boolean hasDot) { - this.hasDot |= hasDot; + public double getWidthAverage() { + return widthAverage; } /** - * Gets the width/height size ratio. + * Gets the average height for everything trained with this object. * - * @return The width/height size ratio + * @return The average height. Will return 0 if {@link #finishRecalculations()} has not been invoked. */ - public double getSizeRatio() { - return sizeRatio; + public double getHeightAverage() { + return heightAverage; + } + + /** + * Gets the minimum center value of all the training data used. + * + * @return The minimum center value of all the training data used + */ + public double getMinCenter() { + return minCenter; + } + + /** + * Gets the maximum center value of all the training data used. + * + * @return The maximum center value of all the training data used + */ + public double getMaxCenter() { + return maxCenter; + } + + /** + * Gets if anything has been recalcuated/prepared to be recalculated to the character, e.g. by using + * {@link #recalculateTo(double, double)} or {@link #recalculateTo(SearchCharacter)}. + * + * @return If anything has been recalculated + */ + public boolean isEmpty() { + return empty; } /** * Adds the given width and height variables to the internal list to be put into calculations upon invoking * {@link #finishRecalculations()}. 
* - * @param width The width of the character + * @param width The width of the character * @param height The height of the character */ public void recalculateTo(double width, double height) { @@ -113,6 +135,29 @@ public void recalculateTo(SearchCharacter searchCharacter) { } } + /** + * Removed the last x entries added by {@link TrainedCharacterData#recalculateTo(SearchCharacter)}. + * + * @param amount The amount of entries to remove + */ + public void undoLastRecalculations(int amount) { + boolean removingList = amount < this.recalculatingList.size(); + boolean removingHeights = amount < this.recalculatingHeights.size(); + boolean removingWidths = amount < this.recalculatingWidths.size(); + + if (!removingList) this.recalculatingList.clear(); + if (!removingHeights) this.recalculatingHeights.clear(); + if (!removingWidths) this.recalculatingWidths.clear(); + + for (int i = 0; i < amount; i++) { + if (removingList) this.recalculatingList.removeLast(); + if (removingHeights) this.recalculatingHeights.removeLast(); + if (removingWidths) this.recalculatingWidths.removeLast(); + } + + finishRecalculations(); + } + /** * Calculates everything based on the data inserted by {@link #recalculateTo(double, double)} and * {@link #recalculateTo(SearchCharacter)} by averaging the width and heights provided, and averaging the @@ -127,12 +172,12 @@ public void finishRecalculations() { this.sizeRatio = this.heightAverage != 0 ? this.widthAverage / this.heightAverage : 0; - if (value == ' ') { + if (this.letter == ' ') { return; } this.segmentPercentages = new double[8 + 9]; - + for (int i = 0; i < 8 + 9; i++) { int finalI = i; this.segmentPercentages[i] = recalculatingList.stream().mapToDouble(t -> t[finalI]).average().orElse(0D); @@ -141,8 +186,8 @@ public void finishRecalculations() { /** * If the given value is less than the minimum center, it's the new minimum center, and if it's bigger than the - * maximum center, it will be the new maximum center. 
(Retrievable via {@link #getMaxCenter() and { - * @link #getMinCenter()}}). + * maximum center, it will be the new maximum center. (Retrievable via {@link #getMaxCenter() and + * {@link #getMinCenter()}}). * * @param center The value to add as center */ @@ -154,80 +199,37 @@ public void recalculateCenter(double center) { if (center > maxCenter) { maxCenter = center; } - + if (center < minCenter) { minCenter = center; } } } - /** - * Gets the average width for everything trained with this object. - * - * @return The average width. Will return 0 if {@link #finishRecalculations()} has not been invoked. - */ - public double getWidthAverage() { - return widthAverage; - } - - /** - * Gets the average height for everything trained with this object. - * - * @return The average height. Will return 0 if {@link #finishRecalculations()} has not been invoked. - */ - public double getHeightAverage() { - return heightAverage; - } - - /** - * Gets the minimum center value of all the training data used. - * - * @return The minimum center value of all the training data used - */ - public double getMinCenter() { - return minCenter; - } - - /** - * Gets the maximum center value of all the training data used. - * - * @return The maximum center value of all the training data used - */ - public double getMaxCenter() { - return maxCenter; - } - - /** - * Gets if anything has been recalcuated/prepared to be recalculated to the character, e.g. by using - * {@link #recalculateTo(double, double)} or {@link #recalculateTo(SearchCharacter)}. - * - * @return If anything has been recalculated - */ - public boolean isEmpty() { - return empty; - } - - /** - * Gets the letter meta of the character. - * - * @return The letter meta of the character - */ - public LetterMeta getLetterMeta() { - return letterMeta; - } - - /** - * Sets the letter meta of the character. 
- * - * @param letterMeta The letter meta of the character - */ - public void setLetterMeta(LetterMeta letterMeta) { - this.letterMeta = letterMeta; + @Override + public int hashCode() { + return Objects.hash(this.letter, this.x, this.y, this.width, this.height, this.widthAverage, this.heightAverage, this.segmentPercentages, this.minCenter, this.maxCenter, this.sizeRatio, this.empty); } @Override - public String toString() { - return String.valueOf(value); + public boolean equals(Object obj) { + if (!(obj instanceof TrainedCharacterData)) return false; + var character = (TrainedCharacterData) obj; + return character.letter == this.letter + && character.x == this.x + && character.y == this.y + && character.width == this.width + && character.height == this.height + && character.widthAverage == this.widthAverage + && character.heightAverage == this.heightAverage + && character.segmentPercentages == this.segmentPercentages + && character.minCenter == this.minCenter + && character.maxCenter == this.maxCenter + && character.sizeRatio == this.sizeRatio + && character.empty == this.empty + && character.recalculatingList == this.recalculatingList + && character.recalculatingWidths == this.recalculatingWidths + && character.recalculatingHeights == this.recalculatingHeights; } - + } diff --git a/src/main/java/com/uddernetworks/newocr/configuration/ConfigReflectionCacher.java b/src/main/java/com/uddernetworks/newocr/configuration/ConfigReflectionCacher.java new file mode 100644 index 0000000..8fef129 --- /dev/null +++ b/src/main/java/com/uddernetworks/newocr/configuration/ConfigReflectionCacher.java @@ -0,0 +1,29 @@ +package com.uddernetworks.newocr.configuration; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.lang.reflect.Constructor; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; + +public class ConfigReflectionCacher implements ReflectionCacher { + + private static Logger LOGGER = 
LoggerFactory.getLogger(ConfigReflectionCacher.class); + + private Map, Constructor> constructors = new HashMap<>(); + + @Override + @SuppressWarnings("unchecked") + public Optional> getOrLookupConstructor(Class clazz, ReflexiveSupplier> constructorGenerator) { + return Optional.ofNullable((Constructor) constructors.computeIfAbsent(clazz, x -> { + try { + return constructorGenerator.get(); + } catch (ReflectiveOperationException e) { + LOGGER.error("Error while creating constructor", e); + return null; + } + })); + } +} diff --git a/src/main/java/com/uddernetworks/newocr/configuration/FontConfiguration.java b/src/main/java/com/uddernetworks/newocr/configuration/FontConfiguration.java new file mode 100644 index 0000000..06a93af --- /dev/null +++ b/src/main/java/com/uddernetworks/newocr/configuration/FontConfiguration.java @@ -0,0 +1,68 @@ +package com.uddernetworks.newocr.configuration; + +import com.uddernetworks.newocr.recognition.mergence.MergeRule; +import com.uddernetworks.newocr.recognition.mergence.MergenceManager; +import com.uddernetworks.newocr.recognition.similarity.SimilarityManager; +import com.uddernetworks.newocr.train.OCROptions; + +import java.util.Optional; + +/** + * The configuration to read a file to populate managers and options. + */ +public interface FontConfiguration { + + /** + * Gets the file name the {@link HOCONFontConfiguration} was generated from. + * + * @return The file name + */ + String getFileName(); + + /** + * The friendly version of the font name that should be displayed in messages/options. + * + * @return The friendly font name + */ + String getFriendlyName(); + + /** + * Gets the font name used by the OS to define it. This shouldn't really be displayed to the user, unless the + * specific application requires it. + * + * @return The font name by the OS + */ + String getSystemName(); + + /** + * Generates and returns the {@link OCROptions} from the file configuration. 
+ * + * @return The {@link OCROptions} from the settings + */ + OCROptions fetchOptions(); + + /** + * Fetches the similarities' settings from the configuration and applies them to the given + * {@link SimilarityManager}. All similarities will be an instance of + * {@link com.uddernetworks.newocr.recognition.similarity.rules.BasicSimilarityRule}. + * + * @param similarityManager The {@link SimilarityManager} to apply all similarities to + */ + void fetchAndApplySimilarities(SimilarityManager similarityManager); + + /** + * Fetches the mergence rules' settings from the configuration and applies them to the given + * {@link MergenceManager}. + * + * @param mergenceManager The {@link MergenceManager} to apply all mergence rules to. + */ + void fetchAndApplyMergeRules(MergenceManager mergenceManager); + + /** + * Loads a class that's an instance of {@link MergeRule} from the given fully qualified name . + * + * @param className The fully qualified name of the class to load + * @return The class, if found + */ + Optional> loadMergeClass(String className); +} diff --git a/src/main/java/com/uddernetworks/newocr/configuration/HOCONFontConfiguration.java b/src/main/java/com/uddernetworks/newocr/configuration/HOCONFontConfiguration.java new file mode 100644 index 0000000..5862bee --- /dev/null +++ b/src/main/java/com/uddernetworks/newocr/configuration/HOCONFontConfiguration.java @@ -0,0 +1,181 @@ +package com.uddernetworks.newocr.configuration; + +import com.typesafe.config.*; +import com.uddernetworks.newocr.database.DatabaseManager; +import com.uddernetworks.newocr.recognition.mergence.MergeRule; +import com.uddernetworks.newocr.recognition.mergence.MergenceManager; +import com.uddernetworks.newocr.recognition.similarity.Letter; +import com.uddernetworks.newocr.recognition.similarity.SimilarityManager; +import com.uddernetworks.newocr.recognition.similarity.rules.BasicSimilarityRule; +import com.uddernetworks.newocr.train.OCROptions; +import org.slf4j.Logger; +import 
org.slf4j.LoggerFactory; + +import java.lang.reflect.InvocationTargetException; +import java.util.AbstractMap; +import java.util.Map; +import java.util.Optional; +import java.util.stream.Collectors; + +public class HOCONFontConfiguration implements FontConfiguration { + + private static Logger LOGGER = LoggerFactory.getLogger(HOCONFontConfiguration.class); + + private String fileName; + private Config config; + private ReflectionCacher reflectionCacher; + + private String systemName; + private String friendlyName; + + /** + * Creates a {@link HOCONFontConfiguration} with a given file name and {@link ReflectionCacher} (Which should be + * global across all instances of this current class). This also includes a {@link SimilarityManager} and a + * {@link MergenceManager} to automatically invoke the methods + * {@link FontConfiguration#fetchAndApplySimilarities(SimilarityManager)} and + * {@link FontConfiguration#fetchAndApplyMergeRules(MergenceManager)} in their respective order. + * + * @param fileName The name of the file + * @param reflectionCacher The {@link ReflectionCacher} to use + * @param similarityManager The {@link SimilarityManager} to invoke + * {@link FontConfiguration#fetchAndApplySimilarities(SimilarityManager)} on + * @param mergenceManager The {@link MergenceManager} to invoke + * {@link FontConfiguration#fetchAndApplyMergeRules(MergenceManager)} on + */ + public HOCONFontConfiguration(String fileName, ReflectionCacher reflectionCacher, SimilarityManager similarityManager, MergenceManager mergenceManager) { + this(fileName, reflectionCacher); + fetchAndApplySimilarities(similarityManager); + fetchAndApplyMergeRules(mergenceManager); + } + + /** + * Creates a {@link HOCONFontConfiguration} with a given file name and {@link ReflectionCacher} (Which should be + * global across all instances of this current class) + * + * @param fileName The name of the file + * @param reflectionCacher The {@link ReflectionCacher} to use + */ + public 
HOCONFontConfiguration(String fileName, ReflectionCacher reflectionCacher) { + this.fileName = fileName; + this.config = ConfigFactory.load(fileName); + this.reflectionCacher = reflectionCacher; + + var langProperties = this.config.getConfig("language.properties"); + this.systemName = langProperties.getString("system-name"); + this.friendlyName = langProperties.getString("friendly-name"); + + LOGGER.info("[{}] Loading font configuration...", this.friendlyName); + } + + @Override + public String getFileName() { + return this.fileName; + } + + @Override + public String getFriendlyName() { + return this.friendlyName; + } + + @Override + public String getSystemName() { + return this.systemName; + } + + @Override + public OCROptions fetchOptions() { + var options = this.config.getConfig("language.options"); + var ocrOptions = new OCROptions(); + + ocrOptions.setSpecialSpaces(options.getStringList("special-spaces") + .stream() + .map(string -> string.charAt(0)) + .collect(Collectors.toSet())); + + ocrOptions.setMaxPercentDiffToMerge(options.getDouble("max-percent-diff-to-merge")); + ocrOptions.setSizeRatioWeight(options.getDouble("size-ratio-weight")); + + LOGGER.info("[{}] Generated OCROptions", this.friendlyName); + + return ocrOptions; + } + + @Override + public void fetchAndApplySimilarities(SimilarityManager similarityManager) { + var similarities = this.config.getConfig("language.similarities"); + + var entries = similarities.entrySet(); + + var collected = entries.stream().collect(Collectors.groupingBy(t -> getNthPath(t.getKey(), 0))); + + collected.forEach((root, entryList) -> { + var children = entryList.stream() + .map(entry -> new AbstractMap.SimpleEntry<>(getNthPath(entry.getKey(), 1), entry.getValue())) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + + var configList = (ConfigList) children.get("letters"); + + var letters = configList.stream().map(value -> getEnumValue(Letter.class, value)) + .filter(Optional::isPresent) + 
.map(Optional::get) + .collect(Collectors.toSet()); + + similarityManager.addSimilarity(new BasicSimilarityRule((String) children.get("name").unwrapped(), letters)); + }); + + LOGGER.info("[{}] Generated and added {} similarities...", this.friendlyName, entries.size()); + } + + @Override + public void fetchAndApplyMergeRules(MergenceManager mergenceManager) { + var mergence = this.config.getConfig("language.mergence"); + var ruleList = mergence.getStringList("rules"); + ruleList.forEach(className -> loadMergeClass(className).ifPresentOrElse(ruleClass -> + this.reflectionCacher.getOrLookupConstructor(ruleClass, () -> ruleClass.getConstructor(DatabaseManager.class, SimilarityManager.class)).ifPresentOrElse(constructor -> + mergenceManager.addRule((databaseManager, similarityManager) -> { + try { + return constructor.newInstance(databaseManager, similarityManager); + } catch (InstantiationException | IllegalAccessException | InvocationTargetException e) { + LOGGER.error("[" + this.friendlyName + "] Error while creating an instance of " + className, e); + return null; + } + }), () -> LOGGER.warn("[{}] No constructor found for {}", this.friendlyName, className)), () -> LOGGER.warn("[{}] Couldn't find rule with name of {}", this.friendlyName, className))); + LOGGER.info("[{}] Generated and added {} merge rules...", this.friendlyName, ruleList.size()); + } + + @Override + @SuppressWarnings("unchecked") + public Optional> loadMergeClass(String className) { + try { + var got = Class.forName(className); + if (got.getClass().isInstance(MergeRule.class)) return Optional.of((Class) got); + } catch (ClassNotFoundException ignored) { + } + return Optional.empty(); + } + + /** + * Slightly adapted from SimpleConfig#getEnumValue(String, Class, ConfigValue) + * + * @param enumClass The class of the enum + * @param enumConfigValue The value of the enum in the config + * @param The enum type + * @return The enum value of the key + */ + private > Optional getEnumValue(Class enumClass, 
ConfigValue enumConfigValue) { + String enumName = (String) enumConfigValue.unwrapped(); + try { + return Optional.of(Enum.valueOf(enumClass, enumName)); + } catch (IllegalArgumentException e) { + LOGGER.error("Invalid enum value {} for enum {}", enumName, enumClass.getSimpleName()); + return Optional.empty(); + } + } + + private String getNthPath(String pathString, int index) { + var path = ConfigUtil.splitPath(pathString); + if (path.size() < index + 1) return "null"; + return path.get(index); + } + +} diff --git a/src/main/java/com/uddernetworks/newocr/configuration/ReflectionCacher.java b/src/main/java/com/uddernetworks/newocr/configuration/ReflectionCacher.java new file mode 100644 index 0000000..7f71b65 --- /dev/null +++ b/src/main/java/com/uddernetworks/newocr/configuration/ReflectionCacher.java @@ -0,0 +1,22 @@ +package com.uddernetworks.newocr.configuration; + +import java.lang.reflect.Constructor; +import java.util.Optional; + +/** + * A simple class to cache constructors from classes. + */ +public interface ReflectionCacher { + + /** + * Gets a constructor from the internal cache from the given class, or generate one via constructorGenerator. + * The generation of one from constructorGenerator then saves it in the local cache. 
+ * + * @param clazz The class to generate or fetch the constructor from + * @param constructorGenerator The supplier to make the constructor if one is not found + * @param The class type + * @return The constructor, if found + */ + Optional> getOrLookupConstructor(Class clazz, ReflexiveSupplier> constructorGenerator); + +} diff --git a/src/main/java/com/uddernetworks/newocr/configuration/ReflexiveSupplier.java b/src/main/java/com/uddernetworks/newocr/configuration/ReflexiveSupplier.java new file mode 100644 index 0000000..df5b0bf --- /dev/null +++ b/src/main/java/com/uddernetworks/newocr/configuration/ReflexiveSupplier.java @@ -0,0 +1,10 @@ +package com.uddernetworks.newocr.configuration; + +/** + * A simple supplier that throws {@link ReflectiveOperationException}. + * @param The type to supply + */ +@FunctionalInterface +public interface ReflexiveSupplier { + T get() throws ReflectiveOperationException; +} diff --git a/src/main/java/com/uddernetworks/newocr/database/DatabaseCharacter.java b/src/main/java/com/uddernetworks/newocr/database/DatabaseCharacter.java deleted file mode 100644 index e73d4d5..0000000 --- a/src/main/java/com/uddernetworks/newocr/database/DatabaseCharacter.java +++ /dev/null @@ -1,161 +0,0 @@ -package com.uddernetworks.newocr.database; - -import com.uddernetworks.newocr.LetterMeta; - -/** - * Used for storage of data to go into our from the database. 
- */ -public class DatabaseCharacter { - - private char letter; - private double[] data = new double[17]; - private double avgWidth; - private double avgHeight; - private int minFontSize; - private int maxFontSize; - private double minCenter; // Pixels from the top - private double maxCenter; // Pixels from the top - private boolean hasDot; - private LetterMeta letterMeta; - - /** - * Creates a DatabaseCharacter from the given character - * @param letter The letter of the object - */ - public DatabaseCharacter(char letter) { - this.letter = letter; - } - - /** - * Gets the letter for the current DatabaseCharcater - * @return The current letter - */ - public char getLetter() { - return letter; - } - - /** - * Gets the 16 sectioned percentages for the stored character - * @return An array of percentages <= 1 with a length of 17 - */ - public double[] getData() { - return data; - } - - /** - * Sets a percentage value to the data - * @param index The index of the data to set - * @param data The percentage of filled in pixels found - */ - public void addDataPoint(int index, double data) { - this.data[index] = data; - } - - /** - * Sets multiple used data points - * @param avgWidth The average width across all used characters in the font sizes - * @param avgHeight The average height across all used characters in the font sizes - * @param minFontSize The minimum font size used in this character - * @param maxFontSize The maximum font size used in this character - * @param minCenter The minimum relative center value in the training ste for this character and font size - * @param maxCenter The maximum relative center value in the training ste for this character and font size - */ - public void setData(double avgWidth, double avgHeight, int minFontSize, int maxFontSize, double minCenter, double maxCenter) { - this.avgWidth = avgWidth; - this.avgHeight = avgHeight; - this.minFontSize = minFontSize; - this.maxFontSize = maxFontSize; - this.minCenter = minCenter; - 
this.maxCenter = maxCenter; - } - - /** - * Gets the average width of the character - * @return The average width of the character - */ - public double getAvgWidth() { - return avgWidth; - } - - /** - * Gets the average height of the character - * @return The average height of the character - */ - public double getAvgHeight() { - return avgHeight; - } - - /** - * Gets the minimum font size used in the training ste for the character - * @return The minimum font size used in the training ste for the character - */ - public int getMinFontSize() { - return minFontSize; - } - - /** - * Gets the maximum font size used in the training ste for the character - * @return The maximum font size used in the training ste for the character - */ - public int getMaxFontSize() { - return maxFontSize; - } - - /** - * Gets the minimum relative center value from the top of the character found in the training set for this font size - * @return The minimum relative center value from the top of the character found in the training set for this font size - */ - public double getMinCenter() { - return minCenter; - } - - /** - * Gets the maximum relative center value from the top of the character found in the training set for this font size - * @return The maximum relative center value from the top of the character found in the training set for this font size - */ - public double getMaxCenter() { - return maxCenter; - } - - /** - * Sets if the current character has a dot at all in it - * @param hasDot If the current character has a dot at all in it - */ - public void setHasDot(boolean hasDot) { - this.hasDot = hasDot; - } - - /** - * Gets if the current character has a dot at all in it - * @return If the current character has a dot at all in it - */ - public boolean hasDot() { - return this.hasDot; - } - - /** - * Gets the {@link LetterMeta} of the current character - * @return The {@link LetterMeta} of the current character - */ - public LetterMeta getLetterMeta() { - return letterMeta; - } 
- - /** - * Sets the {@link LetterMeta} of the current character - */ - public void setLetterMeta(LetterMeta letterMeta) { - this.letterMeta = letterMeta; - } - - @Override - public String toString() { - return String.valueOf(this.letter); - } - - @Override - public boolean equals(Object obj) { - return obj instanceof DatabaseCharacter && ((DatabaseCharacter) obj).letter == this.letter; - } - -} diff --git a/src/main/java/com/uddernetworks/newocr/database/DatabaseManager.java b/src/main/java/com/uddernetworks/newocr/database/DatabaseManager.java index 219193f..5abf2b7 100644 --- a/src/main/java/com/uddernetworks/newocr/database/DatabaseManager.java +++ b/src/main/java/com/uddernetworks/newocr/database/DatabaseManager.java @@ -1,84 +1,199 @@ package com.uddernetworks.newocr.database; -import com.uddernetworks.newocr.FontBounds; -import com.uddernetworks.newocr.LetterMeta; -import com.uddernetworks.newocr.character.SearchCharacter; +import com.uddernetworks.newocr.character.DatabaseCharacter; +import it.unimi.dsi.fastutil.doubles.DoubleList; + +import javax.sql.DataSource; import java.util.List; +import java.util.Optional; import java.util.concurrent.Future; -import javax.sql.DataSource; +import java.util.concurrent.TimeUnit; +/** + * The manager that provides methods to connect and interact with the database. + */ public interface DatabaseManager { + /** + * Sets the name of the database. This has no effect on the actual function of the database. + * + * @param name The name of the database + */ + void setName(String name); + + /** + * Gets the name of the database, which is by default it is either the file name or URL (Whichever one is given). + * + * @return The name of the database + */ + String getName(); + /** * Gets the {@link DataSource} used by the DatabaseManager + * * @return The {@link DataSource} used by the DatabaseManager */ DataSource getDataSource(); + /** + * Inserts into the `letters` table with a modifier of 0. 
+ * + * @param letter The character to insert + * @param averageWidth The average width of the character + * @param averageHeight The average height of the character + * @param minCenter The minimum relative center from the top found in the training ste for the font size + * @param maxCenter The maximum relative center from the top found in the training ste for the font size + * @param isLetter If the character is a letter (true) or if it is a space (false) + */ + void createLetterEntry(char letter, double averageWidth, double averageHeight, double minCenter, double maxCenter, boolean isLetter); + /** * Inserts into the `letters` table. - * @param letter The character to insert - * @param averageWidth The average width of the character + * + * @param letter The character to insert + * @param modifier The modifier of the data, for multi-part characters such as " + * @param averageWidth The average width of the character * @param averageHeight The average height of the character - * @param minFontSize The minimum font size populated by this character - * @param maxFontSize The maximum font size populate by this character - * @param minCenter The minimum relative center from the top found in the training ste for the font size - * @param maxCenter The maximum relative center from the top found in the training ste for the font size - * @param hasDot If the character has a dot in it - * @param letterMeta The {@link LetterMeta} of the character - * @param isLetter If the charcater is a letter (true) or if it is a space (false) - * @return A Future + * @param minCenter The minimum relative center from the top found in the training ste for the font size + * @param maxCenter The maximum relative center from the top found in the training ste for the font size + * @param isLetter If the character is a letter (true) or if it is a space (false) */ - void createLetterEntry(char letter, double averageWidth, double averageHeight, int minFontSize, int maxFontSize, double minCenter, 
double maxCenter, boolean hasDot, LetterMeta letterMeta, boolean isLetter); + void createLetterEntry(char letter, int modifier, double averageWidth, double averageHeight, double minCenter, double maxCenter, boolean isLetter); /** * Clears all data revolving around a character from both the `letters` and `sectionData` table. + * * @param letter The character to clear - * @param minFontSize The minimum font size to clear - * @param maxFontSize The maximum font size to clear */ - void clearLetterSegments(char letter, int minFontSize, int maxFontSize); + void clearLetterSegments(char letter); + + /** + * Adds segments (Percentage data points) to the database for a certain character, with a modifier of 0. + * + * @param letter The character to add segments to + * @param segments An array with a length of 17 all <= 1 as percentage data points + */ + void addLetterSegments(char letter, double[] segments); /** * Adds segments (Percentage data points) to the database for a certain character. - * @param letter The character to add segments to - * @param minFontSize The minimum font size for the character - * @param maxFontSize The maximum font size for the character - * @param segments An array with a length of 17 all <= 1 as percentage data points + * + * @param letter The character to add segments to + * @param modifier The modifier of the letter + * @param segments An array with a length of 17 all <= 1 as percentage data points */ - void addLetterSegments(char letter, int minFontSize, int maxFontSize, double[] segments); + void addLetterSegments(char letter, int modifier, double[] segments); /** - * Gets all the {@link DatabaseCharacter}s between the given {@link FontBounds}. 
- * @param fontBounds The {@link FontBounds} get the characters between + * Gets all the {@link DatabaseCharacter}s in the database + * * @return A Future of all the {@link DatabaseCharacter}s */ - Future> getAllCharacterSegments(FontBounds fontBounds); + Future> getAllCharacterSegments(); + + /** + * Adds a piece of data in the database (Never overrides existing data) to be averaged and fetched later. + * + * @param name The name of the data + * @param values The value to be added + */ + void addAveragedData(String name, double[] values); + + /** + * Adds a piece of data in the database (Never overrides existing data) to be averaged and fetched later. + * + * @param name The name of the data + * @param values The value to be added + */ + void addAveragedData(String name, DoubleList values); + + /** + * Gets the average value of the given data name, added from {@link DatabaseManager#addAveragedData(String, double[])}. + * If no data is found, it will return -1. + * + * @param name The name of the data to fetch and average + * @return The averaged data, being -1 if no data is found + */ + Future getAveragedData(String name); + + /** + * Adds a custom between-character space amount for after a character, as some fonts have different padding after + * certain character. This calculated with of padding after a character is subtracted from the amount needed for a + * space. + * + * @param letter The letter before the space width + * @param ratio The width/height ratio of the space + */ + void addCustomSpace(char letter, double ratio); + + /** + * Gets the custom between-character space associated with the character, to appear after the character. + * + * @param letter The letter this space associates with + * @return The custom between-character space width/height ratio, or 0 if no custom space is found + */ + Future getCustomSpace(char letter); + + /** + * Sets the boolean property to the database. 
+ * + * @param name The name of the property + * @param value The property value + */ + void setProperty(String name, boolean value); /** - * Adds the letter sizes to the `sizing` table to accurately detect sizing of fonts on a character basis - * @param fontSize The font size in pixels to be inserted - * @param searchCharacterList The list of {@link SearchCharacter}s to insert + * Gets the boolean property by the given name. + * + * @param name The name of the property to get + * @return The property value */ - void addLetterSize(int fontSize, List searchCharacterList); + Future> getProperty(String name); /** - * Gets the font size of the given character based on its dimensions - * @param character The character being detected - * @param height The height of the character - * @return The nearest font size in points, with -1 being if no data was found for the character for some reason + * Sets the database value as trained or not. This only changes a boolean in the database. + * + * @param trained If the database is trained or not */ - Future getLetterSize(char character, int height); + void setTrained(boolean trained); + + /** + * Gets if the database has been trained already. + * + * @return If the database has been trained + */ + Future> isTrained(); + + /** + * Gets if the database has been trained already. This differs from {@link DatabaseManager#isTrained()} because + * this is done fully synchronous and defaults to false if not found. + * + * @return If the database has been trained + */ + boolean isTrainedSync(); + + /** + * Clears all data in the database, primarily used for before training. + */ + void clearData(); /** * Shuts down all executor threads when the program is ready to be terminated. */ void shutdown(); + /** + * Shuts down all executor threads when the program is ready to be terminated after the specified time. 
+ * + * @param unit The Unit of time + * @param duration How long until it should shut down + */ + void shutdown(TimeUnit unit, long duration); + /** * Gets if the database manager is running off of the internal HSQLDB database or the external MySQL database. + * * @return If the database manager is using the internal HSQLDB database */ boolean usesInternal(); - } diff --git a/src/main/java/com/uddernetworks/newocr/database/OCRDatabaseManager.java b/src/main/java/com/uddernetworks/newocr/database/OCRDatabaseManager.java index 715d795..0ce22b2 100644 --- a/src/main/java/com/uddernetworks/newocr/database/OCRDatabaseManager.java +++ b/src/main/java/com/uddernetworks/newocr/database/OCRDatabaseManager.java @@ -1,44 +1,44 @@ package com.uddernetworks.newocr.database; -import com.uddernetworks.newocr.FontBounds; -import com.uddernetworks.newocr.LetterMeta; -import com.uddernetworks.newocr.character.SearchCharacter; -import com.uddernetworks.newocr.utils.ConversionUtils; +import com.uddernetworks.newocr.character.DatabaseCharacter; import com.zaxxer.hikari.HikariConfig; import com.zaxxer.hikari.HikariDataSource; +import it.unimi.dsi.fastutil.doubles.DoubleList; + +import javax.sql.DataSource; import java.io.BufferedReader; import java.io.File; import java.io.IOException; import java.io.InputStreamReader; import java.sql.SQLException; -import java.util.ArrayList; -import java.util.List; -import java.util.Objects; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; +import java.util.*; +import java.util.concurrent.*; +import java.util.concurrent.atomic.AtomicReference; import java.util.function.Consumer; import java.util.stream.Collectors; -import javax.sql.DataSource; +import java.util.stream.Stream; public class OCRDatabaseManager implements DatabaseManager { private final boolean useInternal; + private String 
name; private DataSource dataSource; private ExecutorService executor = Executors.newCachedThreadPool(); private String createLetterEntry; private String clearLetterSegments; private String addLetterSegment; - private String selectSegments; private String selectAllSegments; private String getLetterEntry; private String getSpaceEntry; - private String addLetterSize; - private String getLetterSize; + private String addAverageData; + private String getAverageData; + private String addCustomSpace; + private String getCustomSpace; + private String setBooleanProperty; + private String getBooleanProperty; - private final ConcurrentMap> databaseCharacterCache = new ConcurrentHashMap<>(); + private final AtomicReference> databaseCharacterCache = new AtomicReference<>(); + private final AtomicReference> customSpaceCache = new AtomicReference<>(new HashMap<>()); /** * Connects to the database with the given credentials, and executes the queries found in letters.sql and sectionData.sql @@ -46,7 +46,7 @@ public class OCRDatabaseManager implements DatabaseManager { * @param databaseURL The URL to the database * @param username The username of the connecting account * @param password The password of the connecting account - * @throws IOException + * @throws IOException If there are issues when creating/accessing the pool */ public OCRDatabaseManager(String databaseURL, String username, String password) throws IOException { this(false, null, databaseURL, username, password); @@ -57,7 +57,7 @@ public OCRDatabaseManager(String databaseURL, String username, String password) * letters.sql and sectionData.sql. This option can be over 12x faster than the MySQL variant. * * @param filePath The file without an extension of the database. 
If this doesn't exist, it will be created - * @throws IOException + * @throws IOException If there are issues when creating/accessing the pool */ public OCRDatabaseManager(File filePath) throws IOException { this(true, filePath, null, null, null); @@ -65,7 +65,7 @@ public OCRDatabaseManager(File filePath) throws IOException { public OCRDatabaseManager(boolean useInternal, File filePath, String databaseURL, String username, String password) throws IOException { this.useInternal = useInternal; - + var config = new HikariConfig(); try { @@ -75,11 +75,13 @@ public OCRDatabaseManager(boolean useInternal, File filePath, String databaseURL } if (useInternal) { + this.name = filePath.getName(); filePath.getParentFile().mkdirs(); config.setJdbcUrl("jdbc:hsqldb:file:" + filePath); config.setUsername("SA"); config.setPassword(""); } else { + this.name = databaseURL.replace(password, ""); config.setDriverClassName("com.mysql.jdbc.Driver"); config.setJdbcUrl(databaseURL); config.setUsername(username); @@ -90,12 +92,12 @@ public OCRDatabaseManager(boolean useInternal, File filePath, String databaseURL config.addDataSourceProperty("cachePrepStmts", "true"); config.addDataSourceProperty("prepStmtCacheSize", "1000"); config.addDataSourceProperty("prepStmtCacheSqlLimit", "8192"); - + dataSource = new HikariDataSource(config); - List.of("letters.sql", "sectionData.sql", "sizing.sql").parallelStream().forEach(table -> { + List.of("letters.sql", "sectionData.sql", "data.sql", "customSpaces.sql", "booleanProperties.sql").parallelStream().forEach(table -> { var stream = OCRDatabaseManager.class.getResourceAsStream("/" + table); - + try (var reader = new BufferedReader(new InputStreamReader(stream)); var connection = dataSource.getConnection(); var statement = connection.prepareStatement(reader.lines().collect(Collectors.joining("\n")))) { @@ -119,18 +121,21 @@ public OCRDatabaseManager(boolean useInternal, File filePath, String databaseURL * Ran internally after the DatabaseManager has 
been created to read the *.sql files in the /resources/ directory * for future queries. * - * @throws IOException + * @throws IOException If there are issues when creating/accessing the pool */ private void initializeStatements() throws IOException { this.createLetterEntry = getQuery("createLetterEntry"); this.clearLetterSegments = getQuery("clearLetterSegments"); this.addLetterSegment = getQuery("addLetterSegment"); - this.selectSegments = getQuery("selectSegments"); this.selectAllSegments = getQuery("selectAllSegments"); this.getLetterEntry = getQuery("getLetterEntry"); this.getSpaceEntry = getQuery("getSpaceEntry"); - this.addLetterSize = getQuery("addLetterSize"); - this.getLetterSize = getQuery("getLetterSize"); + this.addAverageData = getQuery("addAverageData"); + this.getAverageData = getQuery("getAverageData"); + this.addCustomSpace = getQuery("addCustomSpace"); + this.getCustomSpace = getQuery("getCustomSpace"); + this.setBooleanProperty = getQuery("setBooleanProperty"); + this.getBooleanProperty = getQuery("getBooleanProperty"); } /** @@ -138,34 +143,46 @@ private void initializeStatements() throws IOException { * * @param name The resource file to read * @return The string contents of it - * @throws IOException + * @throws IOException If there are issues when creating/accessing the pool */ private String getQuery(String name) throws IOException { var resource = Objects.requireNonNull(getClass().getClassLoader().getResource(name + ".sql")); - + try (var reader = new BufferedReader(new InputStreamReader(resource.openStream()))) { return reader.lines().collect(Collectors.joining("\n")); } } + @Override + public void setName(String name) { + this.name = name; + } + + @Override + public String getName() { + return this.name; + } + @Override public DataSource getDataSource() { return this.dataSource; } @Override - public void createLetterEntry(char letter, double averageWidth, double averageHeight, int minFontSize, int maxFontSize, double minCenter, double 
maxCenter, boolean hasDot, LetterMeta letterMeta, boolean isLetter) { + public void createLetterEntry(char letter, double averageWidth, double averageHeight, double minCenter, double maxCenter, boolean isLetter) { + createLetterEntry(letter, 0, averageWidth, averageHeight, minCenter, maxCenter, isLetter); + } + + @Override + public void createLetterEntry(char letter, int modifier, double averageWidth, double averageHeight, double minCenter, double maxCenter, boolean isLetter) { try (var connection = dataSource.getConnection(); var createLetterEntry = connection.prepareStatement(this.createLetterEntry)) { createLetterEntry.setInt(1, letter); - createLetterEntry.setDouble(2, averageWidth); - createLetterEntry.setDouble(3, averageHeight); - createLetterEntry.setInt(4, minFontSize); - createLetterEntry.setInt(5, maxFontSize); - createLetterEntry.setDouble(6, minCenter); - createLetterEntry.setDouble(7, maxCenter); - createLetterEntry.setBoolean(8, hasDot); - createLetterEntry.setInt(9, letterMeta.getID()); - createLetterEntry.setBoolean(10, isLetter); + createLetterEntry.setInt(2, modifier); + createLetterEntry.setDouble(3, averageWidth); + createLetterEntry.setDouble(4, averageHeight); + createLetterEntry.setDouble(5, minCenter); + createLetterEntry.setDouble(6, maxCenter); + createLetterEntry.setBoolean(7, isLetter); createLetterEntry.executeUpdate(); } catch (SQLException e) { e.printStackTrace(); @@ -173,14 +190,12 @@ public void createLetterEntry(char letter, double averageWidth, double averageHe } @Override - public void clearLetterSegments(char letter, int minFontSize, int maxFontSize) { - List.of("letters", "sectionData").forEach(table -> { + public void clearLetterSegments(char letter) { + Stream.of("letters", "sectionData").parallel().forEach(table -> { var query = String.format(this.clearLetterSegments, table); - + try (var connection = dataSource.getConnection(); var clearLetterSegments = connection.prepareStatement(query)) { clearLetterSegments.setInt(1, 
letter); - clearLetterSegments.setInt(2, minFontSize); - clearLetterSegments.setInt(3, maxFontSize); clearLetterSegments.executeUpdate(); } catch (SQLException e) { e.printStackTrace(); @@ -189,14 +204,18 @@ public void clearLetterSegments(char letter, int minFontSize, int maxFontSize) { } @Override - public void addLetterSegments(char letter, int minFontSize, int maxFontSize, double[] segments) { + public void addLetterSegments(char letter, double[] segments) { + addLetterSegments(letter, 0, segments); + } + + @Override + public void addLetterSegments(char letter, int modifier, double[] segments) { try (var connection = dataSource.getConnection(); var addLetterSegment = connection.prepareStatement(this.addLetterSegment)) { for (int i = 0; i < segments.length; i++) { addLetterSegment.setInt(1, letter); - addLetterSegment.setInt(2, minFontSize); - addLetterSegment.setInt(3, maxFontSize); - addLetterSegment.setInt(4, i); - addLetterSegment.setDouble(5, segments[i]); + addLetterSegment.setInt(2, modifier); + addLetterSegment.setInt(3, i); + addLetterSegment.setDouble(4, segments[i]); addLetterSegment.addBatch(); } @@ -207,39 +226,30 @@ public void addLetterSegments(char letter, int minFontSize, int maxFontSize, dou } @Override - public Future> getAllCharacterSegments(FontBounds fontBounds) { + public Future> getAllCharacterSegments() { return executor.submit(() -> { - var cachedValue = databaseCharacterCache.compute(fontBounds, (k, v) -> { - if (v == null || v.isEmpty()) { - return null; - } - - return v; - }); - - if (cachedValue != null) { + var cachedValue = databaseCharacterCache.get(); + + if (cachedValue != null && !cachedValue.isEmpty()) { return cachedValue; } var databaseCharacters = new ArrayList(); try (var connection = dataSource.getConnection(); var selectSegments = connection.prepareStatement(this.selectAllSegments)) { - selectSegments.setInt(1, fontBounds.getMinFont()); - selectSegments.setInt(2, fontBounds.getMaxFont()); - var resultSet = 
selectSegments.executeQuery(); while (resultSet.next()) { var letter = resultSet.getString("letter").charAt(0); + var modifier = resultSet.getInt("modifier"); var sectionIndex = resultSet.getInt("sectionIndex"); var data = resultSet.getDouble("data"); - var databaseCharacter = getDatabaseCharacter(databaseCharacters, letter, newDatabaseCharacter -> { + var databaseCharacter = getDatabaseCharacter(databaseCharacters, letter, modifier, newDatabaseCharacter -> { try (var getLetterEntry = connection.prepareCall(this.getLetterEntry)) { getLetterEntry.setInt(1, letter); - getLetterEntry.setInt(2, fontBounds.getMinFont()); - getLetterEntry.setInt(3, fontBounds.getMaxFont()); - + getLetterEntry.setInt(2, modifier); + var resultSet1 = getLetterEntry.executeQuery(); if (!resultSet1.next()) { @@ -248,23 +258,17 @@ public Future> getAllCharacterSegments(FontBounds fontBo var avgWidth = resultSet1.getDouble("avgWidth"); var avgHeight = resultSet1.getDouble("avgHeight"); - var minFontSize = resultSet1.getInt("minFontSize"); - var maxFontSize = resultSet1.getInt("maxFontSize"); var minCenter = resultSet1.getDouble("minCenter"); var maxCenter = resultSet1.getDouble("maxCenter"); - var hasDot = resultSet1.getBoolean("hasDot"); - var letterMetaID = resultSet1.getInt("letterMeta"); - newDatabaseCharacter.setData(avgWidth, avgHeight, minFontSize, maxFontSize, minCenter, maxCenter); - newDatabaseCharacter.setHasDot(hasDot); - LetterMeta.fromID(letterMetaID).ifPresent(newDatabaseCharacter::setLetterMeta); + newDatabaseCharacter.setData(avgWidth, avgHeight, minCenter, maxCenter); } catch (SQLException e) { e.printStackTrace(); } }); databaseCharacter.addDataPoint(sectionIndex, data); - + if (!databaseCharacters.contains(databaseCharacter)) { databaseCharacters.add(databaseCharacter); } @@ -276,11 +280,9 @@ public Future> getAllCharacterSegments(FontBounds fontBo if (spaceResult.next()) { var avgWidth = spaceResult.getDouble("avgWidth"); var avgHeight = spaceResult.getDouble("avgHeight"); 
- var minFontSize = spaceResult.getInt("minFontSize"); - var maxFontSize = spaceResult.getInt("maxFontSize"); var spaceCharacter = new DatabaseCharacter(' '); - spaceCharacter.setData(avgWidth, avgHeight, minFontSize, maxFontSize, 0, 0); + spaceCharacter.setData(avgWidth, avgHeight, 0, 0); databaseCharacters.add(spaceCharacter); } } @@ -288,50 +290,134 @@ public Future> getAllCharacterSegments(FontBounds fontBo e.printStackTrace(); } - this.databaseCharacterCache.put(fontBounds, databaseCharacters); + this.databaseCharacterCache.set(databaseCharacters); return databaseCharacters; }); } @Override - public void addLetterSize(int fontSize, List searchCharacterList) { - executor.execute(() -> { - try (var connection = dataSource.getConnection(); var insertSize = connection.prepareStatement(this.addLetterSize)) { - for (SearchCharacter searchCharacter : searchCharacterList) { - insertSize.setInt(1, searchCharacter.getKnownChar()); - insertSize.setInt(2, fontSize); - insertSize.setInt(3, searchCharacter.getHeight()); - insertSize.addBatch(); - insertSize.clearParameters(); - } + public void addAveragedData(String name, double[] values) { + if (values.length == 0) return; + try (var connection = dataSource.getConnection(); + var addData = connection.prepareStatement(this.addAverageData)) { + for (double value : values) { + addData.setString(1, name); + addData.setDouble(2, value); + addData.addBatch(); + } + + addData.executeBatch(); + } catch (SQLException e) { + e.printStackTrace(); + } + } + + @Override + public void addAveragedData(String name, DoubleList values) { + addAveragedData(name, values.stream().mapToDouble(Double::doubleValue).toArray()); + } + + @Override + public Future getAveragedData(String name) { + return executor.submit(() -> { + try (var connection = dataSource.getConnection(); + var getData = connection.prepareStatement(this.getAverageData)) { + getData.setString(1, name); + var resultSet = getData.executeQuery(); + if (!resultSet.next()) return 0D; 
- insertSize.executeBatch(); + return resultSet.getDouble(1); + } + }); + } + + @Override + public void addCustomSpace(char letter, double ratio) { + try (var connection = dataSource.getConnection(); + var addData = connection.prepareStatement(this.addCustomSpace)) { + addData.setInt(1, letter); + addData.setDouble(2, ratio); + addData.executeUpdate(); + } catch (SQLException e) { + e.printStackTrace(); + } + } + + @Override + public Future getCustomSpace(char letter) { + return executor.submit(() -> customSpaceCache.get().computeIfAbsent(letter, ignored -> { + try (var connection = dataSource.getConnection(); + var getData = connection.prepareStatement(this.getCustomSpace)) { + getData.setInt(1, letter); + var resultSet = getData.executeQuery(); + if (!resultSet.next()) return 0D; + + return resultSet.getDouble(1); } catch (SQLException e) { e.printStackTrace(); + return 0D; } - }); + })); + } + + @Override + public void setProperty(String name, boolean value) { + try (var connection = dataSource.getConnection(); + var addData = connection.prepareStatement(this.setBooleanProperty)) { + addData.setString(1, name); + addData.setBoolean(2, value); + addData.executeUpdate(); + } catch (SQLException e) { + e.printStackTrace(); + } } @Override - public Future getLetterSize(char character, int height) { + public Future> getProperty(String name) { return executor.submit(() -> { - int result = -1; + try (var connection = dataSource.getConnection(); + var getData = connection.prepareStatement(this.getBooleanProperty)) { + getData.setString(1, name); + var resultSet = getData.executeQuery(); + if (!resultSet.next()) return Optional.empty(); - try (var connection = dataSource.getConnection(); var getSize = connection.prepareStatement(this.getLetterSize)) { - getSize.setInt(1, character); - getSize.setInt(2, height); + return Optional.of(resultSet.getBoolean(1)); + } catch (SQLException e) { + e.printStackTrace(); + return Optional.empty(); + } + }); + } - var resultSet = 
getSize.executeQuery(); + @Override + public void setTrained(boolean trained) { + setProperty("trained", trained); + } - if (resultSet.next()) { - result = ConversionUtils.pixelToPoint(resultSet.getInt(1)); - } + @Override + public Future> isTrained() { + return getProperty("trained"); + } + + @Override + public boolean isTrainedSync() { + try { + return getProperty("trained").get().orElse(false); + } catch (InterruptedException | ExecutionException e) { + return false; + } + } + + @Override + public void clearData() { + Stream.of("letters", "sectionData", "data", "customSpaces").parallel().forEach(table -> { + try (var connection = dataSource.getConnection(); // Keeping the same connection throughout all tables might be faster + var truncate = connection.prepareStatement("TRUNCATE TABLE " + table)) { + truncate.executeUpdate(); } catch (SQLException e) { e.printStackTrace(); } - - return result; }); } @@ -342,6 +428,15 @@ public void shutdown() { } } + @Override + public void shutdown(TimeUnit unit, long duration) { + try { + unit.sleep(duration); + } catch (InterruptedException ignored) {} + + shutdown(); + } + @Override public boolean usesInternal() { return this.useInternal; @@ -353,16 +448,17 @@ public boolean usesInternal() { * * @param databaseCharacters The list of {@link DatabaseCharacter}s to search from * @param letter The character the value must match + * @param modifier The modifier of the character * @param onCreate An action to do when a {@link DatabaseCharacter} is created, usually adding more info from it * from a database. 
* @return The created {@link DatabaseCharacter} */ - private DatabaseCharacter getDatabaseCharacter(List databaseCharacters, char letter, Consumer onCreate) { - return databaseCharacters.stream().filter(cha -> cha.getLetter() == letter).findFirst().orElseGet(() -> { - var databaseCharacter = new DatabaseCharacter(letter); + private DatabaseCharacter getDatabaseCharacter(List databaseCharacters, char letter, int modifier, Consumer onCreate) { + return databaseCharacters.stream().filter(cha -> cha.getLetter() == letter && cha.getModifier() == modifier).findFirst().orElseGet(() -> { + var databaseCharacter = new DatabaseCharacter(letter, modifier); onCreate.accept(databaseCharacter); return databaseCharacter; }); } - + } diff --git a/src/main/java/com/uddernetworks/newocr/SearchImage.java b/src/main/java/com/uddernetworks/newocr/detection/SearchImage.java similarity index 63% rename from src/main/java/com/uddernetworks/newocr/SearchImage.java rename to src/main/java/com/uddernetworks/newocr/detection/SearchImage.java index fbfe825..bbe9556 100644 --- a/src/main/java/com/uddernetworks/newocr/SearchImage.java +++ b/src/main/java/com/uddernetworks/newocr/detection/SearchImage.java @@ -1,7 +1,11 @@ -package com.uddernetworks.newocr; +package com.uddernetworks.newocr.detection; import com.uddernetworks.newocr.utils.IntPair; + +import java.awt.*; +import java.awt.image.BufferedImage; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; /** @@ -15,6 +19,7 @@ public class SearchImage { /** * Creates a {@link SearchImage} from a boolean grid. + * * @param values The boolean grid of the image */ public SearchImage(boolean[][] values) { @@ -25,8 +30,9 @@ public SearchImage(boolean[][] values) { * Adds the surrounding black pixels from the given coordinate to the coordinates parameter. * When a new value sis count, it is set to false to stop duplicates without checking each entry * of the map. 
- * @param originalX The X location of the current black pixel - * @param originalY The Y location of the current black pixel + * + * @param originalX The X location of the current black pixel + * @param originalY The Y location of the current black pixel * @param coordinates The mutable list of coordinates that will have each new coordinate added to it */ public void scanFrom(int originalX, int originalY, List coordinates) { @@ -36,44 +42,44 @@ public void scanFrom(int originalX, int originalY, List coordinates) { var nextProcessing = new ArrayList(); var processingBuffer = new ArrayList(); - + nextProcessing.add(new IntPair(originalX, originalY)); while (true) { for (var pair : nextProcessing) { coordinates.add(pair); - + int x = pair.getKey(); int y = pair.getValue(); - + if (hasValue(x, y + 1)) { processingBuffer.add(new IntPair(x, y + 1)); } - + if (hasValue(x, y - 1)) { processingBuffer.add(new IntPair(x, y - 1)); } - + if (hasValue(x + 1, y)) { processingBuffer.add(new IntPair(x + 1, y)); } - + if (hasValue(x - 1, y)) { processingBuffer.add(new IntPair(x - 1, y)); } - + if (hasValue(x + 1, y + 1)) { processingBuffer.add(new IntPair(x + 1, y + 1)); } - + if (hasValue(x + 1, y - 1)) { processingBuffer.add(new IntPair(x + 1, y - 1)); } - + if (hasValue(x - 1, y + 1)) { processingBuffer.add(new IntPair(x - 1, y + 1)); } - + if (hasValue(x - 1, y - 1)) { processingBuffer.add(new IntPair(x - 1, y - 1)); } @@ -92,9 +98,10 @@ public void scanFrom(int originalX, int originalY, List coordinates) { /** * Gets the value of the given coordinates. If it's true, it will set the value to false and return true. 
+ * * @param x The X coordinate to check * @param y The Y coordinate to check - * @return The value of the coordinates + * @return The value of the coordinate, or false if the position is out of bounds */ public boolean hasValue(int x, int y) { if (x < 0 || y < 0 || y >= values.length || x >= values[y].length) return false; @@ -106,6 +113,7 @@ public boolean hasValue(int x, int y) { /** * Gets the coordinate of the given coordinates. Similar to {@link SearchImage#hasValue(int, int)}, but * it performs no mutation to the values grid. + * * @param x The X coordinate to check * @param y The Y coordinate to check * @return The value of the coordinates @@ -116,9 +124,74 @@ public boolean getValue(int x, int y) { /** * Gets the raw values grid. + * * @return The raw values grid */ public boolean[][] getValues() { return values; } + + /** + * Gets the width computed from the internal value 2D array. + * + * @return The width of the image + */ + public int getWidth() { + return this.values[0].length; + } + + /** + * Gets the height computed from the internal value 2D array. + * + * @return The height of the image + */ + public int getHeight() { + return this.values.length; + } + + /** + * Gets a {@link SearchImage} from the bounds of the current image. + * + * @param x The X position to start at + * @param y The Y position to start at + * @param width The width of the sub image + * @param height The height of the sub image + * @return The inner image from the coordinates given + */ + public SearchImage getSubimage(int x, int y, int width, int height) { + var sub = new boolean[height][]; + + for (int i = 0; i < height; i++) { + sub[i] = Arrays.copyOfRange(this.values[i + y], x, x + width); + } + + return new SearchImage(sub); + } + + @Override + public String toString() { + var ret = new StringBuilder(); + for (var row : this.values) { + for (var val : row) { + ret.append(val ?
'\uff03' : '\uff0e'); + } + ret.append('\n'); + } + + return ret.toString(); + } + + public BufferedImage toImage() { + var image = new BufferedImage(getWidth(), getHeight(), BufferedImage.TYPE_INT_ARGB); + var black = Color.BLACK.getRGB(); + var white = Color.WHITE.getRGB(); + + for (int y = 0; y < this.values.length; y++) { + for (int x = 0; x < this.values[0].length; x++) { + image.setRGB(x, y, this.values[y][x] ? black : white); + } + } + + return image; + } } diff --git a/src/main/java/com/uddernetworks/newocr/recognition/Actions.java b/src/main/java/com/uddernetworks/newocr/recognition/Actions.java new file mode 100644 index 0000000..0644c77 --- /dev/null +++ b/src/main/java/com/uddernetworks/newocr/recognition/Actions.java @@ -0,0 +1,105 @@ +package com.uddernetworks.newocr.recognition; + +import com.uddernetworks.newocr.character.DatabaseCharacter; +import com.uddernetworks.newocr.character.ImageLetter; +import com.uddernetworks.newocr.character.SearchCharacter; +import com.uddernetworks.newocr.character.TrainedCharacterData; +import com.uddernetworks.newocr.detection.SearchImage; +import com.uddernetworks.newocr.utils.IntPair; +import it.unimi.dsi.fastutil.objects.Object2DoubleMap; + +import java.util.List; +import java.util.Optional; + +/** + * A class to provide general actions for the OCR. + */ +public interface Actions { + + /** + * Gets the {@link SearchCharacter} characters found in the given {@link SearchImage}. This works by dividing it up + * into lines, then horizontally. Each individual section then has vertical padding removed. Any 'characters' that + * are 2x2 pixels or less are discarded. More information on this method can be found on page 55 of this paper. + * + * @param searchImage The image to scan + * @param searchCharacters The list that will have all of the {@link SearchCharacter}s added to + */ + void getLetters(SearchImage searchImage, List searchCharacters); + + /** + * Gets the {@link SearchCharacter}s found in training. 
This is different because it assumes that there are whole + * lines to help group characters. + * + * @param searchImage The training image to scan + * @return A list of lines, each containing the characters found in that line + */ + List getLettersDuringTraining(SearchImage searchImage); + + /** + * Actually matches the {@link SearchCharacter} object to a real character from the database. + * + * @param searchCharacter The input {@link SearchCharacter} to match to + * @return The {@link ImageLetter} object with the {@link DatabaseCharacter} inside it containing the found character + */ + Optional getCharacterFor(SearchCharacter searchCharacter); + + /** + * Actually matches the {@link SearchCharacter} object to a real character from the database with line bounds for + * improved accuracy. + * + * @param searchCharacter The input {@link SearchCharacter} to match to + * @param lineBounds The line bounds (Key/value is top/bottom Y values respectively) for improved accuracy + * @return The {@link ImageLetter} object with the {@link DatabaseCharacter} inside it containing the found character + */ + Optional getCharacterFor(SearchCharacter searchCharacter, IntPair lineBounds); + + /** + * Actually matches the {@link SearchCharacter} object to a real character from the database. + * + * @param searchCharacter The input {@link SearchCharacter} to match to + * @param data The potential trained {@link TrainedCharacterData} to use + * @return The {@link ImageLetter} object with the {@link DatabaseCharacter} inside it containing the found character + */ + Optional getCharacterFor(SearchCharacter searchCharacter, List data); + + /** + * Actually matches the {@link SearchCharacter} object to a real character from the database with line bounds for + * improved accuracy.
+ * + * @param searchCharacter The input {@link SearchCharacter} to match to + * @param data The potential trained {@link TrainedCharacterData} to use + * @param lineBounds The line bounds (Key/value is top/bottom Y values respectively) for improved accuracy + * @return The {@link ImageLetter} object with the {@link DatabaseCharacter} inside it containing the found character + */ + Optional getCharacterFor(SearchCharacter searchCharacter, List data, IntPair lineBounds); + + /** + * Actually matches the {@link SearchCharacter} object to a real character from the database. + * + * @param searchCharacter The input {@link SearchCharacter} to match to + * @param diffs The potential {@link ImageLetter}s + * @return The {@link ImageLetter} object with the {@link DatabaseCharacter} inside it containing the found character + */ + Optional getCharacterFor(SearchCharacter searchCharacter, Object2DoubleMap diffs); + + /** + * Actually matches the {@link SearchCharacter} object to a real character from the database with line bounds for + * improved accuracy. + * + * @param searchCharacter The input {@link SearchCharacter} to match to + * @param diffs The potential {@link ImageLetter}s + * @param lineBounds The line bounds (Key/value is top/bottom Y values respectively) for improved accuracy + * @return The {@link ImageLetter} object with the {@link DatabaseCharacter} inside it containing the found character + */ + Optional getCharacterFor(SearchCharacter searchCharacter, Object2DoubleMap diffs, IntPair lineBounds); + + /** + * Gets the top and bottom line bounds found from the value 2D array. This is used for getting characters for + * training data. 
+ * + * @param image The image to get the line bounds from + * @return A list of the absolute top and bottom line values + */ + List getLineBoundsForTraining(SearchImage image); + +} diff --git a/src/main/java/com/uddernetworks/newocr/recognition/CharacterLine.java b/src/main/java/com/uddernetworks/newocr/recognition/CharacterLine.java new file mode 100644 index 0000000..60e5c29 --- /dev/null +++ b/src/main/java/com/uddernetworks/newocr/recognition/CharacterLine.java @@ -0,0 +1,32 @@ +package com.uddernetworks.newocr.recognition; + +import com.uddernetworks.newocr.character.SearchCharacter; + +import java.util.List; + +/** + * An object to store {@link SearchCharacter} data for a scanned line. + */ +public interface CharacterLine { + + /** + * Gets the letters in the line ordered by X value. + * + * @return The letters in the line + */ + List getLetters(); + + /** + * Gets the top Y coordinate of the line. + * + * @return The top Y coordinate of the line + */ + int topY(); + + /** + * Gets the bottom Y coordinate of the line.
+ * + * @return The bottom Y coordinate of the line + */ + int bottomY(); +} diff --git a/src/main/java/com/uddernetworks/newocr/recognition/OCRActions.java b/src/main/java/com/uddernetworks/newocr/recognition/OCRActions.java new file mode 100644 index 0000000..8cb5331 --- /dev/null +++ b/src/main/java/com/uddernetworks/newocr/recognition/OCRActions.java @@ -0,0 +1,346 @@ +package com.uddernetworks.newocr.recognition; + +import com.uddernetworks.newocr.character.CoordinateCharacter; +import com.uddernetworks.newocr.character.ImageLetter; +import com.uddernetworks.newocr.character.SearchCharacter; +import com.uddernetworks.newocr.character.TrainedCharacterData; +import com.uddernetworks.newocr.database.DatabaseManager; +import com.uddernetworks.newocr.detection.SearchImage; +import com.uddernetworks.newocr.train.OCROptions; +import com.uddernetworks.newocr.utils.IntPair; +import com.uddernetworks.newocr.utils.OCRUtils; +import it.unimi.dsi.fastutil.objects.Object2DoubleMap; +import it.unimi.dsi.fastutil.objects.Object2DoubleOpenHashMap; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.*; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; + +public class OCRActions implements Actions { + + private static Logger LOGGER = LoggerFactory.getLogger(OCRActions.class); + + private DatabaseManager databaseManager; + private OCROptions options; + + private double distanceAbove = -1; + private double distanceBelow = -1; + + /** + * Creates a new {@link OCRActions} with a {@link DatabaseManager} and {@link OCROptions}.
+ * @param databaseManager The {@link DatabaseManager} to use + * @param options The {@link OCROptions} to use + */ + public OCRActions(DatabaseManager databaseManager, OCROptions options) { + this.databaseManager = databaseManager; + this.options = options; + } + + @Override + public void getLetters(SearchImage searchImage, List searchCharacters) { + try { + if (this.distanceAbove == -1) + this.distanceAbove = this.databaseManager.getAveragedData("distanceAbove").get(); + if (this.distanceBelow == -1) + this.distanceBelow = this.databaseManager.getAveragedData("distanceBelow").get(); + } catch (InterruptedException | ExecutionException e) { + e.printStackTrace(); + } + + var coordinates = new ArrayList(); + + var width = searchImage.getWidth(); + var height = searchImage.getHeight(); + + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + searchImage.scanFrom(x, y, coordinates); + + if (!coordinates.isEmpty()) { + var foundCharacter = new SearchCharacter(new ArrayList<>(coordinates)); + foundCharacter.applySections(); + foundCharacter.analyzeSlices(); + searchCharacters.add(foundCharacter); + coordinates.clear(); + } + } + } + } + + @Override + public List getLettersDuringTraining(SearchImage searchImage) { + var ret = new ArrayList(); + + var charMetaMap = Map.of( + ';', "semicolonDistance", + ':', "colonDistance", + '=', "equalsDistance", + 'i', "distancei", + 'j', "distancej" + ); + + // By default 0 + var configurableBases = Map.of( + Set.of('i', 'j', ':', ';', '='), 1 // The base is the second character (Bottom part) + ); + + var lineNum = 0; + var lineBounds = getLineBoundsForTraining(searchImage); + for (var coords : lineBounds) { + lineNum++; + var fromY = coords.getKey(); + var toY = coords.getValue(); + + var sub = searchImage.getSubimage(0, fromY, searchImage.getWidth(), toY - fromY); + + var width = sub.getWidth(); + var height = sub.getHeight(); + + var coordinates = new ArrayList(); + var found = new ArrayList(); + + for (int y = 
0; y < height; y++) { + for (int x = 0; x < width; x++) { + sub.scanFrom(x, y, coordinates); + + if (!coordinates.isEmpty()) { + var foundCharacter = new SearchCharacter(new ArrayList<>(coordinates), 0, fromY); + foundCharacter.applySections(); + foundCharacter.analyzeSlices(); + found.add(foundCharacter); + coordinates.clear(); + } + } + } + +// Get the characters with horizontal overlap + var ignored = new HashSet(); + + Collections.sort(found); + + // These values represent the indices of characters that require multiple parts + var multipleParts = Arrays.asList(0, 7, 29, 31, 34, 37, 80, 82); + + for (int i1 = 0; i1 < found.size(); i1++) { + var part1 = found.get(i1); + + if (ignored.contains(part1)) continue; + if (!multipleParts.contains(i1)) continue; + + var increment = new AtomicInteger(0); + var list = found.stream() + .filter(part1::isOverlappingX) + .sorted(Comparator.comparingInt(CoordinateCharacter::getY)) + .collect(Collectors.toList()); + + var currentChar = OCRTrain.TRAIN_STRING.charAt(i1); + + if (currentChar == '%') { + list.sort(Comparator.comparingDouble(character -> (double) character.getWidth() * (double) character.getHeight())); + } + + // If this is 1, it gets the second character + var index = configurableBases.entrySet() + .stream() + .filter(entry -> entry.getKey().contains(currentChar)) + .map(Map.Entry::getValue) + .findFirst() + .orElse(0); + + var base = list.get(Math.min(index, list.size() - 1)); + + list.forEach(part2 -> { + if (!base.equals(part2)) { // If part2 is NOT the base + if (currentChar == '!' || currentChar == '?') { // ! ? + double diff = (double) (part2.getY() - (base.getY() + base.getHeight())); + var distance = diff / (double) base.getHeight(); + + base.setTrainingMeta(currentChar == '?' ? 
"distanceQuestion" : "distanceExclamation", distance); + } else if (currentChar == 'i' || currentChar == 'j' || currentChar == ':' || currentChar == ';' || currentChar == '=') { // i j base below + double diff = (double) (base.getY() - (part2.getY() + part2.getHeight())); + var distance = diff / (double) base.getHeight(); + + base.setTrainingMeta(charMetaMap.getOrDefault(currentChar, "distanceAbove"), distance); + } + } + + var i = increment.getAndIncrement(); + part2.setModifier(i); + ignored.add(part2); + }); + } + + ret.add(new TrainLine(found, fromY, toY)); + } + + return ret; + } + + @Override + public Optional getCharacterFor(SearchCharacter searchCharacter) { + return getCharacterFor(searchCharacter, (IntPair) null); + } + + @Override + public Optional getCharacterFor(SearchCharacter searchCharacter, IntPair lineBounds) { + try { + var diffs = new Object2DoubleOpenHashMap(); // The lower value the better + + var data = new ArrayList<>(databaseManager.getAllCharacterSegments().get()); + + data.forEach(character -> + OCRUtils.getDifferencesFrom(searchCharacter.getSegmentPercentages(), character.getData()).ifPresent(charDifference -> { + // Gets the difference of the database character and searchCharacter (Lower is better) + var imageLetter = new ImageLetter(character.getLetter(), character.getModifier(), searchCharacter.getX(), searchCharacter.getY(), searchCharacter.getWidth(), searchCharacter.getHeight(), character.getAvgWidth(), character.getAvgHeight(), ((double) searchCharacter.getWidth()) / ((double) searchCharacter.getHeight()), searchCharacter.getCoordinates()); + imageLetter.setMaxCenter(character.getMaxCenter()); + imageLetter.setMinCenter(character.getMinCenter()); + diffs.put(imageLetter, charDifference); + })); + + return getCharacterFor(searchCharacter, diffs, lineBounds); + + } catch (InterruptedException | ExecutionException e) { + e.printStackTrace(); + } + + return Optional.empty(); + } + + @Override + public Optional 
getCharacterFor(SearchCharacter searchCharacter, List data) { + return getCharacterFor(searchCharacter, data, null); + } + + @Override + public Optional getCharacterFor(SearchCharacter searchCharacter, List data, IntPair lineBounds) { + Object2DoubleMap diffs = new Object2DoubleOpenHashMap<>(); // The lower value the better + + data.forEach(character -> { + character.finishRecalculations(); + OCRUtils.getDifferencesFrom(searchCharacter.getSegmentPercentages(), character.getSegmentPercentages()).ifPresent(charDifference -> { + // Gets the difference of the database character and searchCharacter (Lower is better) + diffs.put(new ImageLetter(character.getLetter(), character.getModifier(), searchCharacter.getX(), searchCharacter.getY(), searchCharacter.getWidth(), searchCharacter.getHeight(), character.getWidthAverage(), character.getHeightAverage(), ((double) searchCharacter.getWidth()) / ((double) searchCharacter.getHeight()), searchCharacter.getCoordinates()), charDifference); + }); + }); + + return getCharacterFor(searchCharacter, diffs, lineBounds); + } + + @Override + public Optional getCharacterFor(SearchCharacter searchCharacter, Object2DoubleMap diffs) { + return getCharacterFor(searchCharacter, diffs, null); + } + + @Override + public Optional getCharacterFor(SearchCharacter searchCharacter, Object2DoubleMap diffs, IntPair lineBounds) { + double searchRatio = (double) searchCharacter.getWidth() / searchCharacter.getHeight(); + var orderedDifferences = diffs.object2DoubleEntrySet().stream() + .peek(entry -> { + var imageLetter = entry.getKey(); + + double ratio = imageLetter.getAverageWidth() / imageLetter.getAverageHeight(); + double ratioDiff = Math.pow(ratio - searchRatio, 2); + ratioDiff *= this.options.getSizeRatioWeight(); + + entry.setValue(ratioDiff + entry.getDoubleValue()); + }) + .sorted(Comparator.comparingDouble(Object2DoubleMap.Entry::getDoubleValue)) + .collect(Collectors.toList()); + + if (orderedDifferences.isEmpty()) return Optional.empty(); 
+ + var imageLetter = orderedDifferences.remove(0).getKey(); + imageLetter.setClosestMatches(orderedDifferences); + imageLetter.setValues(searchCharacter.getValues()); + + return Optional.of(imageLetter); + } + + @Override + public List getLineBoundsForTraining(SearchImage image) { + // Pair + List lines = new ArrayList<>(); + var values = image.getValues(); + + int height = 0; + + for (int y = 0; y < values.length; y++) { + // If there's something on the line, add to their height of it. + if (OCRUtils.isRowPopulated(values, y)) { + height++; + } else if (height > 0) { // If the row has nothing on it and the line is populated, add it to the values + int heightUntil = 0; + int finalSpace = -1; + + // Seeing if the gap under the character is <= the height of the above piece. This is mainly for seeing + // if the dot on an 'i' (And other similar characters) is <= is above the rest of the character the same + // amount as its height (Making it a proper 'i' in Verdana and other fonts) + for (int i = 0; i < height; i++) { + if (y + i >= values.length) { + finalSpace = 0; + break; + } + + if (OCRUtils.isRowPopulated(values, y + i)) { + if (finalSpace == -1) { + finalSpace = heightUntil; + } + } else { + heightUntil++; + } + } + + if (finalSpace > 0) { + if (height == finalSpace) { + y += finalSpace; + height += finalSpace; + continue; + } + } + + lines.add(new IntPair(y - height, y)); + height = 0; + } else { + if (height == 0) continue; + lines.add(new IntPair(y - height, y)); + height = 0; + } + } + + // + + var remove = new ArrayList(); + for (int i = 0; i < lines.size(); i++) { + var current = lines.get(i); + double currentHeight = current.getValue() - current.getKey(); + + var onLast = i == lines.size() - 1; + + if (!onLast) { + var below = lines.get(i + 1); + double belowHeight = below.getValue() - below.getKey(); + if (belowHeight / currentHeight <= this.options.getMaxPercentDiffToMerge() + && ((double) current.getKey() - below.getKey()) / currentHeight <= 
this.options.getMaxPercentDiffToMerge()) { + remove.add(++i); + current.setValue(below.getValue()); + } + } + } + + remove.stream().sorted(Collections.reverseOrder()).forEach(i -> lines.remove(i.intValue())); + + return lines; + } + + private boolean characterAt(int index, char character) { + return OCRTrain.TRAIN_STRING.charAt(index) == character; + } + + private int index(char character) { + return OCRTrain.TRAIN_STRING.indexOf(character); + } + +} diff --git a/src/main/java/com/uddernetworks/newocr/recognition/OCRScan.java b/src/main/java/com/uddernetworks/newocr/recognition/OCRScan.java new file mode 100644 index 0000000..604b5bf --- /dev/null +++ b/src/main/java/com/uddernetworks/newocr/recognition/OCRScan.java @@ -0,0 +1,215 @@ +package com.uddernetworks.newocr.recognition; + +import com.uddernetworks.newocr.ScannedImage; +import com.uddernetworks.newocr.character.ImageLetter; +import com.uddernetworks.newocr.character.SearchCharacter; +import com.uddernetworks.newocr.database.DatabaseManager; +import com.uddernetworks.newocr.detection.SearchImage; +import com.uddernetworks.newocr.recognition.mergence.DefaultMergenceManager; +import com.uddernetworks.newocr.recognition.mergence.MergenceManager; +import com.uddernetworks.newocr.recognition.similarity.DefaultSimilarityManager; +import com.uddernetworks.newocr.recognition.similarity.SimilarityManager; +import com.uddernetworks.newocr.train.OCROptions; +import com.uddernetworks.newocr.train.UntrainedDatabaseException; +import com.uddernetworks.newocr.utils.IntPair; +import com.uddernetworks.newocr.utils.OCRUtils; +import it.unimi.dsi.fastutil.ints.Int2ObjectLinkedOpenHashMap; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.imageio.ImageIO; +import java.io.File; +import java.util.*; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.ExecutionException; +import java.util.stream.Collectors; + +public class OCRScan implements Scan { + + private static Logger LOGGER 
= LoggerFactory.getLogger(OCRScan.class); + + // This is the same as OCRTrain.TRAIN_STRING but without duplicates used in training + public static final String RAW_STRING = "!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~W W"; + private DatabaseManager databaseManager; + private Actions actions; + private SimilarityManager similarityManager; + private MergenceManager mergenceManager; + + /** + * Creates a new {@link OCRScan} with a default {@link SimilarityManager} and {@link MergenceManager}. + * + * @param databaseManager The {@link DatabaseManager} to use + * @param options The {@link OCROptions} to use + */ + public OCRScan(DatabaseManager databaseManager, OCROptions options) { + this(databaseManager, options, new DefaultSimilarityManager().loadDefaults()); + } + + /** + * Creates a new {@link OCRScan} with a default {@link MergenceManager}. + * + * @param databaseManager The {@link DatabaseManager} to use + * @param options The {@link OCROptions} to use + * @param similarityManager The {@link SimilarityManager} to use + */ + public OCRScan(DatabaseManager databaseManager, OCROptions options, SimilarityManager similarityManager) { + this(databaseManager, options, similarityManager, new DefaultMergenceManager(databaseManager, similarityManager).loadDefaults()); + } + + /** + * Creates a new {@link OCRScan}. 
+ * + * @param databaseManager The {@link DatabaseManager} to use + * @param options The {@link OCROptions} to use + * @param similarityManager The {@link SimilarityManager} to use + * @param mergenceManager The {@link MergenceManager} to use + */ + public OCRScan(DatabaseManager databaseManager, OCROptions options, SimilarityManager similarityManager, MergenceManager mergenceManager) { + this.databaseManager = databaseManager; + this.mergenceManager = mergenceManager; + this.similarityManager = similarityManager; + ImageIO.setUseCache(false); + + this.actions = new OCRActions(databaseManager, options); + } + + @Override + public ScannedImage scanImage(File file) { + + if (!this.databaseManager.isTrainedSync()) throw new UntrainedDatabaseException(this.databaseManager); + + var start = System.currentTimeMillis(); + + // Preparing image + var input = OCRUtils.readImage(file); + var values = OCRUtils.createGrid(input); + var searchCharacters = new ArrayList(); + + input = OCRUtils.filter(input).orElseThrow(); + + OCRUtils.toGrid(input, values); + + var searchImage = new SearchImage(values); + + + // Moved from below + // Key = Entry centers are ABSOLUTE + Map> lines = new LinkedHashMap<>(); + + this.actions.getLineBoundsForTraining(searchImage).forEach(pair -> lines.put(pair, new LinkedList<>())); + this.actions.getLetters(searchImage, searchCharacters); + + // Gets all needed character data from the database based on the currently used font sizes + + CompletableFuture.runAsync(() -> { + try { + databaseManager.getAllCharacterSegments().get(); + } catch (InterruptedException | ExecutionException e) { + e.printStackTrace(); + } + }); + + // Gets the closest matching character (According to the database values) using OCRActions#getCharacterFor(SearchCharacter), + // then it orders them by their X values, and then sorts the ImageLetters so certain ones go first, allowing the + // characters to go to the correct lines + + var sortedLines = new 
Int2ObjectLinkedOpenHashMap>(); + + // New method: First orders SearchCharacters + lines.keySet() + .stream() + .map(entry -> new AbstractMap.SimpleEntry<>(entry, (int) Math.round(((double) entry.getValue() - (double) entry.getKey()) / 2D + entry.getKey()))) + .sorted(Comparator.comparingInt(AbstractMap.SimpleEntry::getValue)) + .forEach(nestedEntry -> { + var linesEntry = nestedEntry.getKey(); + int y = nestedEntry.getValue(); + + var databaseCharacters = lines.get(linesEntry); + + searchCharacters.removeIf(searchCharacter -> { + var center = searchCharacter.getY() + ((double) searchCharacter.getHeight() / 2); + if (!OCRUtils.isWithin(linesEntry.getKey(), linesEntry.getValue(), center)) return false; + searchCharacter.setCenterOffset(center - searchCharacter.getY()); + this.actions.getCharacterFor(searchCharacter, linesEntry).ifPresent(databaseCharacters::add); + return true; + }); + + if (databaseCharacters.isEmpty()) { + return; + } + + databaseCharacters.sort(Comparator.comparingInt(ImageLetter::getX)); + sortedLines.put(y, databaseCharacters); + }); + + this.mergenceManager.beginMergence(sortedLines, this.similarityManager); + + // Inserts all the spaces in the line. This is based on the first character of the line's height, and will be + // derived from that font size. + sortedLines.values().forEach(line -> line.stream().mapToInt(ImageLetter::getHeight).max().ifPresent(max -> line.addAll(getSpacesFor(line, max)))); + + // Sorts the lines again based on X values, to move spaces from the back to their proper locations in the line. 
+ + ScannedImage scannedImage = new ScannedImage(file, input); + + sortedLines.keySet().stream().sorted().forEach(y -> { + List line = sortedLines.get(y.intValue()); + scannedImage.addLine(y, line.stream().sorted(Comparator.comparingInt(ImageLetter::getX)).collect(Collectors.toList())); + }); + + LOGGER.debug("Finished in " + (System.currentTimeMillis() - start) + "ms"); + return scannedImage; + } + + @Override + public List getSpacesFor(List line, int fontSize) { + var ret = new ArrayList(); + + try { + var data = databaseManager.getAllCharacterSegments().get(); + + // Gets the space DatabaseCharacter used for the current font size from the database + var spaceOptional = data.stream().filter(databaseCharacter -> databaseCharacter.getLetter() == ' ').findFirst(); + + if (spaceOptional.isEmpty()) { + LOGGER.error("No space found for current font size: " + fontSize); + return line; + } + + var space = spaceOptional.get(); + var spaceRatio = space.getAvgWidth() / space.getAvgHeight(); + + ImageLetter prev = null; + + for (var searchCharacter : line) { + var spaceRatioOverride = prev == null ? 0 : databaseManager.getCustomSpace(prev.getLetter()).get(); + int leftX = prev == null ? 0 : prev.getX() + prev.getWidth() + 1; + int rightX = searchCharacter.getX(); + + var gap = rightX - leftX; // The space between the current character and the last character + var usedWidth = spaceRatio * fontSize; // The width of the space for this specific font size + usedWidth += spaceRatioOverride * fontSize; + + int spaces = '!' == searchCharacter.getLetter() ?
(int) Math.floor(gap / usedWidth) : spaceRound(gap / usedWidth); + + for (int i = 0; i < spaces; i++) { + ret.add(new ImageLetter(' ', 0, (int) (leftX + (usedWidth * i)), searchCharacter.getY(), (int) usedWidth, fontSize, usedWidth, fontSize, spaceRatio)); + } + + prev = searchCharacter; + } + } catch (ExecutionException | InterruptedException e) { + e.printStackTrace(); + } + + return ret; + } + + @Override + public int spaceRound(double input) { + int known = (int) Math.floor(input); + double extra = input % 1; + known += OCRUtils.diff(extra, 1) < 0.2D ? 1 : 0; + return known; + } +} diff --git a/src/main/java/com/uddernetworks/newocr/recognition/OCRTrain.java b/src/main/java/com/uddernetworks/newocr/recognition/OCRTrain.java new file mode 100644 index 0000000..38f48db --- /dev/null +++ b/src/main/java/com/uddernetworks/newocr/recognition/OCRTrain.java @@ -0,0 +1,209 @@ +package com.uddernetworks.newocr.recognition; + +import com.uddernetworks.newocr.character.SearchCharacter; +import com.uddernetworks.newocr.character.TrainedCharacterData; +import com.uddernetworks.newocr.database.DatabaseManager; +import com.uddernetworks.newocr.detection.SearchImage; +import com.uddernetworks.newocr.train.OCROptions; +import com.uddernetworks.newocr.utils.OCRUtils; +import it.unimi.dsi.fastutil.doubles.DoubleArrayList; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.imageio.ImageIO; +import java.io.File; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +public class OCRTrain implements Train { + + private static Logger LOGGER = LoggerFactory.getLogger(OCRTrain.class); + + public static final String TRAIN_STRING = "!!\"#$%%%&'()*+,-./0123456789::;;<==>??@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghiijjklmnopqrstuvwxyz{|}~W W"; + private DatabaseManager databaseManager; + private OCROptions options; + 
private Actions actions; + + /** + * Creates a new {@link OCRTrain}. + * + * @param databaseManager The {@link DatabaseManager} to use + * @param options The {@link OCROptions} to use + */ + public OCRTrain(DatabaseManager databaseManager, OCROptions options) { + this.databaseManager = databaseManager; + this.options = options; + ImageIO.setUseCache(false); + + this.actions = new OCRActions(databaseManager, options); + } + + @Override + public void trainImage(File file) { + + if (this.databaseManager.isTrainedSync()) { + databaseManager.clearData(); + this.databaseManager.setTrained(false); + } + + List trainedCharacterDataList = new ArrayList<>(); + + // Preparing image + + var input = OCRUtils.readImage(file); + var values = OCRUtils.createGrid(input); + + input = OCRUtils.filter(input).orElseThrow(); + + OCRUtils.toGrid(input, values); + + var searchImage = new SearchImage(values); + + TrainedCharacterData spaceTrainedCharacter = new TrainedCharacterData(' '); + trainedCharacterDataList.add(spaceTrainedCharacter); + + // Stores the height/distance ratio for apostrophe parts + var apostropheRatios = new DoubleArrayList(); + + var customSpaces = new HashMap>(); + + var metaMapping = Stream.of("distanceAbove", "distancei", "distancej", "colonDistance", "semicolonDistance", "equalsDistance", "distanceQuestion", "distanceExclamation") + .collect(Collectors.toMap(name -> name, name -> new DoubleArrayList())); + + // Goes through each line found + for (var line : this.actions.getLettersDuringTraining(searchImage)) { + + // Gets all characters found at the line bounds from the searchCharacters (Collected from the double for loops) + SearchCharacter nextMeasuringSpace = null; + + if (!line.getLetters().isEmpty()) { + var letterIndex = 0; + var beforeSpaceX = 0; + SearchCharacter firstQuote = null; + + for (SearchCharacter searchCharacter : line.getLetters()) { + // Gets the next character it knows it will be + char current = searchCharacter.getLetter() == ' ' ? 
' ' : TRAIN_STRING.charAt(letterIndex++); + var modifier = searchCharacter.getModifier(); + var revertIndex = false; + + // If the index is on the quote + if (letterIndex == 3) { + searchCharacter.setLetter('"'); + if (firstQuote == null) { + firstQuote = searchCharacter; + + // Make sure to subtract 1 from the letterIndex at the end, so it can process the " again + revertIndex = true; + } else { + var distance = searchCharacter.getX() - firstQuote.getX() - firstQuote.getWidth(); + var ratio = (double) firstQuote.getHeight() / (double) distance; + apostropheRatios.add(ratio); + + modifier = 1; + } + + // If the current character is the FIRST `W`, sets beforeSpaceX to the current far right coordinate + // of the space (X + width), and go up another character (Skipping the space in TRAIN_STRING) + } else if (letterIndex == TRAIN_STRING.length() - 2) { + searchCharacter.setLetter('W'); + beforeSpaceX = searchCharacter.getX() + searchCharacter.getWidth(); + letterIndex++; + continue; + + // If it's the last character, add the space based on beforeSpaceX and the current X, (Getting the + // width of the space) and reset the line + } else if (letterIndex == TRAIN_STRING.length()) { + searchCharacter.setLetter('W'); + spaceTrainedCharacter.recalculateTo(searchCharacter.getX() - beforeSpaceX, line.bottomY() - line.topY()); + letterIndex = 0; + continue; + } else { + searchCharacter.setLetter(current); + } + + if (nextMeasuringSpace != null) { + double width = searchCharacter.getX() - (nextMeasuringSpace.getX() + nextMeasuringSpace.getWidth()); + double ratio = width / (double) nextMeasuringSpace.getHeight(); + customSpaces.computeIfAbsent(nextMeasuringSpace.getLetter(), x -> new ArrayList<>()).add(ratio); + nextMeasuringSpace = null; + } + + if (this.options.getSpecialSpaces().contains(current)) { + nextMeasuringSpace = searchCharacter; + } + + metaMapping.forEach((meta, list) -> searchCharacter.getTrainingMeta(meta).ifPresent(list::add)); + + 
searchCharacter.setModifier(modifier); + var trainedSearchCharacter = getTrainedCharacter(trainedCharacterDataList, current, modifier); + + // Adds the current segment values of the current searchCharacter to the trainedSearchCharacter + trainedSearchCharacter.recalculateTo(searchCharacter); + + double halfOfLineHeight = ((double) line.bottomY() - (double) line.topY()) / 2; + double middleOfLineToTopChar = (double) searchCharacter.getY() - (double) line.topY(); + double topOfLetterToCenter = halfOfLineHeight - middleOfLineToTopChar; + + // Sets the current center to be calculated, along with any meta it may have + trainedSearchCharacter.recalculateCenter(topOfLetterToCenter); // This NOW gets offset from top of + + if (revertIndex) letterIndex--; + + // Resets the current letter + if (letterIndex >= TRAIN_STRING.length()) { + letterIndex = 0; + } + } + } + } + + LOGGER.debug("Writing data to database..."); + long start = System.currentTimeMillis(); + + // Add the apostropheRatios data into the database + CompletableFuture.runAsync(() -> metaMapping.forEach(databaseManager::addAveragedData)) + .thenRunAsync(() -> databaseManager.addAveragedData("apostropheRatio", apostropheRatios)) + .thenRunAsync(() -> customSpaces.forEach((character, ratios) -> databaseManager.addCustomSpace(character, ratios.stream().mapToDouble(Double::doubleValue).average().orElse(0)))); + + // Inserts all character data into the database after recalculating the + trainedCharacterDataList.forEach(databaseTrainedCharacter -> { + try { + databaseTrainedCharacter.finishRecalculations(); + + char letter = databaseTrainedCharacter.getLetter(); + + CompletableFuture.runAsync(() -> databaseManager.createLetterEntry(letter, databaseTrainedCharacter.getModifier(), databaseTrainedCharacter.getWidthAverage(), databaseTrainedCharacter.getHeightAverage(), databaseTrainedCharacter.getMinCenter(), databaseTrainedCharacter.getMaxCenter(), letter == ' ')) + .thenRunAsync(() -> { + if (letter != ' ') { + 
databaseManager.addLetterSegments(letter, databaseTrainedCharacter.getModifier(), databaseTrainedCharacter.getSegmentPercentages()); + } + }); + } catch (Exception e) { + e.printStackTrace(); + } + }); + + this.databaseManager.setTrained(true); + + LOGGER.debug("Finished writing to database in " + (System.currentTimeMillis() - start) + "ms"); + } + + @Override + public TrainedCharacterData getTrainedCharacter(List trainedCharacterDataList, char current, int finalModifier) { + return trainedCharacterDataList + .stream() + .filter(trainedCharacterData -> trainedCharacterData.getLetter() == current + && trainedCharacterData.getModifier() == finalModifier) + .findFirst() + .orElseGet(() -> { + var trained = new TrainedCharacterData(current, finalModifier); + trainedCharacterDataList.add(trained); + return trained; + }); + } +} diff --git a/src/main/java/com/uddernetworks/newocr/recognition/Scan.java b/src/main/java/com/uddernetworks/newocr/recognition/Scan.java new file mode 100644 index 0000000..aa88847 --- /dev/null +++ b/src/main/java/com/uddernetworks/newocr/recognition/Scan.java @@ -0,0 +1,43 @@ +package com.uddernetworks.newocr.recognition; + +import com.uddernetworks.newocr.ScannedImage; +import com.uddernetworks.newocr.character.ImageLetter; +import com.uddernetworks.newocr.train.UntrainedDatabaseException; + +import java.io.File; +import java.util.List; + +/** + * The main class that handles character scanning of an image. + */ +public interface Scan { + + /** + * Scans the input image and returns a {@link ScannedImage} containing all the characters and their info. + * + * @param file The input image to be scanned + * @return A {@link ScannedImage} containing all scanned character data + * @throws UntrainedDatabaseException If the database was not trained yet + */ + ScannedImage scanImage(File file); + + /** + * Gets and inserts all the spaces of the current line based on the font size given (The first character of the line + * by default). 
This method adds the spaces to the end of the line currently, so a resort is needed. + * + * @param line The line to add spaces to + * @param fontSize The font size to base the space widths off of + * @return A copy of the input {@link ImageLetter} List, but with spaces appended to the end + */ + List getSpacesFor(List line, int fontSize); + + /** + * Gets the full space character count for the blank gap divided by the space width. This is calculated by getting + * the amount of times the space can fit in evenly (x % 1) and if the remaining value is within 0.2 of 1, it is + * considered a space. + * + * @param input The amount of spaces that fit in the gap (gap / spaceWidth) + * @return The amount of spaces that is found as a whole number + */ + int spaceRound(double input); +} diff --git a/src/main/java/com/uddernetworks/newocr/recognition/Train.java b/src/main/java/com/uddernetworks/newocr/recognition/Train.java new file mode 100644 index 0000000..554caa5 --- /dev/null +++ b/src/main/java/com/uddernetworks/newocr/recognition/Train.java @@ -0,0 +1,32 @@ +package com.uddernetworks.newocr.recognition; + +import com.uddernetworks.newocr.character.TrainedCharacterData; +import com.uddernetworks.newocr.train.ComputerTrainGenerator; + +import java.io.File; +import java.util.List; + +/** + * The main class that handles training of an image/font. + */ +public interface Train { + + /** + * Scans the input image and creates training data based off of it. It must be an input image created from + * {@link ComputerTrainGenerator} or something of a similar format. + * + * @param file The input image to be trained from + */ + void trainImage(File file); + + /** + * Gets the {@link TrainedCharacterData} with the known letter value of the given character, with the same modifier. + * If a character is not found, it will be created and added to the given list. 
+ * + * @param trainedCharacterDataList The list of {@link TrainedCharacterData}s to search though + * @param current The character to find + * @param finalModifier The modifier for the character to find + * @return The {@link TrainedCharacterData} with the same character and modifier + */ + TrainedCharacterData getTrainedCharacter(List trainedCharacterDataList, char current, int finalModifier); +} diff --git a/src/main/java/com/uddernetworks/newocr/recognition/TrainLine.java b/src/main/java/com/uddernetworks/newocr/recognition/TrainLine.java new file mode 100644 index 0000000..ffc36fb --- /dev/null +++ b/src/main/java/com/uddernetworks/newocr/recognition/TrainLine.java @@ -0,0 +1,40 @@ +package com.uddernetworks.newocr.recognition; + +import com.uddernetworks.newocr.character.SearchCharacter; + +import java.util.List; + +public class TrainLine implements CharacterLine { + + private List letters; + private int topY; + private int bottomY; + + /** + * Creates a new {@link TrainLine} with a list of characters populating it. 
+ * + * @param letters The list of {@link SearchCharacter}s to initially populate the line + * @param topY The top Y coordinate of the image the line starts at + * @param bottomY The bottom Y coordinate of the image the line ends at + */ + public TrainLine(List letters, int topY, int bottomY) { + this.letters = letters; + this.topY = topY; + this.bottomY = bottomY; + } + + @Override + public List getLetters() { + return this.letters; + } + + @Override + public int topY() { + return this.topY; + } + + @Override + public int bottomY() { + return this.bottomY; + } +} diff --git a/src/main/java/com/uddernetworks/newocr/recognition/mergence/DefaultMergenceManager.java b/src/main/java/com/uddernetworks/newocr/recognition/mergence/DefaultMergenceManager.java new file mode 100644 index 0000000..466bc4f --- /dev/null +++ b/src/main/java/com/uddernetworks/newocr/recognition/mergence/DefaultMergenceManager.java @@ -0,0 +1,146 @@ +package com.uddernetworks.newocr.recognition.mergence; + +import com.uddernetworks.newocr.character.ImageLetter; +import com.uddernetworks.newocr.database.DatabaseManager; +import com.uddernetworks.newocr.recognition.mergence.rules.*; +import com.uddernetworks.newocr.recognition.similarity.SimilarRule; +import com.uddernetworks.newocr.recognition.similarity.SimilarityManager; +import it.unimi.dsi.fastutil.ints.Int2ObjectLinkedOpenHashMap; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.*; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.CopyOnWriteArrayList; +import java.util.function.BiFunction; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +public class DefaultMergenceManager implements MergenceManager { + + private static Logger LOGGER = LoggerFactory.getLogger(DefaultMergenceManager.class); + private DatabaseManager databaseManager; + private SimilarityManager similarityManager; + + private List mergeRules = new CopyOnWriteArrayList<>(); + + // Concurrent from 
parallel streams + private Map> horizontalLetterRelations = new ConcurrentHashMap<>(); + private Map> verticalLetterRelations = new ConcurrentHashMap<>(); + + /** + * Creates a new {@link DefaultMergenceManager}. + * + * @param databaseManager The {@link DatabaseManager} to use + * @param similarityManager The {@link SimilarityManager} to use + */ + public DefaultMergenceManager(DatabaseManager databaseManager, SimilarityManager similarityManager) { + this.databaseManager = databaseManager; + this.similarityManager = similarityManager; + } + + /** + * Adds the default {@link MergeRule}s, otherwise all rules will need to be added manually via + * {@link MergenceManager#addRule(BiFunction)}. + * + * @return The current {@link MergenceManager} + */ + public MergenceManager loadDefaults() { + return addRule(OverDotMergeRule::new) + .addRule(UnderDotMergeRule::new) + .addRule(ApostropheMergeRule::new) + .addRule(PercentMergeRule::new) + .addRule(EqualVerticalMergeRule::new); + } + + @Override + public MergenceManager addRule(BiFunction rule) { + this.mergeRules.add(rule.apply(this.databaseManager, this.similarityManager)); + return this; + } + + @Override + public void beginMergence(Int2ObjectLinkedOpenHashMap> sortedLines, SimilarityManager similarityManager) { + this.mergeRules.sort(Comparator.comparingInt(rule -> rule.getPriority().getPriorityIndex())); + + long start = System.currentTimeMillis(); + flatKeys(sortedLines).forEach(imageLetter -> verticalLetterRelations.put(imageLetter, getVerticalTo(imageLetter, sortedLines))); + + sortedLines.forEach((y, line) -> line.forEach(imageLetter -> horizontalLetterRelations.put(imageLetter, line))); + + this.mergeRules.stream().map(this::processRule).flatMap(Set::stream).forEach(imageLetter -> removeFromSorted(imageLetter, sortedLines)); + + var dotSimilarity = similarityManager.getRule("dot").orElseThrow(); + + // Cleaning up + flatKeys(sortedLines).forEach(imageLetter -> processLetter(imageLetter, dotSimilarity)); + + 
LOGGER.debug("Finished merging in " + (System.currentTimeMillis() - start)); + } + + private void processLetter(ImageLetter imageLetter, SimilarRule dotSimilarity) { + if (imageLetter.getAmountOfMerges() > 0) return; + var letter = imageLetter.getLetter(); + var mod = imageLetter.getModifier(); + + // TODO: Make these options + + if (dotSimilarity.matchesLetter(imageLetter)) { + imageLetter.setLetter('.'); + imageLetter.setModifier(0); + } else if (letter == '=' + || (letter == ';' && mod == 1) + || letter == 'j' + || letter == '"' + || letter == '%' + || letter == 'i' + || letter == '!') { + imageLetter.setNextClosest(); + processLetter(imageLetter, dotSimilarity); + } + } + + private Set processRule(MergeRule rule) { + var iterating = rule.isHorizontal() ? horizontalLetterRelations : verticalLetterRelations; + var removing = new HashSet(); + iterating.forEach((base, context) -> { + if (removing.contains(base)) return; + rule.mergeCharacters(base, context).ifPresent(remove -> { + removing.addAll(remove); + iterating.forEach((key, list) -> list.removeAll(remove)); + }); + }); + + removing.forEach(horizontalLetterRelations::remove); + removing.forEach(verticalLetterRelations::remove); + + return removing; + } + + private void removeFromSorted(ImageLetter imageLetter, Map> sortedLines) { + var iterator = sortedLines.entrySet().iterator(); + while (iterator.hasNext()) { + var currentEntry = iterator.next(); + var currentLine = currentEntry.getValue(); + var removed = currentLine.remove(imageLetter); + if (removed) { + if (currentLine.isEmpty()) iterator.remove(); + break; + } + } + } + + private List getVerticalTo(ImageLetter imageLetter, Int2ObjectLinkedOpenHashMap> sortedLines) { + return flatKeys(sortedLines) + .filter(filterChar -> filterChar.isOverlappingX(imageLetter)) + .sorted(Comparator.comparingInt(ImageLetter::getY)) + .collect(Collectors.toList()); + } + + private Stream flatKeys(Map> map) { + return map.values() + .stream() + .flatMap(List::stream) + 
/**
 * The priority of a merge rule, exposed as a numeric index that grows with the priority.
 */
public enum MergePriority {
    LOWEST(-2),
    LOW(-1),
    NORMAL(0),
    HIGH(1),
    // Was -2, which made HIGHEST indistinguishable from LOWEST when ordering by index
    HIGHEST(2);

    private final int priority;

    MergePriority(int priority) {
        this.priority = priority;
    }

    /**
     * Gets the numeric index of the priority, from -2 ({@link #LOWEST}) to 2 ({@link #HIGHEST}).
     *
     * @return The priority index
     */
    public int getPriorityIndex() {
        return priority;
    }
}
+ * + * @return If the data given to {@link MergeRule#mergeCharacters(ImageLetter, List)} is horizontal (Full line) data + */ + public abstract boolean isHorizontal(); + + /** + * Gets the priority of the current rule + * + * @return The priority of the rule + */ + public abstract MergePriority getPriority(); + + /** + * Preforms the merging action with the current rule against the given data. If {@link MergeRule#isHorizontal()} is + * true, the given data will be a full line of data. If it returns false, the data will be all characters with a + * horizontal overlap in a column. + * + * @param target The base charcater + * @param letterData The letter data + * @return The {@link ImageLetter} that should be removed if the merge was successful + */ + public abstract Optional> mergeCharacters(ImageLetter target, List letterData); +} diff --git a/src/main/java/com/uddernetworks/newocr/recognition/mergence/MergenceManager.java b/src/main/java/com/uddernetworks/newocr/recognition/mergence/MergenceManager.java new file mode 100644 index 0000000..e3a7001 --- /dev/null +++ b/src/main/java/com/uddernetworks/newocr/recognition/mergence/MergenceManager.java @@ -0,0 +1,33 @@ +package com.uddernetworks.newocr.recognition.mergence; + +import com.uddernetworks.newocr.character.ImageLetter; +import com.uddernetworks.newocr.database.DatabaseManager; +import com.uddernetworks.newocr.recognition.similarity.SimilarityManager; +import it.unimi.dsi.fastutil.ints.Int2ObjectLinkedOpenHashMap; + +import java.util.List; +import java.util.function.BiFunction; + +/** + * The manager for {@link MergeRule}s to combine/merge multi part characters. + */ +public interface MergenceManager { + + /** + * Adds a mergence rule to be ran using the given {@link com.uddernetworks.newocr.database.DatabaseManager} and + * {@link SimilarityManager} from the constructor. 
+ * + * @param rule The rule to add + * @return The current {@link MergenceManager} + */ + MergenceManager addRule(BiFunction rule); + + /** + * Orders and invokes all merge rules' {@link MergeRule#mergeCharacters(ImageLetter, List)} method with appropriate data. + * + * @param sortedLines The read image data + * @param similarityManager The {@link SimilarityManager} used + */ + void beginMergence(Int2ObjectLinkedOpenHashMap> sortedLines, SimilarityManager similarityManager); + +} diff --git a/src/main/java/com/uddernetworks/newocr/recognition/mergence/rules/ApostropheMergeRule.java b/src/main/java/com/uddernetworks/newocr/recognition/mergence/rules/ApostropheMergeRule.java new file mode 100644 index 0000000..1021287 --- /dev/null +++ b/src/main/java/com/uddernetworks/newocr/recognition/mergence/rules/ApostropheMergeRule.java @@ -0,0 +1,98 @@ +package com.uddernetworks.newocr.recognition.mergence.rules; + +import com.uddernetworks.newocr.character.ImageLetter; +import com.uddernetworks.newocr.database.DatabaseManager; +import com.uddernetworks.newocr.recognition.mergence.MergePriority; +import com.uddernetworks.newocr.recognition.mergence.MergeRule; +import com.uddernetworks.newocr.recognition.similarity.SimilarRule; +import com.uddernetworks.newocr.recognition.similarity.SimilarityManager; + +import java.util.Arrays; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.ExecutionException; + +import static com.uddernetworks.newocr.recognition.similarity.Letter.*; + +/** + * Merges pieces of apostrophes. 
+ */ +public class ApostropheMergeRule extends MergeRule { + + private double apostropheRatio; + private SimilarRule apostropheRule; + + public ApostropheMergeRule(DatabaseManager databaseManager, SimilarityManager similarityManager) { + super(databaseManager, similarityManager); + + similarityManager.getSafeRule("vertical-line", rule -> this.apostropheRule = rule); + + try { + this.apostropheRatio = databaseManager.getAveragedData("apostropheRatio").get(); + } catch (InterruptedException | ExecutionException e) { + e.printStackTrace(); + } + } + + @Override + public boolean isHorizontal() { + return true; + } + + @Override + public MergePriority getPriority() { + return MergePriority.HIGH; + } + + @Override + public Optional> mergeCharacters(ImageLetter target, List letterData) { + if (!this.apostropheRule.matchesLetter(target)) return Optional.empty(); + + var index = letterData.indexOf(target) - 1; + + if (letterData.size() <= index || index < 0) return Optional.empty(); + + var before = letterData.get(index); + + if (before == null) return Optional.empty(); + + if (target.getAmountOfMerges() > 0 || before.getAmountOfMerges() > 0) return Optional.empty(); + + if (!this.apostropheRule.matchesLetter(before)) return Optional.empty(); + + // If the size of the apostrophes are not similar, or are not around 50% of their neighbor's + // height, it's probably not actually an apostrophe + var diff = percentDiff(target.getHeight(), before.getHeight()); + if (diff >= 0.25D) return Optional.empty(); + + var compare = Arrays.asList(EXCLAMATION_DOT, QUOTE_LEFT, QUOTE_RIGHT, PERCENT_LDOT, PERCENT_RDOT, APOSTROPHE, ASTERISK, PLUS, COMMA, MINUS, PERIOD, COLON_TOP, COLON_BOTTOM, SEMICOLON_TOP, SEMICOLON_BOTTOM, EQUALS_TOP, EQUALS_BOTTOM, QUESTION_MARK_BOTTOM, CARROT, UNDERSCORE, GRAVE, i_DOT, j_DOT, TILDE, SPACE); + + ImageLetter compareCharacter = null; + for (ImageLetter current : letterData) { + var currentLetter = getLetter(current); + if (target.equals(current) || 
before.equals(current) || compare.contains(currentLetter)) continue; + compareCharacter = current; + break; + } + + if (compareCharacter != null) { + var sizeDiff = percentDiff(compareCharacter.getHeight(), target.getHeight()); + + if (sizeDiff <= 0.5D) return Optional.empty(); + } + + var avgLength = (double) before.getHeight() * apostropheRatio; + if (target.getX() - before.getX() <= avgLength) { + // If the ' (Represented as ") are close enough to each other, they are put into a single " and the second (current) character is removed + before.setLetter('"'); + before.merge(target); + return Optional.of(List.of(target)); + } + + return Optional.empty(); + } + + private double percentDiff(double one, double two) { + return 1D - (Math.min(one, two) / Math.max(one, two)); + } +} diff --git a/src/main/java/com/uddernetworks/newocr/recognition/mergence/rules/EqualVerticalMergeRule.java b/src/main/java/com/uddernetworks/newocr/recognition/mergence/rules/EqualVerticalMergeRule.java new file mode 100644 index 0000000..dd7814d --- /dev/null +++ b/src/main/java/com/uddernetworks/newocr/recognition/mergence/rules/EqualVerticalMergeRule.java @@ -0,0 +1,89 @@ +package com.uddernetworks.newocr.recognition.mergence.rules; + +import com.uddernetworks.newocr.character.ImageLetter; +import com.uddernetworks.newocr.database.DatabaseManager; +import com.uddernetworks.newocr.recognition.mergence.MergePriority; +import com.uddernetworks.newocr.recognition.mergence.MergeRule; +import com.uddernetworks.newocr.recognition.similarity.SimilarRule; +import com.uddernetworks.newocr.recognition.similarity.SimilarityManager; + +import java.util.List; +import java.util.Optional; +import java.util.concurrent.ExecutionException; + +import static com.uddernetworks.newocr.utils.OCRUtils.diff; + +/** + * Merges : and = pieces + */ +public class EqualVerticalMergeRule extends MergeRule { + + private double colonDistance; + private double equalsDistance; + private SimilarRule dotRule; + private 
SimilarRule horizontalLineRule; + + public EqualVerticalMergeRule(DatabaseManager databaseManager, SimilarityManager similarityManager) { + super(databaseManager, similarityManager); + + similarityManager.getSafeRule("dot", rule -> this.dotRule = rule); + similarityManager.getSafeRule("horizontal-line", rule -> this.horizontalLineRule = rule); + + try { + this.colonDistance = this.databaseManager.getAveragedData("colonDistance").get(); + this.equalsDistance = this.databaseManager.getAveragedData("equalsDistance").get(); + } catch (InterruptedException | ExecutionException e) { + e.printStackTrace(); + } + } + + @Override + public boolean isHorizontal() { + return false; + } + + @Override + public MergePriority getPriority() { + return MergePriority.LOW; + } + + @Override + public Optional> mergeCharacters(ImageLetter target, List letterData) { + var index = letterData.indexOf(target) + 1; + + if (letterData.size() <= index) return Optional.empty(); + + var above = letterData.get(index); + + if (target.getAmountOfMerges() > 0 || above.getAmountOfMerges() > 0) return Optional.empty(); + + var bottomOfCharacterY = above.getY(); + var difference = bottomOfCharacterY - target.getY() - target.getHeight(); + + var isPartAbove = above.getHeight() < target.getHeight(); + double minHeight = Math.min(above.getHeight(), target.getHeight()); + double projectedDifference; + var colon = true; + + if (this.horizontalLineRule.matchesLetter(target) && this.horizontalLineRule.matchesLetter(above)) { // = + projectedDifference = this.equalsDistance * minHeight; + colon = false; + } else if (this.dotRule.matchesLetter(target) && this.dotRule.matchesLetter(above)) { // : + projectedDifference = this.colonDistance * minHeight; + } else { + return Optional.empty(); + } + + var delta = projectedDifference * 0.5D; + + if (diff(difference, projectedDifference) <= delta) { + var base = !isPartAbove ? above : target; + var adding = !isPartAbove ? 
target : above; + base.merge(adding); + base.setLetter(colon ? ':' : '='); + return Optional.of(List.of(adding)); + } + + return Optional.empty(); + } +} diff --git a/src/main/java/com/uddernetworks/newocr/recognition/mergence/rules/OverDotMergeRule.java b/src/main/java/com/uddernetworks/newocr/recognition/mergence/rules/OverDotMergeRule.java new file mode 100644 index 0000000..e02876d --- /dev/null +++ b/src/main/java/com/uddernetworks/newocr/recognition/mergence/rules/OverDotMergeRule.java @@ -0,0 +1,114 @@ +package com.uddernetworks.newocr.recognition.mergence.rules; + +import com.uddernetworks.newocr.character.ImageLetter; +import com.uddernetworks.newocr.database.DatabaseManager; +import com.uddernetworks.newocr.recognition.mergence.MergePriority; +import com.uddernetworks.newocr.recognition.mergence.MergeRule; +import com.uddernetworks.newocr.recognition.similarity.SimilarRule; +import com.uddernetworks.newocr.recognition.similarity.SimilarityManager; + +import java.util.List; +import java.util.Optional; +import java.util.concurrent.ExecutionException; + +import static com.uddernetworks.newocr.utils.OCRUtils.diff; + +/** + * Merges dots above base characters for the letter i, j, and ; + */ +public class OverDotMergeRule extends MergeRule { + + private double distancei; + private double distancej; + private double semicolonDistance; + private SimilarRule dotRule; + private SimilarRule verticalLineRule; + + public OverDotMergeRule(DatabaseManager databaseManager, SimilarityManager similarityManager) { + super(databaseManager, similarityManager); + + similarityManager.getSafeRule("dot", rule -> this.dotRule = rule); + similarityManager.getSafeRule("vertical-line", rule -> this.verticalLineRule = rule); + + try { + this.distancei = this.databaseManager.getAveragedData("distancei").get(); + this.distancej = this.databaseManager.getAveragedData("distancej").get(); + this.semicolonDistance = this.databaseManager.getAveragedData("semicolonDistance").get(); + } catch 
(InterruptedException | ExecutionException e) { + e.printStackTrace(); + } + } + + @Override + public boolean isHorizontal() { + return false; + } + + @Override + public MergePriority getPriority() { + return MergePriority.HIGH; + } + + @Override + public Optional> mergeCharacters(ImageLetter target, List letterData) { + var index = letterData.indexOf(target) - 1; + + if (index < 0 || letterData.size() <= index) return Optional.empty(); + + var targetLetter = target.getLetter(); + + var semicolon = (targetLetter == ';' && target.getModifier() == 1) || targetLetter == ','; + + var verticalTarget = this.verticalLineRule.matchesLetter(target); + + // Base + if (!semicolon && + !verticalTarget && + !(targetLetter == 'j' && target.getModifier() == 1) && + (targetLetter != 'J')) { + return Optional.empty(); + } + + // Dot + var above = letterData.get(index); + if (!this.dotRule.matchesLetter(above)) return Optional.empty(); + + if (target.getAmountOfMerges() > 0 || above.getAmountOfMerges() > 0) return Optional.empty(); + + double distance; + + if (semicolon) { + distance = this.semicolonDistance; + } else if (targetLetter == 'j' || targetLetter == 'J') { + distance = this.distancej; + } else { + distance = this.distancei; + } + + var bottomOfCharacterY = above.getY() + above.getHeight(); + var difference = Math.abs(bottomOfCharacterY - target.getY()); + var isPartAbove = above.getHeight() < target.getHeight(); + double maxHeight = Math.max(above.getHeight(), target.getHeight()); + double projectedDifference = distance * maxHeight; + double delta = projectedDifference * 0.5D; + + if (diff(difference, projectedDifference) <= delta) { + var base = !isPartAbove ? above : target; + var adding = !isPartAbove ? 
target : above; + base.merge(adding); + var usingChar = targetLetter; + if (usingChar == 'J') { + usingChar = 'j'; + } else if (usingChar == ',') { + usingChar = ';'; + } else if (verticalTarget) { + usingChar = 'i'; + } + + base.setLetter(usingChar); + return Optional.of(List.of(adding)); + } + + return Optional.empty(); + } +} diff --git a/src/main/java/com/uddernetworks/newocr/recognition/mergence/rules/PercentMergeRule.java b/src/main/java/com/uddernetworks/newocr/recognition/mergence/rules/PercentMergeRule.java new file mode 100644 index 0000000..68a17ab --- /dev/null +++ b/src/main/java/com/uddernetworks/newocr/recognition/mergence/rules/PercentMergeRule.java @@ -0,0 +1,98 @@ +package com.uddernetworks.newocr.recognition.mergence.rules; + +import com.uddernetworks.newocr.character.ImageLetter; +import com.uddernetworks.newocr.database.DatabaseManager; +import com.uddernetworks.newocr.recognition.mergence.MergePriority; +import com.uddernetworks.newocr.recognition.mergence.MergeRule; +import com.uddernetworks.newocr.recognition.similarity.SimilarRule; +import com.uddernetworks.newocr.recognition.similarity.SimilarityManager; + +import java.util.List; +import java.util.Optional; + +/** + * Merges all pieces of a percent sign + */ +public class PercentMergeRule extends MergeRule { + + private SimilarRule percentDot; + private SimilarRule percentBase; + + public PercentMergeRule(DatabaseManager databaseManager, SimilarityManager similarityManager) { + super(databaseManager, similarityManager); + + similarityManager.getSafeRule("percent-dot", rule -> this.percentDot = rule); + similarityManager.getSafeRule("percent-base", rule -> this.percentBase = rule); + } + + @Override + public boolean isHorizontal() { + return true; + } + + @Override + public MergePriority getPriority() { + return MergePriority.HIGH; + } + + @Override + public Optional> mergeCharacters(ImageLetter target, List letterData) { + var baseIndex = letterData.indexOf(target); + + if (baseIndex - 1 < 
0 || baseIndex + 1 >= letterData.size()) return Optional.empty(); + + var part1 = letterData.get(baseIndex - 1); + var part2 = letterData.get(baseIndex + 1); + + if (target.getAmountOfMerges() > 0 || part1.getAmountOfMerges() > 0 || part2.getAmountOfMerges() > 0) + return Optional.empty(); + + var partsOptional = getParts(target, part1, part2); + if (partsOptional.isEmpty()) return Optional.empty(); + + var parts = partsOptional.get(); + var base = parts[0]; + var dot1 = parts[1]; + var dot2 = parts[2]; + + if (!base.isOverlappingY(dot1) || !base.isOverlappingY(dot2)) return Optional.empty(); + + base.merge(dot1); + base.merge(dot2); + + base.setModifier(0); + base.setLetter('%'); + + return Optional.of(List.of(dot1, dot2)); + } + + // base, part1, part2 + private Optional getParts(ImageLetter one, ImageLetter two, ImageLetter three) { + var oneDot = isDot(one); + var twoDot = isDot(two); + var threeDot = isDot(three); + + var oneBase = isBase(one); + var twoBase = isBase(two); + var threeBase = isBase(three); + + // TODO: Improve/shorten logic? 
+ if (oneDot && twoDot && threeBase) { + return Optional.of(new ImageLetter[]{three, one, two}); + } else if (oneDot && twoBase && threeDot) { + return Optional.of(new ImageLetter[]{two, one, three}); + } else if (oneBase && twoDot && threeDot) { + return Optional.of(new ImageLetter[]{one, two, three}); + } + + return Optional.empty(); + } + + private boolean isDot(ImageLetter imageLetter) { + return this.percentDot.matchesLetter(imageLetter); + } + + private boolean isBase(ImageLetter imageLetter) { + return this.percentBase.matchesLetter(imageLetter); + } +} diff --git a/src/main/java/com/uddernetworks/newocr/recognition/mergence/rules/UnderDotMergeRule.java b/src/main/java/com/uddernetworks/newocr/recognition/mergence/rules/UnderDotMergeRule.java new file mode 100644 index 0000000..7246b8d --- /dev/null +++ b/src/main/java/com/uddernetworks/newocr/recognition/mergence/rules/UnderDotMergeRule.java @@ -0,0 +1,90 @@ +package com.uddernetworks.newocr.recognition.mergence.rules; + +import com.uddernetworks.newocr.character.ImageLetter; +import com.uddernetworks.newocr.database.DatabaseManager; +import com.uddernetworks.newocr.recognition.mergence.MergePriority; +import com.uddernetworks.newocr.recognition.mergence.MergeRule; +import com.uddernetworks.newocr.recognition.similarity.SimilarRule; +import com.uddernetworks.newocr.recognition.similarity.SimilarityManager; + +import java.util.List; +import java.util.Optional; +import java.util.concurrent.ExecutionException; + +import static com.uddernetworks.newocr.recognition.similarity.Letter.QUESTION_MARK_BOTTOM; +import static com.uddernetworks.newocr.recognition.similarity.Letter.QUESTION_MARK_TOP; +import static com.uddernetworks.newocr.utils.OCRUtils.diff; + +/** + * Merges the dot under a character for ! and ? 
+ */ +public class UnderDotMergeRule extends MergeRule { + + private double distanceExclamation; + private double distanceQuestion; + private SimilarRule dotRule; + private SimilarRule verticalLineRule; + + public UnderDotMergeRule(DatabaseManager databaseManager, SimilarityManager similarityManager) { + super(databaseManager, similarityManager); + + similarityManager.getSafeRule("dot", rule -> this.dotRule = rule); + similarityManager.getSafeRule("vertical-line", rule -> this.verticalLineRule = rule); + + try { + this.distanceExclamation = this.databaseManager.getAveragedData("distanceExclamation").get(); + this.distanceQuestion = this.databaseManager.getAveragedData("distanceQuestion").get(); + } catch (InterruptedException | ExecutionException e) { + e.printStackTrace(); + } + } + + @Override + public boolean isHorizontal() { + return false; + } + + @Override + public MergePriority getPriority() { + return MergePriority.LOW; + } + + @Override + public Optional> mergeCharacters(ImageLetter target, List letterData) { + var index = letterData.indexOf(target) + 1; + + if (letterData.size() <= index) return Optional.empty(); + + // Base, we want this to be a line + if (QUESTION_MARK_BOTTOM.matches(target) + && !this.verticalLineRule.matchesLetter(target)) return Optional.empty(); + + var question = QUESTION_MARK_TOP.matches(target); + + // Dot + var below = letterData.get(index); + if (!this.dotRule.matchesLetter(below)) return Optional.empty(); + + if (target.getAmountOfMerges() > 0 || below.getAmountOfMerges() > 0) return Optional.empty(); + + var bottomOfCharacterY = below.getY(); + var aboveY = target.getY() + target.getHeight(); + var difference = Math.abs(bottomOfCharacterY - aboveY); + var isBelowBase = below.getHeight() < target.getHeight(); + double minHeight = target.getHeight(); + double distanceUsed = question ? 
this.distanceQuestion : this.distanceExclamation; + + double projectedDifference = distanceUsed * minHeight; + double delta = projectedDifference * 0.75D; + + if (diff(difference, projectedDifference) <= delta) { + var base = !isBelowBase ? below : target; + var adding = !isBelowBase ? target : below; + base.merge(adding); + if (base.getLetter() != '?') base.setLetter('!'); + return Optional.of(List.of(adding)); + } + + return Optional.empty(); + } +} diff --git a/src/main/java/com/uddernetworks/newocr/recognition/similarity/DefaultSimilarityManager.java b/src/main/java/com/uddernetworks/newocr/recognition/similarity/DefaultSimilarityManager.java new file mode 100644 index 0000000..0db7c70 --- /dev/null +++ b/src/main/java/com/uddernetworks/newocr/recognition/similarity/DefaultSimilarityManager.java @@ -0,0 +1,78 @@ +package com.uddernetworks.newocr.recognition.similarity; + +import com.uddernetworks.newocr.character.ImageLetter; +import com.uddernetworks.newocr.recognition.similarity.rules.DotSimilarityRule; +import com.uddernetworks.newocr.recognition.similarity.rules.HorizontalLineSimilarityRule; +import com.uddernetworks.newocr.recognition.similarity.rules.PercentDotSimilarityRule; +import com.uddernetworks.newocr.recognition.similarity.rules.VerticalLineSimilarityRule; +import it.unimi.dsi.fastutil.objects.Object2DoubleMap; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import java.util.function.Consumer; + +public class DefaultSimilarityManager implements SimilarityManager { + + private static Logger LOGGER = LoggerFactory.getLogger(DefaultSimilarityManager.class); + + private List similarRules = new ArrayList<>(); + + /** + * Load the default {@link SimilarRule}s, otherwise all rules will need to be added manually via + * {@link SimilarityManager#addSimilarity(SimilarRule)}. 
+ * + * @return The current {@link SimilarityManager} + */ + public SimilarityManager loadDefaults() { + return addSimilarity(new DotSimilarityRule()) + .addSimilarity(new VerticalLineSimilarityRule()) + .addSimilarity(new HorizontalLineSimilarityRule()) + .addSimilarity(new PercentDotSimilarityRule()); + } + + @Override + public SimilarityManager addSimilarity(SimilarRule rule) { + this.similarRules.add(rule); + return this; + } + + @Override + public SimilarityManager removeSimilarity(Class ruleClass) { + this.similarRules.removeIf(rule -> rule.getClass().equals(ruleClass)); + return this; + } + + @Override + public boolean isSimilar(ImageLetter first, ImageLetter second) { + return this.similarRules.stream() + .filter(rule -> rule.matchesLetter(first)) + .anyMatch(rule -> rule.matchesLetter(second)); + } + + @Override + public Optional getRule(String similarityRuleName) { + return this.similarRules.stream() + .filter(rule -> rule.getName().equals(similarityRuleName)) + .findFirst(); + } + + @Override + public void getSafeRule(String similarityRuleName, Consumer ruleConsumer) { + this.similarRules.stream() + .filter(rule -> rule.getName().equals(similarityRuleName)) + .findFirst() + .ifPresentOrElse(ruleConsumer, () -> LOGGER.error("Tried to use uninitialized rule of name " + similarityRuleName)); + } + + @Override + public Optional> getSecondHighest(List> data) { + var first = data.get(0); + return this.similarRules.stream() + .filter(rule -> rule.matchesLetter(first.getKey())) + .findFirst() + .flatMap(rule -> rule.process(data)); + } +} diff --git a/src/main/java/com/uddernetworks/newocr/recognition/similarity/Letter.java b/src/main/java/com/uddernetworks/newocr/recognition/similarity/Letter.java new file mode 100644 index 0000000..27f6522 --- /dev/null +++ b/src/main/java/com/uddernetworks/newocr/recognition/similarity/Letter.java @@ -0,0 +1,172 @@ +package com.uddernetworks.newocr.recognition.similarity; + +import 
com.uddernetworks.newocr.character.ImageLetter; + +import java.util.Arrays; + +/** + * An enum containing every possible character piece that can be detected. + * Each one of these are recognised as a separate character and will be trained as such. + */ +public enum Letter { + EXCLAMATION('!', 0, "|"), + EXCLAMATION_DOT('!', 1, "."), + QUOTE_LEFT('\"', 0), + QUOTE_RIGHT('\"', 1), + HASHTAG('#'), + DOLLAR('$'), + PERCENT_BASE('%', 2), + PERCENT_LDOT('%', 1), + PERCENT_RDOT('%', 0), + AMPERSAND('&'), + APOSTROPHE('\''), + LEFT_PARENTHESE('('), + RIGHT_PARENTHESE(')'), + ASTERISK('*'), + PLUS('+'), + COMMA(','), + MINUS('-'), + PERIOD('.'), + FORWARD_SLASH('/'), + ZERO('0'), + ONE('1'), + TWO('2'), + THREE('3'), + FOUR('4'), + FIVE('5'), + SIX('6'), + SEVEN('7'), + EIGHT('8'), + NINE('9'), + COLON_TOP(':', 0), + COLON_BOTTOM(':', 1), + SEMICOLON_TOP(';', 0), + SEMICOLON_BOTTOM(';', 1), + LESS_THAN('<'), + EQUALS_TOP('=', 0), + EQUALS_BOTTOM('=', 1), + GREATER_THAN('>'), + QUESTION_MARK_TOP('?', 0), + QUESTION_MARK_BOTTOM('?', 1), + AT('@'), + A('A'), + B('B'), + C('C'), + D('D'), + E('E'), + F('F'), + G('G'), + H('H'), + I('I'), + J('J'), + K('K'), + L('L'), + M('M'), + N('N'), + O('O'), + P('P'), + Q('Q'), + R('R'), + S('S'), + T('T'), + U('U'), + V('V'), + W('W'), + X('X'), + Y('Y'), + Z('Z'), + LEFT_SQUARE_BRACKET('['), + BACKSLASH('\\'), + RIGHT_SQUARE_BRACKET(']'), + CARROT('^'), + UNDERSCORE('_'), + GRAVE('`'), + a('a'), + b('b'), + c('c'), + d('d'), + e('e'), + f('f'), + g('g'), + h('h'), + i_DOT('i', 0), + i('i', 1), + j_DOT('j', 0), + j('j', 1), + k('k'), + l('l'), + m('m'), + n('n'), + o('o'), + p('p'), + q('q'), + r('r'), + s('s'), + t('t'), + u('u'), + v('v'), + w('w'), + x('x'), + y('y'), + z('z'), + LEFT_CURLY_BRACKET('{'), + PIPE('|'), + RIGHT_CURLY_BRACKET('}'), + TILDE('~'), + SPACE(' '); + + private final char letter; + private final int mod; + private final String print; + + Letter(char letter) { + this(letter, 0); + } + + Letter(char letter, int 
mod) { + this(letter, mod, String.valueOf(letter)); + } + + Letter(char letter, int mod, String print) { + this.letter = letter; + this.mod = mod; + this.print = print; + } + + public char getLetter() { + return letter; + } + + public int getMod() { + return mod; + } + + public boolean matches(ImageLetter imageLetter) { + return matches(imageLetter.getLetter(), imageLetter.getModifier()); + } + + public boolean matches(char letter, int mod) { + return letter == this.letter && mod == this.mod; + } + + public static Letter getLetter(ImageLetter imageLetter) { + return getLetter(imageLetter.getLetter(), imageLetter.getModifier()); + } + + public static Letter getLetter(char character, int mod) { + var optional = Arrays.stream(values()) + .filter(letter -> letter.letter == character && letter.mod == mod) + .findFirst(); + if (optional.isEmpty()) { + System.err.println("Couldn't find a letter with char " + character + " and mod " + mod); + return SPACE; + } + + return optional.get(); + } + + @Override + public String toString() { + return this.print; + } +} diff --git a/src/main/java/com/uddernetworks/newocr/recognition/similarity/SimilarRule.java b/src/main/java/com/uddernetworks/newocr/recognition/similarity/SimilarRule.java new file mode 100644 index 0000000..57d0913 --- /dev/null +++ b/src/main/java/com/uddernetworks/newocr/recognition/similarity/SimilarRule.java @@ -0,0 +1,45 @@ +package com.uddernetworks.newocr.recognition.similarity; + +import com.uddernetworks.newocr.character.ImageLetter; +import it.unimi.dsi.fastutil.objects.Object2DoubleMap; + +import java.util.List; +import java.util.Optional; + +/** + * A rule to state certain character pieces are effectively the same, i.e. an l and a | and a 1 + */ +public interface SimilarRule { + + /** + * Gets if the current rule matches with the given {@link ImageLetter} and is allowed to be processed. 
+ * + * @param first The first {@link ImageLetter} of the data + * @return If the given {@link ImageLetter} can be processed by the current rule + */ + boolean matchesLetter(ImageLetter first); + + /** + * Gets the name of the rule. + * + * @return The name of the rule + */ + String getName(); + + /** + * When given a list of the potential results of a character (Irrelevant what character it is), this will find the + * character lowest in the list that does not match the first character's letter and modifier to the current + * rule. + * + * @param data The possible combination data + * @return If found, the second character + */ + default Optional> process(List> data) { + for (var entry : data) { + if (matchesLetter(entry.getKey())) continue; + return Optional.of(entry); + } + + return Optional.empty(); + } +} diff --git a/src/main/java/com/uddernetworks/newocr/recognition/similarity/SimilarityManager.java b/src/main/java/com/uddernetworks/newocr/recognition/similarity/SimilarityManager.java new file mode 100644 index 0000000..56511ad --- /dev/null +++ b/src/main/java/com/uddernetworks/newocr/recognition/similarity/SimilarityManager.java @@ -0,0 +1,68 @@ +package com.uddernetworks.newocr.recognition.similarity; + +import com.uddernetworks.newocr.character.ImageLetter; +import it.unimi.dsi.fastutil.objects.Object2DoubleMap; + +import java.util.List; +import java.util.Optional; +import java.util.function.Consumer; + +/** + * The manager for {@link SimilarRule}s, usually derived from + * {@link com.uddernetworks.newocr.configuration.FontConfiguration}s. + */ +public interface SimilarityManager { + + /** + * Adds a {@link SimilarRule} to the internal list. + * + * @param rule The {@link SimilarRule} to add + * @return The current {@link SimilarityManager} + */ + SimilarityManager addSimilarity(SimilarRule rule); + + /** + * Removes a {@link SimilarRule} from the internal list if found. 
+ * + * @param ruleClass The {@link SimilarRule} to remove if found + * @return The current {@link SimilarityManager} + */ + SimilarityManager removeSimilarity(Class ruleClass); + + /** + * Finds any matching {@link SimilarRule}s for the first {@link ImageLetter}, and then checks if the second + * {@link ImageLetter} also matches any of them. + * + * @param first The first {@link ImageLetter} + * @param second The second {@link ImageLetter} + * @return If the two {@link ImageLetter}s are similar + */ + boolean isSimilar(ImageLetter first, ImageLetter second); + + /** + * Gets the instance of {@link SimilarRule} with the given name. + * + * @param similarityRuleName The name of the {@link SimilarRule} to get, if present + * @return A {@link SimilarRule} with the given name + */ + Optional getRule(String similarityRuleName); + + /** + * Gets a rule from the given name, and if found, sends it through the consumer. A message is sent saying the + * {@link SimilarRule} is not found if one isn't found. + * + * @param similarityRuleName The name of the {@link SimilarRule} to get + * @param ruleConsumer The consumer to be given the {@link SimilarRule} if found + */ + void getSafeRule(String similarityRuleName, Consumer ruleConsumer); + + /** + * When given a list of the potential results of a character (Irrelevant what character it is), this will find the + * character lowest in the list that does not match the first character's letter and modifier to any of the added + * {@link SimilarRule}s. 
+ * + * @param data The possible combination data + * @return If found, the second character + */ + Optional> getSecondHighest(List> data); +} diff --git a/src/main/java/com/uddernetworks/newocr/recognition/similarity/rules/BasicSimilarityRule.java b/src/main/java/com/uddernetworks/newocr/recognition/similarity/rules/BasicSimilarityRule.java new file mode 100644 index 0000000..6f4ce05 --- /dev/null +++ b/src/main/java/com/uddernetworks/newocr/recognition/similarity/rules/BasicSimilarityRule.java @@ -0,0 +1,49 @@ +package com.uddernetworks.newocr.recognition.similarity.rules; + +import com.uddernetworks.newocr.character.ImageLetter; +import com.uddernetworks.newocr.recognition.similarity.Letter; +import com.uddernetworks.newocr.recognition.similarity.SimilarRule; + +import java.util.EnumSet; +import java.util.Set; + +/** + * A simple {@link SimilarRule} that is used by {@link com.uddernetworks.newocr.configuration.HOCONFontConfiguration} + * and by other default {@link SimilarRule}s. + */ +public class BasicSimilarityRule implements SimilarRule { + + private Set characters; + private String name; + + public BasicSimilarityRule(String name, Set characters) { + this.name = name; + this.characters = EnumSet.copyOf(characters); + } + + public BasicSimilarityRule(String name, Letter... characters) { + this.name = name; + this.characters = characters.length > 0 ? 
+ EnumSet.of(characters[0], characters) : + EnumSet.noneOf(Letter.class); + } + + public BasicSimilarityRule addLetter(Letter letter) { + this.characters.add(letter); + return this; + } + + public void removeLetter(Letter letter) { + this.characters.remove(letter); + } + + @Override + public String getName() { + return this.name; + } + + @Override + public boolean matchesLetter(ImageLetter first) { + return this.characters.contains(Letter.getLetter(first)); + } +} diff --git a/src/main/java/com/uddernetworks/newocr/recognition/similarity/rules/DotSimilarityRule.java b/src/main/java/com/uddernetworks/newocr/recognition/similarity/rules/DotSimilarityRule.java new file mode 100644 index 0000000..29dcf75 --- /dev/null +++ b/src/main/java/com/uddernetworks/newocr/recognition/similarity/rules/DotSimilarityRule.java @@ -0,0 +1,10 @@ +package com.uddernetworks.newocr.recognition.similarity.rules; + +import static com.uddernetworks.newocr.recognition.similarity.Letter.*; + +public class DotSimilarityRule extends BasicSimilarityRule { + + public DotSimilarityRule() { + super("dot", PERIOD, COLON_TOP, COLON_BOTTOM, EXCLAMATION_DOT, SEMICOLON_TOP, i_DOT, j_DOT, QUESTION_MARK_BOTTOM); + } +} diff --git a/src/main/java/com/uddernetworks/newocr/recognition/similarity/rules/HorizontalLineSimilarityRule.java b/src/main/java/com/uddernetworks/newocr/recognition/similarity/rules/HorizontalLineSimilarityRule.java new file mode 100644 index 0000000..3b893b7 --- /dev/null +++ b/src/main/java/com/uddernetworks/newocr/recognition/similarity/rules/HorizontalLineSimilarityRule.java @@ -0,0 +1,10 @@ +package com.uddernetworks.newocr.recognition.similarity.rules; + +import static com.uddernetworks.newocr.recognition.similarity.Letter.*; + +public class HorizontalLineSimilarityRule extends BasicSimilarityRule { + + public HorizontalLineSimilarityRule() { + super("horizontal-line", MINUS, EQUALS_BOTTOM, EQUALS_TOP, UNDERSCORE); + } +} diff --git 
a/src/main/java/com/uddernetworks/newocr/recognition/similarity/rules/PercentDotSimilarityRule.java b/src/main/java/com/uddernetworks/newocr/recognition/similarity/rules/PercentDotSimilarityRule.java new file mode 100644 index 0000000..2c6caa3 --- /dev/null +++ b/src/main/java/com/uddernetworks/newocr/recognition/similarity/rules/PercentDotSimilarityRule.java @@ -0,0 +1,11 @@ +package com.uddernetworks.newocr.recognition.similarity.rules; + +import static com.uddernetworks.newocr.recognition.similarity.Letter.PERCENT_LDOT; +import static com.uddernetworks.newocr.recognition.similarity.Letter.PERCENT_RDOT; + +public class PercentDotSimilarityRule extends BasicSimilarityRule { + + public PercentDotSimilarityRule() { + super("percent-dot", PERCENT_LDOT, PERCENT_RDOT); + } +} diff --git a/src/main/java/com/uddernetworks/newocr/recognition/similarity/rules/VerticalLineSimilarityRule.java b/src/main/java/com/uddernetworks/newocr/recognition/similarity/rules/VerticalLineSimilarityRule.java new file mode 100644 index 0000000..db0e0a6 --- /dev/null +++ b/src/main/java/com/uddernetworks/newocr/recognition/similarity/rules/VerticalLineSimilarityRule.java @@ -0,0 +1,10 @@ +package com.uddernetworks.newocr.recognition.similarity.rules; + +import static com.uddernetworks.newocr.recognition.similarity.Letter.*; + +public class VerticalLineSimilarityRule extends BasicSimilarityRule { + + public VerticalLineSimilarityRule() { + super("vertical-line", APOSTROPHE, QUOTE_LEFT, QUOTE_RIGHT, PIPE, l, i, EXCLAMATION); + } +} diff --git a/src/main/java/com/uddernetworks/newocr/train/ComputerTrainGenerator.java b/src/main/java/com/uddernetworks/newocr/train/ComputerTrainGenerator.java new file mode 100644 index 0000000..f27c74c --- /dev/null +++ b/src/main/java/com/uddernetworks/newocr/train/ComputerTrainGenerator.java @@ -0,0 +1,73 @@ +package com.uddernetworks.newocr.train; + +import com.uddernetworks.newocr.recognition.OCRScan; +import com.uddernetworks.newocr.utils.ConversionUtils; + 
+import javax.imageio.ImageIO; +import java.awt.*; +import java.awt.image.BufferedImage; +import java.io.File; +import java.io.IOException; + +public class ComputerTrainGenerator implements TrainGenerator { + + @Override + public void generateTrainingImage(File file) { + generateTrainingImage(file, new TrainGeneratorOptions()); + } + + @Override + public void generateTrainingImage(File file, TrainGeneratorOptions options) { + BufferedImage image = new BufferedImage(1, 1, BufferedImage.TYPE_INT_ARGB); + Graphics2D graphics = image.createGraphics(); + + RenderingHints rht = new RenderingHints(RenderingHints.KEY_TEXT_ANTIALIASING, RenderingHints.VALUE_TEXT_ANTIALIAS_ON); + graphics.setRenderingHints(rht); + + Font font = new Font(options.getFontFamily(), Font.PLAIN, options.getMaxFontSize()); + graphics.setFont(font); + + int newHeight = 11; + + int size2 = options.getMaxFontSize(); + for (int i = 0; i < options.getMaxFontSize() - options.getMinFontSize(); i++) { + newHeight += size2 + 11; + size2--; + } + + image = new BufferedImage(graphics.getFontMetrics().stringWidth(OCRScan.RAW_STRING) + 50, newHeight, BufferedImage.TYPE_INT_ARGB); + for (int y = 0; y < image.getHeight(); y++) { + for (int x = 0; x < image.getWidth(); x++) { + image.setRGB(x, y, Color.WHITE.getRGB()); + } + } + + graphics = image.createGraphics(); + + RenderingHints rh = new RenderingHints(RenderingHints.KEY_TEXT_ANTIALIASING, RenderingHints.VALUE_TEXT_ANTIALIAS_ON); + graphics.setRenderingHints(rh); + + int size = options.getMaxFontSize(); + int offset = options.getMaxFontSize(); + for (int i = 0; i < options.getMaxFontSize() - options.getMinFontSize(); i++) { + drawLine(graphics, options.getFontFamily(), offset, size); + offset += ConversionUtils.pointToPixel(size) + 15; + size--; + } + + try { + ImageIO.write(image, "png", file); + } catch (IOException e) { + e.printStackTrace(); + } + } + + private void drawLine(Graphics2D drawTo, String fontName, int yOffset, int size) { + Font font = new 
Font(fontName, Font.PLAIN, size); + drawTo.setFont(font); + drawTo.setPaint(Color.BLACK); + + drawTo.drawString(OCRScan.RAW_STRING, 10, yOffset); + } + +} diff --git a/src/main/java/com/uddernetworks/newocr/train/OCROptions.java b/src/main/java/com/uddernetworks/newocr/train/OCROptions.java new file mode 100644 index 0000000..d2087a3 --- /dev/null +++ b/src/main/java/com/uddernetworks/newocr/train/OCROptions.java @@ -0,0 +1,93 @@ +package com.uddernetworks.newocr.train; + +import java.util.HashSet; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +/** + * General options used by the OCR scanning and training. + */ +public class OCROptions { + private Set specialSpaces = new HashSet<>(); + private double maxPercentDiffToMerge = 0.5; + private double sizeRatioWeight = 4; + + /** + * Gets the characters requiring custom trained spaces. + * + * @return The characters requiring custom spacing + */ + public Set getSpecialSpaces() { + return this.specialSpaces; + } + + /** + * Sets the characters requiring custom trained spaces. + * + * @param specialSpaces The characters requiring separate training for their trailing spaces. + * @return The current {@link OCROptions} object + */ + public OCROptions setSpecialSpaces(Set specialSpaces) { + this.specialSpaces = new HashSet<>(specialSpaces); + return this; + } + + /** + * Sets the characters requiring custom trained spaces. + * + * @param specialSpaces The characters requiring separate training for their trailing spaces. + * @return The current {@link OCROptions} object + */ + public OCROptions setSpecialSpaces(char... 
specialSpaces) { + this.specialSpaces = IntStream.range(0, specialSpaces.length) + .mapToObj(x -> specialSpaces[x]) + .collect(Collectors.toSet()); + return this; + } + + /** + * Gets the value set by {@link OCROptions#setMaxPercentDiffToMerge(double)} + * + * @return The value set by {@link OCROptions#setMaxPercentDiffToMerge(double)} + */ + public double getMaxPercentDiffToMerge() { + return maxPercentDiffToMerge; + } + + /** + * Sets the maximum percentage difference a line must be in order to merge in the very first phase of training. This + * is primarily for when underscores are below a line, and will need to be X% smaller than the line to merge. + *

+ * This value is by default 0.5 + * + * @param maxPercentDiffToMerge The percentage to set + * @return The current {@link OCROptions} object + */ + public OCROptions setMaxPercentDiffToMerge(double maxPercentDiffToMerge) { + this.maxPercentDiffToMerge = maxPercentDiffToMerge; + return this; + } + + /** + * Gets the amount the width/height ratio should be multiplied across all a character's potential matches, to + * increase its effects compared to the actual section similarity. + * + * @return The weight of the width/height ratio + */ + public double getSizeRatioWeight() { + return sizeRatioWeight; + } + + /** + * Sets the amount the width/height ratio should be multiplied across all a character's potential matches, to + * increase its effects compared to the actual section similarity. + * + * @param sizeRatioWeight The weight of the width/height ratio + * @return The current {@link OCROptions} object + */ + public OCROptions setSizeRatioWeight(double sizeRatioWeight) { + this.sizeRatioWeight = sizeRatioWeight; + return this; + } +} diff --git a/src/main/java/com/uddernetworks/newocr/train/TrainGenerator.java b/src/main/java/com/uddernetworks/newocr/train/TrainGenerator.java index 2286106..3ada2cf 100644 --- a/src/main/java/com/uddernetworks/newocr/train/TrainGenerator.java +++ b/src/main/java/com/uddernetworks/newocr/train/TrainGenerator.java @@ -1,68 +1,22 @@ package com.uddernetworks.newocr.train; -import javax.imageio.ImageIO; -import java.awt.*; -import java.awt.image.BufferedImage; import java.io.File; -import java.io.IOException; - -public class TrainGenerator { - - private static String trainString = "!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghjiklmnopqrstuvwxyz{|}~W W"; - public static final int UPPER_FONT_BOUND = 90; - public static final int LOWER_FONT_BOUND = 20; - - public static void main(String[] args) { - BufferedImage image = new BufferedImage(1500, 500, BufferedImage.TYPE_INT_ARGB); - Graphics2D graphics = 
image.createGraphics(); - - RenderingHints rht = new RenderingHints(RenderingHints.KEY_TEXT_ANTIALIASING, RenderingHints.VALUE_TEXT_ANTIALIAS_ON); - graphics.setRenderingHints(rht); - - Font font = new Font("Verdana", Font.PLAIN, 92); - graphics.setFont(font); - - int newHeight = 11; - - int size2 = UPPER_FONT_BOUND; - for (int i = 0; i < UPPER_FONT_BOUND - LOWER_FONT_BOUND; i++) { - newHeight += size2 + 11; - size2--; - } - - image = new BufferedImage(graphics.getFontMetrics().stringWidth(trainString) + 50, newHeight, BufferedImage.TYPE_INT_ARGB); - for (int y = 0; y < image.getHeight(); y++) { - for (int x = 0; x < image.getWidth(); x++) { - image.setRGB(x, y, Color.WHITE.getRGB()); - } - } - - graphics = image.createGraphics(); - - RenderingHints rh = new RenderingHints(RenderingHints.KEY_TEXT_ANTIALIASING, RenderingHints.VALUE_TEXT_ANTIALIAS_ON); - graphics.setRenderingHints(rh); - - int size = UPPER_FONT_BOUND; - int offset = UPPER_FONT_BOUND; - for (int i = 0; i < UPPER_FONT_BOUND - LOWER_FONT_BOUND; i++) { - drawLine(graphics, trainString, offset, size); - offset += size + 10; - size--; - } - - try { - ImageIO.write(image, "png", new File("training.png")); - } catch (IOException e) { - e.printStackTrace(); - } - } - - private static void drawLine(Graphics2D drawTo, String line, int yOffset, int size) { - Font font = new Font("Verdana", Font.PLAIN, size); - drawTo.setFont(font); - drawTo.setPaint(Color.BLACK); - - drawTo.drawString(line, 10, yOffset); - } +public interface TrainGenerator { + + /** + * Generates an image that can be used while training the OCR using default options of font bounds 90-30, and a font + * family of Comic Sans MS. + * + * @param file The file to write to + */ + void generateTrainingImage(File file); + + /** + * Generates an image that can be used while training the OCR using the given options. 
+ * + * @param file The file to write to + * @param options The options used during image generation + */ + void generateTrainingImage(File file, TrainGeneratorOptions options); } diff --git a/src/main/java/com/uddernetworks/newocr/train/TrainGeneratorOptions.java b/src/main/java/com/uddernetworks/newocr/train/TrainGeneratorOptions.java new file mode 100644 index 0000000..ff1e569 --- /dev/null +++ b/src/main/java/com/uddernetworks/newocr/train/TrainGeneratorOptions.java @@ -0,0 +1,70 @@ +package com.uddernetworks.newocr.train; + +/** + * Defines options for the actual generation of the image to train on. + */ +public class TrainGeneratorOptions { + private int maxFontSize = 90; + private int minFontSize = 30; + private String fontFamily = ""; + + /** + * Gets the maximum font size to generate up to in points. + * + * @return The maximum font size + */ + public int getMaxFontSize() { + return maxFontSize; + } + + /** + * Sets the maximum font size to generate up to in points. + * + * @param maxFontSize The maximum font size + * @return The current {@link TrainGeneratorOptions} + */ + public TrainGeneratorOptions setMaxFontSize(int maxFontSize) { + this.maxFontSize = maxFontSize; + return this; + } + + /** + * Gets the minimum font size to generate down to in points. + * + * @return The minimum font size + */ + public int getMinFontSize() { + return minFontSize; + } + + /** + * Sets the minimum font size to generate down to in points. + * + * @param minFontSize The minimum font size + * @return The current {@link TrainGeneratorOptions} + */ + public TrainGeneratorOptions setMinFontSize(int minFontSize) { + this.minFontSize = minFontSize; + return this; + } + + /** + * Gets the system font family used during training image generation. + * + * @return The font family + */ + public String getFontFamily() { + return fontFamily; + } + + /** + * Sets the system font family used during training image generation. 
+ * + * @param fontFamily The font family to set + * @return The current {@link TrainGeneratorOptions} + */ + public TrainGeneratorOptions setFontFamily(String fontFamily) { + this.fontFamily = fontFamily; + return this; + } +} diff --git a/src/main/java/com/uddernetworks/newocr/train/UntrainedDatabaseException.java b/src/main/java/com/uddernetworks/newocr/train/UntrainedDatabaseException.java new file mode 100644 index 0000000..78d7455 --- /dev/null +++ b/src/main/java/com/uddernetworks/newocr/train/UntrainedDatabaseException.java @@ -0,0 +1,10 @@ +package com.uddernetworks.newocr.train; + +import com.uddernetworks.newocr.database.DatabaseManager; + +public class UntrainedDatabaseException extends RuntimeException { + + public UntrainedDatabaseException(DatabaseManager databaseManager) { + super("The given database " + databaseManager.getName() + " has not been trained yet."); + } +} diff --git a/src/main/java/com/uddernetworks/newocr/utils/CharacterGettingUtils.java b/src/main/java/com/uddernetworks/newocr/utils/CharacterGettingUtils.java deleted file mode 100644 index 8f8af6b..0000000 --- a/src/main/java/com/uddernetworks/newocr/utils/CharacterGettingUtils.java +++ /dev/null @@ -1,223 +0,0 @@ -package com.uddernetworks.newocr.utils; - -import com.uddernetworks.newocr.CombineMethod; -import com.uddernetworks.newocr.LetterMeta; -import com.uddernetworks.newocr.character.SearchCharacter; -import java.util.Arrays; -import java.util.List; -import java.util.Optional; - -public class CharacterGettingUtils { - - /** - * Gets the base of a character with a dot on top of it and combines it with the found character. 
- * @param dotCharacter The dot character to search from - * @param coordinates The coordinates used by the dotCharacter currently - * @param searchCharacters The SearchCharacter list to check for the base - * @return If a successful combination was made - */ - public static boolean doDotStuff(SearchCharacter dotCharacter, List coordinates, List searchCharacters) { - if (!dotCharacter.isProbablyDot()) { - return false; - } - - Optional baseCharacterOptional = getBaseOfDot(searchCharacters, dotCharacter); - - baseCharacterOptional.ifPresent(baseCharacter -> { - combine(baseCharacter, dotCharacter, coordinates, CombineMethod.DOT, LetterMeta.DOT_ABOVE); - baseCharacter.setHasDot(true); - dotCharacter.setHasDot(true); - }); - - return baseCharacterOptional.isPresent(); - } - - /** - * Gets the base of the percent and adds the given circle/dot to it. - * @param percentDotCharacter The dot character of the percentage - * @param coordinates The coordinates used by the percentDotCharacter currently - * @param searchCharacters The SearchCharacter list to check for the base - * @return If a successful combination was made - */ - public static boolean doPercentStuff(SearchCharacter percentDotCharacter, List coordinates, List searchCharacters) { - if (!percentDotCharacter.isProbablyCircleOfPercent()) return false; - Optional baseCharacterOptional = getBaseForPercent(searchCharacters, percentDotCharacter); - baseCharacterOptional.ifPresent(baseCharacter -> { - combine(baseCharacter, percentDotCharacter, coordinates, CombineMethod.PERCENTAGE_CIRCLE, LetterMeta.PERCENT); - baseCharacter.setHasDot(true); - percentDotCharacter.setHasDot(true); - }); - - return baseCharacterOptional.isPresent(); - } - - /** - * Gets the left apostrophe and adds the given left apostrophe with it. 
- * @param rightApostrophe The apostrophe on the right side - * @param coordinates The coordinates used by the rightApostrophe currently - * @param searchCharacters The SearchCharacter list to check for the base - * @return If a successful combination was made - */ - public static boolean doApostropheStuff(SearchCharacter rightApostrophe, List coordinates, List searchCharacters) { - if (!rightApostrophe.isProbablyApostraphe()) { - return false; - } - - Optional leftApostropheOptional = getLeftApostrophe(searchCharacters, rightApostrophe); - - leftApostropheOptional.ifPresent(leftApostrophe -> { - combine(leftApostrophe, rightApostrophe, coordinates, CombineMethod.APOSTROPHE, LetterMeta.QUOTE); - leftApostrophe.setHasDot(true); - rightApostrophe.setHasDot(true); - }); - - return leftApostropheOptional.isPresent(); - } - - /** - * Combines a given {@link SearchCharacter} with another using one of several methods. - * @param baseCharacter The {@link SearchCharacter} that will be added to - * @param adding The {@link SearchCharacter} that will be added to the baseCharacter - * @param coordinates The coordinates used by the `adding` parameter - * @param combineMethod The method to be used when combining the characters. 
{@link CombineMethod#DOT} and {@link CombineMethod#COLON} do the same thing - * @param letterMeta The {@link LetterMeta} to add to the base character - */ - public static void combine(SearchCharacter baseCharacter, SearchCharacter adding, List coordinates, CombineMethod combineMethod, LetterMeta letterMeta) { - int minX = Math.min(baseCharacter.getX(), adding.getX()); - int minY = Math.min(baseCharacter.getY(), adding.getY()); - int maxX = Math.max(baseCharacter.getX() + baseCharacter.getWidth(), adding.getX() + adding.getWidth()); - int maxY = Math.max(baseCharacter.getY() + baseCharacter.getHeight(), adding.getY() + adding.getHeight()); - - baseCharacter.setWidth(maxX - minX); - baseCharacter.setHeight(maxY - minY); - baseCharacter.setX(minX); - baseCharacter.setY(minY); - baseCharacter.setLetterMeta(letterMeta); - - switch (combineMethod) { - case DOT: - case COLON: - maxX = baseCharacter.getX() + baseCharacter.getWidth(); - maxY = baseCharacter.getY() + baseCharacter.getHeight(); - baseCharacter.setHeight(maxY - adding.getY()); - baseCharacter.setY(adding.getY()); - - int dotMaxX = adding.getX() + adding.getWidth(); - - if (dotMaxX > maxX) { - baseCharacter.setWidth(dotMaxX - baseCharacter.getX()); - } - - baseCharacter.addDot(coordinates); - break; - case PERCENTAGE_CIRCLE: - baseCharacter.addPercentageCircle(coordinates, OCRUtils.isWithin(adding.getY(), baseCharacter.getY(), (double) baseCharacter.getHeight() / 10D)); - break; - case APOSTROPHE: - baseCharacter.addPercentageCircle(coordinates, false); - break; - } - - coordinates.clear(); - } - - /** - * Gets the base of character like i and j from a dot character - * @param characters The list of {@link SearchCharacter}s to search from - * @param dotCharacter The dot character to search from - * @return The {@link SearchCharacter} base Optional - */ - public static Optional getBaseOfDot(List characters, SearchCharacter dotCharacter) { - return characters.parallelStream() - .filter(character -> 
!character.equals(dotCharacter)) - .filter(character -> !character.hasDot()) - .filter(character -> character.isInBounds(dotCharacter.getX() + (dotCharacter.getWidth() / 2), character.getY() + 4)) - .filter(character -> character.getHeight() > dotCharacter.getHeight() * 5) - .filter(baseCharacter -> { - int below = dotCharacter.getY() + dotCharacter.getHeight() + 1; - - return OCRUtils.checkDifference(below, baseCharacter.getY(), dotCharacter.getHeight() + 2); - }) - .findFirst(); - } - - /** - * Gets the dot of a character like ! and ? from a base character - * @param characters The list of {@link SearchCharacter}s to search from - * @param baseCharacter The base character to search from - * @return The {@link SearchCharacter} dot Optional - */ - public static Optional getDotUnderLetter(List characters, SearchCharacter baseCharacter) { - return characters.parallelStream() - .filter(character -> !character.equals(baseCharacter)) - .filter(character -> !character.hasDot()) - .filter(SearchCharacter::isProbablyDot) - .filter(character -> baseCharacter.isInBounds(character.getX() + (character.getWidth() / 2), baseCharacter.getY() + 4)) - .filter(character -> baseCharacter.getHeight() > character.getHeight() * 2) - .filter(dotCharacter -> { - int below = dotCharacter.getY() - dotCharacter.getHeight(); - int mod = dotCharacter.getHeight(); - return OCRUtils.checkDifference(below, baseCharacter.getY() + baseCharacter.getHeight(), mod + 2); - }) - .findFirst(); - } - - /** - * Gets the bottom dot of a character like : and ; from its top dot - * @param characters The list of {@link SearchCharacter}s to search from - * @param topDot The bottom dot to search from - * @return The {@link SearchCharacter} dot Optional - */ - public static Optional getBottomColon(List characters, SearchCharacter topDot) { - return characters.stream() - .filter(character -> !character.equals(topDot)) - .filter(character -> !character.hasDot()) - .filter(character -> 
topDot.isInXBounds(character.getX() + (character.getWidth() / 2))) - .filter(character -> { - double ratio = (double) topDot.getHeight() / (double) character.getHeight(); - if (character.getWidth() * 2 < topDot.getWidth()) return false; - return (ratio >= 0.25 && ratio <= 0.5) || (topDot.getHeight() == character.getHeight() && topDot.getWidth() == character.getWidth()); - }) - .filter(bottomCharacter -> { - double mult = ((double) bottomCharacter.getWidth() / (double) bottomCharacter.getHeight() > 3 && Arrays.deepEquals(bottomCharacter.getValues(), topDot.getValues())) ? 5 : 5; - int mod = (int) (topDot.getHeight() * mult); - - return OCRUtils.checkDifference(bottomCharacter.getY(), topDot.getY() + topDot.getHeight(), mod + 1); - }) - .findFirst(); - } - - /** - * Gets the left apostrophe from the given left apostrophe - * @param characters The list of {@link SearchCharacter}s to search from - * @param rightApostrophe The right apostrophe to search from - * @return The {@link SearchCharacter} dot Optional - */ - public static Optional getLeftApostrophe(List characters, SearchCharacter rightApostrophe) { - return characters.parallelStream() - .filter(SearchCharacter::isProbablyApostraphe) - .filter(character -> character.getY() == rightApostrophe.getY()) - .filter(character -> { - boolean[][] values = character.getValues(); - boolean[][] values2 = rightApostrophe.getValues(); - if (values.length != values2.length || values[0].length != values2[0].length) return false; - - double diff = OCRUtils.getDifferencesFrom2D(values, values2); - return diff <= 0.05; // If it's at least 5% similar - }) - .filter(character -> OCRUtils.isWithin(character.getX() + character.getWidth(), rightApostrophe.getX(), rightApostrophe.getWidth() - 1D, ((double) rightApostrophe.getWidth() * 1.1D) + 4D)) - .findFirst(); - } - - /** - * Gets the base character for the given percent circle/dot character - * @param characters The list of {@link SearchCharacter}s to search from - * @param 
circleOfPercent The circle/dot of the percent to search from - * @return The {@link SearchCharacter} dot Optional - */ - public static Optional getBaseForPercent(List characters, SearchCharacter circleOfPercent) { - return characters.parallelStream() - .filter(searchCharacter -> searchCharacter.isOverlaping(circleOfPercent)) - .findFirst(); - } -} diff --git a/src/main/java/com/uddernetworks/newocr/utils/ConversionUtils.java b/src/main/java/com/uddernetworks/newocr/utils/ConversionUtils.java index 0269722..5f2b085 100644 --- a/src/main/java/com/uddernetworks/newocr/utils/ConversionUtils.java +++ b/src/main/java/com/uddernetworks/newocr/utils/ConversionUtils.java @@ -1,5 +1,8 @@ package com.uddernetworks.newocr.utils; +/** + * Provides simple conversions of units used by the OCR and programs using the library. + */ public class ConversionUtils { /** @@ -9,7 +12,7 @@ public class ConversionUtils { * @return The point value of the pixel */ public static int pixelToPoint(int pixel) { - return (int) Math.round(((double) pixel) / (4D/3D)); + return (int) Math.round(((double) pixel) / (4D / 3D)); } /** @@ -19,7 +22,7 @@ public static int pixelToPoint(int pixel) { * @return The pixel value of the point */ public static int pointToPixel(int point) { - return (int) Math.round(((double) point) * (4D/3D)); + return (int) Math.round(((double) point) * (4D / 3D)); } } diff --git a/src/main/java/com/uddernetworks/newocr/utils/IntPair.java b/src/main/java/com/uddernetworks/newocr/utils/IntPair.java index 8d82c42..30c4816 100644 --- a/src/main/java/com/uddernetworks/newocr/utils/IntPair.java +++ b/src/main/java/com/uddernetworks/newocr/utils/IntPair.java @@ -9,17 +9,17 @@ * @version January 12, 2019 */ public final class IntPair { - + /** * The key of this {@link IntPair}. */ private int key; - + /** * The value of this {@link IntPair}. */ private int value; - + /** * Creates a new {@link IntPair} with the specified key and value. 
* @@ -30,7 +30,7 @@ public IntPair(int key, int value) { this.key = key; this.value = value; } - + /** * {@inheritDoc} */ @@ -39,12 +39,12 @@ public boolean equals(Object o) { if (!(o instanceof IntPair)) { return false; } - + var pair = (IntPair) o; - + return key == pair.key && value == pair.value; } - + /** * {@inheritDoc} */ @@ -52,7 +52,15 @@ public boolean equals(Object o) { public int hashCode() { return Objects.hash(key, value); } - + + /** + * {@inheritDoc} + */ + @Override + public String toString() { + return "IntPair[key = " + key + ", value = " + value + "]"; + } + /** * Gets this {@link IntPair}'s key. * @@ -61,7 +69,16 @@ public int hashCode() { public int getKey() { return key; } - + + /** + * Sets this {@link IntPair}'s key. + * + * @param key The key as an {@code int}. + */ + public void setKey(int key) { + this.key = key; + } + /** * Gets this {@link IntPair}'s value. * @@ -70,5 +87,14 @@ public int getKey() { public int getValue() { return value; } - + + /** + * Sets this {@link IntPair}'s value. + * + * @param value The value as an {@code int}. 
+ */ + public void setValue(int value) { + this.value = value; + } + } diff --git a/src/main/java/com/uddernetworks/newocr/utils/OCRUtils.java b/src/main/java/com/uddernetworks/newocr/utils/OCRUtils.java index bf35c6c..b518f69 100644 --- a/src/main/java/com/uddernetworks/newocr/utils/OCRUtils.java +++ b/src/main/java/com/uddernetworks/newocr/utils/OCRUtils.java @@ -1,29 +1,22 @@ package com.uddernetworks.newocr.utils; -import com.uddernetworks.newocr.character.SearchCharacter; -import it.unimi.dsi.fastutil.ints.IntArrayList; -import it.unimi.dsi.fastutil.ints.IntList; -import java.awt.Color; -import java.awt.Graphics; +import javax.imageio.ImageIO; +import javax.swing.*; +import java.awt.*; import java.awt.image.BufferedImage; import java.io.File; import java.io.IOException; import java.net.MalformedURLException; -import java.util.LinkedList; -import java.util.List; +import java.util.Arrays; import java.util.Optional; +import java.util.OptionalDouble; import java.util.stream.Collectors; -import java.util.stream.Stream; -import javax.imageio.ImageIO; -import javax.swing.ImageIcon; /** * Some various utility methods used by the OCR that may assist others using the library. */ public class OCRUtils { - public static final IntPair ZERO_PLACEHOLDER = new IntPair(0, 0); - /** * An ImageIO.read() replacement, which in tests can be up to 15x faster. This has shown to significantly improve * the OCR's performance both in training and actual usage. @@ -45,6 +38,24 @@ public static BufferedImage readImage(File input) { return bufferedImage; } + /** + * Removes all common spaces between all newlines, useful if the OCR say adds an extra 2 spaces before all lines of + * text, this will remove the 2 spaces. 
+ * + * @param string The input string + * @return The input string trimmed properly + */ + public static String removeLeadingSpaces(String string) { + var split = string.split("\n"); + var commonSpaces = Arrays.stream(split).mapToInt(OCRUtils::countLeadingSpaces).min().orElse(0); + if (commonSpaces == 0) return string; + return Arrays.stream(split).map(line -> line.substring(commonSpaces)).collect(Collectors.joining("\n")); + } + + private static int countLeadingSpaces(String input) { + return input.length() - input.stripLeading().length(); + } + /* * Advanced/Convenient Comparisons */ @@ -56,8 +67,8 @@ public static BufferedImage readImage(File input) { * @param two The second number * @return The difference */ - public static double getDiff(double one, double two) { - return Math.max(one, two) - Math.min(one, two); + public static double diff(double one, double two) { + return Math.abs(one - two); } /** @@ -67,54 +78,8 @@ public static double getDiff(double one, double two) { * @param two The second number * @return The difference */ - public static int getDiff(int one, int two) { - return Math.max(one, two) - Math.min(one, two); - } - - /** - * Gets if two ints are within a given double. - * - * @param one Bound 1 - * @param two Bound 2 - * @param within The number - * @return If one and two are within `within` - */ - public static boolean isWithin(int one, int two, double within) { - double diff = Math.max((double) one, (double) two) - Math.min((double) one, (double) two); - return diff <= within; - } - - /** - * Gets if the difference of the two given ints are between both of the two doubles given. 
- * - * @param one The first number - * @param two The second number - * @param lowerBound The lower bound to check - * @param upperBound The upper bound to check - * @return If the difference of the two given ints are between both of the two doubles given - */ - public static boolean isWithin(int one, int two, double lowerBound, double upperBound) { - double diff = Math.max((double) one, (double) two) - Math.min((double) one, (double) two); - return diff <= upperBound && lowerBound <= diff; - } - - /** - * Gets the percentage difference of two different 2D boolean arrays. - * - * @param input1 The first 2D array - * @param input2 The second 2D array - * @return The percentage difference <= 1 - */ - public static double getDifferencesFrom2D(boolean[][] input1, boolean[][] input2) { - if (input1.length != input2.length) return 1D; - double result = 0; - for (int x = 0; x < input1.length; x++) { - for (int y = 0; y < input1[0].length; y++) { - if (input1[x][y] != input2[x][y]) result++; - } - } - - return result / ((double) input1.length * (double) input1[0].length); + public static int diff(int one, int two) { + return Math.abs(one - two); } /** @@ -124,34 +89,19 @@ public static double getDifferencesFrom2D(boolean[][] input1, boolean[][] input2 * @param input2 The second array * @return An array with the same length as the inputs containing the difference of both arrays' respective values */ - public static Optional getDifferencesFrom(double[] input1, double[] input2) { - if (input1.length != input2.length) return Optional.empty(); - double[] ret = new double[input1.length]; + public static OptionalDouble getDifferencesFrom(double[] input1, double[] input2) { + if (input1 == null || input2 == null || input1.length != input2.length) return OptionalDouble.empty(); + var res = 0D; for (int i = 0; i < input1.length; i++) { - double one = input1[i]; - double two = input2[i]; - - ret[i] = Math.max(one, two) - Math.min(one, two); + res += Math.pow(input1[i] - input2[i], 
2); } - return Optional.of(ret); - } - - /** - * Gets if a given number is within two bounds. The same as {@link #isWithin(double, double, double)} but with ints. - * - * @param lowerBound The lower bound to check - * @param upperBound The upper bound to check - * @param value The value to check - * @return If the two values are within the given bounds - */ - public static boolean isWithin(int lowerBound, int upperBound, int value) { - return lowerBound <= value && value <= upperBound; + return OptionalDouble.of(res); } /** - * Gets if a given number is within two bounds. The same as {@link #isWithin(int, int, double)} but with doubles. + * Gets if a given number is within two bounds. * * @param lowerBound The lower bound to check * @param upperBound The upper bound to check @@ -162,18 +112,6 @@ public static boolean isWithin(double lowerBound, double upperBound, double valu return lowerBound <= value && value <= upperBound; } - /** - * Gets if the difference or two doubles is less than or equal to another given double. - * - * @param num1 The first number - * @param num2 The second number - * @param amount The inclusive amount the difference can be - * @return If the difference is less than or equal to the `amount` - */ - public static boolean checkDifference(double num1, double num2, double amount) { - return Math.max(num1, num2) - Math.min(num1, num2) <= amount; - } - /* * Image-related methods */ @@ -224,65 +162,21 @@ public static boolean isRowPopulated(boolean[][] values, int y) { } /** - * Gets all the characters between the two Y values (The line bounds) form the {@link SearchCharacter} list. 
- * - * @param topY The top Y value of the line - * @param bottomY The bottom Y value of the line - * @param searchCharacters The {@link SearchCharacter} list to check from - * @return The {@link SearchCharacter} objects between the given Y values - */ - public static List findCharactersAtLine(int topY, int bottomY, List searchCharacters) { - return searchCharacters - .stream() - .sorted() - .filter(searchCharacter -> OCRUtils.isWithin(topY, bottomY, searchCharacter.getY())) - .collect(Collectors.toCollection(LinkedList::new)); - } - - /** - * Sets all pixels from input to temp. When running in the program if the System property `newocr.rewrite` is set to - * true, it will write the image to stop any weird image decoding issues + * Binarizes the input image, making all pixels either black or white with an alpha of 255 * - * @param temp The empty image with the same size as the input that will be written to - * @param input The input that will be read from + * @param input The input image to be filtered + * @return The filtered image */ - public static void rewriteImage(BufferedImage temp, BufferedImage input) { - for (int y = 0; y < temp.getHeight(); y++) { - for (int x = 0; x < temp.getWidth(); x++) { - temp.setRGB(x, y, input.getRGB(x, y)); - } - } - } + public static Optional filter(BufferedImage input) { + var result = new BufferedImage(input.getWidth(), input.getHeight(), BufferedImage.TYPE_INT_ARGB); - /** - * Gets if a {@link SearchCharacter} is fully black for things like .
or the sections of = - * - * @param searchCharacter The input {@link SearchCharacter} to check - * @return If the input is all black - */ - public static boolean isAllBlack(SearchCharacter searchCharacter) { - // TODO: Replace with a difference check with threshold and/or a circular check for other fonts - for (boolean[] row : searchCharacter.getValues()) { - for (boolean bool : row) { - if (!bool) return false; + for (int y = 0; y < input.getHeight(); y++) { + for (int x = 0; x < input.getWidth(); x++) { + result.setRGB(x, y, isBlack(input, x, y) ? Color.BLACK.getRGB() : Color.WHITE.getRGB()); } } - return true; - } - - /** - * Binarizes the input image, making all pixels wither black or white with an alpha of 255 - * - * @param bufferedImage The input image to be mutated - */ - public static void filter(BufferedImage bufferedImage) { - for (int y = 0; y < bufferedImage.getHeight(); y++) { - for (int x = 0; x < bufferedImage.getWidth(); x++) { - Color writeColor = isBlack(bufferedImage, x, y) ? 
new Color(0, 0, 0, 255) : new Color(255, 255, 255, 255); - bufferedImage.setRGB(x, y, writeColor.getRGB()); - } - } + return Optional.of(result); } /** @@ -296,200 +190,12 @@ public static void filter(BufferedImage bufferedImage) { public static boolean isBlack(BufferedImage image, int x, int y) { try { Color pixel = new Color(image.getRGB(x, y)); - return (pixel.getRed() + pixel.getGreen() + pixel.getBlue()) / 3 < 255 * 0.75; + return (pixel.getRed() + pixel.getGreen() + pixel.getBlue()) / 3D < 255 * 0.75; } catch (ArrayIndexOutOfBoundsException e) { return true; } } - /* - * Getting array sections - */ - - /** - * Splits a grid of values in half horizontally - * - * @param values The grid to split - * @return A stream of 2 halves, top and bottom - */ - public static Stream getHorizontalHalf(boolean[][] values) { - int topHeight = values.length / 2; - int bottomHeight = values.length - topHeight; - - boolean[][] topHalf = new boolean[topHeight][]; - boolean[][] bottomHalf = new boolean[bottomHeight][]; - - for (int y = 0; y < values.length; y++) { - if (y < topHeight) { - topHalf[y] = values[y]; - } else { - bottomHalf[y - topHeight] = values[y]; - } - } - - return Stream.of(topHalf, bottomHalf).sequential(); - } - - /** - * Splits a grid of values in thirds horizontally - * - * @param values The grid to split - * @return A stream of 3 thirds: top, middle, and bottom - */ - public static Stream getHorizontalThird(boolean[][] values) { - int topHeight = values.length / 3; - int middleHeight = values.length - topHeight * 2; - int bottomHeight = topHeight; - - boolean[][] topThird = new boolean[topHeight][]; - boolean[][] middleThird = new boolean[middleHeight][]; - boolean[][] bottomThird = new boolean[bottomHeight][]; - - for (int y = 0; y < values.length; y++) { - if (y < topHeight) { - topThird[y] = values[y]; - } else if (y < topHeight + middleHeight) { - middleThird[y - topHeight] = values[y]; - } else { - bottomThird[y - topHeight - middleHeight] = values[y]; 
- } - } - - return Stream.of(topThird, middleThird, bottomThird).sequential(); - } - - /** - * Splits a grid of values in half vertically - * - * @param values The grid to split - * @return A stream of 2 halves, left and right - */ - public static Stream getVerticalHalf(boolean[][] values) { - if (values.length == 0) return Stream.of(null, null); - int leftHeight = values[0].length / 2; - int rightHeight = values[0].length - leftHeight; - - boolean[][] leftHalf = new boolean[values.length][]; - boolean[][] rightHalf = new boolean[values.length][]; - - for (int i = 0; i < values.length; i++) { - leftHalf[i] = new boolean[leftHeight]; - rightHalf[i] = new boolean[rightHeight]; - } - - for (int y = 0; y < values.length; y++) { - for (int x = 0; x < values[0].length; x++) { - if (x < leftHeight) { - leftHalf[y][x] = values[y][x]; - } else { - rightHalf[y][x - leftHeight] = values[y][x]; - } - } - } - - return Stream.of(leftHalf, rightHalf).sequential(); - } - - /** - * Splits a grid of values in thirds vertically - * - * @param values The grid to split - * @return A stream of 3 thirds: left, middle, and right - */ - public static Stream getVerticalThird(boolean[][] values) { - if (values.length == 0) return Stream.of(ZERO_PLACEHOLDER, ZERO_PLACEHOLDER, ZERO_PLACEHOLDER); - int leftHeight = values[0].length / 3; - int middleHeight = values[0].length - leftHeight * 2; - - int leftSize = 0, leftTrue = 0; - int middleSize = 0, middleTrue = 0; - int rightSize = 0, rightTrue = 0; - - for (boolean[] line : values) { - for (int x = 0; x < values[0].length; x++) { - if (x < leftHeight) { - if (line[x]) { - leftTrue++; - } - - leftSize++; - } else if (x < middleHeight + leftHeight) { - if (line[x]){ - middleTrue++; - } - - middleSize++; - } else { - if (line[x]) { - rightTrue++; - } - - rightSize++; - } - } - } - - return Stream.of(new IntPair(leftTrue, leftSize), new IntPair(middleTrue, middleSize), new IntPair(rightTrue, rightSize)); - } - - /** - * Splits a grid of values in 
half diagonally. The diagonal line will be going from the top left to bototm right if - * `increasing` is `true`, and top left to bottom right if it is `false`. - * - * @param values The grid to split into halves diagonally - * @param increasing The line's slope will be positive when `true`, and negative when `false`. - * @return A List of 2 halves - */ - public static List getDiagonal(boolean[][] values, boolean increasing) { - int topSize = 0; - int topTrue = 0; - int bottomSize = 0; - int bottomTrue = 0; - - if (values != null) { - double slope = (double) values.length / (double) values[0].length; - - IntList yPositions = new IntArrayList(); - - for (int x = 0; x < values[0].length; x++) { - double y = slope * x; - - if (increasing) { - y = values.length - y; - } - - yPositions.add((int) y); - } - - for (int x = 0; x < values[0].length; x++) { - int yPos = yPositions.get(x); - - for (int y = 0; y < values.length; y++) { - if (y < yPos) { - if (values[y][x]) { - bottomTrue++; - } - - bottomSize++; - } else { - if (values[y][x]) { - topTrue++; - } - - topSize++; - } - } - } - } - - List ret = new LinkedList<>(); - - ret.add(new IntPair(topTrue, topSize)); - ret.add(new IntPair(bottomTrue, bottomSize)); - - return ret; - } - /* * For debugging */ @@ -558,7 +264,7 @@ public static void printOut(boolean[][] values) { System.out.print(bool ? 
"#" : " "); } - System.out.println(""); + System.out.println(); } } } diff --git a/src/main/java/com/uddernetworks/newocr/utils/SegmentationUtils.java b/src/main/java/com/uddernetworks/newocr/utils/SegmentationUtils.java new file mode 100644 index 0000000..dfa4360 --- /dev/null +++ b/src/main/java/com/uddernetworks/newocr/utils/SegmentationUtils.java @@ -0,0 +1,200 @@ +package com.uddernetworks.newocr.utils; + +import it.unimi.dsi.fastutil.ints.IntArrayList; +import it.unimi.dsi.fastutil.ints.IntList; + +import java.util.LinkedList; +import java.util.List; +import java.util.stream.Stream; + +/** + * A utility class for methods to create segments separating character images for training and scanning. + */ +public class SegmentationUtils { + + public static final IntPair ZERO_PLACEHOLDER = new IntPair(0, 0); + + /** + * Splits a grid of values in half horizontally + * + * @param values The grid to split + * @return A stream of 2 halves, top and bottom + */ + public static Stream getHorizontalHalf(boolean[][] values) { + int topHeight = values.length / 2; + int bottomHeight = values.length - topHeight; + + boolean[][] topHalf = new boolean[topHeight][]; + boolean[][] bottomHalf = new boolean[bottomHeight][]; + + for (int y = 0; y < values.length; y++) { + if (y < topHeight) { + topHalf[y] = values[y]; + } else { + bottomHalf[y - topHeight] = values[y]; + } + } + + return Stream.of(topHalf, bottomHalf).sequential(); + } + + /** + * Splits a grid of values in thirds horizontally + * + * @param values The grid to split + * @return A stream of 3 thirds: top, middle, and bottom + */ + public static Stream getHorizontalThird(boolean[][] values) { + int topHeight = values.length / 3; + int middleHeight = values.length - topHeight * 2; + + boolean[][] topThird = new boolean[topHeight][]; + boolean[][] middleThird = new boolean[middleHeight][]; + boolean[][] bottomThird = new boolean[topHeight][]; + + for (int y = 0; y < values.length; y++) { + if (y < topHeight) { + 
topThird[y] = values[y]; + } else if (y < topHeight + middleHeight) { + middleThird[y - topHeight] = values[y]; + } else { + bottomThird[y - topHeight - middleHeight] = values[y]; + } + } + + return Stream.of(topThird, middleThird, bottomThird).sequential(); + } + + /** + * Splits a grid of values in half vertically + * + * @param values The grid to split + * @return A stream of 2 halves, left and right + */ + public static Stream getVerticalHalf(boolean[][] values) { + if (values.length == 0) return Stream.of(null, null); + int leftHeight = values[0].length / 2; + int rightHeight = values[0].length - leftHeight; + + boolean[][] leftHalf = new boolean[values.length][]; + boolean[][] rightHalf = new boolean[values.length][]; + + for (int i = 0; i < values.length; i++) { + leftHalf[i] = new boolean[leftHeight]; + rightHalf[i] = new boolean[rightHeight]; + } + + for (int y = 0; y < values.length; y++) { + for (int x = 0; x < values[0].length; x++) { + if (x < leftHeight) { + leftHalf[y][x] = values[y][x]; + } else { + rightHalf[y][x - leftHeight] = values[y][x]; + } + } + } + + return Stream.of(leftHalf, rightHalf).sequential(); + } + + /** + * Splits a grid of values in thirds vertically + * + * @param values The grid to split + * @return A stream of 3 thirds: left, middle, and right + */ + public static Stream getVerticalThird(boolean[][] values) { + if (values.length == 0) return Stream.of(ZERO_PLACEHOLDER, ZERO_PLACEHOLDER, ZERO_PLACEHOLDER); + int leftHeight = values[0].length / 3; + int middleHeight = values[0].length - leftHeight * 2; + + int leftSize = 0, leftTrue = 0; + int middleSize = 0, middleTrue = 0; + int rightSize = 0, rightTrue = 0; + + for (boolean[] line : values) { + for (int x = 0; x < values[0].length; x++) { + if (x < leftHeight) { + if (line[x]) { + leftTrue++; + } + + leftSize++; + } else if (x < middleHeight + leftHeight) { + if (line[x]) { + middleTrue++; + } + + middleSize++; + } else { + if (line[x]) { + rightTrue++; + } + + rightSize++; + 
} + } + } + + return Stream.of(new IntPair(leftTrue, leftSize), new IntPair(middleTrue, middleSize), new IntPair(rightTrue, rightSize)); + } + + /** + * Splits a grid of values in half diagonally. The diagonal line will be going from the bottom left to top right if + * `increasing` is `true`, and top left to bottom right if it is `false`. + * + * @param values The grid to split into halves diagonally + * @param increasing The line's slope will be positive when `true`, and negative when `false`. + * @return A List of 2 halves + */ + public static List getDiagonal(boolean[][] values, boolean increasing) { + int topSize = 0; + int topTrue = 0; + int bottomSize = 0; + int bottomTrue = 0; + + if (values != null) { + double slope = (double) values.length / (double) values[0].length; + + IntList yPositions = new IntArrayList(); + + for (int x = 0; x < values[0].length; x++) { + double y = slope * x; + + if (increasing) { + y = values.length - y; + } + + yPositions.add((int) y); + } + + for (int x = 0; x < values[0].length; x++) { + int yPos = yPositions.getInt(x); + + for (int y = 0; y < values.length; y++) { + if (y < yPos) { + if (values[y][x]) { + bottomTrue++; + } + + bottomSize++; + } else { + if (values[y][x]) { + topTrue++; + } + + topSize++; + } + } + } + } + + List ret = new LinkedList<>(); + + ret.add(new IntPair(topTrue, topSize)); + ret.add(new IntPair(bottomTrue, bottomSize)); + + return ret; + } + +} diff --git a/src/main/java/module-info.java b/src/main/java/module-info.java index 2a2daa8..810594b 100644 --- a/src/main/java/module-info.java +++ b/src/main/java/module-info.java @@ -1,13 +1,22 @@ module NewOCR { requires java.desktop; requires java.sql; - + requires com.zaxxer.hikari; requires it.unimi.dsi.fastutil; - + requires slf4j.api; + requires typesafe.config; + exports com.uddernetworks.newocr; exports com.uddernetworks.newocr.character; + exports com.uddernetworks.newocr.configuration; exports com.uddernetworks.newocr.database; + exports
com.uddernetworks.newocr.detection; + exports com.uddernetworks.newocr.recognition; + exports com.uddernetworks.newocr.recognition.mergence; + exports com.uddernetworks.newocr.recognition.mergence.rules; + exports com.uddernetworks.newocr.recognition.similarity; + exports com.uddernetworks.newocr.recognition.similarity.rules; exports com.uddernetworks.newocr.train; exports com.uddernetworks.newocr.utils; } \ No newline at end of file diff --git a/src/main/resources/addAverageData.sql b/src/main/resources/addAverageData.sql new file mode 100644 index 0000000..e2c5b9a --- /dev/null +++ b/src/main/resources/addAverageData.sql @@ -0,0 +1 @@ +INSERT INTO data VALUES(?, ?); \ No newline at end of file diff --git a/src/main/resources/addCustomSpace.sql b/src/main/resources/addCustomSpace.sql new file mode 100644 index 0000000..0ec67eb --- /dev/null +++ b/src/main/resources/addCustomSpace.sql @@ -0,0 +1 @@ +INSERT INTO customSpaces VALUES(?, ?) ON DUPLICATE KEY UPDATE value = value; \ No newline at end of file diff --git a/src/main/resources/addLetterSegment.sql b/src/main/resources/addLetterSegment.sql index e88a054..5e000cb 100644 --- a/src/main/resources/addLetterSegment.sql +++ b/src/main/resources/addLetterSegment.sql @@ -1 +1 @@ -INSERT INTO sectionData VALUES (?, ?, ?, ?, ?); \ No newline at end of file +INSERT INTO sectionData VALUES (?, ?, ?, ?); \ No newline at end of file diff --git a/src/main/resources/addLetterSize.sql b/src/main/resources/addLetterSize.sql deleted file mode 100644 index a267ea4..0000000 --- a/src/main/resources/addLetterSize.sql +++ /dev/null @@ -1 +0,0 @@ -INSERT INTO sizing VALUES (?, ?, ?) 
ON DUPLICATE KEY UPDATE letter = letter; \ No newline at end of file diff --git a/src/main/resources/booleanProperties.sql b/src/main/resources/booleanProperties.sql new file mode 100644 index 0000000..1325cb2 --- /dev/null +++ b/src/main/resources/booleanProperties.sql @@ -0,0 +1,4 @@ +CREATE TABLE IF NOT EXISTS boolean_properties ( + name VARCHAR(64), + value INT +); \ No newline at end of file diff --git a/src/main/resources/clearLetterSegments.sql b/src/main/resources/clearLetterSegments.sql index 413cf01..3d3a3bf 100644 --- a/src/main/resources/clearLetterSegments.sql +++ b/src/main/resources/clearLetterSegments.sql @@ -1 +1 @@ -DELETE FROM %s WHERE letter = ? AND minFontSize = ? AND maxFontSize = ?; \ No newline at end of file +DELETE FROM %s WHERE letter = ?; \ No newline at end of file diff --git a/src/main/resources/createLetterEntry.sql b/src/main/resources/createLetterEntry.sql index d3f123f..3aa44f5 100644 --- a/src/main/resources/createLetterEntry.sql +++ b/src/main/resources/createLetterEntry.sql @@ -1 +1 @@ -INSERT INTO letters VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?); \ No newline at end of file +INSERT INTO letters VALUES (?, ?, ?, ?, ?, ?, ?); \ No newline at end of file diff --git a/src/main/resources/customSpaces.sql b/src/main/resources/customSpaces.sql new file mode 100644 index 0000000..3d0680e --- /dev/null +++ b/src/main/resources/customSpaces.sql @@ -0,0 +1,4 @@ +CREATE TABLE IF NOT EXISTS customSpaces ( + letter INT UNIQUE, + value DOUBLE +); \ No newline at end of file diff --git a/src/main/resources/data.sql b/src/main/resources/data.sql new file mode 100644 index 0000000..2646a61 --- /dev/null +++ b/src/main/resources/data.sql @@ -0,0 +1,4 @@ +CREATE TABLE IF NOT EXISTS data ( + name VARCHAR(64), + value DOUBLE +); \ No newline at end of file diff --git a/src/main/resources/fonts/Calibri.conf b/src/main/resources/fonts/Calibri.conf new file mode 100644 index 0000000..6e0099b --- /dev/null +++ b/src/main/resources/fonts/Calibri.conf @@ 
-0,0 +1,11 @@ +include "Default.conf" +language { + properties { + system-name: "Calibri" + } + options { + special-spaces: ["`", "|", "{", "}", "!"] + max-percent-diff-to-merge: 0.5 + size-ratio-weight: 4 + } +} \ No newline at end of file diff --git a/src/main/resources/fonts/ComicSans.conf b/src/main/resources/fonts/ComicSans.conf new file mode 100644 index 0000000..cd62e40 --- /dev/null +++ b/src/main/resources/fonts/ComicSans.conf @@ -0,0 +1,18 @@ +include "Default.conf" +language { + properties { + system-name: "Comic Sans MS" + friendly-name: "Comic Sans" + } + options { + special-spaces: ["`"] + max-percent-diff-to-merge: 0.5 + size-ratio-weight: 5.5 + } + similarities { + vertical-line { + name: vertical-line + letters: [APOSTROPHE, QUOTE_LEFT, QUOTE_RIGHT, PIPE, l, i, EXCLAMATION, PERIOD, COLON_TOP, COLON_BOTTOM, EXCLAMATION_DOT, SEMICOLON_TOP, i_DOT, j_DOT, QUESTION_MARK_BOTTOM] + } + } +} \ No newline at end of file diff --git a/src/main/resources/fonts/Consolas.conf b/src/main/resources/fonts/Consolas.conf new file mode 100644 index 0000000..5bbd2c9 --- /dev/null +++ b/src/main/resources/fonts/Consolas.conf @@ -0,0 +1,11 @@ +include "Default.conf" +language { + properties { + system-name: "Consolas" + } + options { + special-spaces: ["`"] + max-percent-diff-to-merge: 0.5 + size-ratio-weight: 4 + } +} \ No newline at end of file diff --git a/src/main/resources/fonts/CourierNew.conf b/src/main/resources/fonts/CourierNew.conf new file mode 100644 index 0000000..0ba8160 --- /dev/null +++ b/src/main/resources/fonts/CourierNew.conf @@ -0,0 +1,17 @@ +include "Default.conf" +language { + properties { + system-name: "Courier New" + } + options { + special-spaces: ["`", "'", "{", "|", "}"] + max-percent-diff-to-merge: 0.5 + size-ratio-weight: 4 + } + similarities { + percent-base { + name: percent-base + letters: [PERCENT_BASE, FORWARD_SLASH, TILDE] + } + } +} \ No newline at end of file diff --git a/src/main/resources/fonts/Default.conf 
b/src/main/resources/fonts/Default.conf new file mode 100644 index 0000000..8b51e25 --- /dev/null +++ b/src/main/resources/fonts/Default.conf @@ -0,0 +1,42 @@ +language { + properties { + system-name: null + friendly-name: ${language.properties.system-name} + } + options { + special-spaces: ["`"] + max-percent-diff-to-merge: 0.5 + size-ratio-weight: 4 + } + similarities { + dot { + name: dot + letters: [PERIOD, COLON_TOP, COLON_BOTTOM, EXCLAMATION_DOT, SEMICOLON_TOP, i_DOT, j_DOT, QUESTION_MARK_BOTTOM] + } + horizontal-line { + name: horizontal-line + letters: [MINUS, EQUALS_BOTTOM, EQUALS_TOP, UNDERSCORE] + } + percent-dot { + name: percent-dot + letters: [PERCENT_LDOT, PERCENT_RDOT, o] + } + percent-base { + name: percent-base + letters: [PERCENT_BASE, FORWARD_SLASH] + } + vertical-line { + name: vertical-line + letters: [APOSTROPHE, QUOTE_LEFT, QUOTE_RIGHT, PIPE, l, i, EXCLAMATION] + } + } + mergence { + rules: [ + com.uddernetworks.newocr.recognition.mergence.rules.ApostropheMergeRule, + com.uddernetworks.newocr.recognition.mergence.rules.EqualVerticalMergeRule, + com.uddernetworks.newocr.recognition.mergence.rules.OverDotMergeRule, + com.uddernetworks.newocr.recognition.mergence.rules.PercentMergeRule, + com.uddernetworks.newocr.recognition.mergence.rules.UnderDotMergeRule, + ] + } +} \ No newline at end of file diff --git a/src/main/resources/fonts/Monospaced.plain.conf b/src/main/resources/fonts/Monospaced.plain.conf new file mode 100644 index 0000000..6a407fa --- /dev/null +++ b/src/main/resources/fonts/Monospaced.plain.conf @@ -0,0 +1,18 @@ +include "Default.conf" +language { + properties { + system-name: "Monospaced.plain" + friendly-name: "Monospaced" + } + options { + special-spaces: ["`", "'", "|", "{", "}"] + max-percent-diff-to-merge: 0.5 + size-ratio-weight: 4 + } + similarities { + percent-base { + name: percent-base + letters: [PERCENT_BASE, FORWARD_SLASH, TILDE] + } + } +} \ No newline at end of file diff --git 
a/src/main/resources/fonts/Verdana.conf b/src/main/resources/fonts/Verdana.conf new file mode 100644 index 0000000..94b752d --- /dev/null +++ b/src/main/resources/fonts/Verdana.conf @@ -0,0 +1,6 @@ +include "Default.conf" +language { + properties { + system-name: "Verdana" + } +} \ No newline at end of file diff --git a/src/main/resources/getAverageData.sql b/src/main/resources/getAverageData.sql new file mode 100644 index 0000000..9dc5cdb --- /dev/null +++ b/src/main/resources/getAverageData.sql @@ -0,0 +1 @@ +SELECT AVG(value) FROM data WHERE name = ?; \ No newline at end of file diff --git a/src/main/resources/getBooleanProperty.sql b/src/main/resources/getBooleanProperty.sql new file mode 100644 index 0000000..7deb65e --- /dev/null +++ b/src/main/resources/getBooleanProperty.sql @@ -0,0 +1 @@ +SELECT value FROM boolean_properties WHERE name = ? LIMIT 1; \ No newline at end of file diff --git a/src/main/resources/getCustomSpace.sql b/src/main/resources/getCustomSpace.sql new file mode 100644 index 0000000..c310945 --- /dev/null +++ b/src/main/resources/getCustomSpace.sql @@ -0,0 +1 @@ +SELECT value FROM customSpaces WHERE letter = ?; \ No newline at end of file diff --git a/src/main/resources/getLetterEntry.sql b/src/main/resources/getLetterEntry.sql index baaa9dc..e8e731e 100644 --- a/src/main/resources/getLetterEntry.sql +++ b/src/main/resources/getLetterEntry.sql @@ -1 +1 @@ -SELECT avgWidth, avgHeight, minFontSize, maxFontSize, minCenter, maxCenter, hasDot, letterMeta FROM letters WHERE letter = ? AND minFontSize = ? AND maxFontSize = ? ORDER BY letter; \ No newline at end of file +SELECT avgWidth, avgHeight, minCenter, maxCenter FROM letters WHERE letter = ? AND modifier = ? 
ORDER BY letter; \ No newline at end of file diff --git a/src/main/resources/getLetterSize.sql b/src/main/resources/getLetterSize.sql deleted file mode 100644 index 499eddc..0000000 --- a/src/main/resources/getLetterSize.sql +++ /dev/null @@ -1 +0,0 @@ -SELECT size FROM sizing WHERE letter = ? ORDER BY ABS(height - /* inputHeight */ ?); \ No newline at end of file diff --git a/src/main/resources/getSpaceEntry.sql b/src/main/resources/getSpaceEntry.sql index 483b188..2e80938 100644 --- a/src/main/resources/getSpaceEntry.sql +++ b/src/main/resources/getSpaceEntry.sql @@ -1 +1 @@ -SELECT avgWidth, avgHeight, minFontSize, maxFontSize, minCenter, maxCenter FROM letters WHERE isSpace = TRUE ORDER BY letter; \ No newline at end of file +SELECT avgWidth, avgHeight, minCenter, maxCenter FROM letters WHERE isSpace = TRUE ORDER BY letter; \ No newline at end of file diff --git a/src/main/resources/letters.sql b/src/main/resources/letters.sql index c15a282..367b357 100644 --- a/src/main/resources/letters.sql +++ b/src/main/resources/letters.sql @@ -1,13 +1,10 @@ CREATE TABLE IF NOT EXISTS letters ( letter INTEGER, -- The letter the data set is for + modifier INTEGER, -- The modifier number of the letter. E.g. 
different parts of a " avgWidth DOUBLE, -- The average width of all tested character images of this letter avgHeight DOUBLE, -- The average height of all tested character images of this letter - minFontSize INTEGER, -- The minimum font size this data set was trained on - maxFontSize INTEGER, -- The maximum font size this data set was trained on minCenter DOUBLE, maxCenter DOUBLE, - hasDot BOOLEAN, - letterMeta INTEGER, isSpace BOOLEAN, - UNIQUE(letter, minFontSize, maxFontSize) + UNIQUE(letter, modifier) ); \ No newline at end of file diff --git a/src/main/resources/sectionData.sql b/src/main/resources/sectionData.sql index b050c1f..a80c8cb 100644 --- a/src/main/resources/sectionData.sql +++ b/src/main/resources/sectionData.sql @@ -1,7 +1,6 @@ CREATE TABLE IF NOT EXISTS sectionData ( letter INTEGER, - minFontSize INTEGER, - maxFontSize INTEGER, + modifier INTEGER, sectionIndex INTEGER, data DOUBLE ); \ No newline at end of file diff --git a/src/main/resources/selectAllSegments.sql b/src/main/resources/selectAllSegments.sql index acacf5d..e9313b6 100644 --- a/src/main/resources/selectAllSegments.sql +++ b/src/main/resources/selectAllSegments.sql @@ -1 +1 @@ -SELECT CHAR(letter) AS letter, sectionIndex, data FROM sectionData WHERE minFontSize = ? AND maxFontSize = ? 
ORDER BY letter, sectionIndex; \ No newline at end of file +SELECT CHAR(letter) AS letter, modifier, sectionIndex, data FROM sectionData ORDER BY letter, modifier, sectionIndex; \ No newline at end of file diff --git a/src/main/resources/selectSegments.sql b/src/main/resources/selectSegments.sql deleted file mode 100644 index 0e96ffd..0000000 --- a/src/main/resources/selectSegments.sql +++ /dev/null @@ -1 +0,0 @@ -SELECT sectionIndex, data FROM sectionData WHERE letter = ?; \ No newline at end of file diff --git a/src/main/resources/setBooleanProperty.sql b/src/main/resources/setBooleanProperty.sql new file mode 100644 index 0000000..882ebcf --- /dev/null +++ b/src/main/resources/setBooleanProperty.sql @@ -0,0 +1 @@ +INSERT INTO boolean_properties VALUES(?, ?); \ No newline at end of file diff --git a/src/main/resources/sizing.sql b/src/main/resources/sizing.sql deleted file mode 100644 index a2b6d50..0000000 --- a/src/main/resources/sizing.sql +++ /dev/null @@ -1,6 +0,0 @@ -CREATE TABLE IF NOT EXISTS sizing ( - letter INTEGER, - size INTEGER, - height INTEGER, - UNIQUE(letter, height) -); \ No newline at end of file diff --git a/src/test/java/com/uddernetworks/newocr/FontTestNameGenerator.java b/src/test/java/com/uddernetworks/newocr/FontTestNameGenerator.java new file mode 100644 index 0000000..aefa8b2 --- /dev/null +++ b/src/test/java/com/uddernetworks/newocr/FontTestNameGenerator.java @@ -0,0 +1,34 @@ +package com.uddernetworks.newocr; + +import org.junit.jupiter.api.DisplayNameGenerator; + +import java.lang.reflect.Method; + +public class FontTestNameGenerator implements DisplayNameGenerator { + + @Override + public String generateDisplayNameForClass(Class testClass) { + var className = testClass.getSimpleName(); + if (!className.startsWith("Font")) return className; + return className.substring(4); + } + + @Override + public String generateDisplayNameForNestedClass(Class nestedClass) { + return generateDisplayNameForClass(nestedClass); + } + + @Override + 
public String generateDisplayNameForMethod(Class testClass, Method testMethod) { + var className = testClass.getSimpleName(); + var methodName = testMethod.getName(); + var defaultName = className + "#" + methodName; + if (!className.startsWith("Font")) return defaultName; + + if (methodName.equals("accuracyTest")) { + return className.substring(4) + " Accuracy"; + } + + return defaultName; + } +} diff --git a/src/test/java/com/uddernetworks/newocr/OCRHandleTest.java b/src/test/java/com/uddernetworks/newocr/OCRHandleTest.java deleted file mode 100644 index 6d8a561..0000000 --- a/src/test/java/com/uddernetworks/newocr/OCRHandleTest.java +++ /dev/null @@ -1,72 +0,0 @@ -package com.uddernetworks.newocr; - -import com.uddernetworks.newocr.character.ImageLetter; -import com.uddernetworks.newocr.database.DatabaseManager; -import com.uddernetworks.newocr.database.OCRDatabaseManager; -import it.unimi.dsi.fastutil.doubles.DoubleArrayList; -import java.io.File; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.Future; -import org.apache.commons.math3.stat.correlation.PearsonsCorrelation; -import org.junit.Before; -import org.junit.Test; - - -import static org.junit.Assert.assertTrue; - -public class OCRHandleTest { - - private final double ACCURACY = -0.9; // What the accuracy threshold of all tests should (Max of 1) - - private DatabaseManager databaseManager; - private OCRHandle ocrHandle; - private ScannedImage trainImage; - - @Before - public void setUp() throws Exception { - System.out.println("Setting up database..."); - this.databaseManager = new OCRDatabaseManager(new File("database" + File.separator + "ocr_db")); - this.ocrHandle = new OCRHandle(this.databaseManager); - - System.out.println("Scanning training image..."); - this.trainImage = this.ocrHandle.scanImage(new File("src\\test\\resources\\size\\training.png")); - } - - @Test - public void characterSizeRecognizer() throws ExecutionException, InterruptedException { - int 
characterDepth = 20; - - var def = new DoubleArrayList(); - var gen = new DoubleArrayList(); - - for (int i = 0; i < this.trainImage.getLineCount() * characterDepth; i++) { - def.add(i); - gen.add(0D); - } - - for (int i = 0; i < this.trainImage.getLineCount(); i++) { - for (int i1 = 0; i1 < characterDepth; i1++) { - ImageLetter firstOfLine = this.trainImage.getLine(i).get(i1); - - if (firstOfLine.getLetter() == ' ') { // Ignore spaces (Not found in the database) - def.set(i * characterDepth + i1, -2D); // The -2 values will be ignored later - gen.set(i * characterDepth + i1, -2D); - continue; - } - - Future fontSize = this.ocrHandle.getFontSize(firstOfLine); - gen.set(i * characterDepth + i1, (double) fontSize.get()); - } - } - - double[] defArray = def.stream().mapToDouble(Double::doubleValue).filter(val -> val != -2D).toArray(); - double[] genArray = gen.stream().mapToDouble(Double::doubleValue).filter(val -> val != -2).toArray(); - - double coeff = Math.abs(new PearsonsCorrelation().correlation(defArray, genArray)); - - System.out.println("Accuracy is " + coeff); - - assertTrue(coeff >= ACCURACY); - } - -} diff --git a/src/test/java/com/uddernetworks/newocr/fonts/Accuracy.java b/src/test/java/com/uddernetworks/newocr/fonts/Accuracy.java new file mode 100644 index 0000000..643addf --- /dev/null +++ b/src/test/java/com/uddernetworks/newocr/fonts/Accuracy.java @@ -0,0 +1,109 @@ +package com.uddernetworks.newocr.fonts; + +import com.uddernetworks.newocr.ScannedImage; +import com.uddernetworks.newocr.configuration.ConfigReflectionCacher; +import com.uddernetworks.newocr.configuration.FontConfiguration; +import com.uddernetworks.newocr.configuration.HOCONFontConfiguration; +import com.uddernetworks.newocr.database.DatabaseManager; +import com.uddernetworks.newocr.database.OCRDatabaseManager; +import com.uddernetworks.newocr.recognition.OCRScan; +import com.uddernetworks.newocr.recognition.OCRTrain; +import 
com.uddernetworks.newocr.recognition.mergence.DefaultMergenceManager; +import com.uddernetworks.newocr.recognition.similarity.DefaultSimilarityManager; +import com.uddernetworks.newocr.recognition.similarity.SimilarityManager; +import com.uddernetworks.newocr.train.ComputerTrainGenerator; +import com.uddernetworks.newocr.train.OCROptions; +import com.uddernetworks.newocr.train.TrainGeneratorOptions; +import org.bitbucket.cowwoc.diffmatchpatch.DiffMatchPatch; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; + +import static org.bitbucket.cowwoc.diffmatchpatch.DiffMatchPatch.Operation.DELETE; +import static org.bitbucket.cowwoc.diffmatchpatch.DiffMatchPatch.Operation.EQUAL; +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class Accuracy { + + private static Logger LOGGER = LoggerFactory.getLogger(Accuracy.class); + private static final double MINIMUM_SUCCESS_RATE = 98; // Requires at least a 98% success rate + private static final boolean TRIM_SPACES = true; // If spaces before the input should be trimmed, to take into account input image padding + + public static ScannedImage generate(String fontFamily, String configFileName) throws IOException { + var strippedName = fontFamily.replaceAll("[^a-zA-Z\\d\\s:]", "_"); + var databaseManager = new OCRDatabaseManager(new File("src\\test\\resources\\database\\ocr_db_" + strippedName)); + var similarityManager = new DefaultSimilarityManager(); + + var fontConfiguration = new HOCONFontConfiguration(configFileName, new ConfigReflectionCacher()); + var options = fontConfiguration.fetchOptions(); + fontConfiguration.fetchAndApplySimilarities(similarityManager); + + return generate(fontFamily, options, similarityManager, databaseManager, fontConfiguration); + } + + public static ScannedImage generate(String fontFamily, OCROptions options, SimilarityManager similarityManager, DatabaseManager databaseManager, FontConfiguration fontConfiguration) { + 
LOGGER.info("Setting up database..."); + + var readingImage = new File("src\\test\\resources\\training_" + fontFamily.replaceAll("[^a-zA-Z\\d\\s:]", "_") + ".png"); + + var mergenceManager = new DefaultMergenceManager(databaseManager, similarityManager); + var ocrTrain = new OCRTrain(databaseManager, options); + + LOGGER.info("Generating image for {}", fontFamily); + new ComputerTrainGenerator().generateTrainingImage(readingImage, new TrainGeneratorOptions() + .setFontFamily(fontFamily)); + + LOGGER.info("Starting training for {}...", fontFamily); + + var start = System.currentTimeMillis(); + ocrTrain.trainImage(readingImage); + + LOGGER.info("Finished training in {}ms", System.currentTimeMillis() - start); + + // It needs some kind of delay, I need to inspect in the future on what causes this. Check the repo for any + // issues on the matter before reporting. + try { + Thread.sleep(3000); + } catch (InterruptedException e) {} + + fontConfiguration.fetchAndApplyMergeRules(mergenceManager); + var ocrScan = new OCRScan(databaseManager, options, similarityManager, mergenceManager); + + LOGGER.info("Scanning training image..."); + + return ocrScan.scanImage(readingImage); + } + + public void accuracyTest(ScannedImage trainImage) { + var scannedString = trainImage.getPrettyString(); + var diffMatchPath = new DiffMatchPatch(); + var lines = scannedString.split("\n"); + var differences = 0; + for (String line : lines) { + line = TRIM_SPACES ? 
line.trim() : line; + var difference = diffMatchPath.diffMain(line, OCRScan.RAW_STRING); + final int[] insert = {0}; + final int[] delete = {0}; + difference.stream().filter(diff -> diff.operation != EQUAL) + .forEach(diff -> { + if (diff.operation == DELETE) { + delete[0] += diff.text.length(); + } else { + insert[0] += diff.text.length(); + } + }); + differences += Math.max(insert[0], delete[0]); + + System.out.println(line); + } + + var totalChars = lines.length * OCRScan.RAW_STRING.length(); + var accuracy = (Math.round((1 - (double) differences / (double) totalChars) * 100_00D) / 100D); + LOGGER.info("{} errors out of {} at a {}% success rate", differences, totalChars, accuracy); + + assertTrue(accuracy >= MINIMUM_SUCCESS_RATE); // We're looking for at *least* a MINIMUM_SUCCESS_RATE percent success rate + } + +} diff --git a/src/test/java/com/uddernetworks/newocr/fonts/FontCalibri.java b/src/test/java/com/uddernetworks/newocr/fonts/FontCalibri.java new file mode 100644 index 0000000..0b38d33 --- /dev/null +++ b/src/test/java/com/uddernetworks/newocr/fonts/FontCalibri.java @@ -0,0 +1,24 @@ +package com.uddernetworks.newocr.fonts; + +import com.uddernetworks.newocr.FontTestNameGenerator; +import com.uddernetworks.newocr.ScannedImage; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.DisplayNameGeneration; +import org.junit.jupiter.api.Test; + +@DisplayNameGeneration(FontTestNameGenerator.class) +public class FontCalibri extends Accuracy { + + private static ScannedImage trainImage; + + @BeforeAll + public static void setUp() throws Exception { + trainImage = generate("Calibri", "fonts/Calibri"); + } + + @Test + public void accuracyTest() { + super.accuracyTest(trainImage); + } + +} diff --git a/src/test/java/com/uddernetworks/newocr/fonts/FontComicSansMS.java b/src/test/java/com/uddernetworks/newocr/fonts/FontComicSansMS.java new file mode 100644 index 0000000..0164a32 --- /dev/null +++ b/src/test/java/com/uddernetworks/newocr/fonts/FontComicSansMS.java @@ -0,0 
+1,24 @@ +package com.uddernetworks.newocr.fonts; + +import com.uddernetworks.newocr.FontTestNameGenerator; +import com.uddernetworks.newocr.ScannedImage; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.DisplayNameGeneration; +import org.junit.jupiter.api.Test; + +@DisplayNameGeneration(FontTestNameGenerator.class) +public class FontComicSansMS extends Accuracy { + + private static ScannedImage trainImage; + + @BeforeAll + public static void setUp() throws Exception { + trainImage = generate("Comic Sans MS", "fonts/ComicSans"); + } + + @Test + public void accuracyTest() { + super.accuracyTest(trainImage); + } + +} diff --git a/src/test/java/com/uddernetworks/newocr/fonts/FontConsolas.java b/src/test/java/com/uddernetworks/newocr/fonts/FontConsolas.java new file mode 100644 index 0000000..8a9718d --- /dev/null +++ b/src/test/java/com/uddernetworks/newocr/fonts/FontConsolas.java @@ -0,0 +1,24 @@ +package com.uddernetworks.newocr.fonts; + +import com.uddernetworks.newocr.FontTestNameGenerator; +import com.uddernetworks.newocr.ScannedImage; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.DisplayNameGeneration; +import org.junit.jupiter.api.Test; + +@DisplayNameGeneration(FontTestNameGenerator.class) +public class FontConsolas extends Accuracy { + + private static ScannedImage trainImage; + + @BeforeAll + public static void setUp() throws Exception { + trainImage = generate("Consolas", "fonts/Consolas"); + } + + @Test + public void accuracyTest() { + super.accuracyTest(trainImage); + } + +} diff --git a/src/test/java/com/uddernetworks/newocr/fonts/FontCourierNew.java b/src/test/java/com/uddernetworks/newocr/fonts/FontCourierNew.java new file mode 100644 index 0000000..4ff9770 --- /dev/null +++ b/src/test/java/com/uddernetworks/newocr/fonts/FontCourierNew.java @@ -0,0 +1,24 @@ +package com.uddernetworks.newocr.fonts; + +import com.uddernetworks.newocr.FontTestNameGenerator; +import com.uddernetworks.newocr.ScannedImage; 
+import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.DisplayNameGeneration; +import org.junit.jupiter.api.Test; + +@DisplayNameGeneration(FontTestNameGenerator.class) +public class FontCourierNew extends Accuracy { + + private static ScannedImage trainImage; + + @BeforeAll + public static void setUp() throws Exception { + trainImage = generate("Courier New", "fonts/CourierNew"); + } + + @Test + public void accuracyTest() { + super.accuracyTest(trainImage); + } + +} diff --git a/src/test/java/com/uddernetworks/newocr/fonts/FontMonospaced.java b/src/test/java/com/uddernetworks/newocr/fonts/FontMonospaced.java new file mode 100644 index 0000000..7697cbd --- /dev/null +++ b/src/test/java/com/uddernetworks/newocr/fonts/FontMonospaced.java @@ -0,0 +1,24 @@ +package com.uddernetworks.newocr.fonts; + +import com.uddernetworks.newocr.FontTestNameGenerator; +import com.uddernetworks.newocr.ScannedImage; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.DisplayNameGeneration; +import org.junit.jupiter.api.Test; + +@DisplayNameGeneration(FontTestNameGenerator.class) +public class FontMonospaced extends Accuracy { + + private static ScannedImage trainImage; + + @BeforeAll + public static void setUp() throws Exception { + trainImage = generate("Monospaced.plain", "fonts/Monospaced.plain"); + } + + @Test + public void accuracyTest() { + super.accuracyTest(trainImage); + } + +} diff --git a/src/test/java/com/uddernetworks/newocr/fonts/FontVerdana.java b/src/test/java/com/uddernetworks/newocr/fonts/FontVerdana.java new file mode 100644 index 0000000..92770ed --- /dev/null +++ b/src/test/java/com/uddernetworks/newocr/fonts/FontVerdana.java @@ -0,0 +1,24 @@ +package com.uddernetworks.newocr.fonts; + +import com.uddernetworks.newocr.FontTestNameGenerator; +import com.uddernetworks.newocr.ScannedImage; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.DisplayNameGeneration; +import org.junit.jupiter.api.Test; + 
+@DisplayNameGeneration(FontTestNameGenerator.class) +public class FontVerdana extends Accuracy { + + private static ScannedImage trainImage; + + @BeforeAll + public static void setUp() throws Exception { + trainImage = generate("Verdana", "fonts/Verdana"); + } + + @Test + public void accuracyTest() { + super.accuracyTest(trainImage); + } + +} diff --git a/src/test/resources/log4j.xml b/src/test/resources/log4j.xml new file mode 100644 index 0000000..6e547a0 --- /dev/null +++ b/src/test/resources/log4j.xml @@ -0,0 +1,14 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/src/test/resources/size/training.png b/src/test/resources/size/training.png deleted file mode 100644 index 3025d85..0000000 Binary files a/src/test/resources/size/training.png and /dev/null differ diff --git a/training.png b/training.png deleted file mode 100644 index 3025d85..0000000 Binary files a/training.png and /dev/null differ