From ba9b5739d77f712938c7c05098044b950204576f Mon Sep 17 00:00:00 2001
From: Beckenb
Date: Tue, 12 Dec 2017 12:29:18 +0100
Subject: [PATCH 1/2] Added option to load training images through file
Added the option to pass the list of training images through a file (one path per line), avoiding the "Argument list too long" error when passing too many files on the command line.
---
ocropus-rtrain | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/ocropus-rtrain b/ocropus-rtrain
index 9d365ae3..8ab290d1 100755
--- a/ocropus-rtrain
+++ b/ocropus-rtrain
@@ -72,10 +72,20 @@ parser.add_argument("-q","--quiet",action="store_true")
parser.add_argument("-Q","--nocheck",action="store_true")
parser.add_argument("-p","--pad",type=int,default=16)
+# add file
+parser.add_argument("-f","--file",default=None,help="input the training data as a file list")
+
parser.add_argument("files",nargs="*")
args = parser.parse_args()
inputs = ocrolib.glob_all(args.files)
+
+if args.file is not None:
+ print("getting training data from file")
+ with open(args.file) as file:
+ for l in file:
+ inputs.append(l.rstrip())
+
if len(inputs)==0:
parser.print_help()
sys.exit(0)
From 7bbc37cacb2896c121fd719581d7d844ca31e20e Mon Sep 17 00:00:00 2001
From: Konstantin Baierer
Date: Tue, 12 Dec 2017 13:05:56 +0100
Subject: [PATCH 2/2] test ocropus-rtrain --file/-f feature, help message
---
ocropus-rtrain | 2 +-
run-test-ci | 37 +++++++++++++++++++++++++++++--------
2 files changed, 30 insertions(+), 9 deletions(-)
diff --git a/ocropus-rtrain b/ocropus-rtrain
index 8ab290d1..1e92e3cb 100755
--- a/ocropus-rtrain
+++ b/ocropus-rtrain
@@ -73,7 +73,7 @@ parser.add_argument("-Q","--nocheck",action="store_true")
parser.add_argument("-p","--pad",type=int,default=16)
# add file
-parser.add_argument("-f","--file",default=None,help="input the training data as a file list")
+parser.add_argument("-f","--file",default=None,help="path to file listing input files, one per line")
parser.add_argument("files",nargs="*")
args = parser.parse_args()
diff --git a/run-test-ci b/run-test-ci
index 1b3f70e5..b481cdc5 100755
--- a/run-test-ci
+++ b/run-test-ci
@@ -1,5 +1,12 @@
#!/bin/bash -e
+# Usage: ./run-test-ci [tests...]
+#
+# Examples:
+#
+# ./run-test-ci # Run all tests
+# ./run-test-ci page conf # Run only 'test_page' and 'test_conf'
+#
BASE=$(dirname $0)
# 'RUNNER' is the binary that the scripts are executed by. It defaults to
@@ -61,6 +68,13 @@ test_rtrain() {
$RUNNER $BASE/ocropus-rtrain 'book/*/*.bin.png' -N 5 -o ci-test-model
}
+test_rtrain_files() { # train with the input list supplied via -f instead of a glob argument
+ tar -zxf $BASE/tests/uw3-500.tgz # extract the test archive into the current directory
+ find 'book' -name '*.bin.png' > INPUT_FILES # write matching paths, one per line
+ $RUNNER $BASE/ocropus-rtrain -f INPUT_FILES -N 5 -o ci-test-model
+ rm INPUT_FILES # clean up the list; skipped if the previous command fails, since the script runs with -e
+}
+
test_nlbin() {
local TESTIMAGE=0071-010012.png
cp $BASE/tests/$TESTIMAGE temp
@@ -100,11 +114,18 @@ test_gtedit() {
rm -rf temp
mkdir -p temp
-test_page
-test_conf
-test_linegen
-test_rtrain
-test_nlbin
-test_gpageseg
-test_rpred
-test_gtedit
+if (( $# > 0 ));then # arguments given: run only the named tests
+ for test in "$@";do
+ test_$test # each argument NAME invokes the function test_NAME
+ done
+else # no arguments: run every test
+ test_page
+ test_conf
+ test_linegen
+ test_rtrain
+ test_rtrain_files
+ test_nlbin
+ test_gpageseg
+ test_rpred
+ test_gtedit
+fi